Merge pull request #240 from PhilipDaniels/next

Filter ignored content in page.rs.
This commit is contained in:
Vincent Prouillet 2018-02-27 08:37:26 +01:00 committed by GitHub
commit f218f2eaf5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 177 additions and 16 deletions

15
Cargo.lock generated
View file

@ -172,6 +172,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"errors 0.1.0", "errors 0.1.0",
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"highlighting 0.1.0", "highlighting 0.1.0",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
@ -185,6 +186,7 @@ dependencies = [
"config 0.1.0", "config 0.1.0",
"errors 0.1.0", "errors 0.1.0",
"front_matter 0.1.0", "front_matter 0.1.0",
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rendering 0.1.0", "rendering 0.1.0",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
@ -364,6 +366,18 @@ name = "glob"
version = "0.2.11" version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "gutenberg" name = "gutenberg"
version = "0.3.1" version = "0.3.1"
@ -1546,6 +1560,7 @@ dependencies = [
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" "checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
"checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05" "checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e96ab92362c06811385ae9a34d2698e8a1160745e0c78fbb434a44c8de3fabc"
"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" "checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37"
"checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" "checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e"
"checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2" "checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2"

View file

@ -8,6 +8,7 @@ toml = "0.4"
serde = "1" serde = "1"
serde_derive = "1" serde_derive = "1"
chrono = "0.4" chrono = "0.4"
globset = "0.3.0"
errors = { path = "../errors" } errors = { path = "../errors" }
highlighting = { path = "../highlighting"} highlighting = { path = "../highlighting"}

View file

@ -5,6 +5,7 @@ extern crate toml;
extern crate errors; extern crate errors;
extern crate highlighting; extern crate highlighting;
extern crate chrono; extern crate chrono;
extern crate globset;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::File; use std::fs::File;
@ -13,6 +14,7 @@ use std::path::{Path, PathBuf};
use toml::{Value as Toml}; use toml::{Value as Toml};
use chrono::Utc; use chrono::Utc;
use globset::{Glob, GlobSet, GlobSetBuilder};
use errors::{Result, ResultExt}; use errors::{Result, ResultExt};
use highlighting::THEME_SET; use highlighting::THEME_SET;
@ -22,7 +24,7 @@ mod theme;
use theme::Theme; use theme::Theme;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Config { pub struct Config {
/// Base URL of the site, the only required config argument /// Base URL of the site, the only required config argument
pub base_url: String, pub base_url: String,
@ -49,6 +51,12 @@ pub struct Config {
pub generate_categories_pages: Option<bool>, pub generate_categories_pages: Option<bool>,
/// Whether to compile the `sass` directory and output the css files into the static folder /// Whether to compile the `sass` directory and output the css files into the static folder
pub compile_sass: Option<bool>, pub compile_sass: Option<bool>,
/// A list of file glob patterns to ignore when processing the content folder. Defaults to none.
/// Had to remove the PartialEq derive because GlobSet does not implement it. No impact
/// because it's unused anyway (who wants to compare Configs for equality?).
pub ignored_content: Option<Vec<String>>,
#[serde(skip_serializing, skip_deserializing)]
pub ignored_content_globber: Option<GlobSet>,
/// Languages list and translated strings /// Languages list and translated strings
pub translations: Option<HashMap<String, Toml>>, pub translations: Option<HashMap<String, Toml>>,
@ -84,6 +92,7 @@ impl Config {
set_default!(config.generate_tags_pages, false); set_default!(config.generate_tags_pages, false);
set_default!(config.generate_categories_pages, false); set_default!(config.generate_categories_pages, false);
set_default!(config.compile_sass, false); set_default!(config.compile_sass, false);
set_default!(config.ignored_content, Vec::new());
set_default!(config.translations, HashMap::new()); set_default!(config.translations, HashMap::new());
set_default!(config.extra, HashMap::new()); set_default!(config.extra, HashMap::new());
@ -97,6 +106,25 @@ impl Config {
}; };
config.build_timestamp = Some(Utc::now().timestamp()); config.build_timestamp = Some(Utc::now().timestamp());
// Convert the file glob strings into a compiled glob set matcher. We want to do this once,
// at program initialization, rather than for every page, for example. We arrange for the
// globset matcher to always exist (even though it has to be inside an Option at the
// moment because of the TOML serializer); if the glob set is empty the `is_match` function
// of the globber always returns false.
let mut glob_set_builder = GlobSetBuilder::new();
if let Some(ref v) = config.ignored_content {
for pat in v {
let glob = match Glob::new(pat) {
Ok(g) => g,
Err(e) => bail!("Invalid ignored_content glob pattern: {}, error = {}", pat, e)
};
glob_set_builder.add(glob);
}
}
config.ignored_content_globber = Some(glob_set_builder.build().expect("Bad ignored_content in config file."));
Ok(config) Ok(config)
} }
@ -176,6 +204,8 @@ impl Default for Config {
generate_tags_pages: Some(true), generate_tags_pages: Some(true),
generate_categories_pages: Some(true), generate_categories_pages: Some(true),
compile_sass: Some(false), compile_sass: Some(false),
ignored_content: Some(Vec::new()),
ignored_content_globber: Some(GlobSetBuilder::new().build().unwrap()),
translations: None, translations: None,
extra: None, extra: None,
build_timestamp: Some(1), build_timestamp: Some(1),
@ -330,4 +360,51 @@ title = "A title"
assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title"); assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title");
} }
#[test]
fn missing_ignored_content_results_in_empty_vector_and_empty_globber() {
    // The config below has no `ignored_content` key at all; parsing must still
    // produce an empty pattern list and an empty (but present) glob set.
    let raw_toml = r#"
title = "My site"
base_url = "example.com"
"#;

    let parsed = Config::parse(raw_toml).unwrap();

    let patterns = parsed.ignored_content.unwrap();
    assert_eq!(patterns.len(), 0);

    let globber = parsed.ignored_content_globber.unwrap();
    assert!(globber.is_empty());
}
#[test]
fn empty_ignored_content_results_in_empty_vector_and_empty_globber() {
    // An explicitly empty `ignored_content` array must yield an empty pattern
    // list and an empty glob set.
    let raw_toml = r#"
title = "My site"
base_url = "example.com"
ignored_content = []
"#;

    let parsed = Config::parse(raw_toml).unwrap();

    let patterns = parsed.ignored_content.unwrap();
    assert_eq!(patterns.len(), 0);

    let globber = parsed.ignored_content_globber.unwrap();
    assert!(globber.is_empty());
}
#[test]
fn non_empty_ignored_content_results_in_vector_of_patterns_and_configured_globber() {
    // Two patterns are configured: a brace alternation and a single-character
    // wildcard. Both the raw strings and the compiled glob set are checked.
    let raw_toml = r#"
title = "My site"
base_url = "example.com"
ignored_content = ["*.{graphml,iso}", "*.py?"]
"#;

    let parsed = Config::parse(raw_toml).unwrap();

    // The pattern strings are kept verbatim, in order.
    let patterns = parsed.ignored_content.unwrap();
    assert_eq!(patterns, vec!["*.{graphml,iso}", "*.py?"]);

    // One compiled glob per configured pattern.
    let globber = parsed.ignored_content_globber.unwrap();
    assert_eq!(globber.len(), 2);

    // First pattern: `*.{graphml,iso}`.
    assert!(globber.is_match("foo.graphml"));
    assert!(globber.is_match("foo.iso"));
    assert!(!globber.is_match("foo.png"));

    // Second pattern: `*.py?` requires exactly one character after `py`.
    assert!(globber.is_match("foo.py2"));
    assert!(globber.is_match("foo.py3"));
    assert!(!globber.is_match("foo.py"));
}
} }

View file

@ -18,3 +18,4 @@ front_matter = { path = "../front_matter" }
[dev-dependencies] [dev-dependencies]
tempdir = "0.3" tempdir = "0.3"
toml = "0.4" toml = "0.4"
globset = "0.3.0"

View file

@ -13,6 +13,8 @@ extern crate utils;
extern crate tempdir; extern crate tempdir;
#[cfg(test)] #[cfg(test)]
extern crate toml; extern crate toml;
#[cfg(test)]
extern crate globset;
mod file_info; mod file_info;
mod page; mod page;

View file

@ -128,10 +128,27 @@ impl Page {
let path = path.as_ref(); let path = path.as_ref();
let content = read_file(path)?; let content = read_file(path)?;
let mut page = Page::parse(path, &content, config)?; let mut page = Page::parse(path, &content, config)?;
page.assets = vec![];
if page.file.name == "index" { if page.file.name == "index" {
page.assets = find_related_assets(path.parent().unwrap()); // `find_related_assets` only scans the immediate directory (it is not recursive) so our
// filtering only needs to work against the file_name component, not the full suffix. If
// `find_related_assets` was changed to also return files in subdirectories, we could
// use `PathBuf.strip_prefix` to remove the parent directory and then glob-filter
// against the remaining path. Note that the current behaviour effectively means that
// the `ignored_content` setting in the config file is limited to single-file glob
// patterns (no "**" patterns).
let globber = config.ignored_content_globber.as_ref().unwrap();
let parent_dir = path.parent().unwrap();
page.assets = find_related_assets(parent_dir).into_iter()
.filter(|path|
match path.file_name() {
None => true,
Some(file) => !globber.is_match(file)
}
).collect();
} else {
page.assets = vec![];
} }
Ok(page) Ok(page)
@ -240,6 +257,7 @@ mod tests {
use tera::Tera; use tera::Tera;
use tempdir::TempDir; use tempdir::TempDir;
use globset::{Glob, GlobSetBuilder};
use config::Config; use config::Config;
use super::Page; use super::Page;
@ -419,4 +437,34 @@ Hello world
assert_eq!(page.assets.len(), 3); assert_eq!(page.assets.len(), 3);
assert_eq!(page.permalink, "http://a-website.com/posts/hey/"); assert_eq!(page.permalink, "http://a-website.com/posts/hey/");
} }
#[test]
fn page_with_ignored_assets_filters_out_correct_files() {
    // Build content/posts/with-assets/ inside a throwaway directory.
    let tmp_dir = TempDir::new("example").expect("create temp dir");
    let content_dir = tmp_dir.path().join("content");
    create_dir(&content_dir).expect("create content temp dir");
    let posts_dir = content_dir.join("posts");
    create_dir(&posts_dir).expect("create posts temp dir");
    let nested_path = posts_dir.join("with-assets");
    create_dir(&nested_path).expect("create nested temp dir");

    // The page itself, plus three colocated asset files.
    let index_path = nested_path.join("index.md");
    let mut index_file = File::create(&index_path).unwrap();
    index_file.write_all(b"+++\nslug=\"hey\"\n+++\n").unwrap();
    for asset_name in ["example.js", "graph.jpg", "fail.png"].iter() {
        File::create(nested_path.join(*asset_name)).unwrap();
    }

    // Ignore `.js` and `.png` files; only the `.jpg` should be kept.
    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.{js,png}").unwrap());
    let mut config = Config::default();
    config.ignored_content_globber = Some(builder.build().unwrap());

    let page = Page::from_file(index_path.as_path(), &config).unwrap();

    let kept_names: Vec<_> = page.assets
        .iter()
        .map(|asset| asset.file_name().unwrap().to_str())
        .collect();
    assert_eq!(kept_names, vec![Some("graph.jpg")]);
}
} }

View file

@ -5,8 +5,8 @@ weight = 10
Gutenberg uses the folder structure to determine the site structure. Gutenberg uses the folder structure to determine the site structure.
Each folder in the `content` directory represents a [section](./documentation/content/section.md) Each folder in the `content` directory represents a [section](./documentation/content/section.md)
that contains [pages](./documentation/content/page.md): your `.md` files. that contains [pages](./documentation/content/page.md): your `.md` files.
```bash ```bash
. .
@ -26,21 +26,21 @@ that contains [pages](./documentation/content/page.md): your `.md` files.
Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug` Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug`
attribute of the [page front-matter](./documentation/content/page.md#front-matter). attribute of the [page front-matter](./documentation/content/page.md#front-matter).
You might have noticed a file named `_index.md` in the example above. You might have noticed a file named `_index.md` in the example above.
This file will be used for the metadata and content of the section itself and is not considered a page. This file will be used for the metadata and content of the section itself and is not considered a page.
To make sure the terminology used in the rest of the documentation is understood, let's go over the example above. To make sure the terminology used in the rest of the documentation is understood, let's go over the example above.
The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only
one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md`
and `installation.md`. and `installation.md`.
While not shown in the example, sections can be nested indefinitely. While not shown in the example, sections can be nested indefinitely.
## Assets colocation ## Assets colocation
The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related
assets. assets.
Gutenberg supports that pattern out of the box: create a folder, add a `index.md` file and as many non-markdown files as you want. Gutenberg supports that pattern out of the box: create a folder, add a `index.md` file and as many non-markdown files as you want.
Those assets will be copied in the same folder when building the site which allows you to use a relative path to access them. Those assets will be copied in the same folder when building the site which allows you to use a relative path to access them.
@ -52,3 +52,14 @@ Those assets will be copied in the same folder when building the site which allo
``` ```
By default, this page will get the folder name (`with-assets` in this case) as its slug. By default, this page will get the folder name (`with-assets` in this case) as its slug.
It is possible to ignore selected asset files using the
[ignored_content](./documentation/getting-started/configuration.md) setting in the config file.
For example, say you have an Excel spreadsheet from which you are taking several screenshots and
then linking to those image files on your website. For maintainability purposes, you want to keep
the spreadsheet in the same folder as the markdown, but you don't want to copy the spreadsheet to
the public website. You can achieve this by simply setting `ignored_content` in the config file:
```
ignored_content = ["*.xlsx"]
```

View file

@ -3,10 +3,10 @@ title = "Configuration"
weight = 4 weight = 4
+++ +++
The default configuration will be enough to get Gutenberg running locally but not more than that. The default configuration will be enough to get Gutenberg running locally but not more than that.
It follows the philosophy of only paying for what you need: almost everything is turned off by default. It follows the philosophy of only paying for what you need: almost everything is turned off by default.
To change the config, edit the `config.toml` file. To change the config, edit the `config.toml` file.
If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml) If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml)
to learn about it. to learn about it.
@ -30,7 +30,7 @@ theme = ""
# Highlight all code blocks found # Highlight all code blocks found
highlight_code = false highlight_code = false
# Which theme to use for the code highlighting. # Which theme to use for the code highlighting.
# See below for list of accepted values # See below for list of accepted values
highlight_theme = "base16-ocean-dark" highlight_theme = "base16-ocean-dark"
@ -40,21 +40,27 @@ generate_rss = false
# The number of articles to include in the RSS feed # The number of articles to include in the RSS feed
rss_limit = 20 rss_limit = 20
# Whether to generate a tags page and individual # Whether to generate a tags page and individual
# tag pages for pages with tags # tag pages for pages with tags
generate_tags_pages = false generate_tags_pages = false
# Whether to generate a categories page and individual # Whether to generate a categories page and individual
# category pages for pages with a category # category pages for pages with a category
generate_categories_pages = false generate_categories_pages = false
# Whether to compile the Sass files found in the `sass` directory # Whether to compile the Sass files found in the `sass` directory
compile_sass = false compile_sass = false
# A list of glob patterns specifying asset files to ignore when
# copying content. Defaults to none, which means all asset files
# are copied over to the public folder. Example:
# ignored_content = ["*.{graphml,xlsx}", "temp.*"]
ignored_content = []
# Optional translation object. The key if present should be a language code # Optional translation object. The key if present should be a language code
[translations] [translations]
# You can put any kind of data in there and it # You can put any kind of data in there and it
# will be accessible in all templates # will be accessible in all templates
[extra] [extra]
``` ```
@ -76,5 +82,5 @@ Gutenberg currently has the following highlight themes available:
- solarized-light - solarized-light
- 1337 - 1337
Gutenberg uses the Sublime Text themes, making it very easy to add more. Gutenberg uses the Sublime Text themes, making it very easy to add more.
If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg). If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg).