Merge pull request #240 from PhilipDaniels/next

Filter ignored content in page.rs.
This commit is contained in:
Vincent Prouillet 2018-02-27 08:37:26 +01:00 committed by GitHub
commit f218f2eaf5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 177 additions and 16 deletions

15
Cargo.lock generated
View file

@ -172,6 +172,7 @@ version = "0.1.0"
dependencies = [
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"errors 0.1.0",
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"highlighting 0.1.0",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
@ -185,6 +186,7 @@ dependencies = [
"config 0.1.0",
"errors 0.1.0",
"front_matter 0.1.0",
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rendering 0.1.0",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
@ -364,6 +366,18 @@ name = "glob"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "gutenberg"
version = "0.3.1"
@ -1546,6 +1560,7 @@ dependencies = [
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
"checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e96ab92362c06811385ae9a34d2698e8a1160745e0c78fbb434a44c8de3fabc"
"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37"
"checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e"
"checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2"

View file

@ -8,6 +8,7 @@ toml = "0.4"
serde = "1"
serde_derive = "1"
chrono = "0.4"
globset = "0.3.0"
errors = { path = "../errors" }
highlighting = { path = "../highlighting"}

View file

@ -5,6 +5,7 @@ extern crate toml;
extern crate errors;
extern crate highlighting;
extern crate chrono;
extern crate globset;
use std::collections::HashMap;
use std::fs::File;
@ -13,6 +14,7 @@ use std::path::{Path, PathBuf};
use toml::{Value as Toml};
use chrono::Utc;
use globset::{Glob, GlobSet, GlobSetBuilder};
use errors::{Result, ResultExt};
use highlighting::THEME_SET;
@ -22,7 +24,7 @@ mod theme;
use theme::Theme;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Config {
/// Base URL of the site, the only required config argument
pub base_url: String,
@ -49,6 +51,12 @@ pub struct Config {
pub generate_categories_pages: Option<bool>,
/// Whether to compile the `sass` directory and output the css files into the static folder
pub compile_sass: Option<bool>,
/// A list of file glob patterns to ignore when processing the content folder. Defaults to none.
/// The `PartialEq` derive had to be removed from `Config` because `GlobSet` does not implement
/// it. No impact because the derive was unused anyway (nobody compares Configs for equality).
pub ignored_content: Option<Vec<String>>,
#[serde(skip_serializing, skip_deserializing)]
pub ignored_content_globber: Option<GlobSet>,
/// Languages list and translated strings
pub translations: Option<HashMap<String, Toml>>,
@ -84,6 +92,7 @@ impl Config {
set_default!(config.generate_tags_pages, false);
set_default!(config.generate_categories_pages, false);
set_default!(config.compile_sass, false);
set_default!(config.ignored_content, Vec::new());
set_default!(config.translations, HashMap::new());
set_default!(config.extra, HashMap::new());
@ -97,6 +106,25 @@ impl Config {
};
config.build_timestamp = Some(Utc::now().timestamp());
// Compile the `ignored_content` glob strings into a single glob set matcher. We want to do
// this once, at program initialization, rather than for every page. We arrange for the
// globset matcher to always exist (even though it has to live inside an `Option` at the
// moment because of the TOML serializer); if the glob set is empty, the `is_match` function
// of the globber always returns false.
let mut glob_set_builder = GlobSetBuilder::new();
if let Some(ref patterns) = config.ignored_content {
    for pat in patterns {
        match Glob::new(pat) {
            Ok(glob) => {
                glob_set_builder.add(glob);
            }
            Err(e) => bail!("Invalid ignored_content glob pattern: {}, error = {}", pat, e),
        }
    }
}
// The patterns come from the user's config file, so a failure to build the combined matcher
// is reported as an error, like an invalid individual pattern, instead of panicking.
config.ignored_content_globber = match glob_set_builder.build() {
    Ok(globber) => Some(globber),
    Err(e) => bail!("Bad ignored_content in config file, error = {}", e),
};
Ok(config)
}
@ -176,6 +204,8 @@ impl Default for Config {
generate_tags_pages: Some(true),
generate_categories_pages: Some(true),
compile_sass: Some(false),
ignored_content: Some(Vec::new()),
ignored_content_globber: Some(GlobSetBuilder::new().build().unwrap()),
translations: None,
extra: None,
build_timestamp: Some(1),
@ -330,4 +360,51 @@ title = "A title"
assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title");
}
#[test]
fn missing_ignored_content_results_in_empty_vector_and_empty_globber() {
    // The config below has no `ignored_content` key at all; parsing must still produce an
    // empty pattern list and an empty (never matching) glob set.
    let config_str = r#"
title = "My site"
base_url = "example.com"
"#;

    let Config { ignored_content, ignored_content_globber, .. } =
        Config::parse(config_str).unwrap();

    let patterns = ignored_content.unwrap();
    assert_eq!(patterns.len(), 0);
    assert!(ignored_content_globber.unwrap().is_empty());
}
#[test]
fn empty_ignored_content_results_in_empty_vector_and_empty_globber() {
    // An explicitly empty `ignored_content` list must behave exactly like a missing one.
    let config_str = r#"
title = "My site"
base_url = "example.com"
ignored_content = []
"#;

    let Config { ignored_content, ignored_content_globber, .. } =
        Config::parse(config_str).unwrap();

    assert_eq!(ignored_content.unwrap().len(), 0);
    assert!(ignored_content_globber.unwrap().is_empty());
}
#[test]
fn non_empty_ignored_content_results_in_vector_of_patterns_and_configured_globber() {
    // Two patterns: a brace alternation and a single-character wildcard.
    let config_str = r#"
title = "My site"
base_url = "example.com"
ignored_content = ["*.{graphml,iso}", "*.py?"]
"#;

    let Config { ignored_content, ignored_content_globber, .. } =
        Config::parse(config_str).unwrap();

    // The raw pattern strings are kept as written in the config file.
    let patterns = ignored_content.unwrap();
    assert_eq!(patterns, vec!["*.{graphml,iso}", "*.py?"]);

    // The compiled matcher holds one glob per pattern.
    let globber = ignored_content_globber.unwrap();
    assert_eq!(globber.len(), 2);

    // `*.{graphml,iso}` matches either extension and nothing else.
    assert!(globber.is_match("foo.graphml"));
    assert!(globber.is_match("foo.iso"));
    assert!(!globber.is_match("foo.png"));

    // `*.py?` requires exactly one character after `py`.
    assert!(globber.is_match("foo.py2"));
    assert!(globber.is_match("foo.py3"));
    assert!(!globber.is_match("foo.py"));
}
}

View file

@ -18,3 +18,4 @@ front_matter = { path = "../front_matter" }
[dev-dependencies]
tempdir = "0.3"
toml = "0.4"
globset = "0.3.0"

View file

@ -13,6 +13,8 @@ extern crate utils;
extern crate tempdir;
#[cfg(test)]
extern crate toml;
#[cfg(test)]
extern crate globset;
mod file_info;
mod page;

View file

@ -128,10 +128,27 @@ impl Page {
let path = path.as_ref();
let content = read_file(path)?;
let mut page = Page::parse(path, &content, config)?;
page.assets = vec![];
if page.file.name == "index" {
page.assets = find_related_assets(path.parent().unwrap());
// `find_related_assets` only scans the immediate directory (it is not recursive) so our
// filtering only needs to work against the file_name component, not the full suffix. If
// `find_related_assets` was changed to also return files in subdirectories, we could
// use `PathBuf.strip_prefix` to remove the parent directory and then glob-filter
// against the remaining path. Note that the current behaviour effectively means that
// the `ignored_content` setting in the config file is limited to single-file glob
// patterns (no "**" patterns).
let globber = config.ignored_content_globber.as_ref().unwrap();
let parent_dir = path.parent().unwrap();
page.assets = find_related_assets(parent_dir).into_iter()
.filter(|path|
match path.file_name() {
None => true,
Some(file) => !globber.is_match(file)
}
).collect();
} else {
page.assets = vec![];
}
Ok(page)
@ -240,6 +257,7 @@ mod tests {
use tera::Tera;
use tempdir::TempDir;
use globset::{Glob, GlobSetBuilder};
use config::Config;
use super::Page;
@ -419,4 +437,34 @@ Hello world
assert_eq!(page.assets.len(), 3);
assert_eq!(page.permalink, "http://a-website.com/posts/hey/");
}
#[test]
fn page_with_ignored_assets_filters_out_correct_files() {
    // Lay out content/posts/with-assets/ inside a fresh temporary directory.
    let tmp_dir = TempDir::new("example").expect("create temp dir");
    let content_dir = tmp_dir.path().join("content");
    let posts_dir = content_dir.join("posts");
    let nested_path = posts_dir.join("with-assets");
    create_dir(&content_dir).expect("create content temp dir");
    create_dir(&posts_dir).expect("create posts temp dir");
    create_dir(&nested_path).expect("create nested temp dir");

    // The page itself, followed by three co-located asset files.
    let index_path = nested_path.join("index.md");
    let mut index_file = File::create(&index_path).unwrap();
    index_file.write_all(b"+++\nslug=\"hey\"\n+++\n").unwrap();
    for asset_name in &["example.js", "graph.jpg", "fail.png"] {
        File::create(nested_path.join(asset_name)).unwrap();
    }

    // Ignore the .js and .png assets; only the .jpg should survive the filtering.
    let globber = {
        let mut builder = GlobSetBuilder::new();
        builder.add(Glob::new("*.{js,png}").unwrap());
        builder.build().unwrap()
    };
    let config = Config {
        ignored_content_globber: Some(globber),
        ..Config::default()
    };

    let res = Page::from_file(index_path.as_path(), &config);
    assert!(res.is_ok());

    let page = res.unwrap();
    assert_eq!(page.assets.len(), 1);
    assert_eq!(page.assets[0].file_name().unwrap().to_str(), Some("graph.jpg"));
}
}

View file

@ -52,3 +52,14 @@ Those assets will be copied in the same folder when building the site which allo
```
By default, this page will get the folder name (`with-assets` in this case) as its slug.
It is possible to ignore selected asset files using the
[ignored_content](./documentation/getting-started/configuration.md) setting in the config file.
For example, say you have an Excel spreadsheet from which you are taking several screenshots and
then linking to those image files on your website. For maintainability purposes, you want to keep
the spreadsheet in the same folder as the Markdown file, but you don't want to copy the spreadsheet
to the public website. You can achieve this by simply setting `ignored_content` in the config file:
```
ignored_content = ["*.xlsx"]
```

View file

@ -51,6 +51,12 @@ generate_categories_pages = false
# Whether to compile the Sass files found in the `sass` directory
compile_sass = false
# A list of glob patterns specifying asset files to ignore when
# copying content. Defaults to none, which means all asset files
# are copied over to the public folder. Patterns are matched against
# the file name only, so "**" directory patterns are not supported.
# Example: ignored_content = ["*.{graphml,xlsx}", "temp.*"]
ignored_content = []
# Optional translation object. The key if present should be a language code
[translations]