Optionally do not slugify paths (#875)

* maybe_slugify() only does simple sanitation if config.slugify is false * slugify is disabled by default, turn on for backwards-compatibility * First docs changes for optional slugification * Remove # from slugs but not & * Add/fix tests for utf8 slugs * Fix test sites for i18n slugs * fix templates tests for i18n slugs * Rename slugify setting to slugify_paths * Default slugify_paths * Update documentation for slugify_paths * quasi_slugify removes ?, /, # and newlines * Remove forbidden NTFS chars in quasi_slugify() * Slugification forbidden chars can be configured * Remove trailing dot/space in quasi_slugify * Fix NTFS path sanitation * Revert configurable slugification charset * Remove \r for windows newlines and \t tabulations in quasi_slugify() * Update docs for output paths * Replace slugify with slugify_paths * Fix test * Default to not slugifying * Move slugs utils to utils crate * Use slugify_paths for anchors as well
2019-12-21 10:44:13 +01:00 · 2019-12-21 10:44:13 +01:00 · ceb9bc8ed7
parent 0a0b6a3ad4
commit ceb9bc8ed7
21 changed files with 515 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@ -25,3 +25,5 @@ stage

 # nixos dependencies snippet
 shell.nix
+# vim temporary files
+**/.*.sw*
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,8 @@
 ### Breaking
 - Remove `toc` variable in section/page context and pass it to `page.toc` and `section.toc` instead so they are
 accessible everywhere
+- [Slugification](https://en.wikipedia.org/wiki/Slug_(web_publishing)#Slug) of page paths is now optional. By default, every path is still slugified, exactly as before.
+To keep non-ASCII characters, set `slugify_paths = false` in your config.

 ### Other
 - Add zenburn syntax highlighting theme
--- a/Cargo.lock
+++ b/Cargo.lock
@ -344,10 +344,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "bincode"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@ -1141,7 +1140,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "jpeg-decoder 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)",
+ "jpeg-decoder 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)",
 "num-iter 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
 "num-rational 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "num-traits 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1223,7 +1222,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "jpeg-decoder"
-version = "0.1.16"
+version = "0.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1275,7 +1274,6 @@ dependencies = [
 "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
 "slotmap 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "slug 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "tera 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "toml 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2099,7 +2097,6 @@ dependencies = [
 "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
- "slug 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "syntect 3.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "templates 0.1.0",
 "tera 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2520,7 +2517,7 @@ name = "syntect"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bincode 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
 "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3009,6 +3006,7 @@ version = "0.1.0"
 dependencies = [
 "errors 0.1.0",
 "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
+ "slug 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "tera 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "toml 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3259,7 +3257,7 @@ dependencies = [
 "checksum backtrace-sys 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6575f128516de27e3ce99689419835fce9643a9b215a14d2b5b685be018491"
 "checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
 "checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
-"checksum bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8ab639324e3ee8774d296864fbc0dbbb256cf1a41c490b94cba90c082915f92"
+"checksum bincode 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5753e2a71534719bf3f4e57006c3a4f0d2c672a4b676eec84161f763eca87dbf"
 "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 "checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
 "checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
@ -3351,7 +3349,7 @@ dependencies = [
 "checksum iovec 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
 "checksum ipconfig 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa79fa216fbe60834a9c0737d7fcd30425b32d1c58854663e24d4c4b328ed83f"
 "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
-"checksum jpeg-decoder 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "c1aae18ffeeae409c6622c3b6a7ee49792a7e5a062eea1b135fbb74e301792ba"
+"checksum jpeg-decoder 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)" = "0256f0aec7352539102a9efbcb75543227b7ab1117e0f95450023af730128451"
 "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
 "checksum language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a91d884b6667cd606bb5a69aa0c99ba811a115fc68915e7056ec08a46e93199a"
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
--- a/components/config/src/config.rs
+++ b/components/config/src/config.rs
@ -130,6 +130,8 @@ pub struct Config {
    /// key into different language.
    translations: HashMap<String, TranslateTerm>,

+    /// Whether to slugify page and taxonomy URLs (disable for UTF-8 URLs)
+    pub slugify_paths: bool,
    /// Whether to highlight all code blocks found in markdown files. Defaults to false
    pub highlight_code: bool,
    /// Which themes to use for code highlighting. See Readme for supported themes
@ -354,6 +356,7 @@ impl Default for Config {
            title: None,
            description: None,
            theme: None,
+            slugify_paths: true,
            highlight_code: false,
            highlight_theme: "base16-ocean-dark".to_string(),
            default_language: "en".to_string(),
--- a/components/library/Cargo.toml
+++ b/components/library/Cargo.toml
@ -10,7 +10,6 @@ chrono = { version = "0.4", features = ["serde"] }
 tera = "1"
 serde = "1"
 serde_derive = "1"
-slug = "0.1"
 regex = "1"
 lazy_static = "1"

--- a/components/library/src/content/page.rs
+++ b/components/library/src/content/page.rs
@ -4,7 +4,6 @@ use std::path::{Path, PathBuf};

 use regex::Regex;
 use slotmap::DefaultKey;
-use slug::slugify;
 use tera::{Context as TeraContext, Tera};

 use config::Config;
@ -19,6 +18,7 @@ use utils::templates::render_template;
 use content::file_info::FileInfo;
 use content::has_anchor;
 use content::ser::SerializingPage;
+use utils::slugs::maybe_slugify_paths;

 lazy_static! {
    // Based on https://regex101.com/r/H2n38Z/1/tests
@ -160,21 +160,21 @@ impl Page {

        page.slug = {
            if let Some(ref slug) = page.meta.slug {
-                slugify(&slug.trim())
+                maybe_slugify_paths(&slug.trim(), config.slugify_paths)
            } else if page.file.name == "index" {
                if let Some(parent) = page.file.path.parent() {
                    if let Some(slug) = slug_from_dated_filename {
-                        slugify(&slug)
+                        maybe_slugify_paths(&slug, config.slugify_paths)
                    } else {
-                        slugify(parent.file_name().unwrap().to_str().unwrap())
+                        maybe_slugify_paths(parent.file_name().unwrap().to_str().unwrap(), config.slugify_paths)
                    }
                } else {
-                    slugify(&page.file.name)
+                    maybe_slugify_paths(&page.file.name, config.slugify_paths)
                }
            } else if let Some(slug) = slug_from_dated_filename {
-                slugify(&slug)
+                maybe_slugify_paths(&slug, config.slugify_paths)
            } else {
-                slugify(&page.file.name)
+                maybe_slugify_paths(&page.file.name, config.slugify_paths)
            }
        };

@ -443,7 +443,8 @@ Hello world"#;
    slug = "hello-&-world"
    +++
    Hello world"#;
-        let config = Config::default();
+        let mut config = Config::default();
+        config.slugify_paths = true;
        let res = Page::parse(Path::new("start.md"), content, &config, &PathBuf::new());
        assert!(res.is_ok());
        let page = res.unwrap();
@ -452,6 +453,23 @@ Hello world"#;
        assert_eq!(page.permalink, config.make_permalink("hello-world"));
    }

+    /// A non-ASCII `slug` from the front matter is kept verbatim in the page
+    /// path, components and permalink when `slugify_paths` is turned off.
+    #[test]
+    fn can_make_url_from_utf8_slug_frontmatter() {
+        let mut config = Config::default();
+        config.slugify_paths = false;
+        let raw_page = r#"
+    +++
+    slug = "日本"
+    +++
+    Hello world"#;
+        let parsed = Page::parse(Path::new("start.md"), raw_page, &config, &PathBuf::new());
+        assert!(parsed.is_ok());
+        let page = parsed.unwrap();
+        let expected_permalink = config.make_permalink("日本");
+        assert_eq!(page.path, "日本/");
+        assert_eq!(page.components, vec!["日本"]);
+        assert_eq!(page.permalink, expected_permalink);
+    }
+
    #[test]
    fn can_make_url_from_path() {
        let content = r#"
@ -508,7 +526,8 @@ Hello world"#;

    #[test]
    fn can_make_slug_from_non_slug_filename() {
-        let config = Config::default();
+        let mut config = Config::default();
+        config.slugify_paths = true;
        let res =
            Page::parse(Path::new(" file with space.md"), "+++\n+++", &config, &PathBuf::new());
        assert!(res.is_ok());
@ -517,6 +536,17 @@ Hello world"#;
        assert_eq!(page.permalink, config.make_permalink(&page.slug));
    }

+    /// With `slugify_paths` disabled, a non-ASCII filename is used verbatim as
+    /// the page slug and permalink.
+    #[test]
+    fn can_make_path_from_utf8_filename() {
+        let mut config = Config::default();
+        config.slugify_paths = false;
+        // Empty front matter: exactly three `+` on each delimiter line, matching
+        // the sibling filename tests
+        let res = Page::parse(Path::new("日本.md"), "+++\n+++", &config, &PathBuf::new());
+        assert!(res.is_ok());
+        let page = res.unwrap();
+        assert_eq!(page.slug, "日本");
+        assert_eq!(page.permalink, config.make_permalink(&page.slug));
+    }
+
    #[test]
    fn can_specify_summary() {
        let config = Config::default();
--- a/components/library/src/lib.rs
+++ b/components/library/src/lib.rs
@ -1,5 +1,4 @@
 extern crate serde;
-extern crate slug;
 extern crate tera;
 #[macro_use]
 extern crate serde_derive;
--- a/components/library/src/taxonomies/mod.rs
+++ b/components/library/src/taxonomies/mod.rs
@ -1,7 +1,6 @@
 use std::collections::HashMap;

 use slotmap::DefaultKey;
-use slug::slugify;
 use tera::{Context, Tera};

 use config::{Config, Taxonomy as TaxonomyConfig};
@ -10,6 +9,7 @@ use utils::templates::render_template;

 use content::SerializingPage;
 use library::Library;
+use utils::slugs::maybe_slugify_paths;
 use sorting::sort_pages_by_date;

 #[derive(Debug, Clone, PartialEq, Serialize)]
@ -69,7 +69,7 @@ impl TaxonomyItem {
            })
            .collect();
        let (mut pages, ignored_pages) = sort_pages_by_date(data);
-        let slug = slugify(name);
+        let slug = maybe_slugify_paths(name, config.slugify_paths);
        let permalink = if taxonomy.lang != config.default_language {
            config.make_permalink(&format!("/{}/{}/{}", taxonomy.lang, taxonomy.name, slug))
        } else {
@ -169,7 +169,6 @@ impl Taxonomy {
            self.items.iter().map(|i| SerializedTaxonomyItem::from_item(i, library)).collect();
        context.insert("terms", &terms);
        context.insert("taxonomy", &self.kind);
-        context.insert("lang", &self.kind.lang);
        context.insert("current_url", &config.make_permalink(&self.kind.name));
        context.insert("current_path", &self.kind.name);

@ -331,6 +330,101 @@ mod tests {
        assert_eq!(categories.items[1].pages.len(), 1);
    }

+    /// Taxonomy term names are turned into ASCII slugs (and slugified
+    /// permalinks) when `slugify_paths` is enabled.
+    #[test]
+    fn can_make_slugified_taxonomies() {
+        let mut config = Config::default();
+        // Set explicitly so the test does not depend on the config default
+        config.slugify_paths = true;
+        let mut library = Library::new(2, 0, false);
+
+        config.taxonomies = vec![
+            TaxonomyConfig {
+                name: "categories".to_string(),
+                lang: config.default_language.clone(),
+                ..TaxonomyConfig::default()
+            },
+            TaxonomyConfig {
+                name: "tags".to_string(),
+                lang: config.default_language.clone(),
+                ..TaxonomyConfig::default()
+            },
+            TaxonomyConfig {
+                name: "authors".to_string(),
+                lang: config.default_language.clone(),
+                ..TaxonomyConfig::default()
+            },
+        ];
+
+        let mut page1 = Page::default();
+        let mut taxo_page1 = HashMap::new();
+        taxo_page1.insert("tags".to_string(), vec!["rust".to_string(), "db".to_string()]);
+        taxo_page1.insert("categories".to_string(), vec!["Programming tutorials".to_string()]);
+        page1.meta.taxonomies = taxo_page1;
+        page1.lang = config.default_language.clone();
+        library.insert_page(page1);
+
+        let mut page2 = Page::default();
+        let mut taxo_page2 = HashMap::new();
+        taxo_page2.insert("tags".to_string(), vec!["rust".to_string(), "js".to_string()]);
+        taxo_page2.insert("categories".to_string(), vec!["Other".to_string()]);
+        page2.meta.taxonomies = taxo_page2;
+        page2.lang = config.default_language.clone();
+        library.insert_page(page2);
+
+        let mut page3 = Page::default();
+        let mut taxo_page3 = HashMap::new();
+        taxo_page3.insert("tags".to_string(), vec!["js".to_string()]);
+        taxo_page3.insert("authors".to_string(), vec!["Vincent Prouillet".to_string()]);
+        page3.meta.taxonomies = taxo_page3;
+        page3.lang = config.default_language.clone();
+        library.insert_page(page3);
+
+        let taxonomies = find_taxonomies(&config, &library).unwrap();
+        // Pick each taxonomy out by name so the assertions below do not depend
+        // on the order of the returned list
+        let (tags, categories, authors) = {
+            let mut t = None;
+            let mut c = None;
+            let mut a = None;
+            for x in taxonomies {
+                match x.kind.name.as_ref() {
+                    "tags" => t = Some(x),
+                    "categories" => c = Some(x),
+                    "authors" => a = Some(x),
+                    _ => unreachable!(),
+                }
+            }
+            (t.unwrap(), c.unwrap(), a.unwrap())
+        };
+        assert_eq!(tags.items.len(), 3);
+        assert_eq!(categories.items.len(), 2);
+        assert_eq!(authors.items.len(), 1);
+
+        assert_eq!(tags.items[0].name, "db");
+        assert_eq!(tags.items[0].slug, "db");
+        assert_eq!(tags.items[0].permalink, "http://a-website.com/tags/db/");
+        assert_eq!(tags.items[0].pages.len(), 1);
+
+        assert_eq!(tags.items[1].name, "js");
+        assert_eq!(tags.items[1].slug, "js");
+        assert_eq!(tags.items[1].permalink, "http://a-website.com/tags/js/");
+        assert_eq!(tags.items[1].pages.len(), 2);
+
+        assert_eq!(tags.items[2].name, "rust");
+        assert_eq!(tags.items[2].slug, "rust");
+        assert_eq!(tags.items[2].permalink, "http://a-website.com/tags/rust/");
+        assert_eq!(tags.items[2].pages.len(), 2);
+
+        assert_eq!(categories.items[0].name, "Other");
+        assert_eq!(categories.items[0].slug, "other");
+        assert_eq!(categories.items[0].permalink, "http://a-website.com/categories/other/");
+        assert_eq!(categories.items[0].pages.len(), 1);
+
+        assert_eq!(categories.items[1].name, "Programming tutorials");
+        assert_eq!(categories.items[1].slug, "programming-tutorials");
+        assert_eq!(
+            categories.items[1].permalink,
+            "http://a-website.com/categories/programming-tutorials/"
+        );
+        assert_eq!(categories.items[1].pages.len(), 1);
+    }
+
    #[test]
    fn errors_on_unknown_taxonomy() {
        let mut config = Config::default();
@ -466,4 +560,155 @@ mod tests {
        );
        assert_eq!(categories.items[1].pages.len(), 1);
    }
+
+    /// With `slugify_paths` disabled, non-ASCII taxonomy and term names are
+    /// kept as-is in the term slug and permalink.
+    #[test]
+    fn can_make_utf8_taxonomies() {
+        let mut config = Config::default();
+        config.slugify_paths = false;
+        config.languages.push(Language {
+            rss: false,
+            code: "fr".to_string(),
+            ..Language::default()
+        });
+        let mut library = Library::new(2, 0, true);
+
+        config.taxonomies = vec![TaxonomyConfig {
+            name: "catégories".to_string(),
+            lang: "fr".to_string(),
+            ..TaxonomyConfig::default()
+        }];
+
+        let mut page = Page::default();
+        page.lang = "fr".to_string();
+        let mut taxo_page = HashMap::new();
+        taxo_page.insert("catégories".to_string(), vec!["Écologie".to_string()]);
+        page.meta.taxonomies = taxo_page;
+        library.insert_page(page);
+
+        let taxonomies = find_taxonomies(&config, &library).unwrap();
+        let categories = &taxonomies[0];
+
+        assert_eq!(categories.items.len(), 1);
+        assert_eq!(categories.items[0].name, "Écologie");
+        // The slug itself must be left untouched, as in the slugified sibling
+        // tests which assert `slug` next to `permalink`
+        assert_eq!(categories.items[0].slug, "Écologie");
+        assert_eq!(
+            categories.items[0].permalink,
+            "http://a-website.com/fr/catégories/Écologie/"
+        );
+        assert_eq!(categories.items[0].pages.len(), 1);
+    }
+
+    /// Checks slugified term slugs and permalinks when taxonomies are declared
+    /// for both the default language and French, with `slugify_paths` enabled.
+    #[test]
+    fn can_make_slugified_taxonomies_in_multiple_languages() {
+        let mut config = Config::default();
+        config.slugify_paths = true;
+        config.languages.push(Language {
+            rss: false,
+            code: "fr".to_string(),
+            ..Language::default()
+        });
+        let mut library = Library::new(2, 0, true);
+
+        // "tags" is declared once per language; "categories" only exists for
+        // the default language and "auteurs" only for French
+        config.taxonomies = vec![
+            TaxonomyConfig {
+                name: "categories".to_string(),
+                lang: config.default_language.clone(),
+                ..TaxonomyConfig::default()
+            },
+            TaxonomyConfig {
+                name: "tags".to_string(),
+                lang: config.default_language.clone(),
+                ..TaxonomyConfig::default()
+            },
+            TaxonomyConfig {
+                name: "auteurs".to_string(),
+                lang: "fr".to_string(),
+                ..TaxonomyConfig::default()
+            },
+            TaxonomyConfig {
+                name: "tags".to_string(),
+                lang: "fr".to_string(),
+                ..TaxonomyConfig::default()
+            },
+        ];
+
+        let mut page1 = Page::default();
+        let mut taxo_page1 = HashMap::new();
+        taxo_page1.insert("tags".to_string(), vec!["rust".to_string(), "db".to_string()]);
+        taxo_page1.insert("categories".to_string(), vec!["Programming tutorials".to_string()]);
+        page1.meta.taxonomies = taxo_page1;
+        page1.lang = config.default_language.clone();
+        library.insert_page(page1);
+
+        let mut page2 = Page::default();
+        let mut taxo_page2 = HashMap::new();
+        taxo_page2.insert("tags".to_string(), vec!["rust".to_string()]);
+        taxo_page2.insert("categories".to_string(), vec!["Other".to_string()]);
+        page2.meta.taxonomies = taxo_page2;
+        page2.lang = config.default_language.clone();
+        library.insert_page(page2);
+
+        // The only French page
+        let mut page3 = Page::default();
+        page3.lang = "fr".to_string();
+        let mut taxo_page3 = HashMap::new();
+        taxo_page3.insert("tags".to_string(), vec!["rust".to_string()]);
+        taxo_page3.insert("auteurs".to_string(), vec!["Vincent Prouillet".to_string()]);
+        page3.meta.taxonomies = taxo_page3;
+        library.insert_page(page3);
+
+        let taxonomies = find_taxonomies(&config, &library).unwrap();
+        let (tags, categories, authors) = {
+            let mut t = None;
+            let mut c = None;
+            let mut a = None;
+            for x in taxonomies {
+                match x.kind.name.as_ref() {
+                    // Two taxonomies are named "tags"; only the English one is
+                    // kept for the assertions below
+                    "tags" => {
+                        if x.kind.lang == "en" {
+                            t = Some(x)
+                        }
+                    }
+                    "categories" => c = Some(x),
+                    "auteurs" => a = Some(x),
+                    _ => unreachable!(),
+                }
+            }
+            (t.unwrap(), c.unwrap(), a.unwrap())
+        };
+
+        assert_eq!(tags.items.len(), 2);
+        assert_eq!(categories.items.len(), 2);
+        assert_eq!(authors.items.len(), 1);
+
+        assert_eq!(tags.items[0].name, "db");
+        assert_eq!(tags.items[0].slug, "db");
+        assert_eq!(tags.items[0].permalink, "http://a-website.com/tags/db/");
+        assert_eq!(tags.items[0].pages.len(), 1);
+
+        assert_eq!(tags.items[1].name, "rust");
+        assert_eq!(tags.items[1].slug, "rust");
+        assert_eq!(tags.items[1].permalink, "http://a-website.com/tags/rust/");
+        assert_eq!(tags.items[1].pages.len(), 2);
+
+        // French terms get the language code as a permalink prefix
+        assert_eq!(authors.items[0].name, "Vincent Prouillet");
+        assert_eq!(authors.items[0].slug, "vincent-prouillet");
+        assert_eq!(
+            authors.items[0].permalink,
+            "http://a-website.com/fr/auteurs/vincent-prouillet/"
+        );
+        assert_eq!(authors.items[0].pages.len(), 1);
+
+        assert_eq!(categories.items[0].name, "Other");
+        assert_eq!(categories.items[0].slug, "other");
+        assert_eq!(categories.items[0].permalink, "http://a-website.com/categories/other/");
+        assert_eq!(categories.items[0].pages.len(), 1);
+
+        assert_eq!(categories.items[1].name, "Programming tutorials");
+        assert_eq!(categories.items[1].slug, "programming-tutorials");
+        assert_eq!(
+            categories.items[1].permalink,
+            "http://a-website.com/categories/programming-tutorials/"
+        );
+        assert_eq!(categories.items[1].pages.len(), 1);
+    }
+
 }
--- a/components/rendering/Cargo.toml
+++ b/components/rendering/Cargo.toml
@ -7,7 +7,6 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"]
 tera = { version = "1", features = ["preserve_order"] }
 syntect = "=3.2.0"
 pulldown-cmark = "0.6"
-slug = "0.1"
 serde = "1"
 serde_derive = "1"
 pest = "2"
--- a/components/rendering/src/lib.rs
+++ b/components/rendering/src/lib.rs
@ -1,5 +1,4 @@
 extern crate pulldown_cmark;
-extern crate slug;
 extern crate syntect;
 extern crate tera;
 #[macro_use]
--- a/components/rendering/src/markdown.rs
+++ b/components/rendering/src/markdown.rs
@ -1,6 +1,5 @@
 use pulldown_cmark as cmark;
 use regex::Regex;
-use slug::slugify;
 use syntect::easy::HighlightLines;
 use syntect::html::{
    start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground,
@ -13,6 +12,7 @@ use front_matter::InsertAnchor;
 use table_of_contents::{make_table_of_contents, Heading};
 use utils::site::resolve_internal_link;
 use utils::vec::InsertMany;
+use utils::slugs::maybe_slugify_anchors;

 use self::cmark::{Event, LinkType, Options, Parser, Tag};

@ -298,7 +298,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
            let title = get_text(&events[start_idx + 1..end_idx]);
            let id = heading_ref
                .id
-                .unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0));
+                .unwrap_or_else(|| find_anchor(&inserted_anchors, maybe_slugify_anchors(&title, context.config.slugify_paths), 0));
            inserted_anchors.push(id.clone());

            // insert `id` to the tag
--- a/components/rendering/tests/markdown.rs
+++ b/components/rendering/tests/markdown.rs
@ -351,6 +351,17 @@ fn can_add_id_to_headings_same_slug() {
    assert_eq!(res.body, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
 }

+/// Heading ids keep their non-ASCII characters when `slugify_paths` is off;
+/// in this heading only the spaces are turned into underscores.
+#[test]
+fn can_add_non_slug_id_to_headings() {
+    let mut config = Config::default();
+    config.slugify_paths = false;
+    let permalinks = HashMap::new();
+    let tera = Tera::default();
+    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);
+    let markdown = r#"# L'écologie et vous"#;
+    let expected = "<h1 id=\"L'écologie_et_vous\">L'écologie et vous</h1>\n";
+    let rendered = render_content(markdown, &context).unwrap();
+    assert_eq!(rendered.body, expected);
+}
+
 #[test]
 fn can_handle_manual_ids_on_headings() {
    let tera_ctx = Tera::default();
--- a/components/templates/src/global_fns/mod.rs
+++ b/components/templates/src/global_fns/mod.rs
@ -389,7 +389,8 @@ mod tests {

    #[test]
    fn can_get_taxonomy() {
-        let config = Config::default();
+        let mut config = Config::default();
+        config.slugify_paths = true;
        let taxo_config = TaxonomyConfig {
            name: "tags".to_string(),
            lang: config.default_language.clone(),
@ -466,7 +467,8 @@ mod tests {

    #[test]
    fn can_get_taxonomy_url() {
-        let config = Config::default();
+        let mut config = Config::default();
+        config.slugify_paths = true;
        let taxo_config = TaxonomyConfig {
            name: "tags".to_string(),
            lang: config.default_language.clone(),
--- a/components/utils/Cargo.toml
+++ b/components/utils/Cargo.toml
@ -10,6 +10,7 @@ unicode-segmentation = "1.2"
 walkdir = "2"
 toml = "0.5"
 serde = "1"
+slug = "0.1"

 [dev-dependencies]
 tempfile = "3"
--- a/components/utils/src/lib.rs
+++ b/components/utils/src/lib.rs
@ -8,6 +8,7 @@ extern crate tera;
 extern crate toml;
 extern crate unicode_segmentation;
 extern crate walkdir;
+extern crate slug;

 pub mod de;
 pub mod fs;
@ -15,3 +16,4 @@ pub mod net;
 pub mod site;
 pub mod templates;
 pub mod vec;
+pub mod slugs;
--- a/components/utils/src/slugs.rs
+++ b/components/utils/src/slugs.rs
@ -0,0 +1,107 @@
+/// Returns a copy of `s` without any character that appears in `chars`.
+fn strip_chars(s: &str, chars: &str) -> String {
+    s.chars().filter(|c| !chars.contains(*c)).collect()
+}
+
+/// Sanitizes `s` so it can be used as an output path component without
+/// slugifying it: characters forbidden on NTFS or unusable in markdown links
+/// are removed, trailing spaces and dots are trimmed, and the remaining
+/// spaces become `_`.
+fn strip_invalid_paths_chars(s: &str) -> String {
+    // NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3
+    // And () [] since they are not allowed in markdown links
+    let stripped = strip_chars(s, "<>:/|?*#()[]\n\"\\\r\t");
+    // Trim only once the forbidden characters are gone: removing them can
+    // expose a new trailing space or dot (e.g. "test.)" or "test. \n"), which
+    // NTFS does not accept at the end of a name
+    let trimmed = stripped.trim_end_matches(|c| c == ' ' || c == '.');
+    trimmed.replace(" ", "_")
+}
+
+/// Sanitizes `s` for use as a heading anchor without slugifying it: spaces
+/// become `_` and a fixed set of characters is removed.
+fn strip_invalid_anchors_chars(s: &str) -> String {
+    // https://tools.ietf.org/html/rfc3986#section-3.5
+    const FORBIDDEN: &str = "\"#%<>[\\]()^`{|}";
+    // spaces are not valid in markdown links
+    let underscored = s.replace(" ", "_");
+    strip_chars(&underscored, FORBIDDEN)
+}
+
+/// Turns `s` into a path component: an ASCII slug when `slugify` is true,
+/// otherwise the result of `strip_invalid_paths_chars`.
+pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
+    match slugify {
+        // ASCII slugification
+        true => slug::slugify(s),
+        // Only remove forbidden characters
+        false => strip_invalid_paths_chars(s),
+    }
+}
+
+/// Turns `s` into a heading anchor: an ASCII slug when `slugify` is true,
+/// otherwise the result of `strip_invalid_anchors_chars`.
+pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
+    match slugify {
+        // ASCII slugification
+        true => slug::slugify(s),
+        // Only remove forbidden characters
+        false => strip_invalid_anchors_chars(s),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Each pair is (raw input, expected sanitized path component).
+    #[test]
+    fn strip_invalid_paths_chars_works() {
+        let cases = [
+            // no newlines
+            ("test\ntest", "testtest"),
+            // no whitespaces
+            ("test ", "test"),
+            ("t est ", "t_est"),
+            // invalid NTFS
+            ("test .", "test"),
+            ("test. ", "test"),
+            ("test#test/test?test", "testtesttesttest"),
+            // Invalid CommonMark chars in links
+            ("test (hey)", "test_hey"),
+            ("test (hey", "test_hey"),
+            ("test hey)", "test_hey"),
+            ("test [hey]", "test_hey"),
+            ("test [hey", "test_hey"),
+            ("test hey]", "test_hey"),
+            // UTF-8
+            ("日本", "日本"),
+        ];
+
+        for &(raw, sanitized) in cases.iter() {
+            assert_eq!(strip_invalid_paths_chars(raw), sanitized);
+        }
+    }
+
+    /// Each pair is (raw input, expected sanitized anchor).
+    #[test]
+    fn strip_invalid_anchors_chars_works() {
+        let cases = [
+            ("日本", "日本"),
+            // Some invalid chars get removed
+            ("test#", "test"),
+            ("test<", "test"),
+            ("test%", "test"),
+            ("test^", "test"),
+            ("test{", "test"),
+            ("test|", "test"),
+            ("test(", "test"),
+            // Spaces are replaced by `_`
+            ("test hey", "test_hey"),
+        ];
+
+        for &(raw, sanitized) in cases.iter() {
+            assert_eq!(strip_invalid_anchors_chars(raw), sanitized);
+        }
+    }
+
+    /// Slugification transliterates accented characters to ASCII.
+    #[test]
+    fn maybe_slugify_paths_enabled() {
+        let slugified = maybe_slugify_paths("héhé", true);
+        assert_eq!(slugified, "hehe");
+    }
+
+    /// Without slugification, accented characters are left in place.
+    #[test]
+    fn maybe_slugify_paths_disabled() {
+        let untouched = maybe_slugify_paths("héhé", false);
+        assert_eq!(untouched, "héhé");
+    }
+}
--- a/docs/content/documentation/content/linking.md
+++ b/docs/content/documentation/content/linking.md
@ -4,9 +4,11 @@ weight = 50
 +++

 ## Heading id and anchor insertion
-While rendering the Markdown content, a unique id will automatically be assigned to each heading. This id is created
-by converting the heading text to a [slug](https://en.wikipedia.org/wiki/Semantic_URL#Slug), and appending numbers at
-the end if the slug already exists for that article. For example:
+While rendering the Markdown content, a unique id will automatically be assigned to each heading.
+This id is created by converting the heading text to a [slug](https://en.wikipedia.org/wiki/Semantic_URL#Slug) if `slugify_paths` is enabled.
+If `slugify_paths` is disabled, spaces are replaced by `_` and the following characters are stripped: `"`, `#`, `%`, `<`, `>`, `[`, `\`, `]`, `(`, `)`, `` ` ``, `^`, `{`, `|`, `}`.
+A number is appended at the end if the id already exists for that article.
+For example:

 ```md
 # Something exciting! <- something-exciting
--- a/docs/content/documentation/content/page.md
+++ b/docs/content/documentation/content/page.md
@ -27,6 +27,49 @@ As you can see, creating an `about.md` file is equivalent to creating an
 the `about` directory allows you to use asset co-location, as discussed in the
 [overview](@/documentation/content/overview.md#asset-colocation) section.

+## Output paths
+
+For any page within your content folder, its output path will be defined by either:
+
+- its `slug` frontmatter key
+- its filename
+
+Either way, this proposed path will be sanitized before being used.
+If `slugify_paths` is enabled in the site's config, paths are [slugified](https://en.wikipedia.org/wiki/Clean_URL#Slug).
+Otherwise - the default - a simpler sanitation is performed, outputting only valid NTFS paths.
+The following characters are removed: `<`, `>`, `:`, `/`, `|`, `?`, `*`, `#`, `\`, `(`, `)`, `[`, `]` as well as newlines and tabs.
+Additionally, trailing whitespace and dots are removed and whitespaces are replaced by `_`.
+
+**NOTE:** To produce URLs containing non-ASCII characters (UTF-8), `slugify_paths` needs to be set to `false`.
+
+### Path from frontmatter
+
+The output path for the page will first be read from the `slug` key in the page's frontmatter.
+
+**Example:** (file `content/zines/mlf-kurdistan.md`)
+
+```
+++
+title = "Le mouvement des Femmes Libres, à la tête de la libération kurde"
+slug = "femmes-libres-libération-kurde"
+++
+This is my article.
+```
+
+This frontmatter will output the article to `[base_url]/zines/femmes-libres-libération-kurde` with `slugify_paths` disabled, and to `[base_url]/zines/femmes-libres-liberation-kurde` with `slugify_paths` enabled.
+
+### Path from filename
+
+When the article's output path is not specified in the frontmatter, it is extracted from the file's path in the content folder. Consider a file `content/foo/bar/thing.md`. The output path is constructed:
+- if the filename is `index.md`, its parent folder name (`bar`) is used as output path
+- otherwise, the output path is extracted from `thing` (the filename without the `.md` extension)
+
+If the path found starts with a datetime string (`YYYY-mm-dd` or [an RFC 3339 datetime](https://www.ietf.org/rfc/rfc3339.txt)) followed by an underscore (`_`) or a dash (`-`), this date is removed from the output path and will be used as the page date (unless already set in the front-matter). Note that the full RFC 3339 datetime contains colons, which are not valid characters in a filename on Windows.
+
+The output path extracted from the file path is then slugified or not depending on the `slugify_paths` config, as explained previously.
+
+**Example:** The file `content/blog/2018-10-10-hello-world.md` will generate a page available at `[base_url]/blog/hello-world`.
+
 ## Front matter

 The TOML front matter is a set of metadata embedded in a file at the beginning of the file enclosed
--- a/docs/content/documentation/content/taxonomies.md
+++ b/docs/content/documentation/content/taxonomies.md
@ -5,7 +5,7 @@ weight = 90

 Zola has built-in support for taxonomies.

-The first step is to define the taxonomies in your [config.toml](@/documentation/getting-started/configuration.md).
+## Configuration

 A taxonomy has five variables:

@ -16,21 +16,48 @@ For example the default would be page/1.
 - `rss`: if set to `true`, an RSS feed will be generated for each term.
 - `lang`: only set this if you are making a multilingual site and want to indicate which language this taxonomy is for

-Once this is done, you can then set taxonomies in your content and Zola will pick
-them up:
+**Example 1:** (one language)
+
+```toml
+taxonomies = [ {name = "categories", rss = true} ]
+```
+
+**Example 2:** (multilingual site)
+
+```toml
+taxonomies = [
+    {name = "tags", lang = "fr"},
+    {name = "tags", lang = "eo"},
+    {name = "tags", lang = "en"},
+]
+```
+
+## Using taxonomies
+
+Once the configuration is done, you can then set taxonomies in your content and Zola will pick them up:
+
+**Example:**

 ```toml
 +++
-...
+title = "Writing a static-site generator in Rust"
+date = 2019-08-15
 [taxonomies]
 tags = ["rust", "web"]
 categories = ["programming"]
 +++
 ```

-The taxonomy pages are available at the following paths:
+## Output paths
+
+In a similar manner to how sections and pages calculate their output path:
+- the taxonomy name is never slugified
+- the taxonomy entry (e.g. a specific tag) is slugified when `slugify_paths` is enabled in the configuration
+
+The taxonomy pages are then available at the following paths:

 ```plain
-$BASE_URL/$NAME/
-$BASE_URL/$NAME/$SLUG
+$BASE_URL/$NAME/ (taxonomy)
+$BASE_URL/$NAME/$SLUG (taxonomy entry)
 ```
+
--- a/docs/content/documentation/getting-started/configuration.md
+++ b/docs/content/documentation/getting-started/configuration.md
@ -27,6 +27,10 @@ default_language = "en"
 # The site theme to use.
 theme = ""

+# Slugify paths for compatibility with ASCII-only URLs produced by Zola < 0.9
+# Enabling this setting transliterates or removes non-ASCII (UTF-8) characters in URLs
+slugify_paths = false
+
 # When set to "true", all code blocks are highlighted.
 highlight_code = false

--- a/test_site/config.toml
+++ b/test_site/config.toml
@ -4,6 +4,7 @@ highlight_code = true
 compile_sass = true
 generate_rss = true
 theme = "sample"
+slugify_paths = true

 taxonomies = [
    {name = "categories", rss = true},