zola/components/utils/src/slugs.rs

/// Returns a copy of `s` with every character that appears in `chars` removed.
///
/// `chars` is treated as a set of individual characters, not as a substring:
/// each character of `s` is kept only if `chars` does not contain it.
fn strip_chars(s: &str, chars: &str) -> String {
    s.chars().filter(|&candidate| !chars.contains(candidate)).collect()
}

/// Sanitizes a path component without slugifying it.
///
/// Three steps are applied, in this order:
/// 1. characters forbidden in NTFS file names, plus `#`, `()`, `[]` and
///    newline/tab characters, are removed;
/// 2. trailing spaces and dots are trimmed;
/// 3. every remaining space is replaced by `_`.
///
/// The forbidden characters are removed *before* trimming so that a trailing
/// dot or space hidden behind a stripped character (e.g. `"title (etc.)"`)
/// cannot end up at the end of the returned name.
fn strip_invalid_paths_chars(s: &str) -> String {
    // NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3
    // And () [] since they are not allowed in markdown links
    const FORBIDDEN: &str = "<>:/|?*#()[]\n\"\\\r\t";

    let mut cleaned: String = s.chars().filter(|&c| !FORBIDDEN.contains(c)).collect();

    // Also we need to trim spaces and `.` from the end of the filename. The
    // trimmed slice is a prefix of `cleaned`, so its byte length is a valid
    // char boundary to truncate at.
    let kept = cleaned.trim_end_matches(|c: char| c == ' ' || c == '.').len();
    cleaned.truncate(kept);

    // Spaces are not valid in markdown links.
    cleaned.replace(" ", "_")
}

/// Sanitizes an anchor without slugifying it.
///
/// Characters from a fixed forbidden set are removed and every space is
/// replaced by `_`. Neither ` ` nor `_` is part of the forbidden set, so the
/// two steps do not interact and can be applied in either order.
fn strip_invalid_anchors_chars(s: &str) -> String {
    // https://tools.ietf.org/html/rfc3986#section-3.5
    let without_forbidden = strip_chars(s, "\"#%<>[\\]()^`{|}");
    // Spaces are not valid in markdown links.
    without_forbidden.replace(" ", "_")
}

/// Turns `s` into a string usable as a path component.
///
/// When `slugify` is `true` the input goes through `slug::slugify` (ASCII
/// slugification); otherwise only the forbidden characters are removed by
/// `strip_invalid_paths_chars`, leaving other characters as they are.
pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
    if !slugify {
        // Only remove forbidden characters
        return strip_invalid_paths_chars(s);
    }

    // ASCII slugification
    slug::slugify(s)
}

/// Turns `s` into a string usable as an anchor.
///
/// When `slugify` is `true` the input goes through `slug::slugify` (ASCII
/// slugification); otherwise only the forbidden characters are removed by
/// `strip_invalid_anchors_chars`, leaving other characters as they are.
pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
    if !slugify {
        // Only remove forbidden characters
        return strip_invalid_anchors_chars(s);
    }

    // ASCII slugification
    slug::slugify(s)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `(input, expected)` pairs for `strip_invalid_paths_chars`.
    const PATH_CASES: &[(&str, &str)] = &[
        // no newlines
        ("test\ntest", "testtest"),
        // no whitespaces
        ("test ", "test"),
        ("t est ", "t_est"),
        // invalid NTFS
        ("test .", "test"),
        ("test. ", "test"),
        ("test#test/test?test", "testtesttesttest"),
        // Invalid CommonMark chars in links
        ("test (hey)", "test_hey"),
        ("test (hey", "test_hey"),
        ("test hey)", "test_hey"),
        ("test [hey]", "test_hey"),
        ("test [hey", "test_hey"),
        ("test hey]", "test_hey"),
        // UTF-8
        ("日本", "日本"),
    ];

    /// `(input, expected)` pairs for `strip_invalid_anchors_chars`.
    const ANCHOR_CASES: &[(&str, &str)] = &[
        ("日本", "日本"),
        // Some invalid chars get removed
        ("test#", "test"),
        ("test<", "test"),
        ("test%", "test"),
        ("test^", "test"),
        ("test{", "test"),
        ("test|", "test"),
        ("test(", "test"),
        // Spaces are replaced by `_`
        ("test hey", "test_hey"),
    ];

    #[test]
    fn strip_invalid_paths_chars_works() {
        for &(input, expected) in PATH_CASES {
            assert_eq!(strip_invalid_paths_chars(input), expected);
        }
    }

    #[test]
    fn strip_invalid_anchors_chars_works() {
        for &(input, expected) in ANCHOR_CASES {
            assert_eq!(strip_invalid_anchors_chars(input), expected);
        }
    }

    // With slugification enabled, accented characters are converted to ASCII.
    #[test]
    fn maybe_slugify_paths_enabled() {
        let slugged = maybe_slugify_paths("héhé", true);
        assert_eq!(slugged, "hehe");
    }

    // With slugification disabled, non-ASCII characters are left untouched.
    #[test]
    fn maybe_slugify_paths_disabled() {
        let untouched = maybe_slugify_paths("héhé", false);
        assert_eq!(untouched, "héhé");
    }
}
Optionally do not slugify paths (#875) * maybe_slugify() only does simple sanitation if config.slugify is false * slugify is disabled by default, turn on for backwards-compatibility * First docs changes for optional slugification * Remove # from slugs but not & * Add/fix tests for utf8 slugs * Fix test sites for i18n slugs * fix templates tests for i18n slugs * Rename slugify setting to slugify_paths * Default slugify_paths * Update documentation for slugify_paths * quasi_slugify removes ?, /, # and newlines * Remove forbidden NTFS chars in quasi_slugify() * Slugification forbidden chars can be configured * Remove trailing dot/space in quasi_slugify * Fix NTFS path sanitation * Revert configurable slugification charset * Remove \r for windows newlines and \t tabulations in quasi_slugify() * Update docs for output paths * Replace slugify with slugify_paths * Fix test * Default to not slugifying * Move slugs utils to utils crate * Use slugify_paths for anchors as well 2019-12-21 09:44:13 +00:00			`fn strip_chars(s: &str, chars: &str) -> String {`
			`let mut sanitized_string = s.to_string();`
Format code using cargo fmt (#896) 2019-12-23 08:21:51 +00:00			`sanitized_string.retain(\|c\| !chars.contains(c));`
Optionally do not slugify paths (#875) * maybe_slugify() only does simple sanitation if config.slugify is false * slugify is disabled by default, turn on for backwards-compatibility * First docs changes for optional slugification * Remove # from slugs but not & * Add/fix tests for utf8 slugs * Fix test sites for i18n slugs * fix templates tests for i18n slugs * Rename slugify setting to slugify_paths * Default slugify_paths * Update documentation for slugify_paths * quasi_slugify removes ?, /, # and newlines * Remove forbidden NTFS chars in quasi_slugify() * Slugification forbidden chars can be configured * Remove trailing dot/space in quasi_slugify * Fix NTFS path sanitation * Revert configurable slugification charset * Remove \r for windows newlines and \t tabulations in quasi_slugify() * Update docs for output paths * Replace slugify with slugify_paths * Fix test * Default to not slugifying * Move slugs utils to utils crate * Use slugify_paths for anchors as well 2019-12-21 09:44:13 +00:00			`sanitized_string`
			`}`

			`fn strip_invalid_paths_chars(s: &str) -> String {`
			`// NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3`
			`// Also we need to trim . from the end of filename`
			`let trimmed = s.trim_end_matches(\|c\| c == ' ' \|\| c == '.');`
			`let cleaned = trimmed.replace(" ", "_");`
			`// And () [] since they are not allowed in markdown links`
			`strip_chars(&cleaned, "<>:/\|?*#()[]\n\"\\\r\t")`
			`}`

			`fn strip_invalid_anchors_chars(s: &str) -> String {`
			`// spaces are not valid in markdown links`
			`let cleaned = s.replace(" ", "_");`
			`// https://tools.ietf.org/html/rfc3986#section-3.5`
			strip_chars(&cleaned, "\"#%<>[\\]()^`{\|}")
			`}`

			`pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {`
			`if slugify {`
			`// ASCII slugification`
			`slug::slugify(s)`
Format code using cargo fmt (#896) 2019-12-23 08:21:51 +00:00			`} else {`
Optionally do not slugify paths (#875) * maybe_slugify() only does simple sanitation if config.slugify is false * slugify is disabled by default, turn on for backwards-compatibility * First docs changes for optional slugification * Remove # from slugs but not & * Add/fix tests for utf8 slugs * Fix test sites for i18n slugs * fix templates tests for i18n slugs * Rename slugify setting to slugify_paths * Default slugify_paths * Update documentation for slugify_paths * quasi_slugify removes ?, /, # and newlines * Remove forbidden NTFS chars in quasi_slugify() * Slugification forbidden chars can be configured * Remove trailing dot/space in quasi_slugify * Fix NTFS path sanitation * Revert configurable slugification charset * Remove \r for windows newlines and \t tabulations in quasi_slugify() * Update docs for output paths * Replace slugify with slugify_paths * Fix test * Default to not slugifying * Move slugs utils to utils crate * Use slugify_paths for anchors as well 2019-12-21 09:44:13 +00:00			`// Only remove forbidden characters`
			`strip_invalid_paths_chars(s)`
			`}`
			`}`

			`pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {`
			`if slugify {`
			`// ASCII slugification`
			`slug::slugify(s)`
Format code using cargo fmt (#896) 2019-12-23 08:21:51 +00:00			`} else {`
Optionally do not slugify paths (#875) * maybe_slugify() only does simple sanitation if config.slugify is false * slugify is disabled by default, turn on for backwards-compatibility * First docs changes for optional slugification * Remove # from slugs but not & * Add/fix tests for utf8 slugs * Fix test sites for i18n slugs * fix templates tests for i18n slugs * Rename slugify setting to slugify_paths * Default slugify_paths * Update documentation for slugify_paths * quasi_slugify removes ?, /, # and newlines * Remove forbidden NTFS chars in quasi_slugify() * Slugification forbidden chars can be configured * Remove trailing dot/space in quasi_slugify * Fix NTFS path sanitation * Revert configurable slugification charset * Remove \r for windows newlines and \t tabulations in quasi_slugify() * Update docs for output paths * Replace slugify with slugify_paths * Fix test * Default to not slugifying * Move slugs utils to utils crate * Use slugify_paths for anchors as well 2019-12-21 09:44:13 +00:00			`// Only remove forbidden characters`
			`strip_invalid_anchors_chars(s)`
			`}`
			`}`

			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`

			`#[test]`
			`fn strip_invalid_paths_chars_works() {`
			`let tests = vec![`
			`// no newlines`
			`("test\ntest", "testtest"),`
			`// no whitespaces`
			`("test ", "test"),`
			`("t est ", "t_est"),`
			`// invalid NTFS`
			`("test .", "test"),`
			`("test. ", "test"),`
			`("test#test/test?test", "testtesttesttest"),`
			`// Invalid CommonMark chars in links`
			`("test (hey)", "test_hey"),`
			`("test (hey", "test_hey"),`
			`("test hey)", "test_hey"),`
			`("test [hey]", "test_hey"),`
			`("test [hey", "test_hey"),`
			`("test hey]", "test_hey"),`
			`// UTF-8`
			`("日本", "日本"),`
			`];`

			`for (input, expected) in tests {`
			`assert_eq!(strip_invalid_paths_chars(&input), expected);`
			`}`
			`}`

			`#[test]`
			`fn strip_invalid_anchors_chars_works() {`
			`let tests = vec![`
			`("日本", "日本"),`
			`// Some invalid chars get removed`
			`("test#", "test"),`
			`("test<", "test"),`
			`("test%", "test"),`
			`("test^", "test"),`
			`("test{", "test"),`
			`("test\|", "test"),`
			`("test(", "test"),`
			// Spaces are replaced by `_`
			`("test hey", "test_hey"),`
			`];`

			`for (input, expected) in tests {`
			`assert_eq!(strip_invalid_anchors_chars(&input), expected);`
			`}`
			`}`

			`#[test]`
			`fn maybe_slugify_paths_enabled() {`
			`assert_eq!(maybe_slugify_paths("héhé", true), "hehe");`
			`}`

			`#[test]`
			`fn maybe_slugify_paths_disabled() {`
			`assert_eq!(maybe_slugify_paths("héhé", false), "héhé");`
			`}`
			`}`