2019-12-21 09:44:13 +00:00
|
|
|
/// Returns a copy of `s` with every character that occurs in `chars` removed.
///
/// `chars` is treated as a set of individual characters, not as a substring:
/// a character of `s` is dropped as soon as it appears anywhere in `chars`.
fn strip_chars(s: &str, chars: &str) -> String {
    s.chars().filter(|c| !chars.contains(*c)).collect()
}
|
|
|
|
|
|
|
|
/// Sanitizes `s` so it can be used as a path component without slugifying it.
///
/// * Trailing spaces and `.` are trimmed.
/// * Every remaining space is replaced by `_`.
/// * Every character listed in `FORBIDDEN` below is removed.
/// * A `.` left at the end once forbidden characters are gone is trimmed too.
///
/// All other characters, including non-ASCII ones, are kept as-is.
fn strip_invalid_paths_chars(s: &str) -> String {
    // NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3
    // And () [] since they are not allowed in markdown links
    const FORBIDDEN: &str = "<>:/|?*#()[]\n\"\\\r\t";

    // Also we need to trim spaces and `.` from the end of the filename
    let trimmed = s.trim_end_matches(|c: char| c == ' ' || c == '.');

    // Single pass: drop forbidden characters and turn spaces into underscores.
    // Neither ' ' nor '_' is in FORBIDDEN, so the order of the two steps does
    // not affect the result.
    let mut sanitized: String = trimmed
        .chars()
        .filter(|c| !FORBIDDEN.contains(*c))
        .map(|c| if c == ' ' { '_' } else { c })
        .collect();

    // Removing forbidden characters can expose a new trailing `.`
    // (e.g. "name.)" -> "name."), so trim dots once more. Spaces were already
    // turned into `_`, hence only `.` can still need trimming here.
    let kept = sanitized.trim_end_matches('.').len();
    sanitized.truncate(kept);
    sanitized
}
|
|
|
|
|
|
|
|
fn strip_invalid_anchors_chars(s: &str) -> String {
|
|
|
|
// spaces are not valid in markdown links
|
|
|
|
let cleaned = s.replace(" ", "_");
|
|
|
|
// https://tools.ietf.org/html/rfc3986#section-3.5
|
|
|
|
strip_chars(&cleaned, "\"#%<>[\\]()^`{|}")
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
|
|
|
|
if slugify {
|
|
|
|
// ASCII slugification
|
|
|
|
slug::slugify(s)
|
2019-12-23 08:21:51 +00:00
|
|
|
} else {
|
2019-12-21 09:44:13 +00:00
|
|
|
// Only remove forbidden characters
|
|
|
|
strip_invalid_paths_chars(s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
|
|
|
|
if slugify {
|
|
|
|
// ASCII slugification
|
|
|
|
slug::slugify(s)
|
2019-12-23 08:21:51 +00:00
|
|
|
} else {
|
2019-12-21 09:44:13 +00:00
|
|
|
// Only remove forbidden characters
|
|
|
|
strip_invalid_anchors_chars(s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of the non-slugified path sanitizer.
    #[test]
    fn strip_invalid_paths_chars_works() {
        let cases: &[(&str, &str)] = &[
            // no newlines
            ("test\ntest", "testtest"),
            // no whitespaces
            ("test ", "test"),
            ("t est ", "t_est"),
            // invalid NTFS
            ("test .", "test"),
            ("test. ", "test"),
            ("test#test/test?test", "testtesttesttest"),
            // Invalid CommonMark chars in links
            ("test (hey)", "test_hey"),
            ("test (hey", "test_hey"),
            ("test hey)", "test_hey"),
            ("test [hey]", "test_hey"),
            ("test [hey", "test_hey"),
            ("test hey]", "test_hey"),
            // UTF-8
            ("日本", "日本"),
        ];

        for &(input, expected) in cases {
            assert_eq!(strip_invalid_paths_chars(input), expected);
        }
    }

    /// Table-driven check of the non-slugified anchor sanitizer.
    #[test]
    fn strip_invalid_anchors_chars_works() {
        let cases: &[(&str, &str)] = &[
            ("日本", "日本"),
            // Some invalid chars get removed
            ("test#", "test"),
            ("test<", "test"),
            ("test%", "test"),
            ("test^", "test"),
            ("test{", "test"),
            ("test|", "test"),
            ("test(", "test"),
            // Spaces are replaced by `_`
            ("test hey", "test_hey"),
        ];

        for &(input, expected) in cases {
            assert_eq!(strip_invalid_anchors_chars(input), expected);
        }
    }

    #[test]
    fn maybe_slugify_paths_enabled() {
        assert_eq!(maybe_slugify_paths("héhé", true), "hehe");
    }

    #[test]
    fn maybe_slugify_paths_disabled() {
        assert_eq!(maybe_slugify_paths("héhé", false), "héhé");
    }

    #[test]
    fn maybe_slugify_anchors_enabled() {
        assert_eq!(maybe_slugify_anchors("héhé", true), "hehe");
    }

    #[test]
    fn maybe_slugify_anchors_disabled() {
        assert_eq!(maybe_slugify_anchors("héhé", false), "héhé");
    }
}
|