Enforce valid Unicode language codes (validated with unic-langid)

This commit is contained in:
Vincent Prouillet 2021-02-22 22:26:19 +01:00
parent ba8939b240
commit 975800eb5b
12 changed files with 102 additions and 69 deletions

25
Cargo.lock generated
View file

@ -305,6 +305,7 @@ dependencies = [
"serde_derive", "serde_derive",
"syntect", "syntect",
"toml", "toml",
"unic-langid",
"utils", "utils",
] ]
@ -2759,6 +2760,12 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "tinystr"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.1.1" version = "1.1.1"
@ -2898,6 +2905,24 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]]
name = "unic-langid"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5"
dependencies = [
"unic-langid-impl",
]
[[package]]
name = "unic-langid-impl"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d"
dependencies = [
"tinystr",
]
[[package]] [[package]]
name = "unic-segment" name = "unic-segment"
version = "0.9.0" version = "0.9.0"

View file

@ -13,6 +13,7 @@ chrono = "0.4"
globset = "0.4" globset = "0.4"
lazy_static = "1" lazy_static = "1"
syntect = "4.1" syntect = "4.1"
unic-langid = "0.9"
errors = { path = "../errors" } errors = { path = "../errors" }
utils = { path = "../utils" } utils = { path = "../utils" }

View file

@ -1,16 +1,31 @@
use std::collections::HashMap; use std::collections::HashMap;
use errors::{bail, Result};
use serde_derive::{Deserialize, Serialize}; use serde_derive::{Deserialize, Serialize};
use unic_langid::LanguageIdentifier;
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)] #[serde(default)]
pub struct Language { pub struct LanguageOptions {
/// The language code
pub code: String,
/// Whether to generate a feed for that language, defaults to `false` /// Whether to generate a feed for that language, defaults to `false`
pub feed: bool, pub generate_feed: bool,
/// Whether to generate search index for that language, defaults to `false` /// Whether to generate search index for that language, defaults to `false`
pub search: bool, pub build_search_index: bool,
}
// Hand-written `Default`: a language generates neither a feed nor a search
// index unless its options explicitly turn them on.
impl Default for LanguageOptions {
fn default() -> Self {
LanguageOptions { generate_feed: false, build_search_index: false }
}
} }
pub type TranslateTerm = HashMap<String, String>; pub type TranslateTerm = HashMap<String, String>;
/// Checks that `code` is a well-formed Unicode Language Identifier.
///
/// # Errors
///
/// Returns an error naming the offending `code` when it cannot be parsed as a
/// `LanguageIdentifier`.
pub fn validate_code(code: &str) -> Result<()> {
    let parsed = LanguageIdentifier::from_bytes(code.as_bytes());
    if parsed.is_ok() {
        return Ok(());
    }

    bail!("Language `{}` is not a valid Unicode Language Identifier (see http://unicode.org/reports/tr35/#Unicode_language_identifier)", code);
}

View file

@ -44,7 +44,7 @@ pub struct Config {
/// The language used in the site. Defaults to "en" /// The language used in the site. Defaults to "en"
pub default_language: String, pub default_language: String,
/// The list of supported languages outside of the default one /// The list of supported languages outside of the default one
pub languages: Vec<languages::Language>, pub languages: HashMap<String, languages::LanguageOptions>,
/// Languages list and translated strings /// Languages list and translated strings
/// ///
@ -129,10 +129,15 @@ impl Config {
bail!("Highlight theme {} defined in config does not exist.", highlight_theme); bail!("Highlight theme {} defined in config does not exist.", highlight_theme);
} }
if config.languages.iter().any(|l| l.code == config.default_language) { if config.languages.iter().any(|(code, _)| code == &config.default_language) {
bail!("Default language `{}` should not appear both in `config.default_language` and `config.languages`", config.default_language) bail!("Default language `{}` should not appear both in `config.default_language` and `config.languages`", config.default_language)
} }
languages::validate_code(&config.default_language)?;
for code in config.languages.keys() {
languages::validate_code(&code)?;
}
if !config.ignored_content.is_empty() { if !config.ignored_content.is_empty() {
// Convert the file glob strings into a compiled glob set matcher. We want to do this once, // Convert the file glob strings into a compiled glob set matcher. We want to do this once,
// at program initialization, rather than for every page, for example. We arrange for the // at program initialization, rather than for every page, for example. We arrange for the
@ -280,7 +285,7 @@ impl Config {
/// Returns the codes of all additional languages /// Returns the codes of all additional languages
pub fn languages_codes(&self) -> Vec<&str> { pub fn languages_codes(&self) -> Vec<&str> {
self.languages.iter().map(|l| l.code.as_ref()).collect() self.languages.iter().map(|(code, _)| code.as_ref()).collect()
} }
pub fn is_in_build_mode(&self) -> bool { pub fn is_in_build_mode(&self) -> bool {
@ -362,7 +367,7 @@ impl Default for Config {
highlight_code: false, highlight_code: false,
highlight_theme: "base16-ocean-dark".to_string(), highlight_theme: "base16-ocean-dark".to_string(),
default_language: "en".to_string(), default_language: "en".to_string(),
languages: Vec::new(), languages: HashMap::new(),
generate_feed: false, generate_feed: false,
feed_limit: None, feed_limit: None,
feed_filename: "atom.xml".to_string(), feed_filename: "atom.xml".to_string(),
@ -671,10 +676,11 @@ anchors = "off"
let config_str = r#" let config_str = r#"
base_url = "https://remplace-par-ton-url.fr" base_url = "https://remplace-par-ton-url.fr"
default_language = "fr" default_language = "fr"
languages = [
{ code = "fr" }, [languages.fr]
{ code = "en" },
] [languages.en]
"#; "#;
let config = Config::parse(config_str); let config = Config::parse(config_str);
let err = config.unwrap_err(); let err = config.unwrap_err();

View file

@ -2,7 +2,7 @@ mod config;
pub mod highlighting; pub mod highlighting;
mod theme; mod theme;
pub use crate::config::{ pub use crate::config::{
languages::Language, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config, languages::LanguageOptions, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config,
}; };
use errors::Result; use errors::Result;

View file

@ -152,7 +152,7 @@ impl FileInfo {
mod tests { mod tests {
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use config::{Config, Language}; use config::{Config, LanguageOptions};
use super::{find_content_components, FileInfo}; use super::{find_content_components, FileInfo};
@ -184,7 +184,7 @@ mod tests {
#[test] #[test]
fn can_find_valid_language_in_page() { fn can_find_valid_language_in_page() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page( let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"),
&PathBuf::new(), &PathBuf::new(),
@ -197,7 +197,7 @@ mod tests {
#[test] #[test]
fn can_find_valid_language_with_default_locale() { fn can_find_valid_language_with_default_locale() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page( let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.en.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/python.en.md"),
&PathBuf::new(), &PathBuf::new(),
@ -210,7 +210,7 @@ mod tests {
#[test] #[test]
fn can_find_valid_language_in_page_with_assets() { fn can_find_valid_language_in_page_with_assets() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page( let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"),
&PathBuf::new(), &PathBuf::new(),
@ -236,7 +236,7 @@ mod tests {
#[test] #[test]
fn errors_on_unknown_language_in_page_with_i18n_on() { fn errors_on_unknown_language_in_page_with_i18n_on() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("it"), feed: false, search: false }); config.languages.insert("it".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page( let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"),
&PathBuf::new(), &PathBuf::new(),
@ -248,7 +248,7 @@ mod tests {
#[test] #[test]
fn can_find_valid_language_in_section() { fn can_find_valid_language_in_section() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_section( let mut file = FileInfo::new_section(
&Path::new("/home/vincent/code/site/content/posts/tutorials/_index.fr.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/_index.fr.md"),
&PathBuf::new(), &PathBuf::new(),
@ -275,7 +275,7 @@ mod tests {
#[test] #[test]
fn correct_canonical_after_find_language() { fn correct_canonical_after_find_language() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page( let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"), &Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"),
&PathBuf::new(), &PathBuf::new(),

View file

@ -333,7 +333,7 @@ mod tests {
use tera::Tera; use tera::Tera;
use super::Page; use super::Page;
use config::{Config, Language}; use config::{Config, LanguageOptions};
use front_matter::InsertAnchor; use front_matter::InsertAnchor;
use utils::slugs::SlugifyStrategy; use utils::slugs::SlugifyStrategy;
@ -805,7 +805,7 @@ Hello world
#[test] #[test]
fn can_specify_language_in_filename() { fn can_specify_language_in_filename() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
+++ +++
@ -822,7 +822,7 @@ Bonjour le monde"#
#[test] #[test]
fn can_specify_language_in_filename_with_date() { fn can_specify_language_in_filename_with_date() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
+++ +++
@ -841,7 +841,7 @@ Bonjour le monde"#
#[test] #[test]
fn i18n_frontmatter_path_overrides_default_permalink() { fn i18n_frontmatter_path_overrides_default_permalink() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
path = "bonjour" path = "bonjour"

View file

@ -254,7 +254,7 @@ mod tests {
use tempfile::tempdir; use tempfile::tempdir;
use super::Section; use super::Section;
use config::{Config, Language}; use config::{Config, LanguageOptions};
#[test] #[test]
fn section_with_assets_gets_right_info() { fn section_with_assets_gets_right_info() {
@ -312,7 +312,7 @@ mod tests {
#[test] #[test]
fn can_specify_language_in_filename() { fn can_specify_language_in_filename() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
+++ +++
@ -334,7 +334,7 @@ Bonjour le monde"#
#[test] #[test]
fn can_make_links_to_translated_sections_without_double_trailing_slash() { fn can_make_links_to_translated_sections_without_double_trailing_slash() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
+++ +++
@ -351,7 +351,7 @@ Bonjour le monde"#
#[test] #[test]
fn can_make_links_to_translated_subsections_with_trailing_slash() { fn can_make_links_to_translated_subsections_with_trailing_slash() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#" let content = r#"
+++ +++
+++ +++

View file

@ -270,7 +270,7 @@ mod tests {
use crate::content::Page; use crate::content::Page;
use crate::library::Library; use crate::library::Library;
use config::{Config, Language, Slugify, Taxonomy as TaxonomyConfig}; use config::{Config, LanguageOptions, Slugify, Taxonomy as TaxonomyConfig};
use utils::slugs::SlugifyStrategy; use utils::slugs::SlugifyStrategy;
#[test] #[test]
@ -495,7 +495,7 @@ mod tests {
#[test] #[test]
fn can_make_taxonomies_in_multiple_languages() { fn can_make_taxonomies_in_multiple_languages() {
let mut config = Config::default(); let mut config = Config::default();
config.languages.push(Language { feed: false, code: "fr".to_string(), search: false }); config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut library = Library::new(2, 0, true); let mut library = Library::new(2, 0, true);
config.taxonomies = vec![ config.taxonomies = vec![
@ -605,11 +605,7 @@ mod tests {
fn can_make_utf8_taxonomies() { fn can_make_utf8_taxonomies() {
let mut config = Config::default(); let mut config = Config::default();
config.slugify.taxonomies = SlugifyStrategy::Safe; config.slugify.taxonomies = SlugifyStrategy::Safe;
config.languages.push(Language { config.languages.insert("fr".to_owned(), LanguageOptions::default());
feed: false,
code: "fr".to_string(),
..Language::default()
});
let mut library = Library::new(2, 0, true); let mut library = Library::new(2, 0, true);
config.taxonomies = vec![TaxonomyConfig { config.taxonomies = vec![TaxonomyConfig {
@ -638,11 +634,7 @@ mod tests {
fn can_make_slugified_taxonomies_in_multiple_languages() { fn can_make_slugified_taxonomies_in_multiple_languages() {
let mut config = Config::default(); let mut config = Config::default();
config.slugify.taxonomies = SlugifyStrategy::On; config.slugify.taxonomies = SlugifyStrategy::On;
config.languages.push(Language { config.languages.insert("fr".to_owned(), LanguageOptions::default());
feed: false,
code: "fr".to_string(),
..Language::default()
});
let mut library = Library::new(2, 0, true); let mut library = Library::new(2, 0, true);
config.taxonomies = vec![ config.taxonomies = vec![

View file

@ -126,11 +126,8 @@ impl Site {
/// There is one index section for the default language + 1 per language /// There is one index section for the default language + 1 per language
fn index_section_paths(&self) -> Vec<(PathBuf, Option<String>)> { fn index_section_paths(&self) -> Vec<(PathBuf, Option<String>)> {
let mut res = vec![(self.content_path.join("_index.md"), None)]; let mut res = vec![(self.content_path.join("_index.md"), None)];
for language in &self.config.languages { for code in self.config.languages.keys() {
res.push(( res.push((self.content_path.join(format!("_index.{}.md", code)), Some(code.clone())));
self.content_path.join(format!("_index.{}.md", language.code)),
Some(language.code.clone()),
));
} }
res res
} }
@ -177,7 +174,7 @@ impl Site {
// so it's kinda necessary // so it's kinda necessary
let mut dir_walker = WalkDir::new(format!("{}/{}", base_path, "content/")).into_iter(); let mut dir_walker = WalkDir::new(format!("{}/{}", base_path, "content/")).into_iter();
let mut allowed_index_filenames: Vec<_> = let mut allowed_index_filenames: Vec<_> =
self.config.languages.iter().map(|l| format!("_index.{}.md", l.code)).collect(); self.config.languages.iter().map(|(code, _)| format!("_index.{}.md", code)).collect();
allowed_index_filenames.push("_index.md".to_string()); allowed_index_filenames.push("_index.md".to_string());
loop { loop {
@ -228,7 +225,7 @@ impl Site {
Ok(f) => { Ok(f) => {
let path_str = f.path().file_name().unwrap().to_str().unwrap(); let path_str = f.path().file_name().unwrap().to_str().unwrap();
if f.path().is_file() if f.path().is_file()
&& allowed_index_filenames.iter().find(|&s| *s == path_str).is_some() && allowed_index_filenames.iter().any(|s| s == path_str)
{ {
Some(f) Some(f)
} else { } else {
@ -660,13 +657,13 @@ impl Site {
start = log_time(start, "Generated feed in default language"); start = log_time(start, "Generated feed in default language");
} }
for lang in &self.config.languages { for (code, language) in &self.config.languages {
if !lang.feed { if !language.generate_feed {
continue; continue;
} }
let pages = let pages =
library.pages_values().iter().filter(|p| p.lang == lang.code).cloned().collect(); library.pages_values().iter().filter(|p| &p.lang == code).cloned().collect();
self.render_feed(pages, Some(&PathBuf::from(lang.code.clone())), &lang.code, |c| c)?; self.render_feed(pages, Some(&PathBuf::from(code)), &code, |c| c)?;
start = log_time(start, "Generated feed in other language"); start = log_time(start, "Generated feed in other language");
} }
@ -704,17 +701,13 @@ impl Site {
), ),
)?; )?;
for language in &self.config.languages { for (code, language) in &self.config.languages {
if language.code != self.config.default_language && language.search { if code != &self.config.default_language && language.build_search_index {
create_file( create_file(
&self.output_path.join(&format!("search_index.{}.js", &language.code)), &self.output_path.join(&format!("search_index.{}.js", &code)),
&format!( &format!(
"window.searchIndex = {};", "window.searchIndex = {};",
search::build_index( search::build_index(&code, &self.library.read().unwrap(), &self.config)?
&language.code,
&self.library.read().unwrap(),
&self.config
)?
), ),
)?; )?;
} }

View file

@ -67,7 +67,7 @@ fn make_path_with_lang(path: String, lang: &str, config: &Config) -> Result<Stri
return Ok(path); return Ok(path);
} }
if !config.languages.iter().any(|x| x.code == lang) { if !config.languages.iter().any(|(x, _)| x == lang) {
return Err( return Err(
format!("`{}` is not an authorized language (check config.languages).", lang).into() format!("`{}` is not an authorized language (check config.languages).", lang).into()
); );
@ -728,9 +728,9 @@ mod tests {
const TRANS_CONFIG: &str = r#" const TRANS_CONFIG: &str = r#"
base_url = "https://remplace-par-ton-url.fr" base_url = "https://remplace-par-ton-url.fr"
default_language = "fr" default_language = "fr"
languages = [
{ code = "en" }, [languages]
] [languages.en]
[translations] [translations]
[translations.fr] [translations.fr]

View file

@ -18,10 +18,11 @@ taxonomies = [
{name = "tags", lang = "fr"}, {name = "tags", lang = "fr"},
] ]
languages = [ [languages.fr]
{code = "fr", feed = true}, generate_feed = true
{code = "it", feed = false, search = true },
] [languages.it]
build_search_index = true
[markdown] [markdown]
highlight_code = false highlight_code = false