diff --git a/Cargo.lock b/Cargo.lock
index 959c84fe..5fca5c19 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1754,6 +1754,12 @@ dependencies = [
  "regex",
 ]
 
+[[package]]
+name = "path-slash"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619"
+
 [[package]]
 name = "percent-encoding"
 version = "2.1.0"
@@ -2461,6 +2467,7 @@ dependencies = [
  "lazy_static",
  "library",
  "link_checker",
+ "path-slash",
  "rayon",
  "relative-path",
  "sass-rs",
diff --git a/components/library/src/library.rs b/components/library/src/library.rs
index b933f8b6..362ca5e5 100644
--- a/components/library/src/library.rs
+++ b/components/library/src/library.rs
@@ -154,6 +154,16 @@ impl Library {
                     .push(section.file.path.clone());
             }
 
+            // populate translations if necessary
+            if self.is_multilingual {
+                self.translations
+                    .entry(section.file.canonical.clone())
+                    .and_modify(|trans| {
+                        trans.insert(key);
+                    })
+                    .or_insert(set![key]);
+            };
+
             // Index has no ancestors, no need to go through it
             if section.is_index() {
                 ancestors.insert(section.file.path.clone(), vec![]);
@@ -178,15 +188,5 @@ impl Library {
             }
             ancestors.insert(section.file.path.clone(), parents);
-
-            // populate translations if necessary
-            if self.is_multilingual {
-                self.translations
-                    .entry(section.file.canonical.clone())
-                    .and_modify(|trans| {
-                        trans.insert(key);
-                    })
-                    .or_insert(set![key]);
-            };
         }
 
         for (key, page) in &mut self.pages {
diff --git a/components/site/Cargo.toml b/components/site/Cargo.toml
index 51fa4907..eb90d9c8 100644
--- a/components/site/Cargo.toml
+++ b/components/site/Cargo.toml
@@ -29,3 +29,4 @@ link_checker = { path = "../link_checker" }
 
 [dev-dependencies]
 tempfile = "3"
+path-slash = "0.1.4"
diff --git a/components/site/tests/common.rs b/components/site/tests/common.rs
index 24431706..a6858a3b 100644
--- a/components/site/tests/common.rs
+++ b/components/site/tests/common.rs
@@ -1,9 +1,11 @@
 #![allow(dead_code)]
+use std::collections::HashMap;
 use std::env;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 
+use path_slash::PathExt;
 use site::Site;
 use tempfile::{tempdir, TempDir};
 
 // 2 helper macros to make all the build testing more bearable
 #[macro_export]
@@ -67,3 +69,229 @@ where
     site.build().expect("Couldn't build the site");
     (site, tmp_dir, public.clone())
 }
+
+/// Finds the unified path (eg. _index.fr.md -> _index.md) and
+/// potential language (if not default) associated with a path
+/// When the path is not a markdown file (.md), None is returned
+/// Strips base_dir from the start of path
+fn find_lang_for(entry: &Path, base_dir: &Path) -> Option<(String, Option<String>)> {
+    // Not a markdown file (no extension, or another extension), skip
+    if entry.extension()? != "md" {
+        return None;
+    }
+    // Remove .md
+    let no_ext = entry.with_extension("");
+    let (unified_path, lang) = match no_ext.extension() {
+        // Remove lang and readd .md to the stem
+        Some(lang) => (no_ext.with_extension("md"), Some(lang.to_string_lossy().into_owned())),
+        // No lang, the entry already is the unified path
+        None => (entry.to_path_buf(), None),
+    };
+    let relative_path = unified_path.strip_prefix(base_dir).unwrap_or(&unified_path);
+    Some((relative_path.to_slash_lossy(), lang))
+}
+
+/// Recursively process a folder to find translations, returning a list of every language
+/// translated for every page found. Translations for the default language are stored as `default`
+/// TODO: This implementation does not support files with a dot inside (foo.bar.md where bar is
+/// not a language), because it requires to know what languages are enabled from config, and it's
+/// unclear how to distinguish (and what to do) between disabled language or "legit" dots
+pub fn add_translations_from(dir: &Path, strip: &Path, default: &str) -> HashMap<String, Vec<String>> {
+    let mut expected: HashMap<String, Vec<String>> = HashMap::new();
+    for entry in dir.read_dir().expect("Failed to read dir") {
+        let entry = entry.expect("Failed to read entry").path();
+        if entry.is_dir() {
+            // Recurse; a folder is never a page itself, even when its name ends with .md
+            expected.extend(add_translations_from(&entry, strip, default));
+            continue;
+        }
+        // Not a markdown file when None, skip
+        if let Some((unified_path, lang)) = find_lang_for(&entry, strip) {
+            // Register the found lang for unified_path, or the default language otherwise
+            expected
+                .entry(unified_path)
+                .or_insert_with(Vec::new)
+                .push(lang.unwrap_or_else(|| default.to_string()));
+        }
+    }
+    expected
+}
+
+/// Collects the translations expected for the test site `name`
+/// by walking the Markdown files of its content folder
+/// Returns a mapping of unified Markdown paths -> translations
+pub fn find_expected_translations(name: &str, default_language: &str) -> HashMap<String, Vec<String>> {
+    let mut path = env::current_dir().unwrap().parent().unwrap().parent().unwrap().to_path_buf();
+    path.push(name);
+    path.push("content");
+
+    // Find expected translations from content folder
+    // We remove BASEDIR/content/ from the keys so they match paths in library
+    add_translations_from(&path, &path, default_language)
+}
+
+/// Checks whether a given permalink has a corresponding HTML page in output folder
+pub fn ensure_output_exists(outputdir: &Path, baseurl: &str, link: &str) -> bool {
+    // Remove the baseurl as well as the remaining /, otherwise path will be interpreted
+    // as absolute.
+    let trimmed_url = link.trim_start_matches(baseurl).trim_start_matches('/');
+    let path = outputdir.join(trimmed_url);
+    path.exists()
+}
+
+/// A single translation of a page or section, as found in the library
+pub struct Translation {
+    path: String,
+    lang: String,
+    permalink: String,
+}
+
+/// All the translations the library knows for one page or section
+pub struct Translations {
+    trans: Vec<Translation>,
+}
+
+impl Translations {
+    /// Looks up `path` (joined onto the site's content folder) in the library and gathers
+    /// every translation registered for it. Panics if the page or section is unknown.
+    pub fn for_path(site: &Site, path: &str) -> Translations {
+        let library = site.library.read().unwrap();
+        // WORKAROUND because site.content_path is private
+        let unified_path = if let Some(page) = library.get_page(site.base_path.join("content").join(path)) {
+            page.file.canonical.clone()
+        } else if let Some(section) = library.get_section(site.base_path.join("content").join(path)) {
+            section.file.canonical.clone()
+        } else {
+            panic!("No such page or section: {}", path);
+        };
+
+        let translations = library.translations.get(&unified_path).unwrap_or_else(|| {
+            panic!("Page canonical path {} is not in library translations", unified_path.display())
+        });
+
+        // Are we looking for a section? (no file extension here)
+        let is_section = unified_path.ends_with("_index");
+        let trans = translations
+            .iter()
+            .map(|key| {
+                if is_section {
+                    let section = library.get_section_by_key(*key);
+                    Translation {
+                        lang: section.lang.clone(),
+                        permalink: section.permalink.clone(),
+                        path: section.file.path.to_str().unwrap().to_string(),
+                    }
+                } else {
+                    let page = library.get_page_by_key(*key);
+                    Translation {
+                        lang: page.lang.clone(),
+                        permalink: page.permalink.clone(),
+                        path: page.file.path.to_str().unwrap().to_string(),
+                    }
+                }
+            })
+            .collect();
+
+        Translations { trans }
+    }
+
+    /// Sorted languages of all the translations
+    pub fn languages(&self) -> Vec<String> {
+        let mut lang: Vec<String> = self.trans.iter().map(|x| x.lang.clone()).collect();
+        lang.sort_unstable();
+        lang
+    }
+
+    /// Sorted permalinks of all the translations
+    pub fn permalinks(&self) -> Vec<String> {
+        let mut links: Vec<String> = self.trans.iter().map(|x| x.permalink.clone()).collect();
+        links.sort_unstable();
+        links
+    }
+
+    /// Sorted source file paths of all the translations
+    pub fn paths(&self) -> Vec<String> {
+        let mut paths: Vec<String> = self.trans.iter().map(|x| x.path.clone()).collect();
+        paths.sort_unstable();
+        paths
+    }
+}
+
+/// Find translations in library for a single path
+fn library_translations_lang_for(site: &Site, path: &str) -> Vec<String> {
+    Translations::for_path(site, path).languages()
+}
+
+/// This function takes a list of translations generated by find_expected_translations(),
+/// a site instance, and a path of a page to check that translations are the same on both sides
+pub fn ensure_translations_match(translations: &HashMap<String, Vec<String>>, site: &Site, path: &str) -> bool {
+    let library_page_translations = library_translations_lang_for(site, path);
+
+    let unified_path = match find_lang_for(Path::new(path), Path::new("")) {
+        Some((unified_path, _lang)) => unified_path,
+        // None means the page does not end with .md. Only markdown pages should be passed to this function.
+        // Maybe a non-markdown path was found in site's permalinks?
+        None => unreachable!("{} is not a markdown page (extension not .md)", path),
+    };
+    let mut page_translations = match translations.get(&unified_path) {
+        Some(page_translations) => page_translations.clone(),
+        // Should never happen because even the default language counts as a translation
+        // Reaching here means either there is a logic error in the tests themselves,
+        // or the permalinks contained a page which does not exist for some reason
+        None => unreachable!("Translations not found for {}", unified_path),
+    };
+    // We order both claimed translations so we can compare them
+    // library_page_translations is already ordered
+    page_translations.sort_unstable();
+
+    if page_translations == library_page_translations {
+        // Everything went well
+        return true;
+    }
+
+    // Some translations don't match, print some context
+    // There is a special case where the index page may be autogenerated for a lang
+    // by zola so if we are looking at the index page, library may contain more (not
+    // less) languages than our tests.
+    if unified_path == "_index.md" {
+        for lang in &page_translations {
+            if !library_page_translations.contains(lang) {
+                println!("Library is missing language: {} for page {}", lang, unified_path);
+                return false;
+            }
+        }
+        // All languages from Markdown were found. We don't care if the library
+        // auto-generated more.
+        return true;
+    }
+    println!("Translations don't match for {}:", path);
+    println!(" - library: {:?}", library_page_translations);
+    println!(" - tests: {:?}", page_translations);
+    false
+}
+
+/// For a given URL (from the permalinks), find the corresponding output page
+/// and ensure all translation permalinks are linked inside
+pub fn ensure_translations_in_output(site: &Site, path: &str, permalink: &str) -> bool {
+    let library_page_translations = Translations::for_path(site, path);
+    let translations_permalinks = library_page_translations.permalinks();
+
+    let output_path = permalink.trim_start_matches(&site.config.base_url);
+    // Strip leading / so it's not interpreted as an absolute path,
+    // which would make join() discard site.output_path
+    let output_path = output_path.trim_start_matches('/');
+    let output_path = site.output_path.join(output_path);
+
+    // Build the panic message lazily, only when the output file cannot be read
+    let output = std::fs::read_to_string(&output_path)
+        .unwrap_or_else(|err| panic!("Output not found in {}: {}", output_path.display(), err));
+
+    for permalink in &translations_permalinks {
+        if !output.contains(permalink) {
+            println!("Page {} has translation {}, but it was not found in output", path, permalink);
+            return false;
+        }
+    }
+
+    true
+}
diff --git a/components/site/tests/site_i18n.rs b/components/site/tests/site_i18n.rs
index d4ed0e34..eddbe9a4 100644
--- a/components/site/tests/site_i18n.rs
+++ b/components/site/tests/site_i18n.rs
@@ -2,7 +2,7 @@ mod common;
 
 use std::env;
 
-use common::build_site;
+use common::*;
 use site::Site;
 
 #[test]
@@ -14,7 +14,7 @@ fn can_parse_multilingual_site() {
     site.load().unwrap();
 
     let library = site.library.read().unwrap();
-    assert_eq!(library.pages().len(), 10);
+    assert_eq!(library.pages().len(), 11);
     assert_eq!(library.sections().len(), 6);
 
     // default index sections
@@ -174,3 +174,27 @@ fn can_build_multilingual_site() {
     assert!(file_exists!(public, "search_index.it.js"));
     assert!(!file_exists!(public, "search_index.fr.js"));
 }
+
+#[test]
+fn correct_translations_on_all_pages() {
+    let (site, _tmp_dir, public) = build_site("test_site_i18n");
+
+    assert!(public.exists());
+
+    let translations = find_expected_translations("test_site_i18n", &site.config.default_language);
+
+    for (path, link) in &site.permalinks {
+        // link ends with /, does not add index.html
+        let link = format!("{}index.html", link);
+
+        // Ensure every permalink has produced a HTML page
+        assert!(ensure_output_exists(&public, &site.config.base_url, &link));
+
+        // Ensure translations expected here match with those in the library
+        // TODO: add constructive error message inside the function
+        assert!(ensure_translations_match(&translations, &site, &path));
+
+        // Ensure output file contains all translations URLs
+        assert!(ensure_translations_in_output(&site, &path, &link));
+    }
+}
diff --git a/test_site_i18n/content/_index.fr.md b/test_site_i18n/content/_index.fr.md
new file mode 100644
index 00000000..b7249018
--- /dev/null
+++ b/test_site_i18n/content/_index.fr.md
@@ -0,0 +1,5 @@
++++
+title = "Accueil"
++++
+
+Page d'accueil
diff --git a/test_site_i18n/content/_index.md b/test_site_i18n/content/_index.md
new file mode 100644
index 00000000..814b9a0e
--- /dev/null
+++ b/test_site_i18n/content/_index.md
@@ -0,0 +1,5 @@
++++
+title = "Home"
++++
+
+Homepage
diff --git a/test_site_i18n/content/blog/not-translated-in-default-language.fr.md b/test_site_i18n/content/blog/not-translated-in-default-language.fr.md
new file mode 100644
index 00000000..a918561c
--- /dev/null
+++ b/test_site_i18n/content/blog/not-translated-in-default-language.fr.md
@@ -0,0 +1,5 @@
++++
+title = "Ma page que en français"
++++
+
+Cette page n'est pas traduite dans la langue par défaut (anglais).
diff --git a/test_site_i18n/templates/index.html b/test_site_i18n/templates/index.html
index e1b47f13..1f5e03c9 100644
--- a/test_site_i18n/templates/index.html
+++ b/test_site_i18n/templates/index.html
@@ -2,3 +2,7 @@
 {{page.title}}
 {% endfor %}
 Language: {{lang}}
+
+{% for t in section.translations %}
+Translated in {{t.lang|default(value=config.default_language)}}: {{t.title}} {{t.permalink|safe}}
+{% endfor %}