Southerntofu bugfix index translations (#1417)

* Translations are also generated for the index page (fix #1332)

* More tests for translations

* Even better error message

* Update page count for test

* Patch to fix Windows tests

By @mtolk

Co-authored-by: southerntofu <southerntofu@thunix.net>
This commit is contained in:
Vincent Prouillet 2021-03-22 20:56:11 +01:00 committed by GitHub
parent 5bf9ebc43a
commit 341ac3bfbd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 317 additions and 12 deletions

7
Cargo.lock generated
View file

@ -1754,6 +1754,12 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "path-slash"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619"
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.1.0" version = "2.1.0"
@ -2461,6 +2467,7 @@ dependencies = [
"lazy_static", "lazy_static",
"library", "library",
"link_checker", "link_checker",
"path-slash",
"rayon", "rayon",
"relative-path", "relative-path",
"sass-rs", "sass-rs",

View file

@ -154,6 +154,16 @@ impl Library {
.push(section.file.path.clone()); .push(section.file.path.clone());
} }
// populate translations if necessary
if self.is_multilingual {
self.translations
.entry(section.file.canonical.clone())
.and_modify(|trans| {
trans.insert(key);
})
.or_insert(set![key]);
};
// Index has no ancestors, no need to go through it // Index has no ancestors, no need to go through it
if section.is_index() { if section.is_index() {
ancestors.insert(section.file.path.clone(), vec![]); ancestors.insert(section.file.path.clone(), vec![]);
@ -178,15 +188,7 @@ impl Library {
} }
ancestors.insert(section.file.path.clone(), parents); ancestors.insert(section.file.path.clone(), parents);
// populate translations if necessary
if self.is_multilingual {
self.translations
.entry(section.file.canonical.clone())
.and_modify(|trans| {
trans.insert(key);
})
.or_insert(set![key]);
};
} }
for (key, page) in &mut self.pages { for (key, page) in &mut self.pages {

View file

@ -29,3 +29,4 @@ link_checker = { path = "../link_checker" }
[dev-dependencies] [dev-dependencies]
tempfile = "3" tempfile = "3"
path-slash = "0.1.4"

View file

@ -1,9 +1,11 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::env; use std::env;
use std::path::PathBuf; use std::path::{PathBuf, Path};
use std::collections::HashMap;
use site::Site; use site::Site;
use tempfile::{tempdir, TempDir}; use tempfile::{tempdir, TempDir};
use path_slash::PathExt;
// 2 helper macros to make all the build testing more bearable // 2 helper macros to make all the build testing more bearable
#[macro_export] #[macro_export]
@ -67,3 +69,253 @@ where
site.build().expect("Couldn't build the site"); site.build().expect("Couldn't build the site");
(site, tmp_dir, public.clone()) (site, tmp_dir, public.clone())
} }
/// Finds the unified path (eg. _index.fr.md -> _index.md) and
/// potential language (if not default) associated with a path
/// When the path is not a markdown file (.md), None is returned
/// Strips base_dir from the start of path
fn find_lang_for(entry: &Path, base_dir: &Path) -> Option<(String, Option<String>)> {
let ext = entry.extension();
if ext.is_none() {
// Not a markdown file (no extension), skip
return None;
}
let ext = ext.unwrap();
if ext != "md" {
// Not a markdown file, skip
return None;
}
let mut no_ext = entry.to_path_buf();
let stem = entry.file_stem().unwrap();
// Remove .md
no_ext.pop();
no_ext.push(stem);
if let Some(lang) = no_ext.extension() {
let stem = no_ext.file_stem();
// Remove lang
let mut unified_path = no_ext.clone();
unified_path.pop();
// Readd stem with .md added
unified_path.push(&format!("{}.md", stem.unwrap().to_str().unwrap()));
let unified_path_str = match unified_path.strip_prefix(base_dir) {
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
_ => {unified_path.to_slash_lossy()}
};
return Some((unified_path_str, Some(lang.to_str().unwrap().into())));
} else {
// No lang, return no_ext directly
let mut no_ext_string = match no_ext.strip_prefix(base_dir) {
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
_ => {no_ext.to_slash_lossy()}
};
no_ext_string.push_str(".md");
return Some((no_ext_string, None));
}
}
/// Recursively process a folder to find translations, returning a list of every language
/// translated for every page found. Translations for the default language are stored as "DEFAULT"
/// TODO: This implementation does not support files with a dot inside (foo.bar.md where bar is
/// not a language), because it requires to know what languages are enabled from config, and it's
/// unclear how to distinguish (and what to do) between disabled language or "legit" dots
pub fn add_translations_from(dir: &Path, strip: &Path, default: &str) -> HashMap<String, Vec<String>> {
let mut expected: HashMap<String, Vec<String>> = HashMap::new();
for entry in dir.read_dir().expect("Failed to read dir") {
let entry = entry.expect("Failed to read entry").path();
if entry.is_dir() {
// Recurse
expected.extend(add_translations_from(&entry, strip, default));
}
if let Some((unified_path, lang)) = find_lang_for(&entry, strip) {
if let Some(index) = expected.get_mut(&unified_path) {
// Insert found lang for rel_path, or DEFAULT otherwise
index.push(lang.unwrap_or(default.to_string()));
} else {
// rel_path is not registered yet, insert it in expected
expected.insert(unified_path, vec!(lang.unwrap_or(default.to_string())));
}
} else {
// Not a markdown file, skip
continue;
}
}
return expected;
}
/// Calculate output path for Markdown files
/// respecting page/section `path` fields, but not aliases (yet)
/// Returns a mapping of unified Markdown paths -> translations
pub fn find_expected_translations(name: &str, default_language: &str) -> HashMap<String, Vec<String>> {
let mut path = env::current_dir().unwrap().parent().unwrap().parent().unwrap().to_path_buf();
path.push(name);
path.push("content");
// Find expected translations from content folder
// We remove BASEDIR/content/ from the keys so they match paths in library
let mut strip_prefix = path.to_str().unwrap().to_string();
strip_prefix.push('/');
add_translations_from(&path, &path, default_language)
}
/// Checks whether a given permalink has a corresponding HTML page in output folder
pub fn ensure_output_exists(outputdir: &Path, baseurl: &str, link: &str) -> bool {
// Remove the baseurl as well as the remaining /, otherwise path will be interpreted
// as absolute.
let trimmed_url = link.trim_start_matches(baseurl).trim_start_matches('/');
let path = outputdir.join(trimmed_url);
path.exists()
}
pub struct Translation {
path: String,
lang: String,
permalink: String,
}
pub struct Translations {
trans: Vec<Translation>,
}
impl Translations {
pub fn for_path(site: &Site, path: &str) -> Translations {
let library = site.library.clone();
let library = library.read().unwrap();
// WORKAROUND because site.content_path is private
let unified_path = if let Some(page) = library.get_page(site.base_path.join("content").join(path)) {
page.file.canonical.clone()
} else if let Some(section) = library.get_section(site.base_path.join("content").join(path)) {
section.file.canonical.clone()
} else {
panic!("No such page or section: {}", path);
};
let translations = library.translations.get(&unified_path);
if translations.is_none() {
println!("Page canonical path {} is not in library translations", unified_path.display());
panic!("Library error");
}
let translations = translations
.unwrap()
.iter().map(|key| {
// Are we looking for a section? (no file extension here)
if unified_path.ends_with("_index") {
//library.get_section_by_key(*key).file.relative.to_string()
let section = library.get_section_by_key(*key);
Translation {
lang: section.lang.clone(),
permalink: section.permalink.clone(),
path: section.file.path.to_str().unwrap().to_string(),
}
} else {
let page = library.get_page_by_key(*key);
Translation {
lang: page.lang.clone(),
permalink: page.permalink.clone(),
path: page.file.path.to_str().unwrap().to_string(),
}
//library.get_page_by_key(*key).file.relative.to_string()
}
}).collect();
Translations {
trans: translations,
}
}
pub fn languages(&self) -> Vec<String> {
let mut lang: Vec<String> = self.trans.iter().map(|x| x.lang.clone()).collect();
lang.sort_unstable();
lang
}
pub fn permalinks(&self) -> Vec<String> {
let mut links: Vec<String> = self.trans.iter().map(|x| x.permalink.clone()).collect();
links.sort_unstable();
links
}
pub fn paths(&self) -> Vec<String> {
let mut paths: Vec<String> = self.trans.iter().map(|x| x.path.clone()).collect();
paths.sort_unstable();
paths
}
}
/// Find translations in library for a single path
fn library_translations_lang_for(site: &Site, path: &str) -> Vec<String> {
let library_translations = Translations::for_path(site, path);
library_translations.languages()
}
/// This function takes a list of translations generated by find_expected_translations(),
/// a site instance, and a path of a page to check that translations are the same on both sides
pub fn ensure_translations_match(translations: &HashMap<String, Vec<String>>, site: &Site, path: &str) -> bool {
let library_page_translations = library_translations_lang_for(site, path);
if let Some((unified_path, _lang)) = find_lang_for(&PathBuf::from(path), Path::new("")) {
if let Some(page_translations) = translations.get(&unified_path) {
// We order both claimed translations so we can compare them
// library_page_translations is already ordered
let mut page_translations = page_translations.clone();
page_translations.sort_unstable();
if page_translations != library_page_translations {
// Some translations don't match, print some context
// There is a special case where the index page may be autogenerated for a lang
// by zola so if we are looking at the index page, library may contain more (not
// less) languages than our tests.
if unified_path == "_index.md" {
for lang in &page_translations {
if !library_page_translations.contains(lang) {
println!("Library is missing language: {} for page {}", lang, unified_path);
return false;
}
}
// All languages from Markdown were found. We don't care if the library
// auto-generated more.
return true;
}
println!("Translations don't match for {}:", path);
println!(" - library: {:?}", library_page_translations);
println!(" - tests: {:?}", page_translations);
return false;
}
// Everything went well
return true;
} else {
// Should never happen because even the default language counts as a translation
// Reaching here means either there is a logic error in the tests themselves,
// or the permalinks contained a page which does not exist for some reason
unreachable!("Translations not found for {}", unified_path);
}
} else {
// None means the page does not end with .md. Only markdown pages should be passed to this function.
// Maybe a non-markdown path was found in site's permalinks?
unreachable!("{} is not a markdown page (extension not .md)", path);
}
}
/// For a given URL (from the permalinks), find the corresponding output page
/// and ensure all translation permalinks are linked inside
pub fn ensure_translations_in_output(site: &Site, path: &str, permalink: &str) -> bool {
let library_page_translations = Translations::for_path(site, path);
let translations_permalinks = library_page_translations.permalinks();
let output_path = permalink.trim_start_matches(&site.config.base_url);
// Strip leading / so it's not interpreted as an absolute path
let output_path = output_path.trim_start_matches('/');
// Don't forget to remove / because
let output_path = site.output_path.join(output_path);
let output = std::fs::read_to_string(&output_path).expect(&format!("Output not found in {}", output_path.display()));
for permalink in &translations_permalinks {
if !output.contains(permalink) {
println!("Page {} has translation {}, but it was not found in output", path, permalink);
return false;
}
}
return true;
}

View file

@ -2,7 +2,7 @@ mod common;
use std::env; use std::env;
use common::build_site; use common::*;
use site::Site; use site::Site;
#[test] #[test]
@ -14,7 +14,7 @@ fn can_parse_multilingual_site() {
site.load().unwrap(); site.load().unwrap();
let library = site.library.read().unwrap(); let library = site.library.read().unwrap();
assert_eq!(library.pages().len(), 10); assert_eq!(library.pages().len(), 11);
assert_eq!(library.sections().len(), 6); assert_eq!(library.sections().len(), 6);
// default index sections // default index sections
@ -174,3 +174,27 @@ fn can_build_multilingual_site() {
assert!(file_exists!(public, "search_index.it.js")); assert!(file_exists!(public, "search_index.it.js"));
assert!(!file_exists!(public, "search_index.fr.js")); assert!(!file_exists!(public, "search_index.fr.js"));
} }
#[test]
fn correct_translations_on_all_pages() {
let (site, _tmp_dir, public) = build_site("test_site_i18n");
assert!(public.exists());
let translations = find_expected_translations("test_site_i18n", &site.config.default_language);
for (path, link) in &site.permalinks {
// link ends with /, does not add index.html
let link = format!("{}index.html", link);
// Ensure every permalink has produced a HTML page
assert!(ensure_output_exists(&public, &site.config.base_url, &link));
// Ensure translations expected here match with those in the library
// TODO: add constructive error message inside the function
assert!(ensure_translations_match(&translations, &site, &path));
// Ensure output file contains all translations URLs
assert!(ensure_translations_in_output(&site, &path, &link));
}
}

View file

@ -0,0 +1,5 @@
+++
title = "Accueil"
+++
Page d'accueil

View file

@ -0,0 +1,5 @@
+++
title = "Home"
+++
Homepage

View file

@ -0,0 +1,5 @@
+++
title = "Ma page que en français"
+++
Cette page n'est pas traduite dans la langue par défaut (anglais).

View file

@ -2,3 +2,7 @@
{{page.title}} {{page.title}}
{% endfor %} {% endfor %}
Language: {{lang}} Language: {{lang}}
{% for t in section.translations %}
Translated in {{t.lang|default(value=config.default_language)}}: {{t.title}} {{t.permalink|safe}}
{% endfor %}