Southerntofu bugfix index translations (#1417)
* Translations are also generated for the index page (fix #1332) * More tests for translations * Even better error message * Update page count for test * Patch to fix Windows tests By @mtolk Co-authored-by: southerntofu <southerntofu@thunix.net>
This commit is contained in:
parent
5bf9ebc43a
commit
341ac3bfbd
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -1754,6 +1754,12 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "path-slash"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.1.0"
|
||||
|
@ -2461,6 +2467,7 @@ dependencies = [
|
|||
"lazy_static",
|
||||
"library",
|
||||
"link_checker",
|
||||
"path-slash",
|
||||
"rayon",
|
||||
"relative-path",
|
||||
"sass-rs",
|
||||
|
|
|
@ -154,6 +154,16 @@ impl Library {
|
|||
.push(section.file.path.clone());
|
||||
}
|
||||
|
||||
// populate translations if necessary
|
||||
if self.is_multilingual {
|
||||
self.translations
|
||||
.entry(section.file.canonical.clone())
|
||||
.and_modify(|trans| {
|
||||
trans.insert(key);
|
||||
})
|
||||
.or_insert(set![key]);
|
||||
};
|
||||
|
||||
// Index has no ancestors, no need to go through it
|
||||
if section.is_index() {
|
||||
ancestors.insert(section.file.path.clone(), vec![]);
|
||||
|
@ -178,15 +188,7 @@ impl Library {
|
|||
}
|
||||
ancestors.insert(section.file.path.clone(), parents);
|
||||
|
||||
// populate translations if necessary
|
||||
if self.is_multilingual {
|
||||
self.translations
|
||||
.entry(section.file.canonical.clone())
|
||||
.and_modify(|trans| {
|
||||
trans.insert(key);
|
||||
})
|
||||
.or_insert(set![key]);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
for (key, page) in &mut self.pages {
|
||||
|
|
|
@ -29,3 +29,4 @@ link_checker = { path = "../link_checker" }
|
|||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
path-slash = "0.1.4"
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#![allow(dead_code)]
|
||||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{PathBuf, Path};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use site::Site;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use path_slash::PathExt;
|
||||
|
||||
// 2 helper macros to make all the build testing more bearable
|
||||
#[macro_export]
|
||||
|
@ -67,3 +69,253 @@ where
|
|||
site.build().expect("Couldn't build the site");
|
||||
(site, tmp_dir, public.clone())
|
||||
}
|
||||
|
||||
/// Finds the unified path (eg. _index.fr.md -> _index.md) and
|
||||
/// potential language (if not default) associated with a path
|
||||
/// When the path is not a markdown file (.md), None is returned
|
||||
/// Strips base_dir from the start of path
|
||||
fn find_lang_for(entry: &Path, base_dir: &Path) -> Option<(String, Option<String>)> {
|
||||
let ext = entry.extension();
|
||||
if ext.is_none() {
|
||||
// Not a markdown file (no extension), skip
|
||||
return None;
|
||||
}
|
||||
let ext = ext.unwrap();
|
||||
if ext != "md" {
|
||||
// Not a markdown file, skip
|
||||
return None;
|
||||
}
|
||||
let mut no_ext = entry.to_path_buf();
|
||||
let stem = entry.file_stem().unwrap();
|
||||
// Remove .md
|
||||
no_ext.pop();
|
||||
no_ext.push(stem);
|
||||
if let Some(lang) = no_ext.extension() {
|
||||
let stem = no_ext.file_stem();
|
||||
// Remove lang
|
||||
let mut unified_path = no_ext.clone();
|
||||
unified_path.pop();
|
||||
// Readd stem with .md added
|
||||
unified_path.push(&format!("{}.md", stem.unwrap().to_str().unwrap()));
|
||||
let unified_path_str = match unified_path.strip_prefix(base_dir) {
|
||||
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
|
||||
_ => {unified_path.to_slash_lossy()}
|
||||
};
|
||||
return Some((unified_path_str, Some(lang.to_str().unwrap().into())));
|
||||
} else {
|
||||
// No lang, return no_ext directly
|
||||
let mut no_ext_string = match no_ext.strip_prefix(base_dir) {
|
||||
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
|
||||
_ => {no_ext.to_slash_lossy()}
|
||||
};
|
||||
no_ext_string.push_str(".md");
|
||||
return Some((no_ext_string, None));
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively process a folder to find translations, returning a list of every language
|
||||
/// translated for every page found. Translations for the default language are stored as "DEFAULT"
|
||||
/// TODO: This implementation does not support files with a dot inside (foo.bar.md where bar is
|
||||
/// not a language), because it requires to know what languages are enabled from config, and it's
|
||||
/// unclear how to distinguish (and what to do) between disabled language or "legit" dots
|
||||
pub fn add_translations_from(dir: &Path, strip: &Path, default: &str) -> HashMap<String, Vec<String>> {
|
||||
let mut expected: HashMap<String, Vec<String>> = HashMap::new();
|
||||
for entry in dir.read_dir().expect("Failed to read dir") {
|
||||
let entry = entry.expect("Failed to read entry").path();
|
||||
if entry.is_dir() {
|
||||
// Recurse
|
||||
expected.extend(add_translations_from(&entry, strip, default));
|
||||
}
|
||||
if let Some((unified_path, lang)) = find_lang_for(&entry, strip) {
|
||||
if let Some(index) = expected.get_mut(&unified_path) {
|
||||
// Insert found lang for rel_path, or DEFAULT otherwise
|
||||
index.push(lang.unwrap_or(default.to_string()));
|
||||
} else {
|
||||
// rel_path is not registered yet, insert it in expected
|
||||
expected.insert(unified_path, vec!(lang.unwrap_or(default.to_string())));
|
||||
}
|
||||
} else {
|
||||
// Not a markdown file, skip
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return expected;
|
||||
}
|
||||
|
||||
/// Calculate output path for Markdown files
|
||||
/// respecting page/section `path` fields, but not aliases (yet)
|
||||
/// Returns a mapping of unified Markdown paths -> translations
|
||||
pub fn find_expected_translations(name: &str, default_language: &str) -> HashMap<String, Vec<String>> {
|
||||
let mut path = env::current_dir().unwrap().parent().unwrap().parent().unwrap().to_path_buf();
|
||||
path.push(name);
|
||||
path.push("content");
|
||||
|
||||
// Find expected translations from content folder
|
||||
// We remove BASEDIR/content/ from the keys so they match paths in library
|
||||
let mut strip_prefix = path.to_str().unwrap().to_string();
|
||||
strip_prefix.push('/');
|
||||
add_translations_from(&path, &path, default_language)
|
||||
}
|
||||
|
||||
/// Checks whether a given permalink has a corresponding file in the output folder.
///
/// `baseurl` is removed from the start of `link` (once, and only if present), then any
/// leading `/` is dropped so the remainder is joined onto `outputdir` as a relative path
/// instead of replacing it as an absolute one.
///
/// Returns `true` when the resulting path exists on disk.
pub fn ensure_output_exists(outputdir: &Path, baseurl: &str, link: &str) -> bool {
    // `strip_prefix` removes a single leading occurrence; `trim_start_matches` would also
    // eat a first path segment that happens to repeat the base URL.
    let without_base = link.strip_prefix(baseurl).unwrap_or(link);
    let relative = without_base.trim_start_matches('/');
    outputdir.join(relative).exists()
}
|
||||
|
||||
/// One language version of a page or section, as recorded in the site's library.
#[derive(Debug, Clone)]
pub struct Translation {
    /// Path of the source file of this translation
    path: String,
    /// Language of this translation
    lang: String,
    /// Permalink of this translation
    permalink: String,
}

/// All the language versions the library knows for a single page or section.
#[derive(Debug, Clone)]
pub struct Translations {
    /// One entry per translation, in no particular order
    trans: Vec<Translation>,
}
|
||||
|
||||
impl Translations {
|
||||
pub fn for_path(site: &Site, path: &str) -> Translations {
|
||||
let library = site.library.clone();
|
||||
let library = library.read().unwrap();
|
||||
// WORKAROUND because site.content_path is private
|
||||
let unified_path = if let Some(page) = library.get_page(site.base_path.join("content").join(path)) {
|
||||
page.file.canonical.clone()
|
||||
} else if let Some(section) = library.get_section(site.base_path.join("content").join(path)) {
|
||||
section.file.canonical.clone()
|
||||
} else {
|
||||
panic!("No such page or section: {}", path);
|
||||
};
|
||||
|
||||
let translations = library.translations.get(&unified_path);
|
||||
if translations.is_none() {
|
||||
println!("Page canonical path {} is not in library translations", unified_path.display());
|
||||
panic!("Library error");
|
||||
}
|
||||
|
||||
let translations = translations
|
||||
.unwrap()
|
||||
.iter().map(|key| {
|
||||
// Are we looking for a section? (no file extension here)
|
||||
if unified_path.ends_with("_index") {
|
||||
//library.get_section_by_key(*key).file.relative.to_string()
|
||||
let section = library.get_section_by_key(*key);
|
||||
Translation {
|
||||
lang: section.lang.clone(),
|
||||
permalink: section.permalink.clone(),
|
||||
path: section.file.path.to_str().unwrap().to_string(),
|
||||
}
|
||||
} else {
|
||||
let page = library.get_page_by_key(*key);
|
||||
Translation {
|
||||
lang: page.lang.clone(),
|
||||
permalink: page.permalink.clone(),
|
||||
path: page.file.path.to_str().unwrap().to_string(),
|
||||
}
|
||||
//library.get_page_by_key(*key).file.relative.to_string()
|
||||
}
|
||||
}).collect();
|
||||
|
||||
Translations {
|
||||
trans: translations,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn languages(&self) -> Vec<String> {
|
||||
let mut lang: Vec<String> = self.trans.iter().map(|x| x.lang.clone()).collect();
|
||||
lang.sort_unstable();
|
||||
lang
|
||||
}
|
||||
|
||||
pub fn permalinks(&self) -> Vec<String> {
|
||||
let mut links: Vec<String> = self.trans.iter().map(|x| x.permalink.clone()).collect();
|
||||
links.sort_unstable();
|
||||
links
|
||||
}
|
||||
|
||||
pub fn paths(&self) -> Vec<String> {
|
||||
let mut paths: Vec<String> = self.trans.iter().map(|x| x.path.clone()).collect();
|
||||
paths.sort_unstable();
|
||||
paths
|
||||
}
|
||||
}
|
||||
|
||||
/// Find translations in library for a single path
|
||||
fn library_translations_lang_for(site: &Site, path: &str) -> Vec<String> {
|
||||
let library_translations = Translations::for_path(site, path);
|
||||
library_translations.languages()
|
||||
}
|
||||
|
||||
/// This function takes a list of translations generated by find_expected_translations(),
|
||||
/// a site instance, and a path of a page to check that translations are the same on both sides
|
||||
pub fn ensure_translations_match(translations: &HashMap<String, Vec<String>>, site: &Site, path: &str) -> bool {
|
||||
let library_page_translations = library_translations_lang_for(site, path);
|
||||
|
||||
if let Some((unified_path, _lang)) = find_lang_for(&PathBuf::from(path), Path::new("")) {
|
||||
if let Some(page_translations) = translations.get(&unified_path) {
|
||||
// We order both claimed translations so we can compare them
|
||||
// library_page_translations is already ordered
|
||||
let mut page_translations = page_translations.clone();
|
||||
page_translations.sort_unstable();
|
||||
|
||||
if page_translations != library_page_translations {
|
||||
// Some translations don't match, print some context
|
||||
// There is a special case where the index page may be autogenerated for a lang
|
||||
// by zola so if we are looking at the index page, library may contain more (not
|
||||
// less) languages than our tests.
|
||||
if unified_path == "_index.md" {
|
||||
for lang in &page_translations {
|
||||
if !library_page_translations.contains(lang) {
|
||||
println!("Library is missing language: {} for page {}", lang, unified_path);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// All languages from Markdown were found. We don't care if the library
|
||||
// auto-generated more.
|
||||
return true;
|
||||
}
|
||||
println!("Translations don't match for {}:", path);
|
||||
println!(" - library: {:?}", library_page_translations);
|
||||
println!(" - tests: {:?}", page_translations);
|
||||
return false;
|
||||
}
|
||||
// Everything went well
|
||||
return true;
|
||||
} else {
|
||||
// Should never happen because even the default language counts as a translation
|
||||
// Reaching here means either there is a logic error in the tests themselves,
|
||||
// or the permalinks contained a page which does not exist for some reason
|
||||
unreachable!("Translations not found for {}", unified_path);
|
||||
}
|
||||
} else {
|
||||
// None means the page does not end with .md. Only markdown pages should be passed to this function.
|
||||
// Maybe a non-markdown path was found in site's permalinks?
|
||||
unreachable!("{} is not a markdown page (extension not .md)", path);
|
||||
}
|
||||
}
|
||||
|
||||
/// For a given URL (from the permalinks), find the corresponding output page
|
||||
/// and ensure all translation permalinks are linked inside
|
||||
pub fn ensure_translations_in_output(site: &Site, path: &str, permalink: &str) -> bool {
|
||||
let library_page_translations = Translations::for_path(site, path);
|
||||
let translations_permalinks = library_page_translations.permalinks();
|
||||
|
||||
let output_path = permalink.trim_start_matches(&site.config.base_url);
|
||||
// Strip leading / so it's not interpreted as an absolute path
|
||||
let output_path = output_path.trim_start_matches('/');
|
||||
// Don't forget to remove / because
|
||||
let output_path = site.output_path.join(output_path);
|
||||
|
||||
let output = std::fs::read_to_string(&output_path).expect(&format!("Output not found in {}", output_path.display()));
|
||||
|
||||
for permalink in &translations_permalinks {
|
||||
if !output.contains(permalink) {
|
||||
println!("Page {} has translation {}, but it was not found in output", path, permalink);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ mod common;
|
|||
|
||||
use std::env;
|
||||
|
||||
use common::build_site;
|
||||
use common::*;
|
||||
use site::Site;
|
||||
|
||||
#[test]
|
||||
|
@ -14,7 +14,7 @@ fn can_parse_multilingual_site() {
|
|||
site.load().unwrap();
|
||||
|
||||
let library = site.library.read().unwrap();
|
||||
assert_eq!(library.pages().len(), 10);
|
||||
assert_eq!(library.pages().len(), 11);
|
||||
assert_eq!(library.sections().len(), 6);
|
||||
|
||||
// default index sections
|
||||
|
@ -174,3 +174,27 @@ fn can_build_multilingual_site() {
|
|||
assert!(file_exists!(public, "search_index.it.js"));
|
||||
assert!(!file_exists!(public, "search_index.fr.js"));
|
||||
}
|
||||
|
||||
/// Builds the multilingual test site and checks, for every permalink, that an HTML page
/// was produced, that the library's translations match the Markdown sources, and that the
/// rendered page mentions the permalink of each of its translations.
#[test]
fn correct_translations_on_all_pages() {
    let (site, _tmp_dir, public) = build_site("test_site_i18n");

    assert!(public.exists());

    let translations = find_expected_translations("test_site_i18n", &site.config.default_language);

    for (path, link) in &site.permalinks {
        // link ends with /, does not add index.html
        let link = format!("{}index.html", link);

        // Ensure every permalink has produced a HTML page
        assert!(
            ensure_output_exists(&public, &site.config.base_url, &link),
            "No HTML output found for {} (expected at {})",
            path,
            link
        );

        // Ensure translations expected here match with those in the library
        assert!(
            ensure_translations_match(&translations, &site, path),
            "Translations in the library don't match the Markdown sources for {}",
            path
        );

        // Ensure output file contains all translations URLs
        assert!(
            ensure_translations_in_output(&site, path, &link),
            "Output {} does not link to every translation of {}",
            link,
            path
        );
    }
}
|
||||
|
|
5
test_site_i18n/content/_index.fr.md
Normal file
5
test_site_i18n/content/_index.fr.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
+++
|
||||
title = "Accueil"
|
||||
+++
|
||||
|
||||
Page d'accueil
|
5
test_site_i18n/content/_index.md
Normal file
5
test_site_i18n/content/_index.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
+++
|
||||
title = "Home"
|
||||
+++
|
||||
|
||||
Homepage
|
|
@ -0,0 +1,5 @@
|
|||
+++
|
||||
title = "Ma page que en français"
|
||||
+++
|
||||
|
||||
Cette page n'est pas traduite dans la langue par défaut (anglais).
|
|
@ -2,3 +2,7 @@
|
|||
{{page.title}}
|
||||
{% endfor %}
|
||||
Language: {{lang}}
|
||||
|
||||
{% for t in section.translations %}
|
||||
Translated in {{t.lang|default(value=config.default_language)}}: {{t.title}} {{t.permalink|safe}}
|
||||
{% endfor %}
|
||||
|
|
Loading…
Reference in a new issue