Southerntofu bugfix index translations (#1417)
* Translations are also generated for the index page (fix #1332) * More tests for translations * Even better error message * Update page count for test * Patch to fix Windows tests By @mtolk Co-authored-by: southerntofu <southerntofu@thunix.net>
This commit is contained in:
parent
5bf9ebc43a
commit
341ac3bfbd
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -1754,6 +1754,12 @@ dependencies = [
|
||||||
"regex",
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "path-slash"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.1.0"
|
version = "2.1.0"
|
||||||
|
@ -2461,6 +2467,7 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"library",
|
"library",
|
||||||
"link_checker",
|
"link_checker",
|
||||||
|
"path-slash",
|
||||||
"rayon",
|
"rayon",
|
||||||
"relative-path",
|
"relative-path",
|
||||||
"sass-rs",
|
"sass-rs",
|
||||||
|
|
|
@ -154,6 +154,16 @@ impl Library {
|
||||||
.push(section.file.path.clone());
|
.push(section.file.path.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// populate translations if necessary
|
||||||
|
if self.is_multilingual {
|
||||||
|
self.translations
|
||||||
|
.entry(section.file.canonical.clone())
|
||||||
|
.and_modify(|trans| {
|
||||||
|
trans.insert(key);
|
||||||
|
})
|
||||||
|
.or_insert(set![key]);
|
||||||
|
};
|
||||||
|
|
||||||
// Index has no ancestors, no need to go through it
|
// Index has no ancestors, no need to go through it
|
||||||
if section.is_index() {
|
if section.is_index() {
|
||||||
ancestors.insert(section.file.path.clone(), vec![]);
|
ancestors.insert(section.file.path.clone(), vec![]);
|
||||||
|
@ -178,15 +188,7 @@ impl Library {
|
||||||
}
|
}
|
||||||
ancestors.insert(section.file.path.clone(), parents);
|
ancestors.insert(section.file.path.clone(), parents);
|
||||||
|
|
||||||
// populate translations if necessary
|
|
||||||
if self.is_multilingual {
|
|
||||||
self.translations
|
|
||||||
.entry(section.file.canonical.clone())
|
|
||||||
.and_modify(|trans| {
|
|
||||||
trans.insert(key);
|
|
||||||
})
|
|
||||||
.or_insert(set![key]);
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (key, page) in &mut self.pages {
|
for (key, page) in &mut self.pages {
|
||||||
|
|
|
@ -29,3 +29,4 @@ link_checker = { path = "../link_checker" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
path-slash = "0.1.4"
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::path::PathBuf;
|
use std::path::{PathBuf, Path};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use site::Site;
|
use site::Site;
|
||||||
use tempfile::{tempdir, TempDir};
|
use tempfile::{tempdir, TempDir};
|
||||||
|
use path_slash::PathExt;
|
||||||
|
|
||||||
// 2 helper macros to make all the build testing more bearable
|
// 2 helper macros to make all the build testing more bearable
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
|
@ -67,3 +69,253 @@ where
|
||||||
site.build().expect("Couldn't build the site");
|
site.build().expect("Couldn't build the site");
|
||||||
(site, tmp_dir, public.clone())
|
(site, tmp_dir, public.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Finds the unified path (eg. _index.fr.md -> _index.md) and
|
||||||
|
/// potential language (if not default) associated with a path
|
||||||
|
/// When the path is not a markdown file (.md), None is returned
|
||||||
|
/// Strips base_dir from the start of path
|
||||||
|
fn find_lang_for(entry: &Path, base_dir: &Path) -> Option<(String, Option<String>)> {
|
||||||
|
let ext = entry.extension();
|
||||||
|
if ext.is_none() {
|
||||||
|
// Not a markdown file (no extension), skip
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let ext = ext.unwrap();
|
||||||
|
if ext != "md" {
|
||||||
|
// Not a markdown file, skip
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut no_ext = entry.to_path_buf();
|
||||||
|
let stem = entry.file_stem().unwrap();
|
||||||
|
// Remove .md
|
||||||
|
no_ext.pop();
|
||||||
|
no_ext.push(stem);
|
||||||
|
if let Some(lang) = no_ext.extension() {
|
||||||
|
let stem = no_ext.file_stem();
|
||||||
|
// Remove lang
|
||||||
|
let mut unified_path = no_ext.clone();
|
||||||
|
unified_path.pop();
|
||||||
|
// Readd stem with .md added
|
||||||
|
unified_path.push(&format!("{}.md", stem.unwrap().to_str().unwrap()));
|
||||||
|
let unified_path_str = match unified_path.strip_prefix(base_dir) {
|
||||||
|
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
|
||||||
|
_ => {unified_path.to_slash_lossy()}
|
||||||
|
};
|
||||||
|
return Some((unified_path_str, Some(lang.to_str().unwrap().into())));
|
||||||
|
} else {
|
||||||
|
// No lang, return no_ext directly
|
||||||
|
let mut no_ext_string = match no_ext.strip_prefix(base_dir) {
|
||||||
|
Ok(path_without_prefix) => {path_without_prefix.to_slash_lossy()}
|
||||||
|
_ => {no_ext.to_slash_lossy()}
|
||||||
|
};
|
||||||
|
no_ext_string.push_str(".md");
|
||||||
|
return Some((no_ext_string, None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively process a folder to find translations, returning a list of every language
|
||||||
|
/// translated for every page found. Translations for the default language are stored as "DEFAULT"
|
||||||
|
/// TODO: This implementation does not support files with a dot inside (foo.bar.md where bar is
|
||||||
|
/// not a language), because it requires to know what languages are enabled from config, and it's
|
||||||
|
/// unclear how to distinguish (and what to do) between disabled language or "legit" dots
|
||||||
|
pub fn add_translations_from(dir: &Path, strip: &Path, default: &str) -> HashMap<String, Vec<String>> {
|
||||||
|
let mut expected: HashMap<String, Vec<String>> = HashMap::new();
|
||||||
|
for entry in dir.read_dir().expect("Failed to read dir") {
|
||||||
|
let entry = entry.expect("Failed to read entry").path();
|
||||||
|
if entry.is_dir() {
|
||||||
|
// Recurse
|
||||||
|
expected.extend(add_translations_from(&entry, strip, default));
|
||||||
|
}
|
||||||
|
if let Some((unified_path, lang)) = find_lang_for(&entry, strip) {
|
||||||
|
if let Some(index) = expected.get_mut(&unified_path) {
|
||||||
|
// Insert found lang for rel_path, or DEFAULT otherwise
|
||||||
|
index.push(lang.unwrap_or(default.to_string()));
|
||||||
|
} else {
|
||||||
|
// rel_path is not registered yet, insert it in expected
|
||||||
|
expected.insert(unified_path, vec!(lang.unwrap_or(default.to_string())));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Not a markdown file, skip
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate output path for Markdown files
|
||||||
|
/// respecting page/section `path` fields, but not aliases (yet)
|
||||||
|
/// Returns a mapping of unified Markdown paths -> translations
|
||||||
|
pub fn find_expected_translations(name: &str, default_language: &str) -> HashMap<String, Vec<String>> {
|
||||||
|
let mut path = env::current_dir().unwrap().parent().unwrap().parent().unwrap().to_path_buf();
|
||||||
|
path.push(name);
|
||||||
|
path.push("content");
|
||||||
|
|
||||||
|
// Find expected translations from content folder
|
||||||
|
// We remove BASEDIR/content/ from the keys so they match paths in library
|
||||||
|
let mut strip_prefix = path.to_str().unwrap().to_string();
|
||||||
|
strip_prefix.push('/');
|
||||||
|
add_translations_from(&path, &path, default_language)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks whether a given permalink has a corresponding HTML page in output folder
|
||||||
|
pub fn ensure_output_exists(outputdir: &Path, baseurl: &str, link: &str) -> bool {
|
||||||
|
// Remove the baseurl as well as the remaining /, otherwise path will be interpreted
|
||||||
|
// as absolute.
|
||||||
|
let trimmed_url = link.trim_start_matches(baseurl).trim_start_matches('/');
|
||||||
|
let path = outputdir.join(trimmed_url);
|
||||||
|
path.exists()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Translation {
|
||||||
|
path: String,
|
||||||
|
lang: String,
|
||||||
|
permalink: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Translations {
|
||||||
|
trans: Vec<Translation>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Translations {
|
||||||
|
pub fn for_path(site: &Site, path: &str) -> Translations {
|
||||||
|
let library = site.library.clone();
|
||||||
|
let library = library.read().unwrap();
|
||||||
|
// WORKAROUND because site.content_path is private
|
||||||
|
let unified_path = if let Some(page) = library.get_page(site.base_path.join("content").join(path)) {
|
||||||
|
page.file.canonical.clone()
|
||||||
|
} else if let Some(section) = library.get_section(site.base_path.join("content").join(path)) {
|
||||||
|
section.file.canonical.clone()
|
||||||
|
} else {
|
||||||
|
panic!("No such page or section: {}", path);
|
||||||
|
};
|
||||||
|
|
||||||
|
let translations = library.translations.get(&unified_path);
|
||||||
|
if translations.is_none() {
|
||||||
|
println!("Page canonical path {} is not in library translations", unified_path.display());
|
||||||
|
panic!("Library error");
|
||||||
|
}
|
||||||
|
|
||||||
|
let translations = translations
|
||||||
|
.unwrap()
|
||||||
|
.iter().map(|key| {
|
||||||
|
// Are we looking for a section? (no file extension here)
|
||||||
|
if unified_path.ends_with("_index") {
|
||||||
|
//library.get_section_by_key(*key).file.relative.to_string()
|
||||||
|
let section = library.get_section_by_key(*key);
|
||||||
|
Translation {
|
||||||
|
lang: section.lang.clone(),
|
||||||
|
permalink: section.permalink.clone(),
|
||||||
|
path: section.file.path.to_str().unwrap().to_string(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let page = library.get_page_by_key(*key);
|
||||||
|
Translation {
|
||||||
|
lang: page.lang.clone(),
|
||||||
|
permalink: page.permalink.clone(),
|
||||||
|
path: page.file.path.to_str().unwrap().to_string(),
|
||||||
|
}
|
||||||
|
//library.get_page_by_key(*key).file.relative.to_string()
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
Translations {
|
||||||
|
trans: translations,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn languages(&self) -> Vec<String> {
|
||||||
|
let mut lang: Vec<String> = self.trans.iter().map(|x| x.lang.clone()).collect();
|
||||||
|
lang.sort_unstable();
|
||||||
|
lang
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn permalinks(&self) -> Vec<String> {
|
||||||
|
let mut links: Vec<String> = self.trans.iter().map(|x| x.permalink.clone()).collect();
|
||||||
|
links.sort_unstable();
|
||||||
|
links
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn paths(&self) -> Vec<String> {
|
||||||
|
let mut paths: Vec<String> = self.trans.iter().map(|x| x.path.clone()).collect();
|
||||||
|
paths.sort_unstable();
|
||||||
|
paths
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find translations in library for a single path
|
||||||
|
fn library_translations_lang_for(site: &Site, path: &str) -> Vec<String> {
|
||||||
|
let library_translations = Translations::for_path(site, path);
|
||||||
|
library_translations.languages()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This function takes a list of translations generated by find_expected_translations(),
|
||||||
|
/// a site instance, and a path of a page to check that translations are the same on both sides
|
||||||
|
pub fn ensure_translations_match(translations: &HashMap<String, Vec<String>>, site: &Site, path: &str) -> bool {
|
||||||
|
let library_page_translations = library_translations_lang_for(site, path);
|
||||||
|
|
||||||
|
if let Some((unified_path, _lang)) = find_lang_for(&PathBuf::from(path), Path::new("")) {
|
||||||
|
if let Some(page_translations) = translations.get(&unified_path) {
|
||||||
|
// We order both claimed translations so we can compare them
|
||||||
|
// library_page_translations is already ordered
|
||||||
|
let mut page_translations = page_translations.clone();
|
||||||
|
page_translations.sort_unstable();
|
||||||
|
|
||||||
|
if page_translations != library_page_translations {
|
||||||
|
// Some translations don't match, print some context
|
||||||
|
// There is a special case where the index page may be autogenerated for a lang
|
||||||
|
// by zola so if we are looking at the index page, library may contain more (not
|
||||||
|
// less) languages than our tests.
|
||||||
|
if unified_path == "_index.md" {
|
||||||
|
for lang in &page_translations {
|
||||||
|
if !library_page_translations.contains(lang) {
|
||||||
|
println!("Library is missing language: {} for page {}", lang, unified_path);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// All languages from Markdown were found. We don't care if the library
|
||||||
|
// auto-generated more.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
println!("Translations don't match for {}:", path);
|
||||||
|
println!(" - library: {:?}", library_page_translations);
|
||||||
|
println!(" - tests: {:?}", page_translations);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Everything went well
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// Should never happen because even the default language counts as a translation
|
||||||
|
// Reaching here means either there is a logic error in the tests themselves,
|
||||||
|
// or the permalinks contained a page which does not exist for some reason
|
||||||
|
unreachable!("Translations not found for {}", unified_path);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// None means the page does not end with .md. Only markdown pages should be passed to this function.
|
||||||
|
// Maybe a non-markdown path was found in site's permalinks?
|
||||||
|
unreachable!("{} is not a markdown page (extension not .md)", path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// For a given URL (from the permalinks), find the corresponding output page
|
||||||
|
/// and ensure all translation permalinks are linked inside
|
||||||
|
pub fn ensure_translations_in_output(site: &Site, path: &str, permalink: &str) -> bool {
|
||||||
|
let library_page_translations = Translations::for_path(site, path);
|
||||||
|
let translations_permalinks = library_page_translations.permalinks();
|
||||||
|
|
||||||
|
let output_path = permalink.trim_start_matches(&site.config.base_url);
|
||||||
|
// Strip leading / so it's not interpreted as an absolute path
|
||||||
|
let output_path = output_path.trim_start_matches('/');
|
||||||
|
// Don't forget to remove / because
|
||||||
|
let output_path = site.output_path.join(output_path);
|
||||||
|
|
||||||
|
let output = std::fs::read_to_string(&output_path).expect(&format!("Output not found in {}", output_path.display()));
|
||||||
|
|
||||||
|
for permalink in &translations_permalinks {
|
||||||
|
if !output.contains(permalink) {
|
||||||
|
println!("Page {} has translation {}, but it was not found in output", path, permalink);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ mod common;
|
||||||
|
|
||||||
use std::env;
|
use std::env;
|
||||||
|
|
||||||
use common::build_site;
|
use common::*;
|
||||||
use site::Site;
|
use site::Site;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -14,7 +14,7 @@ fn can_parse_multilingual_site() {
|
||||||
site.load().unwrap();
|
site.load().unwrap();
|
||||||
|
|
||||||
let library = site.library.read().unwrap();
|
let library = site.library.read().unwrap();
|
||||||
assert_eq!(library.pages().len(), 10);
|
assert_eq!(library.pages().len(), 11);
|
||||||
assert_eq!(library.sections().len(), 6);
|
assert_eq!(library.sections().len(), 6);
|
||||||
|
|
||||||
// default index sections
|
// default index sections
|
||||||
|
@ -174,3 +174,27 @@ fn can_build_multilingual_site() {
|
||||||
assert!(file_exists!(public, "search_index.it.js"));
|
assert!(file_exists!(public, "search_index.it.js"));
|
||||||
assert!(!file_exists!(public, "search_index.fr.js"));
|
assert!(!file_exists!(public, "search_index.fr.js"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn correct_translations_on_all_pages() {
|
||||||
|
let (site, _tmp_dir, public) = build_site("test_site_i18n");
|
||||||
|
|
||||||
|
assert!(public.exists());
|
||||||
|
|
||||||
|
let translations = find_expected_translations("test_site_i18n", &site.config.default_language);
|
||||||
|
|
||||||
|
for (path, link) in &site.permalinks {
|
||||||
|
// link ends with /, does not add index.html
|
||||||
|
let link = format!("{}index.html", link);
|
||||||
|
|
||||||
|
// Ensure every permalink has produced a HTML page
|
||||||
|
assert!(ensure_output_exists(&public, &site.config.base_url, &link));
|
||||||
|
|
||||||
|
// Ensure translations expected here match with those in the library
|
||||||
|
// TODO: add constructive error message inside the function
|
||||||
|
assert!(ensure_translations_match(&translations, &site, &path));
|
||||||
|
|
||||||
|
// Ensure output file contains all translations URLs
|
||||||
|
assert!(ensure_translations_in_output(&site, &path, &link));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
5
test_site_i18n/content/_index.fr.md
Normal file
5
test_site_i18n/content/_index.fr.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
+++
|
||||||
|
title = "Accueil"
|
||||||
|
+++
|
||||||
|
|
||||||
|
Page d'accueil
|
5
test_site_i18n/content/_index.md
Normal file
5
test_site_i18n/content/_index.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
+++
|
||||||
|
title = "Home"
|
||||||
|
+++
|
||||||
|
|
||||||
|
Homepage
|
|
@ -0,0 +1,5 @@
|
||||||
|
+++
|
||||||
|
title = "Ma page que en français"
|
||||||
|
+++
|
||||||
|
|
||||||
|
Cette page n'est pas traduite dans la langue par défaut (anglais).
|
|
@ -2,3 +2,7 @@
|
||||||
{{page.title}}
|
{{page.title}}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
Language: {{lang}}
|
Language: {{lang}}
|
||||||
|
|
||||||
|
{% for t in section.translations %}
|
||||||
|
Translated in {{t.lang|default(value=config.default_language)}}: {{t.title}} {{t.permalink|safe}}
|
||||||
|
{% endfor %}
|
||||||
|
|
Loading…
Reference in a new issue