Get language from filename

This commit is contained in:
Vincent Prouillet 2018-12-27 13:14:54 +01:00
parent f0cafcd1d6
commit e50d3daad1
5 changed files with 176 additions and 10 deletions

View file

@ -249,6 +249,11 @@ impl Config {
/// Returns `true` when at least one entry is present in `languages`
pub fn uses_i18n(&self) -> bool {
!self.languages.is_empty()
}
/// Borrows the `code` of every additional language, in the order they appear in `languages`
pub fn languages_codes(&self) -> Vec<&str> {
    self.languages
        .iter()
        .map(|language| language.code.as_str())
        .collect()
}
}
impl Default for Config {

View file

@ -12,7 +12,7 @@ extern crate syntect;
mod config;
pub mod highlighting;
mod theme;
pub use config::{Config, Taxonomy};
pub use config::{Config, Taxonomy, Language};
use std::path::Path;

View file

@ -1,5 +1,8 @@
use std::path::{Path, PathBuf};
use config::Config;
use errors::Result;
/// Takes a full path to a file and returns only the components after the first `content` directory
/// Will not return the filename as last component
pub fn find_content_components<P: AsRef<Path>>(path: P) -> Vec<String> {
@ -29,6 +32,7 @@ pub struct FileInfo {
/// The full path to the .md file
pub path: PathBuf,
/// The name of the .md file without the extension, always `_index` for sections
/// Doesn't contain the language if there was one in the filename
pub name: String,
/// The .md path, starting from the content directory, with `/` slashes
pub relative: String,
@ -55,7 +59,9 @@ impl FileInfo {
};
// If we have a folder with an asset, don't consider it as a component
if !components.is_empty() && name == "index" {
// Splitting on `.` as we might have a language so it isn't *only* index but also index.fr
// etc
if !components.is_empty() && name.split('.').collect::<Vec<_>>()[0] == "index" {
components.pop();
// also set parent_path to grandparent instead
parent = parent.parent().unwrap().to_path_buf();
@ -74,12 +80,12 @@ impl FileInfo {
pub fn new_section(path: &Path) -> FileInfo {
let parent = path.parent().unwrap().to_path_buf();
let name = path.file_stem().unwrap().to_string_lossy().to_string();
let components = find_content_components(path);
let relative = if components.is_empty() {
// the index one
"_index.md".to_string()
let relative = if !components.is_empty() {
format!("{}/{}.md", components.join("/"), name)
} else {
format!("{}/_index.md", components.join("/"))
format!("{}.md", name)
};
let grand_parent = parent.parent().map(|p| p.to_path_buf());
@ -87,11 +93,40 @@ impl FileInfo {
path: path.to_path_buf(),
parent,
grand_parent,
name: "_index".to_string(),
name,
components,
relative,
}
}
/// Detects a language code embedded in the file name, following the `{name}.{lang}` scheme.
///
/// When a code is found, `self.name` is replaced by the part before the first `.` and the
/// code (everything after that first `.`) is returned.
/// Returns `Ok(None)` and leaves `self.name` untouched when the config doesn't use i18n or
/// when the name contains no `.`.
///
/// # Errors
/// Fails when the code found in the file name is not one of `config.languages_codes()`.
pub fn find_language(&mut self, config: &Config) -> Result<Option<String>> {
    // Without any configured language there is nothing to look for
    if !config.uses_i18n() {
        return Ok(None);
    }

    // We assume nobody uses `.` in filenames when i18n is on (to be documented), so the
    // first `.` is taken as the separator between the name and the language code
    let (stem, lang) = match self.name.find('.') {
        Some(dot) => (self.name[..dot].to_string(), self.name[dot + 1..].to_string()),
        None => return Ok(None),
    };

    // An unknown code is either a typo or a language missing from the config
    if !config.languages_codes().contains(&lang.as_str()) {
        bail!("File {:?} has a language code of {} which isn't present in the config.toml `languages`", self.path, lang);
    }

    self.name = stem;
    Ok(Some(lang))
}
}
#[doc(hidden)]
@ -110,7 +145,11 @@ impl Default for FileInfo {
#[cfg(test)]
mod tests {
use super::find_content_components;
use std::path::Path;
use config::{Config, Language};
use super::{FileInfo, find_content_components};
#[test]
fn can_find_content_components() {
@ -118,4 +157,64 @@ mod tests {
find_content_components("/home/vincent/code/site/content/posts/tutorials/python.md");
assert_eq!(res, ["posts".to_string(), "tutorials".to_string()]);
}
#[test]
fn can_find_components_in_page_with_assets() {
    // The folder holding an `index.md` and its assets must not be counted as a component
    let path = Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.md");
    let file = FileInfo::new_page(path);
    assert_eq!(file.components, ["posts".to_string(), "tutorials".to_string()]);
}
#[test]
fn can_find_valid_language_in_page() {
    let mut config = Config::default();
    config.languages.push(Language { code: String::from("fr"), rss: false });
    let mut file = FileInfo::new_page(Path::new(
        "/home/vincent/code/site/content/posts/tutorials/python.fr.md",
    ));

    // The code found in the filename is declared in the config, so it is returned...
    let lang = file.find_language(&config).unwrap();
    assert_eq!(lang, Some(String::from("fr")));
    // ...and it has been stripped from the name kept in the struct
    assert_eq!(file.name, "python");
}
#[test]
fn can_find_valid_language_in_page_with_assets() {
    let mut config = Config::default();
    config.languages.push(Language { code: String::from("fr"), rss: false });
    let mut file = FileInfo::new_page(Path::new(
        "/home/vincent/code/site/content/posts/tutorials/python/index.fr.md",
    ));

    // `index.fr.md` must still be treated as the index of a folder with assets
    assert_eq!(file.components, ["posts".to_string(), "tutorials".to_string()]);

    let lang = file.find_language(&config).unwrap();
    assert_eq!(lang, Some(String::from("fr")));
    // The language has been stripped from the name kept in the struct
    assert_eq!(file.name, "index");
}
#[test]
fn do_nothing_on_unknown_language_in_page_with_i18n_off() {
    // No language is pushed to the config here: the `.fr` in the filename should be ignored
    let config = Config::default();
    let path = Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md");
    let mut file = FileInfo::new_page(path);

    let lang = file.find_language(&config).unwrap();
    assert!(lang.is_none());
}
#[test]
fn errors_on_unknown_language_in_page_with_i18n_on() {
    // Only `it` is declared, so the `fr` found in the filename is unknown
    let mut config = Config::default();
    config.languages.push(Language { code: "it".to_string(), rss: false });
    let path = Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md");
    let mut file = FileInfo::new_page(path);

    assert!(file.find_language(&config).is_err());
}
#[test]
fn can_find_valid_language_in_section() {
    let mut config = Config::default();
    config.languages.push(Language { code: String::from("fr"), rss: false });
    let mut file = FileInfo::new_section(Path::new(
        "/home/vincent/code/site/content/posts/tutorials/_index.fr.md",
    ));

    // The code found in the filename is declared in the config, so it is returned...
    let lang = file.find_language(&config).unwrap();
    assert_eq!(lang, Some(String::from("fr")));
    // ...and the section name is back to `_index` once the language is stripped
    assert_eq!(file.name, "_index");
}
}

View file

@ -71,6 +71,9 @@ pub struct Page {
/// How long would it take to read the raw content.
/// See `get_reading_analytics` on how it is calculated
pub reading_time: Option<usize>,
/// The language of that page. `None` if the user doesn't set up `languages` in config.
/// Corresponds to the lang in the {slug}.{lang}.md file scheme
pub lang: Option<String>,
}
impl Page {
@ -97,6 +100,7 @@ impl Page {
toc: vec![],
word_count: None,
reading_time: None,
lang: None,
}
}
@ -111,6 +115,8 @@ impl Page {
let (meta, content) = split_page_content(file_path, content)?;
let mut page = Page::new(file_path, meta);
page.lang = page.file.find_language(config)?;
page.raw_content = content;
let (word_count, reading_time) = get_reading_analytics(&page.raw_content);
page.word_count = Some(word_count);
@ -286,6 +292,7 @@ impl Default for Page {
toc: vec![],
word_count: None,
reading_time: None,
lang: None,
}
}
}
@ -302,7 +309,7 @@ mod tests {
use tera::Tera;
use super::Page;
use config::Config;
use config::{Config, Language};
use front_matter::InsertAnchor;
#[test]
@ -559,4 +566,37 @@ Hello world
assert_eq!(page.meta.date, Some("2018-09-09".to_string()));
assert_eq!(page.slug, "hello");
}
#[test]
fn can_specify_language_in_filename() {
    let mut config = Config::default();
    config.languages.push(Language { code: "fr".to_string(), rss: false });
    let content = r#"
+++
+++
Bonjour le monde"#
        .to_string();

    let page = Page::parse(Path::new("hello.fr.md"), &content, &config).unwrap();

    // The language comes from the filename and is not part of the slug
    assert_eq!(page.lang, Some("fr".to_string()));
    assert_eq!(page.slug, "hello");
}
#[test]
fn can_specify_language_in_filename_with_date() {
    let mut config = Config::default();
    config.languages.push(Language { code: "fr".to_string(), rss: false });
    let content = r#"
+++
+++
Bonjour le monde"#
        .to_string();

    let page = Page::parse(Path::new("2018-10-08_hello.fr.md"), &content, &config).unwrap();

    // Date prefix and language suffix are both taken from the filename, leaving only the slug
    assert_eq!(page.meta.date, Some("2018-10-08".to_string()));
    assert_eq!(page.lang, Some("fr".to_string()));
    assert_eq!(page.slug, "hello");
}
}

View file

@ -51,6 +51,9 @@ pub struct Section {
/// How long would it take to read the raw content.
/// See `get_reading_analytics` on how it is calculated
pub reading_time: Option<usize>,
/// The language of that section. `None` if the user doesn't set up `languages` in config.
/// Corresponds to the lang in the _index.{lang}.md file scheme
pub lang: Option<String>,
}
impl Section {
@ -74,12 +77,14 @@ impl Section {
toc: vec![],
word_count: None,
reading_time: None,
lang: None,
}
}
pub fn parse(file_path: &Path, content: &str, config: &Config) -> Result<Section> {
let (meta, content) = split_section_content(file_path, content)?;
let mut section = Section::new(file_path, meta);
section.lang = section.file.find_language(config)?;
section.raw_content = content;
let (word_count, reading_time) = get_reading_analytics(&section.raw_content);
section.word_count = Some(word_count);
@ -223,6 +228,7 @@ impl Default for Section {
toc: vec![],
reading_time: None,
word_count: None,
lang: None,
}
}
}
@ -231,12 +237,13 @@ impl Default for Section {
mod tests {
use std::fs::{create_dir, File};
use std::io::Write;
use std::path::Path;
use globset::{Glob, GlobSetBuilder};
use tempfile::tempdir;
use super::Section;
use config::Config;
use config::{Config, Language};
#[test]
fn section_with_assets_gets_right_info() {
@ -285,4 +292,19 @@ mod tests {
assert_eq!(page.assets.len(), 1);
assert_eq!(page.assets[0].file_name().unwrap().to_str(), Some("graph.jpg"));
}
#[test]
fn can_specify_language_in_filename() {
    let mut config = Config::default();
    config.languages.push(Language { code: "fr".to_string(), rss: false });
    let content = r#"
+++
+++
Bonjour le monde"#
        .to_string();

    let section = Section::parse(Path::new("hello.fr.md"), &content, &config).unwrap();

    // The language is read from the `.fr` part of the filename
    assert_eq!(section.lang, Some("fr".to_string()));
}
}