Parsing front matter

This commit is contained in:
Vincent Prouillet 2016-12-06 20:53:14 +09:00
parent a48a4c9bdf
commit a6e421c5e4
5 changed files with 343 additions and 14 deletions

118
Cargo.lock generated
View file

@ -8,6 +8,7 @@ dependencies = [
"lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pulldown-cmark 0.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
"tera 0.4.0 (git+https://github.com/Keats/tera.git)",
"toml 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -109,6 +110,11 @@ dependencies = [
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "dtoa"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "error-chain"
version = "0.7.1"
@ -127,6 +133,31 @@ name = "getopts"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "glob"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "humansize"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "idna"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itoa"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "kernel32-sys"
version = "0.2.2"
@ -164,6 +195,16 @@ name = "nom"
version = "1.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num-traits"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pest"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pulldown-cmark"
version = "0.0.8"
@ -173,6 +214,11 @@ dependencies = [
"getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quick-error"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "quine-mc_cluskey"
version = "0.2.4"
@ -218,11 +264,47 @@ name = "serde"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde_json"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.19 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "slug"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unidecode 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "strsim"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "tera"
version = "0.4.0"
source = "git+https://github.com/Keats/tera.git#186ddc1a57f733421dd5de67badb29cf30c14d56"
dependencies = [
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"humansize 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pest 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.19 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)",
"slug 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "term_size"
version = "0.2.1"
@ -266,6 +348,14 @@ dependencies = [
"serde 0.8.19 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-bidi"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-normalization"
version = "0.1.2"
@ -281,6 +371,20 @@ name = "unicode-width"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unidecode"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "utf8-ranges"
version = "0.1.3"
@ -322,16 +426,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum clippy 0.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "5b4fabf979ddf6419a313c1c0ada4a5b95cfd2049c56e8418d622d27b4b6ff32"
"checksum clippy_lints 0.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "ce96ec05bfe018a0d5d43da115e54850ea2217981ff0f2e462780ab9d594651a"
"checksum dbghelp-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "97590ba53bcb8ac28279161ca943a924d1fd4a8fb3fa63302591647c4fc5b850"
"checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d"
"checksum error-chain 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1cd681735364a04cd5d69f01a4f6768e70473941f8d86d8c224faf6955a75799"
"checksum gcc 0.3.39 (registry+https://github.com/rust-lang/crates.io-index)" = "771e4a97ff6f237cf0f7d5f5102f6e28bb9743814b6198d684da5c58b76c11e0"
"checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum humansize 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ec9e8cc78ff5f1f18be53b9a0295dce25c668c10cd60c4d3e535b8882a88f77"
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
"checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6abe0ee2e758cd6bc8a2cd56726359007748fbf4128da998b65d0b70f881e19b"
"checksum libc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "a51822fc847e7a8101514d1d44e354ba2ffa7d4c194dcab48870740e327cac70"
"checksum matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efd7622e3022e1a6eaa602c4cea8912254e5582c9c692e9167714182244801b1"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum nom 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a5b8c256fd9471521bcb84c3cdba98921497f1a331cbc15b8030fc63b82050ce"
"checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c"
"checksum pest 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f6666c81a6359af7a9dbc48f596d6f318a9dbaefdec248581ab836dc0c1f082"
"checksum pulldown-cmark 0.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1058d7bb927ca067656537eec4e02c2b4b70eaaa129664c5b90c111e20326f41"
"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c"
"checksum quine-mc_cluskey 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "07589615d719a60c8dd8a4622e7946465dfef20d1a428f969e3443e7386d5f45"
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
@ -339,15 +451,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rustc-serialize 0.3.21 (registry+https://github.com/rust-lang/crates.io-index)" = "bff9fc1c79f2dec76b253273d07682e94a978bd8f132ded071188122b2af9818"
"checksum semver 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d5b7638a1f03815d94e88cb3b3c08e87f0db4d683ef499d1836aaf70a45623f"
"checksum serde 0.8.19 (registry+https://github.com/rust-lang/crates.io-index)" = "58a19c0871c298847e6b68318484685cd51fa5478c0c905095647540031356e5"
"checksum serde_json 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1cb6b19e74d9f65b9d03343730b643d729a446b29376785cd65efdff4675e2fc"
"checksum slug 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f5ff4b43cb07b86c5f9236c92714a22cdf9e5a27a7d85e398e2c9403328cb8"
"checksum strsim 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "67f84c44fbb2f91db7fef94554e6b2ac05909c9c0b0bc23bb98d3a1aebfe7f7c"
"checksum tera 0.4.0 (git+https://github.com/Keats/tera.git)" = "<none>"
"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0"
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
"checksum toml 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)" = "0590d72182e50e879c4da3b11c6488dae18fccb1ae0c7a3eda18e16795844796"
"checksum toml 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "736b60249cb25337bc196faa43ee12c705e426f3d55c214d73a4e7be06f92cb4"
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
"checksum unicode-segmentation 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c3bc443ded17b11305ffffe6b37e2076f328a5a8cb6aa877b1b98f77699e98b5"
"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e"
"checksum unidecode 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d2adb95ee07cd579ed18131f2d9e7a17c25a4b76022935c7f2460d2bfae89fd2"
"checksum url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "48ccf7bd87a81b769cf84ad556e034541fb90e1cd6d4bc375c822ed9500cd9d7"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f"
"checksum walkdir 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "dd7c16466ecc507c7cb5988db03e6eab4aaeab89a5c37a29251fcfd3ac9b7afe"

View file

@ -16,6 +16,7 @@ walkdir = "1"
pulldown-cmark = "0"
regex = "0.1"
lazy_static = "0.2"
tera = { git = "https://github.com/Keats/tera.git" }
clippy = {version = "~0.0.103", optional = true}
[dependencies.toml]

View file

@ -1,6 +1,6 @@
use config:: Config;
use errors::{Result, ErrorKind};
use errors::{Result};
pub fn build(config: Config) -> Result<()> {

View file

@ -8,6 +8,7 @@ extern crate toml;
extern crate walkdir;
extern crate pulldown_cmark;
extern crate regex;
extern crate tera;
mod config;
mod errors;

View file

@ -1,17 +1,37 @@
/// A page, can be a blog post or a basic page
use std::collections::HashMap;
use std::collections::{HashMap, BTreeMap};
use std::default::Default;
use pulldown_cmark as cmark;
// use pulldown_cmark as cmark;
use regex::Regex;
use toml::Parser;
use toml::{Parser, Value as TomlValue};
use tera::{Value, to_value};
use errors::{Result, ErrorKind};
use errors::{Result};
use errors::ErrorKind::InvalidFrontMatter;
lazy_static! {
static ref DELIM_RE: Regex = Regex::new(r"\+\+\+\s*\r?\n").unwrap();
}
/// Recursively translates a parsed TOML value into the matching Tera value.
///
/// Scalars are handed straight to `to_value`; arrays and tables have each of
/// their children converted first. Used to fill the `Page::extra` map.
fn toml_to_tera(val: &TomlValue) -> Value {
    match *val {
        TomlValue::String(ref text) => to_value(text),
        TomlValue::Datetime(ref text) => to_value(text),
        TomlValue::Boolean(ref flag) => to_value(flag),
        TomlValue::Integer(ref number) => to_value(number),
        TomlValue::Float(ref number) => to_value(number),
        TomlValue::Array(ref items) => {
            let converted: Vec<_> = items.iter().map(toml_to_tera).collect();
            to_value(&converted)
        }
        TomlValue::Table(ref entries) => {
            // Keys stay borrowed from the TOML table; only the values are rebuilt.
            let converted: BTreeMap<_, _> = entries
                .iter()
                .map(|(name, inner)| (name, toml_to_tera(inner)))
                .collect();
            to_value(&converted)
        }
    }
}
#[derive(Debug, PartialEq)]
struct Page {
@ -23,14 +43,16 @@ struct Page {
content: String,
// tags, not to be confused with categories
tags: Vec<String>,
// whether this page should be public or not
is_draft: bool,
// any extra parameter present in the front matter
// it will be passed to the template context
extra: HashMap<String, String>,
extra: HashMap<String, Value>,
// only one category allowed
category: Option<String>,
// optional date if we want to order pages (ie block)
date: Option<bool>,
// optional date if we want to order pages (ie blog post)
date: Option<String>,
// optional layout, if we want to specify which html to render for that page
layout: Option<String>,
// description that appears when linked, e.g. on twitter
@ -38,42 +60,229 @@ struct Page {
}
impl Default for Page {
    /// Builds a blank page: empty strings and collections, not a draft,
    /// and every optional field unset.
    fn default() -> Page {
        Page {
            title: String::new(),
            url: String::new(),
            content: String::new(),
            tags: Vec::new(),
            is_draft: false,
            extra: HashMap::new(),
            category: None,
            date: None,
            layout: None,
            description: None,
        }
    }
}
impl Page {
// Parse a page given the content of the .md file
// Files without front matter or with invalid front matter are considered
// erroneous
pub fn from_str(filename: &str, content: &str) -> Result<()> {
pub fn from_str(filename: &str, content: &str) -> Result<Page> {
// 1. separate front matter from content
if !DELIM_RE.is_match(content) {
return Err(ErrorKind::InvalidFrontMatter(filename.to_string()).into());
return Err(InvalidFrontMatter(filename.to_string()).into());
}
// 2. extract the front matter and the content
let splits: Vec<&str> = DELIM_RE.splitn(content, 2).collect();
let front_matter = splits[0];
if front_matter.trim() == "" {
return Err(InvalidFrontMatter(filename.to_string()).into());
}
let content = splits[1];
// 2. parse front matter
// 2. create our page, parse front matter and assign all of that
let mut page = Page::default();
page.content = content.to_string();
// Keeps track of required fields: title, url
let mut num_required_fields = 2;
let mut parser = Parser::new(&front_matter);
if let Some(value) = parser.parse() {
for (key, value) in value.iter() {
if key == "title" {
page.title = value
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?
.to_string();
num_required_fields -= 1;
} else if key == "url" {
page.url = value
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?
.to_string();
num_required_fields -= 1;
} else if key == "draft" {
page.is_draft = value
.as_bool()
.ok_or(InvalidFrontMatter(filename.to_string()))?;
} else if key == "category" {
page.category = Some(
value
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?.to_string()
);
} else if key == "layout" {
page.layout = Some(
value
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?.to_string()
);
} else if key == "description" {
page.description = Some(
value
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?.to_string()
);
} else if key == "date" {
page.date = Some(
value
.as_datetime()
.ok_or(InvalidFrontMatter(filename.to_string()))?.to_string()
);
} else if key == "tags" {
let toml_tags = value
.as_slice()
.ok_or(InvalidFrontMatter(filename.to_string()))?;
for tag in toml_tags {
page.tags.push(
tag
.as_str()
.ok_or(InvalidFrontMatter(filename.to_string()))?
.to_string()
);
}
} else {
page.extra.insert(key.to_string(), toml_to_tera(value));
}
}
} else {
// TODO: handle error in parsing TOML
println!("parse errors: {:?}", parser.errors);
}
Ok(())
if num_required_fields > 0 {
println!("Not all required fields");
return Err(InvalidFrontMatter(filename.to_string()).into());
}
Ok(page)
}
}
// Unit tests for `Page::from_str`. The raw-string fixtures keep their lines at
// column 0 on purpose: leading whitespace would become part of the front matter.
#[cfg(test)]
mod tests {
    use super::Page;
    use tera::to_value;

    #[test]
    fn test_can_parse_a_valid_page() {
        let content = r#"
title = "Hello"
url = "hello-world"
+++
Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.title, "Hello".to_string());
        assert_eq!(page.url, "hello-world".to_string());
        assert_eq!(page.content, "Hello world".to_string());
    }

    #[test]
    fn test_can_parse_tags() {
        let content = r#"
title = "Hello"
url = "hello-world"
tags = ["rust", "html"]
+++
Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.title, "Hello".to_string());
        assert_eq!(page.url, "hello-world".to_string());
        assert_eq!(page.content, "Hello world".to_string());
        assert_eq!(page.tags, ["rust".to_string(), "html".to_string()]);
    }

    #[test]
    fn test_can_parse_extra_attributes_in_frontmatter() {
        let content = r#"
title = "Hello"
url = "hello-world"
language = "en"
authors = ["Bob", "Alice"]
+++
Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.title, "Hello".to_string());
        assert_eq!(page.url, "hello-world".to_string());
        assert_eq!(page.extra.get("language").unwrap(), &to_value("en"));
        assert_eq!(
            page.extra.get("authors").unwrap(),
            &to_value(["Bob".to_string(), "Alice".to_string()])
        );
    }

    #[test]
    fn test_ignore_pages_with_no_front_matter() {
        let content = r#"Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_err());
    }

    #[test]
    fn test_ignores_pages_with_empty_front_matter() {
        // Regular (non-raw) string: `\n` must be a real newline, otherwise the
        // delimiter is never found and the empty-front-matter check is not hit.
        let content = "+++\nHello world";
        let res = Page::from_str("", content);
        assert!(res.is_err());
    }

    #[test]
    fn test_ignores_pages_with_invalid_front_matter() {
        // Regular (non-raw) string so the front matter really is `title = 1`
        // followed by a `+++` line; the integer title is what must be rejected.
        let content = "title = 1\n+++\nHello world";
        let res = Page::from_str("", content);
        assert!(res.is_err());
    }

    #[test]
    fn test_ignores_pages_with_missing_required_value_front_matter() {
        let content = r#"
title = ""
+++
Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_err());
    }

    #[test]
    fn test_errors_on_non_string_tag() {
        let content = r#"
title = "Hello"
url = "hello-world"
tags = ["rust", 1]
+++
Hello world"#;
        let res = Page::from_str("", content);
        assert!(res.is_err());
    }
}