From 2673466998ccd988cd19f9c7805128975a1c0638 Mon Sep 17 00:00:00 2001 From: Renato Caldas Date: Sat, 21 Nov 2020 10:44:42 +0000 Subject: [PATCH] Add bibtex support to load_data() (#1190) * Add support for loading Bibtex data. * Add load_data() documentation for the bibtex format * Force bibtex tags to be lower case. Bibtex tags are case-insensitive, and this works around tera's case-sensitiveness. * Improve the load_data() documentation for the bibtex format --- Cargo.lock | 111 ++++++++++++++++++ components/templates/Cargo.toml | 1 + .../templates/src/global_fns/load_data.rs | 53 ++++++++- .../documentation/templates/overview.md | 56 ++++++++- 4 files changed, 217 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3de60be..54444335 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,6 +51,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "arrayvec" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" + [[package]] name = "assert-json-diff" version = "1.1.0" @@ -161,6 +167,18 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +[[package]] +name = "bytecount" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8" + +[[package]] +name = "bytecount" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0017894339f586ccb943b01b9555de56770c11cda818e7e3d8bd93f4ed7f46e" + [[package]] name = "bytemuck" version = "1.4.1" @@ -1158,6 +1176,19 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73a004f877f468548d8d0ac4977456a249d8fabbdb8416c36db163dfc8f2e8ca" +[[package]] +name = "lexical-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.80" @@ -1483,6 +1514,73 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "nom-bibtex" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9db257f6c7b9c8b3ab67ee6a4b23a290c157d183fef2ac065bf9fce5f1c1299" +dependencies = [ + "nom", + "nom-tracable", + "nom_locate 2.0.0", + "quick-error", +] + +[[package]] +name = "nom-tracable" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e012c742e1269f801f6bfe0d1ebf99d7a3f7bc1d65c970bab0e7bee439e31610" +dependencies = [ + "nom", + "nom-tracable-macros", + "nom_locate 1.0.0", + "nom_locate 2.0.0", +] + +[[package]] +name = "nom-tracable-macros" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65ad630ff46d4c61da89042f327e6fdf104a6ebb667565727ef0bb294a7c3197" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "nom_locate" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f932834fd8e391fc7710e2ba17e8f9f8645d846b55aa63207e17e110a1e1ce35" +dependencies = [ + "bytecount 0.3.2", + "memchr", + "nom", +] + +[[package]] +name = "nom_locate" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e4726500a3d0297dd38edc169d919ad997a9931b4645b59ce0231e88536e213" +dependencies = [ + "bytecount 0.6.0", + "memchr", + "nom", +] + [[package]] name = "notify" version = "4.0.15" @@ -1849,6 +1947,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.7" @@ -2317,6 +2421,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "string_cache" version = "0.8.0" @@ -2438,6 +2548,7 @@ dependencies = [ "lazy_static", "library", "mockito", + "nom-bibtex", "pulldown-cmark", "reqwest", "serde_json", diff --git a/components/templates/Cargo.toml b/components/templates/Cargo.toml index 29a690e9..b8757a94 100644 --- a/components/templates/Cargo.toml +++ b/components/templates/Cargo.toml @@ -15,6 +15,7 @@ image = "0.23" serde_json = "1.0" sha2 = "0.9" url = "2" +nom-bibtex = "0.3" errors = { path = "../errors" } utils = { path = "../utils" } diff --git a/components/templates/src/global_fns/load_data.rs b/components/templates/src/global_fns/load_data.rs index 811f1321..0011b516 100644 --- a/components/templates/src/global_fns/load_data.rs +++ b/components/templates/src/global_fns/load_data.rs @@ -28,6 +28,7 @@ enum OutputFormat { Toml, Json, Csv, + Bibtex, Plain, } @@ -51,6 +52,7 @@ impl FromStr for OutputFormat { "toml" => Ok(OutputFormat::Toml), "csv" => Ok(OutputFormat::Csv), "json" => Ok(OutputFormat::Json), + "bibtex" => Ok(OutputFormat::Bibtex), "plain" => Ok(OutputFormat::Plain), format => Err(format!("Unknown output format {}", format).into()), } @@ -63,6 +65,7 @@ impl OutputFormat { OutputFormat::Json => "application/json", OutputFormat::Csv => "text/csv", OutputFormat::Toml => "application/toml", + OutputFormat::Bibtex => "application/x-bibtex", OutputFormat::Plain => "text/plain", }) } @@ -148,7 +151,7 @@ fn get_output_format_from_args( let format_arg = optional_arg!( String, args.get("format"), - "`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, plain)" + "`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)" ); if let Some(format) = format_arg { @@ -169,7 +172,7 @@ fn get_output_format_from_args( } /// A Tera function to load data from a file or from a URL -/// Currently the supported formats are json, toml, csv and plain text +/// Currently the supported formats are json, toml, csv, bibtex and plain text #[derive(Debug)] pub struct LoadData { base_path: PathBuf, @@ -223,6 +226,7 @@ impl TeraFn for LoadData { OutputFormat::Toml => load_toml(data), OutputFormat::Csv => load_csv(data), OutputFormat::Json => load_json(data), + OutputFormat::Bibtex => load_bibtex(data), OutputFormat::Plain => to_value(data).map_err(|e| e.into()), }; @@ -252,6 +256,51 @@ fn load_toml(toml_data: String) -> Result { } } +/// Parse a BIBTEX string and convert it to a Tera Value +fn load_bibtex(bibtex_data: String) -> Result { + let bibtex_model = nom_bibtex::Bibtex::parse(&bibtex_data).map_err(|e| format!("{:?}", e))?; + let mut bibtex_map = Map::new(); + + let preambles_array = bibtex_model.preambles() + .iter() + .map(|v| Value::String(v.to_string())) + .collect(); + bibtex_map.insert(String::from("preambles"), Value::Array(preambles_array)); + + let comments_array = bibtex_model.comments() + .iter() + .map(|v| Value::String(v.to_string())) + .collect(); + bibtex_map.insert(String::from("comments"), Value::Array(comments_array)); + + let mut variables_map = Map::new(); + for (key,val) in bibtex_model.variables() { + variables_map.insert(key.to_string(), Value::String(val.to_string())); + } + bibtex_map.insert(String::from("variables"), Value::Object(variables_map)); + + let bibliographies_array = bibtex_model.bibliographies() + .iter() + .map(|b| { + let mut m = Map::new(); + m.insert(String::from("entry_type"), Value::String(b.entry_type().to_string())); + m.insert(String::from("citation_key"), Value::String(b.citation_key().to_string())); + + let mut tags = Map::new(); + for (key, val) in b.tags() { + tags.insert(key.to_lowercase().to_string(), Value::String(val.to_string())); + } + m.insert(String::from("tags"), Value::Object(tags)); + Value::Object(m) + }) + .collect(); + bibtex_map.insert(String::from("bibliographies"), Value::Array(bibliographies_array)); + + let bibtex_value: Value = Value::Object(bibtex_map); + to_value(bibtex_value).map_err(|err| err.into()) +} + + /// Parse a CSV string and convert it to a Tera Value /// /// An example csv file `example.csv` could be: diff --git a/docs/content/documentation/templates/overview.md b/docs/content/documentation/templates/overview.md index 2150225e..a7ee08c3 100644 --- a/docs/content/documentation/templates/overview.md +++ b/docs/content/documentation/templates/overview.md @@ -202,7 +202,7 @@ items: Array; See the [Taxonomies documentation](@/documentation/templates/taxonomies.md) for a full documentation of those types. ### `load_data` -Loads data from a file or URL. Supported file types include *toml*, *json* and *csv*. +Loads data from a file or URL. Supported file types include *toml*, *json*, *csv* and *bibtex*. Any other file type will be loaded as plain text. The `path` argument specifies the path to the data file relative to your base directory, where your `config.toml` is. @@ -213,7 +213,7 @@ As a security precaution, if this file is outside the main site directory, your ``` The optional `format` argument allows you to specify and override which data type is contained -within the file specified in the `path` argument. Valid entries are `toml`, `json`, `csv` +within the file specified in the `path` argument. Valid entries are `toml`, `json`, `csv`, `bibtex` or `plain`. If the `format` argument isn't specified, then the path extension is used. ```jinja2 @@ -251,6 +251,58 @@ template: } ``` +The `bibtex` format loads data into a structure matching the format used by the +[nom-bibtex crate](https://crates.io/crates/nom-bibtex). The following is an example of data +in bibtex format: + +``` +@preamble{"A bibtex preamble" # " this is."} + +@Comment{ + Here is a comment. +} + +Another comment! + +@string(name = "Vincent Prouillet") +@string(github = "https://github.com/getzola/zola") + +@misc {my_citation_key, + author= name, + title = "Zola", + note = "github: " # github +} } +``` + +The following is the json-equivalent format of the produced bibtex data structure: +```json +{ + "preambles": ["A bibtex preamble this is."], + "comments": ["Here is a comment.", "Another comment!"], + "variables": { + "name": "Vincent Prouillet", + "github": "https://github.com/getzola/zola" + }, + "bibliographies": [ + { + "entry_type": "misc", + "citation_key": "my_citation_key", + "tags": { + "author": "Vincent Prouillet", + "title": "Zola", + "note": "github: https://github.com/getzola/zola" + } + } + ] +} +``` + +Finally, the bibtex data can be accessed from the template as follows: +```jinja2 +{% set tags = data.bibliographies[0].tags %} +This was generated using {{ tags.title }}, authored by {{ tags.author }}. +``` + #### Remote content Instead of using a file, you can load data from a remote URL. This can be done by specifying a `url` parameter