416 lines
14 KiB
Rust
416 lines
14 KiB
Rust
|
extern crate toml;
|
||
|
extern crate serde_json;
|
||
|
|
||
|
use utils::fs::{read_file, is_path_in_directory, get_file_time};
|
||
|
|
||
|
use std::hash::{Hasher, Hash};
|
||
|
use std::str::FromStr;
|
||
|
use std::fmt;
|
||
|
use std::collections::hash_map::DefaultHasher;
|
||
|
use reqwest::{Client, header};
|
||
|
use url::Url;
|
||
|
|
||
|
use std::path::PathBuf;
|
||
|
use std::sync::{Arc, Mutex};
|
||
|
|
||
|
|
||
|
use csv::Reader;
|
||
|
use std::collections::HashMap;
|
||
|
use tera::{GlobalFn, Value, from_value, to_value, Result, Map, Error};
|
||
|
|
||
|
/// Error message used when `load_data` is not called with exactly one of the
/// `path` / `url` arguments, or when one of them cannot be read as a string.
static GET_DATA_ARGUMENT_ERROR_MESSAGE: &str = "`load_data`: requires EITHER a `path` or `url` argument";
|
||
|
|
||
|
enum DataSource {
|
||
|
Url(Url),
|
||
|
Path(PathBuf)
|
||
|
}
|
||
|
|
||
|
/// The formats `load_data` knows how to turn into a template value.
#[derive(Debug)]
enum OutputFormat {
    /// TOML document
    Toml,
    /// JSON document
    Json,
    /// CSV table
    Csv,
    /// Raw text, handed to the template unparsed
    Plain,
}
|
||
|
|
||
|
impl fmt::Display for OutputFormat {
|
||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
|
fmt::Debug::fmt(self, f)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl Hash for OutputFormat {
|
||
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||
|
self.to_string().hash(state);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl FromStr for OutputFormat {
|
||
|
type Err = Error;
|
||
|
|
||
|
fn from_str(output_format: &str) -> Result<Self> {
|
||
|
return match output_format {
|
||
|
"toml" => Ok(OutputFormat::Toml),
|
||
|
"csv" => Ok(OutputFormat::Csv),
|
||
|
"json" => Ok(OutputFormat::Json),
|
||
|
"plain" => Ok(OutputFormat::Plain),
|
||
|
format => Err(format!("Unknown output format {}", format).into())
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl OutputFormat {
|
||
|
fn as_accept_header(&self) -> header::HeaderValue {
|
||
|
return header::HeaderValue::from_static(match self {
|
||
|
OutputFormat::Json => "application/json",
|
||
|
OutputFormat::Csv => "text/csv",
|
||
|
OutputFormat::Toml => "application/toml",
|
||
|
OutputFormat::Plain => "text/plain",
|
||
|
});
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl DataSource {
|
||
|
fn from_args(path_arg: Option<String>, url_arg: Option<String>, content_path: &PathBuf) -> Result<Self> {
|
||
|
if path_arg.is_some() && url_arg.is_some() {
|
||
|
return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
|
||
|
}
|
||
|
|
||
|
if let Some(path) = path_arg {
|
||
|
let full_path = content_path.join(path);
|
||
|
if !full_path.exists() {
|
||
|
return Err(format!("{} doesn't exist", full_path.display()).into());
|
||
|
}
|
||
|
return Ok(DataSource::Path(full_path));
|
||
|
}
|
||
|
|
||
|
if let Some(url) = url_arg {
|
||
|
return Url::parse(&url).map(|parsed_url| DataSource::Url(parsed_url)).map_err(|e| format!("Failed to parse {} as url: {}", url, e).into());
|
||
|
}
|
||
|
|
||
|
return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
|
||
|
}
|
||
|
|
||
|
fn get_cache_key(&self, format: &OutputFormat) -> u64 {
|
||
|
let mut hasher = DefaultHasher::new();
|
||
|
format.hash(&mut hasher);
|
||
|
self.hash(&mut hasher);
|
||
|
return hasher.finish();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl Hash for DataSource {
|
||
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||
|
match self {
|
||
|
DataSource::Url(url) => url.hash(state),
|
||
|
DataSource::Path(path) => {
|
||
|
path.hash(state);
|
||
|
get_file_time(&path).expect("get file time").hash(state);
|
||
|
}
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
fn get_data_from_args(content_path: &PathBuf, args: &HashMap<String, Value>) -> Result<DataSource> {
|
||
|
let path_arg = optional_arg!(
|
||
|
String,
|
||
|
args.get("path"),
|
||
|
GET_DATA_ARGUMENT_ERROR_MESSAGE
|
||
|
);
|
||
|
|
||
|
let url_arg = optional_arg!(
|
||
|
String,
|
||
|
args.get("url"),
|
||
|
GET_DATA_ARGUMENT_ERROR_MESSAGE
|
||
|
);
|
||
|
|
||
|
return DataSource::from_args(path_arg, url_arg, content_path);
|
||
|
}
|
||
|
|
||
|
fn read_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result<String> {
|
||
|
if !is_path_in_directory(&base_path, &full_path).map_err(|e| format!("Failed to read data file {}: {}", full_path.display(), e))? {
|
||
|
return Err(format!("{} is not inside the base site directory {}", full_path.display(), base_path.display()).into());
|
||
|
}
|
||
|
return read_file(&full_path)
|
||
|
.map_err(|e| format!("`load_data`: error {} loading file {}", full_path.to_str().unwrap(), e).into());
|
||
|
}
|
||
|
|
||
|
fn get_output_format_from_args(args: &HashMap<String, Value>, data_source: &DataSource) -> Result<OutputFormat> {
|
||
|
let format_arg = optional_arg!(
|
||
|
String,
|
||
|
args.get("format"),
|
||
|
"`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml)"
|
||
|
);
|
||
|
|
||
|
if let Some(format) = format_arg {
|
||
|
return OutputFormat::from_str(&format);
|
||
|
}
|
||
|
|
||
|
let from_extension = if let DataSource::Path(path) = data_source {
|
||
|
let extension_result: Result<&str> = path.extension().map(|extension| extension.to_str().unwrap()).ok_or(format!("Could not determine format for {} from extension", path.display()).into());
|
||
|
extension_result?
|
||
|
} else {
|
||
|
"plain"
|
||
|
};
|
||
|
return OutputFormat::from_str(from_extension);
|
||
|
}
|
||
|
|
||
|
|
||
|
/// A global function to load data from a data file.
|
||
|
/// Currently the supported formats are json, toml and csv
|
||
|
pub fn make_load_data(content_path: PathBuf, base_path: PathBuf) -> GlobalFn {
|
||
|
let mut headers = header::HeaderMap::new();
|
||
|
headers.insert(header::USER_AGENT, "zola".parse().unwrap());
|
||
|
let client = Arc::new(Mutex::new(Client::builder().build().expect("reqwest client build")));
|
||
|
let result_cache: Arc<Mutex<HashMap<u64, Value>>> = Arc::new(Mutex::new(HashMap::new()));
|
||
|
Box::new(move |args| -> Result<Value> {
|
||
|
let data_source = get_data_from_args(&content_path, &args)?;
|
||
|
|
||
|
let file_format = get_output_format_from_args(&args, &data_source)?;
|
||
|
|
||
|
let cache_key = data_source.get_cache_key(&file_format);
|
||
|
|
||
|
let mut cache = result_cache.lock().expect("result cache lock");
|
||
|
let response_client = client.lock().expect("response client lock");
|
||
|
if let Some(cached_result) = cache.get(&cache_key) {
|
||
|
return Ok(cached_result.clone());
|
||
|
}
|
||
|
|
||
|
let data = match data_source {
|
||
|
DataSource::Path(path) => read_data_file(&base_path, path),
|
||
|
DataSource::Url(url) => {
|
||
|
let mut response = response_client.get(url.as_str()).header(header::ACCEPT, file_format.as_accept_header()).send().and_then(|res| res.error_for_status()).map_err(|e| format!("Failed to request {}: {}", url, e.status().expect("response status")))?;
|
||
|
response.text().map_err(|e| format!("Failed to parse response from {}: {:?}", url, e).into())
|
||
|
},
|
||
|
}?;
|
||
|
|
||
|
let result_value: Result<Value> = match file_format {
|
||
|
OutputFormat::Toml => load_toml(data),
|
||
|
OutputFormat::Csv => load_csv(data),
|
||
|
OutputFormat::Json => load_json(data),
|
||
|
OutputFormat::Plain => to_value(data).map_err(|e| e.into()),
|
||
|
};
|
||
|
|
||
|
if let Ok(data_result) = &result_value {
|
||
|
cache.insert(cache_key, data_result.clone());
|
||
|
}
|
||
|
|
||
|
result_value
|
||
|
})
|
||
|
}
|
||
|
|
||
|
/// load/parse a json file from the given path and place it into a
|
||
|
/// tera value
|
||
|
fn load_json(json_data: String) -> Result<Value> {
|
||
|
let json_content: Value = serde_json::from_str(json_data.as_str()).map_err(|e| format!("{:?}", e))?;
|
||
|
return Ok(json_content);
|
||
|
}
|
||
|
|
||
|
/// load/parse a toml file from the given path, and place it into a
|
||
|
/// tera Value
|
||
|
fn load_toml(toml_data: String) -> Result<Value> {
|
||
|
let toml_content: toml::Value = toml::from_str(&toml_data).map_err(|e| format!("{:?}", e))?;
|
||
|
|
||
|
to_value(toml_content).map_err(|e| e.into())
|
||
|
}
|
||
|
|
||
|
/// Load/parse a csv file from the given path, and place it into a
|
||
|
/// tera Value.
|
||
|
///
|
||
|
/// An example csv file `example.csv` could be:
|
||
|
/// ```csv
|
||
|
/// Number, Title
|
||
|
/// 1,Gutenberg
|
||
|
/// 2,Printing
|
||
|
/// ```
|
||
|
/// The json value output would be:
|
||
|
/// ```json
|
||
|
/// {
|
||
|
/// "headers": ["Number", "Title"],
|
||
|
/// "records": [
|
||
|
/// ["1", "Gutenberg"],
|
||
|
/// ["2", "Printing"]
|
||
|
/// ],
|
||
|
/// }
|
||
|
/// ```
|
||
|
fn load_csv(csv_data: String) -> Result<Value> {
|
||
|
let mut reader = Reader::from_reader(csv_data.as_bytes());
|
||
|
|
||
|
let mut csv_map = Map::new();
|
||
|
|
||
|
{
|
||
|
let hdrs = reader.headers()
|
||
|
.map_err(|e| format!("'load_data': {} - unable to read CSV header line (line 1) for CSV file", e))?;
|
||
|
|
||
|
let headers_array = hdrs.iter()
|
||
|
.map(|v| Value::String(v.to_string()))
|
||
|
.collect();
|
||
|
|
||
|
csv_map.insert(String::from("headers"), Value::Array(headers_array));
|
||
|
}
|
||
|
|
||
|
{
|
||
|
let records = reader.records();
|
||
|
|
||
|
let mut records_array: Vec<Value> = Vec::new();
|
||
|
|
||
|
for result in records {
|
||
|
let record = result.unwrap();
|
||
|
|
||
|
let mut elements_array: Vec<Value> = Vec::new();
|
||
|
|
||
|
for e in record.into_iter() {
|
||
|
elements_array.push(Value::String(String::from(e)));
|
||
|
}
|
||
|
|
||
|
records_array.push(Value::Array(elements_array));
|
||
|
}
|
||
|
|
||
|
csv_map.insert(String::from("records"), Value::Array(records_array));
|
||
|
}
|
||
|
|
||
|
let csv_value: Value = Value::Object(csv_map);
|
||
|
to_value(csv_value).map_err(|err| err.into())
|
||
|
}
|
||
|
|
||
|
|
||
|
#[cfg(test)]
mod tests {
    use super::{make_load_data, DataSource, OutputFormat};

    use std::collections::HashMap;
    use std::path::PathBuf;

    use tera::to_value;

    /// Absolute path of `filename` inside the shared `../utils/test-files` directory.
    fn get_test_file(filename: &str) -> PathBuf {
        PathBuf::from("../utils/test-files")
            .canonicalize()
            .unwrap()
            .join(filename)
    }

    #[test]
    fn fails_when_missing_file() {
        let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils"));
        let mut params = HashMap::new();
        params.insert("path".to_string(), to_value("../../../READMEE.md").unwrap());

        let outcome = load_data(params);

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().description().contains("READMEE.md doesn't exist"));
    }

    #[test]
    fn cant_load_outside_content_dir() {
        let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils"));
        let mut params = HashMap::new();
        params.insert("path".to_string(), to_value("../../../README.md").unwrap());
        params.insert("format".to_string(), to_value("plain").unwrap());

        let outcome = load_data(params);

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().description().contains("README.md is not inside the base site directory"));
    }

    #[test]
    fn calculates_cache_key_for_path() {
        // The key depends on the absolute path, so it can only be compared with
        // a second computation and not with a fixed value
        let first = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
        let second = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);

        assert_eq!(first, second);
    }

    #[test]
    fn calculates_cache_key_for_url() {
        let source = DataSource::Url("https://api.github.com/repos/getzola/zola".parse().unwrap());

        assert_eq!(source.get_cache_key(&OutputFormat::Plain), 8916756616423791754);
    }

    #[test]
    fn different_cache_key_per_filename() {
        let toml_file_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
        let json_file_key = DataSource::Path(get_test_file("test.json")).get_cache_key(&OutputFormat::Toml);

        assert_ne!(toml_file_key, json_file_key);
    }

    #[test]
    fn different_cache_key_per_format() {
        let as_toml_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
        let as_json_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Json);

        assert_ne!(as_toml_key, as_json_key);
    }

    #[test]
    fn can_load_remote_data() {
        let load_data = make_load_data(PathBuf::new(), PathBuf::new());
        let mut params = HashMap::new();
        params.insert("url".to_string(), to_value("https://httpbin.org/json").unwrap());
        params.insert("format".to_string(), to_value("json").unwrap());

        let loaded = load_data(params).unwrap();

        assert_eq!(
            loaded.get("slideshow").unwrap().get("title").unwrap(),
            &to_value("Sample Slide Show").unwrap()
        );
    }

    #[test]
    fn fails_when_request_404s() {
        let load_data = make_load_data(PathBuf::new(), PathBuf::new());
        let mut params = HashMap::new();
        params.insert("url".to_string(), to_value("https://httpbin.org/status/404/").unwrap());
        params.insert("format".to_string(), to_value("json").unwrap());

        let outcome = load_data(params);

        assert!(outcome.is_err());
        assert_eq!(
            outcome.unwrap_err().description(),
            "Failed to request https://httpbin.org/status/404/: 404 Not Found"
        );
    }

    #[test]
    fn can_load_toml() {
        let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
        let mut params = HashMap::new();
        params.insert("path".to_string(), to_value("test.toml").unwrap());

        let loaded = load_data(params).unwrap();

        // TOML does not load in order, and dates are not returned as strings
        // but as another object with a key and value
        assert_eq!(loaded, json!({
            "category": {
                "date": {
                    "$__toml_private_datetime": "1979-05-27T07:32:00Z"
                },
                "key": "value"
            },
        }));
    }

    #[test]
    fn can_load_csv() {
        let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
        let mut params = HashMap::new();
        params.insert("path".to_string(), to_value("test.csv").unwrap());

        let loaded = load_data(params).unwrap();

        assert_eq!(loaded, json!({
            "headers": ["Number", "Title"],
            "records": [
                ["1", "Gutenberg"],
                ["2", "Printing"]
            ],
        }));
    }

    #[test]
    fn can_load_json() {
        let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
        let mut params = HashMap::new();
        params.insert("path".to_string(), to_value("test.json").unwrap());

        let loaded = load_data(params).unwrap();

        assert_eq!(loaded, json!({
            "key": "value",
            "array": [1, 2, 3],
            "subpackage": {
                "subkey": 5
            }
        }));
    }
}
|