From 7154e9054294edc4fbad601058155a76dd229bd6 Mon Sep 17 00:00:00 2001 From: Vincent Prouillet Date: Wed, 12 May 2021 22:39:15 +0200 Subject: [PATCH] Update load_data --- components/templates/src/filters.rs | 2 +- components/templates/src/global_fns/images.rs | 56 ++-- .../templates/src/global_fns/load_data.rs | 239 +++++++++--------- components/templates/src/global_fns/mod.rs | 39 ++- 4 files changed, 182 insertions(+), 154 deletions(-) diff --git a/components/templates/src/filters.rs b/components/templates/src/filters.rs index 17ccc17c..0e465540 100644 --- a/components/templates/src/filters.rs +++ b/components/templates/src/filters.rs @@ -23,7 +23,7 @@ impl MarkdownFilter { config: Config, permalinks: HashMap, ) -> TeraResult { - let tera = load_tera(&path, &config).map_err(|err| tera::Error::msg(err))?; + let tera = load_tera(&path, &config).map_err(tera::Error::msg)?; Ok(Self { config, permalinks, tera }) } } diff --git a/components/templates/src/global_fns/images.rs b/components/templates/src/global_fns/images.rs index ca85d839..ee46a7c7 100644 --- a/components/templates/src/global_fns/images.rs +++ b/components/templates/src/global_fns/images.rs @@ -1,8 +1,9 @@ use std::collections::HashMap; use std::ffi::OsStr; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; +use crate::global_fns::search_for_file; use image::GenericImageView; use serde_derive::{Deserialize, Serialize}; use svg_metadata as svg; @@ -20,15 +21,12 @@ struct ResizeImageResponse { pub struct ResizeImage { /// The base path of the Zola site base_path: PathBuf, - search_paths: [PathBuf; 2], imageproc: Arc>, } impl ResizeImage { pub fn new(base_path: PathBuf, imageproc: Arc>) -> Self { - let search_paths = - [base_path.join("static").to_path_buf(), base_path.join("content").to_path_buf()]; - Self { base_path, imageproc, search_paths } + Self { base_path, imageproc } } } @@ -37,7 +35,7 @@ static DEFAULT_FMT: &str = "auto"; impl TeraFn for ResizeImage { fn call(&self, args: &HashMap) -> Result { - let mut path = required_arg!( + let path = required_arg!( String, args.get("path"), "`resize_image` requires a `path` argument with a string value" @@ -68,27 +66,12 @@ impl TeraFn for ResizeImage { } let mut imageproc = self.imageproc.lock().unwrap(); - if path.starts_with("@/") { - path = path.replace("@/", "content/"); - } - - let mut file_path = self.base_path.join(&path); - let mut file_exists = file_path.exists(); - if !file_exists { - // we need to search in both search folders now - for dir in &self.search_paths { - let p = dir.join(&path); - if p.exists() { - file_path = p; - file_exists = true; - break; - } + let file_path = match search_for_file(&self.base_path, &path) { + Some(f) => f, + None => { + return Err(format!("`resize_image`: Cannot find path: {}", path).into()); } - } - - if !file_exists { - return Err(format!("`resize_image`: Cannot find path: {}", path).into()); - } + }; let imageop = imageproc::ImageOp::from_args(path, file_path, &op, width, height, &format, quality) @@ -104,7 +87,7 @@ impl TeraFn for ResizeImage { } // Try to read the image dimensions for a given image -fn image_dimensions(path: &PathBuf) -> Result<(u32, u32)> { +fn image_dimensions(path: &Path) -> Result<(u32, u32)> { if let Some("svg") = path.extension().and_then(OsStr::to_str) { let img = svg::Metadata::parse_file(&path) .map_err(|e| Error::chain(format!("Failed to process SVG: {}", path.display()), e))?; @@ -134,18 +117,17 @@ impl GetImageMetadata { impl TeraFn for GetImageMetadata { fn call(&self, args: &HashMap) -> Result { - let mut path = required_arg!( + let path = required_arg!( String, args.get("path"), "`get_image_metadata` requires a `path` argument with a string value" ); - if path.starts_with("@/") { - path = path.replace("@/", "content/"); - } - let src_path = self.base_path.join(&path); - if !src_path.exists() { - return Err(format!("`get_image_metadata`: Cannot find path: {}", path).into()); - } + let src_path = match search_for_file(&self.base_path, &path) { + Some(f) => f, + None => { + return Err(format!("`resize_image`: Cannot find path: {}", path).into()); + } + }; let (height, width) = image_dimensions(&src_path)?; let mut map = tera::Map::new(); map.insert(String::from("height"), Value::Number(tera::Number::from(height))); @@ -220,11 +202,11 @@ mod tests { let data = static_fn.call(&args).unwrap().as_object().unwrap().clone(); assert_eq!( data["static_path"], - to_value("static/processed_images/32454a1e0243976c00.jpg").unwrap() + to_value("static/processed_images/074e171855ee541800.jpg").unwrap() ); assert_eq!( data["url"], - to_value("http://a-website.com/processed_images/32454a1e0243976c00.jpg").unwrap() + to_value("http://a-website.com/processed_images/074e171855ee541800.jpg").unwrap() ); // 4. resizing an image with a relative path not starting with static or content diff --git a/components/templates/src/global_fns/load_data.rs b/components/templates/src/global_fns/load_data.rs index 189ce537..6d003fc0 100644 --- a/components/templates/src/global_fns/load_data.rs +++ b/components/templates/src/global_fns/load_data.rs @@ -4,7 +4,6 @@ use utils::fs::{get_file_time, is_path_in_directory, read_file}; use reqwest::header::{HeaderValue, CONTENT_TYPE}; use reqwest::{blocking::Client, header}; use std::collections::hash_map::DefaultHasher; -use std::fmt; use std::hash::{Hash, Hasher}; use std::str::FromStr; use url::Url; @@ -12,6 +11,7 @@ use url::Url; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use crate::global_fns::search_for_file; use csv::Reader; use std::collections::HashMap; use tera::{from_value, to_value, Error, Function as TeraFn, Map, Result, Value}; @@ -19,11 +19,6 @@ use tera::{from_value, to_value, Error, Function as TeraFn, Map, Result, Value}; static GET_DATA_ARGUMENT_ERROR_MESSAGE: &str = "`load_data`: requires EITHER a `path` or `url` argument"; -enum DataSource { - Url(Url), - Path(PathBuf), -} - #[derive(Debug, PartialEq, Clone, Copy, Hash)] enum Method { Post, @@ -42,7 +37,7 @@ impl FromStr for Method { } } -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum OutputFormat { Toml, Json, @@ -51,23 +46,11 @@ enum OutputFormat { Plain, } -impl fmt::Display for OutputFormat { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(self, f) - } -} - -impl Hash for OutputFormat { - fn hash(&self, state: &mut H) { - self.to_string().hash(state); - } -} - impl FromStr for OutputFormat { type Err = Error; fn from_str(output_format: &str) -> Result { - match output_format { + match output_format.to_lowercase().as_ref() { "toml" => Ok(OutputFormat::Toml), "csv" => Ok(OutputFormat::Csv), "json" => Ok(OutputFormat::Json), @@ -90,6 +73,12 @@ impl OutputFormat { } } +#[derive(Debug, PartialEq, Eq)] +enum DataSource { + Url(Url), + Path(PathBuf), +} + impl DataSource { /// Returns Some(DataSource) on success, from optional load_data() path/url arguments /// Returns an Error when a URL could not be parsed and Ok(None) when the path @@ -99,18 +88,17 @@ impl DataSource { fn from_args( path_arg: Option, url_arg: Option, - content_path: &PathBuf, + base_path: &PathBuf, ) -> Result> { if path_arg.is_some() && url_arg.is_some() { return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into()); } if let Some(path) = path_arg { - let full_path = content_path.join(path); - if !full_path.exists() { - return Ok(None); - } - return Ok(Some(DataSource::Path(full_path))); + return match search_for_file(&base_path, &path) { + Some(f) => Ok(Some(DataSource::Path(f))), + None => Ok(None), + }; } if let Some(url) = url_arg { @@ -127,8 +115,8 @@ impl DataSource { &self, format: &OutputFormat, method: Method, - post_body: Option, - post_content_type: Option, + post_body: &Option, + post_content_type: &Option, ) -> u64 { let mut hasher = DefaultHasher::new(); format.hash(&mut hasher); @@ -152,47 +140,38 @@ impl Hash for DataSource { } } -fn read_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result { +fn read_local_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result { if !is_path_in_directory(&base_path, &full_path) .map_err(|e| format!("Failed to read data file {}: {}", full_path.display(), e))? { return Err(format!( - "{} is not inside the base site directory {}", - full_path.display(), - base_path.display() + "{:?} is not inside the base site directory {:?}", + full_path, base_path ) .into()); } - read_file(&full_path).map_err(|e| { - format!("`load_data`: error {} loading file {}", full_path.to_str().unwrap(), e).into() - }) + + read_file(&full_path) + .map_err(|e| format!("`load_data`: error reading file {:?}: {}", full_path, e).into()) } fn get_output_format_from_args( - args: &HashMap, + format_arg: Option, data_source: &DataSource, ) -> Result { - let format_arg = optional_arg!( - String, - args.get("format"), - "`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)" - ); - if let Some(format) = format_arg { - if format == "plain" { - return Ok(OutputFormat::Plain); - } return OutputFormat::from_str(&format); } - let from_extension = if let DataSource::Path(path) = data_source { - path.extension().map(|extension| extension.to_str().unwrap()).unwrap_or_else(|| "plain") + if let DataSource::Path(path) = data_source { + match path.extension().and_then(|e| e.to_str()) { + Some(ext) => OutputFormat::from_str(ext).or(Ok(OutputFormat::Plain)), + None => Ok(OutputFormat::Plain), + } } else { - "plain" - }; - - // Always default to Plain if we don't know what it is - OutputFormat::from_str(from_extension).or(Ok(OutputFormat::Plain)) + // Always default to Plain if we don't know what it is + Ok(OutputFormat::Plain) + } } /// A Tera function to load data from a file or from a URL @@ -218,17 +197,22 @@ impl LoadData { impl TeraFn for LoadData { fn call(&self, args: &HashMap) -> Result { - let required = if let Some(req) = optional_arg!( + // Either a local path or a URL + let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE); + let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE); + // Optional general params + let format_arg = optional_arg!( + String, + args.get("format"), + "`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)" + ); + let required = optional_arg!( bool, args.get("required"), "`load_data`: `required` must be a boolean (true or false)" - ) { - req - } else { - true - }; - let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE); - let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE); + ) + .unwrap_or(true); + // Remote URL parameters only let post_body_arg = optional_arg!(String, args.get("body"), "`load_data` body must be a string, if set."); let post_content_type = optional_arg!( @@ -241,6 +225,7 @@ impl TeraFn for LoadData { args.get("method"), "`load_data` method must either be POST or GET." ); + let method = match method_arg { Some(ref method_str) => match Method::from_str(&method_str) { Ok(m) => m, @@ -248,43 +233,39 @@ impl TeraFn for LoadData { }, _ => Method::Get, }; - let data_source = DataSource::from_args(path_arg.clone(), url_arg, &self.base_path)?; // If the file doesn't exist, source is None - match (&data_source, required) { - // If the file was not required, return a Null value to the template - (None, false) => { - return Ok(Value::Null); - } - // If the file was required, error - (None, true) => { - // source is None only with path_arg (not URL), so path_arg is safely unwrap - return Err(format!( - "{} doesn't exist", - &self.base_path.join(path_arg.unwrap()).display() - ) - .into()); - } - _ => {} - } - let data_source = data_source.unwrap(); - let file_format = get_output_format_from_args(&args, &data_source)?; - let cache_key = data_source.get_cache_key( - &file_format, - method, - post_body_arg.clone(), - post_content_type.clone(), - ); + let data_source = + match (DataSource::from_args(path_arg.clone(), url_arg, &self.base_path)?, required) { + // If the file was not required, return a Null value to the template + (None, false) => { + return Ok(Value::Null); + } + // If the file was required, error + (None, true) => { + // source is None only with path_arg (not URL), so path_arg is safely unwrap + return Err(format!( + "{} doesn't exist", + &self.base_path.join(path_arg.unwrap()).display() + ) + .into()); + } + (Some(data_source), _) => data_source, + }; + + let file_format = get_output_format_from_args(format_arg, &data_source)?; + let cache_key = + data_source.get_cache_key(&file_format, method, &post_body_arg, &post_content_type); let mut cache = self.result_cache.lock().expect("result cache lock"); - let response_client = self.client.lock().expect("response client lock"); if let Some(cached_result) = cache.get(&cache_key) { return Ok(cached_result.clone()); } let data = match data_source { - DataSource::Path(path) => read_data_file(&self.base_path, path), + DataSource::Path(path) => read_local_data_file(&self.base_path, path), DataSource::Url(url) => { + let response_client = self.client.lock().expect("response client lock"); let req = match method { Method::Get => response_client .get(url.as_str()) @@ -308,12 +289,9 @@ impl TeraFn for LoadData { }, _ => {} }; - match post_body_arg { - Some(body) => { - resp = resp.body(body); - } - _ => {} - }; + if let Some(body) = post_body_arg { + resp = resp.body(body); + } resp } }; @@ -335,7 +313,7 @@ impl TeraFn for LoadData { .into()) } } - } // Now that we have discarded recoverable errors, we can unwrap the result + } }?; let result_value: Result = match file_format { @@ -368,7 +346,7 @@ fn load_toml(toml_data: String) -> Result { match toml_value { Value::Object(m) => Ok(fix_toml_dates(m)), - _ => unreachable!("Loaded something other than a TOML object"), + _ => Err("Loaded something other than a TOML object".into()), } } @@ -436,11 +414,11 @@ fn load_csv(csv_data: String) -> Result { let mut csv_map = Map::new(); { - let hdrs = reader.headers().map_err(|e| { + let headers = reader.headers().map_err(|e| { format!("'load_data': {} - unable to read CSV header line (line 1) for CSV file", e) })?; - let headers_array = hdrs.iter().map(|v| Value::String(v.to_string())).collect(); + let headers_array = headers.iter().map(|v| Value::String(v.to_string())).collect(); csv_map.insert(String::from("headers"), Value::Array(headers_array)); } @@ -487,6 +465,8 @@ mod tests { use crate::global_fns::load_data::Method; use mockito::mock; use serde_json::json; + use std::fs::{copy, create_dir_all}; + use tempfile::tempdir; use tera::{to_value, Function}; // NOTE: HTTP mock paths below are randomly generated to avoid name @@ -621,17 +601,47 @@ mod tests { } #[test] - fn cant_load_outside_content_dir() { + fn can_handle_various_local_file_locations() { + let dir = tempdir().unwrap(); + create_dir_all(dir.path().join("content").join("gallery")).unwrap(); + create_dir_all(dir.path().join("static")).unwrap(); + copy(get_test_file("test.css"), dir.path().join("content").join("test.css")).unwrap(); + copy(get_test_file("test.css"), dir.path().join("content").join("gallery").join("new.css")) + .unwrap(); + copy(get_test_file("test.css"), dir.path().join("static").join("test.css")).unwrap(); + + let static_fn = LoadData::new(dir.path().to_path_buf()); + let mut args = HashMap::new(); + + // 1. relative path in `static` + args.insert("path".to_string(), to_value("static/test.css").unwrap()); + let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string(); + assert_eq!(data, ".hello {}\n"); + + // 2. relative path in `content` + args.insert("path".to_string(), to_value("content/test.css").unwrap()); + let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string(); + assert_eq!(data, ".hello {}\n"); + + // 3. path starting with @/ + args.insert("path".to_string(), to_value("@/test.css").unwrap()); + let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string(); + assert_eq!(data, ".hello {}\n"); + + // 4. absolute path does not work + args.insert("path".to_string(), to_value("/test.css").unwrap()); + assert!(static_fn.call(&args).is_err()); + } + + #[test] + fn cannot_load_outside_content_dir() { let static_fn = LoadData::new(PathBuf::from(PathBuf::from("../utils"))); let mut args = HashMap::new(); args.insert("path".to_string(), to_value("../../README.md").unwrap()); args.insert("format".to_string(), to_value("plain").unwrap()); let result = static_fn.call(&args); assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("README.md is not inside the base site directory")); + assert!(result.unwrap_err().to_string().contains("is not inside the base site directory")); } #[test] @@ -640,14 +650,14 @@ mod tests { let cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key( &OutputFormat::Toml, Method::Get, - None, - None, + &None, + &None, ); let cache_key_2 = DataSource::Path(get_test_file("test.toml")).get_cache_key( &OutputFormat::Toml, Method::Get, - None, - None, + &None, + &None, ); assert_eq!(cache_key, cache_key_2); } @@ -657,14 +667,14 @@ mod tests { let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key( &OutputFormat::Toml, Method::Get, - None, - None, + &None, + &None, ); let json_cache_key = DataSource::Path(get_test_file("test.json")).get_cache_key( &OutputFormat::Toml, Method::Get, - None, - None, + &None, + &None, ); assert_ne!(toml_cache_key, json_cache_key); } @@ -674,14 +684,14 @@ mod tests { let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key( &OutputFormat::Toml, Method::Get, - None, - None, + &None, + &None, ); let json_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key( &OutputFormat::Json, Method::Get, - None, - None, + &None, + &None, ); assert_ne!(toml_cache_key, json_cache_key); } @@ -800,6 +810,7 @@ mod tests { let mut args = HashMap::new(); args.insert("path".to_string(), to_value("test.css").unwrap()); let result = static_fn.call(&args.clone()).unwrap(); + println!("{:?}", result); if cfg!(windows) { assert_eq!(result.as_str().unwrap().replace("\r\n", "\n"), ".hello {}\n",); diff --git a/components/templates/src/global_fns/mod.rs b/components/templates/src/global_fns/mod.rs index 9ee87d11..e7514fd4 100644 --- a/components/templates/src/global_fns/mod.rs +++ b/components/templates/src/global_fns/mod.rs @@ -1,5 +1,6 @@ +use std::borrow::Cow; use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{fs, io, result}; use base64::encode as encode_b64; @@ -22,6 +23,40 @@ pub use self::i18n::Trans; pub use self::images::{GetImageMetadata, ResizeImage}; pub use self::load_data::LoadData; +/// This is used by a few Tera functions to search for files on the filesystem. +/// This does try to find the file in 3 different spots: +/// 1. base_path + path +/// 2. base_path + static + path +/// 3. base_path + content + path +pub fn search_for_file(base_path: &Path, path: &str) -> Option { + let search_paths = [base_path.join("static"), base_path.join("content")]; + let actual_path = if path.starts_with("@/") { + Cow::Owned(path.replace("@/", "content/")) + } else { + Cow::Borrowed(path) + }; + let mut file_path = base_path.join(&*actual_path); + let mut file_exists = file_path.exists(); + + if !file_exists { + // we need to search in both search folders now + for dir in &search_paths { + let p = dir.join(&*actual_path); + if p.exists() { + file_path = p; + file_exists = true; + break; + } + } + } + + if file_exists { + Some(file_path) + } else { + None + } +} + #[derive(Debug)] pub struct GetUrl { config: Config, @@ -77,7 +112,7 @@ where let mut hasher = D::new(); io::copy(&mut file, &mut hasher)?; if base64 { - Ok(format!("{}", encode_b64(hasher.finalize()))) + Ok(encode_b64(hasher.finalize())) } else { Ok(format!("{:x}", hasher.finalize())) }