Update load_data

Vincent Prouillet 2021-05-12 22:39:15 +02:00
parent 7fb99eaa44
commit 7154e90542
4 changed files with 182 additions and 154 deletions

View file

@@ -23,7 +23,7 @@ impl MarkdownFilter {
config: Config,
permalinks: HashMap<String, String>,
) -> TeraResult<Self> {
let tera = load_tera(&path, &config).map_err(|err| tera::Error::msg(err))?;
let tera = load_tera(&path, &config).map_err(tera::Error::msg)?;
Ok(Self { config, permalinks, tera })
}
}

View file

@@ -1,8 +1,9 @@
use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use crate::global_fns::search_for_file;
use image::GenericImageView;
use serde_derive::{Deserialize, Serialize};
use svg_metadata as svg;
@@ -20,15 +21,12 @@ struct ResizeImageResponse {
pub struct ResizeImage {
/// The base path of the Zola site
base_path: PathBuf,
search_paths: [PathBuf; 2],
imageproc: Arc<Mutex<imageproc::Processor>>,
}
impl ResizeImage {
pub fn new(base_path: PathBuf, imageproc: Arc<Mutex<imageproc::Processor>>) -> Self {
let search_paths =
[base_path.join("static").to_path_buf(), base_path.join("content").to_path_buf()];
Self { base_path, imageproc, search_paths }
Self { base_path, imageproc }
}
}
@@ -37,7 +35,7 @@ static DEFAULT_FMT: &str = "auto";
impl TeraFn for ResizeImage {
fn call(&self, args: &HashMap<String, Value>) -> Result<Value> {
let mut path = required_arg!(
let path = required_arg!(
String,
args.get("path"),
"`resize_image` requires a `path` argument with a string value"
@@ -68,27 +66,12 @@ impl TeraFn for ResizeImage {
}
let mut imageproc = self.imageproc.lock().unwrap();
if path.starts_with("@/") {
path = path.replace("@/", "content/");
}
let mut file_path = self.base_path.join(&path);
let mut file_exists = file_path.exists();
if !file_exists {
// we need to search in both search folders now
for dir in &self.search_paths {
let p = dir.join(&path);
if p.exists() {
file_path = p;
file_exists = true;
break;
}
let file_path = match search_for_file(&self.base_path, &path) {
Some(f) => f,
None => {
return Err(format!("`resize_image`: Cannot find path: {}", path).into());
}
}
if !file_exists {
return Err(format!("`resize_image`: Cannot find path: {}", path).into());
}
};
let imageop =
imageproc::ImageOp::from_args(path, file_path, &op, width, height, &format, quality)
@@ -104,7 +87,7 @@ impl TeraFn for ResizeImage {
}
// Try to read the image dimensions for a given image
fn image_dimensions(path: &PathBuf) -> Result<(u32, u32)> {
fn image_dimensions(path: &Path) -> Result<(u32, u32)> {
if let Some("svg") = path.extension().and_then(OsStr::to_str) {
let img = svg::Metadata::parse_file(&path)
.map_err(|e| Error::chain(format!("Failed to process SVG: {}", path.display()), e))?;
@@ -134,18 +117,17 @@ impl GetImageMetadata {
impl TeraFn for GetImageMetadata {
fn call(&self, args: &HashMap<String, Value>) -> Result<Value> {
let mut path = required_arg!(
let path = required_arg!(
String,
args.get("path"),
"`get_image_metadata` requires a `path` argument with a string value"
);
if path.starts_with("@/") {
path = path.replace("@/", "content/");
}
let src_path = self.base_path.join(&path);
if !src_path.exists() {
return Err(format!("`get_image_metadata`: Cannot find path: {}", path).into());
}
let src_path = match search_for_file(&self.base_path, &path) {
Some(f) => f,
None => {
return Err(format!("`get_image_metadata`: Cannot find path: {}", path).into());
}
};
let (height, width) = image_dimensions(&src_path)?;
let mut map = tera::Map::new();
map.insert(String::from("height"), Value::Number(tera::Number::from(height)));
@@ -220,11 +202,11 @@ mod tests {
let data = static_fn.call(&args).unwrap().as_object().unwrap().clone();
assert_eq!(
data["static_path"],
to_value("static/processed_images/32454a1e0243976c00.jpg").unwrap()
to_value("static/processed_images/074e171855ee541800.jpg").unwrap()
);
assert_eq!(
data["url"],
to_value("http://a-website.com/processed_images/32454a1e0243976c00.jpg").unwrap()
to_value("http://a-website.com/processed_images/074e171855ee541800.jpg").unwrap()
);
// 4. resizing an image with a relative path not starting with static or content

View file

@@ -4,7 +4,6 @@ use utils::fs::{get_file_time, is_path_in_directory, read_file};
use reqwest::header::{HeaderValue, CONTENT_TYPE};
use reqwest::{blocking::Client, header};
use std::collections::hash_map::DefaultHasher;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::str::FromStr;
use url::Url;
@@ -12,6 +11,7 @@ use url::Url;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use crate::global_fns::search_for_file;
use csv::Reader;
use std::collections::HashMap;
use tera::{from_value, to_value, Error, Function as TeraFn, Map, Result, Value};
@@ -19,11 +19,6 @@ use tera::{from_value, to_value, Error, Function as TeraFn, Map, Result, Value};
static GET_DATA_ARGUMENT_ERROR_MESSAGE: &str =
"`load_data`: requires EITHER a `path` or `url` argument";
enum DataSource {
Url(Url),
Path(PathBuf),
}
#[derive(Debug, PartialEq, Clone, Copy, Hash)]
enum Method {
Post,
@@ -42,7 +37,7 @@ impl FromStr for Method {
}
}
#[derive(Debug)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum OutputFormat {
Toml,
Json,
@@ -51,23 +46,11 @@ enum OutputFormat {
Plain,
}
impl fmt::Display for OutputFormat {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}
impl Hash for OutputFormat {
fn hash<H: Hasher>(&self, state: &mut H) {
self.to_string().hash(state);
}
}
impl FromStr for OutputFormat {
type Err = Error;
fn from_str(output_format: &str) -> Result<Self> {
match output_format {
match output_format.to_lowercase().as_ref() {
"toml" => Ok(OutputFormat::Toml),
"csv" => Ok(OutputFormat::Csv),
"json" => Ok(OutputFormat::Json),
@@ -90,6 +73,12 @@ impl OutputFormat {
}
}
#[derive(Debug, PartialEq, Eq)]
enum DataSource {
Url(Url),
Path(PathBuf),
}
impl DataSource {
/// Returns Some(DataSource) on success, from optional load_data() path/url arguments
/// Returns an Error when a URL could not be parsed and Ok(None) when the path
@@ -99,18 +88,17 @@ impl DataSource {
fn from_args(
path_arg: Option<String>,
url_arg: Option<String>,
content_path: &PathBuf,
base_path: &PathBuf,
) -> Result<Option<Self>> {
if path_arg.is_some() && url_arg.is_some() {
return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
}
if let Some(path) = path_arg {
let full_path = content_path.join(path);
if !full_path.exists() {
return Ok(None);
}
return Ok(Some(DataSource::Path(full_path)));
return match search_for_file(&base_path, &path) {
Some(f) => Ok(Some(DataSource::Path(f))),
None => Ok(None),
};
}
if let Some(url) = url_arg {
@@ -127,8 +115,8 @@ impl DataSource {
&self,
format: &OutputFormat,
method: Method,
post_body: Option<String>,
post_content_type: Option<String>,
post_body: &Option<String>,
post_content_type: &Option<String>,
) -> u64 {
let mut hasher = DefaultHasher::new();
format.hash(&mut hasher);
@@ -152,47 +140,38 @@ impl Hash for DataSource {
}
}
fn read_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result<String> {
fn read_local_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result<String> {
if !is_path_in_directory(&base_path, &full_path)
.map_err(|e| format!("Failed to read data file {}: {}", full_path.display(), e))?
{
return Err(format!(
"{} is not inside the base site directory {}",
full_path.display(),
base_path.display()
"{:?} is not inside the base site directory {:?}",
full_path, base_path
)
.into());
}
read_file(&full_path).map_err(|e| {
format!("`load_data`: error {} loading file {}", full_path.to_str().unwrap(), e).into()
})
read_file(&full_path)
.map_err(|e| format!("`load_data`: error reading file {:?}: {}", full_path, e).into())
}
fn get_output_format_from_args(
args: &HashMap<String, Value>,
format_arg: Option<String>,
data_source: &DataSource,
) -> Result<OutputFormat> {
let format_arg = optional_arg!(
String,
args.get("format"),
"`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)"
);
if let Some(format) = format_arg {
if format == "plain" {
return Ok(OutputFormat::Plain);
}
return OutputFormat::from_str(&format);
}
let from_extension = if let DataSource::Path(path) = data_source {
path.extension().map(|extension| extension.to_str().unwrap()).unwrap_or_else(|| "plain")
if let DataSource::Path(path) = data_source {
match path.extension().and_then(|e| e.to_str()) {
Some(ext) => OutputFormat::from_str(ext).or(Ok(OutputFormat::Plain)),
None => Ok(OutputFormat::Plain),
}
} else {
"plain"
};
// Always default to Plain if we don't know what it is
OutputFormat::from_str(from_extension).or(Ok(OutputFormat::Plain))
// Always default to Plain if we don't know what it is
Ok(OutputFormat::Plain)
}
}
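// A small illustrative sketch of the inference rules above (hypothetical test, file
// names made up): an explicit `format` argument wins and is matched case-insensitively;
// otherwise the path extension decides, and unknown extensions fall back to Plain.
#[test]
fn output_format_inference_sketch() {
    use std::path::PathBuf;

    let json = DataSource::Path(PathBuf::from("data/example.json"));
    // an explicit `format` argument wins, case-insensitively
    assert_eq!(
        get_output_format_from_args(Some("TOML".to_string()), &json).unwrap(),
        OutputFormat::Toml
    );
    // otherwise the extension decides
    assert_eq!(get_output_format_from_args(None, &json).unwrap(), OutputFormat::Json);
    // unknown extensions default to Plain
    let txt = DataSource::Path(PathBuf::from("notes/example.txt"));
    assert_eq!(get_output_format_from_args(None, &txt).unwrap(), OutputFormat::Plain);
}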
/// A Tera function to load data from a file or from a URL
@@ -218,17 +197,22 @@ impl LoadData {
impl TeraFn for LoadData {
fn call(&self, args: &HashMap<String, Value>) -> Result<Value> {
let required = if let Some(req) = optional_arg!(
// Either a local path or a URL
let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
// Optional general params
let format_arg = optional_arg!(
String,
args.get("format"),
"`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml, bibtex, plain)"
);
let required = optional_arg!(
bool,
args.get("required"),
"`load_data`: `required` must be a boolean (true or false)"
) {
req
} else {
true
};
let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
)
.unwrap_or(true);
// Remote URL parameters only
let post_body_arg =
optional_arg!(String, args.get("body"), "`load_data` body must be a string, if set.");
let post_content_type = optional_arg!(
@@ -241,6 +225,7 @@ impl TeraFn for LoadData {
args.get("method"),
"`load_data` method must either be POST or GET."
);
let method = match method_arg {
Some(ref method_str) => match Method::from_str(&method_str) {
Ok(m) => m,
@@ -248,43 +233,39 @@ impl TeraFn for LoadData {
},
_ => Method::Get,
};
let data_source = DataSource::from_args(path_arg.clone(), url_arg, &self.base_path)?;
// If the file doesn't exist, source is None
match (&data_source, required) {
// If the file was not required, return a Null value to the template
(None, false) => {
return Ok(Value::Null);
}
// If the file was required, error
(None, true) => {
// source is None only with path_arg (not URL), so path_arg is safely unwrap
return Err(format!(
"{} doesn't exist",
&self.base_path.join(path_arg.unwrap()).display()
)
.into());
}
_ => {}
}
let data_source = data_source.unwrap();
let file_format = get_output_format_from_args(&args, &data_source)?;
let cache_key = data_source.get_cache_key(
&file_format,
method,
post_body_arg.clone(),
post_content_type.clone(),
);
let data_source =
match (DataSource::from_args(path_arg.clone(), url_arg, &self.base_path)?, required) {
// If the file was not required, return a Null value to the template
(None, false) => {
return Ok(Value::Null);
}
// If the file was required, error
(None, true) => {
// source is None only with path_arg (not URL), so path_arg can safely be unwrapped
return Err(format!(
"{} doesn't exist",
&self.base_path.join(path_arg.unwrap()).display()
)
.into());
}
(Some(data_source), _) => data_source,
};
let file_format = get_output_format_from_args(format_arg, &data_source)?;
let cache_key =
data_source.get_cache_key(&file_format, method, &post_body_arg, &post_content_type);
let mut cache = self.result_cache.lock().expect("result cache lock");
let response_client = self.client.lock().expect("response client lock");
if let Some(cached_result) = cache.get(&cache_key) {
return Ok(cached_result.clone());
}
let data = match data_source {
DataSource::Path(path) => read_data_file(&self.base_path, path),
DataSource::Path(path) => read_local_data_file(&self.base_path, path),
DataSource::Url(url) => {
let response_client = self.client.lock().expect("response client lock");
let req = match method {
Method::Get => response_client
.get(url.as_str())
@@ -308,12 +289,9 @@ impl TeraFn for LoadData {
},
_ => {}
};
match post_body_arg {
Some(body) => {
resp = resp.body(body);
}
_ => {}
};
if let Some(body) = post_body_arg {
resp = resp.body(body);
}
resp
}
};
@@ -335,7 +313,7 @@ impl TeraFn for LoadData {
.into())
}
}
} // Now that we have discarded recoverable errors, we can unwrap the result
}
}?;
let result_value: Result<Value> = match file_format {
@@ -368,7 +346,7 @@ fn load_toml(toml_data: String) -> Result<Value> {
match toml_value {
Value::Object(m) => Ok(fix_toml_dates(m)),
_ => unreachable!("Loaded something other than a TOML object"),
_ => Err("Loaded something other than a TOML object".into()),
}
}
@@ -436,11 +414,11 @@ fn load_csv(csv_data: String) -> Result<Value> {
let mut csv_map = Map::new();
{
let hdrs = reader.headers().map_err(|e| {
let headers = reader.headers().map_err(|e| {
format!("'load_data': {} - unable to read CSV header line (line 1) for CSV file", e)
})?;
let headers_array = hdrs.iter().map(|v| Value::String(v.to_string())).collect();
let headers_array = headers.iter().map(|v| Value::String(v.to_string())).collect();
csv_map.insert(String::from("headers"), Value::Array(headers_array));
}
@@ -487,6 +465,8 @@ mod tests {
use crate::global_fns::load_data::Method;
use mockito::mock;
use serde_json::json;
use std::fs::{copy, create_dir_all};
use tempfile::tempdir;
use tera::{to_value, Function};
// NOTE: HTTP mock paths below are randomly generated to avoid name
@@ -621,17 +601,47 @@ mod tests {
}
#[test]
fn cant_load_outside_content_dir() {
fn can_handle_various_local_file_locations() {
let dir = tempdir().unwrap();
create_dir_all(dir.path().join("content").join("gallery")).unwrap();
create_dir_all(dir.path().join("static")).unwrap();
copy(get_test_file("test.css"), dir.path().join("content").join("test.css")).unwrap();
copy(get_test_file("test.css"), dir.path().join("content").join("gallery").join("new.css"))
.unwrap();
copy(get_test_file("test.css"), dir.path().join("static").join("test.css")).unwrap();
let static_fn = LoadData::new(dir.path().to_path_buf());
let mut args = HashMap::new();
// 1. relative path in `static`
args.insert("path".to_string(), to_value("static/test.css").unwrap());
let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string();
assert_eq!(data, ".hello {}\n");
// 2. relative path in `content`
args.insert("path".to_string(), to_value("content/test.css").unwrap());
let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string();
assert_eq!(data, ".hello {}\n");
// 3. path starting with @/
args.insert("path".to_string(), to_value("@/test.css").unwrap());
let data = static_fn.call(&args).unwrap().as_str().unwrap().to_string();
assert_eq!(data, ".hello {}\n");
// 4. absolute path does not work
args.insert("path".to_string(), to_value("/test.css").unwrap());
assert!(static_fn.call(&args).is_err());
}
#[test]
fn cannot_load_outside_content_dir() {
let static_fn = LoadData::new(PathBuf::from(PathBuf::from("../utils")));
let mut args = HashMap::new();
args.insert("path".to_string(), to_value("../../README.md").unwrap());
args.insert("format".to_string(), to_value("plain").unwrap());
let result = static_fn.call(&args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("README.md is not inside the base site directory"));
assert!(result.unwrap_err().to_string().contains("is not inside the base site directory"));
}
#[test]
@@ -640,14 +650,14 @@ mod tests {
let cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(
&OutputFormat::Toml,
Method::Get,
None,
None,
&None,
&None,
);
let cache_key_2 = DataSource::Path(get_test_file("test.toml")).get_cache_key(
&OutputFormat::Toml,
Method::Get,
None,
None,
&None,
&None,
);
assert_eq!(cache_key, cache_key_2);
}
@@ -657,14 +667,14 @@ mod tests {
let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(
&OutputFormat::Toml,
Method::Get,
None,
None,
&None,
&None,
);
let json_cache_key = DataSource::Path(get_test_file("test.json")).get_cache_key(
&OutputFormat::Toml,
Method::Get,
None,
None,
&None,
&None,
);
assert_ne!(toml_cache_key, json_cache_key);
}
@@ -674,14 +684,14 @@ mod tests {
let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(
&OutputFormat::Toml,
Method::Get,
None,
None,
&None,
&None,
);
let json_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(
&OutputFormat::Json,
Method::Get,
None,
None,
&None,
&None,
);
assert_ne!(toml_cache_key, json_cache_key);
}
@@ -800,6 +810,7 @@ mod tests {
let mut args = HashMap::new();
args.insert("path".to_string(), to_value("test.css").unwrap());
let result = static_fn.call(&args.clone()).unwrap();
println!("{:?}", result);
if cfg!(windows) {
assert_eq!(result.as_str().unwrap().replace("\r\n", "\n"), ".hello {}\n",);

View file

@@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::{fs, io, result};
use base64::encode as encode_b64;
@@ -22,6 +23,40 @@ pub use self::i18n::Trans;
pub use self::images::{GetImageMetadata, ResizeImage};
pub use self::load_data::LoadData;
/// This is used by a few Tera functions to search for files on the filesystem.
/// It tries to find the file in three different places:
/// 1. base_path + path
/// 2. base_path + static + path
/// 3. base_path + content + path
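/// A leading `@/` in `path` is replaced with `content/` before the lookup.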
pub fn search_for_file(base_path: &Path, path: &str) -> Option<PathBuf> {
let search_paths = [base_path.join("static"), base_path.join("content")];
let actual_path = if path.starts_with("@/") {
Cow::Owned(path.replace("@/", "content/"))
} else {
Cow::Borrowed(path)
};
let mut file_path = base_path.join(&*actual_path);
let mut file_exists = file_path.exists();
if !file_exists {
// we need to search in both search folders now
for dir in &search_paths {
let p = dir.join(&*actual_path);
if p.exists() {
file_path = p;
file_exists = true;
break;
}
}
}
if file_exists {
Some(file_path)
} else {
None
}
}
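// A minimal usage sketch of the lookup order above (hypothetical test; it assumes the
// `tempfile` dev-dependency already used by the `load_data` tests, and made-up file names):
#[cfg(test)]
mod search_for_file_sketch {
    use super::search_for_file;
    use std::fs::{create_dir_all, write};

    #[test]
    fn resolves_base_then_static_then_content() {
        let dir = tempfile::tempdir().unwrap();
        create_dir_all(dir.path().join("static")).unwrap();
        create_dir_all(dir.path().join("content")).unwrap();
        write(dir.path().join("static").join("a.css"), "a").unwrap();
        write(dir.path().join("content").join("b.css"), "b").unwrap();

        // 1. base_path + path (an explicit prefix resolves directly)
        assert!(search_for_file(dir.path(), "static/a.css").is_some());
        // 2. falls back to base_path + static + path
        assert!(search_for_file(dir.path(), "a.css").is_some());
        // 3. then base_path + content + path
        assert!(search_for_file(dir.path(), "b.css").is_some());
        // `@/` is rewritten to `content/` before the lookup
        assert!(search_for_file(dir.path(), "@/b.css").is_some());
        // anything that cannot be found resolves to None
        assert!(search_for_file(dir.path(), "missing.css").is_none());
    }
}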
#[derive(Debug)]
pub struct GetUrl {
config: Config,
@@ -77,7 +112,7 @@
let mut hasher = D::new();
io::copy(&mut file, &mut hasher)?;
if base64 {
Ok(format!("{}", encode_b64(hasher.finalize())))
Ok(encode_b64(hasher.finalize()))
} else {
Ok(format!("{:x}", hasher.finalize()))
}