load_data() template function takes a required boolean flag (closes #1209) (#1306)

* load_data() template function takes a `required` boolean flag

* Update tests for load_data()

* Add test to make sure invalid data always fails in load_data

* Better documentation, fixing a few typos

Co-authored-by: southerntofu <southerntofu@thunix.net>
This commit is contained in:
southerntofu 2021-02-02 19:19:44 +00:00 committed by GitHub
parent 92b5b4b3a5
commit 15e0ae6699
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 109 additions and 24 deletions

View file

@ -72,11 +72,16 @@ impl OutputFormat {
}
impl DataSource {
/// Returns Some(DataSource) on success, from optional load_data() path/url arguments
/// Returns an Error when a URL could not be parsed and Ok(None) when the path
/// is missing, so that the load_data() function can decide whether this is an error
/// Note: if the signature of this function changes, please update LoadData::call()
/// so we don't mistakenly unwrap things over there
fn from_args(
path_arg: Option<String>,
url_arg: Option<String>,
content_path: &PathBuf,
) -> Result<Self> {
) -> Result<Option<Self>> {
if path_arg.is_some() && url_arg.is_some() {
return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
}
@ -84,14 +89,15 @@ impl DataSource {
if let Some(path) = path_arg {
let full_path = content_path.join(path);
if !full_path.exists() {
return Err(format!("{} doesn't exist", full_path.display()).into());
return Ok(None);
}
return Ok(DataSource::Path(full_path));
return Ok(Some(DataSource::Path(full_path)));
}
if let Some(url) = url_arg {
return Url::parse(&url)
.map(DataSource::Url)
.map(|x| Some(x))
.map_err(|e| format!("Failed to parse {} as url: {}", url, e).into());
}
@ -118,16 +124,6 @@ impl Hash for DataSource {
}
}
fn get_data_source_from_args(
content_path: &PathBuf,
args: &HashMap<String, Value>,
) -> Result<DataSource> {
let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
DataSource::from_args(path_arg, url_arg, content_path)
}
fn read_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result<String> {
if !is_path_in_directory(&base_path, &full_path)
.map_err(|e| format!("Failed to read data file {}: {}", full_path.display(), e))?
@ -194,7 +190,23 @@ impl LoadData {
impl TeraFn for LoadData {
fn call(&self, args: &HashMap<String, Value>) -> Result<Value> {
let data_source = get_data_source_from_args(&self.base_path, &args)?;
let required = if let Some(req) = optional_arg!(bool, args.get("required"), "`load_data`: `required` must be a boolean (true or false)") { req } else { true };
let path_arg = optional_arg!(String, args.get("path"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
let url_arg = optional_arg!(String, args.get("url"), GET_DATA_ARGUMENT_ERROR_MESSAGE);
let data_source = DataSource::from_args(path_arg.clone(), url_arg, &self.base_path)?;
// If the file doesn't exist, source is None
match (&data_source, required) {
// If the file was not required, return a Null value to the template
(None, false) => { return Ok(Value::Null); },
// If the file was required, error
(None, true) => {
// source is None only with path_arg (not URL), so path_arg is safely unwrap
return Err(format!("{} doesn't exist", &self.base_path.join(path_arg.unwrap()).display()).into());
},
_ => {},
}
let data_source = data_source.unwrap();
let file_format = get_output_format_from_args(&args, &data_source)?;
let cache_key = data_source.get_cache_key(&file_format);
@ -207,19 +219,29 @@ impl TeraFn for LoadData {
let data = match data_source {
DataSource::Path(path) => read_data_file(&self.base_path, path),
DataSource::Url(url) => {
let response = response_client
match response_client
.get(url.as_str())
.header(header::ACCEPT, file_format.as_accept_header())
.send()
.and_then(|res| res.error_for_status())
.map_err(|e| match e.status() {
Some(status) => format!("Failed to request {}: {}", url, status),
None => format!("Could not get response status for url: {}", url),
})?;
response
.text()
.map_err(|e| format!("Failed to parse response from {}: {:?}", url, e).into())
}
.and_then(|res| res.error_for_status()) {
Ok(r) => {
r.text()
.map_err(|e| format!("Failed to parse response from {}: {:?}", url, e).into())
},
Err(e) => {
if !required {
// HTTP error is discarded (because required=false) and
// Null value is returned to the template
return Ok(Value::Null);
}
Err(match e.status() {
Some(status) => format!("Failed to request {}: {}", url, status),
None => format!("Could not get response status for url: {}", url),
}.into())
}
}
},
// Now that we have discarded recoverable errors, we can unwrap the result
}?;
let result_value: Result<Value> = match file_format {
@ -392,6 +414,17 @@ mod tests {
assert!(result.unwrap_err().to_string().contains("READMEE.md doesn't exist"));
}
#[test]
fn doesnt_fail_when_missing_file_is_not_required() {
let static_fn = LoadData::new(PathBuf::from("../utils"));
let mut args = HashMap::new();
args.insert("path".to_string(), to_value("../../../READMEE.md").unwrap());
args.insert("required".to_string(), to_value(false).unwrap());
let result = static_fn.call(&args);
assert!(result.is_ok());
assert_eq!(result.unwrap(), tera::Value::Null);
}
#[test]
fn cant_load_outside_content_dir() {
let static_fn = LoadData::new(PathBuf::from(PathBuf::from("../utils")));
@ -490,6 +523,26 @@ mod tests {
);
}
#[test]
fn doesnt_fail_when_request_404s_is_not_required() {
let _m = mock("GET", "/aazeow0kog")
.with_status(404)
.with_header("content-type", "text/plain")
.with_body("Not Found")
.create();
let url = format!("{}{}", mockito::server_url(), "/aazeow0kog");
let static_fn = LoadData::new(PathBuf::new());
let mut args = HashMap::new();
args.insert("url".to_string(), to_value(&url).unwrap());
args.insert("format".to_string(), to_value("json").unwrap());
args.insert("required".to_string(), to_value(false).unwrap());
let result = static_fn.call(&args);
assert!(result.is_ok());
assert_eq!(result.unwrap(), tera::Value::Null);
}
#[test]
fn set_default_user_agent() {
let user_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
@ -619,6 +672,28 @@ mod tests {
}
}
#[test]
fn bad_csv_should_result_in_error_even_when_not_required() {
let static_fn = LoadData::new(PathBuf::from("../utils/test-files"));
let mut args = HashMap::new();
args.insert("path".to_string(), to_value("uneven_rows.csv").unwrap());
args.insert("required".to_string(), to_value(false).unwrap());
let result = static_fn.call(&args.clone());
assert!(result.is_err());
let error_kind = result.err().unwrap().kind;
match error_kind {
tera::ErrorKind::Msg(msg) => {
if msg != String::from("Error encountered when parsing csv records") {
panic!("Error message is wrong. Perhaps wrong error is being returned?");
}
}
_ => panic!("Error encountered was not expected CSV error"),
}
}
#[test]
fn can_load_json() {
let static_fn = LoadData::new(PathBuf::from("../utils/test-files"));

View file

@ -210,10 +210,20 @@ As a security precaution, if this file is outside the main site directory, your
{% set data = load_data(path="content/blog/story/data.toml") %}
```
The optional `required` boolean argument can be set to false so that missing data (HTTP error or local file not found) does not produce an error, but returns a null value instead. However, permission issues with a local file and invalid data that could not be parsed to the requested data format will still produce an error even with `required=false`.
The snippet below outputs the HTML from a Wikipedia page, or "No data found" if the page was not reachable, or did not return a successful HTTP code:
```jinja2
{% set data = load_data(path="https://en.wikipedia.org/wiki/Commune_of_Paris", required=false) %}
{% if data %}{{ data | safe }}{% else %}No data found{% endif %}
```
The optional `format` argument allows you to specify and override which data type is contained
within the file specified in the `path` argument. Valid entries are `toml`, `json`, `csv`, `bibtex`
or `plain`. If the `format` argument isn't specified, then the path extension is used.
```jinja2
{% set data = load_data(path="content/blog/story/data.txt", format="json") %}
```