Set default user agent for external requests

Many servers return errors (e.g. 400/403) for requests that do not set a
User-Agent header. This causes problems in both the link_checker and
load_data components: link_checker reports false-positive dead links,
and load_data fails to fetch remote data. To mitigate this, this commit
sets a default User-Agent of

    $CARGO_PKG_NAME/$CARGO_PKG_VERSION

Note that the root cause of this regression from zola v0.9.0 is that
reqwest 0.10 changed its default behavior and no longer sets a
User-Agent by default:

    https://github.com/seanmonstar/reqwest/pull/751

Fixes #950.
This commit is contained in:
Luke Hsiao 2020-02-17 11:09:33 -08:00
parent 15a3ab1a51
commit 661bd9c0fa
2 changed files with 51 additions and 2 deletions

View file

@ -58,7 +58,10 @@ pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
headers.insert(ACCEPT, "text/html".parse().unwrap());
headers.append(ACCEPT, "*/*".parse().unwrap());
let client = Client::new();
let client = Client::builder()
.user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")))
.build()
.expect("reqwest client build");
let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
@ -185,6 +188,22 @@ mod tests {
assert!(res.error.is_none());
}
// Checks a URL served by a mock that is registered with a `User-Agent`
// header constraint of `<crate name>/<crate version>`, and expects the
// link check to come back valid with a status code and no error.
#[test]
fn set_default_user_agent() {
    const PATH: &str = "/C4Szbfnvj6M0LoPk";
    let expected_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));

    // Keep the mock bound to a local so it stays registered for the whole test.
    let _guard = mock("GET", PATH)
        .match_header("User-Agent", expected_agent)
        .with_status(200)
        .with_body("Test")
        .create();

    let target = format!("{}{}", mockito::server_url(), PATH);
    let outcome = check_url(&target, &LinkChecker::default());

    assert!(outcome.is_valid());
    assert!(outcome.code.is_some());
    assert!(outcome.error.is_none());
}
#[test]
fn can_fail_301_to_404_links() {
let _m1 = mock("GET", "/cav9vibhsc")

View file

@ -178,7 +178,12 @@ pub struct LoadData {
}
impl LoadData {
/// Creates a `LoadData` for `base_path`, holding an HTTP client and an empty
/// result cache, each wrapped in `Arc<Mutex<..>>`.
///
/// # Panics
///
/// Panics with "reqwest client build" if the client builder returns an error.
pub fn new(base_path: PathBuf) -> Self {
// NOTE(review): this binding is immediately shadowed by the `let client` below;
// it looks like the pre-change line of a diff rendered without +/- markers — confirm.
let client = Arc::new(Mutex::new(Client::builder().build().expect("reqwest client build")));
// Build the client with a default User-Agent of `<crate name>/<crate version>`,
// taken from Cargo's compile-time environment variables.
let client = Arc::new(Mutex::new(
Client::builder()
.user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")))
.build()
.expect("reqwest client build"),
));
// Start with an empty cache.
let result_cache = Arc::new(Mutex::new(HashMap::new()));
Self { base_path, client, result_cache }
}
@ -443,6 +448,31 @@ mod tests {
);
}
// Loads JSON through `LoadData` from a mock that is registered with a
// `User-Agent` header constraint of `<crate name>/<crate version>`, and
// checks that the nested `test.foo` value comes back as "bar".
#[test]
fn set_default_user_agent() {
    const ENDPOINT: &str = "/chu8aizahBiy";
    let expected_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
    // The raw string is kept flush-left so its bytes match the served payload exactly.
    let json_body = r#"{
"test": {
"foo": "bar"
}
}
"#;

    // Keep the mock bound to a local so it stays registered for the whole test.
    let _guard = mock("GET", ENDPOINT)
        .match_header("User-Agent", expected_agent)
        .with_header("content-type", "application/json")
        .with_body(json_body)
        .create();

    let remote = format!("{}{}", mockito::server_url(), ENDPOINT);
    let loader = LoadData::new(PathBuf::new());

    let mut args = HashMap::new();
    args.insert("url".to_string(), to_value(&remote).unwrap());
    args.insert("format".to_string(), to_value("json").unwrap());

    let loaded = loader.call(&args).unwrap();
    let foo = loaded.get("test").unwrap().get("foo").unwrap();
    assert_eq!(foo, &to_value("bar").unwrap());
}
#[test]
fn can_load_toml() {
let static_fn = LoadData::new(PathBuf::from("../utils/test-files"));