use lazy_static::lazy_static; use reqwest::header::{HeaderMap, ACCEPT}; use reqwest::{blocking::Client, StatusCode}; use config::LinkChecker; use errors::Result; use std::collections::HashMap; use std::sync::{Arc, RwLock}; #[derive(Clone, Debug, PartialEq)] pub struct LinkResult { pub code: Option, /// Whether the HTTP request didn't make it to getting a HTTP code pub error: Option, } impl LinkResult { pub fn is_valid(&self) -> bool { if self.error.is_some() { return false; } if let Some(c) = self.code { return c.is_success() || c == StatusCode::NOT_MODIFIED; } true } pub fn message(&self) -> String { if let Some(ref e) = self.error { return e.clone(); } if let Some(c) = self.code { return format!("{}", c); } "Unknown error".to_string() } } lazy_static! { // Keep history of link checks so a rebuild doesn't have to check again static ref LINKS: Arc>> = Arc::new(RwLock::new(HashMap::new())); } pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult { { let guard = LINKS.read().unwrap(); if let Some(res) = guard.get(url) { return res.clone(); } } let mut headers = HeaderMap::new(); headers.insert(ACCEPT, "text/html".parse().unwrap()); headers.append(ACCEPT, "*/*".parse().unwrap()); let client = Client::builder() .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))) .build() .expect("reqwest client build"); let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix)); // Need to actually do the link checking let res = match client.get(url).headers(headers).send() { Ok(ref mut response) if check_anchor && has_anchor(url) => { let body = { let mut buf: Vec = vec![]; response.copy_to(&mut buf).unwrap(); String::from_utf8(buf).unwrap() }; match check_page_for_anchor(url, body) { Ok(_) => LinkResult { code: Some(response.status()), error: None }, Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, } } Ok(response) => { if response.status().is_success() || response.status() == StatusCode::NOT_MODIFIED { LinkResult { code: Some(response.status()), error: None } } else { let error_string = if response.status().is_informational() { format!("Informational status code ({}) received", response.status()) } else if response.status().is_redirection() { format!("Redirection status code ({}) received", response.status()) } else if response.status().is_client_error() { format!("Client error status code ({}) received", response.status()) } else if response.status().is_server_error() { format!("Server error status code ({}) received", response.status()) } else { format!("Non-success status code ({}) received", response.status()) }; LinkResult { code: None, error: Some(error_string) } } } Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, }; LINKS.write().unwrap().insert(url.to_string(), res.clone()); res } fn has_anchor(url: &str) -> bool { match url.find('#') { Some(index) => match url.get(index..=index + 1) { Some("#/") | Some("#!") | None => false, Some(_) => true, }, None => false, } } fn check_page_for_anchor(url: &str, body: String) -> Result<()> { let index = url.find('#').unwrap(); let anchor = url.get(index + 1..).unwrap(); let checks: [String; 8] = [ format!(" id='{}'", anchor), format!(" ID='{}'", anchor), format!(r#" id="{}""#, anchor), format!(r#" ID="{}""#, anchor), format!(" name='{}'", anchor), format!(" NAME='{}'", anchor), format!(r#" name="{}""#, anchor), format!(r#" NAME="{}""#, anchor), ]; if checks.iter().any(|check| body[..].contains(&check[..])) { Ok(()) } else { Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor))) } } #[cfg(test)] mod tests { use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS}; use mockito::mock; // NOTE: HTTP mock paths below are randomly generated to avoid name // collisions. Mocks with the same path can sometimes bleed between tests // and cause them to randomly pass/fail. Please make sure to use unique // paths when adding or modifying tests that use Mockito. #[test] fn can_validate_ok_links() { let url = format!("{}{}", mockito::server_url(), "/ekbtwxfhjw"); let _m = mock("GET", "/ekbtwxfhjw") .with_header("Content-Type", "text/html") .with_body(format!( r#" Test Mock URL "#, url )) .create(); let res = check_url(&url, &LinkChecker::default()); assert!(res.is_valid()); assert!(LINKS.read().unwrap().get(&url).is_some()); } #[test] fn can_follow_301_links() { let _m1 = mock("GET", "/c7qrtrv3zz") .with_status(301) .with_header("Content-Type", "text/plain") .with_header("Location", format!("{}/rbs5avjs8e", mockito::server_url()).as_str()) .with_body("Redirecting...") .create(); let _m2 = mock("GET", "/rbs5avjs8e") .with_header("Content-Type", "text/plain") .with_body("Test") .create(); let url = format!("{}{}", mockito::server_url(), "/c7qrtrv3zz"); let res = check_url(&url, &LinkChecker::default()); assert!(res.is_valid()); assert!(res.code.is_some()); assert!(res.error.is_none()); } #[test] fn set_default_user_agent() { let user_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); let _m1 = mock("GET", "/C4Szbfnvj6M0LoPk") .match_header("User-Agent", user_agent) .with_status(200) .with_body("Test") .create(); let url = format!("{}{}", mockito::server_url(), "/C4Szbfnvj6M0LoPk"); let res = check_url(&url, &LinkChecker::default()); assert!(res.is_valid()); assert!(res.code.is_some()); assert!(res.error.is_none()); } #[test] fn can_fail_301_to_404_links() { let _m1 = mock("GET", "/cav9vibhsc") .with_status(301) .with_header("Content-Type", "text/plain") .with_header("Location", format!("{}/72zmfg4smd", mockito::server_url()).as_str()) .with_body("Redirecting...") .create(); let _m2 = mock("GET", "/72zmfg4smd") .with_status(404) .with_header("Content-Type", "text/plain") .with_body("Not Found") .create(); let url = format!("{}{}", mockito::server_url(), "/cav9vibhsc"); let res = check_url(&url, &LinkChecker::default()); assert_eq!(res.is_valid(), false); assert!(res.code.is_none()); assert!(res.error.is_some()); } #[test] fn can_fail_404_links() { let _m = mock("GET", "/nlhab9c1vc") .with_status(404) .with_header("Content-Type", "text/plain") .with_body("Not Found") .create(); let url = format!("{}{}", mockito::server_url(), "/nlhab9c1vc"); let res = check_url(&url, &LinkChecker::default()); assert_eq!(res.is_valid(), false); assert!(res.code.is_none()); assert!(res.error.is_some()); } #[test] fn can_fail_500_links() { let _m = mock("GET", "/qdbrssazes") .with_status(500) .with_header("Content-Type", "text/plain") .with_body("Internal Server Error") .create(); let url = format!("{}{}", mockito::server_url(), "/qdbrssazes"); let res = check_url(&url, &LinkChecker::default()); assert_eq!(res.is_valid(), false); assert!(res.code.is_none()); assert!(res.error.is_some()); } #[test] fn can_fail_unresolved_links() { let res = check_url("https://t6l5cn9lpm.lxizfnzckd", &LinkChecker::default()); assert_eq!(res.is_valid(), false); assert!(res.code.is_none()); assert!(res.error.is_some()); } #[test] fn can_validate_anchors() { let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect"; let body = r#"

collect

"#.to_string(); let res = check_page_for_anchor(url, body); assert!(res.is_ok()); } // https://github.com/getzola/zola/issues/948 #[test] fn can_validate_anchors_in_capital() { let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect"; let body = r#"

collect

"#.to_string(); let res = check_page_for_anchor(url, body); assert!(res.is_ok()); } #[test] fn can_validate_anchors_with_other_quotes() { let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect"; let body = r#"

collect

"#.to_string(); let res = check_page_for_anchor(url, body); assert!(res.is_ok()); } #[test] fn can_validate_anchors_with_name_attr() { let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect"; let body = r#"

collect

"#.to_string(); let res = check_page_for_anchor(url, body); assert!(res.is_ok()); } #[test] fn can_fail_when_anchor_not_found() { let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me"; let body = r#"

collect

"#.to_string(); let res = check_page_for_anchor(url, body); assert!(res.is_err()); } #[test] fn can_check_url_for_anchor() { let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library"; let res = has_anchor(url); assert_eq!(res, true); } #[test] fn will_return_false_when_no_anchor() { let url = "https://doc.rust-lang.org/std/index.html"; let res = has_anchor(url); assert_eq!(res, false); } #[test] fn will_return_false_when_has_router_url() { let url = "https://doc.rust-lang.org/#/std"; let res = has_anchor(url); assert_eq!(res, false); } #[test] fn will_return_false_when_has_router_url_alt() { let url = "https://doc.rust-lang.org/#!/std"; let res = has_anchor(url); assert_eq!(res, false); } #[test] fn skip_anchor_prefixes() { let ignore_url = format!("{}{}", mockito::server_url(), "/ignore/"); let config = LinkChecker { skip_prefixes: vec![], skip_anchor_prefixes: vec![ignore_url] }; let _m1 = mock("GET", "/ignore/i30hobj1cy") .with_header("Content-Type", "text/html") .with_body( r#" Ignore

"#, ) .create(); // anchor check is ignored because the url matches the prefix let ignore = format!("{}{}", mockito::server_url(), "/ignore/i30hobj1cy#nonexistent"); assert!(check_url(&ignore, &config).is_valid()); let _m2 = mock("GET", "/guvqcqwmth") .with_header("Content-Type", "text/html") .with_body( r#" Test

"#, ) .create(); // other anchors are checked let existent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#existent"); assert!(check_url(&existent, &config).is_valid()); let nonexistent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#nonexistent"); assert_eq!(check_url(&nonexistent, &config).is_valid(), false); } }