Add anchor existence checking to link_checker component (#786)
* Add anchor existence checking to link_checker component * Oops, forgot some changes * Drop scraper dependency and rework tests * Handle name attributes
This commit is contained in:
parent
096fefe7ed
commit
52c2b74b39
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1223,6 +1223,7 @@ dependencies = [
|
||||||
name = "link_checker"
|
name = "link_checker"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"errors 0.1.0",
|
||||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"reqwest 0.9.20 (registry+https://github.com/rust-lang/crates.io-index)",
|
"reqwest 0.9.20 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
|
@ -6,3 +6,5 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"]
|
||||||
[dependencies]
|
[dependencies]
|
||||||
reqwest = "0.9"
|
reqwest = "0.9"
|
||||||
lazy_static = "1"
|
lazy_static = "1"
|
||||||
|
|
||||||
|
errors = { path = "../errors" }
|
||||||
|
|
|
@ -2,8 +2,13 @@ extern crate reqwest;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate lazy_static;
|
extern crate lazy_static;
|
||||||
|
|
||||||
|
extern crate errors;
|
||||||
|
|
||||||
use reqwest::header::{HeaderMap, ACCEPT};
|
use reqwest::header::{HeaderMap, ACCEPT};
|
||||||
use reqwest::StatusCode;
|
use reqwest::StatusCode;
|
||||||
|
|
||||||
|
use errors::Result;
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::{Arc, RwLock};
|
||||||
|
@ -62,6 +67,12 @@ pub fn check_url(url: &str) -> LinkResult {
|
||||||
|
|
||||||
// Need to actually do the link checking
|
// Need to actually do the link checking
|
||||||
let res = match client.get(url).headers(headers).send() {
|
let res = match client.get(url).headers(headers).send() {
|
||||||
|
Ok(ref mut response) if has_anchor(url) => {
|
||||||
|
match check_page_for_anchor(url, response.text()) {
|
||||||
|
Ok(_) => LinkResult { code: Some(response.status()), error: None },
|
||||||
|
Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(response) => LinkResult { code: Some(response.status()), error: None },
|
Ok(response) => LinkResult { code: Some(response.status()), error: None },
|
||||||
Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
|
Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
|
||||||
};
|
};
|
||||||
|
@ -70,9 +81,37 @@ pub fn check_url(url: &str) -> LinkResult {
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `url` carries a fragment that should be checked as a page anchor.
///
/// The fragment is whatever follows the first `#`. A URL is reported as having
/// no anchor when it contains no `#`, when nothing follows the `#`, or when the
/// fragment starts with `/` or `!` (the `#/` and `#!` prefixes are treated as
/// client-side router paths rather than element anchors).
fn has_anchor(url: &str) -> bool {
    match url.find('#') {
        Some(index) => {
            // `#` is a single byte, so `index + 1` is always a char boundary and
            // this slice cannot panic. Looking at the fragment as a `&str`
            // (instead of a fixed two-byte window) keeps anchors that start
            // with a multi-byte character from being mistaken for "no anchor".
            let fragment = &url[index + 1..];
            !(fragment.is_empty() || fragment.starts_with('/') || fragment.starts_with('!'))
        }
        None => false,
    }
}
|
||||||
|
|
||||||
|
fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
|
||||||
|
let body = body.unwrap();
|
||||||
|
let index = url.find('#').unwrap();
|
||||||
|
let anchor = url.get(index + 1..).unwrap();
|
||||||
|
let checks: [String; 4] = [
|
||||||
|
format!(" id='{}'", anchor),
|
||||||
|
format!(r#" id="{}""#, anchor),
|
||||||
|
format!(" name='{}'", anchor),
|
||||||
|
format!(r#" name="{}""#, anchor),
|
||||||
|
];
|
||||||
|
|
||||||
|
if checks.iter().any(|check| body[..].contains(&check[..])) {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{check_url, LINKS};
|
use super::{check_page_for_anchor, check_url, has_anchor, LINKS};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_validate_ok_links() {
|
fn can_validate_ok_links() {
|
||||||
|
@ -91,4 +130,64 @@ mod tests {
|
||||||
assert!(res.code.is_none());
|
assert!(res.code.is_none());
|
||||||
assert!(res.error.is_some());
|
assert!(res.error.is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn can_validate_anchors() {
    // A single-quoted `id` equal to the URL fragment satisfies the check.
    let page = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
    let outcome = check_page_for_anchor(
        "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect",
        Ok(page),
    );
    assert!(outcome.is_ok());
}
|
||||||
|
|
||||||
|
#[test]
fn can_validate_anchors_with_other_quotes() {
    // Same as the single-quote case, but the `id` value is double-quoted.
    let page = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
    let outcome = check_page_for_anchor(
        "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect",
        Ok(page),
    );
    assert!(outcome.is_ok());
}
|
||||||
|
|
||||||
|
#[test]
fn can_validate_anchors_with_name_attr() {
    // A `name` attribute equal to the fragment is accepted as an anchor target too.
    let page = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
    let outcome = check_page_for_anchor(
        "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect",
        Ok(page),
    );
    assert!(outcome.is_ok());
}
|
||||||
|
|
||||||
|
#[test]
fn can_fail_when_anchor_not_found() {
    // `me` is only a prefix of the `id` on the page, so the lookup must fail.
    let page = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
    let outcome = check_page_for_anchor(
        "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me",
        Ok(page),
    );
    assert!(outcome.is_err());
}
|
||||||
|
|
||||||
|
#[test]
fn can_check_url_for_anchor() {
    // A regular `#fragment` URL is reported as having an anchor.
    assert!(has_anchor("https://doc.rust-lang.org/std/index.html#the-rust-standard-library"));
}
|
||||||
|
|
||||||
|
#[test]
fn will_return_false_when_no_anchor() {
    // No `#` in the URL means there is nothing to look up.
    assert!(!has_anchor("https://doc.rust-lang.org/std/index.html"));
}
|
||||||
|
|
||||||
|
#[test]
fn will_return_false_when_has_router_url() {
    // A `#/` prefix is not reported as an anchor.
    assert!(!has_anchor("https://doc.rust-lang.org/#/std"));
}
|
||||||
|
|
||||||
|
#[test]
fn will_return_false_when_has_router_url_alt() {
    // A `#!` prefix is not reported as an anchor either.
    assert!(!has_anchor("https://doc.rust-lang.org/#!/std"));
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue