Skip anchor checking for URL with prefix in config (#812)
* cargo fmt & clippy * Skip anchor checking for URL with prefix in config
This commit is contained in:
parent
4aa2ba84fc
commit
6149fd17e1
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1241,6 +1241,7 @@ dependencies = [
|
|||
name = "link_checker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"config 0.1.0",
|
||||
"errors 0.1.0",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
|
@ -7,8 +7,8 @@ use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
|
|||
use toml;
|
||||
use toml::Value as Toml;
|
||||
|
||||
use errors::Result;
|
||||
use errors::Error;
|
||||
use errors::Result;
|
||||
use highlighting::THEME_SET;
|
||||
use theme::Theme;
|
||||
use utils::fs::read_file_with_error;
|
||||
|
@ -86,7 +86,20 @@ impl Default for Taxonomy {
|
|||
}
|
||||
}
|
||||
|
||||
type TranslateTerm = HashMap<String, String>;
|
||||
type TranslateTerm = HashMap<String, String>;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct LinkChecker {
|
||||
/// Skip anchor checking for these URL prefixes
|
||||
pub skip_anchor_prefixes: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for LinkChecker {
|
||||
fn default() -> LinkChecker {
|
||||
LinkChecker { skip_anchor_prefixes: Vec::new() }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
|
@ -152,6 +165,8 @@ pub struct Config {
|
|||
#[serde(skip_serializing, skip_deserializing)] // not a typo, both are needed
|
||||
pub extra_syntax_set: Option<SyntaxSet>,
|
||||
|
||||
pub link_checker: LinkChecker,
|
||||
|
||||
/// All user params set in [extra] in the config
|
||||
pub extra: HashMap<String, Toml>,
|
||||
|
||||
|
@ -317,9 +332,16 @@ impl Config {
|
|||
Error::msg(format!("Translation for language '{}' is missing", lang.as_ref()))
|
||||
})?;
|
||||
|
||||
terms.get(key.as_ref()).ok_or_else(|| {
|
||||
Error::msg(format!("Translation key '{}' for language '{}' is missing", key.as_ref(), lang.as_ref()))
|
||||
}).map(|term| term.to_string())
|
||||
terms
|
||||
.get(key.as_ref())
|
||||
.ok_or_else(|| {
|
||||
Error::msg(format!(
|
||||
"Translation key '{}' for language '{}' is missing",
|
||||
key.as_ref(),
|
||||
lang.as_ref()
|
||||
))
|
||||
})
|
||||
.map(|term| term.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -346,6 +368,7 @@ impl Default for Config {
|
|||
translations: HashMap::new(),
|
||||
extra_syntaxes: Vec::new(),
|
||||
extra_syntax_set: None,
|
||||
link_checker: LinkChecker::default(),
|
||||
extra: HashMap::new(),
|
||||
build_timestamp: Some(1),
|
||||
}
|
||||
|
@ -551,4 +574,25 @@ ignored_content = ["*.{graphml,iso}", "*.py?"]
|
|||
assert!(g.is_match("foo.py3"));
|
||||
assert!(!g.is_match("foo.py"));
|
||||
}
|
||||
|
||||
/// Verifies that a `[link_checker]` table in the config TOML is parsed into
/// `Config::link_checker`, and that `skip_anchor_prefixes` keeps both entries in the
/// order they were written.
#[test]
|
||||
fn link_checker_skip_anchor_prefixes() {
|
||||
let config_str = r#"
|
||||
title = "My site"
|
||||
base_url = "example.com"
|
||||
|
||||
[link_checker]
|
||||
skip_anchor_prefixes = [
|
||||
"https://caniuse.com/#feat=",
|
||||
"https://github.com/rust-lang/rust/blob/",
|
||||
]
|
||||
"#;
|
||||
|
||||
// Parsing must succeed; `unwrap` turns a parse error into a test failure.
let config = Config::parse(config_str).unwrap();
|
||||
// Move the parsed prefixes out of the config so they can be compared directly.
let v = config.link_checker.skip_anchor_prefixes;
|
||||
assert_eq!(
|
||||
v,
|
||||
vec!["https://caniuse.com/#feat=", "https://github.com/rust-lang/rust/blob/"]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ extern crate utils;
|
|||
mod config;
|
||||
pub mod highlighting;
|
||||
mod theme;
|
||||
pub use config::{Config, Language, Taxonomy};
|
||||
pub use config::{Config, Language, LinkChecker, Taxonomy};
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
|
|
|
@ -272,7 +272,7 @@ impl ImageOp {
|
|||
} else {
|
||||
img
|
||||
}
|
||||
},
|
||||
}
|
||||
Fill(w, h) => {
|
||||
let factor_w = img_w as f32 / w as f32;
|
||||
let factor_h = img_h as f32 / h as f32;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use slotmap::{DenseSlotMap, DefaultKey};
|
||||
use slotmap::{DefaultKey, DenseSlotMap};
|
||||
|
||||
use front_matter::SortBy;
|
||||
|
||||
|
|
|
@ -21,7 +21,9 @@ pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering {
|
|||
/// Takes a list of (page key, date, permalink) and sort them by dates if possible
|
||||
/// Pages without date will be put in the unsortable bucket
|
||||
/// The permalink is used to break ties
|
||||
pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
|
||||
pub fn sort_pages_by_date(
|
||||
pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>,
|
||||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
|
||||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
|
||||
pages.into_par_iter().partition(|page| page.1.is_some());
|
||||
|
||||
|
@ -40,7 +42,9 @@ pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>
|
|||
/// Takes a list of (page key, weight, permalink) and sort them by weight if possible
|
||||
/// Pages without weight will be put in the unsortable bucket
|
||||
/// The permalink is used to break ties
|
||||
pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
|
||||
pub fn sort_pages_by_weight(
|
||||
pages: Vec<(&DefaultKey, Option<usize>, &str)>,
|
||||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
|
||||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
|
||||
pages.into_par_iter().partition(|page| page.1.is_some());
|
||||
|
||||
|
@ -57,7 +61,9 @@ pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (
|
|||
}
|
||||
|
||||
/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight
|
||||
pub fn find_siblings(sorted: &[DefaultKey]) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {
|
||||
pub fn find_siblings(
|
||||
sorted: &[DefaultKey],
|
||||
) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {
|
||||
let mut res = Vec::with_capacity(sorted.len());
|
||||
let length = sorted.len();
|
||||
|
||||
|
|
|
@ -7,4 +7,5 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"]
|
|||
reqwest = "0.9"
|
||||
lazy_static = "1"
|
||||
|
||||
config = { path = "../config" }
|
||||
errors = { path = "../errors" }
|
||||
|
|
|
@ -2,11 +2,13 @@ extern crate reqwest;
|
|||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
extern crate config;
|
||||
extern crate errors;
|
||||
|
||||
use reqwest::header::{HeaderMap, ACCEPT};
|
||||
use reqwest::StatusCode;
|
||||
|
||||
use config::LinkChecker;
|
||||
use errors::Result;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
@ -51,7 +53,7 @@ lazy_static! {
|
|||
static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
|
||||
}
|
||||
|
||||
pub fn check_url(url: &str) -> LinkResult {
|
||||
pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
|
||||
{
|
||||
let guard = LINKS.read().unwrap();
|
||||
if let Some(res) = guard.get(url) {
|
||||
|
@ -65,9 +67,11 @@ pub fn check_url(url: &str) -> LinkResult {
|
|||
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
|
||||
|
||||
// Need to actually do the link checking
|
||||
let res = match client.get(url).headers(headers).send() {
|
||||
Ok(ref mut response) if has_anchor(url) => {
|
||||
Ok(ref mut response) if check_anchor && has_anchor(url) => {
|
||||
match check_page_for_anchor(url, response.text()) {
|
||||
Ok(_) => LinkResult { code: Some(response.status()), error: None },
|
||||
Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
|
||||
|
@ -111,21 +115,21 @@ fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()>
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{check_page_for_anchor, check_url, has_anchor, LINKS};
|
||||
use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};
|
||||
|
||||
#[test]
|
||||
fn can_validate_ok_links() {
|
||||
let url = "https://google.com";
|
||||
let res = check_url(url);
|
||||
let res = check_url(url, &LinkChecker::default());
|
||||
assert!(res.is_valid());
|
||||
assert!(LINKS.read().unwrap().get(url).is_some());
|
||||
let res = check_url(url);
|
||||
let res = check_url(url, &LinkChecker::default());
|
||||
assert!(res.is_valid());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_fail_404_links() {
|
||||
let res = check_url("https://google.comys");
|
||||
let res = check_url("https://google.comys", &LinkChecker::default());
|
||||
assert_eq!(res.is_valid(), false);
|
||||
assert!(res.code.is_none());
|
||||
assert!(res.error.is_some());
|
||||
|
@ -190,4 +194,23 @@ mod tests {
|
|||
let res = has_anchor(url);
|
||||
assert_eq!(res, false);
|
||||
}
|
||||
|
||||
/// Exercises `check_url` with a `LinkChecker` that skips anchor checking for one URL prefix.
///
/// A URL matching the prefix must be reported valid without its anchor being looked up,
/// while URLs outside the prefix still have their anchors checked.
/// NOTE: `check_url` sends real HTTP requests, so this test needs network access.
#[test]
fn skip_anchor_prefixes() {
    let config = LinkChecker {
        skip_anchor_prefixes: vec!["https://github.com/rust-lang/rust/blob/".to_owned()],
    };

    // anchor check is ignored because the url matches the prefix
    let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214";
    // The locals are already `&str`, so they are passed as-is rather than re-borrowed.
    assert!(check_url(permalink, &config).is_valid());

    // other anchors are checked
    let glossary = "https://help.github.com/en/articles/github-glossary#blame";
    assert!(check_url(glossary, &config).is_valid());

    let glossary_invalid =
        "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist";
    assert_eq!(check_url(glossary_invalid, &config).is_valid(), false);
}
|
||||
}
|
||||
|
|
|
@ -335,7 +335,7 @@ fn is_section(path: &str, languages_codes: &[&str]) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
false
|
||||
}
|
||||
|
||||
/// What happens when a section or a page is created/edited
|
||||
|
|
|
@ -296,8 +296,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
|||
let start_idx = heading_ref.start_idx;
|
||||
let end_idx = heading_ref.end_idx;
|
||||
let title = get_text(&events[start_idx + 1..end_idx]);
|
||||
let id =
|
||||
heading_ref.id.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0));
|
||||
let id = heading_ref
|
||||
.id
|
||||
.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0));
|
||||
inserted_anchors.push(id.clone());
|
||||
|
||||
// insert `id` to the tag
|
||||
|
@ -326,7 +327,8 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
|||
|
||||
// record heading to make table of contents
|
||||
let permalink = format!("{}#{}", context.current_page_permalink, id);
|
||||
let h = Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
|
||||
let h =
|
||||
Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
|
||||
headings.push(h);
|
||||
}
|
||||
|
||||
|
|
|
@ -399,7 +399,7 @@ impl Site {
|
|||
all_links
|
||||
.par_iter()
|
||||
.filter_map(|(page_path, link)| {
|
||||
let res = check_url(&link);
|
||||
let res = check_url(&link, &self.config.link_checker);
|
||||
if res.is_valid() {
|
||||
None
|
||||
} else {
|
||||
|
|
|
@ -662,3 +662,14 @@ fn can_ignore_markdown_content() {
|
|||
let (_, _tmp_dir, public) = build_site("test_site");
|
||||
assert!(!file_exists!(public, "posts/ignored/index.html"));
|
||||
}
|
||||
|
||||
/// Builds `test_site`, confirms its link-checker prefixes were loaded, then reloads the
/// site with check mode enabled.
#[test]
fn check_site() {
    let (mut site, _tmp_dir, _public) = build_site("test_site");

    // The prefix list is presumably supplied by the test site's own config file;
    // it must contain exactly this one entry.
    assert_eq!(
        &site.config.link_checker.skip_anchor_prefixes,
        &vec!["https://github.com/rust-lang/rust/blob/"]
    );

    // Switch to check mode before loading; a load failure fails the test.
    site.config.enable_check_mode();
    site.load().expect("link check test_site");
}
|
||||
|
|
|
@ -34,9 +34,10 @@ impl TeraFn for Trans {
|
|||
let lang = optional_arg!(String, args.get("lang"), "`trans`: `lang` must be a string.")
|
||||
.unwrap_or_else(|| self.config.default_language.clone());
|
||||
|
||||
let term = self.config.get_translation(lang, key).map_err(|e| {
|
||||
Error::chain("Failed to retreive term translation", e)
|
||||
})?;
|
||||
let term = self
|
||||
.config
|
||||
.get_translation(lang, key)
|
||||
.map_err(|e| Error::chain("Failed to retreive term translation", e))?;
|
||||
|
||||
Ok(to_value(term).unwrap())
|
||||
}
|
||||
|
@ -509,7 +510,6 @@ mod tests {
|
|||
assert!(static_fn.call(&args).is_err());
|
||||
}
|
||||
|
||||
|
||||
const TRANS_CONFIG: &str = r#"
|
||||
base_url = "https://remplace-par-ton-url.fr"
|
||||
default_language = "fr"
|
||||
|
|
|
@ -95,8 +95,14 @@ extra_syntaxes = []
|
|||
#
|
||||
# [translations.en]
|
||||
# title = "A title"
|
||||
#
|
||||
[translations]
|
||||
|
||||
|
||||
# Configure the link checker
|
||||
[link_checker]
|
||||
# Skip anchor checking for external URLs that start with these prefixes
|
||||
skip_anchor_prefixes = [
|
||||
"https://caniuse.com/",
|
||||
]
|
||||
|
||||
# You can put any kind of data in there and it
|
||||
# will be accessible in all templates
|
||||
|
|
|
@ -13,5 +13,10 @@ extra_syntaxes = ["syntaxes"]
|
|||
|
||||
ignored_content = ["*/ignored.md"]
|
||||
|
||||
[link_checker]
|
||||
skip_anchor_prefixes = [
|
||||
"https://github.com/rust-lang/rust/blob/",
|
||||
]
|
||||
|
||||
[extra.author]
|
||||
name = "Vincent Prouillet"
|
||||
|
|
|
@ -5,3 +5,9 @@ date = 2017-01-01
|
|||
+++
|
||||
|
||||
A simple page
|
||||
|
||||
<!-- more -->
|
||||
|
||||
Link to some rust-lang [source code][permalink].
|
||||
|
||||
[permalink]: https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214
|
||||
|
|
Loading…
Reference in a new issue