Check command global (#683)

* Add check subcommand

* Add some brief documentation for the check subcommand

* Start working on parallel link checks

* Check all external links in Site

* Return *all* dead links in site
This commit is contained in:
Marcus Klaas de Vries 2019-05-27 14:05:07 +02:00 committed by Vincent Prouillet
parent 93338c2762
commit 0d964204c3
12 changed files with 141 additions and 69 deletions

1
Cargo.lock generated
View file

@ -2162,6 +2162,7 @@ dependencies = [
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"imageproc 0.1.0", "imageproc 0.1.0",
"library 0.1.0", "library 0.1.0",
"link_checker 0.1.0",
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"search 0.1.0", "search 0.1.0",

View file

@ -76,6 +76,8 @@ pub struct Page {
pub lang: String, pub lang: String,
/// Contains all the translated version of that page /// Contains all the translated version of that page
pub translations: Vec<Key>, pub translations: Vec<Key>,
/// Contains the external links that need to be checked
pub external_links: Vec<String>,
} }
impl Page { impl Page {
@ -104,6 +106,7 @@ impl Page {
reading_time: None, reading_time: None,
lang: String::new(), lang: String::new(),
translations: Vec::new(), translations: Vec::new(),
external_links: Vec::new(),
} }
} }
@ -263,6 +266,7 @@ impl Page {
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned()); self.summary = res.summary_len.map(|l| res.body[0..l].to_owned());
self.content = res.body; self.content = res.body;
self.toc = res.toc; self.toc = res.toc;
self.external_links = res.external_links;
Ok(()) Ok(())
} }
@ -339,6 +343,7 @@ impl Default for Page {
reading_time: None, reading_time: None,
lang: String::new(), lang: String::new(),
translations: Vec::new(), translations: Vec::new(),
external_links: Vec::new(),
} }
} }
} }

View file

@ -56,6 +56,8 @@ pub struct Section {
pub lang: String, pub lang: String,
/// Contains all the translated version of that section /// Contains all the translated version of that section
pub translations: Vec<Key>, pub translations: Vec<Key>,
/// Contains the external links that need to be checked
pub external_links: Vec<String>,
} }
impl Section { impl Section {
@ -85,6 +87,7 @@ impl Section {
reading_time: None, reading_time: None,
lang: String::new(), lang: String::new(),
translations: Vec::new(), translations: Vec::new(),
external_links: Vec::new(),
} }
} }
@ -189,6 +192,8 @@ impl Section {
})?; })?;
self.content = res.body; self.content = res.body;
self.toc = res.toc; self.toc = res.toc;
self.external_links = res.external_links;
Ok(()) Ok(())
} }
@ -255,6 +260,7 @@ impl Default for Section {
word_count: None, word_count: None,
lang: String::new(), lang: String::new(),
translations: Vec::new(), translations: Vec::new(),
external_links: Vec::new(),
} }
} }
} }

View file

@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
use context::RenderContext; use context::RenderContext;
use errors::{Error, Result}; use errors::{Error, Result};
use front_matter::InsertAnchor; use front_matter::InsertAnchor;
use link_checker::check_url;
use table_of_contents::{make_table_of_contents, Header}; use table_of_contents::{make_table_of_contents, Header};
use utils::site::resolve_internal_link; use utils::site::resolve_internal_link;
use utils::vec::InsertMany; use utils::vec::InsertMany;
@ -25,6 +24,7 @@ pub struct Rendered {
pub body: String, pub body: String,
pub summary_len: Option<usize>, pub summary_len: Option<usize>,
pub toc: Vec<Header>, pub toc: Vec<Header>,
pub external_links: Vec<String>,
} }
// tracks a header in a slice of pulldown-cmark events // tracks a header in a slice of pulldown-cmark events
@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
&& !link.starts_with("mailto:") && !link.starts_with("mailto:")
} }
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<String> { fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec<String>) -> Result<String> {
if link_type == LinkType::Email { if link_type == LinkType::Email {
return Ok(link.to_string()); return Ok(link.to_string());
} }
@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<
} }
} else if is_colocated_asset_link(&link) { } else if is_colocated_asset_link(&link) {
format!("{}{}", context.current_page_permalink, link) format!("{}{}", context.current_page_permalink, link)
} else if context.config.check_external_links
&& !link.starts_with('#')
&& !link.starts_with("mailto:")
{
let res = check_url(&link);
if res.is_valid() {
link.to_string()
} else { } else {
return Err(format!("Link {} is not valid: {}", link, res.message()).into()); if !link.starts_with('#') && !link.starts_with("mailto:") {
external_links.push(link.to_owned());
} }
} else {
link.to_string() link.to_string()
}; };
Ok(result) Ok(result)
@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
let mut inserted_anchors: Vec<String> = vec![]; let mut inserted_anchors: Vec<String> = vec![];
let mut headers: Vec<Header> = vec![]; let mut headers: Vec<Header> = vec![];
let mut external_links = Vec::new();
let mut opts = Options::empty(); let mut opts = Options::empty();
let mut has_summary = false; let mut has_summary = false;
@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
Event::Start(Tag::Image(link_type, src, title)) Event::Start(Tag::Image(link_type, src, title))
} }
Event::Start(Tag::Link(link_type, link, title)) => { Event::Start(Tag::Link(link_type, link, title)) => {
let fixed_link = match fix_link(link_type, &link, context) { let fixed_link = match fix_link(link_type, &link, context, &mut external_links) {
Ok(fixed_link) => fixed_link, Ok(fixed_link) => fixed_link,
Err(err) => { Err(err) => {
error = Some(err); error = Some(err);
@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None }, summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
body: html, body: html,
toc: make_table_of_contents(headers), toc: make_table_of_contents(headers),
external_links: external_links,
}) })
} }
} }

View file

@ -697,10 +697,9 @@ Some text
} }
#[test] #[test]
fn can_validate_valid_external_links() { fn correctly_captures_external_links() {
let permalinks_ctx = HashMap::new(); let permalinks_ctx = HashMap::new();
let mut config = Config::default(); let config = Config::default();
config.check_external_links = true;
let context = RenderContext::new( let context = RenderContext::new(
&ZOLA_TERA, &ZOLA_TERA,
&config, &config,
@ -708,58 +707,14 @@ fn can_validate_valid_external_links() {
&permalinks_ctx, &permalinks_ctx,
InsertAnchor::None, InsertAnchor::None,
); );
let res = render_content("[a link](http://google.com)", &context).unwrap(); let content = "
assert_eq!(res.body, "<p><a href=\"http://google.com\">a link</a></p>\n"); [a link](http://google.com)
} [a link](http://google.comy)
Email: [foo@bar.baz](mailto:foo@bar.baz)
#[test] Email: <foo@bar.baz>
fn can_show_error_message_for_invalid_external_links() { ";
let permalinks_ctx = HashMap::new(); let res = render_content(content, &context).unwrap();
let mut config = Config::default(); assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]);
config.check_external_links = true;
let context = RenderContext::new(
&ZOLA_TERA,
&config,
"https://vincent.is/about/",
&permalinks_ctx,
InsertAnchor::None,
);
let res = render_content("[a link](http://google.comy)", &context);
assert!(res.is_err());
let err = res.unwrap_err();
assert!(format!("{}", err).contains("Link http://google.comy is not valid"));
}
#[test]
fn doesnt_try_to_validate_email_links_mailto() {
let permalinks_ctx = HashMap::new();
let mut config = Config::default();
config.check_external_links = true;
let context = RenderContext::new(
&ZOLA_TERA,
&config,
"https://vincent.is/about/",
&permalinks_ctx,
InsertAnchor::None,
);
let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap();
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
}
#[test]
fn doesnt_try_to_validate_email_links_angled_brackets() {
let permalinks_ctx = HashMap::new();
let mut config = Config::default();
config.check_external_links = true;
let context = RenderContext::new(
&ZOLA_TERA,
&config,
"https://vincent.is/about/",
&permalinks_ctx,
InsertAnchor::None,
);
let res = render_content("Email: <foo@bar.baz>", &context).unwrap();
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
} }
#[test] #[test]

View file

@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" }
search = { path = "../search" } search = { path = "../search" }
imageproc = { path = "../imageproc" } imageproc = { path = "../imageproc" }
library = { path = "../library" } library = { path = "../library" }
link_checker = { path = "../link_checker" }
[dev-dependencies] [dev-dependencies]
tempfile = "3" tempfile = "3"

View file

@ -15,6 +15,7 @@ extern crate library;
extern crate search; extern crate search;
extern crate templates; extern crate templates;
extern crate utils; extern crate utils;
extern crate link_checker;
#[cfg(test)] #[cfg(test)]
extern crate tempfile; extern crate tempfile;
@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle};
use tera::{Context, Tera}; use tera::{Context, Tera};
use config::{get_config, Config}; use config::{get_config, Config};
use errors::{Error, Result}; use errors::{Error, ErrorKind, Result};
use front_matter::InsertAnchor; use front_matter::InsertAnchor;
use library::{ use library::{
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy, find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy,
@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA};
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists}; use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists};
use utils::net::get_available_port; use utils::net::get_available_port;
use utils::templates::{render_template, rewrite_theme_paths}; use utils::templates::{render_template, rewrite_theme_paths};
use link_checker::check_url;
#[derive(Debug)] #[derive(Debug)]
pub struct Site { pub struct Site {
@ -243,9 +245,64 @@ impl Site {
self.render_markdown()?; self.render_markdown()?;
self.register_tera_global_fns(); self.register_tera_global_fns();
if self.config.check_external_links {
self.check_external_links()?;
}
Ok(()) Ok(())
} }
/// Checks every external link recorded on the pages and sections of the library.
///
/// Links are gathered together with the path of the file they were found in,
/// then checked concurrently with `check_url` on a dedicated rayon thread pool.
///
/// # Errors
///
/// Returns an error if the thread pool cannot be built, or if at least one link
/// is reported as invalid. In the latter case the message lists *all* dead
/// links, one per line, each prefixed by the file it was found in.
pub fn check_external_links(&self) -> Result<()> {
    // Nothing is mutated below, so a shared read lock is sufficient and does
    // not block other readers of the library.
    let library = self.library.read().expect("Get lock for check_external_links");

    // Pair each link with a borrow of its originating file path; both borrows
    // live as long as the library guard, so no per-link clone is needed.
    let page_links = library.pages().values().flat_map(|p| {
        let path = &p.file.path;
        p.external_links.iter().map(move |l| (path, l))
    });
    let section_links = library.sections().values().flat_map(|s| {
        let path = &s.file.path;
        s.external_links.iter().map(move |l| (path, l))
    });
    let all_links = page_links.chain(section_links).collect::<Vec<_>>();

    // Without this guard `num_threads(0)` would ask rayon to auto-size a pool
    // that then has no work to do.
    if all_links.is_empty() {
        return Ok(());
    }

    // create thread pool with lots of threads so we can fetch
    // (almost) all pages simultaneously
    let threads = std::cmp::min(all_links.len(), 32);
    let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error {
        kind: ErrorKind::Msg(e.to_string().into()),
        source: None,
    })?;

    // Keep only the failing links, together with the file they came from.
    let errors: Vec<_> = pool.install(|| {
        all_links
            .par_iter()
            .filter_map(|(path, link)| {
                let res = check_url(link);
                if res.is_valid() {
                    None
                } else {
                    Some((path, res))
                }
            })
            .collect()
    });

    if errors.is_empty() {
        return Ok(());
    }

    let msg = errors
        .into_iter()
        .map(|(path, check_res)| format!("Dead link in {:?}: {:?}", path, check_res))
        .collect::<Vec<_>>()
        .join("\n");
    Err(Error {
        kind: ErrorKind::Msg(msg.into()),
        source: None,
    })
}
/// Insert a default index section for each language if necessary so we don't need to create /// Insert a default index section for each language if necessary so we don't need to create
/// a _index.md to render the index page at the root of the site /// a _index.md to render the index page at the root of the site
pub fn create_default_index_sections(&mut self) -> Result<()> { pub fn create_default_index_sections(&mut self) -> Result<()> {

View file

@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit
$ zola --config config.staging.toml serve $ zola --config config.staging.toml serve
``` ```
### check
The check subcommand will try to build all pages just like the build command would, but without writing any of the
results to disk. Additionally, it always checks external links regardless of the site configuration.
## Colored output ## Colored output
All of the commands will emit colored output if your terminal supports it. All of the commands will emit colored output if your terminal supports it.

View file

@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> {
.takes_value(false) .takes_value(false)
.help("Do not start a server, just re-build project on changes") .help("Do not start a server, just re-build project on changes")
]), ]),
SubCommand::with_name("check")
.about("Try building the project without rendering it. Checks links")
]) ])
} }

28
src/cmd/check.rs Normal file
View file

@ -0,0 +1,28 @@
use std::env;
use std::path::PathBuf;
use errors::Result;
use site::Site;
use console;
/// Loads the site with external link checking forced on, then prints a summary.
///
/// This function never invokes a build step; it only calls `Site::load` and the
/// console reporting helpers.
///
/// * `config_file` - path of the configuration file passed to `Site::new`.
/// * `base_path` - root directory of the site; defaults to the current
///   working directory when `None`.
/// * `base_url` - optional override applied through `Site::set_base_url`.
///
/// # Errors
///
/// Propagates any error returned by `Site::new` or `Site::load`.
/// NOTE(review): dead links are presumably reported through `Site::load` once
/// `check_external_links` is set — confirm against the site crate.
///
/// # Panics
///
/// Panics if `base_path` is `None` and the current directory cannot be read.
pub fn check(
    config_file: &str,
    base_path: Option<&str>,
    base_url: Option<&str>,
) -> Result<()> {
    // Resolve the current directory lazily: it is only queried (and can only
    // fail) when the caller did not provide an explicit base path.
    let bp = base_path.map(PathBuf::from).unwrap_or_else(|| {
        env::current_dir().expect("Current directory must be accessible when no base path is given")
    });
    let mut site = Site::new(bp, config_file)?;
    // Force the checking of external links
    site.config.check_external_links = true;
    // Disable syntax highlighting since the results won't be used
    // and this operation can be expensive.
    site.config.highlight_code = false;
    if let Some(b) = base_url {
        site.set_base_url(b.to_string());
    }
    site.load()?;
    console::notify_site_size(&site);
    console::warn_about_ignored_pages(&site);
    Ok(())
}

View file

@ -1,7 +1,9 @@
mod build; mod build;
mod init; mod init;
mod serve; mod serve;
mod check;
pub use self::build::build; pub use self::build::build;
pub use self::init::create_new_project; pub use self::init::create_new_project;
pub use self::serve::serve; pub use self::serve::serve;
pub use self::check::check;

View file

@ -89,6 +89,21 @@ fn main() {
} }
}; };
} }
("check", Some(matches)) => {
console::info("Checking site...");
let start = Instant::now();
match cmd::check(
config_file,
matches.value_of("base_path"),
matches.value_of("base_url"),
) {
Ok(()) => console::report_elapsed_time(start),
Err(e) => {
console::unravel_errors("Failed to check the site", &e);
::std::process::exit(1);
}
};
}
_ => unreachable!(), _ => unreachable!(),
} }
} }