Check command global (#683)
* Add check subcommand * Add some brief documentation for the check subcommand * Start working on parallel link checks * Check all external links in Site * Return *all* dead links in site
This commit is contained in:
parent
93338c2762
commit
0d964204c3
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2162,6 +2162,7 @@ dependencies = [
|
|||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"imageproc 0.1.0",
|
||||
"library 0.1.0",
|
||||
"link_checker 0.1.0",
|
||||
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"search 0.1.0",
|
||||
|
|
|
@ -76,6 +76,8 @@ pub struct Page {
|
|||
pub lang: String,
|
||||
/// Contains all the translated versions of that page
|
||||
pub translations: Vec<Key>,
|
||||
/// Contains the external links that need to be checked
|
||||
pub external_links: Vec<String>,
|
||||
}
|
||||
|
||||
impl Page {
|
||||
|
@ -104,6 +106,7 @@ impl Page {
|
|||
reading_time: None,
|
||||
lang: String::new(),
|
||||
translations: Vec::new(),
|
||||
external_links: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -263,6 +266,7 @@ impl Page {
|
|||
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned());
|
||||
self.content = res.body;
|
||||
self.toc = res.toc;
|
||||
self.external_links = res.external_links;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -339,6 +343,7 @@ impl Default for Page {
|
|||
reading_time: None,
|
||||
lang: String::new(),
|
||||
translations: Vec::new(),
|
||||
external_links: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,8 @@ pub struct Section {
|
|||
pub lang: String,
|
||||
/// Contains all the translated versions of that section
|
||||
pub translations: Vec<Key>,
|
||||
/// Contains the external links that need to be checked
|
||||
pub external_links: Vec<String>,
|
||||
}
|
||||
|
||||
impl Section {
|
||||
|
@ -85,6 +87,7 @@ impl Section {
|
|||
reading_time: None,
|
||||
lang: String::new(),
|
||||
translations: Vec::new(),
|
||||
external_links: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,6 +192,8 @@ impl Section {
|
|||
})?;
|
||||
self.content = res.body;
|
||||
self.toc = res.toc;
|
||||
self.external_links = res.external_links;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -255,6 +260,7 @@ impl Default for Section {
|
|||
word_count: None,
|
||||
lang: String::new(),
|
||||
translations: Vec::new(),
|
||||
external_links: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
|
|||
use context::RenderContext;
|
||||
use errors::{Error, Result};
|
||||
use front_matter::InsertAnchor;
|
||||
use link_checker::check_url;
|
||||
use table_of_contents::{make_table_of_contents, Header};
|
||||
use utils::site::resolve_internal_link;
|
||||
use utils::vec::InsertMany;
|
||||
|
@ -25,6 +24,7 @@ pub struct Rendered {
|
|||
pub body: String,
|
||||
pub summary_len: Option<usize>,
|
||||
pub toc: Vec<Header>,
|
||||
pub external_links: Vec<String>,
|
||||
}
|
||||
|
||||
// tracks a header in a slice of pulldown-cmark events
|
||||
|
@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
|
|||
&& !link.starts_with("mailto:")
|
||||
}
|
||||
|
||||
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<String> {
|
||||
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec<String>) -> Result<String> {
|
||||
if link_type == LinkType::Email {
|
||||
return Ok(link.to_string());
|
||||
}
|
||||
|
@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<
|
|||
}
|
||||
} else if is_colocated_asset_link(&link) {
|
||||
format!("{}{}", context.current_page_permalink, link)
|
||||
} else if context.config.check_external_links
|
||||
&& !link.starts_with('#')
|
||||
&& !link.starts_with("mailto:")
|
||||
{
|
||||
let res = check_url(&link);
|
||||
if res.is_valid() {
|
||||
link.to_string()
|
||||
} else {
|
||||
return Err(format!("Link {} is not valid: {}", link, res.message()).into());
|
||||
if !link.starts_with('#') && !link.starts_with("mailto:") {
|
||||
external_links.push(link.to_owned());
|
||||
}
|
||||
} else {
|
||||
link.to_string()
|
||||
};
|
||||
Ok(result)
|
||||
|
@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
|||
|
||||
let mut inserted_anchors: Vec<String> = vec![];
|
||||
let mut headers: Vec<Header> = vec![];
|
||||
let mut external_links = Vec::new();
|
||||
|
||||
let mut opts = Options::empty();
|
||||
let mut has_summary = false;
|
||||
|
@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
|||
Event::Start(Tag::Image(link_type, src, title))
|
||||
}
|
||||
Event::Start(Tag::Link(link_type, link, title)) => {
|
||||
let fixed_link = match fix_link(link_type, &link, context) {
|
||||
let fixed_link = match fix_link(link_type, &link, context, &mut external_links) {
|
||||
Ok(fixed_link) => fixed_link,
|
||||
Err(err) => {
|
||||
error = Some(err);
|
||||
|
@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
|||
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
|
||||
body: html,
|
||||
toc: make_table_of_contents(headers),
|
||||
external_links: external_links,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -697,10 +697,9 @@ Some text
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn can_validate_valid_external_links() {
|
||||
fn correctly_captures_external_links() {
|
||||
let permalinks_ctx = HashMap::new();
|
||||
let mut config = Config::default();
|
||||
config.check_external_links = true;
|
||||
let config = Config::default();
|
||||
let context = RenderContext::new(
|
||||
&ZOLA_TERA,
|
||||
&config,
|
||||
|
@ -708,58 +707,14 @@ fn can_validate_valid_external_links() {
|
|||
&permalinks_ctx,
|
||||
InsertAnchor::None,
|
||||
);
|
||||
let res = render_content("[a link](http://google.com)", &context).unwrap();
|
||||
assert_eq!(res.body, "<p><a href=\"http://google.com\">a link</a></p>\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_show_error_message_for_invalid_external_links() {
|
||||
let permalinks_ctx = HashMap::new();
|
||||
let mut config = Config::default();
|
||||
config.check_external_links = true;
|
||||
let context = RenderContext::new(
|
||||
&ZOLA_TERA,
|
||||
&config,
|
||||
"https://vincent.is/about/",
|
||||
&permalinks_ctx,
|
||||
InsertAnchor::None,
|
||||
);
|
||||
let res = render_content("[a link](http://google.comy)", &context);
|
||||
assert!(res.is_err());
|
||||
let err = res.unwrap_err();
|
||||
assert!(format!("{}", err).contains("Link http://google.comy is not valid"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doesnt_try_to_validate_email_links_mailto() {
|
||||
let permalinks_ctx = HashMap::new();
|
||||
let mut config = Config::default();
|
||||
config.check_external_links = true;
|
||||
let context = RenderContext::new(
|
||||
&ZOLA_TERA,
|
||||
&config,
|
||||
"https://vincent.is/about/",
|
||||
&permalinks_ctx,
|
||||
InsertAnchor::None,
|
||||
);
|
||||
let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap();
|
||||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doesnt_try_to_validate_email_links_angled_brackets() {
|
||||
let permalinks_ctx = HashMap::new();
|
||||
let mut config = Config::default();
|
||||
config.check_external_links = true;
|
||||
let context = RenderContext::new(
|
||||
&ZOLA_TERA,
|
||||
&config,
|
||||
"https://vincent.is/about/",
|
||||
&permalinks_ctx,
|
||||
InsertAnchor::None,
|
||||
);
|
||||
let res = render_content("Email: <foo@bar.baz>", &context).unwrap();
|
||||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
|
||||
let content = "
|
||||
[a link](http://google.com)
|
||||
[a link](http://google.comy)
|
||||
Email: [foo@bar.baz](mailto:foo@bar.baz)
|
||||
Email: <foo@bar.baz>
|
||||
";
|
||||
let res = render_content(content, &context).unwrap();
|
||||
assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" }
|
|||
search = { path = "../search" }
|
||||
imageproc = { path = "../imageproc" }
|
||||
library = { path = "../library" }
|
||||
link_checker = { path = "../link_checker" }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
|
|
@ -15,6 +15,7 @@ extern crate library;
|
|||
extern crate search;
|
||||
extern crate templates;
|
||||
extern crate utils;
|
||||
extern crate link_checker;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate tempfile;
|
||||
|
@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle};
|
|||
use tera::{Context, Tera};
|
||||
|
||||
use config::{get_config, Config};
|
||||
use errors::{Error, Result};
|
||||
use errors::{Error, ErrorKind, Result};
|
||||
use front_matter::InsertAnchor;
|
||||
use library::{
|
||||
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy,
|
||||
|
@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA};
|
|||
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists};
|
||||
use utils::net::get_available_port;
|
||||
use utils::templates::{render_template, rewrite_theme_paths};
|
||||
use link_checker::check_url;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Site {
|
||||
|
@ -243,9 +245,64 @@ impl Site {
|
|||
self.render_markdown()?;
|
||||
self.register_tera_global_fns();
|
||||
|
||||
if self.config.check_external_links {
|
||||
self.check_external_links()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn check_external_links(&self) -> Result<()> {
|
||||
let library = self.library.write().expect("Get lock for check_external_links");
|
||||
let page_links = library.pages()
|
||||
.values()
|
||||
.map(|p| {
|
||||
let path = &p.file.path;
|
||||
p.external_links.iter().map(move |l| (path.clone(), l))
|
||||
})
|
||||
.flatten();
|
||||
let section_links = library.sections()
|
||||
.values()
|
||||
.map(|p| {
|
||||
let path = &p.file.path;
|
||||
p.external_links.iter().map(move |l| (path.clone(), l))
|
||||
})
|
||||
.flatten();
|
||||
let all_links = page_links.chain(section_links).collect::<Vec<_>>();
|
||||
|
||||
// create thread pool with lots of threads so we can fetch
|
||||
// (almost) all pages simultaneously
|
||||
let threads = std::cmp::min(all_links.len(), 32);
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error {
|
||||
kind: ErrorKind::Msg(e.to_string().into()),
|
||||
source: None,
|
||||
})?;
|
||||
|
||||
let errors: Vec<_> = pool.install(|| {
|
||||
all_links.par_iter().filter_map(|(path, link)| {
|
||||
let res = check_url(link);
|
||||
if res.is_valid() {
|
||||
None
|
||||
} else {
|
||||
Some((path, res))
|
||||
}
|
||||
}).collect()
|
||||
});
|
||||
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
let msg = errors.into_iter()
|
||||
.map(|(path, check_res)| format!("Dead link in {:?}: {:?}", path, check_res))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
Err(Error {
|
||||
kind: ErrorKind::Msg(msg.into()),
|
||||
source: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a default index section for each language if necessary so we don't need to create
|
||||
/// a _index.md to render the index page at the root of the site
|
||||
pub fn create_default_index_sections(&mut self) -> Result<()> {
|
||||
|
|
|
@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit
|
|||
$ zola --config config.staging.toml serve
|
||||
```
|
||||
|
||||
### check
|
||||
|
||||
The check subcommand will try to build all pages just like the build command would, but without writing any of the
|
||||
results to disk. Additionally, it always checks external links regardless of the site configuration.
|
||||
|
||||
## Colored output
|
||||
|
||||
All of these commands will emit colored output if your terminal supports it.
|
||||
|
|
|
@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> {
|
|||
.takes_value(false)
|
||||
.help("Do not start a server, just re-build project on changes")
|
||||
]),
|
||||
SubCommand::with_name("check")
|
||||
.about("Try building the project without rendering it. Checks links")
|
||||
])
|
||||
}
|
||||
|
|
28
src/cmd/check.rs
Normal file
28
src/cmd/check.rs
Normal file
|
@ -0,0 +1,28 @@
|
|||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use errors::Result;
|
||||
use site::Site;
|
||||
|
||||
use console;
|
||||
|
||||
pub fn check(
|
||||
config_file: &str,
|
||||
base_path: Option<&str>,
|
||||
base_url: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let bp = base_path.map(PathBuf::from).unwrap_or(env::current_dir().unwrap());
|
||||
let mut site = Site::new(bp, config_file)?;
|
||||
// Force the checking of external links
|
||||
site.config.check_external_links = true;
|
||||
// Disable syntax highlighting since the results won't be used
|
||||
// and this operation can be expensive.
|
||||
site.config.highlight_code = false;
|
||||
if let Some(b) = base_url {
|
||||
site.set_base_url(b.to_string());
|
||||
}
|
||||
site.load()?;
|
||||
console::notify_site_size(&site);
|
||||
console::warn_about_ignored_pages(&site);
|
||||
Ok(())
|
||||
}
|
|
@ -1,7 +1,9 @@
|
|||
mod build;
|
||||
mod init;
|
||||
mod serve;
|
||||
mod check;
|
||||
|
||||
pub use self::build::build;
|
||||
pub use self::init::create_new_project;
|
||||
pub use self::serve::serve;
|
||||
pub use self::check::check;
|
||||
|
|
15
src/main.rs
15
src/main.rs
|
@ -89,6 +89,21 @@ fn main() {
|
|||
}
|
||||
};
|
||||
}
|
||||
("check", Some(matches)) => {
|
||||
console::info("Checking site...");
|
||||
let start = Instant::now();
|
||||
match cmd::check(
|
||||
config_file,
|
||||
matches.value_of("base_path"),
|
||||
matches.value_of("base_url"),
|
||||
) {
|
||||
Ok(()) => console::report_elapsed_time(start),
|
||||
Err(e) => {
|
||||
console::unravel_errors("Failed to check the site", &e);
|
||||
::std::process::exit(1);
|
||||
}
|
||||
};
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue