Check command global (#683)
* Add check subcommand * Add some brief documentation for the check subcommand * Start working on parallel link checks * Check all external links in Site * Return *all* dead links in site
This commit is contained in:
parent
93338c2762
commit
0d964204c3
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2162,6 +2162,7 @@ dependencies = [
|
||||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"imageproc 0.1.0",
|
"imageproc 0.1.0",
|
||||||
"library 0.1.0",
|
"library 0.1.0",
|
||||||
|
"link_checker 0.1.0",
|
||||||
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"search 0.1.0",
|
"search 0.1.0",
|
||||||
|
|
|
@ -76,6 +76,8 @@ pub struct Page {
|
||||||
pub lang: String,
|
pub lang: String,
|
||||||
/// Contains all the translated version of that page
|
/// Contains all the translated version of that page
|
||||||
pub translations: Vec<Key>,
|
pub translations: Vec<Key>,
|
||||||
|
/// Contains the external links that need to be checked
|
||||||
|
pub external_links: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Page {
|
impl Page {
|
||||||
|
@ -104,6 +106,7 @@ impl Page {
|
||||||
reading_time: None,
|
reading_time: None,
|
||||||
lang: String::new(),
|
lang: String::new(),
|
||||||
translations: Vec::new(),
|
translations: Vec::new(),
|
||||||
|
external_links: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,6 +266,7 @@ impl Page {
|
||||||
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned());
|
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned());
|
||||||
self.content = res.body;
|
self.content = res.body;
|
||||||
self.toc = res.toc;
|
self.toc = res.toc;
|
||||||
|
self.external_links = res.external_links;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -339,6 +343,7 @@ impl Default for Page {
|
||||||
reading_time: None,
|
reading_time: None,
|
||||||
lang: String::new(),
|
lang: String::new(),
|
||||||
translations: Vec::new(),
|
translations: Vec::new(),
|
||||||
|
external_links: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,8 @@ pub struct Section {
|
||||||
pub lang: String,
|
pub lang: String,
|
||||||
/// Contains all the translated version of that section
|
/// Contains all the translated version of that section
|
||||||
pub translations: Vec<Key>,
|
pub translations: Vec<Key>,
|
||||||
|
/// Contains the external links that need to be checked
|
||||||
|
pub external_links: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Section {
|
impl Section {
|
||||||
|
@ -85,6 +87,7 @@ impl Section {
|
||||||
reading_time: None,
|
reading_time: None,
|
||||||
lang: String::new(),
|
lang: String::new(),
|
||||||
translations: Vec::new(),
|
translations: Vec::new(),
|
||||||
|
external_links: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,6 +192,8 @@ impl Section {
|
||||||
})?;
|
})?;
|
||||||
self.content = res.body;
|
self.content = res.body;
|
||||||
self.toc = res.toc;
|
self.toc = res.toc;
|
||||||
|
self.external_links = res.external_links;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,6 +260,7 @@ impl Default for Section {
|
||||||
word_count: None,
|
word_count: None,
|
||||||
lang: String::new(),
|
lang: String::new(),
|
||||||
translations: Vec::new(),
|
translations: Vec::new(),
|
||||||
|
external_links: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
|
||||||
use context::RenderContext;
|
use context::RenderContext;
|
||||||
use errors::{Error, Result};
|
use errors::{Error, Result};
|
||||||
use front_matter::InsertAnchor;
|
use front_matter::InsertAnchor;
|
||||||
use link_checker::check_url;
|
|
||||||
use table_of_contents::{make_table_of_contents, Header};
|
use table_of_contents::{make_table_of_contents, Header};
|
||||||
use utils::site::resolve_internal_link;
|
use utils::site::resolve_internal_link;
|
||||||
use utils::vec::InsertMany;
|
use utils::vec::InsertMany;
|
||||||
|
@ -25,6 +24,7 @@ pub struct Rendered {
|
||||||
pub body: String,
|
pub body: String,
|
||||||
pub summary_len: Option<usize>,
|
pub summary_len: Option<usize>,
|
||||||
pub toc: Vec<Header>,
|
pub toc: Vec<Header>,
|
||||||
|
pub external_links: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// tracks a header in a slice of pulldown-cmark events
|
// tracks a header in a slice of pulldown-cmark events
|
||||||
|
@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
|
||||||
&& !link.starts_with("mailto:")
|
&& !link.starts_with("mailto:")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<String> {
|
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec<String>) -> Result<String> {
|
||||||
if link_type == LinkType::Email {
|
if link_type == LinkType::Email {
|
||||||
return Ok(link.to_string());
|
return Ok(link.to_string());
|
||||||
}
|
}
|
||||||
|
@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<
|
||||||
}
|
}
|
||||||
} else if is_colocated_asset_link(&link) {
|
} else if is_colocated_asset_link(&link) {
|
||||||
format!("{}{}", context.current_page_permalink, link)
|
format!("{}{}", context.current_page_permalink, link)
|
||||||
} else if context.config.check_external_links
|
|
||||||
&& !link.starts_with('#')
|
|
||||||
&& !link.starts_with("mailto:")
|
|
||||||
{
|
|
||||||
let res = check_url(&link);
|
|
||||||
if res.is_valid() {
|
|
||||||
link.to_string()
|
|
||||||
} else {
|
|
||||||
return Err(format!("Link {} is not valid: {}", link, res.message()).into());
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
|
if !link.starts_with('#') && !link.starts_with("mailto:") {
|
||||||
|
external_links.push(link.to_owned());
|
||||||
|
}
|
||||||
link.to_string()
|
link.to_string()
|
||||||
};
|
};
|
||||||
Ok(result)
|
Ok(result)
|
||||||
|
@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
||||||
|
|
||||||
let mut inserted_anchors: Vec<String> = vec![];
|
let mut inserted_anchors: Vec<String> = vec![];
|
||||||
let mut headers: Vec<Header> = vec![];
|
let mut headers: Vec<Header> = vec![];
|
||||||
|
let mut external_links = Vec::new();
|
||||||
|
|
||||||
let mut opts = Options::empty();
|
let mut opts = Options::empty();
|
||||||
let mut has_summary = false;
|
let mut has_summary = false;
|
||||||
|
@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
||||||
Event::Start(Tag::Image(link_type, src, title))
|
Event::Start(Tag::Image(link_type, src, title))
|
||||||
}
|
}
|
||||||
Event::Start(Tag::Link(link_type, link, title)) => {
|
Event::Start(Tag::Link(link_type, link, title)) => {
|
||||||
let fixed_link = match fix_link(link_type, &link, context) {
|
let fixed_link = match fix_link(link_type, &link, context, &mut external_links) {
|
||||||
Ok(fixed_link) => fixed_link,
|
Ok(fixed_link) => fixed_link,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
error = Some(err);
|
error = Some(err);
|
||||||
|
@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
|
||||||
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
|
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
|
||||||
body: html,
|
body: html,
|
||||||
toc: make_table_of_contents(headers),
|
toc: make_table_of_contents(headers),
|
||||||
|
external_links: external_links,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -697,10 +697,9 @@ Some text
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn can_validate_valid_external_links() {
|
fn correctly_captures_external_links() {
|
||||||
let permalinks_ctx = HashMap::new();
|
let permalinks_ctx = HashMap::new();
|
||||||
let mut config = Config::default();
|
let config = Config::default();
|
||||||
config.check_external_links = true;
|
|
||||||
let context = RenderContext::new(
|
let context = RenderContext::new(
|
||||||
&ZOLA_TERA,
|
&ZOLA_TERA,
|
||||||
&config,
|
&config,
|
||||||
|
@ -708,58 +707,14 @@ fn can_validate_valid_external_links() {
|
||||||
&permalinks_ctx,
|
&permalinks_ctx,
|
||||||
InsertAnchor::None,
|
InsertAnchor::None,
|
||||||
);
|
);
|
||||||
let res = render_content("[a link](http://google.com)", &context).unwrap();
|
let content = "
|
||||||
assert_eq!(res.body, "<p><a href=\"http://google.com\">a link</a></p>\n");
|
[a link](http://google.com)
|
||||||
}
|
[a link](http://google.comy)
|
||||||
|
Email: [foo@bar.baz](mailto:foo@bar.baz)
|
||||||
#[test]
|
Email: <foo@bar.baz>
|
||||||
fn can_show_error_message_for_invalid_external_links() {
|
";
|
||||||
let permalinks_ctx = HashMap::new();
|
let res = render_content(content, &context).unwrap();
|
||||||
let mut config = Config::default();
|
assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]);
|
||||||
config.check_external_links = true;
|
|
||||||
let context = RenderContext::new(
|
|
||||||
&ZOLA_TERA,
|
|
||||||
&config,
|
|
||||||
"https://vincent.is/about/",
|
|
||||||
&permalinks_ctx,
|
|
||||||
InsertAnchor::None,
|
|
||||||
);
|
|
||||||
let res = render_content("[a link](http://google.comy)", &context);
|
|
||||||
assert!(res.is_err());
|
|
||||||
let err = res.unwrap_err();
|
|
||||||
assert!(format!("{}", err).contains("Link http://google.comy is not valid"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn doesnt_try_to_validate_email_links_mailto() {
|
|
||||||
let permalinks_ctx = HashMap::new();
|
|
||||||
let mut config = Config::default();
|
|
||||||
config.check_external_links = true;
|
|
||||||
let context = RenderContext::new(
|
|
||||||
&ZOLA_TERA,
|
|
||||||
&config,
|
|
||||||
"https://vincent.is/about/",
|
|
||||||
&permalinks_ctx,
|
|
||||||
InsertAnchor::None,
|
|
||||||
);
|
|
||||||
let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap();
|
|
||||||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn doesnt_try_to_validate_email_links_angled_brackets() {
|
|
||||||
let permalinks_ctx = HashMap::new();
|
|
||||||
let mut config = Config::default();
|
|
||||||
config.check_external_links = true;
|
|
||||||
let context = RenderContext::new(
|
|
||||||
&ZOLA_TERA,
|
|
||||||
&config,
|
|
||||||
"https://vincent.is/about/",
|
|
||||||
&permalinks_ctx,
|
|
||||||
InsertAnchor::None,
|
|
||||||
);
|
|
||||||
let res = render_content("Email: <foo@bar.baz>", &context).unwrap();
|
|
||||||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" }
|
||||||
search = { path = "../search" }
|
search = { path = "../search" }
|
||||||
imageproc = { path = "../imageproc" }
|
imageproc = { path = "../imageproc" }
|
||||||
library = { path = "../library" }
|
library = { path = "../library" }
|
||||||
|
link_checker = { path = "../link_checker" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
|
|
@ -15,6 +15,7 @@ extern crate library;
|
||||||
extern crate search;
|
extern crate search;
|
||||||
extern crate templates;
|
extern crate templates;
|
||||||
extern crate utils;
|
extern crate utils;
|
||||||
|
extern crate link_checker;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
extern crate tempfile;
|
extern crate tempfile;
|
||||||
|
@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle};
|
||||||
use tera::{Context, Tera};
|
use tera::{Context, Tera};
|
||||||
|
|
||||||
use config::{get_config, Config};
|
use config::{get_config, Config};
|
||||||
use errors::{Error, Result};
|
use errors::{Error, ErrorKind, Result};
|
||||||
use front_matter::InsertAnchor;
|
use front_matter::InsertAnchor;
|
||||||
use library::{
|
use library::{
|
||||||
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy,
|
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy,
|
||||||
|
@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA};
|
||||||
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists};
|
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists};
|
||||||
use utils::net::get_available_port;
|
use utils::net::get_available_port;
|
||||||
use utils::templates::{render_template, rewrite_theme_paths};
|
use utils::templates::{render_template, rewrite_theme_paths};
|
||||||
|
use link_checker::check_url;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Site {
|
pub struct Site {
|
||||||
|
@ -243,9 +245,64 @@ impl Site {
|
||||||
self.render_markdown()?;
|
self.render_markdown()?;
|
||||||
self.register_tera_global_fns();
|
self.register_tera_global_fns();
|
||||||
|
|
||||||
|
if self.config.check_external_links {
|
||||||
|
self.check_external_links()?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn check_external_links(&self) -> Result<()> {
|
||||||
|
let library = self.library.write().expect("Get lock for check_external_links");
|
||||||
|
let page_links = library.pages()
|
||||||
|
.values()
|
||||||
|
.map(|p| {
|
||||||
|
let path = &p.file.path;
|
||||||
|
p.external_links.iter().map(move |l| (path.clone(), l))
|
||||||
|
})
|
||||||
|
.flatten();
|
||||||
|
let section_links = library.sections()
|
||||||
|
.values()
|
||||||
|
.map(|p| {
|
||||||
|
let path = &p.file.path;
|
||||||
|
p.external_links.iter().map(move |l| (path.clone(), l))
|
||||||
|
})
|
||||||
|
.flatten();
|
||||||
|
let all_links = page_links.chain(section_links).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// create thread pool with lots of threads so we can fetch
|
||||||
|
// (almost) all pages simultaneously
|
||||||
|
let threads = std::cmp::min(all_links.len(), 32);
|
||||||
|
let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error {
|
||||||
|
kind: ErrorKind::Msg(e.to_string().into()),
|
||||||
|
source: None,
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let errors: Vec<_> = pool.install(|| {
|
||||||
|
all_links.par_iter().filter_map(|(path, link)| {
|
||||||
|
let res = check_url(link);
|
||||||
|
if res.is_valid() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some((path, res))
|
||||||
|
}
|
||||||
|
}).collect()
|
||||||
|
});
|
||||||
|
|
||||||
|
if errors.is_empty() {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
let msg = errors.into_iter()
|
||||||
|
.map(|(path, check_res)| format!("Dead link in {:?}: {:?}", path, check_res))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n");
|
||||||
|
Err(Error {
|
||||||
|
kind: ErrorKind::Msg(msg.into()),
|
||||||
|
source: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Insert a default index section for each language if necessary so we don't need to create
|
/// Insert a default index section for each language if necessary so we don't need to create
|
||||||
/// a _index.md to render the index page at the root of the site
|
/// a _index.md to render the index page at the root of the site
|
||||||
pub fn create_default_index_sections(&mut self) -> Result<()> {
|
pub fn create_default_index_sections(&mut self) -> Result<()> {
|
||||||
|
|
|
@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit
|
||||||
$ zola --config config.staging.toml serve
|
$ zola --config config.staging.toml serve
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### check
|
||||||
|
|
||||||
|
The check subcommand will try to build all pages just like the build command would, but without writing any of the
|
||||||
|
results to disk. Additionally, it always checks external links regardless of the site configuration.
|
||||||
|
|
||||||
## Colored output
|
## Colored output
|
||||||
|
|
||||||
Any of the three commands will emit colored output if your terminal supports it.
|
Any of the four commands will emit colored output if your terminal supports it.
|
||||||
|
|
|
@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> {
|
||||||
.takes_value(false)
|
.takes_value(false)
|
||||||
.help("Do not start a server, just re-build project on changes")
|
.help("Do not start a server, just re-build project on changes")
|
||||||
]),
|
]),
|
||||||
|
SubCommand::with_name("check")
|
||||||
|
.about("Try building the project without rendering it. Checks links")
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
28
src/cmd/check.rs
Normal file
28
src/cmd/check.rs
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
use std::env;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use errors::Result;
|
||||||
|
use site::Site;
|
||||||
|
|
||||||
|
use console;
|
||||||
|
|
||||||
|
pub fn check(
|
||||||
|
config_file: &str,
|
||||||
|
base_path: Option<&str>,
|
||||||
|
base_url: Option<&str>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let bp = base_path.map(PathBuf::from).unwrap_or(env::current_dir().unwrap());
|
||||||
|
let mut site = Site::new(bp, config_file)?;
|
||||||
|
// Force the checking of external links
|
||||||
|
site.config.check_external_links = true;
|
||||||
|
// Disable syntax highlighting since the results won't be used
|
||||||
|
// and this operation can be expensive.
|
||||||
|
site.config.highlight_code = false;
|
||||||
|
if let Some(b) = base_url {
|
||||||
|
site.set_base_url(b.to_string());
|
||||||
|
}
|
||||||
|
site.load()?;
|
||||||
|
console::notify_site_size(&site);
|
||||||
|
console::warn_about_ignored_pages(&site);
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -1,7 +1,9 @@
|
||||||
mod build;
|
mod build;
|
||||||
mod init;
|
mod init;
|
||||||
mod serve;
|
mod serve;
|
||||||
|
mod check;
|
||||||
|
|
||||||
pub use self::build::build;
|
pub use self::build::build;
|
||||||
pub use self::init::create_new_project;
|
pub use self::init::create_new_project;
|
||||||
pub use self::serve::serve;
|
pub use self::serve::serve;
|
||||||
|
pub use self::check::check;
|
||||||
|
|
15
src/main.rs
15
src/main.rs
|
@ -89,6 +89,21 @@ fn main() {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
("check", Some(matches)) => {
|
||||||
|
console::info("Checking site...");
|
||||||
|
let start = Instant::now();
|
||||||
|
match cmd::check(
|
||||||
|
config_file,
|
||||||
|
matches.value_of("base_path"),
|
||||||
|
matches.value_of("base_url"),
|
||||||
|
) {
|
||||||
|
Ok(()) => console::report_elapsed_time(start),
|
||||||
|
Err(e) => {
|
||||||
|
console::unravel_errors("Failed to check the site", &e);
|
||||||
|
::std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue