diff --git a/Cargo.lock b/Cargo.lock index be7fcfa7..2f90cda0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2162,6 +2162,7 @@ dependencies = [ "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "imageproc 0.1.0", "library 0.1.0", + "link_checker 0.1.0", "rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "search 0.1.0", diff --git a/components/library/src/content/page.rs b/components/library/src/content/page.rs index aa48a4fd..741ec33a 100644 --- a/components/library/src/content/page.rs +++ b/components/library/src/content/page.rs @@ -76,6 +76,8 @@ pub struct Page { pub lang: String, /// Contains all the translated version of that page pub translations: Vec, + /// Contains the external links that need to be checked + pub external_links: Vec, } impl Page { @@ -104,6 +106,7 @@ impl Page { reading_time: None, lang: String::new(), translations: Vec::new(), + external_links: Vec::new(), } } @@ -263,6 +266,7 @@ impl Page { self.summary = res.summary_len.map(|l| res.body[0..l].to_owned()); self.content = res.body; self.toc = res.toc; + self.external_links = res.external_links; Ok(()) } @@ -339,6 +343,7 @@ impl Default for Page { reading_time: None, lang: String::new(), translations: Vec::new(), + external_links: Vec::new(), } } } diff --git a/components/library/src/content/section.rs b/components/library/src/content/section.rs index ebc9ca1a..cc5c0acb 100644 --- a/components/library/src/content/section.rs +++ b/components/library/src/content/section.rs @@ -56,6 +56,8 @@ pub struct Section { pub lang: String, /// Contains all the translated version of that section pub translations: Vec, + /// Contains the external links that need to be checked + pub external_links: Vec, } impl Section { @@ -85,6 +87,7 @@ impl Section { reading_time: None, lang: String::new(), translations: Vec::new(), + external_links: Vec::new(), } } @@ -189,6 +192,8 @@ impl Section { })?; self.content = res.body; self.toc = res.toc; + self.external_links = res.external_links; + Ok(()) } @@ -255,6 +260,7 @@ impl Default for Section { word_count: None, lang: String::new(), translations: Vec::new(), + external_links: Vec::new(), } } } diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 49d224e9..8843e8ff 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET}; use context::RenderContext; use errors::{Error, Result}; use front_matter::InsertAnchor; -use link_checker::check_url; use table_of_contents::{make_table_of_contents, Header}; use utils::site::resolve_internal_link; use utils::vec::InsertMany; @@ -25,6 +24,7 @@ pub struct Rendered { pub body: String, pub summary_len: Option, pub toc: Vec
, + pub external_links: Vec, } // tracks a header in a slice of pulldown-cmark events @@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool { && !link.starts_with("mailto:") } -fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result { +fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec) -> Result { if link_type == LinkType::Email { return Ok(link.to_string()); } @@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result< } } else if is_colocated_asset_link(&link) { format!("{}{}", context.current_page_permalink, link) - } else if context.config.check_external_links - && !link.starts_with('#') - && !link.starts_with("mailto:") - { - let res = check_url(&link); - if res.is_valid() { - link.to_string() - } else { - return Err(format!("Link {} is not valid: {}", link, res.message()).into()); - } } else { + if !link.starts_with('#') && !link.starts_with("mailto:") { + external_links.push(link.to_owned()); + } link.to_string() }; Ok(result) @@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result = vec![]; let mut headers: Vec
= vec![]; + let mut external_links = Vec::new(); let mut opts = Options::empty(); let mut has_summary = false; @@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result { - let fixed_link = match fix_link(link_type, &link, context) { + let fixed_link = match fix_link(link_type, &link, context, &mut external_links) { Ok(fixed_link) => fixed_link, Err(err) => { error = Some(err); @@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Resulta link

\n"); -} - -#[test] -fn can_show_error_message_for_invalid_external_links() { - let permalinks_ctx = HashMap::new(); - let mut config = Config::default(); - config.check_external_links = true; - let context = RenderContext::new( - &ZOLA_TERA, - &config, - "https://vincent.is/about/", - &permalinks_ctx, - InsertAnchor::None, - ); - let res = render_content("[a link](http://google.comy)", &context); - assert!(res.is_err()); - let err = res.unwrap_err(); - assert!(format!("{}", err).contains("Link http://google.comy is not valid")); -} - -#[test] -fn doesnt_try_to_validate_email_links_mailto() { - let permalinks_ctx = HashMap::new(); - let mut config = Config::default(); - config.check_external_links = true; - let context = RenderContext::new( - &ZOLA_TERA, - &config, - "https://vincent.is/about/", - &permalinks_ctx, - InsertAnchor::None, - ); - let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap(); - assert_eq!(res.body, "

Email: foo@bar.baz

\n"); -} - -#[test] -fn doesnt_try_to_validate_email_links_angled_brackets() { - let permalinks_ctx = HashMap::new(); - let mut config = Config::default(); - config.check_external_links = true; - let context = RenderContext::new( - &ZOLA_TERA, - &config, - "https://vincent.is/about/", - &permalinks_ctx, - InsertAnchor::None, - ); - let res = render_content("Email: ", &context).unwrap(); - assert_eq!(res.body, "

Email: foo@bar.baz

\n"); + let content = " +[a link](http://google.com) +[a link](http://google.comy) +Email: [foo@bar.baz](mailto:foo@bar.baz) +Email: + "; + let res = render_content(content, &context).unwrap(); + assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]); } #[test] diff --git a/components/site/Cargo.toml b/components/site/Cargo.toml index bb88d12f..fa9f6256 100644 --- a/components/site/Cargo.toml +++ b/components/site/Cargo.toml @@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" } search = { path = "../search" } imageproc = { path = "../imageproc" } library = { path = "../library" } +link_checker = { path = "../link_checker" } [dev-dependencies] tempfile = "3" diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index ae2d92b1..007947b3 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -15,6 +15,7 @@ extern crate library; extern crate search; extern crate templates; extern crate utils; +extern crate link_checker; #[cfg(test)] extern crate tempfile; @@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle}; use tera::{Context, Tera}; use config::{get_config, Config}; -use errors::{Error, Result}; +use errors::{Error, ErrorKind, Result}; use front_matter::InsertAnchor; use library::{ find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy, @@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA}; use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists}; use utils::net::get_available_port; use utils::templates::{render_template, rewrite_theme_paths}; +use link_checker::check_url; #[derive(Debug)] pub struct Site { @@ -243,9 +245,64 @@ impl Site { self.render_markdown()?; self.register_tera_global_fns(); + if self.config.check_external_links { + self.check_external_links()?; + } + Ok(()) } + pub fn check_external_links(&self) -> Result<()> { + let library = self.library.write().expect("Get lock for check_external_links"); + let page_links = library.pages() + .values() + .map(|p| { + let path = &p.file.path; + p.external_links.iter().map(move |l| (path.clone(), l)) + }) + .flatten(); + let section_links = library.sections() + .values() + .map(|p| { + let path = &p.file.path; + p.external_links.iter().map(move |l| (path.clone(), l)) + }) + .flatten(); + let all_links = page_links.chain(section_links).collect::>(); + + // create thread pool with lots of threads so we can fetch + // (almost) all pages simultaneously + let threads = std::cmp::min(all_links.len(), 32); + let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error { + kind: ErrorKind::Msg(e.to_string().into()), + source: None, + })?; + + let errors: Vec<_> = pool.install(|| { + all_links.par_iter().filter_map(|(path, link)| { + let res = check_url(link); + if res.is_valid() { + None + } else { + Some((path, res)) + } + }).collect() + }); + + if errors.is_empty() { + Ok(()) + } else { + let msg = errors.into_iter() + .map(|(path, check_res)| format!("Dead link in {:?}: {:?}", path, check_res)) + .collect::>() + .join("\n"); + Err(Error { + kind: ErrorKind::Msg(msg.into()), + source: None, + }) + } + } + /// Insert a default index section for each language if necessary so we don't need to create /// a _index.md to render the index page at the root of the site pub fn create_default_index_sections(&mut self) -> Result<()> { diff --git a/docs/content/documentation/getting-started/cli-usage.md b/docs/content/documentation/getting-started/cli-usage.md index 95267f53..1167981d 100644 --- a/docs/content/documentation/getting-started/cli-usage.md +++ b/docs/content/documentation/getting-started/cli-usage.md @@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit $ zola --config config.staging.toml serve ``` +### check + +The check subcommand will try to build all pages just like the build command would, but without writing any of the +results to disk. Additionally, it always checks external links regardless of the site configuration. + ## Colored output Any of the three commands will emit colored output if your terminal supports it. diff --git a/src/cli.rs b/src/cli.rs index 304d135b..368224d6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> { .takes_value(false) .help("Do not start a server, just re-build project on changes") ]), + SubCommand::with_name("check") + .about("Try building the project without rendering it. Checks links") ]) } diff --git a/src/cmd/check.rs b/src/cmd/check.rs new file mode 100644 index 00000000..dca8793c --- /dev/null +++ b/src/cmd/check.rs @@ -0,0 +1,28 @@ +use std::env; +use std::path::PathBuf; + +use errors::Result; +use site::Site; + +use console; + +pub fn check( + config_file: &str, + base_path: Option<&str>, + base_url: Option<&str>, +) -> Result<()> { + let bp = base_path.map(PathBuf::from).unwrap_or(env::current_dir().unwrap()); + let mut site = Site::new(bp, config_file)?; + // Force the checking of external links + site.config.check_external_links = true; + // Disable syntax highlighting since the results won't be used + // and this operation can be expensive. + site.config.highlight_code = false; + if let Some(b) = base_url { + site.set_base_url(b.to_string()); + } + site.load()?; + console::notify_site_size(&site); + console::warn_about_ignored_pages(&site); + Ok(()) +} diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index baac6f0c..2936f1e5 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -1,7 +1,9 @@ mod build; mod init; mod serve; +mod check; pub use self::build::build; pub use self::init::create_new_project; pub use self::serve::serve; +pub use self::check::check; diff --git a/src/main.rs b/src/main.rs index 987e08bc..b2a0e742 100644 --- a/src/main.rs +++ b/src/main.rs @@ -89,6 +89,21 @@ fn main() { } }; } + ("check", Some(matches)) => { + console::info("Checking site..."); + let start = Instant::now(); + match cmd::check( + config_file, + matches.value_of("base_path"), + matches.value_of("base_url"), + ) { + Ok(()) => console::report_elapsed_time(start), + Err(e) => { + console::unravel_errors("Failed to check the site", &e); + ::std::process::exit(1); + } + }; + } _ => unreachable!(), } }