diff --git a/Cargo.lock b/Cargo.lock
index be7fcfa7..2f90cda0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2162,6 +2162,7 @@ dependencies = [
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"imageproc 0.1.0",
"library 0.1.0",
+ "link_checker 0.1.0",
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"search 0.1.0",
diff --git a/components/library/src/content/page.rs b/components/library/src/content/page.rs
index aa48a4fd..741ec33a 100644
--- a/components/library/src/content/page.rs
+++ b/components/library/src/content/page.rs
@@ -76,6 +76,8 @@ pub struct Page {
pub lang: String,
/// Contains all the translated version of that page
pub translations: Vec,
+ /// Contains the external links that need to be checked
+ pub external_links: Vec,
}
impl Page {
@@ -104,6 +106,7 @@ impl Page {
reading_time: None,
lang: String::new(),
translations: Vec::new(),
+ external_links: Vec::new(),
}
}
@@ -263,6 +266,7 @@ impl Page {
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned());
self.content = res.body;
self.toc = res.toc;
+ self.external_links = res.external_links;
Ok(())
}
@@ -339,6 +343,7 @@ impl Default for Page {
reading_time: None,
lang: String::new(),
translations: Vec::new(),
+ external_links: Vec::new(),
}
}
}
diff --git a/components/library/src/content/section.rs b/components/library/src/content/section.rs
index ebc9ca1a..cc5c0acb 100644
--- a/components/library/src/content/section.rs
+++ b/components/library/src/content/section.rs
@@ -56,6 +56,8 @@ pub struct Section {
pub lang: String,
/// Contains all the translated version of that section
pub translations: Vec,
+ /// Contains the external links that need to be checked
+ pub external_links: Vec,
}
impl Section {
@@ -85,6 +87,7 @@ impl Section {
reading_time: None,
lang: String::new(),
translations: Vec::new(),
+ external_links: Vec::new(),
}
}
@@ -189,6 +192,8 @@ impl Section {
})?;
self.content = res.body;
self.toc = res.toc;
+ self.external_links = res.external_links;
+
Ok(())
}
@@ -255,6 +260,7 @@ impl Default for Section {
word_count: None,
lang: String::new(),
translations: Vec::new(),
+ external_links: Vec::new(),
}
}
}
diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs
index 49d224e9..8843e8ff 100644
--- a/components/rendering/src/markdown.rs
+++ b/components/rendering/src/markdown.rs
@@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
use context::RenderContext;
use errors::{Error, Result};
use front_matter::InsertAnchor;
-use link_checker::check_url;
use table_of_contents::{make_table_of_contents, Header};
use utils::site::resolve_internal_link;
use utils::vec::InsertMany;
@@ -25,6 +24,7 @@ pub struct Rendered {
pub body: String,
pub summary_len: Option,
pub toc: Vec,
+ pub external_links: Vec,
}
// tracks a header in a slice of pulldown-cmark events
@@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
&& !link.starts_with("mailto:")
}
-fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result {
+fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec) -> Result {
if link_type == LinkType::Email {
return Ok(link.to_string());
}
@@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<
}
} else if is_colocated_asset_link(&link) {
format!("{}{}", context.current_page_permalink, link)
- } else if context.config.check_external_links
- && !link.starts_with('#')
- && !link.starts_with("mailto:")
- {
- let res = check_url(&link);
- if res.is_valid() {
- link.to_string()
- } else {
- return Err(format!("Link {} is not valid: {}", link, res.message()).into());
- }
} else {
+ if !link.starts_with('#') && !link.starts_with("mailto:") {
+ external_links.push(link.to_owned());
+ }
link.to_string()
};
Ok(result)
@@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result = vec![];
let mut headers: Vec = vec![];
+ let mut external_links = Vec::new();
let mut opts = Options::empty();
let mut has_summary = false;
@@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result {
- let fixed_link = match fix_link(link_type, &link, context) {
+ let fixed_link = match fix_link(link_type, &link, context, &mut external_links) {
Ok(fixed_link) => fixed_link,
Err(err) => {
error = Some(err);
@@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Resulta link
\n");
-}
-
-#[test]
-fn can_show_error_message_for_invalid_external_links() {
- let permalinks_ctx = HashMap::new();
- let mut config = Config::default();
- config.check_external_links = true;
- let context = RenderContext::new(
- &ZOLA_TERA,
- &config,
- "https://vincent.is/about/",
- &permalinks_ctx,
- InsertAnchor::None,
- );
- let res = render_content("[a link](http://google.comy)", &context);
- assert!(res.is_err());
- let err = res.unwrap_err();
- assert!(format!("{}", err).contains("Link http://google.comy is not valid"));
-}
-
-#[test]
-fn doesnt_try_to_validate_email_links_mailto() {
- let permalinks_ctx = HashMap::new();
- let mut config = Config::default();
- config.check_external_links = true;
- let context = RenderContext::new(
- &ZOLA_TERA,
- &config,
- "https://vincent.is/about/",
- &permalinks_ctx,
- InsertAnchor::None,
- );
- let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap();
- assert_eq!(res.body, "Email: foo@bar.baz
\n");
-}
-
-#[test]
-fn doesnt_try_to_validate_email_links_angled_brackets() {
- let permalinks_ctx = HashMap::new();
- let mut config = Config::default();
- config.check_external_links = true;
- let context = RenderContext::new(
- &ZOLA_TERA,
- &config,
- "https://vincent.is/about/",
- &permalinks_ctx,
- InsertAnchor::None,
- );
- let res = render_content("Email: ", &context).unwrap();
- assert_eq!(res.body, "Email: foo@bar.baz
\n");
+ let content = "
+[a link](http://google.com)
+[a link](http://google.comy)
+Email: [foo@bar.baz](mailto:foo@bar.baz)
+Email:
+ ";
+ let res = render_content(content, &context).unwrap();
+ assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]);
}
#[test]
diff --git a/components/site/Cargo.toml b/components/site/Cargo.toml
index bb88d12f..fa9f6256 100644
--- a/components/site/Cargo.toml
+++ b/components/site/Cargo.toml
@@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" }
search = { path = "../search" }
imageproc = { path = "../imageproc" }
library = { path = "../library" }
+link_checker = { path = "../link_checker" }
[dev-dependencies]
tempfile = "3"
diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs
index ae2d92b1..007947b3 100644
--- a/components/site/src/lib.rs
+++ b/components/site/src/lib.rs
@@ -15,6 +15,7 @@ extern crate library;
extern crate search;
extern crate templates;
extern crate utils;
+extern crate link_checker;
#[cfg(test)]
extern crate tempfile;
@@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle};
use tera::{Context, Tera};
use config::{get_config, Config};
-use errors::{Error, Result};
+use errors::{Error, ErrorKind, Result};
use front_matter::InsertAnchor;
use library::{
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy,
@@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA};
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists};
use utils::net::get_available_port;
use utils::templates::{render_template, rewrite_theme_paths};
+use link_checker::check_url;
#[derive(Debug)]
pub struct Site {
@@ -243,9 +245,64 @@ impl Site {
self.render_markdown()?;
self.register_tera_global_fns();
+ if self.config.check_external_links {
+ self.check_external_links()?;
+ }
+
Ok(())
}
+    /// Checks every external link gathered from pages and sections, fetching them in parallel.
+    /// Fails with an error listing each dead link (one per line) if any of them is invalid.
+    pub fn check_external_links(&self) -> Result<()> {
+        // Links are only read here, so a shared read lock is sufficient
+        let library = self.library.read().expect("Get lock for check_external_links");
+        let page_links = library.pages().values().flat_map(|p| {
+            let path = &p.file.path;
+            p.external_links.iter().map(move |l| (path, l))
+        });
+        let section_links = library.sections().values().flat_map(|s| {
+            let path = &s.file.path;
+            s.external_links.iter().map(move |l| (path, l))
+        });
+        let all_links = page_links.chain(section_links).collect::<Vec<_>>();
+        if all_links.is_empty() {
+            return Ok(());
+        }
+
+        // create thread pool with lots of threads so we can fetch
+        // (almost) all pages simultaneously
+        let threads = std::cmp::min(all_links.len(), 32);
+        let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error {
+            kind: ErrorKind::Msg(e.to_string().into()),
+            source: None,
+        })?;
+
+        let errors: Vec<_> = pool.install(|| {
+            all_links.par_iter().filter_map(|(path, link)| {
+                let res = check_url(link);
+                if res.is_valid() {
+                    None
+                } else {
+                    Some((path, link, res))
+                }
+            }).collect()
+        });
+
+        if errors.is_empty() {
+            return Ok(());
+        }
+        // Include the link itself so the report says which URL is dead
+        let msg = errors
+            .into_iter()
+            .map(|(path, link, res)| {
+                format!("Dead link in {:?} to {}: {}", path, link, res.message())
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+        Err(Error { kind: ErrorKind::Msg(msg.into()), source: None })
+    }
+
/// Insert a default index section for each language if necessary so we don't need to create
/// a _index.md to render the index page at the root of the site
pub fn create_default_index_sections(&mut self) -> Result<()> {
diff --git a/docs/content/documentation/getting-started/cli-usage.md b/docs/content/documentation/getting-started/cli-usage.md
index 95267f53..1167981d 100644
--- a/docs/content/documentation/getting-started/cli-usage.md
+++ b/docs/content/documentation/getting-started/cli-usage.md
@@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit
$ zola --config config.staging.toml serve
```
+### check
+
+The check subcommand will try to build all pages just like the build command would, but without writing any of the
+results to disk. Additionally, it always checks external links regardless of the site configuration.
+
## Colored output
-Any of the three commands will emit colored output if your terminal supports it.
+Any of these commands will emit colored output if your terminal supports it.
diff --git a/src/cli.rs b/src/cli.rs
index 304d135b..368224d6 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> {
.takes_value(false)
.help("Do not start a server, just re-build project on changes")
]),
+ SubCommand::with_name("check")
+ .about("Try building the project without rendering it. Checks links")
])
}
diff --git a/src/cmd/check.rs b/src/cmd/check.rs
new file mode 100644
index 00000000..dca8793c
--- /dev/null
+++ b/src/cmd/check.rs
@@ -0,0 +1,28 @@
+use std::env;
+use std::path::PathBuf;
+
+use errors::Result;
+use site::Site;
+
+use console;
+
+/// Loads the site with external link checking forced on, without running a build step.
+///
+/// `base_path` falls back to the current directory; `base_url` is applied when given.
+pub fn check(config_file: &str, base_path: Option<&str>, base_url: Option<&str>) -> Result<()> {
+    // Resolve the current directory lazily so it is only queried when no base path is given
+    let bp = base_path.map(PathBuf::from).unwrap_or_else(|| env::current_dir().unwrap());
+    let mut site = Site::new(bp, config_file)?;
+    // Force the checking of external links
+    site.config.check_external_links = true;
+    // Disable syntax highlighting since the results won't be used
+    // and this operation can be expensive.
+    site.config.highlight_code = false;
+    if let Some(b) = base_url {
+        site.set_base_url(b.to_string());
+    }
+    site.load()?;
+    console::notify_site_size(&site);
+    console::warn_about_ignored_pages(&site);
+    Ok(())
+}
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index baac6f0c..2936f1e5 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -1,7 +1,9 @@
mod build;
mod init;
mod serve;
+mod check;
pub use self::build::build;
pub use self::init::create_new_project;
pub use self::serve::serve;
+pub use self::check::check;
diff --git a/src/main.rs b/src/main.rs
index 987e08bc..b2a0e742 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -89,6 +89,21 @@ fn main() {
}
};
}
+ ("check", Some(matches)) => {
+ console::info("Checking site...");
+ let start = Instant::now();
+ match cmd::check(
+ config_file,
+ matches.value_of("base_path"),
+ matches.value_of("base_url"),
+ ) {
+ Ok(()) => console::report_elapsed_time(start),
+ Err(e) => {
+ console::unravel_errors("Failed to check the site", &e);
+ ::std::process::exit(1);
+ }
+ };
+ }
_ => unreachable!(),
}
}