diff --git a/components/library/src/content/page.rs b/components/library/src/content/page.rs index 216de4ce..9a37ecc8 100644 --- a/components/library/src/content/page.rs +++ b/components/library/src/content/page.rs @@ -82,12 +82,11 @@ pub struct Page { pub lang: String, /// Contains all the translated version of that page pub translations: Vec, - /// Contains the internal links that have an anchor: we can only check the anchor - /// after all pages have been built and their ToC compiled. The page itself should exist otherwise - /// it would have errored before getting there - /// (path to markdown, anchor value) - pub internal_links_with_anchors: Vec<(String, String)>, - /// Contains the external links that need to be checked + /// The list of all internal links (as path to markdown file), with optional anchor fragments. + /// We can only check the anchor after all pages have been built and their ToC compiled. + /// The page itself should exist otherwise it would have errored before getting there. + pub internal_links: Vec<(String, Option)>, + /// The list of all links to external webpages. They can be validated by the `link_checker`. pub external_links: Vec, } @@ -268,7 +267,7 @@ impl Page { self.content = res.body; self.toc = res.toc; self.external_links = res.external_links; - self.internal_links_with_anchors = res.internal_links_with_anchors; + self.internal_links = res.internal_links; Ok(()) } diff --git a/components/library/src/content/section.rs b/components/library/src/content/section.rs index 1b284cca..3d4fb6fd 100644 --- a/components/library/src/content/section.rs +++ b/components/library/src/content/section.rs @@ -56,12 +56,11 @@ pub struct Section { /// The language of that section. Equal to the default lang if the user doesn't setup `languages` in config. 
/// Corresponds to the lang in the _index.{lang}.md file scheme pub lang: String, - /// Contains the internal links that have an anchor: we can only check the anchor - /// after all pages have been built and their ToC compiled. The page itself should exist otherwise - /// it would have errored before getting there - /// (path to markdown, anchor value) - pub internal_links_with_anchors: Vec<(String, String)>, - /// Contains the external links that need to be checked + /// The list of all internal links (as path to markdown file), with optional anchor fragments. + /// We can only check the anchor after all pages have been built and their ToC compiled. + /// The page itself should exist otherwise it would have errored before getting there. + pub internal_links: Vec<(String, Option)>, + /// The list of all links to external webpages. They can be validated by the `link_checker`. pub external_links: Vec, } @@ -186,7 +185,7 @@ impl Section { self.content = res.body; self.toc = res.toc; self.external_links = res.external_links; - self.internal_links_with_anchors = res.internal_links_with_anchors; + self.internal_links = res.internal_links; Ok(()) } diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 731b2c7b..5929e1fe 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -26,7 +26,9 @@ pub struct Rendered { pub body: String, pub summary_len: Option, pub toc: Vec, - pub internal_links_with_anchors: Vec<(String, String)>, + /// Links to site-local pages: relative path plus optional anchor target. + pub internal_links: Vec<(String, Option)>, + /// Outgoing links to external webpages (i.e. HTTP(S) targets). 
pub external_links: Vec, } @@ -93,7 +95,7 @@ fn fix_link( link_type: LinkType, link: &str, context: &RenderContext, - internal_links_with_anchors: &mut Vec<(String, String)>, + internal_links: &mut Vec<(String, Option)>, external_links: &mut Vec, ) -> Result { if link_type == LinkType::Email { @@ -107,10 +109,7 @@ fn fix_link( let result = if link.starts_with("@/") { match resolve_internal_link(&link, &context.permalinks) { Ok(resolved) => { - if resolved.anchor.is_some() { - internal_links_with_anchors - .push((resolved.md_path.unwrap(), resolved.anchor.unwrap())); - } + internal_links.push((resolved.md_path, resolved.anchor)); resolved.permalink } Err(_) => { @@ -175,7 +174,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result = vec![]; let mut headings: Vec = vec![]; - let mut internal_links_with_anchors = Vec::new(); + let mut internal_links = Vec::new(); let mut external_links = Vec::new(); let mut opts = Options::empty(); @@ -294,7 +293,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result fixed_link, @@ -429,7 +428,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result Result<()> { + println!("Checking all internal links with anchors."); let library = site.library.write().expect("Get lock for check_internal_links_with_anchors"); + + // Chain all internal links, from both sections and pages. 
let page_links = library .pages() .values() .map(|p| { let path = &p.file.path; - p.internal_links_with_anchors.iter().map(move |l| (path.clone(), l)) + p.internal_links.iter().map(move |l| (path.clone(), l)) }) .flatten(); let section_links = library @@ -20,67 +25,46 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> { .values() .map(|p| { let path = &p.file.path; - p.internal_links_with_anchors.iter().map(move |l| (path.clone(), l)) + p.internal_links.iter().map(move |l| (path.clone(), l)) }) .flatten(); - let all_links = page_links.chain(section_links).collect::>(); + let all_links = page_links.chain(section_links); - if site.config.is_in_check_mode() { - println!("Checking {} internal link(s) with an anchor.", all_links.len()); - } - - if all_links.is_empty() { - return Ok(()); - } - - let mut full_path = site.base_path.clone(); - full_path.push("content"); - - let errors: Vec<_> = all_links - .iter() - .filter_map(|(page_path, (md_path, anchor))| { - // There are a few `expect` here since the presence of the .md file will - // already have been checked in the markdown rendering - let mut p = full_path.clone(); - for part in md_path.split('/') { - p.push(part); - } - if md_path.contains("_index.md") { - let section = library - .get_section(&p) - .expect("Couldn't find section in check_internal_links_with_anchors"); - if section.has_anchor(&anchor) { - None - } else { - Some((page_path, md_path, anchor)) - } - } else { - let page = library - .get_page(&p) - .expect("Couldn't find section in check_internal_links_with_anchors"); - if page.has_anchor(&anchor) { - None - } else { - Some((page_path, md_path, anchor)) - } - } + // Only keep links with anchor fragments, and count them too. + // Bare files have already been checked elsewhere, thus they are not interesting here. 
+ let mut anchors_total = 0usize; + let links_with_anchors = all_links + .filter_map(|(page_path, link)| match link { + (md_path, Some(anchor)) => Some((page_path, md_path, anchor)), + _ => None, }) - .collect(); + .inspect(|_| anchors_total = anchors_total.saturating_add(1)); - if site.config.is_in_check_mode() { - println!( - "> Checked {} internal link(s) with an anchor: {} error(s) found.", - all_links.len(), - errors.len() - ); - } + // Check for targets existence (including anchors), then keep only the faulty + // entries for error reporting purposes. + let missing_targets = links_with_anchors.filter(|(_, md_path, anchor)| { + // There are a few `expect` here since the presence of the .md file will + // already have been checked in the markdown rendering + let mut full_path = site.base_path.clone(); + full_path.push("content"); + for part in md_path.split('/') { + full_path.push(part); + } + if md_path.contains("_index.md") { + let section = library + .get_section(&full_path) + .expect("Couldn't find section in check_internal_links_with_anchors"); + !section.has_anchor(&anchor) + } else { + let page = library + .get_page(&full_path) + .expect("Couldn't find page in check_internal_links_with_anchors"); + !page.has_anchor(&anchor) + } + }); - if errors.is_empty() { - return Ok(()); - } - - let msg = errors - .into_iter() + // Format faulty entries into error messages, and collect them. + let errors = missing_targets .map(|(page_path, md_path, anchor)| { format!( "The anchor in the link `@/{}#{}` in {} does not exist.", @@ -89,9 +73,22 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> { page_path.to_string_lossy(), ) }) - .collect::>() - .join("\n"); - Err(Error { kind: ErrorKind::Msg(msg), source: None }) + .collect::>(); + + // Finally emit a summary, and return overall anchors-checking result. 
+ match errors.len() { + 0 => { + println!("> Successfully checked {} internal link(s) with anchors.", anchors_total); + Ok(()) + } + errors_total => { + println!( + "> Checked {} internal link(s) with anchors: {} target(s) missing.", + anchors_total, errors_total, + ); + Err(Error { kind: ErrorKind::Msg(errors.join("\n")), source: None }) + } + } } pub fn check_external_links(site: &Site) -> Result<()> { diff --git a/components/utils/src/site.rs b/components/utils/src/site.rs index 26e0f8ac..d49e559a 100644 --- a/components/utils/src/site.rs +++ b/components/utils/src/site.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::hash::BuildHasher; use unicode_segmentation::UnicodeSegmentation; -use errors::{bail, Result}; +use errors::Result; /// Get word count and estimated reading time pub fn get_reading_analytics(content: &str) -> (usize, usize) { @@ -14,12 +14,15 @@ pub fn get_reading_analytics(content: &str) -> (usize, usize) { (word_count, ((word_count + 199) / 200)) } +/// Result of a successful resolution of an internal link. #[derive(Debug, PartialEq, Clone)] pub struct ResolvedInternalLink { + /// Resolved link target, as absolute URL address. pub permalink: String, - // The 2 fields below are only set when there is an anchor - // as we will need that to check if it exists after the markdown rendering is done - pub md_path: Option, + /// Internal path to the .md file, without the leading `@/`. + pub md_path: String, + /// Optional anchor target. + /// We can check whether it exists only after all the markdown rendering is done. 
pub anchor: Option, } @@ -36,20 +39,17 @@ pub fn resolve_internal_link( let parts = clean_link.split('#').collect::>(); // If we have slugification turned off, we might end up with some escaped characters so we need // to decode them first - let decoded = &*percent_decode(parts[0].as_bytes()).decode_utf8_lossy(); - match permalinks.get(decoded) { - Some(p) => { - if parts.len() > 1 { - Ok(ResolvedInternalLink { - permalink: format!("{}#{}", p, parts[1]), - md_path: Some(decoded.to_string()), - anchor: Some(parts[1].to_string()), - }) - } else { - Ok(ResolvedInternalLink { permalink: p.to_string(), md_path: None, anchor: None }) - } - } - None => bail!(format!("Relative link {} not found.", link)), + let decoded = percent_decode(parts[0].as_bytes()).decode_utf8_lossy().to_string(); + let target = + permalinks.get(&decoded).ok_or_else(|| format!("Relative link {} not found.", link))?; + if parts.len() > 1 { + Ok(ResolvedInternalLink { + permalink: format!("{}#{}", target, parts[1]), + md_path: decoded, + anchor: Some(parts[1].to_string()), + }) + } else { + Ok(ResolvedInternalLink { permalink: target.to_string(), md_path: decoded, anchor: None }) } } @@ -81,7 +81,7 @@ mod tests { permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string()); let res = resolve_internal_link("@/pages/about.md#hello", &permalinks).unwrap(); assert_eq!(res.permalink, "https://vincent.is/about#hello"); - assert_eq!(res.md_path, Some("pages/about.md".to_string())); + assert_eq!(res.md_path, "pages/about.md".to_string()); assert_eq!(res.anchor, Some("hello".to_string())); } @@ -94,7 +94,7 @@ mod tests { ); let res = resolve_internal_link("@/pages/about%20space.md#hello", &permalinks).unwrap(); assert_eq!(res.permalink, "https://vincent.is/about%20space/#hello"); - assert_eq!(res.md_path, Some("pages/about space.md".to_string())); + assert_eq!(res.md_path, "pages/about space.md".to_string()); assert_eq!(res.anchor, Some("hello".to_string())); }