From c027cd97d6f2ba1942825247fffbd4dc0028a808 Mon Sep 17 00:00:00 2001 From: Peng Guanwen Date: Sat, 12 Jan 2019 16:55:52 +0800 Subject: [PATCH 1/4] Footnote is now supported in headers This fixes #569 . `markdown_to_html` is heavily refactored, header-related things is handled in a second pass. --- components/rendering/src/markdown.rs | 184 +++++++++--------- components/rendering/src/table_of_contents.rs | 48 ----- components/rendering/tests/markdown.rs | 37 +++- components/utils/src/lib.rs | 1 + components/utils/src/vec.rs | 44 +++++ 5 files changed, 169 insertions(+), 145 deletions(-) create mode 100644 components/utils/src/vec.rs diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index dc409ce7..f01eb29a 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -1,22 +1,25 @@ use std::borrow::Cow::{Borrowed, Owned}; -use self::cmark::{Event, Options, Parser, Tag}; use pulldown_cmark as cmark; use slug::slugify; use syntect::easy::HighlightLines; use syntect::html::{ - start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground, + IncludeBackground, start_highlighted_html_snippet, styled_line_to_highlighted_html, }; use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET}; -use errors::Result; -use link_checker::check_url; -use utils::site::resolve_internal_link; - use context::RenderContext; -use table_of_contents::{make_table_of_contents, Header, TempHeader}; +use errors::Result; +use front_matter::InsertAnchor; +use link_checker::check_url; +use table_of_contents::{Header, make_table_of_contents, TempHeader}; +use utils::site::resolve_internal_link; +use utils::vec::InsertMany; + +use self::cmark::{Event, Options, Parser, Tag}; const CONTINUE_READING: &str = "

\n"; +const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html"; #[derive(Debug)] pub struct Rendered { @@ -25,6 +28,18 @@ pub struct Rendered { pub toc: Vec
, } +struct HeaderIndex { + start: usize, + end: usize, + level: i32, +} + +impl HeaderIndex { + fn new(start: usize, level: i32) -> HeaderIndex { + HeaderIndex { start, end: 0, level } + } +} + // We might have cases where the slug is already present in our list of anchor // for example an article could have several titles named Example // We add a counter after the slug if the slug is already present, which @@ -65,7 +80,8 @@ fn fix_link(link: &str, context: &RenderContext) -> Result { format!("{}{}", context.current_page_permalink, link) } else if context.config.check_external_links && !link.starts_with('#') - && !link.starts_with("mailto:") { + && !link.starts_with("mailto:") + { let res = check_url(&link); if res.is_valid() { link.to_string() @@ -80,35 +96,36 @@ fn fix_link(link: &str, context: &RenderContext) -> Result { Ok(result) } -fn push_start_tag(temp_header: &mut TempHeader, tag: &Tag) -> bool { - match tag { - Tag::Emphasis => temp_header.add_html(""), - Tag::Strong => temp_header.add_html(""), - Tag::Code => temp_header.add_html(""), - // Tag::Link is handled in `markdown_to_html` - _ => return false, +/// get only text in a slice of events +fn get_text(parser_slice: &[Event]) -> String { + let mut title = String::new(); + + for event in parser_slice.iter() { + if let Event::Text(text) = event { + title += text; + } } - true + + title } -fn push_end_tag(temp_header: &mut TempHeader, tag: &Tag) -> bool { - match tag { - Tag::Emphasis => temp_header.add_html(""), - Tag::Strong => temp_header.add_html(""), - Tag::Code => temp_header.add_html(""), - Tag::Link(_, _) => temp_header.add_html(""), - _ => return false, - } - true -} +fn get_header_indexes(events: &[Event]) -> Vec { + let mut header_indexes = vec![]; -/// returns true if event have been processed -fn push_to_temp_header(event: &Event, temp_header: &mut TempHeader) -> bool { - match event { - Event::Start(tag) => push_start_tag(temp_header, tag), - Event::End(tag) => push_end_tag(temp_header, 
tag), - _ => false, + for (i, event) in events.iter().enumerate() { + match event { + Event::Start(Tag::Header(level)) => { + header_indexes.push(HeaderIndex::new(i, *level)); + } + Event::End(Tag::Header(_)) => { + let msg = "Header end before start?"; + header_indexes.last_mut().expect(msg).end = i; + } + _ => (), + } } + + header_indexes } pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result { @@ -119,17 +136,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result = None; - // If we get text in header, we need to insert the id and a anchor - let mut in_header = false; - // pulldown_cmark can send several text events for a title if there are markdown - // specific characters like `!` in them. We only want to insert the anchor the first time - let mut header_created = false; - let mut anchors: Vec = vec![]; - let mut headers = vec![]; - // Defaults to a 0 level so not a real header - // It should be an Option ideally but not worth the hassle to update - let mut temp_header = TempHeader::default(); + let mut inserted_anchors: Vec = vec![]; + let mut headers: Vec = vec![]; let mut opts = Options::empty(); let mut has_summary = false; @@ -137,26 +146,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result { - // Header first - if in_header { - if header_created { - temp_header.add_text(&text); - return Event::Html(Borrowed("")); - } - // += as we might have some or other things already there - temp_header.add_text(&text); - header_created = true; - return Event::Html(Borrowed("")); - } - // if we are in the middle of a code block if let Some((ref mut highlighter, in_extra)) = highlighter { let highlighted = if in_extra { @@ -217,47 +209,55 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result", fixed_link) - } else { - format!("", fixed_link, title) - }; - temp_header.add_html(&html); - return Event::Html(Borrowed("")); - } - Event::Start(Tag::Link(Owned(fixed_link), title)) } - 
Event::Start(Tag::Header(num)) => { - in_header = true; - temp_header = TempHeader::new(num); - Event::Html(Borrowed("")) - } - Event::End(Tag::Header(_)) => { - // End of a header, reset all the things and return the header string - - let id = find_anchor(&anchors, slugify(&temp_header.title), 0); - anchors.push(id.clone()); - temp_header.permalink = format!("{}#{}", context.current_page_permalink, id); - temp_header.id = id; - - in_header = false; - header_created = false; - let val = temp_header.to_string(context.tera, context.insert_anchor); - headers.push(temp_header.clone()); - temp_header = TempHeader::default(); - Event::Html(Owned(val)) - } Event::Html(ref markup) if markup.contains("") => { has_summary = true; Event::Html(Borrowed(CONTINUE_READING)) } _ => event, } - }); + }).collect::>(); // We need to collect the events to make a second pass - cmark::html::push_html(&mut html, parser); + let mut header_indexes = get_header_indexes(&events); + + let mut anchors_to_insert = vec![]; + + for header_idx in header_indexes { + let start_idx = header_idx.start; + let end_idx = header_idx.end; + let title = get_text(&events[start_idx + 1 .. 
end_idx]); + let id = find_anchor(&inserted_anchors, slugify(&title), 0); + inserted_anchors.push(id.clone()); + + // insert `id` to the tag + let html = format!("", lvl = header_idx.level, id = id); + events[start_idx] = Event::Html(Owned(html)); + + // generate anchors and places to insert them + if context.insert_anchor != InsertAnchor::None { + let anchor_idx = match context.insert_anchor { + InsertAnchor::Left => start_idx + 1, + InsertAnchor::Right => end_idx, + InsertAnchor::None => 0, // Not important + }; + let mut c = tera::Context::new(); + c.insert("id", &id); + let anchor_link = context.tera.render(ANCHOR_LINK_TEMPLATE, &c).unwrap(); + anchors_to_insert.push((anchor_idx, Event::Html(Owned(anchor_link)))); + } + + // record header to make table of contents + let permalink = format!("{}#{}", context.current_page_permalink, id); + let temp_header = TempHeader { level: header_idx.level, id, permalink, title }; + headers.push(temp_header); + } + + if context.insert_anchor != InsertAnchor::None { + events.insert_many(anchors_to_insert); + } + + cmark::html::push_html(&mut html, events.into_iter()); } if let Some(e) = error { diff --git a/components/rendering/src/table_of_contents.rs b/components/rendering/src/table_of_contents.rs index 5cc115e0..777d5f24 100644 --- a/components/rendering/src/table_of_contents.rs +++ b/components/rendering/src/table_of_contents.rs @@ -1,6 +1,3 @@ -use front_matter::InsertAnchor; -use tera::{Context as TeraContext, Tera}; - #[derive(Debug, PartialEq, Clone, Serialize)] pub struct Header { #[serde(skip_serializing)] @@ -30,7 +27,6 @@ pub struct TempHeader { pub id: String, pub permalink: String, pub title: String, - pub html: String, } impl TempHeader { @@ -40,50 +36,6 @@ impl TempHeader { id: String::new(), permalink: String::new(), title: String::new(), - html: String::new(), - } - } - - pub fn add_html(&mut self, val: &str) { - self.html += val; - } - - pub fn add_text(&mut self, val: &str) { - self.html += val; - self.title 
+= val; - } - - /// Transform all the information we have about this header into the HTML string for it - pub fn to_string(&self, tera: &Tera, insert_anchor: InsertAnchor) -> String { - let anchor_link = if insert_anchor != InsertAnchor::None { - let mut c = TeraContext::new(); - c.insert("id", &self.id); - tera.render("anchor-link.html", &c).unwrap() - } else { - String::new() - }; - - match insert_anchor { - InsertAnchor::None => format!( - "{t}\n", - lvl = self.level, - t = self.html, - id = self.id - ), - InsertAnchor::Left => format!( - "{a}{t}\n", - lvl = self.level, - a = anchor_link, - t = self.html, - id = self.id - ), - InsertAnchor::Right => format!( - "{t}{a}\n", - lvl = self.level, - a = anchor_link, - t = self.html, - id = self.id - ), } } } diff --git a/components/rendering/tests/markdown.rs b/components/rendering/tests/markdown.rs index 6149e4f5..a85829db 100644 --- a/components/rendering/tests/markdown.rs +++ b/components/rendering/tests/markdown.rs @@ -375,6 +375,19 @@ fn can_insert_anchor_right() { ); } +#[test] +fn can_insert_anchor_for_multi_header() { + let permalinks_ctx = HashMap::new(); + let config = Config::default(); + let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::Right); + let res = render_content("# Hello\n# World", &context).unwrap(); + assert_eq!( + res.body, + "

Hello🔗\n

\n\ +

World🔗\n

\n" + ); +} + // See https://github.com/Keats/gutenberg/issues/42 #[test] fn can_insert_anchor_with_exclamation_mark() { @@ -528,7 +541,7 @@ fn can_understand_emphasis_in_header() { let config = Config::default(); let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None); let res = render_content("# *Emphasis* text", &context).unwrap(); - assert_eq!(res.body, "

Emphasis text

\n") + assert_eq!(res.body, "

Emphasis text

\n"); } #[test] @@ -537,7 +550,7 @@ fn can_understand_strong_in_header() { let config = Config::default(); let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None); let res = render_content("# **Strong** text", &context).unwrap(); - assert_eq!(res.body, "

Strong text

\n") + assert_eq!(res.body, "

Strong text

\n"); } #[test] @@ -546,7 +559,21 @@ fn can_understand_code_in_header() { let config = Config::default(); let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None); let res = render_content("# `Code` text", &context).unwrap(); - assert_eq!(res.body, "

Code text

\n") + assert_eq!(res.body, "

Code text

\n"); +} + +// See https://github.com/getzola/zola/issues/569 +#[test] +fn can_understand_footnote_in_header() { + let permalinks_ctx = HashMap::new(); + let config = Config::default(); + let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content("# text [^1] there\n[^1]: footnote", &context).unwrap(); + assert_eq!(res.body, r##"

text 1 there

+
1 +

footnote

+
+"##); } #[test] @@ -641,8 +668,8 @@ fn can_validate_valid_external_links() { &permalinks_ctx, InsertAnchor::None, ); - let res = render_content("[a link](http://google.com)", &context).unwrap(); - assert_eq!(res.body, "

a link

\n"); + let res = render_content("[a link](http://bing.com)", &context).unwrap(); + assert_eq!(res.body, "

a link

\n"); } #[test] diff --git a/components/utils/src/lib.rs b/components/utils/src/lib.rs index 25581e80..8e462ccf 100644 --- a/components/utils/src/lib.rs +++ b/components/utils/src/lib.rs @@ -14,3 +14,4 @@ pub mod fs; pub mod net; pub mod site; pub mod templates; +pub mod vec; diff --git a/components/utils/src/vec.rs b/components/utils/src/vec.rs new file mode 100644 index 00000000..eac02dce --- /dev/null +++ b/components/utils/src/vec.rs @@ -0,0 +1,44 @@ +pub trait InsertMany { + type Element; + fn insert_many(&mut self, elem_to_insert: Vec<(usize, Self::Element)>); +} + +impl InsertMany for Vec { + type Element = T; + + /// Efficiently insert multiple element in their specified index. + /// The index should be sorted in ascending order. + /// + /// This is done in O(n) time. + fn insert_many(&mut self, elem_to_insert: Vec<(usize, T)>) { + let mut inserted = vec![]; + let mut last_idx = 0; + + for (idx, elem) in elem_to_insert.into_iter() { + let head_len = idx - last_idx; + inserted.extend(self.splice(0 .. 
head_len, std::iter::empty())); + inserted.push(elem); + last_idx = idx; + } + let len = self.len(); + inserted.extend(self.drain(0..len)); + + *self = inserted; + } +} + +#[cfg(test)] +mod test { + use super::InsertMany; + + #[test] + fn insert_many_works() { + let mut v = vec![1, 2, 3, 4, 5]; + v.insert_many(vec![(0, 0), (2, -1), (5, 6)]); + assert_eq!(v, &[0, 1, 2, -1, 3, 4, 5, 6]); + + let mut v2 = vec![1, 2, 3, 4, 5]; + v2.insert_many(vec![(0, 0), (2, -1)]); + assert_eq!(v2, &[0, 1, 2, -1, 3, 4, 5]); + } +} \ No newline at end of file From 80786a2fbbcc7760bd522ad59ce907b8a1c759c4 Mon Sep 17 00:00:00 2001 From: Peng Guanwen Date: Sat, 12 Jan 2019 17:25:01 +0800 Subject: [PATCH 2/4] Revert accidentally change --- components/rendering/tests/markdown.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/rendering/tests/markdown.rs b/components/rendering/tests/markdown.rs index a85829db..aaa108f9 100644 --- a/components/rendering/tests/markdown.rs +++ b/components/rendering/tests/markdown.rs @@ -668,8 +668,8 @@ fn can_validate_valid_external_links() { &permalinks_ctx, InsertAnchor::None, ); - let res = render_content("[a link](http://bing.com)", &context).unwrap(); - assert_eq!(res.body, "

a link

\n"); + let res = render_content("[a link](http://google.com)", &context).unwrap(); + assert_eq!(res.body, "

a link

\n"); } #[test] From 1dbd8874c0bf19757f79f026c47692bd6d9014b4 Mon Sep 17 00:00:00 2001 From: Peng Guanwen Date: Wed, 16 Jan 2019 17:09:23 +0800 Subject: [PATCH 3/4] derive Debug for HeaderIndex --- components/rendering/src/markdown.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index f01eb29a..6279e733 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -28,6 +28,7 @@ pub struct Rendered { pub toc: Vec
, } +#[derive(Debug)] struct HeaderIndex { start: usize, end: usize, From 5ab3466e2bbf85bd0f696d494025aa6d4ed0125d Mon Sep 17 00:00:00 2001 From: Peng Guanwen Date: Fri, 18 Jan 2019 22:46:18 +0800 Subject: [PATCH 4/4] Doc improvements --- components/rendering/src/markdown.rs | 35 ++++++++++++++-------------- components/utils/src/vec.rs | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 6279e733..01e31af0 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -28,16 +28,17 @@ pub struct Rendered { pub toc: Vec
, } +// tracks a header in a slice of pulldown-cmark events #[derive(Debug)] -struct HeaderIndex { - start: usize, - end: usize, +struct HeaderRef { + start_idx: usize, + end_idx: usize, level: i32, } -impl HeaderIndex { - fn new(start: usize, level: i32) -> HeaderIndex { - HeaderIndex { start, end: 0, level } +impl HeaderRef { + fn new(start: usize, level: i32) -> HeaderRef { + HeaderRef { start_idx: start, end_idx: 0, level } } } @@ -110,23 +111,23 @@ fn get_text(parser_slice: &[Event]) -> String { title } -fn get_header_indexes(events: &[Event]) -> Vec { - let mut header_indexes = vec![]; +fn get_header_refs(events: &[Event]) -> Vec { + let mut header_refs = vec![]; for (i, event) in events.iter().enumerate() { match event { Event::Start(Tag::Header(level)) => { - header_indexes.push(HeaderIndex::new(i, *level)); + header_refs.push(HeaderRef::new(i, *level)); } Event::End(Tag::Header(_)) => { let msg = "Header end before start?"; - header_indexes.last_mut().expect(msg).end = i; + header_refs.last_mut().expect(msg).end_idx = i; } _ => (), } } - header_indexes + header_refs } pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result { @@ -220,19 +221,19 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result>(); // We need to collect the events to make a second pass - let mut header_indexes = get_header_indexes(&events); + let header_refs = get_header_refs(&events); let mut anchors_to_insert = vec![]; - for header_idx in header_indexes { - let start_idx = header_idx.start; - let end_idx = header_idx.end; + for header_ref in header_refs { + let start_idx = header_ref.start_idx; + let end_idx = header_ref.end_idx; let title = get_text(&events[start_idx + 1 .. 
end_idx]); let id = find_anchor(&inserted_anchors, slugify(&title), 0); inserted_anchors.push(id.clone()); // insert `id` to the tag - let html = format!("", lvl = header_idx.level, id = id); + let html = format!("", lvl = header_ref.level, id = id); events[start_idx] = Event::Html(Owned(html)); // generate anchors and places to insert them @@ -250,7 +251,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result InsertMany for Vec { type Element = T; /// Efficiently insert multiple element in their specified index. - /// The index should be sorted in ascending order. + /// The elements should be sorted in ascending order by their index. /// /// This is done in O(n) time. fn insert_many(&mut self, elem_to_insert: Vec<(usize, T)>) {