Footnotes are now supported in headers

This fixes #569.

`markdown_to_html` is heavily refactored; header-related things are now
handled in a second pass.
This commit is contained in:
Peng Guanwen 2019-01-12 16:55:52 +08:00
parent 538866487b
commit c027cd97d6
5 changed files with 169 additions and 145 deletions

View file

@ -1,22 +1,25 @@
use std::borrow::Cow::{Borrowed, Owned};
use self::cmark::{Event, Options, Parser, Tag};
use pulldown_cmark as cmark;
use slug::slugify;
use syntect::easy::HighlightLines;
use syntect::html::{
start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground,
IncludeBackground, start_highlighted_html_snippet, styled_line_to_highlighted_html,
};
use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
use errors::Result;
use link_checker::check_url;
use utils::site::resolve_internal_link;
use context::RenderContext;
use table_of_contents::{make_table_of_contents, Header, TempHeader};
use errors::Result;
use front_matter::InsertAnchor;
use link_checker::check_url;
use table_of_contents::{Header, make_table_of_contents, TempHeader};
use utils::site::resolve_internal_link;
use utils::vec::InsertMany;
use self::cmark::{Event, Options, Parser, Tag};
const CONTINUE_READING: &str = "<p id=\"zola-continue-reading\"><a name=\"continue-reading\"></a></p>\n";
const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
#[derive(Debug)]
pub struct Rendered {
@ -25,6 +28,18 @@ pub struct Rendered {
pub toc: Vec<Header>,
}
/// Location of a single header inside the collected list of markdown events.
struct HeaderIndex {
    /// Index of the event that opens the header.
    start: usize,
    /// Index of the event that closes the header; stays `0` until it is recorded.
    end: usize,
    /// Header level carried by the opening event.
    level: i32,
}

impl HeaderIndex {
    /// Builds the entry for a header opening at `start` with the given `level`.
    ///
    /// The closing position is not known yet, so `end` is initialised to `0`
    /// and is expected to be filled in by the caller afterwards.
    fn new(start: usize, level: i32) -> Self {
        Self { start, level, end: 0 }
    }
}
// We might have cases where the slug is already present in our list of anchors,
// for example an article could have several titles named Example.
// We add a counter after the slug if the slug is already present, which
@ -65,7 +80,8 @@ fn fix_link(link: &str, context: &RenderContext) -> Result<String> {
format!("{}{}", context.current_page_permalink, link)
} else if context.config.check_external_links
&& !link.starts_with('#')
&& !link.starts_with("mailto:") {
&& !link.starts_with("mailto:")
{
let res = check_url(&link);
if res.is_valid() {
link.to_string()
@ -80,35 +96,36 @@ fn fix_link(link: &str, context: &RenderContext) -> Result<String> {
Ok(result)
}
fn push_start_tag(temp_header: &mut TempHeader, tag: &Tag) -> bool {
match tag {
Tag::Emphasis => temp_header.add_html("<em>"),
Tag::Strong => temp_header.add_html("<strong>"),
Tag::Code => temp_header.add_html("<code>"),
// Tag::Link is handled in `markdown_to_html`
_ => return false,
/// get only text in a slice of events
fn get_text(parser_slice: &[Event]) -> String {
let mut title = String::new();
for event in parser_slice.iter() {
if let Event::Text(text) = event {
title += text;
}
true
}
fn push_end_tag(temp_header: &mut TempHeader, tag: &Tag) -> bool {
match tag {
Tag::Emphasis => temp_header.add_html("</em>"),
Tag::Strong => temp_header.add_html("</strong>"),
Tag::Code => temp_header.add_html("</code>"),
Tag::Link(_, _) => temp_header.add_html("</a>"),
_ => return false,
}
true
title
}
/// returns true if event have been processed
fn push_to_temp_header(event: &Event, temp_header: &mut TempHeader) -> bool {
fn get_header_indexes(events: &[Event]) -> Vec<HeaderIndex> {
let mut header_indexes = vec![];
for (i, event) in events.iter().enumerate() {
match event {
Event::Start(tag) => push_start_tag(temp_header, tag),
Event::End(tag) => push_end_tag(temp_header, tag),
_ => false,
Event::Start(Tag::Header(level)) => {
header_indexes.push(HeaderIndex::new(i, *level));
}
Event::End(Tag::Header(_)) => {
let msg = "Header end before start?";
header_indexes.last_mut().expect(msg).end = i;
}
_ => (),
}
}
header_indexes
}
pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Rendered> {
@ -119,17 +136,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
let mut background = IncludeBackground::Yes;
let mut highlighter: Option<(HighlightLines, bool)> = None;
// If we get text in header, we need to insert the id and a anchor
let mut in_header = false;
// pulldown_cmark can send several text events for a title if there are markdown
// specific characters like `!` in them. We only want to insert the anchor the first time
let mut header_created = false;
let mut anchors: Vec<String> = vec![];
let mut headers = vec![];
// Defaults to a 0 level so not a real header
// It should be an Option ideally but not worth the hassle to update
let mut temp_header = TempHeader::default();
let mut inserted_anchors: Vec<String> = vec![];
let mut headers: Vec<TempHeader> = vec![];
let mut opts = Options::empty();
let mut has_summary = false;
@ -137,26 +146,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
opts.insert(Options::ENABLE_FOOTNOTES);
{
let parser = Parser::new_ext(content, opts).map(|event| {
// if in header, just do the parse ourselves
if in_header && push_to_temp_header(&event, &mut temp_header) {
return Event::Html(Borrowed(""));
}
let mut events = Parser::new_ext(content, opts).map(|event| {
match event {
Event::Text(text) => {
// Header first
if in_header {
if header_created {
temp_header.add_text(&text);
return Event::Html(Borrowed(""));
}
// += as we might have some <code> or other things already there
temp_header.add_text(&text);
header_created = true;
return Event::Html(Borrowed(""));
}
// if we are in the middle of a code block
if let Some((ref mut highlighter, in_extra)) = highlighter {
let highlighted = if in_extra {
@ -217,47 +209,55 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
}
};
if in_header {
let html = if title.is_empty() {
format!("<a href=\"{}\">", fixed_link)
} else {
format!("<a href=\"{}\" title=\"{}\">", fixed_link, title)
};
temp_header.add_html(&html);
return Event::Html(Borrowed(""));
}
Event::Start(Tag::Link(Owned(fixed_link), title))
}
Event::Start(Tag::Header(num)) => {
in_header = true;
temp_header = TempHeader::new(num);
Event::Html(Borrowed(""))
}
Event::End(Tag::Header(_)) => {
// End of a header, reset all the things and return the header string
let id = find_anchor(&anchors, slugify(&temp_header.title), 0);
anchors.push(id.clone());
temp_header.permalink = format!("{}#{}", context.current_page_permalink, id);
temp_header.id = id;
in_header = false;
header_created = false;
let val = temp_header.to_string(context.tera, context.insert_anchor);
headers.push(temp_header.clone());
temp_header = TempHeader::default();
Event::Html(Owned(val))
}
Event::Html(ref markup) if markup.contains("<!-- more -->") => {
has_summary = true;
Event::Html(Borrowed(CONTINUE_READING))
}
_ => event,
}
});
}).collect::<Vec<_>>(); // We need to collect the events to make a second pass
cmark::html::push_html(&mut html, parser);
let mut header_indexes = get_header_indexes(&events);
let mut anchors_to_insert = vec![];
for header_idx in header_indexes {
let start_idx = header_idx.start;
let end_idx = header_idx.end;
let title = get_text(&events[start_idx + 1 .. end_idx]);
let id = find_anchor(&inserted_anchors, slugify(&title), 0);
inserted_anchors.push(id.clone());
// insert `id` to the tag
let html = format!("<h{lvl} id=\"{id}\">", lvl = header_idx.level, id = id);
events[start_idx] = Event::Html(Owned(html));
// generate anchors and places to insert them
if context.insert_anchor != InsertAnchor::None {
let anchor_idx = match context.insert_anchor {
InsertAnchor::Left => start_idx + 1,
InsertAnchor::Right => end_idx,
InsertAnchor::None => 0, // Not important
};
let mut c = tera::Context::new();
c.insert("id", &id);
let anchor_link = context.tera.render(ANCHOR_LINK_TEMPLATE, &c).unwrap();
anchors_to_insert.push((anchor_idx, Event::Html(Owned(anchor_link))));
}
// record header to make table of contents
let permalink = format!("{}#{}", context.current_page_permalink, id);
let temp_header = TempHeader { level: header_idx.level, id, permalink, title };
headers.push(temp_header);
}
if context.insert_anchor != InsertAnchor::None {
events.insert_many(anchors_to_insert);
}
cmark::html::push_html(&mut html, events.into_iter());
}
if let Some(e) = error {

View file

@ -1,6 +1,3 @@
use front_matter::InsertAnchor;
use tera::{Context as TeraContext, Tera};
#[derive(Debug, PartialEq, Clone, Serialize)]
pub struct Header {
#[serde(skip_serializing)]
@ -30,7 +27,6 @@ pub struct TempHeader {
pub id: String,
pub permalink: String,
pub title: String,
pub html: String,
}
impl TempHeader {
@ -40,50 +36,6 @@ impl TempHeader {
id: String::new(),
permalink: String::new(),
title: String::new(),
html: String::new(),
}
}
pub fn add_html(&mut self, val: &str) {
self.html += val;
}
pub fn add_text(&mut self, val: &str) {
self.html += val;
self.title += val;
}
/// Transform all the information we have about this header into the HTML string for it
pub fn to_string(&self, tera: &Tera, insert_anchor: InsertAnchor) -> String {
let anchor_link = if insert_anchor != InsertAnchor::None {
let mut c = TeraContext::new();
c.insert("id", &self.id);
tera.render("anchor-link.html", &c).unwrap()
} else {
String::new()
};
match insert_anchor {
InsertAnchor::None => format!(
"<h{lvl} id=\"{id}\">{t}</h{lvl}>\n",
lvl = self.level,
t = self.html,
id = self.id
),
InsertAnchor::Left => format!(
"<h{lvl} id=\"{id}\">{a}{t}</h{lvl}>\n",
lvl = self.level,
a = anchor_link,
t = self.html,
id = self.id
),
InsertAnchor::Right => format!(
"<h{lvl} id=\"{id}\">{t}{a}</h{lvl}>\n",
lvl = self.level,
a = anchor_link,
t = self.html,
id = self.id
),
}
}
}

View file

@ -375,6 +375,19 @@ fn can_insert_anchor_right() {
);
}
/// With `InsertAnchor::Right`, each of several consecutive headers gets its own
/// anchor link placed after the header text.
#[test]
fn can_insert_anchor_for_multi_header() {
    let config = Config::default();
    let permalinks_ctx = HashMap::new();
    let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::Right);

    // One rendered `<h1>` per input header, concatenated in document order.
    let expected = concat!(
        "<h1 id=\"hello\">Hello<a class=\"zola-anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\n</h1>\n",
        "<h1 id=\"world\">World<a class=\"zola-anchor\" href=\"#world\" aria-label=\"Anchor link for: world\">🔗</a>\n</h1>\n"
    );

    let rendered = render_content("# Hello\n# World", &context).unwrap();
    assert_eq!(rendered.body, expected);
}
// See https://github.com/Keats/gutenberg/issues/42
#[test]
fn can_insert_anchor_with_exclamation_mark() {
@ -528,7 +541,7 @@ fn can_understand_emphasis_in_header() {
let config = Config::default();
let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None);
let res = render_content("# *Emphasis* text", &context).unwrap();
assert_eq!(res.body, "<h1 id=\"emphasis-text\"><em>Emphasis</em> text</h1>\n")
assert_eq!(res.body, "<h1 id=\"emphasis-text\"><em>Emphasis</em> text</h1>\n");
}
#[test]
@ -537,7 +550,7 @@ fn can_understand_strong_in_header() {
let config = Config::default();
let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None);
let res = render_content("# **Strong** text", &context).unwrap();
assert_eq!(res.body, "<h1 id=\"strong-text\"><strong>Strong</strong> text</h1>\n")
assert_eq!(res.body, "<h1 id=\"strong-text\"><strong>Strong</strong> text</h1>\n");
}
#[test]
@ -546,7 +559,21 @@ fn can_understand_code_in_header() {
let config = Config::default();
let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None);
let res = render_content("# `Code` text", &context).unwrap();
assert_eq!(res.body, "<h1 id=\"code-text\"><code>Code</code> text</h1>\n")
assert_eq!(res.body, "<h1 id=\"code-text\"><code>Code</code> text</h1>\n");
}
// Regression test for https://github.com/getzola/zola/issues/569:
// a footnote reference placed inside a header must be rendered inside the
// `<h1>` while the header id is built from the plain text only.
#[test]
fn can_understand_footnote_in_header() {
    let config = Config::default();
    let permalinks_ctx = HashMap::new();
    let context = RenderContext::new(&ZOLA_TERA, &config, "", &permalinks_ctx, InsertAnchor::None);

    let expected = r##"<h1 id="text-there">text <sup class="footnote-reference"><a href="#1">1</a></sup> there</h1>
<div class="footnote-definition" id="1"><sup class="footnote-definition-label">1</sup>
<p>footnote</p>
</div>
"##;

    let rendered = render_content("# text [^1] there\n[^1]: footnote", &context).unwrap();
    assert_eq!(rendered.body, expected);
}
#[test]
@ -641,8 +668,8 @@ fn can_validate_valid_external_links() {
&permalinks_ctx,
InsertAnchor::None,
);
let res = render_content("[a link](http://google.com)", &context).unwrap();
assert_eq!(res.body, "<p><a href=\"http://google.com\">a link</a></p>\n");
let res = render_content("[a link](http://bing.com)", &context).unwrap();
assert_eq!(res.body, "<p><a href=\"http://bing.com\">a link</a></p>\n");
}
#[test]

View file

@ -14,3 +14,4 @@ pub mod fs;
pub mod net;
pub mod site;
pub mod templates;
pub mod vec;

View file

@ -0,0 +1,44 @@
/// Extension trait for inserting several elements into a collection at once.
pub trait InsertMany {
    /// Type of the elements stored in the collection.
    type Element;

    /// Inserts every `(index, element)` pair of `elem_to_insert` into `self`.
    ///
    /// Each index refers to a position in the collection as it was *before*
    /// the call: the element is placed in front of whatever originally sat at
    /// that index. An index equal to the original length appends at the end.
    fn insert_many(&mut self, elem_to_insert: Vec<(usize, Self::Element)>);
}

impl<T> InsertMany for Vec<T> {
    type Element = T;

    /// Efficiently inserts multiple elements at their specified indices.
    ///
    /// The indices must be sorted in ascending order and refer to positions in
    /// the vector as it was before the call. Several elements may share the
    /// same index; they are inserted in the order given.
    ///
    /// The original elements are walked exactly once, so this runs in
    /// O(n + k) time for a vector of length `n` and `k` insertions.
    ///
    /// # Panics
    ///
    /// Panics if the indices are not in ascending order or if an index is
    /// greater than the original length of the vector.
    fn insert_many(&mut self, elem_to_insert: Vec<(usize, T)>) {
        let original_len = self.len();
        let mut merged = Vec::with_capacity(original_len + elem_to_insert.len());
        {
            // Draining front-to-back moves every original element exactly once,
            // instead of shifting the remaining tail for every insertion.
            let mut remaining = self.drain(..);
            let mut last_idx = 0;
            for (idx, elem) in elem_to_insert {
                assert!(
                    idx >= last_idx,
                    "insert_many: indices must be sorted in ascending order ({} after {})",
                    idx,
                    last_idx
                );
                assert!(
                    idx <= original_len,
                    "insert_many: index {} is out of bounds (len is {})",
                    idx,
                    original_len
                );
                // Copy over the original elements that precede this insertion point.
                merged.extend(remaining.by_ref().take(idx - last_idx));
                merged.push(elem);
                last_idx = idx;
            }
            // Whatever is left follows the last inserted element.
            merged.extend(remaining);
        }
        *self = merged;
    }
}
#[cfg(test)]
mod test {
    use super::InsertMany;

    /// Insertion indices refer to positions in the vector as it was before the call.
    #[test]
    fn insert_many_works() {
        // Insert at the front, in the middle and one past the last element.
        let mut with_tail = vec![1, 2, 3, 4, 5];
        let front_middle_back = vec![(0, 0), (2, -1), (5, 6)];
        with_tail.insert_many(front_middle_back);
        assert_eq!(with_tail, &[0, 1, 2, -1, 3, 4, 5, 6]);

        // Same input without the trailing insertion: the tail is kept untouched.
        let mut without_tail = vec![1, 2, 3, 4, 5];
        let front_middle = vec![(0, 0), (2, -1)];
        without_tail.insert_many(front_middle);
        assert_eq!(without_tail, &[0, 1, 2, -1, 3, 4, 5]);
    }
}