2019-12-21 21:52:39 +00:00
|
|
|
use lazy_static::lazy_static;
|
2017-03-07 12:34:31 +00:00
|
|
|
use pulldown_cmark as cmark;
|
2019-10-10 18:23:16 +00:00
|
|
|
use regex::Regex;
|
2017-03-27 14:17:33 +00:00
|
|
|
|
2019-12-21 21:52:39 +00:00
|
|
|
use crate::context::RenderContext;
|
|
|
|
use crate::table_of_contents::{make_table_of_contents, Heading};
|
2019-01-23 18:20:02 +00:00
|
|
|
use errors::{Error, Result};
|
2019-01-12 08:55:52 +00:00
|
|
|
use front_matter::InsertAnchor;
|
2018-10-31 07:18:57 +00:00
|
|
|
use utils::site::resolve_internal_link;
|
2020-02-05 08:13:14 +00:00
|
|
|
use utils::slugs::slugify_anchors;
|
2019-12-23 08:21:51 +00:00
|
|
|
use utils::vec::InsertMany;
|
2018-05-06 20:58:39 +00:00
|
|
|
|
2019-03-16 09:01:11 +00:00
|
|
|
use self::cmark::{Event, LinkType, Options, Parser, Tag};
|
2021-07-10 06:49:44 +00:00
|
|
|
use crate::codeblock::{CodeBlock, FenceSettings};
|
2020-09-08 19:32:30 +00:00
|
|
|
|
2020-02-07 20:07:10 +00:00
|
|
|
/// Marker emitted in place of a `<!-- more -->` HTML comment while rendering; the position of
/// this marker in the rendered HTML is what `Rendered::summary_len` reports.
const CONTINUE_READING: &str = "<span id=\"continue-reading\"></span>";
/// Name of the template rendered to produce the anchor link inserted next to a heading.
const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
|
2018-08-22 16:34:32 +00:00
|
|
|
|
2018-08-24 21:37:39 +00:00
|
|
|
/// Everything produced by rendering one Markdown document with `markdown_to_html`.
#[derive(Debug)]
pub struct Rendered {
    /// The rendered HTML.
    pub body: String,
    /// Byte offset of the continue-reading marker inside `body`. Only looked up when a
    /// `<!-- more -->` comment was seen while rendering; `None` otherwise.
    pub summary_len: Option<usize>,
    /// Table of contents built from the headings found in the document.
    pub toc: Vec<Heading>,
    /// Links to site-local pages: relative path plus optional anchor target.
    pub internal_links: Vec<(String, Option<String>)>,
    /// Outgoing links to external webpages (i.e. HTTP(S) targets).
    pub external_links: Vec<String>,
}
|
|
|
|
|
2021-07-10 06:49:44 +00:00
|
|
|
/// Bookkeeping for one heading located inside a slice of pulldown-cmark events.
#[derive(Debug)]
struct HeadingRef {
    /// Index of the event that opens the heading.
    start_idx: usize,
    /// Index of the event that closes the heading; `0` until that event has been recorded.
    end_idx: usize,
    /// Heading level as reported by the parser.
    level: u32,
    /// Explicit ID attached to the heading, if one has been recorded.
    id: Option<String>,
}

impl HeadingRef {
    /// Builds a reference for a heading opened at event index `start` with the given `level`.
    ///
    /// The end index starts at `0` and no ID is set; both are filled in later by the caller.
    fn new(start: usize, level: u32) -> Self {
        Self { id: None, level, end_idx: 0, start_idx: start }
    }
}
|
|
|
|
|
2018-05-06 20:58:39 +00:00
|
|
|
/// Returns an anchor based on `name` that is not yet present in `anchors`.
///
/// The slug may already be in our list of anchors: for example an article could have several
/// titles named "Example". In that case a counter is appended, which means we end up with
/// `example`, `example-1`, `example-2`, etc.
///
/// * `anchors` - the anchors already handed out.
/// * `name` - the desired anchor (slug).
/// * `level` - where to start counting. Callers pass `0`, which allows `name` itself to be
///   returned unchanged when it is free; any other value starts directly at the suffix
///   `level + 1`.
///
/// The search is a plain loop with a `u32` counter: it neither recurses once per collision nor
/// overflows when `level` is `u16::MAX`.
fn find_anchor(anchors: &[String], name: String, level: u16) -> String {
    if level == 0 && !anchors.contains(&name) {
        return name;
    }

    // Widen before adding one so that `u16::MAX` does not overflow.
    let mut suffix = u32::from(level) + 1;
    loop {
        let candidate = format!("{}-{}", name, suffix);
        if !anchors.contains(&candidate) {
            return candidate;
        }
        suffix += 1;
    }
}
|
2017-03-07 12:34:31 +00:00
|
|
|
|
2021-07-10 06:49:44 +00:00
|
|
|
/// Returns whether the given string starts with a schema.
|
|
|
|
///
|
|
|
|
/// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
|
|
|
|
/// private schemes. This function checks if the given string starts with something that just looks
|
|
|
|
/// like a scheme, i.e., a case-insensitive identifier followed by a colon.
|
|
|
|
///
|
|
|
|
/// [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
2019-10-10 18:23:16 +00:00
|
|
|
fn starts_with_schema(s: &str) -> bool {
|
|
|
|
lazy_static! {
|
|
|
|
static ref PATTERN: Regex = Regex::new(r"^[0-9A-Za-z\-]+:").unwrap();
|
|
|
|
}
|
|
|
|
|
|
|
|
PATTERN.is_match(s)
|
|
|
|
}
|
|
|
|
|
2021-07-10 06:49:44 +00:00
|
|
|
/// Colocated asset links refers to the files in the same directory,
|
|
|
|
/// there it should be a filename only
|
2018-05-07 19:03:51 +00:00
|
|
|
fn is_colocated_asset_link(link: &str) -> bool {
|
2018-09-30 19:15:09 +00:00
|
|
|
!link.contains('/') // http://, ftp://, ../ etc
|
2019-10-10 18:23:16 +00:00
|
|
|
&& !starts_with_schema(link)
|
|
|
|
}
|
|
|
|
|
2021-07-10 06:49:44 +00:00
|
|
|
/// Returns whether a link starts with an HTTP(S) scheme.
///
/// URI schemes are case-insensitive, so `HTTPS://example.com` counts as external just like
/// `https://example.com`. Only the `http:` / `https:` prefix is inspected; the rest of the link
/// is not validated.
fn is_external_link(link: &str) -> bool {
    // Compare bytes rather than slicing the `&str`, so short links and links with a multi-byte
    // character near the start are simply "not a match" instead of a slicing panic.
    let has_scheme = |scheme: &str| {
        link.as_bytes()
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme.as_bytes()))
    };

    has_scheme("http:") || has_scheme("https:")
}
|
|
|
|
|
2019-06-02 18:21:06 +00:00
|
|
|
fn fix_link(
|
|
|
|
link_type: LinkType,
|
|
|
|
link: &str,
|
|
|
|
context: &RenderContext,
|
2021-04-21 19:13:11 +00:00
|
|
|
internal_links: &mut Vec<(String, Option<String>)>,
|
2019-06-02 18:21:06 +00:00
|
|
|
external_links: &mut Vec<String>,
|
|
|
|
) -> Result<String> {
|
2019-03-15 20:24:06 +00:00
|
|
|
if link_type == LinkType::Email {
|
|
|
|
return Ok(link.to_string());
|
|
|
|
}
|
2019-07-12 20:55:44 +00:00
|
|
|
|
2019-01-05 14:37:24 +00:00
|
|
|
// A few situations here:
|
2019-05-27 12:35:14 +00:00
|
|
|
// - it could be a relative link (starting with `@/`)
|
2019-01-05 14:37:24 +00:00
|
|
|
// - it could be a link to a co-located asset
|
|
|
|
// - it could be a normal link
|
2019-05-27 12:35:14 +00:00
|
|
|
let result = if link.starts_with("@/") {
|
2020-11-28 12:04:49 +00:00
|
|
|
match resolve_internal_link(&link, &context.permalinks) {
|
2019-06-06 17:49:31 +00:00
|
|
|
Ok(resolved) => {
|
2021-04-21 19:13:11 +00:00
|
|
|
internal_links.push((resolved.md_path, resolved.anchor));
|
2019-06-06 17:49:31 +00:00
|
|
|
resolved.permalink
|
|
|
|
}
|
2019-01-05 14:37:24 +00:00
|
|
|
Err(_) => {
|
|
|
|
return Err(format!("Relative link {} not found.", link).into());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if is_colocated_asset_link(&link) {
|
|
|
|
format!("{}{}", context.current_page_permalink, link)
|
|
|
|
} else {
|
2019-10-10 18:23:16 +00:00
|
|
|
if is_external_link(link) {
|
2019-05-27 12:05:07 +00:00
|
|
|
external_links.push(link.to_owned());
|
|
|
|
}
|
2019-01-05 14:37:24 +00:00
|
|
|
link.to_string()
|
|
|
|
};
|
|
|
|
Ok(result)
|
|
|
|
}
|
|
|
|
|
2019-01-12 08:55:52 +00:00
|
|
|
/// get only text in a slice of events
|
|
|
|
fn get_text(parser_slice: &[Event]) -> String {
|
|
|
|
let mut title = String::new();
|
2019-01-05 15:30:53 +00:00
|
|
|
|
2019-01-12 08:55:52 +00:00
|
|
|
for event in parser_slice.iter() {
|
2019-05-29 18:33:34 +00:00
|
|
|
match event {
|
|
|
|
Event::Text(text) | Event::Code(text) => title += text,
|
2019-06-02 18:21:06 +00:00
|
|
|
_ => continue,
|
2019-01-12 08:55:52 +00:00
|
|
|
}
|
2019-01-05 15:30:53 +00:00
|
|
|
}
|
2019-01-12 08:55:52 +00:00
|
|
|
|
|
|
|
title
|
2019-01-05 15:30:53 +00:00
|
|
|
}
|
|
|
|
|
2019-09-06 21:36:30 +00:00
|
|
|
fn get_heading_refs(events: &[Event]) -> Vec<HeadingRef> {
|
|
|
|
let mut heading_refs = vec![];
|
2019-01-12 08:55:52 +00:00
|
|
|
|
|
|
|
for (i, event) in events.iter().enumerate() {
|
|
|
|
match event {
|
2019-09-06 21:36:30 +00:00
|
|
|
Event::Start(Tag::Heading(level)) => {
|
|
|
|
heading_refs.push(HeadingRef::new(i, *level));
|
2019-01-12 08:55:52 +00:00
|
|
|
}
|
2019-09-06 21:36:30 +00:00
|
|
|
Event::End(Tag::Heading(_)) => {
|
|
|
|
let msg = "Heading end before start?";
|
|
|
|
heading_refs.last_mut().expect(msg).end_idx = i;
|
2019-01-12 08:55:52 +00:00
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
2019-01-05 14:37:24 +00:00
|
|
|
}
|
2019-01-12 08:55:52 +00:00
|
|
|
|
2019-09-06 21:36:30 +00:00
|
|
|
heading_refs
|
2019-01-05 14:37:24 +00:00
|
|
|
}
|
|
|
|
|
2018-08-22 16:34:32 +00:00
|
|
|
/// Renders the Markdown in `content` to HTML, using the configuration, permalinks and templates
/// carried by `context`.
///
/// The document is parsed with tables, footnotes, strikethrough and task lists enabled (plus
/// smart punctuation when the config asks for it). Rendering happens in two stages:
///
/// 1. every parser event is rewritten as needed (code block highlighting, emoji replacement,
///    image/link fixing, `<!-- more -->` and `<pre data-shortcode>` handling) and collected;
/// 2. the collected events are walked to give each heading an `id`, optionally insert anchor
///    links, and record the headings for the table of contents.
///
/// # Errors
///
/// - Returns early if the anchor link template fails to render.
/// - Link problems (an empty URL, or a link rejected by `fix_link`) are only recorded while
///   parsing; rendering runs to completion and the most recently recorded error is returned
///   at the end instead of the result.
///
/// # Panics
///
/// Panics if the `page` / `section` value found in the Tera context is not an object with a
/// string `relative_path` entry, or if an escaped link cannot be written to its buffer.
pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Rendered> {
    lazy_static! {
        static ref EMOJI_REPLACER: gh_emoji::Replacer = gh_emoji::Replacer::new();
    }

    // `relative_path` of the `page` entry of the Tera context, falling back to the `section`
    // entry; `None` when neither is present. Handed to `CodeBlock::new` below.
    let path = context
        .tera_context
        .get("page")
        .or(context.tera_context.get("section"))
        .map(|x| x.as_object().unwrap().get("relative_path").unwrap().as_str().unwrap());
    // the rendered html
    let mut html = String::with_capacity(content.len());
    // Set while parsing
    let mut error = None;

    // `Some` while we are between the start and end events of a code block
    let mut code_block: Option<CodeBlock> = None;

    // Every heading id handed out so far, used to keep auto-generated ids unique
    let mut inserted_anchors: Vec<String> = vec![];
    let mut headings: Vec<Heading> = vec![];
    let mut internal_links = Vec::new();
    let mut external_links = Vec::new();

    let mut opts = Options::empty();
    // Set once a `<!-- more -->` comment has been seen
    let mut has_summary = false;
    // True after a `<pre data-shortcode>` opener whose closing `</pre>` has not been seen yet
    let mut in_html_block = false;
    opts.insert(Options::ENABLE_TABLES);
    opts.insert(Options::ENABLE_FOOTNOTES);
    opts.insert(Options::ENABLE_STRIKETHROUGH);
    opts.insert(Options::ENABLE_TASKLISTS);

    if context.config.markdown.smart_punctuation {
        opts.insert(Options::ENABLE_SMART_PUNCTUATION);
    }

    {
        let mut events = Parser::new_ext(content, opts)
            .map(|event| {
                match event {
                    Event::Text(text) => {
                        // if we are in the middle of a highlighted code block
                        if let Some(ref mut code_block) = code_block {
                            let html = code_block.highlight(&text);
                            Event::Html(html.into())
                        } else if context.config.markdown.render_emoji {
                            let processed_text = EMOJI_REPLACER.replace_all(&text);
                            Event::Text(processed_text.to_string().into())
                        } else {
                            // Business as usual
                            Event::Text(text)
                        }
                    }
                    Event::Start(Tag::CodeBlock(ref kind)) => {
                        // Non-fenced code blocks get the settings of an empty fence info string
                        let fence = match kind {
                            cmark::CodeBlockKind::Fenced(fence_info) => {
                                FenceSettings::new(fence_info)
                            }
                            _ => FenceSettings::new(""),
                        };
                        let (block, begin) = CodeBlock::new(fence, &context.config, path);
                        code_block = Some(block);
                        Event::Html(begin.into())
                    }
                    Event::End(Tag::CodeBlock(_)) => {
                        // reset highlight and close the code block
                        code_block = None;
                        Event::Html("</code></pre>\n".into())
                    }
                    Event::Start(Tag::Image(link_type, src, title)) => {
                        // Colocated images are served relative to the current page permalink
                        if is_colocated_asset_link(&src) {
                            let link = format!("{}{}", context.current_page_permalink, &*src);
                            return Event::Start(Tag::Image(link_type, link.into(), title));
                        }

                        Event::Start(Tag::Image(link_type, src, title))
                    }
                    Event::Start(Tag::Link(link_type, link, title)) if link.is_empty() => {
                        // Record the error but keep rendering, with `#` as a stand-in target
                        error = Some(Error::msg("There is a link that is missing a URL"));
                        Event::Start(Tag::Link(link_type, "#".into(), title))
                    }
                    Event::Start(Tag::Link(link_type, link, title)) => {
                        let fixed_link = match fix_link(
                            link_type,
                            &link,
                            context,
                            &mut internal_links,
                            &mut external_links,
                        ) {
                            Ok(fixed_link) => fixed_link,
                            Err(err) => {
                                // Record the error and replace the link start with empty HTML;
                                // the error is returned once rendering has finished
                                error = Some(err);
                                return Event::Html("".into());
                            }
                        };
                        // External links get a custom opening tag when the config has tweaks
                        // for them; the check and the tag use the original link target
                        if is_external_link(&link)
                            && context.config.markdown.has_external_link_tweaks()
                        {
                            let mut escaped = String::new();
                            // write_str can fail but here there are no reasons it should (afaik?)
                            cmark::escape::escape_href(&mut escaped, &link)
                                .expect("Could not write to buffer");
                            Event::Html(
                                context
                                    .config
                                    .markdown
                                    .construct_external_link_tag(&escaped, &title)
                                    .into(),
                            )
                        } else {
                            Event::Start(Tag::Link(link_type, fixed_link.into(), title))
                        }
                    }
                    Event::Html(ref markup) => {
                        if markup.contains("<!-- more -->") {
                            // Summary separator: swap it for the marker searched for at the end
                            has_summary = true;
                            Event::Html(CONTINUE_READING.into())
                        } else if in_html_block && markup.contains("</pre>") {
                            // Closing tag of a previously opened `<pre data-shortcode>`: drop it
                            in_html_block = false;
                            Event::Html(markup.replacen("</pre>", "", 1).into())
                        } else if markup.contains("pre data-shortcode") {
                            // Strip the `<pre data-shortcode>` opener, and the matching
                            // `</pre>` as well if it is part of the same HTML event
                            in_html_block = true;
                            let m = markup.replacen("<pre data-shortcode>", "", 1);
                            if m.contains("</pre>") {
                                in_html_block = false;
                                Event::Html(m.replacen("</pre>", "", 1).into())
                            } else {
                                Event::Html(m.into())
                            }
                        } else {
                            event
                        }
                    }
                    _ => event,
                }
            })
            .collect::<Vec<_>>(); // We need to collect the events to make a second pass

        let mut heading_refs = get_heading_refs(&events);

        // (event index, anchor link event) pairs, inserted in one go after the heading passes
        let mut anchors_to_insert = vec![];

        // First heading pass: look for a manually-specified IDs, e.g. `# Heading text {#hash}`
        // (This is a separate first pass so that auto IDs can avoid collisions with manual IDs.)
        for heading_ref in heading_refs.iter_mut() {
            let end_idx = heading_ref.end_idx;
            // Only the event right before the heading end is inspected, and only if it is text
            if let Event::Text(ref mut text) = events[end_idx - 1] {
                if text.as_bytes().last() == Some(&b'}') {
                    if let Some(mut i) = text.find("{#") {
                        // The id is whatever sits between the first `{#` and the final `}`
                        let id = text[i + 2..text.len() - 1].to_owned();
                        inserted_anchors.push(id.clone());
                        // Also drop the spaces separating the heading text from `{#...}`
                        while i > 0 && text.as_bytes()[i - 1] == b' ' {
                            i -= 1;
                        }
                        heading_ref.id = Some(id);
                        *text = text[..i].to_owned().into();
                    }
                }
            }
        }

        // Second heading pass: auto-generate remaining IDs, and emit HTML
        for heading_ref in heading_refs {
            let start_idx = heading_ref.start_idx;
            let end_idx = heading_ref.end_idx;
            let title = get_text(&events[start_idx + 1..end_idx]);
            // Keep the manual id if there is one, otherwise derive a unique one from the title
            let id = heading_ref.id.unwrap_or_else(|| {
                find_anchor(
                    &inserted_anchors,
                    slugify_anchors(&title, context.config.slugify.anchors),
                    0,
                )
            });
            inserted_anchors.push(id.clone());

            // insert `id` to the tag
            let html = format!("<h{lvl} id=\"{id}\">", lvl = heading_ref.level, id = id);
            events[start_idx] = Event::Html(html.into());

            // generate anchors and places to insert them
            if context.insert_anchor != InsertAnchor::None {
                let anchor_idx = match context.insert_anchor {
                    InsertAnchor::Left => start_idx + 1,
                    InsertAnchor::Right => end_idx,
                    InsertAnchor::None => 0, // Not important
                };
                let mut c = tera::Context::new();
                c.insert("id", &id);
                c.insert("level", &heading_ref.level);

                let anchor_link = utils::templates::render_template(
                    &ANCHOR_LINK_TEMPLATE,
                    &context.tera,
                    c,
                    &None,
                )
                .map_err(|e| Error::chain("Failed to render anchor link template", e))?;
                anchors_to_insert.push((anchor_idx, Event::Html(anchor_link.into())));
            }

            // record heading to make table of contents
            let permalink = format!("{}#{}", context.current_page_permalink, id);
            let h =
                Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
            headings.push(h);
        }

        if context.insert_anchor != InsertAnchor::None {
            events.insert_many(anchors_to_insert);
        }

        cmark::html::push_html(&mut html, events.into_iter());
    }

    // An error recorded while parsing takes precedence over the rendered output
    if let Some(e) = error {
        Err(e)
    } else {
        Ok(Rendered {
            summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
            body: html,
            toc: make_table_of_contents(headings),
            internal_links,
            external_links,
        })
    }
}
|
2019-10-10 18:23:16 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Anything that looks like `identifier:` at the start counts as a schema.
    #[test]
    fn test_starts_with_schema() {
        let with_schema = [
            // registered
            "https://example.com/",
            "ftp://example.com/",
            "mailto:user@example.com",
            "xmpp:node@example.com",
            "tel:18008675309",
            "sms:18008675309",
            "h323:user@example.com",
            // arbitrary
            "zola:post?content=hi",
            // case-insensitive
            "MailTo:user@example.com",
            "MAILTO:user@example.com",
        ];

        for candidate in with_schema.iter() {
            assert!(starts_with_schema(candidate));
        }
    }

    /// Only HTTP(S) targets are external; other schemes, fragments and filenames are not.
    #[test]
    fn test_is_external_link() {
        let external = [
            "http://example.com/",
            "https://example.com/",
            "https://example.com/index.html#introduction",
        ];
        for candidate in external.iter() {
            assert!(is_external_link(candidate));
        }

        let not_external =
            ["mailto:user@example.com", "tel:18008675309", "#introduction", "http.jpg"];
        for candidate in not_external.iter() {
            assert!(!is_external_link(candidate));
        }
    }
}
|