rendering: Avoid prepending URL prefix to links that start with a scheme (#817)

Links that start with a scheme (e.g., `tel:18008675309`) inadvertently
had a URL prefix prepended. Previously, only `mailto:` was handled, but
given the sheer number of [registered URI schemes][uri-schemes], a loose
pattern matcher is used to detect schemes instead.

External links, as identified by the renderer, are now limited to `http`
and `https` schemes.

Fixes #747 and fixes #816.

[uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
This commit is contained in:
Michael Macias 2019-10-10 13:23:16 -05:00 committed by Vincent Prouillet
parent cb4056c0f2
commit b1ceb3e80e
2 changed files with 87 additions and 6 deletions

View file

@ -1,4 +1,5 @@
use pulldown_cmark as cmark; use pulldown_cmark as cmark;
use regex::Regex;
use slug::slugify; use slug::slugify;
use syntect::easy::HighlightLines; use syntect::easy::HighlightLines;
use syntect::html::{ use syntect::html::{
@ -60,11 +61,31 @@ fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
find_anchor(anchors, name, level + 1) find_anchor(anchors, name, level + 1)
} }
// Returns whether the given string starts with a URI scheme.
//
// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
// private schemes. This function checks if the given string starts with something that just looks
// like a scheme as defined by RFC 3986, section 3.1: an ASCII letter followed by any number of
// ASCII letters, digits, `+`, `-` or `.`, and then a colon. Matching is case-insensitive.
//
// Requiring a leading letter keeps colon-containing file names such as `12:30.jpg` from being
// mistaken for a scheme, while accepting `+` and `.` covers schemes such as `web+app:` or
// `iris.beep:`.
//
// [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
fn starts_with_schema(s: &str) -> bool {
    lazy_static! {
        // Compiled once on first use; the pattern is a constant, so `unwrap` cannot fail at runtime.
        static ref PATTERN: Regex = Regex::new(r"^[A-Za-z][0-9A-Za-z+.\-]*:").unwrap();
    }

    PATTERN.is_match(s)
}
// Colocated asset links refers to the files in the same directory, // Colocated asset links refers to the files in the same directory,
// there it should be a filename only // there it should be a filename only
fn is_colocated_asset_link(link: &str) -> bool { fn is_colocated_asset_link(link: &str) -> bool {
!link.contains('/') // http://, ftp://, ../ etc !link.contains('/') // http://, ftp://, ../ etc
&& !link.starts_with("mailto:") && !starts_with_schema(link)
}
// Returns whether a link starts with an HTTP(S) scheme.
//
// URI schemes are case-insensitive (RFC 3986, section 3.1), so `HTTP://...` and `Https://...`
// count as external links just like their lowercase forms.
fn is_external_link(link: &str) -> bool {
    // `str::get` yields `None` when the link is shorter than the prefix or when the cut would
    // land inside a multi-byte character; in both cases the link cannot start with the prefix.
    let starts_with_ignore_case = |prefix: &str| {
        link.get(..prefix.len()).map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    };

    starts_with_ignore_case("http:") || starts_with_ignore_case("https:")
}
fn fix_link( fn fix_link(
@ -103,7 +124,7 @@ fn fix_link(
} else if is_colocated_asset_link(&link) { } else if is_colocated_asset_link(&link) {
format!("{}{}", context.current_page_permalink, link) format!("{}{}", context.current_page_permalink, link)
} else { } else {
if !link.starts_with('#') && !link.starts_with("mailto:") { if is_external_link(link) {
external_links.push(link.to_owned()); external_links.push(link.to_owned());
} }
link.to_string() link.to_string()
@ -328,3 +349,41 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
}) })
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    // Checks that scheme-looking prefixes are recognised and that plain file names, anchors and
    // paths are not.
    #[test]
    fn test_starts_with_schema() {
        // registered
        assert!(starts_with_schema("https://example.com/"));
        assert!(starts_with_schema("ftp://example.com/"));
        assert!(starts_with_schema("mailto:user@example.com"));
        assert!(starts_with_schema("xmpp:node@example.com"));
        assert!(starts_with_schema("tel:18008675309"));
        assert!(starts_with_schema("sms:18008675309"));
        assert!(starts_with_schema("h323:user@example.com"));

        // arbitrary
        assert!(starts_with_schema("zola:post?content=hi"));

        // case-insensitive
        assert!(starts_with_schema("MailTo:user@example.com"));
        assert!(starts_with_schema("MAILTO:user@example.com"));

        // not a scheme: without these a function that always returns `true` would pass
        assert!(!starts_with_schema(""));
        assert!(!starts_with_schema("image.png"));
        assert!(!starts_with_schema("#introduction"));
        assert!(!starts_with_schema(":colon-first"));
        assert!(!starts_with_schema("dir/file:name"));
    }

    // Checks that only `http` and `https` links are reported as external.
    #[test]
    fn test_is_external_link() {
        assert!(is_external_link("http://example.com/"));
        assert!(is_external_link("https://example.com/"));
        assert!(is_external_link("https://example.com/index.html#introduction"));

        assert!(!is_external_link("mailto:user@example.com"));
        assert!(!is_external_link("tel:18008675309"));
        assert!(!is_external_link("ftp://example.com/"));

        assert!(!is_external_link("#introduction"));

        // a file name that merely begins with the letters of a scheme
        assert!(!is_external_link("http.jpg"));
        assert!(!is_external_link(""));
    }
}

View file

@ -821,12 +821,34 @@ fn doesnt_try_to_highlight_content_from_shortcode() {
//} //}
// https://github.com/getzola/zola/issues/747
// https://github.com/getzola/zola/issues/816
//
// Links whose target starts with a non-HTTP scheme must be rendered verbatim, even when the
// page has a permalink that would otherwise be prepended to colocated asset links.
#[test]
fn leaves_custom_url_scheme_untouched() {
    // Each link sits in its own paragraph; the blank lines are what make the renderer emit the
    // three separate `<p>` elements expected below.
    let content = r#"[foo@bar.tld](xmpp:foo@bar.tld)

[(123) 456-7890](tel:+11234567890)

[blank page](about:blank)
"#;

    let tera_ctx = Tera::default();
    let config = Config::default();
    let permalinks_ctx = HashMap::new();

    // A non-empty permalink is passed so that an unwanted prefix would show up in the output.
    let context = RenderContext::new(
        &tera_ctx,
        &config,
        "https://vincent.is/",
        &permalinks_ctx,
        InsertAnchor::None,
    );

    let res = render_content(content, &context).unwrap();

    let expected = r#"<p><a href="xmpp:foo@bar.tld">foo@bar.tld</a></p>
<p><a href="tel:+11234567890">(123) 456-7890</a></p>
<p><a href="about:blank">blank page</a></p>
"#;

    assert_eq!(res.body, expected);
}