Add line highlighting to code blocks (#1131)

* Add line highlighting to code blocks

* Fix highlighting of lines

Apparently every line to be highlighted is provided in one chunk.

* Add more documentation to codeblock.rs

* Turn FenceIter into an Iterator

* Move Range to fence.rs

* Add tests
This commit is contained in:
Alice Ryhl 2020-09-08 21:32:30 +02:00 committed by Vincent Prouillet
parent bff0193df7
commit 826e701b4f
5 changed files with 642 additions and 36 deletions

View file

@ -17,11 +17,14 @@ lazy_static! {
}
/// Returns the highlighter and whether it was found in the extra or not
pub fn get_highlighter<'a>(info: &str, config: &Config) -> (HighlightLines<'a>, bool) {
pub fn get_highlighter(
language: Option<&str>,
config: &Config
) -> (HighlightLines<'static>, bool) {
let theme = &THEME_SET.themes[&config.highlight_theme];
let mut in_extra = false;
if let Some(ref lang) = info.split(' ').next() {
if let Some(ref lang) = language {
let syntax = SYNTAX_SET
.find_syntax_by_token(lang)
.or_else(|| {

View file

@ -1,14 +1,11 @@
use lazy_static::lazy_static;
use pulldown_cmark as cmark;
use regex::Regex;
use syntect::easy::HighlightLines;
use syntect::html::{
start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground,
};
use syntect::html::{start_highlighted_html_snippet, IncludeBackground};
use crate::context::RenderContext;
use crate::table_of_contents::{make_table_of_contents, Heading};
use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
use config::highlighting::THEME_SET;
use errors::{Error, Result};
use front_matter::InsertAnchor;
use utils::site::resolve_internal_link;
@ -18,6 +15,10 @@ use utils::vec::InsertMany;
use self::cmark::{Event, LinkType, Options, Parser, Tag};
use pulldown_cmark::CodeBlockKind;
mod codeblock;
mod fence;
use self::codeblock::CodeBlock;
const CONTINUE_READING: &str = "<span id=\"continue-reading\"></span>";
const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
@ -172,8 +173,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
// Set while parsing
let mut error = None;
let mut background = IncludeBackground::Yes;
let mut highlighter: Option<(HighlightLines, bool)> = None;
let mut highlighter: Option<CodeBlock> = None;
let mut inserted_anchors: Vec<String> = vec![];
let mut headings: Vec<Heading> = vec![];
@ -192,26 +192,14 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
.map(|event| {
match event {
Event::Text(text) => {
// if we are in the middle of a code block
if let Some((ref mut highlighter, in_extra)) = highlighter {
let highlighted = if in_extra {
if let Some(ref extra) = context.config.extra_syntax_set {
highlighter.highlight(&text, &extra)
} else {
unreachable!(
"Got a highlighter from extra syntaxes but no extra?"
);
}
} else {
highlighter.highlight(&text, &SYNTAX_SET)
};
//let highlighted = &highlighter.highlight(&text, ss);
let html = styled_line_to_highlighted_html(&highlighted, background);
return Event::Html(html.into());
// if we are in the middle of a highlighted code block
if let Some(ref mut code_block) = highlighter {
let html = code_block.highlight(&text);
Event::Html(html.into())
} else {
// Business as usual
Event::Text(text)
}
// Business as usual
Event::Text(text)
}
Event::Start(Tag::CodeBlock(ref kind)) => {
if !context.config.highlight_code {
@ -221,16 +209,21 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
let theme = &THEME_SET.themes[&context.config.highlight_theme];
match kind {
CodeBlockKind::Indented => (),
CodeBlockKind::Fenced(info) => {
highlighter = Some(get_highlighter(info, &context.config));
CodeBlockKind::Fenced(fence_info) => {
// This selects the background color the same way that
// start_coloured_html_snippet does
let color = theme
.settings
.background
.unwrap_or(::syntect::highlighting::Color::WHITE);
highlighter = Some(CodeBlock::new(
fence_info,
&context.config,
IncludeBackground::IfDifferent(color),
));
}
};
// This selects the background color the same way that start_coloured_html_snippet does
let color = theme
.settings
.background
.unwrap_or(::syntect::highlighting::Color::WHITE);
background = IncludeBackground::IfDifferent(color);
let snippet = start_highlighted_html_snippet(theme);
let mut html = snippet.0;
html.push_str("<code>");

View file

@ -0,0 +1,196 @@
use syntect::html::{IncludeBackground, styled_line_to_highlighted_html};
use syntect::easy::HighlightLines;
use syntect::parsing::SyntaxSet;
use syntect::highlighting::{Color, Theme, Style};
use config::Config;
use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
use std::cmp::min;
use std::collections::HashSet;
use super::fence::{FenceSettings, Range};
/// Rendering state for a single fenced code block: the syntax highlighter
/// together with the line-highlighting settings parsed from the fence info.
pub struct CodeBlock<'config> {
    highlighter: HighlightLines<'static>,
    /// Set only when the highlighter was found in the extra syntaxes.
    extra_syntax_set: Option<&'config SyntaxSet>,
    background: IncludeBackground,
    theme: &'static Theme,

    /// Ranges of lines the author asked to highlight.
    highlight_lines: Vec<Range>,
    /// Line count of the text most recently passed to `highlight`
    /// (number of newlines plus one).
    num_lines: usize,
}

impl<'config> CodeBlock<'config> {
    /// Builds the state for a code block from its fence info string, the site
    /// configuration and the background mode to use when emitting HTML.
    pub fn new(
        fence_info: &str,
        config: &'config Config,
        background: IncludeBackground,
    ) -> Self {
        let settings = FenceSettings::new(fence_info);
        let theme = &THEME_SET.themes[&config.highlight_theme];
        let (highlighter, in_extra) = get_highlighter(settings.language, config);
        // Only remember the extra syntax set if the highlighter came from it.
        let extra_syntax_set = if in_extra { config.extra_syntax_set.as_ref() } else { None };

        Self {
            highlighter,
            extra_syntax_set,
            background,
            theme,
            highlight_lines: settings.highlight_lines,
            num_lines: 0,
        }
    }

    /// Syntax-highlights `text` and returns it as HTML, with the background of
    /// the requested lines replaced by the line-highlight colour.
    pub fn highlight(&mut self, text: &str) -> String {
        let syntax_set = self.extra_syntax_set.unwrap_or(&SYNTAX_SET);
        let spans = self.highlighter.highlight(text, syntax_set);
        let boundaries = self.find_line_boundaries(&spans);

        // Cut the spans at every line boundary so that no span crosses a
        // newline. `boundaries` is handed over by value because its indexes
        // refer to the unsplit spans and are meaningless afterwards.
        let mut spans = perform_split(&spans, boundaries);

        // NOTE(review): the fallback colour has `a: 0`; if alpha 0 means fully
        // transparent this fallback would be invisible -- confirm the intent.
        let fallback = Color { r: 255, g: 255, b: 0, a: 0 };
        let hl_background = self.theme.settings.line_highlight.unwrap_or(fallback);

        let wanted = self.get_highlighted_lines();
        color_highlighted_lines(&mut spans, &wanted, hl_background);

        styled_line_to_highlighted_html(&spans, self.background)
    }

    /// Records the position of every `\n` in `styled` and updates
    /// `self.num_lines` to the number of newlines plus one.
    fn find_line_boundaries(&mut self, styled: &[(Style, &str)]) -> Vec<StyledIdx> {
        let boundaries: Vec<StyledIdx> = styled
            .iter()
            .enumerate()
            .flat_map(|(vec_idx, item)| {
                item.1.match_indices('\n').map(move |(str_idx, _)| StyledIdx { vec_idx, str_idx })
            })
            .collect();

        self.num_lines = boundaries.len() + 1;
        boundaries
    }

    /// Expands the configured ranges into a set of zero-based line indexes,
    /// capping every range at `self.num_lines`.
    fn get_highlighted_lines(&self) -> HashSet<usize> {
        self.highlight_lines
            .iter()
            .flat_map(|range| range.from..=min(range.to, self.num_lines))
            // Ranges are one-indexed; a literal 0 is treated like line 1.
            .map(|line| line.saturating_sub(1))
            .collect()
    }
}
/// Position of a single character inside a `&[(Style, &'b str)]`: `vec_idx`
/// selects the slice element and `str_idx` is the byte offset of the character
/// within that element's string.
///
/// The derived `Ord` compares lexicographically: first by `vec_idx`, then by
/// `str_idx`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct StyledIdx {
    vec_idx: usize,
    str_idx: usize,
}

/// Helper for `perform_split`: yields the `str_idx` of `idx` when it is present
/// and points into element `vec_idx`; yields `None` in every other case.
fn get_str_idx_if_vec_idx_is(idx: Option<&StyledIdx>, vec_idx: usize) -> Option<usize> {
    idx.filter(|candidate| candidate.vec_idx == vec_idx).map(|candidate| candidate.str_idx)
}
/// Cuts the styled spans in `split` right after every newline listed in
/// `line_boundaries`, so that no resulting span continues past a `\n`.
///
/// `line_boundaries` must be sorted according to the `Ord` impl of `StyledIdx`.
fn perform_split<'b>(
    split: &[(Style, &'b str)],
    line_boundaries: Vec<StyledIdx>,
) -> Vec<(Style, &'b str)> {
    let mut pieces = Vec::new();
    let mut pending = line_boundaries.into_iter().peekable();

    for (split_idx, &(style, text)) in split.iter().enumerate() {
        // Byte offset in `text` where the next piece begins.
        let mut start = 0;

        // Because the boundaries are sorted, everything still pending points
        // at this span or a later one, and the ones for this span come first.
        // The helper only yields an offset while the front boundary belongs to
        // this span, so a boundary for a later span is peeked but never
        // consumed here. With nothing pending the loop body never runs.
        while let Some(newline_at) = get_str_idx_if_vec_idx_is(pending.peek(), split_idx) {
            // Drop the boundary that was just peeked.
            pending.next();

            // Cut one past the newline so it stays at the end of its own line
            // instead of starting the next one.
            let end = min(newline_at + 1, text.len());

            // Only fails when `line_boundaries` was not sorted.
            debug_assert!(end >= start);

            // Never emit an empty piece.
            if end != start {
                pieces.push((style, &text[start..end]));
                start = end;
            }
        }

        // Whatever is left of the span; the whole span if it was never cut.
        if start != text.len() {
            pieces.push((style, &text[start..]));
        }
    }

    pieces
}
/// Overwrites the background of every span that lies on one of the zero-based
/// line indexes in `lines`.
///
/// Expects `data` to be split so that a newline only ever appears as the last
/// character of a span; the line counter advances on such spans.
fn color_highlighted_lines(
    data: &mut [(Style, &str)],
    lines: &HashSet<usize>,
    background: Color,
) {
    // Nothing to recolour.
    if lines.is_empty() {
        return;
    }

    let mut line_no = 0;
    for (style, text) in data.iter_mut() {
        if lines.contains(&line_no) {
            style.background = background;
        }

        // A span ending in a newline is the last span of its line.
        if text.ends_with('\n') {
            line_no += 1;
        }
    }
}

View file

@ -0,0 +1,102 @@
/// An inclusive range of line numbers as written in an `hl_lines` setting.
///
/// A single line `n` is stored as `from == to == n`.
#[derive(Copy, Clone, Debug)]
pub struct Range {
    pub from: usize,
    pub to: usize,
}

impl Range {
    /// Parses either a single number (`"3"`) or a dash-separated pair
    /// (`"2-5"`). A reversed pair such as `"5-2"` is normalised so that
    /// `from <= to`. Returns `None` when any part is not a valid number.
    fn parse(s: &str) -> Option<Range> {
        match s.find('-') {
            Some(dash) => {
                let mut from: usize = s[..dash].parse().ok()?;
                let mut to: usize = s[dash + 1..].parse().ok()?;
                if to < from {
                    std::mem::swap(&mut from, &mut to);
                }
                Some(Range { from, to })
            }
            None => {
                let val = s.parse().ok()?;
                Some(Range { from: val, to: val })
            }
        }
    }
}

/// Settings extracted from the info string of a fenced code block.
#[derive(Debug)]
pub struct FenceSettings<'a> {
    /// The language token, if one was given.
    pub language: Option<&'a str>,
    /// Set when the `linenos` token is present.
    pub line_numbers: bool,
    /// All ranges from every `hl_lines=...` token, in order of appearance.
    pub highlight_lines: Vec<Range>,
}

impl<'a> FenceSettings<'a> {
    /// Parses a comma-separated fence info string such as
    /// `rust,linenos,hl_lines=1 3-4`.
    ///
    /// If several language tokens are present the last one wins; ranges from
    /// repeated `hl_lines` tokens are accumulated.
    pub fn new(fence_info: &'a str) -> Self {
        let mut me = Self {
            language: None,
            line_numbers: false,
            highlight_lines: Vec::new(),
        };

        for token in FenceIter::new(fence_info) {
            match token {
                FenceToken::Language(lang) => me.language = Some(lang),
                FenceToken::EnableLineNumbers => me.line_numbers = true,
                FenceToken::HighlightLines(lines) => me.highlight_lines.extend(lines),
            }
        }

        me
    }
}

/// One recognised item of a fence info string.
#[derive(Debug)]
enum FenceToken<'a> {
    Language(&'a str),
    EnableLineNumbers,
    HighlightLines(Vec<Range>),
}

/// Iterator over the comma-separated tokens of a fence info string.
struct FenceIter<'a> {
    split: std::str::Split<'a, char>,
}

impl<'a> FenceIter<'a> {
    fn new(fence_info: &'a str) -> Self {
        Self {
            split: fence_info.split(','),
        }
    }
}

/// Parses the value of an `hl_lines` setting into ranges, silently dropping
/// entries that are not valid numbers or ranges.
///
/// Entries are separated by whitespace. Whitespace around a dash is tolerated,
/// so `2 - 5` means the range 2 to 5 rather than the two lines 2 and 5.
fn parse_line_ranges(spec: &str) -> Vec<Range> {
    // Collapse every run of whitespace to one space, then glue each dash to
    // its neighbours so a spaced-out range becomes a single `A-B` entry.
    let normalized = spec
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .replace(" -", "-")
        .replace("- ", "-");

    normalized.split(' ').filter_map(Range::parse).collect()
}

impl<'a> Iterator for FenceIter<'a> {
    type Item = FenceToken<'a>;

    /// Returns the next recognised token, skipping empty ones.
    ///
    /// The text before the first `=` (trimmed) is the token name: `linenos`
    /// and `hl_lines` are settings, anything else is taken as the language.
    fn next(&mut self) -> Option<FenceToken<'a>> {
        loop {
            let tok = self.split.next()?.trim();

            let mut tok_split = tok.split('=');
            match tok_split.next().unwrap_or("").trim() {
                "" => continue,
                "linenos" => return Some(FenceToken::EnableLineNumbers),
                "hl_lines" => {
                    let spec = tok_split.next().unwrap_or("");
                    return Some(FenceToken::HighlightLines(parse_line_ranges(spec)));
                }
                lang => {
                    return Some(FenceToken::Language(lang));
                }
            }
        }
    }
}

View file

@ -0,0 +1,312 @@
use std::collections::HashMap;
use tera::Tera;
use config::Config;
use front_matter::InsertAnchor;
use rendering::{render_content, RenderContext};
use templates::ZOLA_TERA;
use utils::slugs::SlugifyStrategy;
/// Expected HTML for one run of lines sharing a style: `@no` is a plain span,
/// `@hl` a span carrying the line-highlight background. A newline is appended
/// after the given text.
macro_rules! colored_html_line {
    ( @no $s:expr ) => {{
        ["<span style=\"color:#c0c5ce;\">", $s, "\n</span>"].concat()
    }};
    ( @hl $s:expr ) => {{
        ["<span style=\"background-color:#65737e30;color:#c0c5ce;\">", $s, "\n</span>"].concat()
    }};
}

/// Expected HTML for a whole rendered code block: the `<pre>`/`<code>` wrapper
/// around a sequence of `@no`/`@hl` runs.
macro_rules! colored_html {
    ( $(@$kind:tt $s:expr),* $(,)* ) => {{
        let mut html = String::from("<pre style=\"background-color:#2b303b;\">\n<code>");
        $(
            html += &colored_html_line!(@$kind $s);
        )*
        html += "</code></pre>";
        html
    }};
}
/// A single line number highlights exactly that line.
#[test]
fn hl_lines_simple() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo",
        @hl "bar",
        @no "bar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range in the middle of the block highlights only those lines.
#[test]
fn hl_lines_in_middle() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=2-3
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo",
        @hl "bar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range covering every line highlights the whole block.
#[test]
fn hl_lines_all() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=1-4
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// Line numbers are one-indexed: a range starting at 1 begins on the first line.
#[test]
fn hl_lines_start_from_one() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=1-3
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range starting at 0 renders the same as one starting at 1.
#[test]
fn hl_lines_start_from_zero() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=0-3
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range ending on the last line highlights through the end of the block.
#[test]
fn hl_lines_end() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=3-4
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo\nbar",
        @hl "bar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range whose end lies far past the last line is clamped to the block.
#[test]
fn hl_lines_end_out_of_bounds() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=3-4294967295
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo\nbar",
        @hl "bar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// Overlapping ranges in one `hl_lines` value highlight their union.
#[test]
fn hl_lines_overlap() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=2-3 1-2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// Several `hl_lines` settings in one fence are combined.
#[test]
fn hl_lines_multiple() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=2-3,hl_lines=1-2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// Extra spaces around the setting name, `=` and `-` are tolerated.
#[test]
fn hl_lines_extra_spaces() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
``` hl_lines = 2 - 3 1 - 2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo\nbar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A single line number and a range can be mixed in one `hl_lines` value.
#[test]
fn hl_lines_int_and_range() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=1 3-4
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @hl "foo",
        @no "bar",
        @hl "bar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range whose two ends are equal highlights that one line.
#[test]
fn hl_lines_single_line_range() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=2-2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo",
        @hl "bar",
        @no "bar\nbaz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}
/// A range written backwards (`3-2`) is treated like `2-3`.
#[test]
fn hl_lines_reverse_range() {
    let mut config = Config::default();
    config.highlight_code = true;
    let tera = Tera::default();
    let permalinks = HashMap::new();
    let context = RenderContext::new(&tera, &config, "", &permalinks, InsertAnchor::None);

    let markdown = r#"
```hl_lines=3-2
foo
bar
bar
baz
```
"#;
    let expected = colored_html!(
        @no "foo",
        @hl "bar\nbar",
        @no "baz",
    );

    let rendered = render_content(markdown, &context).unwrap();
    assert_eq!(rendered.body, expected);
}