2018-03-15 17:58:32 +00:00
|
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
|
2018-03-20 20:27:33 +00:00
|
|
|
use elasticlunr::{Index, Language};
|
2019-12-21 21:52:39 +00:00
|
|
|
use lazy_static::lazy_static;
|
2018-03-20 20:27:33 +00:00
|
|
|
|
2020-06-29 18:02:05 +00:00
|
|
|
use config::Config;
|
2019-12-21 21:52:39 +00:00
|
|
|
use errors::{bail, Result};
|
2018-10-31 07:18:57 +00:00
|
|
|
use library::{Library, Section};
|
2018-03-15 17:58:32 +00:00
|
|
|
|
2018-09-30 19:15:09 +00:00
|
|
|
/// Contents of the minified elasticlunr JavaScript file, embedded into the binary at
/// compile time from `elasticlunr.min.js` (path resolved relative to this source file).
pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js");
|
2018-03-15 17:58:32 +00:00
|
|
|
|
|
|
|
lazy_static! {
    /// Shared HTML sanitizer used to turn rendered content into text for the search index.
    ///
    /// Every allow-list (tags, per-tag attributes, generic attributes, classes) is set to
    /// an empty collection and no `rel` attribute is configured for links. `script` and
    /// `style` are registered as clean-content tags.
    static ref AMMONIA: ammonia::Builder<'static> = {
        // Tags handed to `clean_content_tags`.
        let content_stripped: HashSet<&'static str> =
            ["script", "style"].iter().copied().collect();

        let mut sanitizer = ammonia::Builder::new();
        sanitizer.tags(HashSet::new());
        sanitizer.tag_attributes(HashMap::new());
        sanitizer.generic_attributes(HashSet::new());
        sanitizer.link_rel(None);
        sanitizer.allowed_classes(HashMap::new());
        sanitizer.clean_content_tags(content_stripped);
        sanitizer
    };
}
|
|
|
|
|
2020-06-29 18:02:05 +00:00
|
|
|
fn build_fields(config: &Config) -> Vec<String> {
|
|
|
|
let mut fields = vec![];
|
|
|
|
if config.search.include_title {
|
|
|
|
fields.push("title".to_owned());
|
|
|
|
}
|
|
|
|
|
|
|
|
if config.search.include_description {
|
|
|
|
fields.push("description".to_owned());
|
|
|
|
}
|
|
|
|
|
|
|
|
if config.search.include_content {
|
|
|
|
fields.push("body".to_owned());
|
|
|
|
}
|
|
|
|
|
|
|
|
fields
|
|
|
|
}
|
|
|
|
|
|
|
|
fn fill_index(
|
|
|
|
config: &Config,
|
|
|
|
title: &Option<String>,
|
|
|
|
description: &Option<String>,
|
|
|
|
content: &str,
|
|
|
|
) -> Vec<String> {
|
|
|
|
let mut row = vec![];
|
|
|
|
|
|
|
|
if config.search.include_title {
|
|
|
|
row.push(title.clone().unwrap_or_default());
|
|
|
|
}
|
|
|
|
|
|
|
|
if config.search.include_description {
|
|
|
|
row.push(description.clone().unwrap_or_default());
|
|
|
|
}
|
|
|
|
|
|
|
|
if config.search.include_content {
|
|
|
|
let body = AMMONIA.clean(&content).to_string();
|
|
|
|
if let Some(truncate_len) = config.search.truncate_content_length {
|
|
|
|
// Not great for unicode
|
|
|
|
// TODO: fix it like the truncate in Tera
|
|
|
|
match body.char_indices().nth(truncate_len) {
|
|
|
|
None => row.push(body),
|
|
|
|
Some((idx, _)) => row.push((&body[..idx]).to_string()),
|
|
|
|
};
|
|
|
|
} else {
|
|
|
|
row.push(body);
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
row
|
|
|
|
}
|
|
|
|
|
2018-03-20 20:27:33 +00:00
|
|
|
/// Returns the generated JSON index with all the documents of the site added using
|
|
|
|
/// the language given
|
|
|
|
/// Errors if the language given is not available in Elasticlunr
|
2018-03-15 17:58:32 +00:00
|
|
|
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful?
|
2020-06-29 18:02:05 +00:00
|
|
|
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> {
|
2018-03-20 20:27:33 +00:00
|
|
|
let language = match Language::from_code(lang) {
|
|
|
|
Some(l) => l,
|
2018-10-31 07:18:57 +00:00
|
|
|
None => {
|
|
|
|
bail!("Tried to build search index for language {} which is not supported", lang);
|
|
|
|
}
|
2018-03-20 20:27:33 +00:00
|
|
|
};
|
|
|
|
|
2020-06-29 18:02:05 +00:00
|
|
|
let mut index = Index::with_language(language, &build_fields(&config));
|
2018-03-15 17:58:32 +00:00
|
|
|
|
2018-10-02 14:42:34 +00:00
|
|
|
for section in library.sections_values() {
|
2019-09-03 14:50:23 +00:00
|
|
|
if section.lang == lang {
|
2020-06-29 18:02:05 +00:00
|
|
|
add_section_to_index(&mut index, section, library, config);
|
2019-09-03 14:50:23 +00:00
|
|
|
}
|
2018-03-15 17:58:32 +00:00
|
|
|
}
|
|
|
|
|
2018-03-20 20:27:33 +00:00
|
|
|
Ok(index.to_json())
|
2018-03-15 17:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-06-29 18:02:05 +00:00
|
|
|
fn add_section_to_index(index: &mut Index, section: &Section, library: &Library, config: &Config) {
|
2018-03-15 17:58:32 +00:00
|
|
|
if !section.meta.in_search_index {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Don't index redirecting sections
|
|
|
|
if section.meta.redirect_to.is_none() {
|
|
|
|
index.add_doc(
|
|
|
|
§ion.permalink,
|
2020-06-29 18:02:05 +00:00
|
|
|
&fill_index(config, §ion.meta.title, §ion.meta.description, §ion.content),
|
2018-03-15 17:58:32 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2018-10-02 14:42:34 +00:00
|
|
|
for key in §ion.pages {
|
|
|
|
let page = library.get_page_by_key(*key);
|
2019-07-19 09:10:28 +00:00
|
|
|
if !page.meta.in_search_index {
|
2018-03-15 17:58:32 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
index.add_doc(
|
|
|
|
&page.permalink,
|
2020-06-29 18:02:05 +00:00
|
|
|
&fill_index(config, &page.meta.title, &page.meta.description, &page.content),
|
2018-03-15 17:58:32 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
2020-06-29 18:02:05 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    use config::Config;

    /// Title, description and content shared by the `fill_index` tests.
    fn sample_document() -> (Option<String>, Option<String>, String) {
        (
            Some("A title".to_string()),
            Some("A description".to_string()),
            "Some content".to_string(),
        )
    }

    #[test]
    fn can_build_fields() {
        // Flags are toggled cumulatively on one config, so each step builds on the last.
        let mut cfg = Config::default();
        assert_eq!(build_fields(&cfg), vec!["title", "body"]);

        cfg.search.include_content = false;
        cfg.search.include_description = true;
        assert_eq!(build_fields(&cfg), vec!["title", "description"]);

        cfg.search.include_content = true;
        assert_eq!(build_fields(&cfg), vec!["title", "description", "body"]);

        cfg.search.include_title = false;
        assert_eq!(build_fields(&cfg), vec!["description", "body"]);
    }

    #[test]
    fn can_fill_index_default() {
        let cfg = Config::default();
        let (title, description, content) = sample_document();

        let row = fill_index(&cfg, &title, &description, &content);

        // Default config: title and body only.
        assert_eq!(row, vec![title.unwrap(), content]);
    }

    #[test]
    fn can_fill_index_description() {
        let mut cfg = Config::default();
        cfg.search.include_description = true;
        let (title, description, content) = sample_document();

        let row = fill_index(&cfg, &title, &description, &content);

        assert_eq!(row, vec![title.unwrap(), description.unwrap(), content]);
    }

    #[test]
    fn can_fill_index_truncated_content() {
        let mut cfg = Config::default();
        cfg.search.truncate_content_length = Some(5);
        let (title, description, content) = sample_document();

        let row = fill_index(&cfg, &title, &description, &content);

        // Body is cut down to its first five characters.
        assert_eq!(row, vec![title.unwrap(), content[..5].to_string()]);
    }
}
|