zola/components/search/src/lib.rs

196 lines
5.7 KiB
Rust
Raw Normal View History

2018-03-15 17:58:32 +00:00
use std::collections::{HashMap, HashSet};
2018-03-20 20:27:33 +00:00
use elasticlunr::{Index, Language};
2019-12-21 21:52:39 +00:00
use lazy_static::lazy_static;
2018-03-20 20:27:33 +00:00
use config::Config;
2019-12-21 21:52:39 +00:00
use errors::{bail, Result};
2018-10-31 07:18:57 +00:00
use library::{Library, Section};
2018-03-15 17:58:32 +00:00
2018-09-30 19:15:09 +00:00
pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js");
2018-03-15 17:58:32 +00:00
lazy_static! {
static ref AMMONIA: ammonia::Builder<'static> = {
let mut clean_content = HashSet::new();
clean_content.insert("script");
clean_content.insert("style");
let mut builder = ammonia::Builder::new();
builder
.tags(HashSet::new())
.tag_attributes(HashMap::new())
.generic_attributes(HashSet::new())
.link_rel(None)
.allowed_classes(HashMap::new())
.clean_content_tags(clean_content);
builder
};
}
fn build_fields(config: &Config) -> Vec<String> {
let mut fields = vec![];
if config.search.include_title {
fields.push("title".to_owned());
}
if config.search.include_description {
fields.push("description".to_owned());
}
if config.search.include_content {
fields.push("body".to_owned());
}
fields
}
fn fill_index(
config: &Config,
title: &Option<String>,
description: &Option<String>,
content: &str,
) -> Vec<String> {
let mut row = vec![];
if config.search.include_title {
row.push(title.clone().unwrap_or_default());
}
if config.search.include_description {
row.push(description.clone().unwrap_or_default());
}
if config.search.include_content {
let body = AMMONIA.clean(&content).to_string();
if let Some(truncate_len) = config.search.truncate_content_length {
// Not great for unicode
// TODO: fix it like the truncate in Tera
match body.char_indices().nth(truncate_len) {
None => row.push(body),
Some((idx, _)) => row.push((&body[..idx]).to_string()),
};
} else {
row.push(body);
};
}
row
}
2018-03-20 20:27:33 +00:00
/// Returns the generated JSON index with all the documents of the site added using
/// the language given
/// Errors if the language given is not available in Elasticlunr
2018-03-15 17:58:32 +00:00
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful?
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> {
2018-03-20 20:27:33 +00:00
let language = match Language::from_code(lang) {
Some(l) => l,
2018-10-31 07:18:57 +00:00
None => {
bail!("Tried to build search index for language {} which is not supported", lang);
}
2018-03-20 20:27:33 +00:00
};
let mut index = Index::with_language(language, &build_fields(&config));
2018-03-15 17:58:32 +00:00
2018-10-02 14:42:34 +00:00
for section in library.sections_values() {
if section.lang == lang {
add_section_to_index(&mut index, section, library, config);
}
2018-03-15 17:58:32 +00:00
}
2018-03-20 20:27:33 +00:00
Ok(index.to_json())
2018-03-15 17:58:32 +00:00
}
fn add_section_to_index(index: &mut Index, section: &Section, library: &Library, config: &Config) {
2018-03-15 17:58:32 +00:00
if !section.meta.in_search_index {
return;
}
// Don't index redirecting sections
if section.meta.redirect_to.is_none() {
index.add_doc(
&section.permalink,
&fill_index(config, &section.meta.title, &section.meta.description, &section.content),
2018-03-15 17:58:32 +00:00
);
}
2018-10-02 14:42:34 +00:00
for key in &section.pages {
let page = library.get_page_by_key(*key);
2019-07-19 09:10:28 +00:00
if !page.meta.in_search_index {
2018-03-15 17:58:32 +00:00
continue;
}
index.add_doc(
&page.permalink,
&fill_index(config, &page.meta.title, &page.meta.description, &page.content),
2018-03-15 17:58:32 +00:00
);
}
}
#[cfg(test)]
mod tests {
use super::*;
use config::Config;
#[test]
fn can_build_fields() {
let mut config = Config::default();
let fields = build_fields(&config);
assert_eq!(fields, vec!["title", "body"]);
config.search.include_content = false;
config.search.include_description = true;
let fields = build_fields(&config);
assert_eq!(fields, vec!["title", "description"]);
config.search.include_content = true;
let fields = build_fields(&config);
assert_eq!(fields, vec!["title", "description", "body"]);
config.search.include_title = false;
let fields = build_fields(&config);
assert_eq!(fields, vec!["description", "body"]);
}
#[test]
fn can_fill_index_default() {
let config = Config::default();
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let content = "Some content".to_string();
let res = fill_index(&config, &title, &description, &content);
assert_eq!(res.len(), 2);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], content);
}
#[test]
fn can_fill_index_description() {
let mut config = Config::default();
config.search.include_description = true;
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let content = "Some content".to_string();
let res = fill_index(&config, &title, &description, &content);
assert_eq!(res.len(), 3);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], description.unwrap());
assert_eq!(res[2], content);
}
#[test]
fn can_fill_index_truncated_content() {
let mut config = Config::default();
config.search.truncate_content_length = Some(5);
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let content = "Some content".to_string();
let res = fill_index(&config, &title, &description, &content);
assert_eq!(res.len(), 2);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], content[..5]);
}
}