Include path in the search index with include_path (#1509)
This commit is contained in:
parent
4c22996e11
commit
16c123aa20
45
Cargo.lock
generated
45
Cargo.lock
generated
|
@ -155,9 +155,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.6.1"
|
||||
version = "3.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63396b8a4b9de3f4fdfb320ab6080762242f66a8ef174c49d8e19b674db4cdbe"
|
||||
checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631"
|
||||
|
||||
[[package]]
|
||||
name = "byte-tools"
|
||||
|
@ -207,9 +207,9 @@ checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.67"
|
||||
version = "1.0.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
|
||||
checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
|
@ -348,9 +348,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.4"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52fb27eab85b17fbb9f6fd667089e07d6a2eb8743d02639ee7f6a7a7729c9c94"
|
||||
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"crossbeam-utils",
|
||||
|
@ -361,11 +361,10 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.4"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4feb231f0d4d6af81aed15928e58ecf5816aa62a2393e2c82f46973e92a9a278"
|
||||
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if 1.0.0",
|
||||
"lazy_static",
|
||||
]
|
||||
|
@ -462,9 +461,9 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
|||
|
||||
[[package]]
|
||||
name = "elasticlunr-rs"
|
||||
version = "2.3.11"
|
||||
version = "2.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "959fbc9a6ebced545cbe365fdce5e25c6ab7683f2ca4ecc9fb9d0db663bf73d5"
|
||||
checksum = "2f8cf73b19a7aece6942f5745a2fc1ae3c8b0533569707d596b5d6baa7d6c600"
|
||||
dependencies = [
|
||||
"jieba-rs",
|
||||
"lazy_static",
|
||||
|
@ -922,9 +921,9 @@ checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68"
|
|||
|
||||
[[package]]
|
||||
name = "httpdate"
|
||||
version = "1.0.0"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05842d0d43232b23ccb7060ecb0f0626922c21f30012e97b767b30afd4a5d4b9"
|
||||
checksum = "6456b8a6c8f33fee7d958fcd1b60d55b11940a79e63ae87013e6d22e26034440"
|
||||
|
||||
[[package]]
|
||||
name = "humansize"
|
||||
|
@ -934,9 +933,9 @@ checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026"
|
|||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.7"
|
||||
version = "0.14.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e5f105c494081baa3bf9e200b279e27ec1623895cd504c7dbef8d0b080fcf54"
|
||||
checksum = "d3f71a7eea53a3f8257a7b4795373ff886397178cd634430ea94e12d7fe4fe34"
|
||||
dependencies = [
|
||||
"bytes 1.0.1",
|
||||
"futures-channel",
|
||||
|
@ -1177,9 +1176,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.94"
|
||||
version = "0.2.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e"
|
||||
checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36"
|
||||
|
||||
[[package]]
|
||||
name = "library"
|
||||
|
@ -1364,9 +1363,9 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
|||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.3"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d"
|
||||
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
@ -2719,9 +2718,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
|||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.6.0"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37"
|
||||
checksum = "0a38d31d7831c6ed7aad00aa4c12d9375fd225a6dd77da1d25b707346319a975"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes 1.0.1",
|
||||
|
@ -2898,9 +2897,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.17"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07fbfce1c8a97d547e8b5334978438d9d6ec8c20e38f56d4a4374d181493eaef"
|
||||
checksum = "33717dca7ac877f497014e10d73f3acf948c342bee31b5ca7892faf94ccc6b49"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
|
|
@ -13,6 +13,8 @@ pub struct Search {
|
|||
/// Includes the description in the search index. When the site becomes too large, you can switch
|
||||
/// to that instead. `false` by default
|
||||
pub include_description: bool,
|
||||
/// Include the path of the page in the search index. `false` by default.
|
||||
pub include_path: bool,
|
||||
}
|
||||
|
||||
impl Default for Search {
|
||||
|
@ -21,6 +23,7 @@ impl Default for Search {
|
|||
include_title: true,
|
||||
include_content: true,
|
||||
include_description: false,
|
||||
include_path: false,
|
||||
truncate_content_length: None,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use elasticlunr::{Index, Language};
|
||||
use elasticlunr::pipeline;
|
||||
use elasticlunr::pipeline::TokenizerFn;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use config::{Config, Search};
|
||||
|
@ -36,6 +38,10 @@ fn build_fields(search_config: &Search) -> Vec<String> {
|
|||
fields.push("description".to_owned());
|
||||
}
|
||||
|
||||
if search_config.include_path {
|
||||
fields.push("path".to_owned());
|
||||
}
|
||||
|
||||
if search_config.include_content {
|
||||
fields.push("body".to_owned());
|
||||
}
|
||||
|
@ -43,10 +49,46 @@ fn build_fields(search_config: &Search) -> Vec<String> {
|
|||
fields
|
||||
}
|
||||
|
||||
/// Breaks a page or section path into lowercase search tokens.
///
/// Whitespace, `-` and `/` are all treated as separators. Empty pieces
/// (produced by leading, trailing or consecutive separators) are discarded,
/// so `"/a/page/"` yields `["a", "page"]`.
fn path_tokenizer(text: &str) -> Vec<String> {
    let is_separator = |c: char| c.is_whitespace() || c == '-' || c == '/';

    let mut tokens = Vec::new();
    for piece in text.split(is_separator) {
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.trim().to_lowercase());
    }
    tokens
}
|
||||
|
||||
fn build_tokenizers(search_config: &Search, language: Language) -> Vec<TokenizerFn> {
|
||||
let text_tokenizer = match language {
|
||||
#[cfg(feature = "indexing-zh")]
|
||||
Language::Chinese => pipeline::tokenize_chinese,
|
||||
#[cfg(feature = "indexing-ja")]
|
||||
Language::Japanese => pipeline::tokenize_japanese,
|
||||
_ => pipeline::tokenize,
|
||||
};
|
||||
let mut tokenizers: Vec<TokenizerFn> = vec![];
|
||||
if search_config.include_title {
|
||||
tokenizers.push(text_tokenizer);
|
||||
}
|
||||
|
||||
if search_config.include_description {
|
||||
tokenizers.push(text_tokenizer);
|
||||
}
|
||||
|
||||
if search_config.include_path {
|
||||
tokenizers.push(path_tokenizer);
|
||||
}
|
||||
|
||||
if search_config.include_content {
|
||||
tokenizers.push(text_tokenizer);
|
||||
}
|
||||
|
||||
tokenizers
|
||||
}
|
||||
|
||||
fn fill_index(
|
||||
search_config: &Search,
|
||||
title: &Option<String>,
|
||||
description: &Option<String>,
|
||||
path: &str,
|
||||
content: &str,
|
||||
) -> Vec<String> {
|
||||
let mut row = vec![];
|
||||
|
@ -59,6 +101,10 @@ fn fill_index(
|
|||
row.push(description.clone().unwrap_or_default());
|
||||
}
|
||||
|
||||
if search_config.include_path {
|
||||
row.push(path.to_string());
|
||||
}
|
||||
|
||||
if search_config.include_content {
|
||||
let body = AMMONIA.clean(&content).to_string();
|
||||
if let Some(truncate_len) = search_config.truncate_content_length {
|
||||
|
@ -90,9 +136,11 @@ pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<Str
|
|||
let language_options = &config.languages[lang];
|
||||
let mut index = Index::with_language(language, &build_fields(&language_options.search));
|
||||
|
||||
let tokenizers = build_tokenizers(&language_options.search, language);
|
||||
|
||||
for section in library.sections_values() {
|
||||
if section.lang == lang {
|
||||
add_section_to_index(&mut index, section, library, &language_options.search);
|
||||
add_section_to_index(&mut index, section, library, &language_options.search, tokenizers.clone());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,6 +152,7 @@ fn add_section_to_index(
|
|||
section: &Section,
|
||||
library: &Library,
|
||||
search_config: &Search,
|
||||
tokenizers: Vec<TokenizerFn>,
|
||||
) {
|
||||
if !section.meta.in_search_index {
|
||||
return;
|
||||
|
@ -111,14 +160,16 @@ fn add_section_to_index(
|
|||
|
||||
// Don't index redirecting sections
|
||||
if section.meta.redirect_to.is_none() {
|
||||
index.add_doc(
|
||||
index.add_doc_with_tokenizers(
|
||||
§ion.permalink,
|
||||
&fill_index(
|
||||
search_config,
|
||||
§ion.meta.title,
|
||||
§ion.meta.description,
|
||||
§ion.path,
|
||||
§ion.content,
|
||||
),
|
||||
tokenizers.clone(),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -128,9 +179,10 @@ fn add_section_to_index(
|
|||
continue;
|
||||
}
|
||||
|
||||
index.add_doc(
|
||||
index.add_doc_with_tokenizers(
|
||||
&page.permalink,
|
||||
&fill_index(search_config, &page.meta.title, &page.meta.description, &page.content),
|
||||
&fill_index(search_config, &page.meta.title, &page.meta.description, &page.path, &page.content),
|
||||
tokenizers.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -166,9 +218,10 @@ mod tests {
|
|||
let config = Config::default();
|
||||
let title = Some("A title".to_string());
|
||||
let description = Some("A description".to_string());
|
||||
let path = "/a/page/".to_string();
|
||||
let content = "Some content".to_string();
|
||||
|
||||
let res = fill_index(&config.search, &title, &description, &content);
|
||||
let res = fill_index(&config.search, &title, &description, &path, &content);
|
||||
assert_eq!(res.len(), 2);
|
||||
assert_eq!(res[0], title.unwrap());
|
||||
assert_eq!(res[1], content);
|
||||
|
@ -180,9 +233,10 @@ mod tests {
|
|||
config.search.include_description = true;
|
||||
let title = Some("A title".to_string());
|
||||
let description = Some("A description".to_string());
|
||||
let path = "/a/page/".to_string();
|
||||
let content = "Some content".to_string();
|
||||
|
||||
let res = fill_index(&config.search, &title, &description, &content);
|
||||
let res = fill_index(&config.search, &title, &description, &path, &content);
|
||||
assert_eq!(res.len(), 3);
|
||||
assert_eq!(res[0], title.unwrap());
|
||||
assert_eq!(res[1], description.unwrap());
|
||||
|
@ -195,9 +249,10 @@ mod tests {
|
|||
config.search.truncate_content_length = Some(5);
|
||||
let title = Some("A title".to_string());
|
||||
let description = Some("A description".to_string());
|
||||
let path = "/a/page/".to_string();
|
||||
let content = "Some content".to_string();
|
||||
|
||||
let res = fill_index(&config.search, &title, &description, &content);
|
||||
let res = fill_index(&config.search, &title, &description, &path, &content);
|
||||
assert_eq!(res.len(), 2);
|
||||
assert_eq!(res[0], title.unwrap());
|
||||
assert_eq!(res[1], content[..5]);
|
||||
|
|
|
@ -151,6 +151,8 @@ build_search_index = false
|
|||
include_title = true
|
||||
# Whether to include the description of the page/section in the index
|
||||
include_description = false
|
||||
# Whether to include the path of the page/section in the index
|
||||
include_path = false
|
||||
# Whether to include the rendered content of the page/section in the index
|
||||
include_content = true
|
||||
# At which character to truncate the content to. Useful if you have a lot of pages and the index would
|
||||
|
|
Loading…
Reference in a new issue