-
Notifications
You must be signed in to change notification settings - Fork 972
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for Fuse.js search format (#2507)
* initial "just barely works" Fuse.js support * implement FuseJavascript; refactor index_for_lang * support search config * move fuse index building to its own file * update doc of Search.index_format * update config docs * update search documentation * use &str where possible * use libs::serde_json remember to commit Cargo.lock * move extension logic to IndexFormat * move the entire filename logic inside IndexFormat * move elasticlunr to its own module * only create elasticlunr.min.js if we're actually using elasticlunr * move ELASTICLUNR_JS to elasticlunr.js * hide the details of search's submodules * optionally include path * explain include_path better * remove references to stork * replace if with match * support include_description * specify "permalink" * move body cleaning and truncation to a function * update truncate_content_length docs to specify *code points*
- Loading branch information
1 parent
d75b00c
commit 8c56a0e
Showing
10 changed files
with
496 additions
and
348 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
use config::{Config, Search}; | ||
use content::{Library, Section}; | ||
use errors::{bail, Result}; | ||
use libs::elasticlunr::{lang, Index, IndexBuilder}; | ||
use libs::time::format_description::well_known::Rfc3339; | ||
use libs::time::OffsetDateTime; | ||
|
||
use crate::clean_and_truncate_body; | ||
|
||
/// Contents of the `elasticlunr.min.js` file that sits next to this source file,
/// embedded into the binary as a string at compile time by `include_str!`.
pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js"); | ||
|
||
fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder { | ||
if search_config.include_title { | ||
index = index.add_field("title"); | ||
} | ||
|
||
if search_config.include_description { | ||
index = index.add_field("description"); | ||
} | ||
|
||
if search_config.include_date { | ||
index = index.add_field("date") | ||
} | ||
|
||
if search_config.include_path { | ||
index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer)); | ||
} | ||
|
||
if search_config.include_content { | ||
index = index.add_field("body") | ||
} | ||
|
||
index | ||
} | ||
|
||
/// Splits a path into lowercase tokens for the `path` search field.
///
/// Whitespace, `-` and `/` all act as separators. Empty pieces, which appear for
/// leading, trailing or consecutive separators, are dropped.
fn path_tokenizer(text: &str) -> Vec<String> {
    text.split(|c: char| c.is_whitespace() || c == '-' || c == '/')
        .filter(|piece| !piece.is_empty())
        // No `trim()` here: whitespace is itself a separator, so a piece can never
        // start or end with any.
        .map(str::to_lowercase)
        .collect()
}
|
||
fn fill_index( | ||
search_config: &Search, | ||
title: &Option<String>, | ||
description: &Option<String>, | ||
datetime: &Option<OffsetDateTime>, | ||
path: &str, | ||
content: &str, | ||
) -> Vec<String> { | ||
let mut row = vec![]; | ||
|
||
if search_config.include_title { | ||
row.push(title.clone().unwrap_or_default()); | ||
} | ||
|
||
if search_config.include_description { | ||
row.push(description.clone().unwrap_or_default()); | ||
} | ||
|
||
if search_config.include_date { | ||
if let Some(date) = datetime { | ||
if let Ok(d) = date.format(&Rfc3339) { | ||
row.push(d); | ||
} | ||
} | ||
} | ||
|
||
if search_config.include_path { | ||
row.push(path.to_string()); | ||
} | ||
|
||
if search_config.include_content { | ||
row.push(clean_and_truncate_body(search_config.truncate_content_length, content)); | ||
} | ||
row | ||
} | ||
|
||
/// Returns the generated JSON index with all the documents of the site added using | ||
/// the language given | ||
/// Errors if the language given is not available in Elasticlunr | ||
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful? | ||
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> { | ||
let language = match lang::from_code(lang) { | ||
Some(l) => l, | ||
None => { | ||
bail!("Tried to build search index for language {} which is not supported", lang); | ||
} | ||
}; | ||
let language_options = &config.languages[lang]; | ||
let mut index = IndexBuilder::with_language(language); | ||
index = build_fields(&language_options.search, index); | ||
let mut index = index.build(); | ||
|
||
for (_, section) in &library.sections { | ||
if section.lang == lang { | ||
add_section_to_index(&mut index, section, library, &language_options.search); | ||
} | ||
} | ||
|
||
Ok(index.to_json()) | ||
} | ||
|
||
fn add_section_to_index( | ||
index: &mut Index, | ||
section: &Section, | ||
library: &Library, | ||
search_config: &Search, | ||
) { | ||
if !section.meta.in_search_index { | ||
return; | ||
} | ||
|
||
// Don't index redirecting sections | ||
if section.meta.redirect_to.is_none() { | ||
index.add_doc( | ||
§ion.permalink, | ||
&fill_index( | ||
search_config, | ||
§ion.meta.title, | ||
§ion.meta.description, | ||
&None, | ||
§ion.path, | ||
§ion.content, | ||
), | ||
); | ||
} | ||
|
||
for key in §ion.pages { | ||
let page = &library.pages[key]; | ||
if !page.meta.in_search_index { | ||
continue; | ||
} | ||
|
||
index.add_doc( | ||
&page.permalink, | ||
&fill_index( | ||
search_config, | ||
&page.meta.title, | ||
&page.meta.description, | ||
&page.meta.datetime, | ||
&page.path, | ||
&page.content, | ||
), | ||
); | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use config::Config;
    use libs::elasticlunr::IndexBuilder;

    // Fixture document shared by the `fill_index` tests.
    const TITLE: &str = "A title";
    const DESCRIPTION: &str = "A description";
    const PATH: &str = "/a/page/";
    const CONTENT: &str = "Some content";

    /// Builds an index from `search` on a fresh builder and checks its field names.
    fn assert_fields(search: &Search, expected: Vec<&str>) {
        let index = build_fields(search, IndexBuilder::new()).build();
        assert_eq!(index.get_fields(), expected);
    }

    /// Runs `fill_index` over the fixture document with the given date.
    fn fill(search: &Search, datetime: &Option<OffsetDateTime>) -> Vec<String> {
        fill_index(
            search,
            &Some(TITLE.to_string()),
            &Some(DESCRIPTION.to_string()),
            datetime,
            PATH,
            CONTENT,
        )
    }

    #[test]
    fn can_build_fields() {
        let mut config = Config::default();
        assert_fields(&config.search, vec!["title", "body"]);

        config.search.include_content = false;
        config.search.include_description = true;
        assert_fields(&config.search, vec!["title", "description"]);

        config.search.include_content = true;
        assert_fields(&config.search, vec!["title", "description", "body"]);

        config.search.include_title = false;
        assert_fields(&config.search, vec!["description", "body"]);
    }

    #[test]
    fn can_fill_index_default() {
        let config = Config::default();

        let res = fill(&config.search, &None);
        assert_eq!(res, vec![TITLE, CONTENT]);
    }

    #[test]
    fn can_fill_index_description() {
        let mut config = Config::default();
        config.search.include_description = true;

        let res = fill(&config.search, &None);
        assert_eq!(res, vec![TITLE, DESCRIPTION, CONTENT]);
    }

    #[test]
    fn can_fill_index_truncated_content() {
        let mut config = Config::default();
        config.search.truncate_content_length = Some(5);

        let res = fill(&config.search, &None);
        assert_eq!(res, vec![TITLE, &CONTENT[..5]]);
    }

    #[test]
    fn can_fill_index_date() {
        let mut config = Config::default();
        config.search.include_date = true;
        let datetime = Some(OffsetDateTime::parse("2023-01-31T00:00:00Z", &Rfc3339).unwrap());

        let res = fill(&config.search, &datetime);
        assert_eq!(res, vec![TITLE, "2023-01-31T00:00:00Z", CONTENT]);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
use config::Search; | ||
use content::Library; | ||
use errors::Result; | ||
use libs::serde_json; | ||
|
||
use crate::clean_and_truncate_body; | ||
|
||
/// build index in Fuse.js format. | ||
pub fn build_index(lang: &str, library: &Library, config: &Search) -> Result<String> { | ||
#[derive(serde::Serialize)] | ||
struct Item<'a> { | ||
url: &'a str, | ||
title: Option<&'a str>, | ||
description: Option<&'a str>, | ||
body: Option<String>, // AMMONIA.clean has to allocate anyway | ||
path: Option<&'a str>, | ||
} | ||
let mut items: Vec<Item> = Vec::new(); | ||
for (_, section) in &library.sections { | ||
if section.lang == lang | ||
&& section.meta.redirect_to.is_none() | ||
&& section.meta.in_search_index | ||
{ | ||
items.push(Item { | ||
url: §ion.permalink, | ||
title: match config.include_title { | ||
true => Some(§ion.meta.title.as_deref().unwrap_or_default()), | ||
false => None, | ||
}, | ||
description: match config.include_description { | ||
true => Some(§ion.meta.description.as_deref().unwrap_or_default()), | ||
false => None, | ||
}, | ||
body: match config.include_content { | ||
true => Some(clean_and_truncate_body( | ||
config.truncate_content_length, | ||
§ion.content, | ||
)), | ||
false => None, | ||
}, | ||
path: match config.include_path { | ||
true => Some(§ion.path), | ||
false => None, | ||
}, | ||
}); | ||
for page in §ion.pages { | ||
let page = &library.pages[page]; | ||
if page.meta.in_search_index { | ||
items.push(Item { | ||
url: &page.permalink, | ||
title: match config.include_title { | ||
true => Some(&page.meta.title.as_deref().unwrap_or_default()), | ||
false => None, | ||
}, | ||
description: match config.include_description { | ||
true => Some(&page.meta.description.as_deref().unwrap_or_default()), | ||
false => None, | ||
}, | ||
body: match config.include_content { | ||
true => Some(super::clean_and_truncate_body( | ||
config.truncate_content_length, | ||
&page.content, | ||
)), | ||
false => None, | ||
}, | ||
path: match config.include_path { | ||
true => Some(&page.path), | ||
false => None, | ||
}, | ||
}) | ||
} | ||
} | ||
} | ||
} | ||
Ok(serde_json::to_string(&items)?) | ||
} |
Oops, something went wrong.