From 37af36186ad829d2bc93a0864f526f7b92fc6099 Mon Sep 17 00:00:00 2001 From: Vincent Prouillet Date: Sat, 1 Jul 2017 19:13:21 +0900 Subject: [PATCH] Improve sorting speed --- CHANGELOG.md | 1 + appveyor.yml | 2 +- ci/script.sh | 4 +- components/content/benches/sorting.rs | 135 ++++++++++++++++++++++++++ components/content/src/sorting.rs | 132 ++++++++++++------------- components/front_matter/src/page.rs | 7 ++ components/site/src/lib.rs | 8 +- components/taxonomies/src/lib.rs | 3 +- 8 files changed, 214 insertions(+), 78 deletions(-) create mode 100644 components/content/benches/sorting.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e5dc0c7a..5047dec9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Add weight sorting - Remove `section` from the `page` rendering context: this is too expensive. Use the global function `get_section` if you need to get it +- Fix next/previous in pagination being incorrect ## 0.0.7 (2017-06-19) diff --git a/appveyor.yml b/appveyor.yml index a3b020ac..85eebd83 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,7 +35,7 @@ install: test_script: # we don't run the "test phase" when doing deploys - if [%APPVEYOR_REPO_TAG%]==[false] ( - cargo test --target %TARGET% + cargo test -all --target %TARGET% ) before_deploy: diff --git a/ci/script.sh b/ci/script.sh index 293504df..9d086826 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -11,8 +11,8 @@ main() { return fi - cross test --target $TARGET - cross test --target $TARGET --release + cross test --all --target $TARGET + cross test --all --target $TARGET --release } # we don't run the "test phase" when doing deploys diff --git a/components/content/benches/sorting.rs b/components/content/benches/sorting.rs new file mode 100644 index 00000000..5803e40f --- /dev/null +++ b/components/content/benches/sorting.rs @@ -0,0 +1,135 @@ +#![feature(test)] +extern crate test; +extern crate tera; + +extern crate content; +extern crate front_matter; +extern crate config; + +use std::collections::HashMap; + +use config::Config; +use tera::Tera; +use front_matter::{SortBy, InsertAnchor}; +use content::{Page, sort_pages, populate_previous_and_next_pages}; + + +fn create_pages(number: usize, sort_by: SortBy) -> Vec { + let mut pages = vec![]; + let config = Config::default(); + let tera = Tera::default(); + let permalinks = HashMap::new(); + + for i in 0..number { + let mut page = Page::default(); + match sort_by { + SortBy::Weight => { page.meta.weight = Some(i); }, + SortBy::Order => { page.meta.order = Some(i); }, + _ => (), + }; + page.raw_content = r#" +# Modus cognitius profanam ne duae virtutis mundi + +## Ut vita + +Lorem markdownum litora, care ponto nomina, et ut aspicit gelidas sui et +purpureo genuit. Tamen colla venientis [delphina](http://nil-sol.com/ecquis) +Tusci et temptata citaeque curam isto ubi vult vulnere reppulit. + +- Seque vidit flendoque de quodam +- Dabit minimos deiecto caputque noctis pluma +- Leti coniunx est Helicen +- Illius pulvereumque Icare inpositos +- Vivunt pereo pluvio tot ramos Olenios gelidis +- Quater teretes natura inde + +### A subsection + +Protinus dicunt, breve per, et vivacis genus Orphei munere. Me terram [dimittere +casside](http://corpus.org/) pervenit saxo primoque frequentat genuum sorori +praeferre causas Libys. Illud in serpit adsuetam utrimque nunc haberent, +**terrae si** veni! Hectoreis potes sumite [Mavortis retusa](http://tua.org/) +granum captantur potuisse Minervae, frugum. + +> Clivo sub inprovisoque nostrum minus fama est, discordia patrem petebat precatur +absumitur, poena per sit. Foramina *tamen cupidine* memor supplex tollentes +dictum unam orbem, Anubis caecae. Viderat formosior tegebat satis, Aethiopasque +sit submisso coniuge tristis ubi! + +## Praeceps Corinthus totidem quem crus vultum cape + +```rs +#[derive(Debug)] +pub struct Site { + /// The base path of the gutenberg site + pub base_path: PathBuf, + /// The parsed config for the site + pub config: Config, + pub pages: HashMap, + pub sections: HashMap, + pub tera: Tera, + live_reload: bool, + output_path: PathBuf, + static_path: PathBuf, + pub tags: Option, + pub categories: Option, + /// A map of all .md files (section and pages) and their permalink + /// We need that if there are relative links in the content that need to be resolved + pub permalinks: HashMap, +} +``` + +## More stuff +And a shortcode: + +{{ youtube(id="my_youtube_id") }} + +### Another subsection +Gotta make the toc do a little bit of work + +# A big title + +- hello +- world +- ! + +```py +if __name__ == "__main__": + gen_site("basic-blog", [""], 250, paginate=True) +``` +"#.to_string(); + page.render_markdown(&permalinks, &tera, &config, InsertAnchor::None).unwrap(); + pages.push(page); + } + + pages +} + +// Most of the time spent in those benches are due to the .clone()... +// but i don't know how to remove them so there are some baseline bench with +// just the cloning and with a bit of math we can figure it out + +#[bench] +fn bench_baseline_cloning(b: &mut test::Bencher) { + let pages = create_pages(250, SortBy::Order); + b.iter(|| pages.clone()); +} + +#[bench] +fn bench_sorting_none(b: &mut test::Bencher) { + let pages = create_pages(250, SortBy::Order); + b.iter(|| sort_pages(pages.clone(), SortBy::None)); +} + +#[bench] +fn bench_sorting_order(b: &mut test::Bencher) { + let pages = create_pages(250, SortBy::Order); + b.iter(|| sort_pages(pages.clone(), SortBy::Order)); +} + +#[bench] +fn bench_populate_previous_and_next_pages(b: &mut test::Bencher) { + let pages = create_pages(250, SortBy::Order); + let (sorted_pages, _) = sort_pages(pages, SortBy::Order); + b.iter(|| populate_previous_and_next_pages(sorted_pages.clone())); +} diff --git a/components/content/src/sorting.rs b/components/content/src/sorting.rs index a006bcfc..77271774 100644 --- a/components/content/src/sorting.rs +++ b/components/content/src/sorting.rs @@ -3,76 +3,60 @@ use rayon::prelude::*; use page::Page; use front_matter::SortBy; -/// Sort pages using the method for the given section +/// Sort pages by the given criteria /// -/// Any pages that doesn't have a date when the sorting method is date or order -/// when the sorting method is order will be ignored. +/// Any pages that doesn't have a the required field when the sorting method is other than none +/// will be ignored. pub fn sort_pages(pages: Vec, sort_by: SortBy) -> (Vec, Vec) { - match sort_by { - SortBy::Date => { - let mut can_be_sorted = vec![]; - let mut cannot_be_sorted = vec![]; - for page in pages { - if page.meta.date.is_some() { - can_be_sorted.push(page); - } else { - cannot_be_sorted.push(page); - } - } - can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap())); - - (can_be_sorted, cannot_be_sorted) - }, - SortBy::Order | SortBy::Weight => { - let mut can_be_sorted = vec![]; - let mut cannot_be_sorted = vec![]; - for page in pages { - if page.meta.order.is_some() { - can_be_sorted.push(page); - } else { - cannot_be_sorted.push(page); - } - } - if sort_by == SortBy::Order { - can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order())); - } else { - can_be_sorted.par_sort_unstable_by(|a, b| a.meta.order().cmp(&b.meta.order())); - } - - (can_be_sorted, cannot_be_sorted) - }, - SortBy::None => (pages, vec![]) + if sort_by == SortBy::None { + return (pages, vec![]); } + + let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = pages + .into_par_iter() + .partition(|ref page| { + match sort_by { + SortBy::Date => page.meta.date.is_some(), + SortBy::Order => page.meta.order.is_some(), + SortBy::Weight => page.meta.weight.is_some(), + _ => unreachable!() + } + }); + + match sort_by { + SortBy::Date => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap())), + SortBy::Order => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order())), + SortBy::Weight => can_be_sorted.par_sort_unstable_by(|a, b| a.meta.weight().cmp(&b.meta.weight())), + _ => unreachable!() + }; + + (can_be_sorted, cannot_be_sorted) } /// Horribly inefficient way to set previous and next on each pages /// So many clones -pub fn populate_previous_and_next_pages(input: &[Page]) -> Vec { - let pages = input.to_vec(); - let mut res = Vec::new(); +pub fn populate_previous_and_next_pages(input: Vec) -> Vec { + let mut res = Vec::with_capacity(input.len()); - // the input is already sorted - // We might put prev/next randomly if a page is missing date/order, probably fine - for (i, page) in input.iter().enumerate() { - let mut new_page = page.clone(); + // The input is already sorted + for (i, _) in input.iter().enumerate() { + let mut new_page = input[i].clone(); if i > 0 { - let next = &pages[i - 1]; - let mut next_page = next.clone(); + let mut previous_page = input[i - 1].clone(); + // Remove prev/next otherwise we serialise the whole thing... + previous_page.previous = None; + previous_page.next = None; + new_page.previous = Some(Box::new(previous_page)); + } + + if i < input.len() - 1 { + let mut next_page = input[i + 1].clone(); // Remove prev/next otherwise we serialise the whole thing... next_page.previous = None; next_page.next = None; new_page.next = Some(Box::new(next_page)); } - - if i < input.len() - 1 { - let previous = &pages[i + 1]; - // Remove prev/next otherwise we serialise the whole thing... - let mut previous_page = previous.clone(); - previous_page.previous = None; - previous_page.next = None; - new_page.previous = Some(Box::new(previous_page)); - } res.push(new_page); } @@ -97,6 +81,12 @@ mod tests { Page::new("content/hello.md", front_matter) } + fn create_page_with_weight(weight: usize) -> Page { + let mut front_matter = PageFrontMatter::default(); + front_matter.weight = Some(weight); + Page::new("content/hello.md", front_matter) + } + #[test] fn can_sort_by_dates() { let input = vec![ @@ -128,15 +118,15 @@ mod tests { #[test] fn can_sort_by_weight() { let input = vec![ - create_page_with_order(2), - create_page_with_order(3), - create_page_with_order(1), + create_page_with_weight(2), + create_page_with_weight(3), + create_page_with_weight(1), ]; let (pages, _) = sort_pages(input, SortBy::Weight); // Should be sorted by date - assert_eq!(pages[0].clone().meta.order.unwrap(), 1); - assert_eq!(pages[1].clone().meta.order.unwrap(), 2); - assert_eq!(pages[2].clone().meta.order.unwrap(), 3); + assert_eq!(pages[0].clone().meta.weight.unwrap(), 1); + assert_eq!(pages[1].clone().meta.weight.unwrap(), 2); + assert_eq!(pages[2].clone().meta.weight.unwrap(), 3); } #[test] @@ -168,23 +158,23 @@ mod tests { #[test] fn can_populate_previous_and_next_pages() { let input = vec![ - create_page_with_order(3), - create_page_with_order(2), create_page_with_order(1), + create_page_with_order(2), + create_page_with_order(3), ]; - let pages = populate_previous_and_next_pages(input.as_slice()); + let pages = populate_previous_and_next_pages(input); - assert!(pages[0].clone().next.is_none()); - assert!(pages[0].clone().previous.is_some()); - assert_eq!(pages[0].clone().previous.unwrap().meta.order.unwrap(), 2); + assert!(pages[0].clone().previous.is_none()); + assert!(pages[0].clone().next.is_some()); + assert_eq!(pages[0].clone().next.unwrap().meta.order.unwrap(), 2); assert!(pages[1].clone().next.is_some()); assert!(pages[1].clone().previous.is_some()); - assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3); assert_eq!(pages[1].clone().previous.unwrap().meta.order.unwrap(), 1); + assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3); - assert!(pages[2].clone().next.is_some()); - assert!(pages[2].clone().previous.is_none()); - assert_eq!(pages[2].clone().next.unwrap().meta.order.unwrap(), 2); + assert!(pages[2].clone().next.is_none()); + assert!(pages[2].clone().previous.is_some()); + assert_eq!(pages[2].clone().previous.unwrap().meta.order.unwrap(), 2); } } diff --git a/components/front_matter/src/page.rs b/components/front_matter/src/page.rs index f095a1b9..42f50a00 100644 --- a/components/front_matter/src/page.rs +++ b/components/front_matter/src/page.rs @@ -28,6 +28,8 @@ pub struct PageFrontMatter { pub category: Option, /// Integer to use to order content. Lowest is at the bottom, highest first pub order: Option, + /// Integer to use to order content. Highest is at the bottom, lowest first + pub weight: Option, /// All aliases for that page. Gutenberg will create HTML templates that will #[serde(skip_serializing)] pub aliases: Option>, @@ -84,6 +86,10 @@ impl PageFrontMatter { self.order.unwrap() } + pub fn weight(&self) -> usize { + self.weight.unwrap() + } + pub fn has_tags(&self) -> bool { match self.tags { Some(ref t) => !t.is_empty(), @@ -103,6 +109,7 @@ impl Default for PageFrontMatter { tags: None, category: None, order: None, + weight: None, aliases: None, template: None, extra: None, diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index 8c5e9ff4..eb51d34e 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -17,6 +17,7 @@ extern crate tempdir; use std::collections::HashMap; use std::fs::{remove_dir_all, copy, create_dir_all}; +use std::mem; use std::path::{Path, PathBuf}; use glob::glob; @@ -289,8 +290,9 @@ impl Site { continue; } } - let (sorted_pages, cannot_be_sorted_pages) = sort_pages(section.pages.clone(), section.meta.sort_by()); - section.pages = populate_previous_and_next_pages(&sorted_pages); + let pages = mem::replace(&mut section.pages, vec![]); + let (sorted_pages, cannot_be_sorted_pages) = sort_pages(pages, section.meta.sort_by()); + section.pages = populate_previous_and_next_pages(sorted_pages); section.ignored_pages = cannot_be_sorted_pages; } } @@ -305,7 +307,7 @@ impl Site { // TODO: can we pass a reference? let (tags, categories) = Taxonomy::find_tags_and_categories( - self.pages.values().cloned().collect::>() + self.pages.values().cloned().collect::>().as_slice() ); if generate_tags_pages { self.tags = Some(tags); diff --git a/components/taxonomies/src/lib.rs b/components/taxonomies/src/lib.rs index a7ced45d..05c3dfc9 100644 --- a/components/taxonomies/src/lib.rs +++ b/components/taxonomies/src/lib.rs @@ -37,6 +37,7 @@ impl TaxonomyItem { pub fn new(name: &str, pages: Vec) -> TaxonomyItem { // We shouldn't have any pages without dates there let (sorted_pages, _) = sort_pages(pages, SortBy::Date); + TaxonomyItem { name: name.to_string(), slug: slugify(name), @@ -55,7 +56,7 @@ pub struct Taxonomy { impl Taxonomy { // TODO: take a Vec<&'a Page> if it makes a difference in terms of perf for actual sites - pub fn find_tags_and_categories(all_pages: Vec) -> (Taxonomy, Taxonomy) { + pub fn find_tags_and_categories(all_pages: &[Page]) -> (Taxonomy, Taxonomy) { let mut tags = HashMap::new(); let mut categories = HashMap::new();