zola/components/library/src/sorting.rs

use std::cmp::Ordering;

use chrono::NaiveDateTime;
use rayon::prelude::*;
use slotmap::DefaultKey;

use crate::content::Page;

/// Used by the feed
/// There to not have to import sorting stuff in the site crate
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering {
    let ord = b.meta.datetime.unwrap().cmp(&a.meta.datetime.unwrap());
    if ord == Ordering::Equal {
        a.permalink.cmp(&b.permalink)
    } else {
        ord
    }
}

/// Takes a list of (page key, date, permalink) and sort them by dates if possible
/// Pages without date will be put in the unsortable bucket
/// The permalink is used to break ties
pub fn sort_pages_by_date(
    pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>,
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
    let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
        pages.into_par_iter().partition(|page| page.1.is_some());

    can_be_sorted.par_sort_unstable_by(|a, b| {
        let ord = b.1.unwrap().cmp(&a.1.unwrap());
        if ord == Ordering::Equal {
            a.2.cmp(&b.2)
        } else {
            ord
        }
    });

    (can_be_sorted.iter().map(|p| *p.0).collect(), cannot_be_sorted.iter().map(|p| *p.0).collect())
}

/// Takes a list of (page key, weight, permalink) and sort them by weight if possible
/// Pages without weight will be put in the unsortable bucket
/// The permalink is used to break ties
pub fn sort_pages_by_weight(
    pages: Vec<(&DefaultKey, Option<usize>, &str)>,
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
    let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
        pages.into_par_iter().partition(|page| page.1.is_some());

    can_be_sorted.par_sort_unstable_by(|a, b| {
        let ord = a.1.unwrap().cmp(&b.1.unwrap());
        if ord == Ordering::Equal {
            a.2.cmp(&b.2)
        } else {
            ord
        }
    });

    (can_be_sorted.iter().map(|p| *p.0).collect(), cannot_be_sorted.iter().map(|p| *p.0).collect())
}

/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight
pub fn find_siblings(
    sorted: &[DefaultKey],
) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {
    let mut res = Vec::with_capacity(sorted.len());
    let length = sorted.len();

    for (i, key) in sorted.iter().enumerate() {
        let mut with_siblings = (*key, None, None);

        if i > 0 {
            // lighter / later
            with_siblings.1 = Some(sorted[i - 1]);
        }

        if i < length - 1 {
            // heavier/earlier
            with_siblings.2 = Some(sorted[i + 1]);
        }
        res.push(with_siblings);
    }

    res
}

#[cfg(test)]
mod tests {
    use slotmap::DenseSlotMap;
    use std::path::PathBuf;

    use super::{find_siblings, sort_pages_by_date, sort_pages_by_weight};
    use crate::content::Page;
    use front_matter::PageFrontMatter;

    fn create_page_with_date(date: &str) -> Page {
        let mut front_matter = PageFrontMatter::default();
        front_matter.date = Some(date.to_string());
        front_matter.date_to_datetime();
        Page::new("content/hello.md", front_matter, &PathBuf::new())
    }

    fn create_page_with_weight(weight: usize) -> Page {
        let mut front_matter = PageFrontMatter::default();
        front_matter.weight = Some(weight);
        Page::new("content/hello.md", front_matter, &PathBuf::new())
    }

    #[test]
    fn can_sort_by_dates() {
        let mut dense = DenseSlotMap::new();
        let page1 = create_page_with_date("2018-01-01");
        let key1 = dense.insert(page1.clone());
        let page2 = create_page_with_date("2017-01-01");
        let key2 = dense.insert(page2.clone());
        let page3 = create_page_with_date("2019-01-01");
        let key3 = dense.insert(page3.clone());

        let input = vec![
            (&key1, page1.meta.datetime, page1.permalink.as_ref()),
            (&key2, page2.meta.datetime, page2.permalink.as_ref()),
            (&key3, page3.meta.datetime, page3.permalink.as_ref()),
        ];
        let (pages, _) = sort_pages_by_date(input);
        // Should be sorted by date
        assert_eq!(pages[0], key3);
        assert_eq!(pages[1], key1);
        assert_eq!(pages[2], key2);
    }

    #[test]
    fn can_sort_by_weight() {
        let mut dense = DenseSlotMap::new();
        let page1 = create_page_with_weight(2);
        let key1 = dense.insert(page1.clone());
        let page2 = create_page_with_weight(3);
        let key2 = dense.insert(page2.clone());
        let page3 = create_page_with_weight(1);
        let key3 = dense.insert(page3.clone());

        let input = vec![
            (&key1, page1.meta.weight, page1.permalink.as_ref()),
            (&key2, page2.meta.weight, page2.permalink.as_ref()),
            (&key3, page3.meta.weight, page3.permalink.as_ref()),
        ];
        let (pages, _) = sort_pages_by_weight(input);
        // Should be sorted by weight
        assert_eq!(pages[0], key3);
        assert_eq!(pages[1], key1);
        assert_eq!(pages[2], key2);
    }

    #[test]
    fn ignore_page_with_missing_field() {
        let mut dense = DenseSlotMap::new();
        let page1 = create_page_with_weight(2);
        let key1 = dense.insert(page1.clone());
        let page2 = create_page_with_weight(3);
        let key2 = dense.insert(page2.clone());
        let page3 = create_page_with_date("2019-01-01");
        let key3 = dense.insert(page3.clone());

        let input = vec![
            (&key1, page1.meta.weight, page1.permalink.as_ref()),
            (&key2, page2.meta.weight, page2.permalink.as_ref()),
            (&key3, page3.meta.weight, page3.permalink.as_ref()),
        ];

        let (pages, unsorted) = sort_pages_by_weight(input);
        assert_eq!(pages.len(), 2);
        assert_eq!(unsorted.len(), 1);
    }

    #[test]
    fn can_find_siblings() {
        let mut dense = DenseSlotMap::new();
        let page1 = create_page_with_weight(1);
        let key1 = dense.insert(page1.clone());
        let page2 = create_page_with_weight(2);
        let key2 = dense.insert(page2.clone());
        let page3 = create_page_with_weight(3);
        let key3 = dense.insert(page3.clone());

        let input = vec![key1, key2, key3];

        let pages = find_siblings(&input);

        assert_eq!(pages[0].1, None);
        assert_eq!(pages[0].2, Some(key2));

        assert_eq!(pages[1].1, Some(key1));
        assert_eq!(pages[1].2, Some(key3));

        assert_eq!(pages[2].1, Some(key2));
        assert_eq!(pages[2].2, None);
    }
}
Slotmap refactor 2018-10-02 14:42:34 +00:00			`use std::cmp::Ordering;`

rustfmt 2018-10-31 07:18:57 +00:00			`use chrono::NaiveDateTime;`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`use rayon::prelude::*;`
Update slotmap 2019-09-16 09:44:39 +00:00			`use slotmap::DefaultKey;`
Slotmap refactor 2018-10-02 14:42:34 +00:00
Use Rust 2018 edition (#885) 2019-12-21 21:52:39 +00:00			`use crate::content::Page;`
Slotmap refactor 2018-10-02 14:42:34 +00:00
Support and default to generating Atom feeds This includes several breaking changes, but they’re easy to adjust for. Atom 1.0 is superior to RSS 2.0 in a number of ways, both technical and legal, though information from the last decade is hard to find. http://www.intertwingly.net/wiki/pie/Rss20AndAtom10Compared has some info which is probably still mostly correct. How do RSS and Atom compare in terms of implementation support? The impression I get is that proper Atom support in normal content websites has been universal for over twelve years, but that support in podcasts was not quite so good, but getting there, over twelve years ago. I have no more recent facts or figures; no one talks about this stuff these days. I remember investigating this stuff back in 2011–2013 and coming to the same conclusion. At that time, I went with Atom on websites and RSS in podcasts. Now I’d just go full Atom and hang any podcast tools that don’t support Atom, because Atom’s semantics truly are much better. In light of all this, I make the bold recommendation to default to Atom. Nonetheless, for compatibility for existing users, and for those that have Opinions, I’ve retained the RSS template, so that you can escape the breaking change easily. I personally prefer to give feeds a basename that doesn’t mention “Atom” or “RSS”, e.g. “feed.xml”. I’ll be doing that myself, as I’ll be using my own template with more Atom features anyway, like author information, taxonomies and making the title field HTML. Some notes about the Atom feed template: - I went with atom.xml rather than something like feed.atom (the .atom file format being registered for this purpose by RFC4287) due to lack of confidence that it’ll be served with the right MIME type. .xml is a safer default. - It might be nice to get Zola’s version number into the <generator> tag. Not for any particularly good reason, y’know. Just picture it: <generator uri="https://www.getzola.org/" version="0.10.0"> Zola </generator> - I’d like to get taxonomies into the feed, but this requires exposing a little more info than is currently exposed. I think it’d require `TaxonomyConfig` to preferably have a new member `permalink` added (which should be equivalent to something like `config.base_url ~ "/" ~ taxonomy.slug ~ "/"`), and for the feed to get all the taxonomies passed into it (`taxonomies: HashMap<String, TaxonomyTerm>`). Then, the template could be like this, inside the entry: {% for taxonomy, terms in page.taxonomies %} {% for term in terms %} <category scheme="{{ taxonomies[taxonomy].permalink }}" term="{{ term.slug }}" label="{{ term.name }}" /> {% endfor %} {% endfor %} Other remarks: - I have added a date field `extra.updated` to my posts and include that in the feed; I’ve observed others with a similar field. I believe this should be included as an official field. I’m inclined to add author to at least config.toml, too, for feeds. - We need to have a link from the docs to the source of the built-in templates, to help people that wish to alter it. 2019-08-11 10:25:24 +00:00			`/// Used by the feed`
Cleanup of slotmap impl Fix #205 2018-10-05 17:34:27 +00:00			`/// There to not have to import sorting stuff in the site crate`
Fix clippy warnings (#744) Clippy is returning some warnings. Let's fix or explicitly ignore them. In particular: - In `components/imageproc/src/lib.rs`, we implement `Hash` explicitly but derive `PartialEq`. We need to maintain the property that two keys being equal implies the hashes of those two keys are equal. Our `Hash` implementations preserve this, so we'll explicitly ignore the warnings. - In `components/site/src/lib.rs`, we were calling `.into()` on some values that are already of the correct type. - In `components/site/src/lib.rs`, we were using `.map(\|x\| *x)` in iterator chains to remove a level of indirection; we can instead say `.copied()` (introduced in Rust v1.36) or `.cloned()`. Using `.copied` here is better from a type-checking point of view, but we'll use `.cloned` for now as Rust v1.36 was only recently released. - In `components/templates/src/filters.rs` and `components/utils/src/site.rs`, we were taking `HashMap`s as function arguments but not generically accepting alternate `Hasher` implementations. - In `src/cmd/check.rs`, we use `env::current_dir()` as a default value, but our use of `unwrap_or` meant that we would always retrieve the current directory even when not needed. - In `components/errors/src/lib.rs`, we can use `if let` rather than `match`. - In `components/library/src/content/page.rs`, we can collapse a nested conditional into `else if let ...`. - In `components/library/src/sorting.rs`, a function takes `&&Page` arguments. Clippy warns about this for efficiency reasons, but we're doing it here to match a particular sorting API, so we'll explicitly ignore the warning. 2019-07-12 08:29:44 +00:00			`#[allow(clippy::trivially_copy_pass_by_ref)]`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering {`
			`let ord = b.meta.datetime.unwrap().cmp(&a.meta.datetime.unwrap());`
			`if ord == Ordering::Equal {`
			`a.permalink.cmp(&b.permalink)`
			`} else {`
			`ord`
			`}`
			`}`

Cleanup of slotmap impl Fix #205 2018-10-05 17:34:27 +00:00			`/// Takes a list of (page key, date, permalink) and sort them by dates if possible`
			`/// Pages without date will be put in the unsortable bucket`
			`/// The permalink is used to break ties`
Skip anchor checking for URL with prefix in config (#812) * cargo fmt & clippy * Skip anchor checking for URL with prefix in config 2019-10-14 16:31:03 +00:00			`pub fn sort_pages_by_date(`
			`pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>,`
			`) -> (Vec<DefaultKey>, Vec<DefaultKey>) {`
rustfmt 2018-10-31 07:18:57 +00:00			`let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =`
			`pages.into_par_iter().partition(\|page\| page.1.is_some());`

			`can_be_sorted.par_sort_unstable_by(\|a, b\| {`
			`let ord = b.1.unwrap().cmp(&a.1.unwrap());`
			`if ord == Ordering::Equal {`
			`a.2.cmp(&b.2)`
			`} else {`
			`ord`
			`}`
			`});`
Slotmap refactor 2018-10-02 14:42:34 +00:00
			`(can_be_sorted.iter().map(\|p\| p.0).collect(), cannot_be_sorted.iter().map(\|p\| p.0).collect())`
			`}`

Cleanup of slotmap impl Fix #205 2018-10-05 17:34:27 +00:00			`/// Takes a list of (page key, weight, permalink) and sort them by weight if possible`
			`/// Pages without weight will be put in the unsortable bucket`
			`/// The permalink is used to break ties`
Skip anchor checking for URL with prefix in config (#812) * cargo fmt & clippy * Skip anchor checking for URL with prefix in config 2019-10-14 16:31:03 +00:00			`pub fn sort_pages_by_weight(`
			`pages: Vec<(&DefaultKey, Option<usize>, &str)>,`
			`) -> (Vec<DefaultKey>, Vec<DefaultKey>) {`
rustfmt 2018-10-31 07:18:57 +00:00			`let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =`
			`pages.into_par_iter().partition(\|page\| page.1.is_some());`

			`can_be_sorted.par_sort_unstable_by(\|a, b\| {`
			`let ord = a.1.unwrap().cmp(&b.1.unwrap());`
			`if ord == Ordering::Equal {`
			`a.2.cmp(&b.2)`
			`} else {`
			`ord`
			`}`
			`});`
Slotmap refactor 2018-10-02 14:42:34 +00:00
			`(can_be_sorted.iter().map(\|p\| p.0).collect(), cannot_be_sorted.iter().map(\|p\| p.0).collect())`
			`}`

Cleanup of slotmap impl Fix #205 2018-10-05 17:34:27 +00:00			`/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight`
Skip anchor checking for URL with prefix in config (#812) * cargo fmt & clippy * Skip anchor checking for URL with prefix in config 2019-10-14 16:31:03 +00:00			`pub fn find_siblings(`
			`sorted: &[DefaultKey],`
			`) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`let mut res = Vec::with_capacity(sorted.len());`
			`let length = sorted.len();`

Remove all draft specific code 2019-07-19 09:10:28 +00:00			`for (i, key) in sorted.iter().enumerate() {`
			`let mut with_siblings = (*key, None, None);`
Slotmap refactor 2018-10-02 14:42:34 +00:00
			`if i > 0 {`
Remove all draft specific code 2019-07-19 09:10:28 +00:00			`// lighter / later`
			`with_siblings.1 = Some(sorted[i - 1]);`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`}`

			`if i < length - 1 {`
Remove all draft specific code 2019-07-19 09:10:28 +00:00			`// heavier/earlier`
			`with_siblings.2 = Some(sorted[i + 1]);`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`}`
			`res.push(with_siblings);`
			`}`

			`res`
			`}`

			`#[cfg(test)]`
			`mod tests {`
			`use slotmap::DenseSlotMap;`
Strip base_path from page/section paths To ensure we will get the right `content` directory. Fix #629 2019-03-08 22:26:57 +00:00			`use std::path::PathBuf;`
Slotmap refactor 2018-10-02 14:42:34 +00:00
rustfmt 2018-10-31 07:18:57 +00:00			`use super::{find_siblings, sort_pages_by_date, sort_pages_by_weight};`
Use Rust 2018 edition (#885) 2019-12-21 21:52:39 +00:00			`use crate::content::Page;`
rustfmt 2018-10-31 07:18:57 +00:00			`use front_matter::PageFrontMatter;`
Slotmap refactor 2018-10-02 14:42:34 +00:00
			`fn create_page_with_date(date: &str) -> Page {`
			`let mut front_matter = PageFrontMatter::default();`
			`front_matter.date = Some(date.to_string());`
			`front_matter.date_to_datetime();`
Strip base_path from page/section paths To ensure we will get the right `content` directory. Fix #629 2019-03-08 22:26:57 +00:00			`Page::new("content/hello.md", front_matter, &PathBuf::new())`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`}`

			`fn create_page_with_weight(weight: usize) -> Page {`
			`let mut front_matter = PageFrontMatter::default();`
			`front_matter.weight = Some(weight);`
Strip base_path from page/section paths To ensure we will get the right `content` directory. Fix #629 2019-03-08 22:26:57 +00:00			`Page::new("content/hello.md", front_matter, &PathBuf::new())`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`}`

			`#[test]`
			`fn can_sort_by_dates() {`
			`let mut dense = DenseSlotMap::new();`
			`let page1 = create_page_with_date("2018-01-01");`
			`let key1 = dense.insert(page1.clone());`
			`let page2 = create_page_with_date("2017-01-01");`
			`let key2 = dense.insert(page2.clone());`
			`let page3 = create_page_with_date("2019-01-01");`
			`let key3 = dense.insert(page3.clone());`

			`let input = vec![`
			`(&key1, page1.meta.datetime, page1.permalink.as_ref()),`
			`(&key2, page2.meta.datetime, page2.permalink.as_ref()),`
			`(&key3, page3.meta.datetime, page3.permalink.as_ref()),`
			`];`
			`let (pages, _) = sort_pages_by_date(input);`
			`// Should be sorted by date`
			`assert_eq!(pages[0], key3);`
			`assert_eq!(pages[1], key1);`
			`assert_eq!(pages[2], key2);`
			`}`

			`#[test]`
			`fn can_sort_by_weight() {`
			`let mut dense = DenseSlotMap::new();`
			`let page1 = create_page_with_weight(2);`
			`let key1 = dense.insert(page1.clone());`
			`let page2 = create_page_with_weight(3);`
			`let key2 = dense.insert(page2.clone());`
			`let page3 = create_page_with_weight(1);`
			`let key3 = dense.insert(page3.clone());`

			`let input = vec![`
			`(&key1, page1.meta.weight, page1.permalink.as_ref()),`
			`(&key2, page2.meta.weight, page2.permalink.as_ref()),`
			`(&key3, page3.meta.weight, page3.permalink.as_ref()),`
			`];`
			`let (pages, _) = sort_pages_by_weight(input);`
			`// Should be sorted by weight`
			`assert_eq!(pages[0], key3);`
			`assert_eq!(pages[1], key1);`
			`assert_eq!(pages[2], key2);`
			`}`

			`#[test]`
			`fn ignore_page_with_missing_field() {`
			`let mut dense = DenseSlotMap::new();`
			`let page1 = create_page_with_weight(2);`
			`let key1 = dense.insert(page1.clone());`
			`let page2 = create_page_with_weight(3);`
			`let key2 = dense.insert(page2.clone());`
			`let page3 = create_page_with_date("2019-01-01");`
			`let key3 = dense.insert(page3.clone());`

			`let input = vec![`
			`(&key1, page1.meta.weight, page1.permalink.as_ref()),`
			`(&key2, page2.meta.weight, page2.permalink.as_ref()),`
			`(&key3, page3.meta.weight, page3.permalink.as_ref()),`
			`];`

rustfmt 2018-10-31 07:18:57 +00:00			`let (pages, unsorted) = sort_pages_by_weight(input);`
Slotmap refactor 2018-10-02 14:42:34 +00:00			`assert_eq!(pages.len(), 2);`
			`assert_eq!(unsorted.len(), 1);`
			`}`

			`#[test]`
			`fn can_find_siblings() {`
			`let mut dense = DenseSlotMap::new();`
			`let page1 = create_page_with_weight(1);`
			`let key1 = dense.insert(page1.clone());`
			`let page2 = create_page_with_weight(2);`
			`let key2 = dense.insert(page2.clone());`
			`let page3 = create_page_with_weight(3);`
			`let key3 = dense.insert(page3.clone());`

Remove all draft specific code 2019-07-19 09:10:28 +00:00			`let input = vec![key1, key2, key3];`
Slotmap refactor 2018-10-02 14:42:34 +00:00
Remove all draft specific code 2019-07-19 09:10:28 +00:00			`let pages = find_siblings(&input);`
Slotmap refactor 2018-10-02 14:42:34 +00:00
			`assert_eq!(pages[0].1, None);`
			`assert_eq!(pages[0].2, Some(key2));`

			`assert_eq!(pages[1].1, Some(key1));`
			`assert_eq!(pages[1].2, Some(key3));`

			`assert_eq!(pages[2].1, Some(key2));`
			`assert_eq!(pages[2].2, None);`
			`}`
			`}`