From da3b0fcf0ee880cbd642b2c7bd3066e8ab81ab02 Mon Sep 17 00:00:00 2001 From: Clar Charr Date: Wed, 16 May 2018 17:52:26 -0400 Subject: [PATCH] Use proper Unicode word count; fixes #304 --- components/utils/Cargo.toml | 2 +- components/utils/src/site.rs | 4 ++-- docs/content/documentation/templates/pages-sections.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/utils/Cargo.toml b/components/utils/Cargo.toml index 2c5cc0f0..eaa8419d 100644 --- a/components/utils/Cargo.toml +++ b/components/utils/Cargo.toml @@ -6,8 +6,8 @@ authors = ["Vincent Prouillet "] [dependencies] errors = { path = "../errors" } tera = "0.11" +unicode-segmentation = "1.2" walkdir = "2" - [dev-dependencies] tempfile = "3" diff --git a/components/utils/src/site.rs b/components/utils/src/site.rs index f73166a1..f3d094d2 100644 --- a/components/utils/src/site.rs +++ b/components/utils/src/site.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; +use unicode_segmentation::UnicodeSegmentation; use errors::Result; /// Get word count and estimated reading time pub fn get_reading_analytics(content: &str) -> (usize, usize) { - // Only works for latin language but good enough for a start - let word_count: usize = content.split_whitespace().count(); + let word_count: usize = content.unicode_words().count(); // https://help.medium.com/hc/en-us/articles/214991667-Read-time // 275 seems a bit too high though diff --git a/docs/content/documentation/templates/pages-sections.md b/docs/content/documentation/templates/pages-sections.md index f09c777c..7580855c 100644 --- a/docs/content/documentation/templates/pages-sections.md +++ b/docs/content/documentation/templates/pages-sections.md @@ -63,7 +63,7 @@ extra: HashMap; pages: Array; // Direct subsections to this section, sorted by subsections weight subsections: Array
; -// Naive word count, will not work for languages without whitespace +// Unicode word count word_count: Number; // Based on https://help.medium.com/hc/en-us/articles/214991667-Read-time reading_time: Number;