Use proper Unicode word count; fixes #304

This commit is contained in:
Clar Charr 2018-05-16 17:52:26 -04:00 committed by Vincent Prouillet
parent 3694c43d3a
commit da3b0fcf0e
3 changed files with 4 additions and 4 deletions

View file

@@ -6,8 +6,8 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"]
[dependencies] [dependencies]
errors = { path = "../errors" } errors = { path = "../errors" }
tera = "0.11" tera = "0.11"
unicode-segmentation = "1.2"
walkdir = "2" walkdir = "2"
[dev-dependencies] [dev-dependencies]
tempfile = "3" tempfile = "3"

View file

@@ -1,11 +1,11 @@
use std::collections::HashMap; use std::collections::HashMap;
use unicode_segmentation::UnicodeSegmentation;
use errors::Result; use errors::Result;
/// Get word count and estimated reading time /// Get word count and estimated reading time
pub fn get_reading_analytics(content: &str) -> (usize, usize) { pub fn get_reading_analytics(content: &str) -> (usize, usize) {
// Only works for latin language but good enough for a start let word_count: usize = content.unicode_words().count();
let word_count: usize = content.split_whitespace().count();
// https://help.medium.com/hc/en-us/articles/214991667-Read-time // https://help.medium.com/hc/en-us/articles/214991667-Read-time
// 275 seems a bit too high though // 275 seems a bit too high though

View file

@@ -63,7 +63,7 @@ extra: HashMap<String, Any>;
pages: Array<Pages>; pages: Array<Pages>;
// Direct subsections to this section, sorted by subsections weight // Direct subsections to this section, sorted by subsections weight
subsections: Array<Section>; subsections: Array<Section>;
// Naive word count, will not work for languages without whitespace // Unicode word count
word_count: Number; word_count: Number;
// Based on https://help.medium.com/hc/en-us/articles/214991667-Read-time // Based on https://help.medium.com/hc/en-us/articles/214991667-Read-time
reading_time: Number; reading_time: Number;