zola/components/site/src/lib.rs
2017-07-05 19:34:41 +09:00

704 lines
25 KiB
Rust

extern crate tera;
extern crate rayon;
extern crate glob;
extern crate walkdir;
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate errors;
extern crate config;
extern crate utils;
extern crate front_matter;
extern crate templates;
extern crate pagination;
extern crate taxonomies;
extern crate content;
#[cfg(test)]
extern crate tempdir;
use std::collections::HashMap;
use std::fs::{remove_dir_all, copy, create_dir_all};
use std::mem;
use std::path::{Path, PathBuf};
use glob::glob;
use tera::{Tera, Context};
use walkdir::WalkDir;
use errors::{Result, ResultExt};
use config::{Config, get_config};
use utils::fs::{create_file, create_directory, ensure_directory_exists};
use content::{Page, Section, populate_previous_and_next_pages, sort_pages};
use templates::{GUTENBERG_TERA, global_fns, render_redirect_template};
use front_matter::{SortBy, InsertAnchor};
use taxonomies::Taxonomy;
use pagination::Paginator;
use rayon::prelude::*;
/// The sitemap only needs links and potentially date so we trim down
/// all pages to only that
#[derive(Debug, Serialize)]
struct SitemapEntry {
/// Permalink of the page, section or taxonomy page listed in the sitemap
permalink: String,
/// Date of the entry if it has one. Only pages provide it: sections and
/// taxonomy entries are always created with `None`
date: Option<String>,
}
impl SitemapEntry {
    /// Builds a sitemap entry from an already computed permalink and an optional date
    pub fn new(permalink: String, date: Option<String>) -> SitemapEntry {
        SitemapEntry {
            permalink: permalink,
            date: date,
        }
    }
}
/// Everything known about a site: its configuration, templates, content and
/// the paths it is read from and rendered to
#[derive(Debug)]
pub struct Site {
/// The base path of the gutenberg site
pub base_path: PathBuf,
/// The parsed config for the site
pub config: Config,
/// All the pages of the site, keyed by the path of their file
pub pages: HashMap<PathBuf, Page>,
/// All the sections of the site, keyed by the path of their file
pub sections: HashMap<PathBuf, Section>,
/// The Tera instance: the templates of the site extended with `GUTENBERG_TERA`
pub tera: Tera,
/// Whether the livereload script tag is injected in the rendered HTML
live_reload: bool,
/// Directory the site is rendered into. Set to `public` in the base path by `new`
output_path: PathBuf,
/// The `static` directory of the base path, copied as is into the output directory
static_path: PathBuf,
/// The tags taxonomy, only set by `populate_tags_and_categories` if the config asks for tags pages
pub tags: Option<Taxonomy>,
/// The categories taxonomy, only set by `populate_tags_and_categories` if the config asks for categories pages
pub categories: Option<Taxonomy>,
/// A map of all .md files (section and pages) and their permalink
/// We need that if there are relative links in the content that need to be resolved
pub permalinks: HashMap<String, String>,
}
impl Site {
/// Parse a site at the given path. Defaults to the current dir
/// Passing in a path is only used in tests
pub fn new<P: AsRef<Path>>(path: P, config_file: &str) -> Result<Site> {
let path = path.as_ref();
let tpl_glob = format!("{}/{}", path.to_string_lossy().replace("\\", "/"), "templates/**/*.*ml");
let mut tera = Tera::new(&tpl_glob).chain_err(|| "Error parsing templates")?;
tera.extend(&GUTENBERG_TERA)?;
let site = Site {
base_path: path.to_path_buf(),
config: get_config(path, config_file),
pages: HashMap::new(),
sections: HashMap::new(),
tera: tera,
live_reload: false,
output_path: path.join("public"),
static_path: path.join("static"),
tags: None,
categories: None,
permalinks: HashMap::new(),
};
Ok(site)
}
/// Turns on live reload mode: from then on `inject_livereload` adds the
/// livereload script tag to the HTML it is given
pub fn enable_live_reload(&mut self) {
    self.live_reload = true;
}
/// Get all the orphan (== without section) pages in the site
///
/// A page is an orphan when the path of its file is not part of the paths
/// returned by `all_pages_path` for any of the sections.
pub fn get_all_orphan_pages(&self) -> Vec<&Page> {
    // Hash the paths once so that checking a page is a single lookup rather
    // than a scan of every page of every section
    let pages_in_sections = self.sections
        .values()
        .flat_map(|s| s.all_pages_path())
        .collect::<::std::collections::HashSet<_>>();

    self.pages
        .values()
        .filter(|page| !pages_in_sections.contains(&page.file.path))
        .collect()
}
/// Used by tests to change the output path to a tmp dir
#[doc(hidden)]
pub fn set_output_path<P: AsRef<Path>>(&mut self, path: P) {
self.output_path = path.as_ref().to_path_buf();
}
/// Reads all .md files in the `content` directory and create pages/sections
/// out of them
///
/// Once all the files are parsed, the markdown of every page and section is
/// rendered, the sections and the taxonomies are populated and the `get_page`,
/// `get_section` and `get_url` global functions are registered in Tera.
///
/// Errors if the glob pattern built from the base path is invalid or if
/// parsing or rendering any of the files fails.
pub fn load(&mut self) -> Result<()> {
    let base_path = self.base_path.to_string_lossy().replace("\\", "/");
    let content_glob = format!("{}/{}", base_path, "content/**/*.md");

    // The base path is part of the pattern so an invalid pattern can come from
    // the user environment: report it instead of panicking.
    // The partition already splits sections from pages, no need to filter again later
    let (section_entries, page_entries): (Vec<_>, Vec<_>) = glob(&content_glob)
        .chain_err(|| format!("Invalid glob pattern for the content: {}", content_glob))?
        .filter_map(|e| e.ok())
        .partition(|entry| entry.as_path().file_name().unwrap() == "_index.md");

    let sections = {
        let config = &self.config;

        section_entries
            .into_par_iter()
            .map(|entry| Section::from_file(entry.as_path(), config))
            .collect::<Vec<_>>()
    };

    let pages = {
        let config = &self.config;

        page_entries
            .into_par_iter()
            .map(|entry| Page::from_file(entry.as_path(), config))
            .collect::<Vec<_>>()
    };

    // Kinda duplicated code for add_section/add_page but necessary to do it that
    // way because of the borrow checker
    for section in sections {
        let s = section?;
        self.add_section(s, false)?;
    }

    // Insert a default index section if necessary so we don't need to create
    // a _index.md to render the index page
    let index_path = self.base_path.join("content").join("_index.md");
    if !self.sections.contains_key(&index_path) {
        let mut index_section = Section::default();
        index_section.permalink = self.config.make_permalink("");
        // TODO: need to insert into permalinks too
        self.sections.insert(index_path, index_section);
    }

    // The insert anchor of a page comes from its parent section: look it up
    // before the page is moved into the site
    let mut pages_insert_anchors = HashMap::new();
    for page in pages {
        let p = page?;
        pages_insert_anchors.insert(p.file.path.clone(), self.find_parent_section_insert_anchor(&p.file.parent));
        self.add_page(p, false)?;
    }

    {
        // Another silly thing needed to not borrow &self in parallel and
        // make the borrow checker happy
        let permalinks = &self.permalinks;
        let tera = &self.tera;
        let config = &self.config;

        self.pages.par_iter_mut()
            .map(|(_, page)| page)
            .map(|page| {
                let insert_anchor = pages_insert_anchors[&page.file.path];
                page.render_markdown(&permalinks, &tera, &config, insert_anchor)
            })
            .fold(|| Ok(()), Result::and)
            .reduce(|| Ok(()), Result::and)?;

        self.sections.par_iter_mut()
            .map(|(_, section)| section)
            .map(|section| section.render_markdown(permalinks, tera, config))
            .fold(|| Ok(()), Result::and)
            .reduce(|| Ok(()), Result::and)?;
    }

    self.populate_sections();
    self.populate_tags_and_categories();

    self.tera.register_global_function("get_page", global_fns::make_get_page(&self.pages));
    self.tera.register_global_function("get_section", global_fns::make_get_section(&self.sections));
    self.register_get_url_fn();

    Ok(())
}
/// Registers the `get_url` global function in Tera, built from a copy of the
/// current permalinks.
/// Kept as its own fn as it can be called in the serve command
pub fn register_get_url_fn(&mut self) {
    let permalinks = self.permalinks.clone();
    self.tera.register_global_function("get_url", global_fns::make_get_url(permalinks));
}
/// Inserts a page in the site and records its permalink.
/// `render` is used in the serve command, when rebuilding a page: if `true`,
/// the markdown of the page is rendered as well, using the insert anchor of
/// its parent section.
/// Returns the page previously stored for the same path if there was one
pub fn add_page(&mut self, page: Page, render: bool) -> Result<Option<Page>> {
    let page_path = page.file.path.clone();
    self.permalinks.insert(page.file.relative.clone(), page.permalink.clone());
    let previous = self.pages.insert(page_path.clone(), page);

    if !render {
        return Ok(previous);
    }

    let insert_anchor = self.find_parent_section_insert_anchor(&self.pages[&page_path].file.parent);
    // The page was inserted right above so it is always found
    let inserted = self.pages.get_mut(&page_path).unwrap();
    inserted.render_markdown(&self.permalinks, &self.tera, &self.config, insert_anchor)?;

    Ok(previous)
}
/// Inserts a section in the site and records its permalink.
/// `render` is used in the serve command, when rebuilding a section: if `true`,
/// the markdown of the section is rendered as well.
/// Returns the section previously stored for the same path if there was one
pub fn add_section(&mut self, section: Section, render: bool) -> Result<Option<Section>> {
    let section_path = section.file.path.clone();
    self.permalinks.insert(section.file.relative.clone(), section.permalink.clone());
    let previous = self.sections.insert(section_path.clone(), section);

    if render {
        // The section was inserted right above so it is always found
        self.sections
            .get_mut(&section_path)
            .unwrap()
            .render_markdown(&self.permalinks, &self.tera, &self.config)?;
    }

    Ok(previous)
}
/// Finds the insert_anchor of the section whose `_index.md` is in the
/// directory at `parent_path`.
/// Defaults to `InsertAnchor::None` if there is no such section or if that
/// section has no `insert_anchor` set, rather than panicking
pub fn find_parent_section_insert_anchor(&self, parent_path: &PathBuf) -> InsertAnchor {
    self.sections
        .get(&parent_path.join("_index.md"))
        .and_then(|s| s.meta.insert_anchor)
        .unwrap_or(InsertAnchor::None)
}
/// Find out the direct subsections of each subsection if there are some
/// as well as the pages for each section
///
/// The pages, ignored pages and subsections of every section are rebuilt
/// from scratch, so this can be called several times. The pages of each
/// section are sorted at the end.
pub fn populate_sections(&mut self) {
    // Copies of the sections, grouped by the path of their grand parent directory
    let mut grandparent_paths = HashMap::new();
    for section in self.sections.values_mut() {
        if let Some(ref grand_parent) = section.file.grand_parent {
            grandparent_paths.entry(grand_parent.to_path_buf()).or_insert_with(|| vec![]).push(section.clone());
        }
        // Make sure the pages of a section are empty since we can call that many times on `serve`
        section.pages = vec![];
        section.ignored_pages = vec![];
        // Same for the subsections: they are extended below and would be
        // duplicated on every call otherwise
        section.subsections = vec![];
    }

    for page in self.pages.values() {
        let parent_section_path = page.file.parent.join("_index.md");
        if let Some(section) = self.sections.get_mut(&parent_section_path) {
            section.pages.push(page.clone());
        }
    }

    // The direct subsections of a section are the sections whose grand parent
    // directory is the directory of that section
    for section in self.sections.values_mut() {
        if let Some(subsections) = grandparent_paths.get(&section.file.parent) {
            section.subsections.extend(subsections.iter().cloned());
        }
    }

    self.sort_sections_pages(None);
}
/// Sorts the pages of the sections according to the `sort_by` of each section.
/// All sections are sorted when `only` is `None`; otherwise only the section
/// stored under that path is.
/// Pages that cannot be sorted end up in the `ignored_pages` of the section
pub fn sort_sections_pages(&mut self, only: Option<&Path>) {
    for (path, section) in &mut self.sections {
        match only {
            Some(wanted) if wanted != path => continue,
            _ => (),
        }

        // Take the pages out of the section so they can be sorted by value
        let unsorted = mem::replace(&mut section.pages, vec![]);
        let (sorted, unsortable) = sort_pages(unsorted, section.meta.sort_by());
        section.pages = populate_previous_and_next_pages(sorted);
        section.ignored_pages = unsortable;
    }
}
/// Collects the tags and categories of all the pages, keeping only the
/// taxonomies the config asks pages for.
/// Does nothing if the config asks for neither
pub fn populate_tags_and_categories(&mut self) {
    let wants_tags = self.config.generate_tags_pages.unwrap();
    let wants_categories = self.config.generate_categories_pages.unwrap();
    if !(wants_tags || wants_categories) {
        return;
    }

    // TODO: can we pass a reference?
    let all_pages = self.pages.values().cloned().collect::<Vec<_>>();
    let (tags, categories) = Taxonomy::find_tags_and_categories(all_pages.as_slice());

    if wants_tags {
        self.tags = Some(tags);
    }
    if wants_categories {
        self.categories = Some(categories);
    }
}
/// Adds the livereload script tag right before every `</body>` of `html` when
/// in live reload mode; returns `html` untouched otherwise
fn inject_livereload(&self, html: String) -> String {
    if !self.live_reload {
        return html;
    }

    html.replace(
        "</body>",
        r#"<script src="/livereload.js?port=1112&mindelay=10"></script></body>"#
    )
}
/// Copy static file to public directory.
pub fn copy_static_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
let relative_path = path.as_ref().strip_prefix(&self.static_path).unwrap();
let target_path = self.output_path.join(relative_path);
if let Some(parent_directory) = target_path.parent() {
create_dir_all(parent_directory)?;
}
copy(path.as_ref(), &target_path)?;
Ok(())
}
/// Copy the content of the `static` folder into the `public` folder
///
/// Directories are created in the output directory if they do not exist yet
/// and files are copied with `copy_static_file`. Entries that cannot be read
/// while walking the directory are skipped.
pub fn copy_static_directory(&self) -> Result<()> {
    for entry in WalkDir::new(&self.static_path).into_iter().filter_map(|e| e.ok()) {
        let source = entry.path();

        if source.is_dir() {
            // The walk starts at `static_path` so every entry is prefixed by it
            let relative_path = source.strip_prefix(&self.static_path).unwrap();
            let target_path = self.output_path.join(relative_path);
            if !target_path.exists() {
                create_directory(&target_path)?;
            }
        } else {
            // The entry path already starts with `static_path`, which itself
            // starts with `base_path`: joining it to `base_path` again would
            // point to a wrong location when the base path is relative
            self.copy_static_file(source)?;
        }
    }

    Ok(())
}
/// Removes the output directory and everything in it.
/// Nothing happens if the directory does not exist
pub fn clean(&self) -> Result<()> {
    if !self.output_path.exists() {
        return Ok(());
    }

    // Delete current `public` directory so we can start fresh
    remove_dir_all(&self.output_path).chain_err(|| "Couldn't delete `public` directory")
}
/// Renders a page in an `index.html` located in a directory of the output
/// directory mirroring the `path` of the page, and copies the assets of the
/// page next to it
pub fn render_page(&self, page: &Page) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    // Copy the nesting of the content directory if we have sections for that page
    let mut page_dir = self.output_path.clone();
    for component in page.path.split('/') {
        page_dir.push(component);

        if !page_dir.exists() {
            create_directory(&page_dir)?;
        }
    }

    // Make sure the folder exists
    create_directory(&page_dir)?;

    // Finally, create a index.html file there with the page rendered
    let html = page.render_html(&self.tera, &self.config)?;
    create_file(&page_dir.join("index.html"), &self.inject_livereload(html))?;

    // Copy any asset we found previously into the same directory as the index.html
    for asset in &page.assets {
        let destination = page_dir.join(asset.as_path().file_name().unwrap());
        copy(asset.as_path(), &destination)?;
    }

    Ok(())
}
/// Builds the whole site from scratch: the output directory is deleted, then
/// the aliases, sections, orphan pages, sitemap, RSS feed (if enabled in the
/// config), robots.txt and taxonomies are rendered and the static directory is
/// copied. Stops at the first step that fails
pub fn build(&self) -> Result<()> {
    self.clean()?;

    // Aliases go first so the actual content can overwrite them
    self.render_aliases()?;
    self.render_sections()?;
    self.render_orphan_pages()?;
    self.render_sitemap()?;

    let wants_rss = self.config.generate_rss.unwrap();
    if wants_rss {
        self.render_rss_feed()?;
    }

    self.render_robots()?;

    // `render_categories` and `render_tags` will check whether the config allows
    // them to render or not
    self.render_categories()?;
    self.render_tags()?;

    self.copy_static_directory()?;

    Ok(())
}
/// Renders a redirection to the permalink of the page for each alias of each
/// page, in an `index.html` located at the alias path in the output directory
pub fn render_aliases(&self) -> Result<()> {
    for page in self.pages.values() {
        let aliases = match page.meta.aliases {
            Some(ref aliases) => aliases,
            None => continue,
        };

        for alias in aliases {
            // Create the directories of the alias one by one
            let mut alias_dir = self.output_path.clone();
            for component in alias.split('/') {
                alias_dir.push(component);

                if !alias_dir.exists() {
                    create_directory(&alias_dir)?;
                }
            }

            let redirect = render_redirect_template(&page.permalink, &self.tera)?;
            create_file(&alias_dir.join("index.html"), &redirect)?;
        }
    }

    Ok(())
}
/// Renders the `robots.txt` template with an empty context at the root of
/// the output directory
pub fn render_robots(&self) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    let robots = self.tera.render("robots.txt", &Context::new())?;
    create_file(&self.output_path.join("robots.txt"), &robots)
}
/// Renders the categories list and the page of each category.
/// Does nothing if the site has no categories taxonomy
pub fn render_categories(&self) -> Result<()> {
    match self.categories {
        Some(ref categories) => self.render_taxonomy(categories),
        None => Ok(()),
    }
}
/// Renders the tags list and the page of each tag.
/// Does nothing if the site has no tags taxonomy
pub fn render_tags(&self) -> Result<()> {
    match self.tags {
        Some(ref tags) => self.render_taxonomy(tags),
        None => Ok(()),
    }
}
/// Renders a taxonomy: the list of its items in the directory named after the
/// taxonomy list name, and each item in a directory named after its slug
/// inside of it. The items are rendered in parallel.
/// Nothing is rendered if the taxonomy has no items
fn render_taxonomy(&self, taxonomy: &Taxonomy) -> Result<()> {
    if taxonomy.items.is_empty() {
        return Ok(());
    }

    ensure_directory_exists(&self.output_path)?;

    let list_dir = self.output_path.join(&taxonomy.get_list_name());
    let list_html = taxonomy.render_list(&self.tera, &self.config)?;
    create_directory(&list_dir)?;
    create_file(&list_dir.join("index.html"), &self.inject_livereload(list_html))?;

    taxonomy
        .items
        .par_iter()
        .map(|item| {
            let item_html = taxonomy.render_single_item(item, &self.tera, &self.config)?;
            let item_dir = list_dir.join(&item.slug);
            create_directory(&item_dir)?;
            create_file(&item_dir.join("index.html"), &self.inject_livereload(item_html))
        })
        .fold(|| Ok(()), Result::and)
        .reduce(|| Ok(()), Result::and)
}
/// What it says on the tin
pub fn render_sitemap(&self) -> Result<()> {
ensure_directory_exists(&self.output_path)?;
let mut context = Context::new();
context.add(
"pages",
&self.pages.values().map(|p| SitemapEntry::new(p.permalink.clone(), p.meta.date.clone())).collect::<Vec<_>>()
);
context.add(
"sections",
&self.sections.values().map(|s| SitemapEntry::new(s.permalink.clone(), None)).collect::<Vec<_>>()
);
let mut categories = vec![];
if let Some(ref c) = self.categories {
let name = c.get_list_name();
categories.push(SitemapEntry::new(self.config.make_permalink(&name), None));
for item in &c.items {
categories.push(
SitemapEntry::new(self.config.make_permalink(&format!("{}/{}", &name, item.slug)), None),
);
}
}
context.add("categories", &categories);
let mut tags = vec![];
if let Some(ref t) = self.tags {
let name = t.get_list_name();
tags.push(SitemapEntry::new(self.config.make_permalink(&name), None));
for item in &t.items {
tags.push(
SitemapEntry::new(self.config.make_permalink(&format!("{}/{}", &name, item.slug)), None),
);
}
}
context.add("tags", &tags);
let sitemap = self.tera.render("sitemap.xml", &context)?;
create_file(&self.output_path.join("sitemap.xml"), &sitemap)?;
Ok(())
}
/// Renders `rss.xml` at the root of the output directory.
///
/// Only pages with a date are part of the feed. They are sorted by date and
/// the feed is limited to the first `rss_limit` of them, the first sorted
/// page also providing the `last_build_date`.
///
/// No feed is generated if no page has a date or if `rss_limit` is 0.
pub fn render_rss_feed(&self) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    let limit = self.config.rss_limit.unwrap();
    let dated_pages = self.pages.values()
        .filter(|p| p.meta.date.is_some())
        .cloned()
        .collect::<Vec<Page>>();

    // Don't generate a RSS feed if none of the pages has a date
    if dated_pages.is_empty() || limit == 0 {
        return Ok(());
    }

    let mut context = Context::new();
    let (sorted_pages, _) = sort_pages(dated_pages, SortBy::Date);
    context.add("last_build_date", &sorted_pages[0].meta.date);

    // The limit has to be applied once the pages are sorted: the pages map
    // has no meaningful order so limiting before sorting would keep an
    // arbitrary set of pages instead of the first ones by date
    let feed_pages = sorted_pages.into_iter().take(limit).collect::<Vec<_>>();
    context.add("pages", &feed_pages);
    context.add("config", &self.config);

    let rss_feed_url = if self.config.base_url.ends_with('/') {
        format!("{}{}", self.config.base_url, "rss.xml")
    } else {
        format!("{}/{}", self.config.base_url, "rss.xml")
    };
    context.add("feed_url", &rss_feed_url);

    let feed = self.tera.render("rss.xml", &context)?;
    create_file(&self.output_path.join("rss.xml"), &feed)?;

    Ok(())
}
/// Builds a map of copies of all the sections, keyed by their components
/// joined with `/`: the section at `content/posts/_index.md` is found at `posts`.
/// The index section is always stored at `index`, so a path such as
/// `content/index/_index.md` should not be used
fn get_sections_map(&self) -> HashMap<String, Section> {
    let mut sections_map = HashMap::new();

    for section in self.sections.values() {
        let key = if section.is_index() {
            "index".to_string()
        } else {
            section.file.components.join("/")
        };
        sections_map.insert(key, section.clone());
    }

    sections_map
}
/// Renders a section in the directory of the output directory matching its
/// components.
/// The pages of the section are rendered as well, in parallel, if
/// `render_pages` is `true`. The section itself is only rendered if its front
/// matter says it should be, either paginated or as a single `index.html`
pub fn render_section(&self, section: &Section, render_pages: bool) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    let mut section_dir = self.output_path.clone();
    for component in &section.file.components {
        section_dir.push(component);

        if !section_dir.exists() {
            create_directory(&section_dir)?;
        }
    }

    if render_pages {
        section
            .pages
            .par_iter()
            .map(|page| self.render_page(page))
            .fold(|| Ok(()), Result::and)
            .reduce(|| Ok(()), Result::and)?;
    }

    if !section.meta.should_render() {
        return Ok(());
    }

    if section.meta.is_paginated() {
        return self.render_paginated(&section_dir, section);
    }

    // Only the index section gets the map of all the sections
    let sections_map = if section.is_index() {
        self.get_sections_map()
    } else {
        HashMap::new()
    };
    let html = section.render_html(sections_map, &self.tera, &self.config)?;
    create_file(&section_dir.join("index.html"), &self.inject_livereload(html))
}
/// Renders the index section, the one stored for `content/_index.md`, without
/// rendering its pages.
/// Panics if the site has no such section
pub fn render_index(&self) -> Result<()> {
    let index_path = self.base_path.join("content").join("_index.md");
    self.render_section(&self.sections[&index_path], false)
}
/// Renders all sections
pub fn render_sections(&self) -> Result<()> {
self.sections
.values()
.collect::<Vec<_>>()
.into_par_iter()
.map(|s| self.render_section(s, true))
.fold(|| Ok(()), Result::and)
.reduce(|| Ok(()), Result::and)
}
/// Renders the pages returned by `get_all_orphan_pages` one after the other,
/// stopping at the first one that fails
pub fn render_orphan_pages(&self) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    // Collecting into a `Result` stops at the first error, like `?` in a loop
    self.get_all_orphan_pages()
        .into_iter()
        .map(|page| self.render_page(page))
        .collect::<Result<Vec<()>>>()
        .map(|_| ())
}
/// Renders the pagers of a paginated section, in parallel.
/// Each pager gets an `index.html` in a directory named after its number
/// (starting at 1) inside of the `paginate_path` directory of `output_path`.
/// The first pager is rendered as the `index.html` of `output_path` instead,
/// with its numbered directory only holding a redirection to the section
fn render_paginated(&self, output_path: &Path, section: &Section) -> Result<()> {
    ensure_directory_exists(&self.output_path)?;

    let paginate_path = if let Some(ref p) = section.meta.paginate_path {
        p.clone()
    } else {
        unreachable!()
    };

    let paginator = Paginator::new(&section.pages, section);
    let pagers_dir = output_path.join(&paginate_path);
    create_directory(&pagers_dir)?;

    paginator
        .pagers
        .par_iter()
        .enumerate()
        .map(|(index, pager)| {
            let pager_dir = pagers_dir.join(&(index + 1).to_string());
            create_directory(&pager_dir)?;
            let html = paginator.render_pager(pager, &self.config, &self.sections, &self.tera)?;

            if index == 0 {
                create_file(&output_path.join("index.html"), &self.inject_livereload(html))?;
                create_file(&pager_dir.join("index.html"), &render_redirect_template(&section.permalink, &self.tera)?)?;
            } else {
                create_file(&pager_dir.join("index.html"), &self.inject_livereload(html))?;
            }

            Ok(())
        })
        .fold(|| Ok(()), Result::and)
        .reduce(|| Ok(()), Result::and)
}
}