Split sitemap (#619)

Split sitemap when it is getting too big
This commit is contained in:
Sébastien Mariaux 2019-03-14 20:57:22 +01:00 committed by Vincent Prouillet
parent 3b8a95eb8f
commit 2a0d0b9b77
6 changed files with 54 additions and 28 deletions

View file

@ -10,5 +10,5 @@ What is the issue? Is the documentation unclear? Is it missing information?
## Proposed solution ## Proposed solution
A quick explanation of what you would like to see to solve the issue. A quick explanation of what you would like to see to solve the issue.
If you want to add content, please explain what you were looking fod and what was If you want to add content, please explain what you were looking for and what was
your process while looking at the current documentation. your process while looking at the current documentation.

View file

@ -369,6 +369,7 @@ pub fn after_template_change(site: &mut Site, path: &Path) -> Result<()> {
match filename { match filename {
"sitemap.xml" => site.render_sitemap(), "sitemap.xml" => site.render_sitemap(),
"rss.xml" => site.render_rss_feed(site.library.read().unwrap().pages_values(), None), "rss.xml" => site.render_rss_feed(site.library.read().unwrap().pages_values(), None),
"split_sitemap_index.xml" => site.render_sitemap(),
"robots.txt" => site.render_robots(), "robots.txt" => site.render_robots(),
"single.html" | "list.html" => site.render_taxonomies(), "single.html" | "list.html" => site.render_taxonomies(),
"page.html" => { "page.html" => {

View file

@ -788,8 +788,6 @@ impl Site {
pub fn render_sitemap(&self) -> Result<()> { pub fn render_sitemap(&self) -> Result<()> {
ensure_directory_exists(&self.output_path)?; ensure_directory_exists(&self.output_path)?;
let mut context = Context::new();
let mut pages = self let mut pages = self
.library .library
.read() .read()
@ -806,7 +804,6 @@ impl Site {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
pages.sort_by(|a, b| a.permalink.cmp(&b.permalink)); pages.sort_by(|a, b| a.permalink.cmp(&b.permalink));
context.insert("pages", &pages);
let mut sections = self let mut sections = self
.library .library
@ -835,7 +832,6 @@ impl Site {
} }
} }
sections.sort_by(|a, b| a.permalink.cmp(&b.permalink)); sections.sort_by(|a, b| a.permalink.cmp(&b.permalink));
context.insert("sections", &sections);
let mut taxonomies = vec![]; let mut taxonomies = vec![];
for taxonomy in &self.taxonomies { for taxonomy in &self.taxonomies {
@ -869,13 +865,46 @@ impl Site {
taxonomies.push(terms); taxonomies.push(terms);
} }
context.insert("taxonomies", &taxonomies); // Group all sitemap entries in one vector
context.insert("config", &self.config); let mut all_sitemap_entries = Vec::new();
all_sitemap_entries.append(&mut pages);
all_sitemap_entries.append(&mut sections);
for terms in taxonomies {
let mut terms = terms;
all_sitemap_entries.append(&mut terms);
}
let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?; // Count total number of sitemap entries to include in sitemap
let total_number = all_sitemap_entries.len();
create_file(&self.output_path.join("sitemap.xml"), sitemap)?; let sitemap_limit = 30000;
if total_number < sitemap_limit {
// Create single sitemap
let mut context = Context::new();
context.insert("sitemap_entries", &all_sitemap_entries);
let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?;
create_file(&self.output_path.join("sitemap.xml"), sitemap)?;
return Ok(())
}
// Create multiple sitemaps (max 30000 urls each)
let mut sitemap_index = Vec::new();
for (i, chunk) in all_sitemap_entries.chunks(sitemap_limit).enumerate() {
let mut context = Context::new();
context.insert("sitemap_entries", &chunk);
let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?;
let file_name = format!("sitemap{}.xml", i+1);
create_file(&self.output_path.join(&file_name), sitemap)?;
let mut sitemap_url:String = self.config.make_permalink(&file_name);
sitemap_url.pop(); // Remove trailing slash
sitemap_index.push(sitemap_url);
}
// Create main sitemap that reference numbered sitemaps
let mut main_context = Context::new();
main_context.insert("sitemaps", &sitemap_index);
let sitemap = &render_template("split_sitemap_index.xml", &self.tera, main_context, &self.config.theme)?;
create_file(&self.output_path.join("sitemap.xml"), sitemap)?;
Ok(()) Ok(())
} }

View file

@ -1,22 +1,10 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for page in pages %} {% for sitemap_entry in sitemap_entries %}
<url> <url>
<loc>{{ page.permalink | safe }}</loc> <loc>{{ sitemap_entry.permalink | safe }}</loc>
{% if page.date %} {% if sitemap_entry.date %}
<lastmod>{{ page.date }}</lastmod> <lastmod>{{ sitemap_entry.date }}</lastmod>
{% endif %} {% endif %}
</url> </url>
{% endfor %} {% endfor %}
{% for section in sections %} </urlset>
<url>
<loc>{{ section.permalink | safe }}</loc>
</url>
{% endfor %}
{% for taxonomy in taxonomies %}
{% for entry in taxonomy %}
<url>
<loc>{{ entry.permalink | safe }}</loc>
</url>
{% endfor %}
{% endfor %}
</urlset>

View file

@ -0,0 +1,7 @@
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{# Sitemap index written as the main sitemap.xml when the entries are split
   across several numbered sitemap files. `sitemaps` holds one URL string per
   numbered file. The `safe` filter emits each URL unescaped, the same way
   the <loc> entries of sitemap.xml are rendered. #}
{% for sitemap in sitemaps %}
<sitemap>
<loc>{{ sitemap | safe }}</loc>
</sitemap>
{% endfor %}
</sitemapindex>

View file

@ -35,7 +35,8 @@ lazy_static! {
("__zola_builtins/rss.xml", include_str!("builtins/rss.xml")), ("__zola_builtins/rss.xml", include_str!("builtins/rss.xml")),
("__zola_builtins/sitemap.xml", include_str!("builtins/sitemap.xml")), ("__zola_builtins/sitemap.xml", include_str!("builtins/sitemap.xml")),
("__zola_builtins/robots.txt", include_str!("builtins/robots.txt")), ("__zola_builtins/robots.txt", include_str!("builtins/robots.txt")),
("anchor-link.html", include_str!("builtins/anchor-link.html")), ("__zola_builtins/split_sitemap_index.xml", include_str!("builtins/split_sitemap_index.xml")),
("__zola_builtins/anchor-link.html", include_str!("builtins/anchor-link.html")),
( (
"__zola_builtins/shortcodes/youtube.html", "__zola_builtins/shortcodes/youtube.html",
include_str!("builtins/shortcodes/youtube.html"), include_str!("builtins/shortcodes/youtube.html"),