2018-02-02 20:35:04 +00:00
|
|
|
#[macro_use]
|
|
|
|
extern crate lazy_static;
|
|
|
|
extern crate regex;
|
|
|
|
extern crate image;
|
|
|
|
extern crate rayon;
|
|
|
|
|
|
|
|
extern crate utils;
|
|
|
|
extern crate errors;
|
|
|
|
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::collections::hash_map::Entry as HEntry;
|
2018-06-23 14:48:37 +00:00
|
|
|
use std::collections::hash_map::DefaultHasher;
|
2018-02-02 20:35:04 +00:00
|
|
|
use std::fs::{self, File};
|
|
|
|
|
|
|
|
use regex::Regex;
|
|
|
|
use image::{GenericImage, FilterType};
|
|
|
|
use image::jpeg::JPEGEncoder;
|
|
|
|
use rayon::prelude::*;
|
|
|
|
|
|
|
|
use utils::fs as ufs;
|
|
|
|
use errors::{Result, ResultExt};
|
|
|
|
|
|
|
|
|
2018-06-24 20:21:33 +00:00
|
|
|
/// Name of the subdirectory (and URL path segment) under which resized images are stored.
static RESIZED_SUBDIR: &str = "_processed_images";
|
2018-06-25 17:13:21 +00:00
|
|
|
|
2018-07-31 13:17:31 +00:00
|
|
|
lazy_static! {
    /// Pattern for the file names of resized images: 16 lowercase hex digits
    /// (capture 1, the hash) followed by 2 lowercase hex digits (capture 2,
    /// the collision ID) and a `.jpg` extension.
    ///
    /// Must stay in sync with the format string in `Processor::op_filename`.
    /// The pattern is not anchored, so it also matches inside longer strings.
    pub static ref RESIZED_FILENAME: Regex =
        Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.]jpg"#).unwrap();
}
|
|
|
|
|
|
|
|
/// The exact kind of resize operation to apply, together with its target dimension(s).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ResizeOp {
    /// Plain scale to `(width, height)`; the aspect ratio is not taken into account.
    Scale(u32, u32),
    /// Scale to the given width; the height is computed so that the aspect
    /// ratio is preserved.
    FitWidth(u32),
    /// Scale to the given height; the width is computed so that the aspect
    /// ratio is preserved.
    FitHeight(u32),
    /// Scale so that the image fits inside `(width, height)` while preserving
    /// the aspect ratio. Either dimension may come out smaller than requested,
    /// but never larger.
    Fit(u32, u32),
    /// Scale so that the image fills `(width, height)`; the output always has
    /// exactly those dimensions. Whatever does not fit because of a differing
    /// aspect ratio is cropped away.
    Fill(u32, u32),
}
|
|
|
|
|
|
|
|
impl ResizeOp {
|
|
|
|
pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
|
|
|
|
use ResizeOp::*;
|
|
|
|
|
|
|
|
// Validate args:
|
|
|
|
match op {
|
2018-06-25 17:13:21 +00:00
|
|
|
"fit_width" => if width.is_none() {
|
2018-07-31 13:17:31 +00:00
|
|
|
return Err("op=\"fit_width\" requires a `width` argument".to_string().into());
|
2018-06-25 17:13:21 +00:00
|
|
|
},
|
|
|
|
"fit_height" => if height.is_none() {
|
2018-07-31 13:17:31 +00:00
|
|
|
return Err("op=\"fit_height\" requires a `height` argument".to_string().into());
|
2018-06-25 17:13:21 +00:00
|
|
|
},
|
2018-02-02 20:35:04 +00:00
|
|
|
"scale" | "fit" | "fill" => if width.is_none() || height.is_none() {
|
2018-07-31 13:17:31 +00:00
|
|
|
return Err(format!("op={} requires a `width` and `height` argument", op).into());
|
2018-02-02 20:35:04 +00:00
|
|
|
},
|
|
|
|
_ => return Err(format!("Invalid image resize operation: {}", op).into())
|
|
|
|
};
|
|
|
|
|
|
|
|
Ok(match op {
|
|
|
|
"scale" => Scale(width.unwrap(), height.unwrap()),
|
2018-05-25 16:30:22 +00:00
|
|
|
"fit_width" => FitWidth(width.unwrap()),
|
|
|
|
"fit_height" => FitHeight(height.unwrap()),
|
2018-02-02 20:35:04 +00:00
|
|
|
"fit" => Fit(width.unwrap(), height.unwrap()),
|
|
|
|
"fill" => Fill(width.unwrap(), height.unwrap()),
|
|
|
|
_ => unreachable!(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn width(self) -> Option<u32> {
|
|
|
|
use ResizeOp::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
Scale(w, _) => Some(w),
|
|
|
|
FitWidth(w) => Some(w),
|
|
|
|
FitHeight(_) => None,
|
|
|
|
Fit(w, _) => Some(w),
|
|
|
|
Fill(w, _) => Some(w),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn height(self) -> Option<u32> {
|
|
|
|
use ResizeOp::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
Scale(_, h) => Some(h),
|
|
|
|
FitWidth(_) => None,
|
|
|
|
FitHeight(h) => Some(h),
|
|
|
|
Fit(_, h) => Some(h),
|
|
|
|
Fill(_, h) => Some(h),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<ResizeOp> for u8 {
|
|
|
|
fn from(op: ResizeOp) -> u8 {
|
|
|
|
use ResizeOp::*;
|
|
|
|
|
|
|
|
match op {
|
|
|
|
Scale(_, _) => 1,
|
|
|
|
FitWidth(_) => 2,
|
|
|
|
FitHeight(_) => 3,
|
|
|
|
Fit(_, _) => 4,
|
|
|
|
Fill(_, _) => 5,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Hash for ResizeOp {
|
|
|
|
fn hash<H: Hasher>(&self, hasher: &mut H) {
|
|
|
|
hasher.write_u8(u8::from(*self));
|
2018-07-31 13:17:31 +00:00
|
|
|
if let Some(w) = self.width() { hasher.write_u32(w); }
|
2018-02-02 20:35:04 +00:00
|
|
|
if let Some(h) = self.height() { hasher.write_u32(h); }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Holds all data needed to perform a resize operation
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
|
|
pub struct ImageOp {
|
|
|
|
source: String,
|
|
|
|
op: ResizeOp,
|
|
|
|
quality: u8,
|
2018-05-25 16:30:22 +00:00
|
|
|
/// Hash of the above parameters
|
2018-02-02 20:35:04 +00:00
|
|
|
hash: u64,
|
2018-05-25 16:37:27 +00:00
|
|
|
/// If there is a hash collision with another ImageOp, this contains a sequential ID > 1
|
|
|
|
/// identifying the collision in the order as encountered (which is essentially random).
|
|
|
|
/// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
|
2018-06-25 17:13:21 +00:00
|
|
|
/// Note that this is very unlikely to happen in practice
|
2018-05-25 16:37:27 +00:00
|
|
|
collision_id: u32,
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl ImageOp {
|
|
|
|
pub fn new(source: String, op: ResizeOp, quality: u8) -> ImageOp {
|
2018-06-23 14:48:37 +00:00
|
|
|
let mut hasher = DefaultHasher::new();
|
2018-02-02 20:35:04 +00:00
|
|
|
hasher.write(source.as_ref());
|
|
|
|
op.hash(&mut hasher);
|
|
|
|
hasher.write_u8(quality);
|
|
|
|
let hash = hasher.finish();
|
|
|
|
|
2018-05-25 16:37:27 +00:00
|
|
|
ImageOp { source, op, quality, hash, collision_id: 0 }
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
2018-06-25 17:13:21 +00:00
|
|
|
pub fn from_args(
|
|
|
|
source: String,
|
|
|
|
op: &str,
|
|
|
|
width: Option<u32>,
|
|
|
|
height: Option<u32>,
|
|
|
|
quality: u8,
|
|
|
|
) -> Result<ImageOp> {
|
2018-02-02 20:35:04 +00:00
|
|
|
let op = ResizeOp::from_args(op, width, height)?;
|
|
|
|
Ok(Self::new(source, op, quality))
|
|
|
|
}
|
|
|
|
|
|
|
|
fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
|
|
|
|
use ResizeOp::*;
|
|
|
|
|
|
|
|
let src_path = content_path.join(&self.source);
|
|
|
|
if !ufs::file_stale(&src_path, target_path) {
|
2018-07-31 13:17:31 +00:00
|
|
|
return Ok(());
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut img = image::open(&src_path)?;
|
|
|
|
let (img_w, img_h) = img.dimensions();
|
|
|
|
|
|
|
|
const RESIZE_FILTER: FilterType = FilterType::Gaussian;
|
|
|
|
const RATIO_EPSILLION: f32 = 0.1;
|
|
|
|
|
|
|
|
let img = match self.op {
|
|
|
|
Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
|
|
|
|
FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
|
|
|
|
FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
|
|
|
|
Fit(w, h) => img.resize(w, h, RESIZE_FILTER),
|
|
|
|
Fill(w, h) => {
|
2018-05-25 16:30:22 +00:00
|
|
|
let factor_w = img_w as f32 / w as f32;
|
|
|
|
let factor_h = img_h as f32 / h as f32;
|
2018-02-02 20:35:04 +00:00
|
|
|
|
2018-05-25 16:30:22 +00:00
|
|
|
if (factor_w - factor_h).abs() <= RATIO_EPSILLION {
|
2018-06-25 17:13:21 +00:00
|
|
|
// If the horizontal and vertical factor is very similar,
|
|
|
|
// that means the aspect is similar enough that there's not much point
|
|
|
|
// in cropping, so just perform a simple scale in this case.
|
2018-02-02 20:35:04 +00:00
|
|
|
img.resize_exact(w, h, RESIZE_FILTER)
|
|
|
|
} else {
|
2018-06-25 17:13:21 +00:00
|
|
|
// We perform the fill such that a crop is performed first
|
|
|
|
// and then resize_exact can be used, which should be cheaper than
|
|
|
|
// resizing and then cropping (smaller number of pixels to resize).
|
2018-05-25 16:30:22 +00:00
|
|
|
let (crop_w, crop_h) = if factor_w < factor_h {
|
|
|
|
(img_w, (factor_w * h as f32).round() as u32)
|
|
|
|
} else {
|
|
|
|
((factor_h * w as f32).round() as u32, img_h)
|
2018-02-02 20:35:04 +00:00
|
|
|
};
|
2018-05-25 16:30:22 +00:00
|
|
|
|
|
|
|
let (offset_w, offset_h) = if factor_w < factor_h {
|
|
|
|
(0, (img_h - crop_h) / 2)
|
|
|
|
} else {
|
|
|
|
((img_w - crop_w) / 2, 0)
|
2018-02-02 20:35:04 +00:00
|
|
|
};
|
2018-05-25 16:30:22 +00:00
|
|
|
|
2018-06-25 17:13:21 +00:00
|
|
|
img.crop(offset_w, offset_h, crop_w, crop_h)
|
|
|
|
.resize_exact(w, h, RESIZE_FILTER)
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
2018-07-31 13:17:31 +00:00
|
|
|
}
|
2018-02-02 20:35:04 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
let mut f = File::create(target_path)?;
|
|
|
|
let mut enc = JPEGEncoder::new_with_quality(&mut f, self.quality);
|
|
|
|
let (img_w, img_h) = img.dimensions();
|
|
|
|
enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// A strcture into which image operations can be enqueued and then performed.
|
|
|
|
/// All output is written in a subdirectory in `static_path`,
|
|
|
|
/// taking care of file stale status based on timestamps and possible hash collisions.
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Processor {
|
|
|
|
content_path: PathBuf,
|
|
|
|
resized_path: PathBuf,
|
|
|
|
resized_url: String,
|
2018-05-25 16:30:22 +00:00
|
|
|
/// A map of a ImageOps by their stored hash.
|
2018-06-23 14:38:53 +00:00
|
|
|
/// Note that this cannot be a HashSet, because hashset handles collisions and we don't want that,
|
2018-05-25 16:30:22 +00:00
|
|
|
/// we need to be aware of and handle collisions ourselves.
|
2018-02-02 20:35:04 +00:00
|
|
|
img_ops: HashMap<u64, ImageOp>,
|
2018-05-25 16:30:22 +00:00
|
|
|
/// Hash collisions go here:
|
|
|
|
img_ops_collisions: Vec<ImageOp>,
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Processor {
|
|
|
|
pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
|
|
|
|
Processor {
|
|
|
|
content_path,
|
|
|
|
resized_path: static_path.join(RESIZED_SUBDIR),
|
|
|
|
resized_url: Self::resized_url(base_url),
|
|
|
|
img_ops: HashMap::new(),
|
2018-05-25 16:30:22 +00:00
|
|
|
img_ops_collisions: Vec::new(),
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn resized_url(base_url: &str) -> String {
|
2018-05-25 16:30:22 +00:00
|
|
|
if base_url.ends_with('/') {
|
2018-07-31 13:17:31 +00:00
|
|
|
format!("{}{}", base_url, RESIZED_SUBDIR)
|
2018-05-25 16:30:22 +00:00
|
|
|
} else {
|
2018-07-31 13:17:31 +00:00
|
|
|
format!("{}/{}", base_url, RESIZED_SUBDIR)
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_base_url(&mut self, base_url: &str) {
|
|
|
|
self.resized_url = Self::resized_url(base_url);
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn source_exists(&self, source: &str) -> bool {
|
|
|
|
self.content_path.join(source).exists()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn num_img_ops(&self) -> usize {
|
2018-05-25 16:30:22 +00:00
|
|
|
self.img_ops.len() + self.img_ops_collisions.len()
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
2018-05-25 16:30:22 +00:00
|
|
|
fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
|
2018-02-02 20:35:04 +00:00
|
|
|
match self.img_ops.entry(img_op.hash) {
|
|
|
|
HEntry::Occupied(entry) => if *entry.get() == img_op { return 0; },
|
|
|
|
HEntry::Vacant(entry) => {
|
|
|
|
entry.insert(img_op);
|
|
|
|
return 0;
|
2018-07-31 13:17:31 +00:00
|
|
|
}
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// If we get here, that means a hash collision.
|
2018-06-25 17:13:21 +00:00
|
|
|
// This is detected when there is an ImageOp with the same hash in the `img_ops`
|
|
|
|
// map but which is not equal to this one.
|
2018-05-25 16:30:22 +00:00
|
|
|
// To deal with this, all collisions get a (random) sequential ID number.
|
|
|
|
|
2018-06-25 17:13:21 +00:00
|
|
|
// First try to look up this ImageOp in `img_ops_collisions`, maybe we've
|
|
|
|
// already seen the same ImageOp.
|
2018-05-25 16:37:27 +00:00
|
|
|
// At the same time, count IDs to figure out the next free one.
|
2018-06-25 17:13:21 +00:00
|
|
|
// Start with the ID of 2, because we'll need to use 1 for the ImageOp
|
|
|
|
// already present in the map:
|
2018-05-25 16:37:27 +00:00
|
|
|
let mut collision_id = 2;
|
2018-05-25 16:30:22 +00:00
|
|
|
for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
|
2018-02-02 20:35:04 +00:00
|
|
|
if *op == img_op {
|
2018-06-25 17:13:21 +00:00
|
|
|
// This is a colliding ImageOp, but we've already seen an equal one
|
|
|
|
// (not just by hash, but by content too), so just return its ID:
|
2018-05-25 16:37:27 +00:00
|
|
|
return collision_id;
|
2018-02-02 20:35:04 +00:00
|
|
|
} else {
|
2018-05-25 16:37:27 +00:00
|
|
|
collision_id += 1;
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-25 17:13:21 +00:00
|
|
|
// If we get here, that means this is a new colliding ImageOp and
|
|
|
|
// `collision_id` is the next free ID
|
2018-05-25 16:37:27 +00:00
|
|
|
if collision_id == 2 {
|
2018-06-25 17:13:21 +00:00
|
|
|
// This is the first collision found with this hash, update the ID
|
|
|
|
// of the matching ImageOp in the map.
|
2018-05-25 16:37:27 +00:00
|
|
|
self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
2018-05-25 16:37:27 +00:00
|
|
|
img_op.collision_id = collision_id;
|
2018-05-25 16:30:22 +00:00
|
|
|
self.img_ops_collisions.push(img_op);
|
2018-05-25 16:37:27 +00:00
|
|
|
collision_id
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
2018-05-25 16:37:27 +00:00
|
|
|
fn op_filename(hash: u64, collision_id: u32) -> String {
|
2018-02-02 20:35:04 +00:00
|
|
|
// Please keep this in sync with RESIZED_FILENAME
|
2018-05-25 16:37:27 +00:00
|
|
|
assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
|
|
|
|
format!("{:016x}{:02x}.jpg", hash, collision_id)
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
2018-05-25 16:37:27 +00:00
|
|
|
fn op_url(&self, hash: u64, collision_id: u32) -> String {
|
|
|
|
format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id))
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn insert(&mut self, img_op: ImageOp) -> String {
|
|
|
|
let hash = img_op.hash;
|
2018-05-25 16:37:27 +00:00
|
|
|
let collision_id = self.insert_with_collisions(img_op);
|
|
|
|
self.op_url(hash, collision_id)
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn prune(&self) -> Result<()> {
|
2018-07-16 08:54:05 +00:00
|
|
|
// Do not create folders if they don't exist
|
|
|
|
if !self.resized_path.exists() {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
2018-02-02 20:35:04 +00:00
|
|
|
ufs::ensure_directory_exists(&self.resized_path)?;
|
|
|
|
let entries = fs::read_dir(&self.resized_path)?;
|
|
|
|
for entry in entries {
|
|
|
|
let entry_path = entry?.path();
|
|
|
|
if entry_path.is_file() {
|
|
|
|
let filename = entry_path.file_name().unwrap().to_string_lossy();
|
|
|
|
if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
|
|
|
|
let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
|
2018-06-25 17:13:21 +00:00
|
|
|
let collision_id = u32::from_str_radix(
|
2018-07-31 13:17:31 +00:00
|
|
|
capts.get(2).unwrap().as_str(), 16,
|
2018-06-25 17:13:21 +00:00
|
|
|
).unwrap();
|
|
|
|
|
2018-05-25 16:37:27 +00:00
|
|
|
if collision_id > 0 || !self.img_ops.contains_key(&hash) {
|
2018-02-02 20:35:04 +00:00
|
|
|
fs::remove_file(&entry_path)?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn do_process(&mut self) -> Result<()> {
|
2018-07-16 08:54:05 +00:00
|
|
|
if !self.img_ops.is_empty() {
|
|
|
|
ufs::ensure_directory_exists(&self.resized_path)?;
|
|
|
|
}
|
|
|
|
|
2018-02-02 20:35:04 +00:00
|
|
|
self.img_ops.par_iter().map(|(hash, op)| {
|
2018-05-25 16:37:27 +00:00
|
|
|
let target = self.resized_path.join(Self::op_filename(*hash, op.collision_id));
|
2018-02-02 20:35:04 +00:00
|
|
|
op.perform(&self.content_path, &target)
|
|
|
|
.chain_err(|| format!("Failed to process image: {}", op.source))
|
|
|
|
})
|
2018-07-31 13:17:31 +00:00
|
|
|
.fold(|| Ok(()), Result::and)
|
|
|
|
.reduce(|| Ok(()), Result::and)
|
2018-02-02 20:35:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Decides from the file extension alone (compared case-insensitively)
/// whether `p` is an image in a supported format: JPEG, PNG, GIF or BMP.
/// Paths without an extension, or whose extension is not valid UTF-8, are
/// not considered images.
pub fn file_is_img<P: AsRef<Path>>(p: P) -> bool {
    const SUPPORTED_EXTENSIONS: [&str; 5] = ["jpg", "jpeg", "png", "gif", "bmp"];

    let extension = match p.as_ref().extension().and_then(|raw| raw.to_str()) {
        Some(extension) => extension.to_lowercase(),
        None => return false,
    };

    SUPPORTED_EXTENSIONS
        .iter()
        .any(|known| *known == extension.as_str())
}
|