imageproc: Cleanup and comments in hash collision resolution
This commit is contained in:
parent
77bc526008
commit
4a6244adcf
|
@ -125,7 +125,10 @@ pub struct ImageOp {
|
||||||
quality: u8,
|
quality: u8,
|
||||||
/// Hash of the above parameters
|
/// Hash of the above parameters
|
||||||
hash: u64,
|
hash: u64,
|
||||||
collision: Option<u32>,
|
/// If there is a hash collision with another ImageOp, this contains a sequential ID > 1
|
||||||
|
/// identifying the collision in the order as encountered (which is essentially random).
|
||||||
|
/// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
|
||||||
|
collision_id: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ImageOp {
|
impl ImageOp {
|
||||||
|
@ -136,7 +139,7 @@ impl ImageOp {
|
||||||
hasher.write_u8(quality);
|
hasher.write_u8(quality);
|
||||||
let hash = hasher.finish();
|
let hash = hasher.finish();
|
||||||
|
|
||||||
ImageOp { source, op, quality, hash, collision: None }
|
ImageOp { source, op, quality, hash, collision_id: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_args(source: String, op: &str, width: Option<u32>, height: Option<u32>, quality: u8) -> Result<ImageOp> {
|
pub fn from_args(source: String, op: &str, width: Option<u32>, height: Option<u32>, quality: u8) -> Result<ImageOp> {
|
||||||
|
@ -144,8 +147,6 @@ impl ImageOp {
|
||||||
Ok(Self::new(source, op, quality))
|
Ok(Self::new(source, op, quality))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn num_collisions(&self) -> u32 { self.collision.unwrap_or(0) }
|
|
||||||
|
|
||||||
fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
|
fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
|
||||||
use ResizeOp::*;
|
use ResizeOp::*;
|
||||||
|
|
||||||
|
@ -263,40 +264,44 @@ impl Processor {
|
||||||
// This is detected when there is an ImageOp with the same hash in the `img_ops` map but which is not equal to this one.
|
// This is detected when there is an ImageOp with the same hash in the `img_ops` map but which is not equal to this one.
|
||||||
// To deal with this, all collisions get a (random) sequential ID number.
|
// To deal with this, all collisions get a (random) sequential ID number.
|
||||||
|
|
||||||
// First try to look up this ImageOp in `img_ops_collisions`, maybe we've already seen the same ImageOp
|
// First try to look up this ImageOp in `img_ops_collisions`, maybe we've already seen the same ImageOp.
|
||||||
let mut num = 1;
|
// At the same time, count IDs to figure out the next free one.
|
||||||
|
// Start with the ID of 2, because we'll need to use 1 for the ImageOp already present in the map:
|
||||||
|
let mut collision_id = 2;
|
||||||
for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
|
for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
|
||||||
if *op == img_op {
|
if *op == img_op {
|
||||||
// This is a colliding ImageOp, but we've already seen the very same one, so just return its ID
|
// This is a colliding ImageOp, but we've already seen an equal one (not just by hash, but by content too),
|
||||||
return num;
|
// so just return its ID:
|
||||||
|
return collision_id;
|
||||||
} else {
|
} else {
|
||||||
num += 1;
|
collision_id += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we get here, that means this is a new colliding ImageOp and `num` has the next free ID
|
// If we get here, that means this is a new colliding ImageOp and `collision_id` is the next free ID
|
||||||
if num == 1 {
|
if collision_id == 2 {
|
||||||
self.img_ops.get_mut(&img_op.hash).unwrap().collision = Some(0);
|
// This is the first collision found with this hash, update the ID of the matching ImageOp in the map.
|
||||||
|
self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
|
||||||
}
|
}
|
||||||
img_op.collision = Some(num);
|
img_op.collision_id = collision_id;
|
||||||
self.img_ops_collisions.push(img_op);
|
self.img_ops_collisions.push(img_op);
|
||||||
num
|
collision_id
|
||||||
}
|
}
|
||||||
|
|
||||||
fn op_filename(hash: u64, num_collisions: u32) -> String {
|
fn op_filename(hash: u64, collision_id: u32) -> String {
|
||||||
// Please keep this in sync with RESIZED_FILENAME
|
// Please keep this in sync with RESIZED_FILENAME
|
||||||
assert!(num_collisions < 256, "Unexpectedly large number of collisions: {}", num_collisions);
|
assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
|
||||||
format!("{:016x}{:02x}.jpg", hash, num_collisions)
|
format!("{:016x}{:02x}.jpg", hash, collision_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn op_url(&self, hash: u64, num_collisions: u32) -> String {
|
fn op_url(&self, hash: u64, collision_id: u32) -> String {
|
||||||
format!("{}/{}", &self.resized_url, Self::op_filename(hash, num_collisions))
|
format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, img_op: ImageOp) -> String {
|
pub fn insert(&mut self, img_op: ImageOp) -> String {
|
||||||
let hash = img_op.hash;
|
let hash = img_op.hash;
|
||||||
let num_collisions = self.insert_with_collisions(img_op);
|
let collision_id = self.insert_with_collisions(img_op);
|
||||||
self.op_url(hash, num_collisions)
|
self.op_url(hash, collision_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn prune(&self) -> Result<()> {
|
pub fn prune(&self) -> Result<()> {
|
||||||
|
@ -308,8 +313,8 @@ impl Processor {
|
||||||
let filename = entry_path.file_name().unwrap().to_string_lossy();
|
let filename = entry_path.file_name().unwrap().to_string_lossy();
|
||||||
if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
|
if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
|
||||||
let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
|
let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
|
||||||
let num_collisions = u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
|
let collision_id = u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
|
||||||
if num_collisions > 0 || !self.img_ops.contains_key(&hash) {
|
if collision_id > 0 || !self.img_ops.contains_key(&hash) {
|
||||||
fs::remove_file(&entry_path)?;
|
fs::remove_file(&entry_path)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -320,7 +325,7 @@ impl Processor {
|
||||||
|
|
||||||
pub fn do_process(&mut self) -> Result<()> {
|
pub fn do_process(&mut self) -> Result<()> {
|
||||||
self.img_ops.par_iter().map(|(hash, op)| {
|
self.img_ops.par_iter().map(|(hash, op)| {
|
||||||
let target = self.resized_path.join(Self::op_filename(*hash, op.num_collisions()));
|
let target = self.resized_path.join(Self::op_filename(*hash, op.collision_id));
|
||||||
op.perform(&self.content_path, &target)
|
op.perform(&self.content_path, &target)
|
||||||
.chain_err(|| format!("Failed to process image: {}", op.source))
|
.chain_err(|| format!("Failed to process image: {}", op.source))
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in a new issue