#!/usr/bin/perl

# Print the files below a directory whose size and hash value match a
# record in the nearest .rrm file.
#
# Usage: <script> [dir]
#
#   dir   Directory to scan (default: "."). The script chdirs into it.
#
# The .rrm file is searched for in the scanned directory and then in each
# parent directory up to the root. Each record holds three tab-separated
# fields: size, hash value and file name. A hash value is either an MD5 hex
# digest or "SHA256:" followed by a base64 SHA-256 digest.
#
# Output: one matching file path per line on stdout. Nothing is deleted.
# Exit status is 1 if no readable .rrm file is available.

use strict;
use warnings;

use Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);
use Digest::SHA;

my $dir = shift || ".";
chdir $dir or die "Cannot chdir to $dir: $!\n";

# Records are NUL-terminated. This is set globally on purpose: it is used
# both for the output of find below and for reading the .rrm file.
# NOTE(review): presumably the program writing .rrm terminates each record
# with \0 as well - confirm against the writer.
$/ = "\0";

# Table of which files have a given size
open(my $find_fh, "-|", 'find "`pwd`" -type f -printf "%s\t%p\0"')
    or die "Cannot run find: $!\n";
my %size;
while (my $record = <$find_fh>) {
    chomp $record;    # Remove \0
    # Limit the split to 2 fields so a tab inside a file name stays
    # part of the name.
    my ($bytes, $path) = split /\t/, $record, 2;
    next unless defined $path;
    push @{ $size{$bytes} }, $path;
}
close $find_fh;

# Read hash-value of removed files of a given size
my %rrm;
my $rrmfile = find_rrm_file(".") || ".rrm";
if (not -r $rrmfile) {
    print STDERR "Cannot read $rrmfile\n";
    exit(1);
}
open(my $rrm_fh, "<", $rrmfile) or die "Cannot open $rrmfile: $!\n";
while (my $record = <$rrm_fh>) {
    # The third field (file name) is not needed for matching.
    my ($bytes, $hashval) = split /\t/, $record, 3;
    next unless defined $hashval;
    # 0+ normalizes the size so it compares equal to the size from find
    $rrm{ 0 + $bytes }{$hashval}++;
}
close $rrm_fh;

# Which existing files are the same size as some of the removed files?
for my $bytes (keys %rrm) {
    my $wanted = $rrm{$bytes};
    for my $file (@{ $size{$bytes} || [] }) {
        next unless -e $file;
        # Do they have the same hash-value?
        # MD5 is tried first; SHA-256 is only computed if MD5 did not match.
        # The eval keeps a single unreadable file from aborting the scan.
        my $is_match = eval {
            my $md5 = Digest::MD5->new;
            $md5->addpath($file);
            return 1 if $wanted->{ $md5->hexdigest };
            my $sha = Digest::SHA->new(256);
            $sha->addfile($file);
            return $wanted->{ "SHA256:" . $sha->b64digest } ? 1 : 0;
        };
        if (not defined $is_match) {
            warn "Cannot hash $file: $@";
            next;
        }
        # remove this
        print "$file\n" if $is_match;
    }
}

# find_rrm_file($dir)
#
# Look for a readable ".rrm" file in $dir and then in each parent
# directory ("$dir/..", "$dir/../..", ...).
#
# Returns the path of the first readable .rrm found, or nothing (undef in
# scalar context) when the root directory is reached without finding one.
sub find_rrm_file {
    my $dir = shift;
    while (1) {
        return "$dir/.rrm" if -r "$dir/.rrm";
        my @here   = stat $dir;
        my @parent = stat "$dir/..";
        # Stop if stat fails rather than climbing forever.
        return if !@here or !@parent;
        # Same device and inode as the parent means this is the root.
        # Timestamps are deliberately not compared: they can change
        # between the two stat calls.
        return if $here[0] == $parent[0] and $here[1] == $parent[1];
        $dir = "$dir/..";
    }
}