tangetools/rrm/rclean

67 lines
1.4 KiB
Plaintext
Raw Permalink Normal View History

2016-05-22 22:55:10 +00:00
#!/usr/bin/perl
use Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);
2017-03-20 21:43:31 +00:00
use Digest::SHA;
2016-05-22 22:55:10 +00:00
# Strictures for the rest of the script; every variable below is lexical.
use strict;
use warnings;

# rclean: list the files below a directory whose size and hash value match
# an entry recorded in the nearest .rrm file. Matching paths are printed one
# per line on stdout; this script itself deletes nothing.
#
# Usage: rclean [dir]      (dir defaults to ".")

my $dir = shift || ".";
# Abort rather than silently scanning (and listing files from) the wrong
# directory when $dir does not exist or is not accessible.
chdir $dir or die "Cannot chdir to $dir: $!\n";

# Table of which files have a given size.
# find emits NUL-terminated "size<TAB>path" records, so read NUL-separated.
# NOTE(review): $/ deliberately stays "\0" for the .rrm read further down,
# exactly as before - confirm that .rrm records really are NUL-terminated.
$/ = "\0";
open(my $find, "-|", 'find "`pwd`" -type f -printf "%s\t%p\0"')
    or die "Cannot run find: $!\n";
my %files_of_size;
while (my $record = <$find>) {
    chomp $record;    # strip the trailing \0, and only that
    # Limit of 2: a tab inside the path must stay part of the path.
    my ($bytes, $path) = split /\t/, $record, 2;
    next unless defined $path;
    push @{ $files_of_size{$bytes} }, $path;
}
# find's exit status is ignored on purpose (as before): unreadable
# subdirectories should not stop the scan of everything else.
close $find;

# Read hash-value of removed files of a given size:
#   $rrm{size}{hashval} = number of records with that size and hash value.
my %rrm;
my $rrmfile = find_rrm_file(".") || ".rrm";
if (not -r $rrmfile) {
    print STDERR "Cannot read $rrmfile\n";
    exit(1);
}
open(my $rrm_fh, "<", $rrmfile) or die "Cannot open $rrmfile: $!\n";
while (my $record = <$rrm_fh>) {
    # Record layout: size <TAB> hashval <TAB> file; only the first two
    # fields are needed here.
    my ($size, $hashval) = split /\t/, $record;
    next unless defined $hashval;    # malformed record: nothing to match on
    # 0+ normalises the size to a plain number so it compares equal to the
    # size strings printed by find.
    $rrm{ 0 + $size }{$hashval}++;
}
close $rrm_fh;

# Which existing files are the same size as some of the removed files?
for my $size (keys %rrm) {
    my $removed = $rrm{$size};
    # "|| []" avoids creating empty entries for sizes that have no files.
    for my $file (@{ $files_of_size{$size} || [] }) {
        next unless -e $file;

        # Do they have the same hash-value? Recorded values are either a
        # plain MD5 hex digest or "SHA256:" followed by a base64 digest.
        my $md5 = Digest::MD5->new;
        $md5->addpath($file);
        my $matched = $removed->{ $md5->hexdigest };

        # Only pay for a second read of the file if the MD5 did not match.
        if (not $matched) {
            my $sha = Digest::SHA->new(256);
            $sha->addfile($file);
            $matched = $removed->{ "SHA256:" . $sha->b64digest };
        }

        # remove this
        print "$file\n" if $matched;
    }
}
# find_rrm_file($dir)
#
# Walk upwards from $dir, one "/.." at a time, looking for a readable
# ".rrm" file.
#
# Returns the path of the first readable ".rrm" found (built as
# "$dir/.rrm", "$dir/../.rrm", ...), or undef once the top of the
# directory tree is reached without finding one.
sub find_rrm_file {
    my $dir = shift;
    while (1) {
        my $candidate = "$dir/.rrm";
        return $candidate if -r $candidate;

        my $parent = "$dir/..";
        # A directory whose stat data equals that of its own parent is
        # treated as the root: there is nowhere further up to look.
        return undef if join(" ", stat $dir) eq join(" ", stat $parent);

        $dir = $parent;
    }
}