rrm: Convert from MD5 to SHA256.

This commit is contained in:
Ole Tange 2017-03-20 22:43:31 +01:00
parent 3c5accdf5e
commit 6be767cbec
3 changed files with 26 additions and 17 deletions

View file

@ -1,4 +1,4 @@
CMD = blink bsearch G histogram upsidedown tracefile timestamp rand rrm goodpasswd gitnext puniq ramusage ramusage
CMD = blink bsearch G histogram upsidedown tracefile timestamp rand rclean rrm goodpasswd gitnext pdfman puniq ramusage ramusage
all: blink/blink.1 bsearch/bsearch.1 G/G.1 goodpasswd/goodpasswd.1 histogram/histogram.1 puniq/puniq.1 rand/rand.1 rrm/rrm.1 timestamp/timestamp.1 tracefile/tracefile.1 upsidedown/upsidedown.1 wssh/wssh.1

View file

@ -1,6 +1,7 @@
#!/usr/bin/perl
use Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);
use Digest::SHA;
my $dir = shift || ".";
@ -18,13 +19,13 @@ while(<IN>) {
}
close IN;
# Read md5sum of removed files of a given size
# Read hash-value of removed files of a given size
my %rrm;
my $rrmfile = find_rrm_file(".") || ".rrm";
open(RRM,"<",$rrmfile) || die;
while(<RRM>) {
my($size,$md5,$file) = split /\t/,$_;
$rrm{0+$size}{$md5}++;
my($size,$hashval,$file) = split /\t/,$_;
$rrm{0+$size}{$hashval}++;
}
close RRM;
@ -34,8 +35,11 @@ for my $size (keys %rrm) {
if(-e $file) {
my $md5 = Digest::MD5->new;
$md5->addpath($file);
# Do they have the same md5sum?
if($rrm{$size}{$md5->hexdigest}) {
my $sha = Digest::SHA->new(256);
$sha->addfile($file);
# Do they have the same hash-value?
if($rrm{$size}{$md5->hexdigest} or
$rrm{$size}{"SHA256:".$sha->b64digest}) {
# remove this
print "$file\n";
}

25
rrm/rrm
View file

@ -31,44 +31,49 @@ Restore a backup containing I<IMG_2035.JPG>.
B<rclean>
B<rclean> will find B<IMG_2035.JPG> as it has the same size and MD5sum
B<rclean> will find B<IMG_2035.JPG> as it has the same size and SHA256sum
as an already removed file.
=head1 FILES
The file B<.rrm> contains the database of size, md5sum, and names of
The file B<.rrm> contains the database of size, sha256sum, and names of
the files. It is created in the current directory if it cannot be found in
any of the (grand*)parent directories.
=head1 SEE ALSO
B<rclean>(1), B<rm>(1), B<md5sum>(1)
B<rclean>(1), B<rm>(1), B<sha256sum>(1)
=cut
use Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);
use Digest::SHA;
my %size;
my %md5;
my %hashval;
my @remove;
for my $file (@ARGV) {
if(-s $file > 0) {
$size{$file} = -s $file;
my $md5 = Digest::MD5->new;
$md5->addpath($file);
$md5{$file} = $md5->hexdigest;
my $sha = Digest::SHA->new(256);
$sha->addfile($file);
$hashval{$file} = "SHA256:".$sha->b64digest;
push @remove, $file;
}
if(not -e $file) {
warn("File does not exist: $file");
}
}
$rrmfile = find_rrm_file(".") || ".rrm";
open(RRM,">>",$rrmfile) || die("Cannot write to $rrmfile.");
print RRM map { $size{$_}."\t".$md5{$_}."\t".$_."\0\n" } @ARGV;
print RRM map { $size{$_}."\t".$hashval{$_}."\t".$_."\0\n" } @remove;
close RRM;
unlink @ARGV;
unlink @remove;
sub find_rrm_file {
my $dir = shift;