parallel: Fixed bug #48449: memory leak.

This commit is contained in:
Ole Tange 2016-07-10 16:02:26 +02:00
parent 8a5729cebd
commit 402d3960d4
3 changed files with 117 additions and 71 deletions

View file

@ -191,10 +191,10 @@ reaper();
::debug("init", "Done reaping\n");
if($opt::pipe and @opt::a) {
for my $job (@Global::tee_jobs) {
unlink $job->fh(2,"name");
::rm($job->fh(2,"name"));
$job->set_fh(2,"name","");
$job->print();
unlink $job->fh(1,"name");
::rm($job->fh(1,"name"));
}
}
::debug("init", "Cleaning\n");
@ -3488,6 +3488,7 @@ sub reaper {
::wait_and_exit($Global::halt_exitstatus);
}
}
$job->cleanup();
start_more_jobs();
if($opt::progress) {
my %progress = progress();
@ -3814,6 +3815,7 @@ sub tmpfile {
sub tmpname {
# Select a name that does not exist
# Do not create the file as it may be used for creating a socket (by tmux)
# Remember the name in $Global::unlink to avoid hitting the same name twice
my $name = shift;
my($tmpname);
if(not -w $ENV{'TMPDIR'}) {
@ -3832,13 +3834,21 @@ sub tmpname {
}
sub tmpfifo {
# Securely make a fifo by securely making a dir with a fifo in it
# Find an unused name and mkfifo on it
use POSIX qw(mkfifo);
my $tmpfifo = tmpname("fif",@_);
mkfifo($tmpfifo,0600);
return $tmpfifo;
}
sub rm {
# Remove file and remove it from %Global::unlink
# Uses:
# %Global::unlink
delete @Global::unlink{@_};
unlink @_;
}
sub size_of_block_dev {
# Like -s but for block devices
# Input:
@ -4452,8 +4462,8 @@ sub new {
sub DESTROY {
my $self = shift;
# Remove temporary files if they are created.
unlink $self->{'loadavg_file'};
unlink $self->{'swap_activity_file'};
::rm($self->{'loadavg_file'});
::rm($self->{'swap_activity_file'});
}
sub string {
@ -6158,6 +6168,8 @@ sub new {
# filehandle for stdin (used for --pipe)
# filename for writing stdout to (used for --files)
# remaining data not sent to stdin (used for --pipe)
# tmpfiles to cleanup when job is done
'unlink' => [],
# amount of data sent via stdin (used for --pipe)
'transfersize' => 0, # size of files using --transfer
'returnsize' => 0, # size of files using --return
@ -6381,6 +6393,25 @@ sub openoutputfiles {
}
}
sub add_rm {
# Files to remove when job is done
my $self = shift;
push $self->{'unlink'}, @_;
}
sub get_rm {
# Files to remove when job is done
my $self = shift;
return @{$self->{'unlink'}};
}
sub cleanup {
# Remove files when job is done
my $self = shift;
unlink $self->get_rm();
delete @Global::unlink{$self->get_rm()};
}
sub grouped {
my $self = shift;
# Set reading FD if using --group (--ungroup does not need)
@ -6392,7 +6423,7 @@ sub grouped {
::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
$self->set_fh($fdno,'r',$fdr);
# Unlink if required
$Global::debug or unlink $self->fh($fdno,"unlink");
$Global::debug or ::rm($self->fh($fdno,"unlink"));
}
}
@ -7402,7 +7433,7 @@ sub sshcleanup {
my $workdir = $self->workdir();
my $cleancmd = "";
for my $file ($self->cleanup()) {
for my $file ($self->remote_cleanup()) {
my @subworkdirs = parentdirs_of($file);
$cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
}
@ -7412,7 +7443,7 @@ sub sshcleanup {
return $cleancmd;
}
sub cleanup {
sub remote_cleanup {
# Returns:
# Files to remove at cleanup
my $self = shift;
@ -7602,6 +7633,7 @@ sub start {
$ENV{'PARALLEL_SEQ'} = $job->seq();
$ENV{'PARALLEL_PID'} = $$;
$ENV{'PARALLEL_TMP'} = ::tmpname("par");
$job->add_rm($ENV{'PARALLEL_TMP'});
::debug("run", $Global::total_running, " processes . Starting (",
$job->seq(), "): $command\n");
if($opt::pipe) {
@ -7721,6 +7753,7 @@ sub print_dryrun_and_verbose {
my $actual_command = shift;
# Temporary file name. Used for fifo to communicate exit val
my $tmpfifo=::tmpname("tmx");
$self->add_rm($tmpfifo);
if(length($tmpfifo) >=100) {
::error("tmux does not support sockets with path > 100.");
@ -7882,7 +7915,7 @@ sub print {
if($opt::dryrun) {
# Nothing was printed to this job:
# cleanup tmp files if --files was set
unlink $self->fh(1,"name");
::rm($self->fh(1,"name"));
}
if($opt::pipe and $self->virgin()) {
# Skip --joblog, --dryrun, --verbose
@ -7973,8 +8006,8 @@ sub files_print {
# Nothing was printed to this job:
# cleanup unused tmp files if --files was set
for my $fdno (1,2) {
unlink $self->fh($fdno,"name");
unlink $self->fh($fdno,"unlink");
::rm($self->fh($fdno,"name"));
::rm($self->fh($fdno,"unlink"));
}
} elsif($fdno == 1 and $self->fh($fdno,"name")) {
print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
@ -9256,7 +9289,7 @@ sub tmux_length {
my $tmuxcmd = $ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l".
("x"x$l). " && echo $l; rm -f $tmpfile";
push @out, ::qqx($tmuxcmd);
unlink $tmpfile;
::rm($tmpfile);
}
::debug("tmux","tmux-out ",@out);
chomp @out;
@ -10124,9 +10157,9 @@ sub run {
eval { $rv = $sth->execute(@_) }) {
last;
} else {
if($@ =~ /no such table/
if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/
or
$DBI::errstr =~ /no such table/) {
$DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) {
# This is fine:
# It is just a worker that reported back too late -
# another worker had finished the job first
@ -10330,7 +10363,7 @@ sub remove_dead_locks {
if($host eq ::hostname()) {
if(not kill 0, $pid) {
::debug("sem", "Dead: $d\n");
unlink $d;
::rm($d);
} else {
::debug("sem", "Alive: $d\n");
}
@ -10379,12 +10412,12 @@ sub acquire {
sub release {
my $self = shift;
unlink $self->{'pidfile'};
::rm($self->{'pidfile'});
if($self->nlinks() == 1) {
# This is the last link, so atomic cleanup
$self->lock();
if($self->nlinks() == 1) {
unlink $self->{'idfile'};
::rm($self->{'idfile'});
rmdir $self->{'lockdir'};
}
$self->unlock();
@ -10402,7 +10435,7 @@ sub pid_change {
$self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
my $retval = link $self->{'idfile'}, $self->{'pidfile'};
::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
unlink $old_pidfile;
::rm($old_pidfile);
}
sub atomic_link_if_count_less_than {
@ -10500,7 +10533,7 @@ sub lock {
sub unlock {
my $self = shift;
unlink $self->{'lockfile'};
::rm($self->{'lockfile'});
close $self->{'lockfh'};
::debug("run", "unlocked\n");
}

View file

@ -4,27 +4,51 @@
# Each should be taking 30-100s and be possible to run in parallel
# I.e.: No race conditions, no logins
# Assume /tmp/shm is easy to fill up
export SHM=/tmp/shm/parallel
mkdir -p $SHM
sudo umount -l $SHM
sudo mount -t tmpfs -o size=10% none $SHM
par_race_condition1() {
echo '### Test race condition on 8 CPU (my laptop)'
seq 1 5000000 > /tmp/parallel_race_cond
seq 1 10 | parallel -k "cat /tmp/parallel_race_cond | parallel --pipe --recend '' -k gzip >/dev/null; echo {}"
rm /tmp/parallel_race_cond
}
cat <<'EOF' | sed -e 's/;$/; /;s/$SERVER1/'$SERVER1'/;s/$SERVER2/'$SERVER2'/' | stdout parallel -vj4 -k --joblog /tmp/jl-`basename $0` -L1
echo '### Test race condition on 8 CPU (my laptop)';
seq 1 5000000 > /tmp/parallel_test;
seq 1 10 | parallel -k "cat /tmp/parallel_test | parallel --pipe --recend '' -k gzip >/dev/null; echo {}";
rm /tmp/parallel_test
par_tmp_full() {
# Assume /tmp/shm is easy to fill up
export SHM=/tmp/shm/parallel
mkdir -p $SHM
sudo umount -l $SHM
sudo mount -t tmpfs -o size=10% none $SHM
echo '**'
echo "### Test --tmpdir running full. bug #40733 was caused by this"
stdout parallel -j1 --tmpdir $SHM cat /dev/zero ::: dummy
}
echo "### Test --tmpdir running full. bug #40733 was caused by this"
stdout parallel -j1 --tmpdir $SHM cat /dev/zero ::: dummy
par_bug_48290() {
echo "### bug #48290: round-robin does not distribute data based on business"
echo "Jobslot 1 is 256 times slower than jobslot 4 and should get much less data"
yes "$(seq 1000|xargs)" | head -c 30M |
parallel --tagstring {%} --linebuffer --compress -j4 --roundrobin --pipe --block 10k \
pv -qL '{= $_=int( $job->slot()**4/2+1) =}'0000 |
perl -ne '/^\d+/ and $s{$&}++; END { print map { "$_\n" } sort { $s{$b} <=> $s{$a} } keys %s}'
}
echo '**'
par_memory_leak() {
a_run() {
seq $1 |time -v parallel true 2>&1 |
grep 'Maximum resident' |
field 6;
}
export -f a_run
echo "### Test for memory leaks"
echo "Of 10 runs of 1 job at least one should be bigger than a 3000 job run"
small_max=$(seq 10 | parallel a_run 1 | jq -s max)
big=$(a_run 3000)
if [ $small_max -lt $big ] ; then
echo "Bad: Memleak likely."
else
echo "Good: No memleak detected."
fi
}
echo "### bug #48290: round-robin does not distribute data based on business"
echo "Jobslot 1 is 8 times slower than jobslot 8 and should get much less data"
seq 10000000 | parallel --tagstring {%} --linebuffer --compress -j8 --roundrobin --pipe --block 300k 'pv -qL {%}00000'| perl -ne '/^\d+/ and $s{$&}++; END { print map { "$_\n" } sort { $s{$a} <=> $s{$b} } keys %s}'
EOF
export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | sort | parallel -j6 --tag -k '{} 2>&1'

View file

@ -1,34 +1,23 @@
echo '### Test race condition on 8 CPU (my laptop)'; seq 1 5000000 > /tmp/parallel_test; seq 1 10 | parallel -k "cat /tmp/parallel_test | parallel --pipe --recend '' -k gzip >/dev/null; echo {}"; rm /tmp/parallel_test
### Test race condition on 8 CPU (my laptop)
1
2
3
4
5
6
7
8
9
10
echo '**'
**
echo "### Test --tmpdir running full. bug #40733 was caused by this"
### Test --tmpdir running full. bug #40733 was caused by this
stdout parallel -j1 --tmpdir $SHM cat /dev/zero ::: dummy
parallel: Error: Output is incomplete. Cannot append to buffer file in /tmp/shm/parallel. Is the disk full?
parallel: Error: Change $TMPDIR with --tmpdir or use --compress.
echo '**'
**
echo "### bug #48290: round-robin does not distribute data based on business"
### bug #48290: round-robin does not distribute data based on business
echo "Jobslot 1 is 8 times slower than jobslot 8 and should get much less data"
Jobslot 1 is 8 times slower than jobslot 8 and should get much less data
seq 10000000 | parallel --tagstring {%} --linebuffer --compress -j8 --roundrobin --pipe --block 300k 'pv -qL {%}00000'| perl -ne '/^\d+/ and $s{$&}++; END { print map { "$_\n" } sort { $s{$a} <=> $s{$b} } keys %s}'
1
2
3
5
4
7
6
8
par_bug_48290 ### bug #48290: round-robin does not distribute data based on business
par_bug_48290 Jobslot 1 is 256 times slower than jobslot 4 and should get much less data
par_bug_48290 4
par_bug_48290 3
par_bug_48290 2
par_bug_48290 1
par_memory_leak ### Test for memory leaks
par_memory_leak Of 10 runs of 1 job at least one should be bigger than a 3000 job run
par_memory_leak Good: No memleak detected.
par_race_condition1 ### Test race condition on 8 CPU (my laptop)
par_race_condition1 1
par_race_condition1 2
par_race_condition1 3
par_race_condition1 4
par_race_condition1 5
par_race_condition1 6
par_race_condition1 7
par_race_condition1 8
par_race_condition1 9
par_race_condition1 10
par_tmp_full ### Test --tmpdir running full. bug #40733 was caused by this
par_tmp_full parallel: Error: Output is incomplete. Cannot append to buffer file in /tmp/shm/parallel. Is the disk full?
par_tmp_full parallel: Error: Change $TMPDIR with --tmpdir or use --compress.