parallel: killall() kills jobs with: TERM, wait, TERM, wait, KILL, KILL rest of family.

This commit is contained in:
Ole Tange 2015-04-15 00:55:09 +02:00
parent c7bea14805
commit 0996bbff6b
4 changed files with 102 additions and 33 deletions

View file

@ -3031,6 +3031,72 @@ sub reaper {
sub __USAGE__ {}
sub killall {
    # Kill all running jobs, escalating from TERM to KILL:
    #   Send all jobs TERM
    #   Wait (up to 200 ms)
    #   Send all jobs TERM (again)
    #   Wait (up to 200 ms)
    #   Send all jobs KILL
    #   Send all (grand*)children KILL
    # Uses:
    #   %Global::running = the keys are the pids of the running jobs
    #   $Global::start_no_new_jobs = set to true (if not already)
    # Returns: N/A
    $Global::start_no_new_jobs ||= 1;
    # pids of all the children and (grand*)children
    # recorded before we start the blood bath
    my @family_pids = family_pids(keys %Global::running);
    # Two rounds of: send TERM, then give the jobs time to exit
    for my $signal ("TERM", "TERM") {
        kill $signal, keys %Global::running;
        # Wait up to 200 ms, but stop as soon as no job can be signalled
        my $sleepsum = 0;
        # Start with a non-zero step: with a step of 0 the sum only
        # advances if ::reap_usleep() turns 0 into something positive,
        # and otherwise a job ignoring TERM would keep us here forever.
        my $sleep = 1;
        while(kill(0, keys %Global::running) and $sleepsum < 200) {
            # This can change %Global::running
            $sleep = ::reap_usleep($sleep);
            if(not $sleep or $sleep <= 0) {
                # Make sure the 200 ms limit is always reachable
                $sleep = 1;
            }
            $sleepsum += $sleep;
        }
    }
    # Jobs that survived both TERMs get KILL
    kill "KILL", keys %Global::running;
    # Send all (grand*)children KILL (if there are any left)
    kill "KILL", @family_pids;
}
sub family_pids {
    # Find the pids that have the given pids as (grand)*parent
    # Input:
    #   @parents = pids of parents
    # Returns:
    #   @pids = @parents followed by the pids of their (grand)*children,
    #     one generation at a time, each pid listed only once
    my @parents = @_;
    my @pids;
    if(@parents) {
        # Only the parent => [children] mapping is needed here
        my ($children_of_ref) = ::pid_table();
        # Pids already collected: keeps the walk finite even if the
        # table is inconsistent (e.g. a pid listed as its own ancestor)
        my %seen;
        my @more = grep { not $seen{$_}++ } @parents;
        # While more (grand)*children
        while(@more) {
            push @pids, @more;
            my @next_generation;
            for my $parent (@more) {
                if($children_of_ref->{$parent}) {
                    # add the not yet seen children of this parent
                    push @next_generation,
                        grep { not $seen{$_}++ } @{$children_of_ref->{$parent}};
                }
            }
            @more = @next_generation;
        }
    }
    return (@pids);
}
sub wait_and_exit {
# If we do not wait, we sometimes get segfault
# Returns: N/A
@ -3038,9 +3104,7 @@ sub wait_and_exit {
unlink keys %Global::unlink;
if($error) {
# Kill all without printing
for my $job (values %Global::running) {
$job->kill();
}
killall();
}
for (keys %Global::unkilled_children) {
kill 9, $_;
@ -3359,13 +3423,14 @@ sub multiply_binary_prefix {
}
{
my ($disk_full_fh, $b8193, $name);
my ($disk_full_fh, $b8193, $error_printed);
sub exit_if_disk_full {
# Checks if $TMPDIR is full by writing 8kb to a tmpfile
# If the disk is full: Exit immediately.
# Returns:
# N/A
if(not $disk_full_fh) {
my $name;
($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
# Separate unlink due to NFS dealing badly with File::Temp
unlink $name;
@ -3392,8 +3457,11 @@ sub multiply_binary_prefix {
or
tell $disk_full_fh != 8193) {
# On raspbian the disk can be full except for 10 chars.
if(not $error_printed) {
::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
$error_printed = 1;
}
::wait_and_exit(255);
}
truncate $disk_full_fh, 0;

View file

@ -16,7 +16,6 @@ echo '### Tests on polarhome machines'
echo 'Setup on polarhome machines'
stdout parallel -kj0 ssh -oLogLevel=quiet {} mkdir -p bin ::: $POLAR &
test_empty_cmd() {
echo '### Test if empty command in process list causes problems'
perl -e '$0=" ";sleep 1' &
@ -36,7 +35,8 @@ copy_and_test() {
perl -pe 's:/[a-z0-9_]+.arg:/XXXXXXXX.arg:gi; s/\d\d\d\d/0000/gi;'
}
export -f copy_and_test
stdout parallel -j0 -k --retries 5 --timeout 80 --delay 0.1 --tag -v copy_and_test {} ::: $POLAR
# 20150414 --timeout 80 -> 40
stdout parallel -j0 -k --retries 5 --timeout 40 --delay 0.1 --tag -v copy_and_test {} ::: $POLAR
cat /tmp/test_empty_cmd
rm /tmp/test_empty_cmd

View file

@ -15,6 +15,8 @@ perl -ne '$/="\n\n"; /^Output/../^[^O]\S/ and next; /^ / and print;' ../../src/
s/zenity/zenity --timeout=12/;
s:/usr/bin/time:/usr/bin/time -f %e:;
s:ignored_vars:ignored_vars|sort:;
# Remove \n to join all joblogs into the previous block
s:cat /tmp/log\n:cat /tmp/log;:;
# When parallelized: Sleep to make sure the abc-files are made
/%head1/ and $_.="sleep .3\n\n"x10;
' |

View file

@ -440,30 +440,30 @@ Computer:jobs running/jobs completed/%of started jobs/Average seconds to complet
seq 1000 | parallel -j10 --bar '(echo -n {};sleep 0.1)' 2> >(zenity --timeout=12 --progress --auto-kill)
BASE64 parallel --joblog /tmp/log exit ::: 1 2 3 0
cat /tmp/log
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
cat /tmp/log;
parallel --joblog /tmp/log exit ::: 1 2 3 0
cat /tmp/log
parallel --resume --joblog /tmp/log exit ::: 1 2 3 0 0 0
cat /tmp/log
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
5 : TIMESTAMP 9.999 0 0 0 0 exit 0
6 : TIMESTAMP 9.999 0 0 0 0 exit 0
cat /tmp/log; parallel --resume --joblog /tmp/log exit ::: 1 2 3 0 0 0
cat /tmp/log;
parallel --resume-failed --joblog /tmp/log exit ::: 1 2 3 0 0 0
cat /tmp/log
cat /tmp/log;
parallel -j2 --halt 1 echo {}\; exit {} ::: 0 0 1 2 3
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
5 : TIMESTAMP 9.999 0 0 0 0 exit 0
6 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
@ -474,7 +474,6 @@ Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
parallel -j2 --halt 1 echo {}\; exit {} ::: 0 0 1 2 3
0
0
1
@ -936,7 +935,7 @@ This helps funding further development; and it won't cost you a cent.
If you pay 10000 EUR you should feel free to use GNU Parallel without citing.
parallel --version
GNU parallel 20150403
GNU parallel 20150415
Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange
and Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
@ -948,7 +947,7 @@ Web site: http://www.gnu.org/software/parallel
When using programs that use GNU Parallel to process data for publication
please cite as described in 'parallel --bibtex'.
parallel --minversion 20130722 && echo Your version is at least 20130722.
20150403
20150415
Your version is at least 20130722.
parallel --bibtex
Academic tradition requires you to cite works you base your article on.