parallel: killall() kills jobs with: TERM, wait, TERM, wait, KILL, KILL rest of family.

This commit is contained in:
Ole Tange 2015-04-15 00:55:09 +02:00
parent c7bea14805
commit 0996bbff6b
4 changed files with 102 additions and 33 deletions

View file

@ -3031,6 +3031,72 @@ sub reaper {
sub __USAGE__ {} sub __USAGE__ {}
sub killall {
    # Kill all jobs, escalating step by step:
    #   TERM to all jobs, wait
    #   TERM to all jobs (again), wait
    #   KILL to all jobs
    #   KILL to all jobs and their (grand*)children
    # Uses:
    #   %Global::running = its keys are the pids that get signalled
    #   $Global::start_no_new_jobs = set to 1 unless already true
    $Global::start_no_new_jobs ||= 1;
    # Record the pids of the jobs and all their (grand*)children
    # before the first signal is sent
    my @family_pids = family_pids(keys %Global::running);
    # Two polite rounds: TERM, then wait for the jobs to go away
    for my $signal ("TERM", "TERM") {
        kill $signal, keys %Global::running;
        # Wait up to 200 ms.
        # kill(0, ...) sends nothing; it is true while at least one
        # of the pids can still be signalled
        my $waited = 0;
        my $pause = 0;
        while(kill(0, keys %Global::running) and $waited < 200) {
            # This can change %Global::running
            $pause = ::reap_usleep($pause);
            $waited += $pause;
        }
    }
    # Whatever is still registered as running gets KILL
    kill "KILL", keys %Global::running;
    # Send all (grand*)children KILL (if there are any left)
    kill "KILL", @family_pids;
}
sub family_pids {
    # Find the given pids and every pid that has one of them as
    # (grand)*parent
    # Input:
    #   @parents = pids of parents
    # Returns:
    #   @pids = @parents followed by the pids of their (grand)*children,
    #           one generation at a time; every pid is listed only once
    my @parents = @_;
    my @pids;
    # Only the first value from pid_table() is used here:
    # a hash ref mapping a pid to an array ref of its children
    my ($children_of_ref) = ::pid_table();
    # Never visit a pid twice: this keeps duplicates out of the result
    # and stops the walk from looping forever if the table has a cycle
    my %seen;
    my @more = grep { not $seen{$_}++ } @parents;
    # While more (grand)*children
    while(@more) {
        push @pids, @more;
        my @next_generation;
        for my $parent (@more) {
            my $children_ref = $children_of_ref->{$parent} or next;
            # add the not yet seen children of this parent
            push @next_generation, grep { not $seen{$_}++ } @{$children_ref};
        }
        @more = @next_generation;
    }
    return (@pids);
}
sub wait_and_exit { sub wait_and_exit {
# If we do not wait, we sometimes get segfault # If we do not wait, we sometimes get segfault
# Returns: N/A # Returns: N/A
@ -3038,9 +3104,7 @@ sub wait_and_exit {
unlink keys %Global::unlink; unlink keys %Global::unlink;
if($error) { if($error) {
# Kill all without printing # Kill all without printing
for my $job (values %Global::running) { killall();
$job->kill();
}
} }
for (keys %Global::unkilled_children) { for (keys %Global::unkilled_children) {
kill 9, $_; kill 9, $_;
@ -3359,13 +3423,14 @@ sub multiply_binary_prefix {
} }
{ {
my ($disk_full_fh, $b8193, $name); my ($disk_full_fh, $b8193, $error_printed);
sub exit_if_disk_full { sub exit_if_disk_full {
# Checks if $TMPDIR is full by writing 8kb to a tmpfile # Checks if $TMPDIR is full by writing 8kb to a tmpfile
# If the disk is full: Exit immediately. # If the disk is full: Exit immediately.
# Returns: # Returns:
# N/A # N/A
if(not $disk_full_fh) { if(not $disk_full_fh) {
my $name;
($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
# Separate unlink due to NFS dealing badly with File::Temp # Separate unlink due to NFS dealing badly with File::Temp
unlink $name; unlink $name;
@ -3392,8 +3457,11 @@ sub multiply_binary_prefix {
or or
tell $disk_full_fh != 8193) { tell $disk_full_fh != 8193) {
# On raspbian the disk can be full except for 10 chars. # On raspbian the disk can be full except for 10 chars.
if(not $error_printed) {
::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); ::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
$error_printed = 1;
}
::wait_and_exit(255); ::wait_and_exit(255);
} }
truncate $disk_full_fh, 0; truncate $disk_full_fh, 0;

View file

@ -16,7 +16,6 @@ echo '### Tests on polarhome machines'
echo 'Setup on polarhome machines' echo 'Setup on polarhome machines'
stdout parallel -kj0 ssh -oLogLevel=quiet {} mkdir -p bin ::: $POLAR & stdout parallel -kj0 ssh -oLogLevel=quiet {} mkdir -p bin ::: $POLAR &
test_empty_cmd() { test_empty_cmd() {
echo '### Test if empty command in process list causes problems' echo '### Test if empty command in process list causes problems'
perl -e '$0=" ";sleep 1' & perl -e '$0=" ";sleep 1' &
@ -36,7 +35,8 @@ copy_and_test() {
perl -pe 's:/[a-z0-9_]+.arg:/XXXXXXXX.arg:gi; s/\d\d\d\d/0000/gi;' perl -pe 's:/[a-z0-9_]+.arg:/XXXXXXXX.arg:gi; s/\d\d\d\d/0000/gi;'
} }
export -f copy_and_test export -f copy_and_test
stdout parallel -j0 -k --retries 5 --timeout 80 --delay 0.1 --tag -v copy_and_test {} ::: $POLAR # 20150414 --timeout 80 -> 40
stdout parallel -j0 -k --retries 5 --timeout 40 --delay 0.1 --tag -v copy_and_test {} ::: $POLAR
cat /tmp/test_empty_cmd cat /tmp/test_empty_cmd
rm /tmp/test_empty_cmd rm /tmp/test_empty_cmd

View file

@ -15,6 +15,8 @@ perl -ne '$/="\n\n"; /^Output/../^[^O]\S/ and next; /^ / and print;' ../../src/
s/zenity/zenity --timeout=12/; s/zenity/zenity --timeout=12/;
s:/usr/bin/time:/usr/bin/time -f %e:; s:/usr/bin/time:/usr/bin/time -f %e:;
s:ignored_vars:ignored_vars|sort:; s:ignored_vars:ignored_vars|sort:;
# Remove \n to join all joblogs into the previous block
s:cat /tmp/log\n:cat /tmp/log;:;
# When parallelized: Sleep to make sure the abc-files are made # When parallelized: Sleep to make sure the abc-files are made
/%head1/ and $_.="sleep .3\n\n"x10; /%head1/ and $_.="sleep .3\n\n"x10;
' | ' |

View file

@ -440,30 +440,30 @@ Computer:jobs running/jobs completed/%of started jobs/Average seconds to complet
seq 1000 | parallel -j10 --bar '(echo -n {};sleep 0.1)' 2> >(zenity --timeout=12 --progress --auto-kill) seq 1000 | parallel -j10 --bar '(echo -n {};sleep 0.1)' 2> >(zenity --timeout=12 --progress --auto-kill)
BASE64 parallel --joblog /tmp/log exit ::: 1 2 3 0 BASE64 parallel --joblog /tmp/log exit ::: 1 2 3 0
cat /tmp/log cat /tmp/log;
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
parallel --joblog /tmp/log exit ::: 1 2 3 0 parallel --joblog /tmp/log exit ::: 1 2 3 0
cat /tmp/log cat /tmp/log; parallel --resume --joblog /tmp/log exit ::: 1 2 3 0 0 0
parallel --resume --joblog /tmp/log exit ::: 1 2 3 0 0 0 cat /tmp/log;
cat /tmp/log
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
5 : TIMESTAMP 9.999 0 0 0 0 exit 0
6 : TIMESTAMP 9.999 0 0 0 0 exit 0
parallel --resume-failed --joblog /tmp/log exit ::: 1 2 3 0 0 0 parallel --resume-failed --joblog /tmp/log exit ::: 1 2 3 0 0 0
cat /tmp/log cat /tmp/log;
parallel -j2 --halt 1 echo {}\; exit {} ::: 0 0 1 2 3
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3
4 : TIMESTAMP 9.999 0 0 0 0 exit 0
5 : TIMESTAMP 9.999 0 0 0 0 exit 0
6 : TIMESTAMP 9.999 0 0 0 0 exit 0
Seq Host Starttime JobRuntime Send Receive Exitval Signal Command Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1 1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2 2 : TIMESTAMP 9.999 0 0 2 0 exit 2
@ -474,7 +474,6 @@ Seq Host Starttime JobRuntime Send Receive Exitval Signal Command
1 : TIMESTAMP 9.999 0 0 1 0 exit 1 1 : TIMESTAMP 9.999 0 0 1 0 exit 1
2 : TIMESTAMP 9.999 0 0 2 0 exit 2 2 : TIMESTAMP 9.999 0 0 2 0 exit 2
3 : TIMESTAMP 9.999 0 0 3 0 exit 3 3 : TIMESTAMP 9.999 0 0 3 0 exit 3
parallel -j2 --halt 1 echo {}\; exit {} ::: 0 0 1 2 3
0 0
0 0
1 1
@ -936,7 +935,7 @@ This helps funding further development; and it won't cost you a cent.
If you pay 10000 EUR you should feel free to use GNU Parallel without citing. If you pay 10000 EUR you should feel free to use GNU Parallel without citing.
parallel --version parallel --version
GNU parallel 20150403 GNU parallel 20150415
Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange
and Free Software Foundation, Inc. and Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
@ -948,7 +947,7 @@ Web site: http://www.gnu.org/software/parallel
When using programs that use GNU Parallel to process data for publication When using programs that use GNU Parallel to process data for publication
please cite as described in 'parallel --bibtex'. please cite as described in 'parallel --bibtex'.
parallel --minversion 20130722 && echo Your version is at least 20130722. parallel --minversion 20130722 && echo Your version is at least 20130722.
20150403 20150415
Your version is at least 20130722. Your version is at least 20130722.
parallel --bibtex parallel --bibtex
Academic tradition requires you to cite works you base your article on. Academic tradition requires you to cite works you base your article on.