parallel: Implemented --halt -1 and --halt -2.

This commit is contained in:
Ole Tange 2015-01-02 12:55:02 +01:00
parent 303bc5f465
commit b71d442c1c
4 changed files with 161 additions and 43 deletions

View file

@ -162,8 +162,8 @@ for(keys %Global::sshmaster) {
kill "TERM", $_; kill "TERM", $_;
} }
::debug("init", "Halt\n"); ::debug("init", "Halt\n");
if($opt::halt_on_error) { if($opt::halt) {
wait_and_exit($Global::halt_on_error_exitstatus); wait_and_exit($Global::halt_exitstatus);
} else { } else {
wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
} }
@ -664,7 +664,7 @@ sub options_hash {
"compress" => \$opt::compress, "compress" => \$opt::compress,
"tty" => \$opt::tty, "tty" => \$opt::tty,
"T" => \$opt::retired, "T" => \$opt::retired,
"halt-on-error|halt=s" => \$opt::halt_on_error, "halt-on-error|halt=s" => \$opt::halt,
"H=i" => \$opt::retired, "H=i" => \$opt::retired,
"retries=i" => \$opt::retries, "retries=i" => \$opt::retries,
"dry-run|dryrun" => \$opt::dryrun, "dry-run|dryrun" => \$opt::dryrun,
@ -822,8 +822,8 @@ sub parse_options {
$opt::blocksize = multiply_binary_prefix($opt::blocksize); $opt::blocksize = multiply_binary_prefix($opt::blocksize);
$opt::memfree = multiply_binary_prefix($opt::memfree); $opt::memfree = multiply_binary_prefix($opt::memfree);
if(defined $opt::controlmaster) { $opt::noctrlc = 1; } if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
if(defined $opt::halt_on_error and if(defined $opt::halt and
$opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; } $opt::halt =~ /%/) { $opt::halt /= 100; }
if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
::error("--timeout must be seconds or percentage\n"); ::error("--timeout must be seconds or percentage\n");
wait_and_exit(255); wait_and_exit(255);
@ -996,7 +996,7 @@ sub init_globals {
$Global::stderr_verbose = 0; $Global::stderr_verbose = 0;
$Global::default_simultaneous_sshlogins = 9; $Global::default_simultaneous_sshlogins = 9;
$Global::exitstatus = 0; $Global::exitstatus = 0;
$Global::halt_on_error_exitstatus = 0; $Global::halt_exitstatus = 0;
$Global::arg_sep = ":::"; $Global::arg_sep = ":::";
$Global::arg_file_sep = "::::"; $Global::arg_file_sep = "::::";
$Global::trim = 'n'; $Global::trim = 'n';
@ -2861,7 +2861,7 @@ sub reaper {
# @Global::slots # @Global::slots
# $opt::timeout # $opt::timeout
# $Global::timeoutq # $Global::timeoutq
# $opt::halt_on_error # $opt::halt
# $opt::keeporder # $opt::keeporder
# $Global::total_running # $Global::total_running
# Returns: N/A # Returns: N/A
@ -2895,18 +2895,17 @@ sub reaper {
# Update average runtime for timeout # Update average runtime for timeout
$Global::timeoutq->update_delta_time($job->runtime()); $Global::timeoutq->update_delta_time($job->runtime());
} }
# Force printing now if the job failed and we are going to exit # Force printing now if --halt forces us to exit
my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 my $print_now = $opt::halt and
and $job->exitstatus()); (($opt::halt == 2 and $job->exitstatus())
or
($opt::halt == -2 and not $job->exitstatus()));
if($opt::keeporder and not $print_now) { if($opt::keeporder and not $print_now) {
$job->print_earlier_jobs(); $job->print_earlier_jobs();
} else { } else {
$job->print(); $job->print();
} }
if($job->exitstatus()) { $job->should_we_halt();
$job->fail();
}
} }
my $sshlogin = $job->sshlogin(); my $sshlogin = $job->sshlogin();
$sshlogin->dec_jobs_running(); $sshlogin->dec_jobs_running();
@ -6778,33 +6777,55 @@ sub set_exitsignal {
} }
} }
sub fail { sub should_we_halt {
# The jobs had a exit status <> 0, so error # Should we halt? Immediately? Gracefully?
# Returns: N/A # Returns: N/A
my $job = shift; my $job = shift;
$Global::exitstatus++; if($job->exitstatus()) {
$Global::total_failed++; $Global::exitstatus++;
if($opt::halt_on_error) { $Global::total_failed++;
if($opt::halt_on_error == 1 if($opt::halt) {
or if($opt::halt == 1
($opt::halt_on_error < 1 and $Global::total_failed > 3 or
and ($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3
$Global::total_failed / $Global::total_started > $opt::halt_on_error)) { and
# If halt on error == 1 or --halt 10% $Global::total_failed / $Global::total_started > $opt::halt)) {
# we should gracefully exit # If halt on error == 1 or --halt 10%
::status # we should gracefully exit
("$Global::progname: Starting no more jobs. ", ::status
"Waiting for ", scalar(keys %Global::running), ("$Global::progname: Starting no more jobs. ",
" jobs to finish. This job failed:\n", "Waiting for ", scalar(keys %Global::running),
$job->replaced(),"\n"); " jobs to finish. This job failed:\n",
$Global::start_no_new_jobs ||= 1; $job->replaced(),"\n");
$Global::halt_on_error_exitstatus = $job->exitstatus(); $Global::start_no_new_jobs ||= 1;
} elsif($opt::halt_on_error == 2) { $Global::halt_exitstatus = $job->exitstatus();
# If halt on error == 2 we should exit immediately } elsif($opt::halt == 2) {
::status # If halt on error == 2 we should exit immediately
("$Global::progname: This job failed:\n", ::status
$job->replaced(),"\n"); ("$Global::progname: This job failed:\n",
exit ($job->exitstatus()); $job->replaced(),"\n");
exit ($job->exitstatus());
}
}
} else {
if($opt::halt) {
if($opt::halt == -1) {
# If halt on error == -1
# we should gracefully exit
::status
("$Global::progname: Starting no more jobs. ",
"Waiting for ", scalar(keys %Global::running),
" jobs to finish. This job succeeded:\n",
$job->replaced(),"\n");
$Global::start_no_new_jobs ||= 1;
$Global::halt_exitstatus = $job->exitstatus();
} elsif($opt::halt == -2) {
# If halt on error == -2 we should exit immediately
::status
("$Global::progname: This job succeeded:\n",
$job->replaced(),"\n");
exit ($job->exitstatus());
}
} }
} }
} }

View file

@ -682,11 +682,11 @@ See also: B<--line-buffer> B<--ungroup>
Print a summary of the options to GNU B<parallel> and exit. Print a summary of the options to GNU B<parallel> and exit.
=item B<--halt-on-error> I<val> =item B<--halt-on-error> I<val> (alpha testing)
=item B<--halt> I<val> =item B<--halt> I<val> (alpha testing)
How should GNU B<parallel> terminate if one of more jobs fail? How should GNU B<parallel> terminate?
=over 7 =over 7
@ -706,6 +706,17 @@ last failing job.
Kill off all jobs immediately and exit without cleanup. The exit Kill off all jobs immediately and exit without cleanup. The exit
status will be the exit status from the failing job. status will be the exit status from the failing job.
=item Z<>-1
Do not start new jobs if a job succeeds, but complete the running jobs
including cleanup. The exit status will be the exit status from the
job that succeeded, i.e. 0, even if other jobs failed.
=item Z<>-2
If a job succeeds: kill off all jobs immediately and exit without
cleanup. The exit status will be 0.
=item Z<>1-99% =item Z<>1-99%
If I<val>% of the jobs fail and minimum 3: Do not start new jobs, but If I<val>% of the jobs fail and minimum 3: Do not start new jobs, but

View file

@ -35,6 +35,22 @@ echo '### Test --halt-on-error 2';
echo '**' echo '**'
echo '### Test --halt -1';
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -1;
echo $?;
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -1;
echo $?
echo '**'
echo '### Test --halt -2';
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -2;
echo $?;
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -2;
echo $?
echo '**'
echo '### Test last dying print --halt-on-error 1'; echo '### Test last dying print --halt-on-error 1';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift';
echo exit code $? echo exit code $?
@ -43,6 +59,14 @@ echo '### Test last dying print --halt-on-error 2';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift';
echo exit code $? echo exit code $?
echo '### Test last dying print --halt-on-error -1';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift';
echo exit code $?
echo '### Test last dying print --halt-on-error -2';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift';
echo exit code $?
echo '**' echo '**'
echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834';

View file

@ -36,10 +36,32 @@ parallel: This job failed:
sleep 2;false sleep 2;false
echo '**' echo '**'
** **
echo '### Test --halt -1'; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -1; echo $?; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -1; echo $?
### Test --halt -1
0
0
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job succeeded:
sleep 2;true
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job succeeded:
sleep 2;true
/bin/bash: non_exist: command not found
echo '**'
**
echo '### Test --halt -2'; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -2; echo $?; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -2; echo $?
### Test --halt -2
0
0
parallel: This job succeeded:
sleep 2;true
parallel: This job succeeded:
sleep 2;true
echo '**'
**
echo '### Test last dying print --halt-on-error 1'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift'; echo exit code $? echo '### Test last dying print --halt-on-error 1'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift'; echo exit code $?
### Test last dying print --halt-on-error 1 ### Test last dying print --halt-on-error 1
exit code 9 exit code 9
0 0
0
1 1
parallel: Starting no more jobs. Waiting for 9 jobs to finish. This job failed: parallel: Starting no more jobs. Waiting for 9 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1 perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1
@ -62,7 +84,6 @@ perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 6
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job failed: parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 7 perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 7
8 8
0
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed: parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 8 perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 8
9 9
@ -72,9 +93,50 @@ echo '### Test last dying print --halt-on-error 2'; (seq 0 8;echo 0; echo 9) |
### Test last dying print --halt-on-error 2 ### Test last dying print --halt-on-error 2
exit code 1 exit code 1
0 0
0
1 1
parallel: This job failed: parallel: This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1 perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1
echo '### Test last dying print --halt-on-error -1'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift'; echo exit code $?
### Test last dying print --halt-on-error -1
exit code 0
0
0
1
parallel: Starting no more jobs. Waiting for 9 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 1
2
parallel: Starting no more jobs. Waiting for 8 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 2
3
parallel: Starting no more jobs. Waiting for 7 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 3
4
parallel: Starting no more jobs. Waiting for 6 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 4
5
parallel: Starting no more jobs. Waiting for 5 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 5
6
parallel: Starting no more jobs. Waiting for 4 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 6
7
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 7
8
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 8
9
parallel: Starting no more jobs. Waiting for 1 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 9
echo '### Test last dying print --halt-on-error -2'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift'; echo exit code $?
### Test last dying print --halt-on-error -2
exit code 0
0
0
1
parallel: This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 1
echo '**' echo '**'
** **
echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo