parallel: Implemented --halt -1 and --halt -2.

This commit is contained in:
Ole Tange 2015-01-02 12:55:02 +01:00
parent 287fc41c1c
commit 3071e28c15
4 changed files with 161 additions and 43 deletions

View file

@ -162,8 +162,8 @@ for(keys %Global::sshmaster) {
kill "TERM", $_;
}
::debug("init", "Halt\n");
if($opt::halt_on_error) {
wait_and_exit($Global::halt_on_error_exitstatus);
if($opt::halt) {
wait_and_exit($Global::halt_exitstatus);
} else {
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
@ -664,7 +664,7 @@ sub options_hash {
"compress" => \$opt::compress,
"tty" => \$opt::tty,
"T" => \$opt::retired,
"halt-on-error|halt=s" => \$opt::halt_on_error,
"halt-on-error|halt=s" => \$opt::halt,
"H=i" => \$opt::retired,
"retries=i" => \$opt::retries,
"dry-run|dryrun" => \$opt::dryrun,
@ -822,8 +822,8 @@ sub parse_options {
$opt::blocksize = multiply_binary_prefix($opt::blocksize);
$opt::memfree = multiply_binary_prefix($opt::memfree);
if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
if(defined $opt::halt_on_error and
$opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; }
if(defined $opt::halt and
$opt::halt =~ /%/) { $opt::halt /= 100; }
if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
::error("--timeout must be seconds or percentage\n");
wait_and_exit(255);
@ -996,7 +996,7 @@ sub init_globals {
$Global::stderr_verbose = 0;
$Global::default_simultaneous_sshlogins = 9;
$Global::exitstatus = 0;
$Global::halt_on_error_exitstatus = 0;
$Global::halt_exitstatus = 0;
$Global::arg_sep = ":::";
$Global::arg_file_sep = "::::";
$Global::trim = 'n';
@ -2861,7 +2861,7 @@ sub reaper {
# @Global::slots
# $opt::timeout
# $Global::timeoutq
# $opt::halt_on_error
# $opt::halt
# $opt::keeporder
# $Global::total_running
# Returns: N/A
@ -2895,18 +2895,17 @@ sub reaper {
# Update average runtime for timeout
$Global::timeoutq->update_delta_time($job->runtime());
}
# Force printing now if the job failed and we are going to exit
my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2
and $job->exitstatus());
# Force printing now if --halt forces us to exit immediately:
#   --halt 2  and this job failed, or
#   --halt -2 and this job succeeded.
# The outer parentheses are required: '=' binds tighter than 'and',
# so without them $print_now would simply be set to $opt::halt and
# --keep-order would be bypassed for every --halt value.
my $print_now = ($opt::halt and
    (($opt::halt == 2 and $job->exitstatus())
     or
     ($opt::halt == -2 and not $job->exitstatus())));
if($opt::keeporder and not $print_now) {
$job->print_earlier_jobs();
} else {
$job->print();
}
if($job->exitstatus()) {
$job->fail();
}
$job->should_we_halt();
}
my $sshlogin = $job->sshlogin();
$sshlogin->dec_jobs_running();
@ -6778,33 +6777,55 @@ sub set_exitsignal {
}
}
sub fail {
# The jobs had a exit status <> 0, so error
sub should_we_halt {
# Should we halt? Immediately? Gracefully?
# Returns: N/A
my $job = shift;
$Global::exitstatus++;
$Global::total_failed++;
if($opt::halt_on_error) {
if($opt::halt_on_error == 1
or
($opt::halt_on_error < 1 and $Global::total_failed > 3
and
$Global::total_failed / $Global::total_started > $opt::halt_on_error)) {
# If halt on error == 1 or --halt 10%
# we should gracefully exit
::status
("$Global::progname: Starting no more jobs. ",
"Waiting for ", scalar(keys %Global::running),
" jobs to finish. This job failed:\n",
$job->replaced(),"\n");
$Global::start_no_new_jobs ||= 1;
$Global::halt_on_error_exitstatus = $job->exitstatus();
} elsif($opt::halt_on_error == 2) {
# If halt on error == 2 we should exit immediately
::status
("$Global::progname: This job failed:\n",
$job->replaced(),"\n");
exit ($job->exitstatus());
# Decide from this job's exit status and $opt::halt whether to keep
# going, stop starting new jobs, or exit right away.
if($job->exitstatus()) {
# The job failed (non-zero exit status): count the failure.
$Global::exitstatus++;
$Global::total_failed++;
if($opt::halt) {
# Graceful halt on failure: either --halt 1, or a fraction between
# 0 and 1 (parse_options divides a value containing '%' by 100) that
# is only considered once more than 3 jobs have failed and the
# failed/started ratio exceeds it.
if($opt::halt == 1
or
($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3
and
$Global::total_failed / $Global::total_started > $opt::halt)) {
# --halt 1 or --halt N%:
# start no more jobs, but let the running jobs finish
::status
("$Global::progname: Starting no more jobs. ",
"Waiting for ", scalar(keys %Global::running),
" jobs to finish. This job failed:\n",
$job->replaced(),"\n");
# ||= keeps an already set true value
$Global::start_no_new_jobs ||= 1;
# Remember this job's exit status; a later failing job overwrites it
$Global::halt_exitstatus = $job->exitstatus();
} elsif($opt::halt == 2) {
# --halt 2: exit immediately with the failing job's exit status
::status
("$Global::progname: This job failed:\n",
$job->replaced(),"\n");
exit ($job->exitstatus());
}
}
} else {
# The job succeeded (exit status 0): only negative --halt values act here.
if($opt::halt) {
if($opt::halt == -1) {
# --halt -1:
# start no more jobs, but let the running jobs finish
::status
("$Global::progname: Starting no more jobs. ",
"Waiting for ", scalar(keys %Global::running),
" jobs to finish. This job succeeded:\n",
$job->replaced(),"\n");
# ||= keeps an already set true value
$Global::start_no_new_jobs ||= 1;
# This branch is only reached when the exit status is 0, so this stores 0
$Global::halt_exitstatus = $job->exitstatus();
} elsif($opt::halt == -2) {
# --halt -2: exit immediately; the exit status is 0 in this branch
::status
("$Global::progname: This job succeeded:\n",
$job->replaced(),"\n");
exit ($job->exitstatus());
}
}
}
}

View file

@ -682,11 +682,11 @@ See also: B<--line-buffer> B<--ungroup>
Print a summary of the options to GNU B<parallel> and exit.
=item B<--halt-on-error> I<val>
=item B<--halt-on-error> I<val> (alpha testing)
=item B<--halt> I<val>
=item B<--halt> I<val> (alpha testing)
How should GNU B<parallel> terminate if one or more jobs fail?
How should GNU B<parallel> terminate?
=over 7
@ -706,6 +706,17 @@ last failing job.
Kill off all jobs immediately and exit without cleanup. The exit
status will be the exit status from the failing job.
=item Z<>-1
Do not start new jobs if a job succeeds, but complete the running jobs
including cleanup. The exit status will be 0 (the exit status of the
succeeding job).
=item Z<>-2
Kill off all jobs immediately and exit without cleanup. The exit
status will be 0.
=item Z<>1-99%
If I<val>% of the jobs fail and minimum 3: Do not start new jobs, but

View file

@ -35,6 +35,22 @@ echo '### Test --halt-on-error 2';
echo '**'
echo '### Test --halt -1';
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -1;
echo $?;
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -1;
echo $?
echo '**'
echo '### Test --halt -2';
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -2;
echo $?;
(echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -2;
echo $?
echo '**'
echo '### Test last dying print --halt-on-error 1';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift';
echo exit code $?
@ -43,6 +59,14 @@ echo '### Test last dying print --halt-on-error 2';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift';
echo exit code $?
echo '### Test last dying print --halt-on-error -1';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift';
echo exit code $?
echo '### Test last dying print --halt-on-error -2';
(seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift';
echo exit code $?
echo '**'
echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834';

View file

@ -36,10 +36,32 @@ parallel: This job failed:
sleep 2;false
echo '**'
**
echo '### Test --halt -1'; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -1; echo $?; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -1; echo $?
### Test --halt -1
0
0
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job succeeded:
sleep 2;true
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job succeeded:
sleep 2;true
/bin/bash: non_exist: command not found
echo '**'
**
echo '### Test --halt -2'; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false") | parallel -j10 --halt-on-error -2; echo $?; (echo "sleep 1;false"; echo "sleep 2;true";echo "sleep 3;false";echo "sleep 4; non_exist") | parallel -j10 --halt -2; echo $?
### Test --halt -2
0
0
parallel: This job succeeded:
sleep 2;true
parallel: This job succeeded:
sleep 2;true
echo '**'
**
echo '### Test last dying print --halt-on-error 1'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt 1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit shift'; echo exit code $?
### Test last dying print --halt-on-error 1
exit code 9
0
0
1
parallel: Starting no more jobs. Waiting for 9 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1
@ -62,7 +84,6 @@ perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 6
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 7
8
0
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 8
9
@ -72,9 +93,50 @@ echo '### Test last dying print --halt-on-error 2'; (seq 0 8;echo 0; echo 9) |
### Test last dying print --halt-on-error 2
exit code 1
0
0
1
parallel: This job failed:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ shift 1
echo '### Test last dying print --halt-on-error -1'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -1 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift'; echo exit code $?
### Test last dying print --halt-on-error -1
exit code 0
0
0
1
parallel: Starting no more jobs. Waiting for 9 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 1
2
parallel: Starting no more jobs. Waiting for 8 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 2
3
parallel: Starting no more jobs. Waiting for 7 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 3
4
parallel: Starting no more jobs. Waiting for 6 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 4
5
parallel: Starting no more jobs. Waiting for 5 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 5
6
parallel: Starting no more jobs. Waiting for 4 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 6
7
parallel: Starting no more jobs. Waiting for 3 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 7
8
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 8
9
parallel: Starting no more jobs. Waiting for 1 jobs to finish. This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 9
echo '### Test last dying print --halt-on-error -2'; (seq 0 8;echo 0; echo 9) | parallel -j10 -kq --halt -2 perl -e 'sleep $ARGV[0];print STDERR @ARGV,"\n"; exit not shift'; echo exit code $?
### Test last dying print --halt-on-error -2
exit code 0
0
0
1
parallel: This job succeeded:
perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ exit\ not\ shift 1
echo '**'
**
echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo