parallel: --retries did not reset endtime() causing --timeout to fail.

This commit is contained in:
Ole Tange 2013-11-28 15:24:34 +01:00
parent f481acca30
commit e39e3c7b0f
5 changed files with 55 additions and 44 deletions

View file

@ -201,50 +201,21 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
Jesse Alama <jesse.alama@gmail.com>
Subject: GNU Parallel 20131122 ('Haiyan') released
Subject: GNU Parallel 20131222 ('') released
GNU Parallel 20131122 ('Haiyan') has been released. It is
GNU Parallel 20131222 ('') has been released. It is
available for download at: http://ftp.gnu.org/gnu/parallel/
New in this release:
* A citation notice is printed on stderr only if stderr is a terminal,
the user has not specified --no-notice and the user has not run
--bibtex once. This makes the release alpha quality.
* Parallel rsync
http://pastebin.com/JmnB9ffq
* --compress will compress temporary files. If the output is big and
very compressible this will take up less disk space in $TMPDIR and
possibly be faster due to less disk I/O.
* Gnu Parallel for fun and profit
https://gist.github.com/celoyd/f7eb55ad69c9b33fd8c3
* --compress-program controls which program to use for compressing
temporary files.
* --bar shows progress as a progress bar compatible with zenity.
* --resume can now be used with --result: Jobs already run will be
skipped.
* --transfer and --basefile support paths relative to the --workdir by
inserting /./ into the path.
* GNU Parallel was used (unfortunately with improper citation) in:
'fastphylo: Fast tools for phylogenetics'
http://www.biomedcentral.com/1471-2105/14/334/abstract
* Using GNU parallel
http://davetang.org/muse/2013/11/18/using-gnu-parallel/
* Techlux - GNU - Parallel (German)
https://techlux.de/blog/2013/11/07/gnu-parallel/
* awk, sed, bzip2, grep, wc на всех ядрах
http://vk.com/page-30666517_45528467
* 如何利用多核CPU來加速你的Linux命令 — awk, sed, bzip2, grep, wc等
http://www.hksilicon.com/kb/articles/290543/CPULinuxawk-sed-bzip2-grep-wc
* GNU Parallel (Japanese)
http://jarp.does.notwork.org/diary/201311b.html#20131117
* Procesando la contabilidad del PP
http://www.neorazorx.com/2013/07/procesando-la-contabilidad-del-pp.html
* Bug fixes and man page updates.

View file

@ -119,6 +119,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
print $fh @cores, @cpus, @maxline, @echo;
close $fh;
# my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
::debug($cmd."\n");
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
@ -127,6 +128,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
my @col = split /\t/, $_;
if(defined $col[6]) {
# This is a line from --joblog
# seq host time spent sent received exit signal command
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
if($col[0] eq "Seq" and $col[1] eq "Host" and
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
@ -259,7 +261,7 @@ if($opt::nonall or $opt::onall) {
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
);
::debug("| $0 $options\n");
open(my $parallel_fh, "|-", "$0 -j0 $options") ||
open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
::die_bug("This does not run GNU Parallel: $0 $options");
my @joblogs;
for my $sshlogin (values %Global::host) {
@ -2190,7 +2192,7 @@ sub reaper {
$job->set_exitstatus($? >> 8);
$job->set_exitsignal($? & 127);
debug("died (".$job->exitstatus()."): ".$job->seq());
$job->set_endtime();
$job->set_endtime(::now());
if($stiff == $Global::tty_taken) {
# The process that died had the tty => release it
$Global::tty_taken = 0;
@ -2313,11 +2315,13 @@ sub usage {
sub citation_notice {
# if --no-notice: do nothing
# if --no-notice or --plain: do nothing
# if stderr redirected: do nothing
# if ~/.parallel/will-cite: do nothing
# else: print citation notice to stderr
if($opt::no_notice
or
$opt::plain
or
not -t $Global::original_stderr
or
@ -2394,7 +2398,7 @@ sub bibtex {
print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
}
print join("\n",
"When using GNU Parallel to process data for publication please cite:",
"When using programs that use GNU Parallel to process data for publication please cite:",
"",
"\@article{Tange2011a,",
" title = {GNU Parallel - The Command-Line Power Tool},",
@ -2409,6 +2413,8 @@ sub bibtex {
" pages = {42-47}",
"}",
"",
"(Feel free to use \\nocite{Tange2011a})",
"",
"This helps funding further development.",
""
);
@ -4190,7 +4196,7 @@ sub endtime {
sub set_endtime {
my $self = shift;
my $endtime = shift || ::now();
my $endtime = shift;
$self->{'endtime'} = $endtime;
}
@ -4207,6 +4213,7 @@ sub kill {
# Record this jobs as failed
$self->set_exitstatus(-1);
# Send two TERMs to give time to clean up
::debug("Kill seq ".$self->seq()."\n");
for my $signal ("TERM", "TERM", "KILL") {
my $alive = 0;
for my $pid (@family_pids) {
@ -4766,6 +4773,7 @@ sub should_be_retried {
return 0;
} else {
# This command should be retried
$self->set_endtime(undef);
$Global::JobQueue->unget($self);
::debug("Retry ".$self->seq()."\n");
return 1;

Binary file not shown.

View file

@ -614,7 +614,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
I<logfile> in the following TAB separated format: sequence number,
sshlogin, start time as seconds since epoch, run time in seconds,
bytes in files transferred, bytes in files returned, exit status,
and command run.
signal, and command run.
To convert the times into ISO-8601 strict do:
@ -2496,6 +2496,22 @@ files before it removes the files. The output is saved to
B<bigfile.sort>.
=head1 EXAMPLE: Running more than 500 jobs workaround
If you need to run a massive amount of jobs in parallel, then you will
likely hit the filehandle limit which is often around 500 jobs. If you
are super user you can raise the limit in /etc/security/limits.conf
but you can also use this workaround. The filehandle limit is per
process. That means that if you just spawn more GNU B<parallel>s then
each of them can run 500 jobs. This will spawn up to 2500 jobs:
B<cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg>
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
B<cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg>
=head1 EXAMPLE: Working as mutex and counting semaphore
The command B<sem> is an alias for B<parallel --semaphore>.

View file

@ -647,7 +647,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
@emph{logfile} in the following TAB separated format: sequence number,
sshlogin, start time as seconds since epoch, run time in seconds,
bytes in files transferred, bytes in files returned, exit status,
and command run.
signal, and command run.
To convert the times into ISO-8601 strict do:
@ -2691,6 +2691,22 @@ files are passed to the second @strong{parallel} that runs @strong{sort -m} on t
files before it removes the files. The output is saved to
@strong{bigfile.sort}.
@chapter EXAMPLE: Running more than 500 jobs workaround
@anchor{EXAMPLE: Running more than 500 jobs workaround}
If you need to run a massive amount of jobs in parallel, then you will
likely hit the filehandle limit which is often around 500 jobs. If you
are super user you can raise the limit in /etc/security/limits.conf
but you can also use this workaround. The filehandle limit is per
process. That means that if you just spawn more GNU @strong{parallel}s then
each of them can run 500 jobs. This will spawn up to 2500 jobs:
@strong{cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg}
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
@strong{cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg}
@chapter EXAMPLE: Working as mutex and counting semaphore
@anchor{EXAMPLE: Working as mutex and counting semaphore}