parallel: --retries did not reset endtime() causing --timeout to fail.

This commit is contained in:
Ole Tange 2013-11-28 15:24:34 +01:00
parent f481acca30
commit e39e3c7b0f
5 changed files with 55 additions and 44 deletions

View file

@ -201,50 +201,21 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
Jesse Alama <jesse.alama@gmail.com> Jesse Alama <jesse.alama@gmail.com>
Subject: GNU Parallel 20131122 ('Haiyan') released Subject: GNU Parallel 20131222 ('') released
GNU Parallel 20131122 ('Haiyan') has been released. It is GNU Parallel 20131222 ('') has been released. It is
available for download at: http://ftp.gnu.org/gnu/parallel/ available for download at: http://ftp.gnu.org/gnu/parallel/
New in this release: New in this release:
* A citation notice is printed on stderr only if stderr is a terminal, * Parallel rsync
the user has not specified --no-notice and the user has not run http://pastebin.com/JmnB9ffq
--bibtex once. This makes the release alpha quality.
* --compress will compress temporary files. If the output is big and * Gnu Parallel for fun and profit
very compressible this will take up less disk space in $TMPDIR and https://gist.github.com/celoyd/f7eb55ad69c9b33fd8c3
possibly be faster due to less disk I/O.
* --compress-program controls which program to use for compressing * Procesando la contabilidad del PP
temporary files. http://www.neorazorx.com/2013/07/procesando-la-contabilidad-del-pp.html
* --bar show progress as a progress bar compatible with zenity.
* --resume can now be used with --result: Jobs already run will be
skipped.
* --transfer and --basefile support paths relative to the --workdir by
inserting /./ into the path.
* GNU Parallel was used (unfortunately with improper citation) in:
'fastphylo: Fast tools for phylogenetics'
http://www.biomedcentral.com/1471-2105/14/334/abstract
* Using GNU parallel
http://davetang.org/muse/2013/11/18/using-gnu-parallel/
* Techlux - GNU - Parallel (German)
https://techlux.de/blog/2013/11/07/gnu-parallel/
* awk, sed, bzip2, grep, wc на всех ядрах
http://vk.com/page-30666517_45528467
* 如何利用多核CPU來加速你的Linux命令 — awk, sed, bzip2, grep, wc等
http://www.hksilicon.com/kb/articles/290543/CPULinuxawk-sed-bzip2-grep-wc
* GNU Parallel (Japanese)
http://jarp.does.notwork.org/diary/201311b.html#20131117
* Bug fixes and man page updates. * Bug fixes and man page updates.

View file

@ -119,6 +119,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh"); my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
print $fh @cores, @cpus, @maxline, @echo; print $fh @cores, @cpus, @maxline, @echo;
close $fh; close $fh;
# my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null"; my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
::debug($cmd."\n"); ::debug($cmd."\n");
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
@ -127,6 +128,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
my @col = split /\t/, $_; my @col = split /\t/, $_;
if(defined $col[6]) { if(defined $col[6]) {
# This is a line from --joblog # This is a line from --joblog
# seq host time spent sent received exit signal command
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
if($col[0] eq "Seq" and $col[1] eq "Host" and if($col[0] eq "Seq" and $col[1] eq "Host" and
$col[2] eq "Starttime" and $col[3] eq "Runtime") { $col[2] eq "Starttime" and $col[3] eq "Runtime") {
@ -259,7 +261,7 @@ if($opt::nonall or $opt::onall) {
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
); );
::debug("| $0 $options\n"); ::debug("| $0 $options\n");
open(my $parallel_fh, "|-", "$0 -j0 $options") || open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
::die_bug("This does not run GNU Parallel: $0 $options"); ::die_bug("This does not run GNU Parallel: $0 $options");
my @joblogs; my @joblogs;
for my $sshlogin (values %Global::host) { for my $sshlogin (values %Global::host) {
@ -2190,7 +2192,7 @@ sub reaper {
$job->set_exitstatus($? >> 8); $job->set_exitstatus($? >> 8);
$job->set_exitsignal($? & 127); $job->set_exitsignal($? & 127);
debug("died (".$job->exitstatus()."): ".$job->seq()); debug("died (".$job->exitstatus()."): ".$job->seq());
$job->set_endtime(); $job->set_endtime(::now());
if($stiff == $Global::tty_taken) { if($stiff == $Global::tty_taken) {
# The process that died had the tty => release it # The process that died had the tty => release it
$Global::tty_taken = 0; $Global::tty_taken = 0;
@ -2313,11 +2315,13 @@ sub usage {
sub citation_notice { sub citation_notice {
# if --no-notice: do nothing # if --no-notice or --plain: do nothing
# if stderr redirected: do nothing # if stderr redirected: do nothing
# if ~/.parallel/will-cite: do nothing # if ~/.parallel/will-cite: do nothing
# else: print citation notice to stderr # else: print citation notice to stderr
if($opt::no_notice if($opt::no_notice
or
$opt::plain
or or
not -t $Global::original_stderr not -t $Global::original_stderr
or or
@ -2394,7 +2398,7 @@ sub bibtex {
print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
} }
print join("\n", print join("\n",
"When using GNU Parallel to process data for publication please cite:", "When using programs that use GNU Parallel to process data for publication please cite:",
"", "",
"\@article{Tange2011a,", "\@article{Tange2011a,",
" title = {GNU Parallel - The Command-Line Power Tool},", " title = {GNU Parallel - The Command-Line Power Tool},",
@ -2409,6 +2413,8 @@ sub bibtex {
" pages = {42-47}", " pages = {42-47}",
"}", "}",
"", "",
"(Feel free to use \\nocite{Tange2011a})",
"",
"This helps funding further development.", "This helps funding further development.",
"" ""
); );
@ -4190,7 +4196,7 @@ sub endtime {
sub set_endtime { sub set_endtime {
my $self = shift; my $self = shift;
my $endtime = shift || ::now(); my $endtime = shift;
$self->{'endtime'} = $endtime; $self->{'endtime'} = $endtime;
} }
@ -4207,6 +4213,7 @@ sub kill {
# Record this job as failed # Record this job as failed
$self->set_exitstatus(-1); $self->set_exitstatus(-1);
# Send two TERMs to give time to clean up # Send two TERMs to give time to clean up
::debug("Kill seq ".$self->seq()."\n");
for my $signal ("TERM", "TERM", "KILL") { for my $signal ("TERM", "TERM", "KILL") {
my $alive = 0; my $alive = 0;
for my $pid (@family_pids) { for my $pid (@family_pids) {
@ -4766,6 +4773,7 @@ sub should_be_retried {
return 0; return 0;
} else { } else {
# This command should be retried # This command should be retried
$self->set_endtime(undef);
$Global::JobQueue->unget($self); $Global::JobQueue->unget($self);
::debug("Retry ".$self->seq()."\n"); ::debug("Retry ".$self->seq()."\n");
return 1; return 1;

Binary file not shown.

View file

@ -614,7 +614,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
I<logfile> in the following TAB separated format: sequence number, I<logfile> in the following TAB separated format: sequence number,
sshlogin, start time as seconds since epoch, run time in seconds, sshlogin, start time as seconds since epoch, run time in seconds,
bytes in files transferred, bytes in files returned, exit status, bytes in files transferred, bytes in files returned, exit status,
and command run. signal, and command run.
To convert the times into ISO-8601 strict do: To convert the times into ISO-8601 strict do:
@ -2496,6 +2496,22 @@ files before it removes the files. The output is saved to
B<bigfile.sort>. B<bigfile.sort>.
=head1 EXAMPLE: Running more than 500 jobs workaround
If you need to run a massive number of jobs in parallel, then you will
likely hit the filehandle limit which is often around 500 jobs. If you
are the superuser you can raise the limit in /etc/security/limits.conf
but you can also use this workaround. The filehandle limit is per
process. That means that if you just spawn more GNU B<parallel>s then
each of them can run 500 jobs. This will spawn up to 2500 jobs:
B<cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg>
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
B<cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg>
=head1 EXAMPLE: Working as mutex and counting semaphore =head1 EXAMPLE: Working as mutex and counting semaphore
The command B<sem> is an alias for B<parallel --semaphore>. The command B<sem> is an alias for B<parallel --semaphore>.

View file

@ -647,7 +647,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
@emph{logfile} in the following TAB separated format: sequence number, @emph{logfile} in the following TAB separated format: sequence number,
sshlogin, start time as seconds since epoch, run time in seconds, sshlogin, start time as seconds since epoch, run time in seconds,
bytes in files transferred, bytes in files returned, exit status, bytes in files transferred, bytes in files returned, exit status,
and command run. signal, and command run.
To convert the times into ISO-8601 strict do: To convert the times into ISO-8601 strict do:
@ -2691,6 +2691,22 @@ files are passed to the second @strong{parallel} that runs @strong{sort -m} on t
files before it removes the files. The output is saved to files before it removes the files. The output is saved to
@strong{bigfile.sort}. @strong{bigfile.sort}.
@chapter EXAMPLE: Running more than 500 jobs workaround
@anchor{EXAMPLE: Running more than 500 jobs workaround}
If you need to run a massive number of jobs in parallel, then you will
likely hit the filehandle limit which is often around 500 jobs. If you
are the superuser you can raise the limit in /etc/security/limits.conf
but you can also use this workaround. The filehandle limit is per
process. That means that if you just spawn more GNU @strong{parallel}s then
each of them can run 500 jobs. This will spawn up to 2500 jobs:
@strong{cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg}
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
@strong{cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg}
@chapter EXAMPLE: Working as mutex and counting semaphore @chapter EXAMPLE: Working as mutex and counting semaphore
@anchor{EXAMPLE: Working as mutex and counting semaphore} @anchor{EXAMPLE: Working as mutex and counting semaphore}