mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 14:07:55 +00:00
parallel: --retries did not reset endtime() causing --timeout to fail.
This commit is contained in:
parent
f481acca30
commit
e39e3c7b0f
|
@ -201,50 +201,21 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
|
|||
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
|
||||
Jesse Alama <jesse.alama@gmail.com>
|
||||
|
||||
Subject: GNU Parallel 20131122 ('Haiyan') released
|
||||
Subject: GNU Parallel 20131222 ('') released
|
||||
|
||||
GNU Parallel 20131122 ('Haiyan') has been released. It is
|
||||
GNU Parallel 20131222 ('') has been released. It is
|
||||
available for download at: http://ftp.gnu.org/gnu/parallel/
|
||||
|
||||
New in this release:
|
||||
|
||||
* A citation notice is printed on stderr only if stderr is a terminal,
|
||||
the user has not specified --no-notice and the user has not run
|
||||
--bibtex once. This makes the release alpha quality.
|
||||
* Parallel rsync
|
||||
http://pastebin.com/JmnB9ffq
|
||||
|
||||
* --compress will compress temporary files. If the output is big and
|
||||
very compressible this will take up less disk space in $TMPDIR and
|
||||
possibly be faster due to less disk I/O.
|
||||
* GNU Parallel for fun and profit
|
||||
https://gist.github.com/celoyd/f7eb55ad69c9b33fd8c3
|
||||
|
||||
* --compress-program controls which program to use for compressing
|
||||
temporary files.
|
||||
|
||||
* --bar shows progress as a progress bar compatible with zenity.
|
||||
|
||||
* --resume can now be used with --result: Jobs already run will be
|
||||
skipped.
|
||||
|
||||
* --transfer and --basefile support paths relative to the --workdir by
|
||||
inserting /./ into the path.
|
||||
|
||||
* GNU Parallel was used (unfortunately with improper citation) in:
|
||||
'fastphylo: Fast tools for phylogenetics'
|
||||
http://www.biomedcentral.com/1471-2105/14/334/abstract
|
||||
|
||||
* Using GNU parallel
|
||||
http://davetang.org/muse/2013/11/18/using-gnu-parallel/
|
||||
|
||||
* Techlux - GNU - Parallel (German)
|
||||
https://techlux.de/blog/2013/11/07/gnu-parallel/
|
||||
|
||||
* awk, sed, bzip2, grep, wc на всех ядрах
|
||||
http://vk.com/page-30666517_45528467
|
||||
|
||||
* 如何利用多核CPU來加速你的Linux命令 — awk, sed, bzip2, grep, wc等
|
||||
http://www.hksilicon.com/kb/articles/290543/CPULinuxawk-sed-bzip2-grep-wc
|
||||
|
||||
* GNU Parallel (Japanese)
|
||||
http://jarp.does.notwork.org/diary/201311b.html#20131117
|
||||
* Procesando la contabilidad del PP
|
||||
http://www.neorazorx.com/2013/07/procesando-la-contabilidad-del-pp.html
|
||||
|
||||
* Bug fixes and man page updates.
|
||||
|
||||
|
|
18
src/parallel
18
src/parallel
|
@ -119,6 +119,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
|||
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
|
||||
print $fh @cores, @cpus, @maxline, @echo;
|
||||
close $fh;
|
||||
# my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
|
||||
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
|
||||
::debug($cmd."\n");
|
||||
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
|
||||
|
@ -127,6 +128,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
|||
my @col = split /\t/, $_;
|
||||
if(defined $col[6]) {
|
||||
# This is a line from --joblog
|
||||
# seq host time spent sent received exit signal command
|
||||
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
|
||||
if($col[0] eq "Seq" and $col[1] eq "Host" and
|
||||
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
|
||||
|
@ -259,7 +261,7 @@ if($opt::nonall or $opt::onall) {
|
|||
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
|
||||
);
|
||||
::debug("| $0 $options\n");
|
||||
open(my $parallel_fh, "|-", "$0 -j0 $options") ||
|
||||
open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
|
||||
::die_bug("This does not run GNU Parallel: $0 $options");
|
||||
my @joblogs;
|
||||
for my $sshlogin (values %Global::host) {
|
||||
|
@ -2190,7 +2192,7 @@ sub reaper {
|
|||
$job->set_exitstatus($? >> 8);
|
||||
$job->set_exitsignal($? & 127);
|
||||
debug("died (".$job->exitstatus()."): ".$job->seq());
|
||||
$job->set_endtime();
|
||||
$job->set_endtime(::now());
|
||||
if($stiff == $Global::tty_taken) {
|
||||
# The process that died had the tty => release it
|
||||
$Global::tty_taken = 0;
|
||||
|
@ -2313,11 +2315,13 @@ sub usage {
|
|||
|
||||
|
||||
sub citation_notice {
|
||||
# if --no-notice: do nothing
|
||||
# if --no-notice or --plain: do nothing
|
||||
# if stderr redirected: do nothing
|
||||
# if ~/.parallel/will-cite: do nothing
|
||||
# else: print citation notice to stderr
|
||||
if($opt::no_notice
|
||||
or
|
||||
$opt::plain
|
||||
or
|
||||
not -t $Global::original_stderr
|
||||
or
|
||||
|
@ -2394,7 +2398,7 @@ sub bibtex {
|
|||
print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
|
||||
}
|
||||
print join("\n",
|
||||
"When using GNU Parallel to process data for publication please cite:",
|
||||
"When using programs that use GNU Parallel to process data for publication please cite:",
|
||||
"",
|
||||
"\@article{Tange2011a,",
|
||||
" title = {GNU Parallel - The Command-Line Power Tool},",
|
||||
|
@ -2409,6 +2413,8 @@ sub bibtex {
|
|||
" pages = {42-47}",
|
||||
"}",
|
||||
"",
|
||||
"(Feel free to use \\nocite{Tange2011a})",
|
||||
"",
|
||||
"This helps funding further development.",
|
||||
""
|
||||
);
|
||||
|
@ -4190,7 +4196,7 @@ sub endtime {
|
|||
|
||||
sub set_endtime {
|
||||
my $self = shift;
|
||||
my $endtime = shift || ::now();
|
||||
my $endtime = shift;
|
||||
$self->{'endtime'} = $endtime;
|
||||
}
|
||||
|
||||
|
@ -4207,6 +4213,7 @@ sub kill {
|
|||
# Record this jobs as failed
|
||||
$self->set_exitstatus(-1);
|
||||
# Send two TERMs to give time to clean up
|
||||
::debug("Kill seq ".$self->seq()."\n");
|
||||
for my $signal ("TERM", "TERM", "KILL") {
|
||||
my $alive = 0;
|
||||
for my $pid (@family_pids) {
|
||||
|
@ -4766,6 +4773,7 @@ sub should_be_retried {
|
|||
return 0;
|
||||
} else {
|
||||
# This command should be retried
|
||||
$self->set_endtime(undef);
|
||||
$Global::JobQueue->unget($self);
|
||||
::debug("Retry ".$self->seq()."\n");
|
||||
return 1;
|
||||
|
|
BIN
src/parallel.pdf
BIN
src/parallel.pdf
Binary file not shown.
|
@ -614,7 +614,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
|
|||
I<logfile> in the following TAB separated format: sequence number,
|
||||
sshlogin, start time as seconds since epoch, run time in seconds,
|
||||
bytes in files transferred, bytes in files returned, exit status,
|
||||
and command run.
|
||||
signal, and command run.
|
||||
|
||||
To convert the times into ISO-8601 strict do:
|
||||
|
||||
|
@ -2496,6 +2496,22 @@ files before it removes the files. The output is saved to
|
|||
B<bigfile.sort>.
|
||||
|
||||
|
||||
=head1 EXAMPLE: Running more than 500 jobs workaround
|
||||
|
||||
If you need to run a massive amount of jobs in parallel, then you will
|
||||
likely hit the filehandle limit which is often around 500 jobs. If you
|
||||
are super user you can raise the limit in /etc/security/limits.conf
|
||||
but you can also use this workaround. The filehandle limit is per
|
||||
process. That means that if you just spawn more GNU B<parallel>s then
|
||||
each of them can run 500 jobs. This will spawn up to 2500 jobs:
|
||||
|
||||
B<cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg>
|
||||
|
||||
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
|
||||
|
||||
B<cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg>
|
||||
|
||||
|
||||
=head1 EXAMPLE: Working as mutex and counting semaphore
|
||||
|
||||
The command B<sem> is an alias for B<parallel --semaphore>.
|
||||
|
|
|
@ -647,7 +647,7 @@ Logfile for executed jobs. Save a list of the executed jobs to
|
|||
@emph{logfile} in the following TAB separated format: sequence number,
|
||||
sshlogin, start time as seconds since epoch, run time in seconds,
|
||||
bytes in files transferred, bytes in files returned, exit status,
|
||||
and command run.
|
||||
signal, and command run.
|
||||
|
||||
To convert the times into ISO-8601 strict do:
|
||||
|
||||
|
@ -2691,6 +2691,22 @@ files are passed to the second @strong{parallel} that runs @strong{sort -m} on t
|
|||
files before it removes the files. The output is saved to
|
||||
@strong{bigfile.sort}.
|
||||
|
||||
@chapter EXAMPLE: Running more than 500 jobs workaround
|
||||
@anchor{EXAMPLE: Running more than 500 jobs workaround}
|
||||
|
||||
If you need to run a massive amount of jobs in parallel, then you will
|
||||
likely hit the filehandle limit which is often around 500 jobs. If you
|
||||
are super user you can raise the limit in /etc/security/limits.conf
|
||||
but you can also use this workaround. The filehandle limit is per
|
||||
process. That means that if you just spawn more GNU @strong{parallel}s then
|
||||
each of them can run 500 jobs. This will spawn up to 2500 jobs:
|
||||
|
||||
@strong{cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg}
|
||||
|
||||
This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this):
|
||||
|
||||
@strong{cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg}
|
||||
|
||||
@chapter EXAMPLE: Working as mutex and counting semaphore
|
||||
@anchor{EXAMPLE: Working as mutex and counting semaphore}
|
||||
|
||||
|
|
Loading…
Reference in a new issue