parallel: Fixed bug #59893: --halt soon doesn't work with --delay.

Ole Tange 2021-01-17 12:19:57 +01:00
parent f539554727
commit b210039626
8 changed files with 256 additions and 63 deletions

View file

@@ -14,6 +14,15 @@ that it is cited. The citation notice makes users aware of this.
See also: https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
The funding of free software is hard. There does not seem to be a
single solution that just works:
* https://stackoverflow.blog/2021/01/07/open-source-has-a-funding-problem/
* https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
* https://www.youtube.com/watch?v=vTsc1m78BUk
* https://blog.licensezero.com/2019/08/24/process-of-elimination.html
* https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
> Is the citation notice compatible with GPLv3?
@@ -95,6 +104,7 @@ refer to peer-reviewed articles - others do not:
* https://www.open-mpi.org/papers/
* https://www.tensorflow.org/about/bib
* http://www.fon.hum.uva.nl/paul/praat.html
* https://github.com/UnixJunkie/PAR/blob/master/README
> I do not like the notice. Can I fork GNU Parallel and remove it?

View file

@@ -1,5 +1,8 @@
Quote of the month:
Try GNU parallel it's awesome, and exactly the thing you are looking for. It allows you to set number of processes running among many things. I use it a lot, and can't recommend it enough.
-- mapettheone@reddit
It's really quite amazing how powerful and flexible it is
-- schwanengesang @tensegrist@twitter

View file

@@ -192,7 +192,7 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20210122 ('Capitol Riots') released <<[stable]>>
Subject: GNU Parallel 20210122 ('Ask/Capitol Riots') released <<[stable]>>
GNU Parallel 20210122 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
@@ -206,6 +206,11 @@ Quote of the month:
<<>>
New in this release:
* $PARALLEL_ARGHOSTGROUPS and the replacement string {agrp} will give the hostgroup given on the argument when using --hostgroup.
* Handy time functions for {= =}: yyyy_mm_dd_hh_mm_ss() yyyy_mm_dd_hh_mm() yyyy_mm_dd() yyyymmddhhmmss() yyyymmddhhmm() yyyymmdd()
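As an editorial illustration (not part of the release notes) of the new {= =} time functions, assuming they return the current time formatted as their names suggest, e.g. yyyy_mm_dd() as 2021-01-22:

  # Tag each job's output with today's date via a {= =} time function.
  parallel echo '{= $_=yyyy_mm_dd() =}' {} ::: job1 job2
  # Expected to print lines like: 2021-01-22 job1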
<<>>
@@ -215,6 +220,10 @@ News about GNU Parallel:
https://www.codenong.com/25172209/
https://qiita.com/hana_shin/items/53c3c78525c9c758ae7c
https://canvas.stanford.edu/courses/133091
<<>>
Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html

View file

@@ -10624,16 +10624,15 @@ sub set_exitsignal($$) {
} elsif($Global::halt_success) {
$Global::halt_exitstatus = 0;
}
if($Global::halt_when eq "soon"
if($Global::halt_when eq "soon") {
and
(scalar(keys %Global::running) > 0
or
$Global::max_jobs_running == 1)) {
::status
("$Global::progname: Starting no more jobs. ".
"Waiting for ". (keys %Global::running).
" jobs to finish.");
$Global::start_no_new_jobs ||= 1;
if(scalar(keys %Global::running) > 0) {
# Only warn if there are more jobs running
::status
("$Global::progname: Starting no more jobs. ".
"Waiting for ". (keys %Global::running).
" jobs to finish.");
}
}
return($Global::halt_when);
}
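For context, a minimal way to exercise the fixed code path, adapted from the testsuite entry added in this commit (the interpretation in the comments is editorial, not taken from the commit):

  # --halt soon,fail=1: stop starting new jobs after the first failure,
  # but let already running jobs finish.
  seq 0 10 |
    parallel --delay 1 -uj2 --halt soon,fail=1 'sleep 0.{};echo {};exit {}'
  # With the fix, "Starting no more jobs" takes effect even when --delay
  # is used; previously the delayed scheduling could keep launching jobs.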

View file

@@ -322,6 +322,10 @@ perl quote a string
do not quote current replacement string
=item Z<> B<hash(val)>
compute B::hash(val)
=item Z<> B<total_jobs()>
number of jobs in total
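A small, hedged sketch of these {= =} helpers (total_jobs() is documented above; the exact string returned by the new hash() depends on B::hash):

  # Show each argument's position out of the total number of jobs.
  parallel echo {} is job {#} of '{= $_=total_jobs() =}' ::: a b c

  # Derive a (non-cryptographic) hash tag from each argument.
  parallel echo {} '{= $_=hash($_) =}' ::: foo bar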

View file

@@ -2201,7 +2201,9 @@ is sent to stdout (standard output) and prepended with e:.
For short jobs with little output B<par> is 20% faster than GNU
B<parallel> and 60% slower than B<xargs>.
http://savannah.nongnu.org/projects/par (Last checked: 2019-02)
https://github.com/UnixJunkie/PAR
https://savannah.nongnu.org/projects/par (Last checked: 2019-02)
=head2 DIFFERENCES BETWEEN fd AND GNU Parallel
@@ -2555,15 +2557,17 @@ output to stderr (this can be prevented with -q)
scanme.nmap.org 443
EOF
cat /tmp/host-port.txt | \
1$ cat /tmp/host-port.txt |
runp -q -p 'netcat -v -w2 -z' 2>&1 | egrep '(succeeded!|open)$'
# --colsep is needed to split the line
cat /tmp/host-port.txt | \
1$ cat /tmp/host-port.txt |
parallel --colsep ' ' netcat -v -w2 -z 2>&1 | egrep '(succeeded!|open)$'
parallel --colsep ' ' netcat -v -w2 -z 2>&1 |
egrep '(succeeded!|open)$'
# or use uq for unquoted:
cat /tmp/host-port.txt | \
1$ cat /tmp/host-port.txt |
parallel netcat -v -w2 -z {=uq=} 2>&1 | egrep '(succeeded!|open)$'
parallel netcat -v -w2 -z {=uq=} 2>&1 |
egrep '(succeeded!|open)$'
https://github.com/jreisinger/runp (Last checked: 2020-04)
@@ -2603,11 +2607,11 @@ seconds (compared to 10 seconds with GNU B<parallel>).
=head3 Examples as GNU Parallel
1$ papply gzip *.txt
1$ parallel gzip ::: *.txt
2$ papply "convert %F %n.jpg" *.png
2$ parallel convert {} {.}.jpg ::: *.png
@@ -2633,18 +2637,18 @@ composed commands.
=head3 Examples as GNU Parallel
1$ S="/tmp/example_socket"
1$ ID=myid
2$ async -s="$S" server --start
2$ # GNU Parallel does not need a server to run
3$ for i in {1..20}; do
# prints command output to stdout
async -s="$S" cmd -- bash -c "sleep 1 && echo test $i"
done
3$ for i in {1..20}; do
# prints command output to stdout
sem --id "$ID" -j100% "sleep 1 && echo test $i"
@@ -2653,38 +2657,38 @@ composed commands.
# use -u or --line-buffer
sem --id "$ID" -j100% --line-buffer "sleep 1 && echo test $i"
done
4$ # wait until all commands are finished
async -s="$S" wait
4$ sem --id "$ID" --wait
5$ # configure the server to run four commands in parallel
async -s="$S" server -j4
5$ export PARALLEL=-j4
6$ mkdir "/tmp/ex_dir"
for i in {21..40}; do
# redirects command output to /tmp/ex_dir/file*
async -s="$S" cmd -o "/tmp/ex_dir/file$i" -- \
bash -c "sleep 1 && echo test $i"
done
6$ mkdir "/tmp/ex_dir"
for i in {21..40}; do
# redirects command output to /tmp/ex_dir/file*
sem --id "$ID" --result '/tmp/my-ex/file-{=$_=""=}'"$i" \
"sleep 1 && echo test $i"
done
7$ sem --id "$ID" --wait
7$ async -s="$S" wait
8$ # stops server
async -s="$S" server --stop
8$ # GNU Parallel does not need to stop a server
@@ -2709,23 +2713,199 @@ name (%IN) can be put in the command line. You can only use %IN once.
It can also run full command lines in parallel (like: B<cat file |
parallel>).
=head3 EXAMPLES FROM pardi test.sh
1$ time pardi -v -c 100 -i data/decoys.smi -ie .smi -oe .smi \
-o data/decoys_std_pardi.smi \
-w '(standardiser -i %IN -o %OUT 2>&1) > /dev/null'
1$ cat data/decoys.smi |
time parallel -N 100 --pipe --cat \
'(standardiser -i {} -o {#} 2>&1) > /dev/null; cat {#}; rm {#}' \
> data/decoys_std_pardi.smi
2$ pardi -n 1 -i data/test_in.types -o data/test_out.types \
-d 'r:^#atoms:' -w 'cat %IN > %OUT'
2$ cat data/test_in.types | parallel -n 1 -k --pipe --cat \
--regexp --recstart '^#atoms' 'cat {}' > data/test_out.types
3$ pardi -c 6 -i data/test_in.types -o data/test_out.types \
-d 'r:^#atoms:' -w 'cat %IN > %OUT'
3$ cat data/test_in.types | parallel -n 6 -k --pipe --cat \
--regexp --recstart '^#atoms' 'cat {}' > data/test_out.types
4$ pardi -i data/decoys.mol2 -o data/still_decoys.mol2 \
-d 's:@<TRIPOS>MOLECULE' -w 'cp %IN %OUT'
4$ cat data/decoys.mol2 |
parallel -n 1 --pipe --cat --recstart '@<TRIPOS>MOLECULE' \
'cp {} {#}; cat {#}; rm {#}' > data/still_decoys.mol2
5$ pardi -i data/decoys.mol2 -o data/decoys2.mol2 \
-d b:10000 -w 'cp %IN %OUT' --preserve
5$ cat data/decoys.mol2 |
parallel -k --pipe --block 10k --recend '' --cat \
'cat {} > {#}; cat {#}; rm {#}' > data/decoys2.mol2
https://github.com/UnixJunkie/pardi (Last checked: 2021-01)
=head2 DIFFERENCES BETWEEN bthread AND GNU Parallel
Summary (see legend above):
- - - I4 - - -
- - - - - M6
O1 - O3 - - - O7 O8 - -
E1 - - - - - -
- - - - - - - - -
- -
B<bthread> takes around 1 sec per MB of output. The maximal output
line length is 1073741759.
You cannot quote space in the command, so you cannot run composed
commands like B<sh -c "echo a; echo b">.
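GNU B<parallel>, by contrast, runs composed commands directly, for example:

  # One composed command (several statements) per job.
  parallel 'echo start {}; sleep 1; echo done {}' ::: 1 2 3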
https://gitlab.com/netikras/bthread (Last checked: 2021-01)
=head2 DIFFERENCES BETWEEN simple_gpu_scheduler AND GNU Parallel
Summary (see legend above):
I1 - - - - - I7
M1 - - - - M6
- O2 O3 - - O6 - N/A N/A O10
E1 - - - - - -
- - - - - - - - -
- -
=head3 EXAMPLES FROM simple_gpu_scheduler MANUAL
1$ simple_gpu_scheduler --gpus 0 1 2 < gpu_commands.txt
1$ parallel -j3 --shuf \
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' < gpu_commands.txt
2$ simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" \
-p lr 0.001 0.0005 0.0001 -p bs 32 64 128 |
simple_gpu_scheduler --gpus 0,1,2
2$ parallel --header : --shuf -j3 -v \
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =}' \
python3 train_dnn.py --lr {lr} --batch_size {bs} \
::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
3$ simple_hypersearch \
"python3 train_dnn.py --lr {lr} --batch_size {bs}" \
--n-samples 5 -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 |
simple_gpu_scheduler --gpus 0,1,2
3$ parallel --header : --shuf \
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1; seq() > 5 and skip() =}' \
python3 train_dnn.py --lr {lr} --batch_size {bs} \
::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
4$ touch gpu.queue
tail -f -n 0 gpu.queue | simple_gpu_scheduler --gpus 0,1,2 &
echo "my_command_with | and stuff > logfile" >> gpu.queue
4$ touch gpu.queue
tail -f -n 0 gpu.queue |
parallel -j3 CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' &
# Needed to fill job slots once
seq 3 | parallel echo true >> gpu.queue
# Add jobs
echo "my_command_with | and stuff > logfile" >> gpu.queue
# Needed to flush output from completed jobs
seq 3 | parallel echo true >> gpu.queue
https://github.com/ExpectationMax/simple_gpu_scheduler (Last checked:
2021-01)
=head2 DIFFERENCES BETWEEN parasweep AND GNU Parallel
B<parasweep> is a Python module for facilitating parallel parameter
sweeps.
A B<parasweep> job will normally take a text file as input. The text
file contains arguments for the job. Some of these arguments will be
fixed and some of them will be changed by B<parasweep>.
It does this by having a template file such as template.txt:
Xval: {x}
Yval: {y}
FixedValue: 9
# x with 2 decimals
DecimalX: {x:.2f}
TenX: ${x*10}
RandomVal: {r}
and from this template it generates the file to be used by the job by
replacing the replacement strings.
Being a Python module, B<parasweep> integrates more tightly with
Python than GNU B<parallel> does. You get the parameters directly in a
Python data structure. With GNU B<parallel> you can use the JSON or
CSV output format to get something similar, but you would have to
parse the output yourself.
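A rough sketch of the CSV route (assuming a B<--results> file name ending in .csv selects CSV output, as in recent GNU B<parallel> releases; the sweep command is illustrative):

  # Collect one CSV row per job, including the swept parameters and
  # the captured output, for later analysis in Python.
  parallel --header : --results sweep.csv echo x={x} y={y} \
    ::: x 1 2 3 ::: y 0.1 0.2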
B<parasweep> has a filtering method to ignore parameter combinations
you do not need.
Instead of calling the jobs directly, B<parasweep> can use Python's
Distributed Resource Management Application API to make jobs run with
different cluster software.
=head3 Future support in GNU Parallel
B<Future> versions of GNU B<parallel> may support templates
with replacement strings. Such as:
Xval: {x}
Yval: {y}
FixedValue: 9
# x with 2 decimals
DecimalX: {=x $_=sprintf("%.2f",$_) =}
TenX: {=x $_=$_*10 =}
RandomVal: {= $_=rand() =}
that can be used like:
parallel --header : --tmpl my.tmpl {#}.t myprog {#}.t ::: x 1 2 3 ::: y 1 2 3
Filtering may also be supported as:
parallel --filter '{1} > {2}' echo ::: 1 2 3 ::: 1 2 3
which will basically do:
parallel echo '{= $arg[1] > $arg[2] and skip() =}' ::: 1 2 3 ::: 1 2 3
https://github.com/eviatarbach/parasweep (Last checked: 2021-01)
=head2 Todo
https://reposhub.com/python/command-line-tools/Akianonymus-parallel-bash.html
https://github.com/Nukesor/pueue
PASH: Light-touch Data-Parallel Shell Processing
https://arxiv.org/pdf/2012.15443.pdf KumQuat
https://arxiv.org/pdf/2007.09436.pdf
https://github.com/UnixJunkie/PAR (Same as http://savannah.nongnu.org/projects/par above?)
https://gitlab.com/netikras/bthread
https://github.com/JeiKeiLim/simple_distribute_job
https://github.com/Akianonymus/parallel-bash
https://github.com/reggi/pkgrun
https://github.com/benoror/better-npm-run - not obvious how to use
@@ -2738,29 +2918,6 @@ https://github.com/flesler/parallel
https://github.com/Julian/Verge
https://github.com/ExpectationMax/simple_gpu_scheduler
simple_gpu_scheduler --gpus 0 1 2 < gpu_commands.txt
parallel -j3 --shuf CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' < gpu_commands.txt
simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | simple_gpu_scheduler --gpus 0,1,2
parallel --header : --shuf -j3 -v CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =}' python3 train_dnn.py --lr {lr} --batch_size {bs} ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" --n-samples 5 -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | simple_gpu_scheduler --gpus 0,1,2
parallel --header : --shuf CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1; seq() > 5 and skip() =}' python3 train_dnn.py --lr {lr} --batch_size {bs} ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
touch gpu.queue
tail -f -n 0 gpu.queue | simple_gpu_scheduler --gpus 0,1,2 &
echo "my_command_with | and stuff > logfile" >> gpu.queue
touch gpu.queue
tail -f -n 0 gpu.queue | parallel -j3 CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' &
# Needed to fill job slots once
seq 3 | parallel echo true >> gpu.queue
# Add jobs
echo "my_command_with | and stuff > logfile" >> gpu.queue
# Needed to flush output from completed jobs
seq 3 | parallel echo true >> gpu.queue
=head1 TESTING OTHER TOOLS

View file

@@ -4,6 +4,12 @@
# Each should be taking 3-10s and be possible to run in parallel
# I.e.: No race conditions, no logins
par_delay_halt_soon() {
echo "bug #59893: --halt soon doesn't work with --delay"
seq 0 10 |
stdout parallel --delay 1 -uj2 --halt soon,fail=1 'sleep 0.{};echo {};exit {}'
}
par_show_limits() {
echo '### Test --show-limits'
(

View file

@@ -60,6 +60,11 @@ par_compute_command_len a_b_c_d
par_compute_command_len
par_delay ### Test --delay
par_delay More than 3.3 secs: OK
par_delay_halt_soon bug #59893: --halt soon doesn't work with --delay
par_delay_halt_soon 0
par_delay_halt_soon 1
par_delay_halt_soon parallel: This job failed:
par_delay_halt_soon sleep 0.1;echo 1;exit 1
par_delay_human_readable a
par_delay_human_readable b
par_delay_human_readable c