mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-21 13:37:56 +00:00
parallel: Fixed bug #59893: --halt soon doesn't work with --delay.
This commit is contained in:
parent
f539554727
commit
b210039626
|
@ -14,6 +14,15 @@ that it is cited. The citation notice makes users aware of this.
|
|||
|
||||
See also: https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html
|
||||
|
||||
The funding of free software is hard. There does not seem to be a
|
||||
single solution that just works:
|
||||
|
||||
* https://stackoverflow.blog/2021/01/07/open-source-has-a-funding-problem/
|
||||
* https://www.slideshare.net/NadiaEghbal/consider-the-maintainer
|
||||
* https://www.youtube.com/watch?v=vTsc1m78BUk
|
||||
* https://blog.licensezero.com/2019/08/24/process-of-elimination.html
|
||||
* https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/
|
||||
|
||||
|
||||
> Is the citation notice compatible with GPLv3?
|
||||
|
||||
|
@ -95,6 +104,7 @@ refer to peer-reviewed articles - others do not:
|
|||
* https://www.open-mpi.org/papers/
|
||||
* https://www.tensorflow.org/about/bib
|
||||
* http://www.fon.hum.uva.nl/paul/praat.html
|
||||
* https://github.com/UnixJunkie/PAR/blob/master/README
|
||||
|
||||
|
||||
> I do not like the notice. Can I fork GNU Parallel and remove it?
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
Quote of the month:
|
||||
|
||||
Try GNU parallel it's awesome, and exactly the thing you are looking for. It allows you to set number of processes running among many things. I use it a lot, and can't recommend it enough.
|
||||
-- mapettheone@reddit
|
||||
|
||||
It's really quite amazing how powerful and flexible it is
|
||||
-- schwanengesang @tensegrist@twitter
|
||||
|
||||
|
|
|
@ -192,7 +192,7 @@ from:tange@gnu.org
|
|||
to:parallel@gnu.org, bug-parallel@gnu.org
|
||||
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
|
||||
|
||||
Subject: GNU Parallel 20210122 ('Capitol Riots') released <<[stable]>>
|
||||
Subject: GNU Parallel 20210122 ('Ask/Capitol Riots') released <<[stable]>>
|
||||
|
||||
GNU Parallel 20210122 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
|
||||
|
||||
|
@ -206,6 +206,11 @@ Quote of the month:
|
|||
<<>>
|
||||
|
||||
New in this release:
|
||||
|
||||
* $PARALLEL_ARGHOSTGROUPS and the replacement string {agrp} will give the hostgroup given on the argument when using --hostgroup.
|
||||
|
||||
* Handy time functions for {= =}: yyyy_mm_dd_hh_mm_ss() yyyy_mm_dd_hh_mm() yyyy_mm_dd() yyyymmddhhmmss() yyyymmddhhmm() yyyymmdd()
|
||||
|
||||
<<>>
|
||||
|
||||
|
||||
|
@ -215,6 +220,10 @@ News about GNU Parallel:
|
|||
|
||||
https://www.codenong.com/25172209/
|
||||
|
||||
https://qiita.com/hana_shin/items/53c3c78525c9c758ae7c
|
||||
|
||||
https://canvas.stanford.edu/courses/133091
|
||||
|
||||
<<>>
|
||||
|
||||
Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html
|
||||
|
|
17
src/parallel
17
src/parallel
|
@ -10624,16 +10624,15 @@ sub set_exitsignal($$) {
|
|||
} elsif($Global::halt_success) {
|
||||
$Global::halt_exitstatus = 0;
|
||||
}
|
||||
if($Global::halt_when eq "soon"
|
||||
and
|
||||
(scalar(keys %Global::running) > 0
|
||||
or
|
||||
$Global::max_jobs_running == 1)) {
|
||||
::status
|
||||
("$Global::progname: Starting no more jobs. ".
|
||||
"Waiting for ". (keys %Global::running).
|
||||
" jobs to finish.");
|
||||
if($Global::halt_when eq "soon") {
|
||||
$Global::start_no_new_jobs ||= 1;
|
||||
if(scalar(keys %Global::running) > 0) {
|
||||
# Only warn if there are more jobs running
|
||||
::status
|
||||
("$Global::progname: Starting no more jobs. ".
|
||||
"Waiting for ". (keys %Global::running).
|
||||
" jobs to finish.");
|
||||
}
|
||||
}
|
||||
return($Global::halt_when);
|
||||
}
|
||||
|
|
|
@ -322,6 +322,10 @@ perl quote a string
|
|||
|
||||
do not quote current replacement string
|
||||
|
||||
=item Z<> B<hash(val)>
|
||||
|
||||
compute B::hash(val)
|
||||
|
||||
=item Z<> B<total_jobs()>
|
||||
|
||||
number of jobs in total
|
||||
|
|
|
@ -2201,7 +2201,9 @@ is sent to stdout (standard output) and prepended with e:.
|
|||
For short jobs with little output B<par> is 20% faster than GNU
|
||||
B<parallel> and 60% slower than B<xargs>.
|
||||
|
||||
http://savannah.nongnu.org/projects/par (Last checked: 2019-02)
|
||||
https://github.com/UnixJunkie/PAR
|
||||
|
||||
https://savannah.nongnu.org/projects/par (Last checked: 2019-02)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN fd AND GNU Parallel
|
||||
|
@ -2555,15 +2557,17 @@ output to stderr (this can be prevented with -q)
|
|||
scanme.nmap.org 443
|
||||
EOF
|
||||
|
||||
cat /tmp/host-port.txt | \
|
||||
runp -q -p 'netcat -v -w2 -z' 2>&1 | egrep '(succeeded!|open)$'
|
||||
|
||||
1$ cat /tmp/host-port.txt |
|
||||
runp -q -p 'netcat -v -w2 -z' 2>&1 | egrep '(succeeded!|open)$'
|
||||
|
||||
# --colsep is needed to split the line
|
||||
cat /tmp/host-port.txt | \
|
||||
parallel --colsep ' ' netcat -v -w2 -z 2>&1 | egrep '(succeeded!|open)$'
|
||||
1$ cat /tmp/host-port.txt |
|
||||
parallel --colsep ' ' netcat -v -w2 -z 2>&1 |
|
||||
egrep '(succeeded!|open)$'
|
||||
# or use uq for unquoted:
|
||||
cat /tmp/host-port.txt | \
|
||||
parallel netcat -v -w2 -z {=uq=} 2>&1 | egrep '(succeeded!|open)$'
|
||||
1$ cat /tmp/host-port.txt |
|
||||
parallel netcat -v -w2 -z {=uq=} 2>&1 |
|
||||
egrep '(succeeded!|open)$'
|
||||
|
||||
https://github.com/jreisinger/runp (Last checked: 2020-04)
|
||||
|
||||
|
@ -2603,11 +2607,11 @@ seconds (compared to 10 seconds with GNU B<parallel>).
|
|||
=head3 Examples as GNU Parallel
|
||||
|
||||
1$ papply gzip *.txt
|
||||
|
||||
|
||||
1$ parallel gzip ::: *.txt
|
||||
|
||||
|
||||
2$ papply "convert %F %n.jpg" *.png
|
||||
|
||||
|
||||
2$ parallel convert {} {.}.jpg ::: *.png
|
||||
|
||||
|
||||
|
@ -2633,18 +2637,18 @@ composed commands.
|
|||
=head3 Examples as GNU Parallel
|
||||
|
||||
1$ S="/tmp/example_socket"
|
||||
|
||||
|
||||
1$ ID=myid
|
||||
|
||||
|
||||
2$ async -s="$S" server --start
|
||||
|
||||
|
||||
2$ # GNU Parallel does not need a server to run
|
||||
|
||||
|
||||
3$ for i in {1..20}; do
|
||||
# prints command output to stdout
|
||||
async -s="$S" cmd -- bash -c "sleep 1 && echo test $i"
|
||||
done
|
||||
|
||||
|
||||
3$ for i in {1..20}; do
|
||||
# prints command output to stdout
|
||||
sem --id "$ID" -j100% "sleep 1 && echo test $i"
|
||||
|
@ -2653,38 +2657,38 @@ composed commands.
|
|||
# use -u or --line-buffer
|
||||
sem --id "$ID" -j100% --line-buffer "sleep 1 && echo test $i"
|
||||
done
|
||||
|
||||
|
||||
4$ # wait until all commands are finished
|
||||
async -s="$S" wait
|
||||
|
||||
|
||||
4$ sem --id "$ID" --wait
|
||||
|
||||
|
||||
5$ # configure the server to run four commands in parallel
|
||||
async -s="$S" server -j4
|
||||
|
||||
|
||||
5$ export PARALLEL=-j4
|
||||
|
||||
|
||||
6$ mkdir "/tmp/ex_dir"
|
||||
for i in {21..40}; do
|
||||
# redirects command output to /tmp/ex_dir/file*
|
||||
async -s="$S" cmd -o "/tmp/ex_dir/file$i" -- \
|
||||
bash -c "sleep 1 && echo test $i"
|
||||
done
|
||||
|
||||
|
||||
6$ mkdir "/tmp/ex_dir"
|
||||
for i in {21..40}; do
|
||||
# redirects command output to /tmp/ex_dir/file*
|
||||
sem --id "$ID" --result '/tmp/my-ex/file-{=$_=""=}'"$i" \
|
||||
"sleep 1 && echo test $i"
|
||||
done
|
||||
|
||||
|
||||
7$ sem --id "$ID" --wait
|
||||
|
||||
|
||||
7$ async -s="$S" wait
|
||||
|
||||
|
||||
8$ # stops server
|
||||
async -s="$S" server --stop
|
||||
|
||||
|
||||
8$ # GNU Parallel does not need to stop a server
|
||||
|
||||
|
||||
|
@ -2709,23 +2713,199 @@ name (%IN) can be put in the command line. You can only use %IN once.
|
|||
It can also run full command lines in parallel (like: B<cat file |
|
||||
parallel>).
|
||||
|
||||
=head3 EXAMPLES FROM pardi test.sh
|
||||
|
||||
1$ time pardi -v -c 100 -i data/decoys.smi -ie .smi -oe .smi \
|
||||
-o data/decoys_std_pardi.smi \
|
||||
-w '(standardiser -i %IN -o %OUT 2>&1) > /dev/null'
|
||||
|
||||
1$ cat data/decoys.smi |
|
||||
time parallel -N 100 --pipe --cat \
|
||||
'(standardiser -i {} -o {#} 2>&1) > /dev/null; cat {#}; rm {#}' \
|
||||
> data/decoys_std_pardi.smi
|
||||
|
||||
2$ pardi -n 1 -i data/test_in.types -o data/test_out.types \
|
||||
-d 'r:^#atoms:' -w 'cat %IN > %OUT'
|
||||
|
||||
2$ cat data/test_in.types | parallel -n 1 -k --pipe --cat \
|
||||
--regexp --recstart '^#atoms' 'cat {}' > data/test_out.types
|
||||
|
||||
3$ pardi -c 6 -i data/test_in.types -o data/test_out.types \
|
||||
-d 'r:^#atoms:' -w 'cat %IN > %OUT'
|
||||
|
||||
3$ cat data/test_in.types | parallel -n 6 -k --pipe --cat \
|
||||
--regexp --recstart '^#atoms' 'cat {}' > data/test_out.types
|
||||
|
||||
4$ pardi -i data/decoys.mol2 -o data/still_decoys.mol2 \
|
||||
-d 's:@<TRIPOS>MOLECULE' -w 'cp %IN %OUT'
|
||||
|
||||
4$ cat data/decoys.mol2 |
|
||||
parallel -n 1 --pipe --cat --recstart '@<TRIPOS>MOLECULE' \
|
||||
'cp {} {#}; cat {#}; rm {#}' > data/still_decoys.mol2
|
||||
|
||||
5$ pardi -i data/decoys.mol2 -o data/decoys2.mol2 \
|
||||
-d b:10000 -w 'cp %IN %OUT' --preserve
|
||||
|
||||
5$ cat data/decoys.mol2 |
|
||||
parallel -k --pipe --block 10k --recend '' --cat \
|
||||
'cat {} > {#}; cat {#}; rm {#}' > data/decoys2.mol2
|
||||
|
||||
https://github.com/UnixJunkie/pardi (Last checked: 2021-01)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN bthread AND GNU Parallel
|
||||
|
||||
Summary (see legend above):
|
||||
- - - I4 - - -
|
||||
- - - - - M6
|
||||
O1 - O3 - - - O7 O8 - -
|
||||
E1 - - - - - -
|
||||
- - - - - - - - -
|
||||
- -
|
||||
|
||||
B<bthread> takes around 1 sec per MB of output. The maximal output
|
||||
line length is 1073741759.
|
||||
|
||||
You cannot quote spaces in the command, so you cannot run composed
|
||||
commands like B<sh -c "echo a; echo b">.
|
||||
|
||||
https://gitlab.com/netikras/bthread (Last checked: 2021-01)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN simple_gpu_scheduler AND GNU Parallel
|
||||
|
||||
Summary (see legend above):
|
||||
I1 - - - - - I7
|
||||
M1 - - - - M6
|
||||
- O2 O3 - - O6 - N/A N/A O10
|
||||
E1 - - - - - -
|
||||
- - - - - - - - -
|
||||
- -
|
||||
|
||||
=head3 EXAMPLES FROM simple_gpu_scheduler MANUAL
|
||||
|
||||
1$ simple_gpu_scheduler --gpus 0 1 2 < gpu_commands.txt
|
||||
|
||||
1$ parallel -j3 --shuf \
|
||||
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' < gpu_commands.txt
|
||||
|
||||
2$ simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" \
|
||||
-p lr 0.001 0.0005 0.0001 -p bs 32 64 128 |
|
||||
simple_gpu_scheduler --gpus 0,1,2
|
||||
|
||||
2$ parallel --header : --shuf -j3 -v \
|
||||
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =}' \
|
||||
python3 train_dnn.py --lr {lr} --batch_size {bs} \
|
||||
::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
|
||||
|
||||
3$ simple_hypersearch \
|
||||
"python3 train_dnn.py --lr {lr} --batch_size {bs}" \
|
||||
--n-samples 5 -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 |
|
||||
simple_gpu_scheduler --gpus 0,1,2
|
||||
|
||||
3$ parallel --header : --shuf \
|
||||
CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1; seq() > 5 and skip() =}' \
|
||||
python3 train_dnn.py --lr {lr} --batch_size {bs} \
|
||||
::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
|
||||
|
||||
4$ touch gpu.queue
|
||||
tail -f -n 0 gpu.queue | simple_gpu_scheduler --gpus 0,1,2 &
|
||||
echo "my_command_with | and stuff > logfile" >> gpu.queue
|
||||
|
||||
4$ touch gpu.queue
|
||||
tail -f -n 0 gpu.queue |
|
||||
parallel -j3 CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' &
|
||||
# Needed to fill job slots once
|
||||
seq 3 | parallel echo true >> gpu.queue
|
||||
# Add jobs
|
||||
echo "my_command_with | and stuff > logfile" >> gpu.queue
|
||||
# Needed to flush output from completed jobs
|
||||
seq 3 | parallel echo true >> gpu.queue
|
||||
|
||||
https://github.com/ExpectationMax/simple_gpu_scheduler (Last checked:
|
||||
2021-01)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN parasweep AND GNU Parallel
|
||||
|
||||
B<parasweep> is a Python module for facilitating parallel parameter
|
||||
sweeps.
|
||||
|
||||
A B<parasweep> job will normally take a text file as input. The text
|
||||
file contains arguments for the job. Some of these arguments will be
|
||||
fixed and some of them will be changed by B<parasweep>.
|
||||
|
||||
It does this by having a template file such as template.txt:
|
||||
|
||||
Xval: {x}
|
||||
Yval: {y}
|
||||
FixedValue: 9
|
||||
# x with 2 decimals
|
||||
DecimalX: {x:.2f}
|
||||
TenX: ${x*10}
|
||||
RandomVal: {r}
|
||||
|
||||
and from this template it generates the file to be used by the job by
|
||||
replacing the replacement strings.
|
||||
|
||||
Being a Python module, B<parasweep> integrates more tightly with Python than
|
||||
GNU B<parallel>. You get the parameters directly in a Python data
|
||||
structure. With GNU B<parallel> you can use the JSON or CSV output
|
||||
format to get something similar, but you would have to read the
|
||||
output.
|
||||
|
||||
B<parasweep> has a filtering method to ignore parameter combinations
|
||||
you do not need.
|
||||
|
||||
Instead of calling the jobs directly, B<parasweep> can use Python's
|
||||
Distributed Resource Management Application API to make jobs run with
|
||||
different cluster software.
|
||||
|
||||
|
||||
=head3 Future support in GNU Parallel
|
||||
|
||||
B<Future> versions of GNU B<parallel> may support templates
|
||||
with replacement strings. Such as:
|
||||
|
||||
Xval: {x}
|
||||
Yval: {y}
|
||||
FixedValue: 9
|
||||
# x with 2 decimals
|
||||
DecimalX: {=x $_=sprintf("%.2f",$_) =}
|
||||
TenX: {=x $_=$_*10 =}
|
||||
RandomVal: {= $_=rand() =}
|
||||
|
||||
that can be used like:
|
||||
|
||||
parallel --header : --tmpl my.tmpl {#}.t myprog {#}.t ::: x 1 2 3 ::: y 1 2 3
|
||||
|
||||
Filtering may also be supported as:
|
||||
|
||||
parallel --filter '{1} > {2}' echo ::: 1 2 3 ::: 1 2 3
|
||||
|
||||
which will basically do:
|
||||
|
||||
parallel echo '{= $arg[1] > $arg[2] and skip() =}' ::: 1 2 3 ::: 1 2 3
|
||||
|
||||
https://github.com/eviatarbach/parasweep (Last checked: 2021-01)
|
||||
|
||||
|
||||
=head2 Todo
|
||||
|
||||
https://reposhub.com/python/command-line-tools/Akianonymus-parallel-bash.html
|
||||
|
||||
https://github.com/Nukesor/pueue
|
||||
|
||||
PASH: Light-touch Data-Parallel Shell Processing
|
||||
|
||||
https://arxiv.org/pdf/2012.15443.pdf KumQuat
|
||||
|
||||
https://arxiv.org/pdf/2007.09436.pdf
|
||||
|
||||
https://github.com/UnixJunkie/PAR (Same as http://savannah.nongnu.org/projects/par above?)
|
||||
|
||||
https://gitlab.com/netikras/bthread
|
||||
|
||||
https://github.com/JeiKeiLim/simple_distribute_job
|
||||
|
||||
https://github.com/Akianonymus/parallel-bash
|
||||
|
||||
https://github.com/reggi/pkgrun
|
||||
|
||||
https://github.com/benoror/better-npm-run - not obvious how to use
|
||||
|
@ -2738,29 +2918,6 @@ https://github.com/flesler/parallel
|
|||
|
||||
https://github.com/Julian/Verge
|
||||
|
||||
https://github.com/ExpectationMax/simple_gpu_scheduler
|
||||
simple_gpu_scheduler --gpus 0 1 2 < gpu_commands.txt
|
||||
parallel -j3 --shuf CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' < gpu_commands.txt
|
||||
|
||||
simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | simple_gpu_scheduler --gpus 0,1,2
|
||||
parallel --header : --shuf -j3 -v CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =}' python3 train_dnn.py --lr {lr} --batch_size {bs} ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
|
||||
|
||||
simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" --n-samples 5 -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | simple_gpu_scheduler --gpus 0,1,2
|
||||
parallel --header : --shuf CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1; seq() > 5 and skip() =}' python3 train_dnn.py --lr {lr} --batch_size {bs} ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128
|
||||
|
||||
touch gpu.queue
|
||||
tail -f -n 0 gpu.queue | simple_gpu_scheduler --gpus 0,1,2 &
|
||||
echo "my_command_with | and stuff > logfile" >> gpu.queue
|
||||
|
||||
touch gpu.queue
|
||||
tail -f -n 0 gpu.queue | parallel -j3 CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' &
|
||||
# Needed to fill job slots once
|
||||
seq 3 | parallel echo true >> gpu.queue
|
||||
# Add jobs
|
||||
echo "my_command_with | and stuff > logfile" >> gpu.queue
|
||||
# Needed to flush output from completed jobs
|
||||
seq 3 | parallel echo true >> gpu.queue
|
||||
|
||||
|
||||
=head1 TESTING OTHER TOOLS
|
||||
|
||||
|
|
|
@ -4,6 +4,12 @@
|
|||
# Each should take 3-10s and be possible to run in parallel
|
||||
# I.e.: No race conditions, no logins
|
||||
|
||||
# Regression test for bug #59893 (--halt soon combined with --delay).
# Pipes the numbers 0..10 into parallel, run with --delay 1, -uj2 and
# --halt soon,fail=1; each job sleeps 0.N seconds, prints N and exits
# with status N, so the job for input 0 exits 0 and every later job
# exits non-zero.
# NOTE(review): `stdout` is a helper defined outside this view --
# presumably it folds stderr into stdout so the halt message is captured;
# confirm against the test harness.
par_delay_halt_soon() {
|
||||
echo "bug #59893: --halt soon doesn't work with --delay"
|
||||
seq 0 10 |
|
||||
stdout parallel --delay 1 -uj2 --halt soon,fail=1 'sleep 0.{};echo {};exit {}'
|
||||
}
|
||||
|
||||
par_show_limits() {
|
||||
echo '### Test --show-limits'
|
||||
(
|
||||
|
|
|
@ -60,6 +60,11 @@ par_compute_command_len a_b_c_d
|
|||
par_compute_command_len
|
||||
par_delay ### Test --delay
|
||||
par_delay More than 3.3 secs: OK
|
||||
par_delay_halt_soon bug #59893: --halt soon doesn't work with --delay
|
||||
par_delay_halt_soon 0
|
||||
par_delay_halt_soon 1
|
||||
par_delay_halt_soon parallel: This job failed:
|
||||
par_delay_halt_soon sleep 0.1;echo 1;exit 1
|
||||
par_delay_human_readable a
|
||||
par_delay_human_readable b
|
||||
par_delay_human_readable c
|
||||
|
|
Loading…
Reference in a new issue