From 9321ac863a7fd14e41673a6c5aefab6a7687ae9f Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Fri, 5 Jul 2024 07:15:15 +0200 Subject: [PATCH] parallel: Shell detection with rosetta ("busybox" for Docker/arm64). --- doc/release_new_version | 10 + src/parallel | 20 +- src/parallel_alternatives.pod | 589 +++++++++++++++++++++++++++++----- src/parallel_examples.pod | 18 ++ 4 files changed, 557 insertions(+), 80 deletions(-) diff --git a/doc/release_new_version b/doc/release_new_version index 4ee2fdfb..613753cf 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -284,6 +284,16 @@ New in this release: News about GNU Parallel: +https://v2thegreat.com/2024/06/19/lessons-learned-from-scaling-to-multi-terabyte-datasets/ + +https://medium.com/@nfrozi/efisiensi-maksimal-cara-paralelisasi-perintah-di-cli-linux-f4fda3afe2a0 + +https://usercomp.com/news/1295687/parallel-ssh-file-transfer-with-gnuparallel +https://usercomp.com/news/1300817/gnu-parallel-process-one-line-with-pipe +https://datascience.101workbook.org/06-hpc/06-parallel/01-intro-to-gnu-parallel/#gsc.tab=0 + +https://www.cyberciti.biz/faq/linux-unix-copy-a-file-to-multiple-directories-using-cp-command/ + <<>> GNU Parallel - For people who live life in the parallel lane. diff --git a/src/parallel b/src/parallel index dd489fcc..26d7be02 100755 --- a/src/parallel +++ b/src/parallel @@ -2603,6 +2603,10 @@ sub parse_options(@) { # list. Ideas that will cost work and which have not been tested # are, however, unlikely to be prioritized. # + # *YOU* will be harming free software by removing the notice. You + # accept to be added to a public hall of shame by removing the + # line. That includes you, George and Andreas. 
+ # # Please note that GPL version 3 gives you the right to fork GNU # Parallel under a new name, but it does not give you the right to # distribute modified copies with the citation notice disabled in @@ -2636,11 +2640,6 @@ sub parse_options(@) { # If you disagree, please read (especially 77-): # https://www.fordfoundation.org/media/2976/roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf - - # *YOU* will be harming free software by removing the notice. You - # accept to be added to a public hall of shame by removing the - # line. That includes you, George and Andreas. - parse_halt(); if($ENV{'PARALLEL_ENV'}) { @@ -6731,10 +6730,10 @@ sub which(@) { if(not $regexp) { # All shells known to mankind # - # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # ash bash csh dash fdsh fish fizsh ion ksh ksh93 mksh pdksh # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh - my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh + my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ion ksh ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh -sh -csh -bash), '-sh (sh)' # sh on FreeBSD @@ -6744,7 +6743,7 @@ sub which(@) { # /bin/sh /sbin/sh /opt/csw/sh # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; - $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'. + $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|\S*busybox |\S*rosetta )'. '(-?)('. $shell. 
'))( *$| [^(])'; %fakename = ( # sh disguises itself as -sh (sh) on FreeBSD @@ -6776,6 +6775,11 @@ sub which(@) { if(open(my $fd, "<", "/proc/$testpid/cmdline")) { local $/="\0"; chomp($shellline = <$fd>); + if($shellline =~ /busybox$|rosetta$/) { + # Possibly: busybox \0 sh or .../rosetta \0 /bin/bash + # Skip busybox/rosetta + chomp($shellline = <$fd>); + } if($shellline =~ /$regexp/o) { my $shellname = $4 || $8; my $dash = $3 || $7; diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod index dbf6ab70..82c51a33 100644 --- a/src/parallel_alternatives.pod +++ b/src/parallel_alternatives.pod @@ -118,6 +118,10 @@ The following features are in some of the comparable tools: =item E7. Only spawn new jobs if load is less than a limit +=item E8. Full command has non-zero exit value if one job has non-zero exit value + +=item E9. Jobs can be started without reading all input first + =back @@ -183,7 +187,7 @@ parallel: =item O1 O2 O3 O4 O5 O6 O7 O8 O9 O10 -=item E1 E2 E3 E4 E5 E6 E7 +=item E1 E2 E3 E4 E5 E6 E7 E8 E9 =item R1 R2 R3 R4 R5 R6 R7 R8 R9 @@ -204,7 +208,7 @@ Summary (see legend above): =item - O2 O3 - O5 O6 -=item E1 - - - - - - +=item E1 - - - - - - E8 E9 =item - - - - - x - - - @@ -541,7 +545,7 @@ using GNU B: 'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}' https://www.gnu.org/software/pexec/ -(Last checked: 2010-12) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel @@ -614,7 +618,7 @@ using GNU B: 1$ parallel mogrify -flip ::: *.jpg https://github.com/exzombie/prll -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN dxargs AND GNU Parallel @@ -645,7 +649,7 @@ berlios.de/usage.html ported to GNU B: find dir -execdir sem cmd {} \; https://github.com/cklin/mdm -(Last checked: 2019-01) +(Last checked: 2014-06) =head2 DIFFERENCES BETWEEN xapply AND GNU Parallel @@ -708,8 +712,8 @@ using GNU B: 11$ parallel '[ -f {} ] && echo {}' < List | ... 
-https://www.databits.net/~ksb/msrc/local/bin/xapply/xapply.html (Last -checked: 2010-12) +https://www.databits.net/~ksb/msrc/local/bin/xapply/xapply.html +(Last checked: 2010-12) =head2 DIFFERENCES BETWEEN AIX apply AND GNU Parallel @@ -753,55 +757,358 @@ ssw_aix_71/com.ibm.aix.cmds1/apply.htm =head2 DIFFERENCES BETWEEN paexec AND GNU Parallel +Summary (see legend above): + +=over + +=item I1 - - - - - I7 + +=item - - M3 - - - + +=item (O1) O2 O3 (O4) (O5) O6 - O8 x - + +=item E1 - - - (E5) - - - + +=item R1 - - - x R6 - R8 R9 + +=item - - + +=back + B can run jobs in parallel on both the local and remote computers. -B requires commands to print a blank line as the last -output. This means you will have to write a wrapper for most programs. - B has a job dependency facility so a job can depend on another -job to be executed successfully. Sort of a poor-man's B. +job to be executed successfully. Sort of a poor-man's B. This +can partly be emulated in GNU B with B. -=head3 EXAMPLES FROM paexec's EXAMPLE CATALOG +B fails if output of a single line is > 2 GB. Output of a 2 GB +line requires 6 GB RAM. Lines of standard output are interleaved (but +there is no half line mixing), and output of standard error +mixes. Combined with B output order can be the same as +input order. In certain situations B will eat the last newline +of standard output. -Here are the examples from B's example catalog with the equivalent +There seems to be no way to make 4 jobs run on a remote server with 4 +cores and 16 jobs on a remote server with 16 cores. + + +=head3 EXAMPLES FROM man paexec + +Here are the examples from B with the equivalent using GNU +B. + + 1$ paexec -t '/usr/bin/ssh -x' -n 'host1 host2 host3' \ + -le -g -c calculate-me < tasks.txt | + paexec_reorder -Mf -Sl + + # GNU Parallel cannot stop processing jobs that depend on another. 
+ # It can either try all: + 1$ tsort tasks.txt | + parallel --ssh '/usr/bin/ssh -x' -S "host1,host2,host3" \ + --tagstring {#} --pipe -N1 --log my.log calculate-me + + # Or it can stop at the first failing: + 1$ tsort tasks.txt | + parallel --ssh '/usr/bin/ssh -x' -S "host1,host2,host3" \ + --tagstring {#} --halt now,fail=1 --pipe -N1 --log my.log calculate-me + + # To retry the failed and missing tasks: + 1$ tsort tasks.txt | + parallel --ssh '/usr/bin/ssh -x' -S "host1,host2,host3" \ + --tagstring {#} --halt now,fail=1 --pipe -N1 --joblog my.log \ + --resume-failed calculate-me + + 2$ ls -1 *.wav | paexec -x -n +4 -c 'oggenc -Q' + + 2$ ls -1 *.wav | parallel -j4 oggenc -Q + + 3$ ls -1 *.wav | paexec -xCil -n+4 flac -f --silent + + 3$ ls -1 *.wav | parallel --tagstring {#} -j4 'echo {}; flac -f --silent {}' + + 4$ { uname -s; uname -r; uname -m; } | + paexec -x -lp -n+2 -c banner | + paexec_reorder -l + + 4$ { uname -s; uname -r; uname -m; } | + parallel --tagstring '{#}' -k \ + 'banner {} | perl -pe "s/^/getppid().\" \"/e"' + + 5$ find . -name '*.dat' -print0 | + paexec -0 -n+10 -C -J// scp // remoteserver:/remote/path + + 5$ find . -name '*.dat' -print0 | + parallel -0 -j10 -I// scp // remoteserver:/remote/path + + 6$ ls -1 *.txt | paexec -n+10 -J%% -c 'awk "BEGIN {print toupper(\"%%\")}"' + + 6$ ls -1 *.txt | parallel -j10 -I%% 'awk "BEGIN {print toupper(\"%%\")}"' + +=head3 EXAMPLES FROM presentation/paexec.tex + + 7$ ls -1 *.wav | \ + paexec -x -c 'flac -s' -n +4 > /dev/null + + 7$ ls -1 *.wav | \ + parallel -j4 flac -s > /dev/null + + 8$ cat ~/bin/toupper + #!/usr/bin/awk -f + { + print " ", toupper($0) + print "" # empty line -- end-of-task marker! + fflush() # We must flush stdout! 
+ } + + cat tasks + apple + bananas + orange + + paexec -t ssh -c ~/bin/toupper -n 'server1 server2' < tasks + + 8$ parallel --pipe -n1 -S server1,server2 ~/bin/toupper < tasks + + 9$ paexec -lr -t ssh -c ~/bin/toupper -n 'server1 server2' < tasks + + 9$ # GNU Parallel has no easy way to prepend the server + parallel --tagstring {#} --pipe -n1 -S server1,server2 ~/bin/toupper < tasks + cat tasks | parallel --tagstring {#} --pipe -n1 -S server1,server2 --plus \ + ~/bin/toupper '| perl -pe "s/^/{sshlogin}/"' < tasks + + 10$ paexec -n +4 -c ~/bin/toupper < tasks + + 10$ parallel -j4 --pipe -n1 ~/bin/toupper < tasks + + 11$ paexec -x -t ssh -n 'server1 server2' \ + -c "awk 'BEGIN {print toupper(ARGV[1])}' " < tasks + + 11$ parallel -S 'server1,server2' \ + "awk 'BEGIN {print toupper(ARGV[1])}'" < tasks + + 12$ paexec -x -C -t ssh -n 'server1 server2' \ + awk 'BEGIN {print toupper(ARGV[1])}' < tasks + + 12$ parallel -S 'server1,server2' -q \ + awk 'BEGIN {print toupper(ARGV[1])}' < tasks + + 13$ paexec -Z240 -x -t ssh -n 'server1 badhostname server2' \ + -c "awk 'BEGIN {print toupper(ARGV[1])}' " < tasks + + 13$ parallel --filter-hosts -S 'server1,badhostname,server2' \ + "awk 'BEGIN {print toupper(ARGV[1])}' " < tasks + + 14$ cat ~/bin/pbanner + #!/usr/bin/env sh + while read task; do + banner -f M "$task" | pv -qL 300 + echo "$PAEXEC_EOT" # end-of-task marker + done + + cat tasks + pae + xec + + paexec -l -mt='SE@X-L0S0!&' -c ~/bin/pbanner -n +2 < tasks | + paexec_reorder -mt='SE@X-L0S0!&' + + 14$ paexec -y -lc ~/bin/pbanner -n+2 < tasks | paexec_reorder -y + + 14$ paexec -l -x -c banner -n+2 < tasks + + 14$ parallel --pipe -n1 -j2 ~/bin/pbanner < tasks + + 16$ cat ~/tmp/packages_to_build + audio/cd-discid audio/abcde + textproc/gsed audio/abcde + audio/cdparanoia audio/abcde + audio/id3v2 audio/abcde + audio/id3 audio/abcde + misc/mkcue audio/abcde + shells/bash audio/abcde + devel/libtool-base audio/cdparanoia + devel/gmake audio/cdparanoia + devel/libtool-base 
audio/id3lib + devel/gmake audio/id3v2 + audio/id3lib audio/id3v2 + devel/m4 devel/bison + lang/f2c devel/libtool-base + devel/gmake misc/mkcue + devel/bison shells/bash + + cat ~/bin/pkg_builder + #!/usr/bin/awk -f + + { + print "build " $0 + print "success" # build succeeded! (paexec -ms=) + print "" # end-of-task marker + fflush() # we must flush stdout + } + + paexec -g -l -c ~/bin/pkg_builder -n 'server2 server1' \ + -t ssh < ~/tmp/packages_to_build | paexec_reorder + + # GNU Parallel cannot postpone jobs that depend on another. + # In some cases this will work + 16$ tsort ~/tmp/packages_to_build | parallel -S server2,server1 \ + --pipe -n1 ~/bin/pkg_builder + + 17$ cat ~/bin/pkg_builder + #!/usr/bin/awk -f + + { + print "build " $0 + if ($0 == "devel/gmake") + print "failure" # Oh no... + exit 255 # Exit value needed for GNU Parallel + else + print "success" # build succeeded! + + print "" # end-of-task marker + fflush() # we must flush stdout + } + + paexec -gl -c ~/bin/pkg_builder -n 'server2 server1' \ + -t ssh < ~/tmp/packages_to_build | paexec_reorder + + # GNU Parallel cannot refrain from starting jobs, that depend on others + # In some cases this will work + 17$ tsort ~/tmp/packages_to_build | parallel -S server2,server1 \ + --halt now,fail=1 --pipe -n1 ~/bin/pkg_builder + + 18$ cat ~/bin/pkg_builder + #!/usr/bin/awk -f + + { + "hostname -s" | getline hostname + print "build " $0 " on " hostname + + if (hostname == "server1" && $0 == "textproc/gsed") + exit 139 + # Damn it, I'm dying... + # Exit value is needed by GNU Parallel + else + print "success" # Yes! 
:-) + + print "" # end-of-task marker + fflush() # we must flush stdout + } + + paexec -gl -Z300 -t ssh -c ~/bin/pkg_builder \ + -n 'server2 server1' < ~/tmp/packages_to_build | + paexec_reorder > result + + # GNU Parallel retries a job on another server, if --retries > 1 + 17$ tsort ~/tmp/packages_to_build | parallel -S server2,server1 \ + --halt now,fail=1 --retries 2 --pipe -n1 ~/bin/pkg_builder + + 18$ ls -1 *.wav | paexec -x -c 'flac -s' -n+3 >/dev/null + + 18$ ls -1 *.wav | parallel -j3 flac -s >/dev/null + + 19$ ls -1 *.wav | paexec -ixC -n+3 oggenc -Q | grep . + + 19$ ls -1 *.wav | parallel -j3 'echo {}; oggenc -Q {}' | grep . + + 20$ cat calc + #!/bin/sh + # $1 -- task given on input + if test $1 = huge; then + sleep 6 + else + sleep 1 + fi + echo "task $1 done" + + printf 'small1\nsmall2\nsmall3\nsmall4\nsmall5\nhuge\n' | + time -p paexec -c ~/bin/calc -n +2 -xg | grep -v success + + 20$ printf 'small1\nsmall2\nsmall3\nsmall4\nsmall5\nhuge\n' | + time -p parallel -j2 ~/bin/calc | grep -v success + + 21$ printf 'small1\nsmall2\nsmall3\nsmall4\nweight: huge 6\n' | + time -p paexec -c ~/bin/calc -n +2 -x -W1 | grep -v success + + 21$ # GNU Parallel does not support weighted jobs. + # It can be simulated by sorting: + printf 'small1\nsmall2\nsmall3\nsmall4\nweight: huge 6\n' | + perl -pe 's/^weight: (.*) (\d+)/$2 $1/ or s/^/1 /' | + sort -nr | time parallel ~/bin/calc '{=s/^\d* //=}' | + grep -v success + +=head3 EXAMPLES FROM paexec's example dir + +Here are the examples from B's example dir with the equivalent using GNU B: -=head4 1_div_X_run +=head4 all_substr - 1$ ../../paexec -s -l -c "`pwd`/1_div_X_cmd" -n +1 <): 4$ parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav https://github.com/danielgtaylor/ladon -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN jobflow AND GNU Parallel @@ -1067,7 +1374,7 @@ jobs. 
This can be emulated by GNU B using B's B: 5$ seq 100 | parallel echo '{= $_>10 and $_<=20 or skip() =}' https://github.com/rofl0r/jobflow -(Last checked: 2022-05) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN gargs AND GNU Parallel @@ -1641,10 +1948,10 @@ computer has 8 cores). GNU B can be used as a poor-man's version of ClusterSSH: -B + parallel --nonall -S server-a,server-b do_stuff foo bar https://github.com/duncs/clusterssh -(Last checked: 2010-12) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN coshell AND GNU Parallel @@ -1693,7 +2000,7 @@ It can be emulated with GNU B using this Bash function: } https://github.com/tfogo/spread -(Last checked: 2024-04) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN pyargs AND GNU Parallel @@ -1737,7 +2044,7 @@ and fails on B. parallel seq ::: 1 2 3 4 5 6 https://github.com/robertblackwell/pyargs -(Last checked: 2019-01) +(Last checked: 2024-01) =head2 DIFFERENCES BETWEEN concurrently AND GNU Parallel @@ -2022,7 +2329,7 @@ template to generate the jobs, but requires jobs to be in a file. Output from the jobs mix. https://github.com/john01dav/spp -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN paral AND GNU Parallel @@ -2092,7 +2399,7 @@ the GNU B command): echo g && sleep 0.5 && echo h" https://github.com/amattn/paral -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN concurr AND GNU Parallel @@ -2120,7 +2427,7 @@ B deals badly empty input files and with output larger than 64 KB. https://github.com/mmstick/concurr -(Last checked: 2019-01) +(Last checked: 2024-01) =head2 DIFFERENCES BETWEEN lesser-parallel AND GNU Parallel @@ -2134,7 +2441,7 @@ hardly any options, whereas B gives you the full GNU B experience. 
https://github.com/kou1okada/lesser-parallel -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN npm-parallel AND GNU Parallel @@ -2145,7 +2452,7 @@ There are no examples and very little documentation, so it is hard to compare to GNU B. https://github.com/spion/npm-parallel -(Last checked: 2019-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN machma AND GNU Parallel @@ -2359,7 +2666,7 @@ https://github.com/codingo/Interlace I have been unable to get the code to run at all. It seems unfinished. https://github.com/otonvm/Parallel -(Last checked: 2019-02) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN k-bx par AND GNU Parallel @@ -2434,7 +2741,7 @@ will cause the system to freeze if there are so many jobs that there is not enough memory to run them all at the same time. https://github.com/royriojas/shell-executor -(Last checked: 2019-02) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN non-GNU par AND GNU Parallel @@ -2555,7 +2862,7 @@ corresponding GNU B and GNU B commands: wait https://github.com/akramer/lateral -(Last checked: 2019-03) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN with-this AND GNU Parallel @@ -2589,7 +2896,7 @@ B gives some additional information, so the output has to be cleaned before piping it to the next command. https://github.com/amritb/with-this.git -(Last checked: 2019-03) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN Tollef's parallel (moreutils) AND GNU Parallel @@ -2736,7 +3043,7 @@ lost. B buffers in RAM, so output bigger than the machine's virtual memory will cause the machine to crash. https://github.com/voodooEntity/threader -(Last checked: 2020-04) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN runp AND GNU Parallel @@ -3020,7 +3327,7 @@ composed commands. 
https://github.com/ctbur/async/ -(Last checked: 2023-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN pardi AND GNU Parallel @@ -3121,7 +3428,7 @@ You cannot quote space in the command, so you cannot run composed commands like B. https://gitlab.com/netikras/bthread -(Last checked: 2021-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN simple_gpu_scheduler AND GNU Parallel @@ -3187,7 +3494,7 @@ Summary (see legend above): seq 3 | parallel echo true >> gpu.queue https://github.com/ExpectationMax/simple_gpu_scheduler -(Last checked: 2021-01) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN parasweep AND GNU Parallel @@ -3250,7 +3557,7 @@ https://github.com/eviatarbach/parasweep (Last checked: 2021-01) -=head2 DIFFERENCES BETWEEN parallel-bash AND GNU Parallel +=head2 DIFFERENCES BETWEEN parallel-bash(2021) AND GNU Parallel Summary (see legend above): @@ -3313,8 +3620,99 @@ running jobs. 4$ something | parallel -j 5 echo {} {} -https://reposhub.com/python/command-line-tools/Akianonymus-parallel-bash.html -(Last checked: 2021-06) +https://github.com/Akianonymus/parallel-bash/ +(Last checked: 2024-06) + + +=head2 DIFFERENCES BETWEEN parallel-bash(2024) AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - - + +=item - - M3 - - M6 + +=item - O2 O3 - O5 O6 - O8 x O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B is written in pure bash. It is really fast (overhead +of ~0.05 ms/job compared to GNU B's 3-10 ms/job). So if your +jobs are extremely short lived, and you can live with the quite +limited command, this may be useful. + +It seems the number of jobs must be divisible by B<-p>, so it +sometimes does not run the jobs: + + # Does nothing + $ seq 3 | parallel-bash -p 4 bash -c 'touch myfile-{}' + +This should create myfile-1..3, but creates nothing. + +It splits the input into queues. Each queue is of length B<-p>. 
So +this will make 250 queues and run all 250 processes in parallel: + + $ seq 1000 | parallel-bash -p 4 bash -c 'sleep {}' + +This is quite different from B(2021) where B<-p> is the +number of workers - similar to B<--jobs> in GNU B. + +In other words: B<parallel-bash> does I<not> guarantee that only 4 jobs +will be run in parallel. This can overload your machine: + + # Warning: This will start 25000 processes - not just 4 + $ seq 100000 | parallel-bash -p 4 sleep {} + +If you are unlucky all long jobs may end up in the same queue: + + $ printf "%b\n" 1 1 1 1 5 5 5 5 1 1 1 1 | + time parallel -P4 sleep {} + (7 seconds) + $ printf "%b\n" 1 1 1 1 5 5 5 5 1 1 1 1 | + time ./parallel-bash.bash -p 4 -c sleep {} + (20 seconds) + +Ctrl-C kills the jobs (as expected). Ctrl-Z does not suspend running jobs. + + +=head3 EXAMPLES FROM parallel-bash + + 1$ main() { echo "${1}" ;} + export -f main + + 1$ printf "%b\n" {1..1000} | ./parallel-bash -p 10 main {} + + 1$ printf "%b\n" {1..1000} | parallel -j 100 main {} + + 2$ # Number of inputs must be divisible by 5 + some_input | parallel-bash -p 5 echo + + 2$ some_input | parallel -j 5 echo + + 3$ # Number of inputs must be divisible by 5 + parallel-bash -p 5 echo < some_file + + 3$ parallel -j 5 echo < some_file + + 4$ # Number of lines in 'some string' must be divisible by 5 + parallel-bash -p 5 echo <<< 'some string' + + 4$ parallel -j 5 -c echo <<< 'some string' + + 5$ something | parallel-bash -p 5 echo {} + + 5$ something | parallel -j 5 echo {} + +https://github.com/Akianonymus/parallel-bash/ +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN bash-concurrent AND GNU Parallel @@ -3336,7 +3734,7 @@ It uses an O(n*n) algorithm, so if you have 1000 independent jobs it takes 22 seconds to start it. 
https://github.com/themattrix/bash-concurrent -(Last checked: 2021-02) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN spawntool AND GNU Parallel @@ -3362,7 +3760,6 @@ Summary (see legend above): B reads a full command line from stdin which it executes in parallel. - http://code.google.com/p/spawntool/ (Last checked: 2021-07) @@ -3426,7 +3823,7 @@ Summary (see legend above): =over -=item I1 I2 - - - - I7 +=item I1 I2 - - - - (I7) =item - - M3 - - M6 @@ -3443,6 +3840,34 @@ Summary (see legend above): B uses Go templates for replacement strings. Quite similar to the I<{= perl expr =}> replacement string. +The basic replacement strings can be emulated by putting this into +B<~/.parallel/config>: + + --rpl '{{.Input}} ' + --rpl '{{.Time}} use DateTime; $_= DateTime->from_epoch(time);' + --rpl '{{.Start}} use DateTime; $_= DateTime->from_epoch($^T);' + +Then you can do: + + seq 10 | parallel sleep {{.Input}}';' echo {{.Start}} {{.Time}} + seq 10 | go-parallel -t 'bash -c "sleep {{.Input}}; echo \"{{.Start}}\" \"{{.Time}}\""' + +If the input is too long (64K), you get no error: + + perl -e 'print "works."."x"x100' | parallel.go -t 'echo {{noExt .Input}} ' + perl -e 'print "fails."."x"x100_000_000' | parallel.go -t 'echo {{noExt .Input}} ' + +Special chars are quoted: + + echo '"&+<>' | go-parallel echo + "&+<> + +but not shell quoted when using replacement strings: + + echo '"&+<>' | go-parallel -t 'echo {{.Input}}' + "&+<> + + =head3 EXAMPLES FROM go-parallel 1$ go-parallel -a ./files.txt -t 'cp {{.Input}} {{.Input | dirname | dirname}}' @@ -3457,8 +3882,14 @@ similar to the I<{= perl expr =}> replacement string. 
3$ parallel -a ./files.txt echo mkdir -p {} {/.} + 4$ time find ~/src/go -type f | go-parallel md5sum > /dev/null + + 4$ time find ~/src/go -type f | parallel md5sum > /dev/null + # Though you would probably do this instead: + time find ~/src/go -type f | parallel -X md5sum > /dev/null + https://github.com/mylanconnolly/parallel -(Last checked: 2021-07) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN p AND GNU Parallel @@ -4067,7 +4498,7 @@ jobs to run in parallel. It is half as fast as GNU B for short jobs. https://github.com/thilinaba/bash-parallel -(Last checked: 2023-05) +(Last checked: 2024-06) =head2 DIFFERENCES BETWEEN PaSH AND GNU Parallel @@ -4305,7 +4736,7 @@ Summary (see legend above): =item - - - - - - -=item - O2 O3 N/A - O6 - x x ?O10 +=item - O2 O3 x - O6 - x x ?O10 =item E1 - - - E5 - - @@ -4421,6 +4852,7 @@ cause segfault. https://github.com/simonjwright/parallelize (Last checked: 2024-04) + =head2 Todo https://github.com/justanhduc/task-spooler @@ -4477,7 +4909,7 @@ This test stresses whether output mixes. paralleltool="parallel -j 30" - cat <<-EOF > mycommand + cat <<-'EOF' > mycommand #!/bin/bash # If a, b, c, d, e, and f mix: Very bad @@ -4487,18 +4919,26 @@ This test stresses whether output mixes. 
perl -e 'print STDERR "d"x3000_000," "' perl -e 'print STDOUT "e"x3000_000," "' perl -e 'print STDERR "f"x3000_000," "' - echo - echo >&2 + echo "stdout line 1 of id $@" + echo "stderr line 1 of id $@" >&2 + perl -e 'print STDOUT "A"x3000_000," "' + perl -e 'print STDERR "B"x3000_000," "' + perl -e 'print STDOUT "C"x3000_000," "' + perl -e 'print STDERR "D"x3000_000," "' + perl -e 'print STDOUT "E"x3000_000," "' + perl -e 'print STDERR "F"x3000_000," "' + echo "stdout line 2 of id $@" + echo "stderr line 2 of id $@" >&2 EOF chmod +x mycommand # Run 30 jobs in parallel seq 30 | - $paralleltool ./mycommand > >(tr -s abcdef) 2> >(tr -s abcdef >&2) + $paralleltool -j 30 ./mycommand > >(tr -s a-zA-Z) 2> >(tr -s a-zA-Z >&2) # 'a c e' and 'b d f' should always stay together - # and there should only be a single line per job - + # For each job there should be 2 lines of standard output and standard error + # They should not be interleaved with other id's =head2 STDERRMERGE: Stderr is merged with stdout @@ -4674,7 +5114,12 @@ Some tools become very slow if output lines have many words. #!/bin/bash paralleltool="parallel -j0" - + + wcc() { + parallel --recend '' --block 100M --pipe 'LC_ALL=C wc' | + datamash -W sum 1 sum 2 sum 3 + } + cat <<-EOF > mycommand #!/bin/bash @@ -4683,7 +5128,7 @@ Some tools become very slow if output lines have many words. chmod +x mycommand # Run 1 job - seq 1 | $paralleltool ./mycommand | LC_ALL=C wc + seq 1 | $paralleltool ./mycommand | wcc =head1 AUTHOR diff --git a/src/parallel_examples.pod b/src/parallel_examples.pod index 9045f350..81205168 100644 --- a/src/parallel_examples.pod +++ b/src/parallel_examples.pod @@ -905,6 +905,24 @@ combined in the correct order. 
{}0000000-{}9999999 https://example.com/the/big/file > file +=head2 EXAMPLE: Keep order, but make job 1 output fast + +If you want the output of job 1 unbuffered, but otherwise keep the +order, you can do this: + + doit() { + echo "$@" ERR >&2 + echo "$@" out + sleep 0.$1 + echo "$@" ERR >&2 + echo "$@" out + } + export -f doit + parallel -k -u doit {= 'seq() > 1 and $opt::ungroup = 0' =} ::: 9 1 2 3 + +It will output job 1 with less overhead. + + =head2 EXAMPLE: Parallel grep B greps recursively through directories. GNU B can