diff --git a/doc/haikus b/doc/haikus index 22ed6fea..624c4e9a 100644 --- a/doc/haikus +++ b/doc/haikus @@ -1,8 +1,11 @@ Quote of the month: + It's really quite amazing how powerful and flexible it is + -- schwanengesang @tensegrist@twitter + Every time I install @ubuntu, one of the first tools I install is @gnuparallel. I love it. - -- Necati Demir @ndemir + -- Necati Demir @ndemir@twitter Today I'm grateful for GNU parallel, especially with the --colsep and --jobs parameters #GiveThanks diff --git a/doc/release_new_version b/doc/release_new_version index 2b3f145b..5614ea71 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -192,7 +192,7 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20210122 ('') released <<[stable]>> +Subject: GNU Parallel 20210122 ('Capitol Riots') released <<[stable]>> GNU Parallel 20210122 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ @@ -213,6 +213,8 @@ New in this release: News about GNU Parallel: +https://www.codenong.com/25172209/ + <<>> Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html diff --git a/src/parallel b/src/parallel index 883442de..e37fa7fd 100755 --- a/src/parallel +++ b/src/parallel @@ -322,6 +322,7 @@ sub parcat_script() { for $infh (@ready) { # There is only one key, namely the output file descriptor for my $outfd (keys %{$buffer{$infh}}) { + # TODO test if 65536 is optimal (2^17 is used elsewhere) $rv = sysread($infh, $buf, 65536); if (!$rv) { if($! 
== EAGAIN) { @@ -624,6 +625,7 @@ sub find_split_positions($$$) { } # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 # The optimal dd blocksize for freebsd = 2^15..2^17 + # The optimal dd blocksize for ubuntu (AMD6376) = 2^16 my $dd_block_size = 131072; # 2^17 my @pos; my ($recstart,$recend) = recstartrecend(); @@ -1232,6 +1234,11 @@ sub recstartrecend() { # this should only apply to the regexp $recstart = "(?:".$recstart.")"; $recend = "(?:".$recend.")"; + # Quote # and space + $recstart =~ s/#/\\#/g; + $recend =~ s/#/\\#/g; + $recstart =~ s/ /\\ /g; + $recend =~ s/ /\\ /g; } else { # $recstart/$recend = printf strings (\n) $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; @@ -2017,8 +2024,8 @@ sub parse_options(@) { # the alternatives instead? # See a list in: 'man parallel_alternatives' # - # If you want GNU Parallel to be maintained in the future keep - # this line. + # If you want GNU Parallel to be maintained in the future you + # should keep this line. citation_notice(); # Seriously: _YOU_ will be harming free software by removing the # notice. _YOU_ make it harder to justify spending time developing @@ -11722,7 +11729,6 @@ sub max_length($) { my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() . "/linelen"; my $cached_limit; - if(open(my $fh, "<", $len_cache)) { $cached_limit = <$fh>; $cached_limit || ::die_bug("Cannot read $len_cache"); diff --git a/src/parallel.pod b/src/parallel.pod index 780a6ef6..57e17559 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -338,6 +338,20 @@ sequence number of job the arguments +=item Z<> B + +=item Z<> B + +=item Z<> B + +=item Z<> B + +=item Z<> B + +=item Z<> B + +time functions + =back Example: @@ -3845,7 +3859,7 @@ is much faster. 
If it still does not fit in memory you can do this: - parallel --pipepart -a regexps.txt --block 1M grep -Ff - -n bigfile | \ + parallel --pipepart -a regexps.txt --block 1M grep -F -f - -n bigfile | \ sort -un | perl -pe 's/^\d+://' The 1M should be your free memory divided by the number of CPU threads and diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod index f02ae64d..5cdd85fc 100644 --- a/src/parallel_alternatives.pod +++ b/src/parallel_alternatives.pod @@ -23,7 +23,7 @@ developers with irregular releases and only maintained for a few years. -=head2 SUMMARY TABLE +=head2 SUMMARY LEGEND The following features are in some of the comparable tools: @@ -52,7 +52,7 @@ B O5. Stdout only contains stdout (standard output) from the command O6. Stderr only contains stderr (standard error) from the command O7. Buffering on disk - O8. Cleanup of file if killed + O8. Cleanup of temporary files if killed O9. Test if disk runs full during run O10. Output of a line bigger than 4 GB @@ -86,7 +86,7 @@ B ID = yes As every new version of the programs are not tested the table may be -outdated. Please file a bug-report if you find errors (See REPORTING +outdated. Please file a bug report if you find errors (See REPORTING BUGS). parallel: @@ -97,26 +97,10 @@ E1 E2 E3 E4 E5 E6 E7 R1 R2 R3 R4 R5 R6 R7 R8 R9 S1 S2 -find -exec: -- - - x - x - -- M2 M3 - - - - -- O2 O3 O4 O5 O6 -- - - - - - - -- - - - - - - - - -x x - -make -j: -- - - - - - - -- - - - - - -O1 O2 O3 - x O6 -E1 - - - E5 - -- - - - - - - - - -- - - =head2 DIFFERENCES BETWEEN xargs AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 I2 - - - - - - M2 M3 - - - - O2 O3 - O5 O6 @@ -212,6 +196,14 @@ https://www.gnu.org/software/findutils/ =head2 DIFFERENCES BETWEEN find -exec AND GNU Parallel +Summary (see legend above): +- - - x - x - +- M2 M3 - - - - +- O2 O3 O4 O5 O6 +- - - - - - - +- - - - - - - - - +x x + B offers some of the same possibilities as GNU B. 
B only works on files. Processing other input (such as @@ -223,6 +215,14 @@ https://www.gnu.org/software/findutils/ (Last checked: 2019-01) =head2 DIFFERENCES BETWEEN make -j AND GNU Parallel +Summary (see legend above): +- - - - - - - +- - - - - - +O1 O2 O3 - x O6 +E1 - - - E5 - +- - - - - - - - - +- - + B can run jobs in parallel, but requires a crafted Makefile to do this. That results in extra quoting to get filenames containing newlines to work correctly. @@ -238,7 +238,7 @@ https://www.gnu.org/software/make/ (Last checked: 2019-01) =head2 DIFFERENCES BETWEEN ppss AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 I2 - - - - I7 M1 - M3 - - M6 O1 - - x - - @@ -323,7 +323,7 @@ https://github.com/louwrentius/PPSS =head2 DIFFERENCES BETWEEN pexec AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 I2 - I4 I5 - - M1 - M3 - - M6 O1 O2 O3 - O5 O6 @@ -646,7 +646,7 @@ https://github.com/cheusov/paexec =head2 DIFFERENCES BETWEEN map(sitaramc) AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 - - I4 - - (I7) M1 (M2) M3 (M4) M5 M6 - O2 O3 - O5 - - N/A N/A O10 @@ -1962,7 +1962,7 @@ https://github.com/fd0/machma (Last checked: 2019-06) =head2 DIFFERENCES BETWEEN interlace AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): - I2 I3 I4 - - - M1 - M3 - - M6 - O2 O3 - - - - x x @@ -2337,7 +2337,7 @@ https://github.com/amritb/with-this.git (Last checked: 2019-03) =head2 DIFFERENCES BETWEEN Tollef's parallel (moreutils) AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): - - - I4 - - I7 - - M3 - - M6 - O2 O3 - O5 O6 - x x @@ -2363,7 +2363,7 @@ B parallel -j 3 ::: ls df "echo hi" =head2 DIFFERENCES BETWEEN rargs AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 - - - - - I7 - - M3 M4 - - - O2 O3 - O5 O6 - O8 - @@ -2422,7 +2422,7 @@ https://github.com/lotabout/rargs (Last checked: 2020-01) 
=head2 DIFFERENCES BETWEEN threader AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 - - - - - - M1 - M3 - - M6 O1 - O3 - O5 - - N/A N/A @@ -2444,7 +2444,7 @@ https://github.com/voodooEntity/threader (Last checked: 2020-04) =head2 DIFFERENCES BETWEEN runp AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): I1 I2 - - - - - M1 - (M3) - - M6 O1 O2 O3 - O5 O6 - N/A N/A - @@ -2570,7 +2570,7 @@ https://github.com/jreisinger/runp (Last checked: 2020-04) =head2 DIFFERENCES BETWEEN papply AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): - - - I4 - - - M1 - M3 - - M6 - - O3 - O5 - - N/A N/A O10 @@ -2616,7 +2616,7 @@ https://pypi.org/project/papply/ (Last checked: 2020-04) =head2 DIFFERENCES BETWEEN async AND GNU Parallel -Summary table (see legend above): +Summary (see legend above): - - - I4 - - I7 - - - - - M6 - O2 O3 - O5 O6 - N/A N/A O10 @@ -2690,88 +2690,35 @@ composed commands. https://github.com/ctbur/async/ (Last checked: 2020-11) + +=head2 DIFFERENCES BETWEEN pardi AND GNU Parallel + +Summary (see legend above): +I1 I2 - - - - I7 +M1 - - - - M6 +O1 O2 O3 O4 O5 - O7 - - O10 +E1 - - E4 - - - +- - - - - - - - - +- - + +B is very similar to B: It reads blocks +of data and not arguments. So it cannot insert an argument in the +command line. It puts the block into a temporary file, and this file +name (%IN) can be put in the command line. You can only use %IN once. + +It can also run full command lines in parallel (like: B). 
+ +https://github.com/UnixJunkie/pardi (Last checked: 2021-01) + + =head2 Todo -test_many_var() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -for a in `seq 11000`; do eval "export a$a=1" ; done -gen500k | stdout parallel --timeout 5 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_many_var_func() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -for a in `seq 5100`; do eval "export a$a=1" ; done -for a in `seq 5100`; do eval "a$a() { 1; }" ; done -for a in `seq 5100`; do eval export -f a$a ; done -gen500k | stdout parallel --timeout 21 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_many_var_func() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -for a in `seq 8000`; do eval "a$a() { 1; }" ; done -for a in `seq 8000`; do eval export -f a$a ; done -gen500k | stdout parallel --timeout 6 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_big_func() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -big=`seq 1000` -for a in `seq 50`; do eval "a$a() { '$big'; }" ; done -for a in `seq 50`; do eval export -f a$a ; done -gen500k | stdout parallel --timeout 4 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_many_var_big_func() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -big=`seq 1000` -for a in `seq 5100`; do eval "export a$a=1" ; done -for a in `seq 20`; do eval "a$a() { '$big'; }" ; done -for a in `seq 20`; do eval export -f a$a ; done -gen500k | stdout parallel --timeout 6 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_big_func_name() { -gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -big=`perl -e print\"x\"x10000` -for a in `seq 20`; do eval "export a$big$a=1" ; done -gen500k | stdout parallel --timeout 8 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - -test_big_var_func_name() { 
-gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; } -big=`perl -e print\"x\"x10000` -for a in `seq 2`; do eval "export a$big$a=1" ; done -for a in `seq 2`; do eval "a$big$a() { '$big'; }" ; done -for a in `seq 2`; do eval export -f a$big$a ; done -gen500k | stdout parallel --timeout 1000 -Xj1 'echo {} {} {} {} | wc' | perl -pe 's/\d{3,5} //g' -} - - - -tange@macosx:~$ for a in `seq 100`; do eval export a$a=fffffffffffffffffffffffff ; donetange@macosx:~$ seq 50000 | stdout parallel -Xj1 'echo {} {} | wc' | perl -pe 's/\d{3,5} //g' -tange@macosx:~$ for a in `seq 100`; do eval export a$a=fffffffffffffffffffffffff ; donetange@macosx:~$ seq 50000 | stdout parallel -Xj1 'echo {} {} | wc' | perl -pe 's/\d{3,5} //g' -tange@macosx:~$ for a in `seq 100`; do eval export -f a$a ; done - - -seq 100000 | stdout parallel -Xj1 'echo {} {} | wc' -export a=`seq 10000` -seq 100000 | stdout parallel -Xj1 'echo {} {} | wc' - - - - my $already_spread; - my $env_size; - - if($^O eq "darwin") { - $env_size ||= 500+length(join'',%ENV); - $max_len -= $env_size; - } - - PASH: Light-touch Data-Parallel Shell Processing -https://arxiv.org/pdf/2007.09436.pdf -https://github.com/UnixJunkie/pardi +https://arxiv.org/pdf/2012.15443.pdf KumQuat + +https://arxiv.org/pdf/2007.09436.pdf https://github.com/UnixJunkie/PAR (Same as http://savannah.nongnu.org/projects/par above?) diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod index 81b26ff7..e08a8dd6 100644 --- a/src/parallel_tutorial.pod +++ b/src/parallel_tutorial.pod @@ -2999,7 +2999,7 @@ When asking for help, always report the full output of this: Output: - GNU parallel 20200122 + GNU parallel 20210122 Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software Foundation, Inc. 
License GPLv3+: GNU GPL version 3 or later @@ -3161,7 +3161,7 @@ https://my.fsf.org/donate/ =back -(C) 2013-2020 Ole Tange, FDLv1.3 (See fdl.txt) +(C) 2013-2021 Ole Tange, FDLv1.3 (See fdl.txt) =cut diff --git a/src/parsort b/src/parsort index 78404a4d..c22db80d 100755 --- a/src/parsort +++ b/src/parsort @@ -18,7 +18,7 @@ B<parsort> uses GNU B<sort> to sort in parallel. It works just like B<sort> but faster on inputs with more than 1 M lines, if you have a multicore machine. -Hopefully these ideas will make it into GNU Sort in the future. +Hopefully these ideas will make it into GNU B<sort> in the future. =head1 EXAMPLE diff --git a/testsuite/REQUIREMENTS b/testsuite/REQUIREMENTS index 2c8f44d5..d7575120 100644 --- a/testsuite/REQUIREMENTS +++ b/testsuite/REQUIREMENTS @@ -262,10 +262,14 @@ lsh_setup() { lsh -c aes256-ctr --sloppy-host-authentication \ --capture-to ~/.lsh/host-acls localhost echo Added host-auth lsh-keygen | lsh-writekey -c none - lsh-export-key --openssh < ~/.lsh/identity.pub | - lsh -c aes256-ctr lo 'cat >>.ssh/authorized_keys' - lsh-export-key --openssh < ~/.lsh/identity.pub | - ssh csh@lo 'cat >>.ssh/authorized_keys' + export_key_to_local_users() { + lsh-export-key --openssh < ~/.lsh/identity.pub | + ssh -l $1 lo 'cat >>.ssh/authorized_keys' + } + export -f export_key_to_local_users + shellsplus | parallel --bar --timeout 5 export_key_to_local_users + shellsplus | parallel --bar --timeout 5 'lsh -l {} lo true || export_key_to_local_users {}' + shellsplus | parallel --bar --timeout 5 'lsh -l {} lo true || echo Fail {}' } add_freebsd() { diff --git a/testsuite/tests-to-run/parallel-local-1s.sh b/testsuite/tests-to-run/parallel-local-1s.sh index 55e23e30..0b233885 100644 --- a/testsuite/tests-to-run/parallel-local-1s.sh +++ b/testsuite/tests-to-run/parallel-local-1s.sh @@ -4,6 +4,18 @@ # Each should be taking 1-3s and be possible to run in parallel # I.e.: No race conditions, no logins +par_recend_recstart_hash() { + echo "### bug #59843: --regexp --recstart '#' fails" + 
(echo '#rec1'; echo 'bar'; echo '#rec2') | + parallel -k --regexp --pipe -N1 --recstart '#' wc + (echo ' rec1'; echo 'bar'; echo ' rec2') | + parallel -k --regexp --pipe -N1 --recstart ' ' wc + (echo 'rec2'; echo 'bar#';echo 'rec2' ) | + parallel -k --regexp --pipe -N1 --recend '#' wc + (echo 'rec2'; echo 'bar ';echo 'rec2' ) | + parallel -k --regexp --pipe -N1 --recend ' ' wc +} + par_sqlandworker_uninstalled_dbd() { echo 'bug #56096: dbi-csv no such column' mkdir -p /tmp/parallel-bug-56096 diff --git a/testsuite/tests-to-run/parallel-local-ssh1.sh b/testsuite/tests-to-run/parallel-local-ssh1.sh index 69d23388..4af8063a 100644 --- a/testsuite/tests-to-run/parallel-local-ssh1.sh +++ b/testsuite/tests-to-run/parallel-local-ssh1.sh @@ -277,6 +277,7 @@ E agrp=c+b+csh@lo+lo+bash@lo E agrp=c+b+lo+bash@lo+csh@lo E agrp=c+b+lo+csh@lo+bash@lo E agrp=c+bash@lo+b+csh@lo+lo +E agrp=c+bash@lo+csh@lo+b+lo E agrp=c+bash@lo+b+lo+csh@lo E agrp=c+bash@lo+csh@lo+lo+b E agrp=c+bash@lo+lo+b+csh@lo diff --git a/testsuite/wanted-results/parallel-local-1s b/testsuite/wanted-results/parallel-local-1s index 8f36f36f..bb5e6f47 100644 --- a/testsuite/wanted-results/parallel-local-1s +++ b/testsuite/wanted-results/parallel-local-1s @@ -384,6 +384,15 @@ par_pxz_complains bug #44250: pxz complains File format not recognized but decom par_pxz_complains ls: cannot access '/OK-if-missing-file': No such file or directory par_pxz_complains can not seek in input: Illegal seek par_pxz_complains ls: cannot access '/OK-if-missing-file': No such file or directory +par_recend_recstart_hash ### bug #59843: --regexp --recstart '#' fails +par_recend_recstart_hash 2 2 10 +par_recend_recstart_hash 1 1 6 +par_recend_recstart_hash 2 2 10 +par_recend_recstart_hash 1 1 6 +par_recend_recstart_hash 1 2 9 +par_recend_recstart_hash 2 1 6 +par_recend_recstart_hash 1 2 9 +par_recend_recstart_hash 2 1 6 par_replacement_rename ### Test --basenamereplace par_replacement_rename b.c b.c b.c b b b par_replacement_rename b.c