From 22244d765a97d489f694fec64a788ef33baea78f Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Tue, 23 Mar 2021 22:11:02 +0100 Subject: [PATCH] parallel: --regexp . no longer matches \n. Allow for pre-record garbage. --- README | 14 ++-- doc/release_new_version | 49 +++--------- src/env_parallel.ash | 2 +- src/env_parallel.dash | 2 +- src/env_parallel.ksh | 2 +- src/env_parallel.mksh | 2 +- src/env_parallel.sh | 2 +- src/env_parallel.zsh | 2 +- src/niceload | 2 +- src/parallel | 50 ++++++++----- src/parallel.pod | 12 +-- src/parsort | 2 +- src/sql | 2 +- testsuite/tests-to-run/parallel-local-0.3s.sh | 12 +++ testsuite/tests-to-run/parallel-local-1s.sh | 2 +- testsuite/tests-to-run/parallel-local-3s.sh | 28 +++++++ testsuite/wanted-results/parallel-local-0.3s | 5 +- testsuite/wanted-results/parallel-local-3s | 74 +++++++++++++++++++ 18 files changed, 185 insertions(+), 79 deletions(-) diff --git a/README b/README index b7c261e2..26a01233 100644 --- a/README +++ b/README @@ -40,13 +40,13 @@ installation. 
$ (wget -O - pi.dk/3 || lynx -source pi.dk/3 || curl pi.dk/3/ || \ fetch -o - http://pi.dk/3 ) > install.sh - $ sha1sum install.sh | grep 67bd7bc7dc20aff99eb8f1266574dadb - 12345678 67bd7bc7 dc20aff9 9eb8f126 6574dadb - $ md5sum install.sh | grep b7a15cdbb07fb6e11b0338577bc1780f - b7a15cdb b07fb6e1 1b033857 7bc1780f - $ sha512sum install.sh | grep 186000b62b66969d7506ca4f885e0c80e02a22444 - 6f25960b d4b90cf6 ba5b76de c1acdf39 f3d24249 72930394 a4164351 93a7668d - 21ff9839 6f920be5 186000b6 2b66969d 7506ca4f 885e0c80 e02a2244 40e8a43f + $ sha1sum install.sh | grep c82233e7da3166308632ac8c34f850c0 + 12345678 c82233e7 da316630 8632ac8c 34f850c0 + $ md5sum install.sh | grep ae3d7aac5e15cf3dfc87046cfc5918d2 + ae3d7aac 5e15cf3d fc87046c fc5918d2 + $ sha512sum install.sh | grep dfc00d823137271a6d96225cea9e89f533ff6c81f + 9c5198d5 31a3b755 b7910ece 3a42d206 c804694d fc00d823 137271a6 d96225ce + a9e89f53 3ff6c81f f52b298b ef9fb613 2d3f9ccd 0e2c7bd3 c35978b5 79acb5ca $ bash install.sh This will literally install faster than reading the rest of this diff --git a/doc/release_new_version b/doc/release_new_version index f0617691..407e1b18 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -201,9 +201,9 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20210322 ('Sarkozy/Sarah Everard/AstraZeneca/ Meghan<<>>') released <<[stable]>> +Subject: GNU Parallel 20210422 ('<<>>') released <<[stable]>> -GNU Parallel 20210322 ('2002-01-06') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ +GNU Parallel 20210422 ('<<>>') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ <> @@ -213,8 +213,7 @@ It does not have to be as detailed as Juan's. 
It is perfectly fine if you just s Quote of the month: - GNU Parallel is my new favorite thing - -- Will Tejeda @thewilltejeda + <<>> New in this release: @@ -222,33 +221,7 @@ New in this release: News about GNU Parallel: -* The very first version of Parallel dated 2002-01-06 was found in an - old backup: - - #!/usr/bin/perl - - $processes=shift; - - chomp(@jobs=<>); - for (@jobs) { - $jobnr++; - push @makefile, - (".PHONY : job$jobnr\n", - "job$jobnr :\n", - "\t$_\n"); - } - unshift @makefile, "all : ",(map { "job$_ " } 1 .. $jobnr),"\n"; - - open (MAKE, "| make -k -f - -j $processes") || die; - print MAKE @makefile; - close MAKE; - -* Introduction to GNU Parallel https://www.youtube.com/watch?v=Kj-6JkAqw-8 - -* Using GNU Parallel with GooseSLURM https://readthedocs.org/projects/gooseslurm/downloads/pdf/latest/#chapter.7 - -* Why GNU-parallel? - https://github.com/lijingbu/omics/blob/main/why_gnu_parallel.md +<<>> Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html @@ -280,13 +253,13 @@ You can install GNU Parallel in just 10 seconds with: $ (wget -O - pi.dk/3 || lynx -source pi.dk/3 || curl pi.dk/3/ || \ fetch -o - http://pi.dk/3 ) > install.sh - $ sha1sum install.sh | grep 3374ec53bacb199b245af2dda86df6c9 - 12345678 3374ec53 bacb199b 245af2dd a86df6c9 - $ md5sum install.sh | grep 029a9ac06e8b5bc6052eac57b2c3c9ca - 029a9ac0 6e8b5bc6 052eac57 b2c3c9ca - $ sha512sum install.sh | grep f517006d9897747bed8a4694b1acba1b - 40f53af6 9e20dae5 713ba06c f517006d 9897747b ed8a4694 b1acba1b 1464beb4 - 60055629 3f2356f3 3e9c4e3c 76e3f3af a9db4b32 bd33322b 975696fc e6b23cfb + $ sha1sum install.sh | grep c82233e7da3166308632ac8c34f850c0 + 12345678 c82233e7 da316630 8632ac8c 34f850c0 + $ md5sum install.sh | grep ae3d7aac5e15cf3dfc87046cfc5918d2 + ae3d7aac 5e15cf3d fc87046c fc5918d2 + $ sha512sum install.sh | grep dfc00d823137271a6d96225cea9e89f533ff6c81f + 9c5198d5 31a3b755 b7910ece 3a42d206 c804694d fc00d823 
137271a6 d96225ce + a9e89f53 3ff6c81f f52b298b ef9fb613 2d3f9ccd 0e2c7bd3 c35978b5 79acb5ca $ bash install.sh Watch the intro video on http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1 diff --git a/src/env_parallel.ash b/src/env_parallel.ash index 05722f20..c87fea7d 100755 --- a/src/env_parallel.ash +++ b/src/env_parallel.ash @@ -385,7 +385,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/env_parallel.dash b/src/env_parallel.dash index 2ddf5c91..6773956f 100755 --- a/src/env_parallel.dash +++ b/src/env_parallel.dash @@ -385,7 +385,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/env_parallel.ksh b/src/env_parallel.ksh index ba1edfda..c2aa241f 100755 --- a/src/env_parallel.ksh +++ b/src/env_parallel.ksh @@ -368,7 +368,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." 
echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/env_parallel.mksh b/src/env_parallel.mksh index faec2535..0ae4627f 100644 --- a/src/env_parallel.mksh +++ b/src/env_parallel.mksh @@ -371,7 +371,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/env_parallel.sh b/src/env_parallel.sh index 06d69069..55507356 100755 --- a/src/env_parallel.sh +++ b/src/env_parallel.sh @@ -385,7 +385,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/env_parallel.zsh b/src/env_parallel.zsh index 1f3ae34e..2838c7fa 100755 --- a/src/env_parallel.zsh +++ b/src/env_parallel.zsh @@ -362,7 +362,7 @@ _parset_main() { return 255 fi if [ "$_parset_NAME" = "--version" ] ; then - echo "parset 20210322 (GNU parallel `parallel --minversion 1`)" + echo "parset 20210323 (GNU parallel `parallel --minversion 1`)" echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software" echo "Foundation, Inc." 
echo "License GPLv3+: GNU GPL version 3 or later " diff --git a/src/niceload b/src/niceload index 6e44f034..90291e80 100755 --- a/src/niceload +++ b/src/niceload @@ -26,7 +26,7 @@ use strict; use Getopt::Long; $Global::progname="niceload"; -$Global::version = 20210322; +$Global::version = 20210323; Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV) || die_usage(); if($opt::version) { diff --git a/src/parallel b/src/parallel index e192ea0a..10619f37 100755 --- a/src/parallel +++ b/src/parallel @@ -969,6 +969,7 @@ sub spreadstdin() { my $header = find_header(\$buf,$in); my $anything_written; my $eof; + my $garbage_read; sub read_block() { # Read a --blocksize from STDIN @@ -1023,23 +1024,37 @@ sub spreadstdin() { # Pass records of N regexps # -N => (start..*?end){n} # -L -N => (start..*?end){n*l} - my $read_n_lines = -1+ + if(not $garbage_read) { + $garbage_read = 1; + if($buf !~ /^$recstart/o) { + # Buf does not start with $recstart => There is garbage. + # Make a single record of the garbage + if($buf =~ + /(?s)^(?-s)( + (?:(?:(?!$recend$recstart)(?s).(?-s))*?$recend) + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + } + + my $n_records = $Global::max_number_of_args * ($Global::max_lines || 1); # (?!negative lookahead) is needed to avoid backtracking # See: https://unix.stackexchange.com/questions/439356/ + # (?s).(?-s) = (.|[\n]) but faster while($buf =~ - /( - # Either recstart or at least one char from start - ^(?: $recstart | .) - # followed something - (?:(?!$recend$recstart).)*? 
- # and then recend - $recend - # Then n-1 times recstart.*recend - (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines} + /(?s)^(?-s)( + # n more times recstart.*recend + (?:$recstart(?:(?!$recend$recstart)(?s)(.)(?-s))*?$recend){$n_records} ) # Followed by recstart - (?=$recstart)/osx) { + (?=$recstart)/mox and length $1 > 0) { $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,length $1); @@ -1050,7 +1065,8 @@ sub spreadstdin() { sub pass_regexp() { # Find the last recend-recstart in $buf $eof and return; - if($buf =~ /^(.*$recend)$recstart.*?$/os) { + # (?s).(?-s) = (.|[\n]) but faster + if($buf =~ /^((?s).(?-s)*$recend)$recstart(?s).(?-s)*?$/mox) { $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,length $1); @@ -1230,13 +1246,13 @@ sub recstartrecend() { $recend = $opt::recend; if($opt::regexp and $recend eq '') { # --regexp --recend '' - $recend = '.'; + $recend = '(?s).(?-s)'; } } if($opt::regexp) { # If $recstart/$recend contains '|' - # this should only apply to the regexp + # the | should only apply to the regexp $recstart = "(?:".$recstart.")"; $recend = "(?:".$recend.")"; # Quote # and space @@ -2173,7 +2189,7 @@ sub check_invalid_option_combinations() { sub init_globals() { # Defaults: - $Global::version = 20210322; + $Global::version = 20210323; $Global::progname = 'parallel'; $::name = "GNU Parallel"; $Global::infinity = 2**31; @@ -8675,11 +8691,11 @@ sub remove_rec_sep($) { my ($block_ref,$recstart,$recend) = @_; # Remove record separator if($opt::regexp) { - $$block_ref =~ s/$recend$recstart//gos; + $$block_ref =~ s/$recend$recstart//gom; $$block_ref =~ s/^$recstart//os; $$block_ref =~ s/$recend$//os; } else { - $$block_ref =~ s/\Q$recend$recstart\E//gos; + $$block_ref =~ s/\Q$recend$recstart\E//gom; $$block_ref =~ s/^\Q$recstart\E//os; $$block_ref =~ s/\Q$recend\E$//os; } diff --git a/src/parallel.pod b/src/parallel.pod index 65ae897a..b57ddfe5 100644 --- 
a/src/parallel.pod +++ b/src/parallel.pod @@ -303,7 +303,7 @@ directory (if any) and extension removed. To understand positional replacement strings see B<{>IB<}>. -=item B<{=>IB<=}> (beta testing) +=item B<{=>IB<=}> Replace with calculated I. B<$_> will contain the same as B<{}>. After evaluating I B<$_> will be used @@ -873,7 +873,7 @@ Implies B<--pipe> unless B<--pipepart> is used. See also: B<--cat>. -=item B<--filter> I (beta testing) +=item B<--filter> I Only run jobs where I is true. I can contain replacement strings and Perl code. Example: @@ -1373,9 +1373,9 @@ mix. Compare: See also: B<--group> B<--ungroup> -=item B<--xapply> (beta testing) +=item B<--xapply> -=item B<--link> (beta testing) +=item B<--link> Link input sources. Read multiple input sources like B. If multiple input sources are given, one argument will be read from each @@ -2638,9 +2638,9 @@ Silent. The job to be run will not be printed. This is the default. Can be reversed with B<-v>. -=item B<--template> I=I (beta testing) +=item B<--template> I=I -=item B<--tmpl> I=I (beta testing) +=item B<--tmpl> I=I Copy I to I. All replacement strings in the contents of I will be replaced. 
All replacement strings in the name I diff --git a/src/parsort b/src/parsort index 74a3d5a9..4145c510 100755 --- a/src/parsort +++ b/src/parsort @@ -121,7 +121,7 @@ GetOptions( "help" => \$opt::dummy, ) || exit(255); $Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1]; -$Global::version = 20210322; +$Global::version = 20210323; if($opt::version) { version(); exit 0; } @Global::sortoptions = shell_quote(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]); diff --git a/src/sql b/src/sql index 85493cfe..c19d6fc8 100755 --- a/src/sql +++ b/src/sql @@ -600,7 +600,7 @@ $Global::Initfile && unlink $Global::Initfile; exit ($err); sub parse_options { - $Global::version = 20210322; + $Global::version = 20210323; $Global::progname = 'sql'; # This must be done first as this may exec myself diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh index 8b5ad32b..279ae4f7 100644 --- a/testsuite/tests-to-run/parallel-local-0.3s.sh +++ b/testsuite/tests-to-run/parallel-local-0.3s.sh @@ -16,6 +16,18 @@ export -f stdsort # Test amount of parallelization # parallel --shuf --jl /tmp/myjl -j1 'export JOBS={1};'bash tests-to-run/parallel-local-0.3s.sh ::: {1..16} ::: {1..5} +par_env_parallel_pipefail() { + cat <<'EOF' | bash + echo "### test env_parallel with pipefail + inherit_errexit" + . 
$(which env_parallel.bash) + env_parallel --session + set -Eeuo pipefail + shopt -s inherit_errexit + + env_parallel echo ::: OK +EOF +} + par_crnl() { echo '### Give a warning if input is DOS-ascii' printf "b\r\nc\r\nd\r\ne\r\nf\r\n" | stdout parallel -k echo {}a diff --git a/testsuite/tests-to-run/parallel-local-1s.sh b/testsuite/tests-to-run/parallel-local-1s.sh index 391496c8..98d16afa 100644 --- a/testsuite/tests-to-run/parallel-local-1s.sh +++ b/testsuite/tests-to-run/parallel-local-1s.sh @@ -887,7 +887,7 @@ par_test_cpu_detection_lscpu() { } export -f test_one compgen -A function | grep ^cpu | sort | parallel -j0 -k test_one - rm ~/.parallel/tmp/sshlogin/*/cpuspec + rm ~/.parallel/tmp/sshlogin/*/cpuspec 2>/dev/null } par_null_resume() { diff --git a/testsuite/tests-to-run/parallel-local-3s.sh b/testsuite/tests-to-run/parallel-local-3s.sh index 66e80959..ddb8c24a 100644 --- a/testsuite/tests-to-run/parallel-local-3s.sh +++ b/testsuite/tests-to-run/parallel-local-3s.sh @@ -8,6 +8,34 @@ # Each should be taking 3-10s and be possible to run in parallel # I.e.: No race conditions, no logins +par_pipe_regexp() { + echo '### --pipe --regexp' + gen() { + cat <