parallel: --regexp . no longer matches \n. Allow for pre-record garbage.

This commit is contained in:
Ole Tange 2021-03-23 22:11:02 +01:00
parent 97adff0293
commit 22244d765a
18 changed files with 185 additions and 79 deletions

14
README
View file

@ -40,13 +40,13 @@ installation.
$ (wget -O - pi.dk/3 || lynx -source pi.dk/3 || curl pi.dk/3/ || \
fetch -o - http://pi.dk/3 ) > install.sh
$ sha1sum install.sh | grep 67bd7bc7dc20aff99eb8f1266574dadb
12345678 67bd7bc7 dc20aff9 9eb8f126 6574dadb
$ md5sum install.sh | grep b7a15cdbb07fb6e11b0338577bc1780f
b7a15cdb b07fb6e1 1b033857 7bc1780f
$ sha512sum install.sh | grep 186000b62b66969d7506ca4f885e0c80e02a22444
6f25960b d4b90cf6 ba5b76de c1acdf39 f3d24249 72930394 a4164351 93a7668d
21ff9839 6f920be5 186000b6 2b66969d 7506ca4f 885e0c80 e02a2244 40e8a43f
$ sha1sum install.sh | grep c82233e7da3166308632ac8c34f850c0
12345678 c82233e7 da316630 8632ac8c 34f850c0
$ md5sum install.sh | grep ae3d7aac5e15cf3dfc87046cfc5918d2
ae3d7aac 5e15cf3d fc87046c fc5918d2
$ sha512sum install.sh | grep dfc00d823137271a6d96225cea9e89f533ff6c81f
9c5198d5 31a3b755 b7910ece 3a42d206 c804694d fc00d823 137271a6 d96225ce
a9e89f53 3ff6c81f f52b298b ef9fb613 2d3f9ccd 0e2c7bd3 c35978b5 79acb5ca
$ bash install.sh
This will literally install faster than reading the rest of this

View file

@ -201,9 +201,9 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20210322 ('Sarkozy/Sarah Everard/AstraZeneca/ Meghan<<>>') released <<[stable]>>
Subject: GNU Parallel 20210422 ('<<>>') released <<[stable]>>
GNU Parallel 20210322 ('2002-01-06') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
GNU Parallel 20210322 ('<<>>') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
<<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -213,8 +213,7 @@ It does not have to be as detailed as Juan's. It is perfectly fine if you just s
Quote of the month:
GNU Parallel is my new favorite thing
-- Will Tejeda @thewilltejeda
<<>>
New in this release:
@ -222,33 +221,7 @@ New in this release:
News about GNU Parallel:
* The very first version of Parallel dated 2002-01-06 was found in an
old backup:
#!/usr/bin/perl
$processes=shift;
chomp(@jobs=<>);
for (@jobs) {
$jobnr++;
push @makefile,
(".PHONY : job$jobnr\n",
"job$jobnr :\n",
"\t$_\n");
}
unshift @makefile, "all : ",(map { "job$_ " } 1 .. $jobnr),"\n";
open (MAKE, "| make -k -f - -j $processes") || die;
print MAKE @makefile;
close MAKE;
* Introduction to GNU Parallel https://www.youtube.com/watch?v=Kj-6JkAqw-8
* Using GNU Parallel with GooseSLURM https://readthedocs.org/projects/gooseslurm/downloads/pdf/latest/#chapter.7
* Why GNU-parallel?
https://github.com/lijingbu/omics/blob/main/why_gnu_parallel.md
<<>>
Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html
@ -280,13 +253,13 @@ You can install GNU Parallel in just 10 seconds with:
$ (wget -O - pi.dk/3 || lynx -source pi.dk/3 || curl pi.dk/3/ || \
fetch -o - http://pi.dk/3 ) > install.sh
$ sha1sum install.sh | grep 3374ec53bacb199b245af2dda86df6c9
12345678 3374ec53 bacb199b 245af2dd a86df6c9
$ md5sum install.sh | grep 029a9ac06e8b5bc6052eac57b2c3c9ca
029a9ac0 6e8b5bc6 052eac57 b2c3c9ca
$ sha512sum install.sh | grep f517006d9897747bed8a4694b1acba1b
40f53af6 9e20dae5 713ba06c f517006d 9897747b ed8a4694 b1acba1b 1464beb4
60055629 3f2356f3 3e9c4e3c 76e3f3af a9db4b32 bd33322b 975696fc e6b23cfb
$ sha1sum install.sh | grep c82233e7da3166308632ac8c34f850c0
12345678 c82233e7 da316630 8632ac8c 34f850c0
$ md5sum install.sh | grep ae3d7aac5e15cf3dfc87046cfc5918d2
ae3d7aac 5e15cf3d fc87046c fc5918d2
$ sha512sum install.sh | grep dfc00d823137271a6d96225cea9e89f533ff6c81f
9c5198d5 31a3b755 b7910ece 3a42d206 c804694d fc00d823 137271a6 d96225ce
a9e89f53 3ff6c81f f52b298b ef9fb613 2d3f9ccd 0e2c7bd3 c35978b5 79acb5ca
$ bash install.sh
Watch the intro video on http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1

View file

@ -385,7 +385,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -385,7 +385,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -368,7 +368,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -371,7 +371,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -385,7 +385,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -362,7 +362,7 @@ _parset_main() {
return 255
fi
if [ "$_parset_NAME" = "--version" ] ; then
echo "parset 20210322 (GNU parallel `parallel --minversion 1`)"
echo "parset 20210323 (GNU parallel `parallel --minversion 1`)"
echo "Copyright (C) 2007-2021 Ole Tange, http://ole.tange.dk and Free Software"
echo "Foundation, Inc."
echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"

View file

@ -26,7 +26,7 @@
use strict;
use Getopt::Long;
$Global::progname="niceload";
$Global::version = 20210322;
$Global::version = 20210323;
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV) || die_usage();
if($opt::version) {

View file

@ -969,6 +969,7 @@ sub spreadstdin() {
my $header = find_header(\$buf,$in);
my $anything_written;
my $eof;
my $garbage_read;
sub read_block() {
# Read a --blocksize from STDIN
@ -1023,23 +1024,37 @@ sub spreadstdin() {
# Pass records of N regexps
# -N => (start..*?end){n}
# -L -N => (start..*?end){n*l}
my $read_n_lines = -1+
if(not $garbage_read) {
$garbage_read = 1;
if($buf !~ /^$recstart/o) {
# Buf does not start with $recstart => There is garbage.
# Make a single record of the garbage
if($buf =~
/(?s)^(?-s)(
(?:(?:(?!$recend$recstart)(?s).(?-s))*?$recend)
)
# Followed by recstart
(?=$recstart)/mox and length $1 > 0) {
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,length $1);
shorten(\$buf,length $1);
}
}
}
my $n_records =
$Global::max_number_of_args * ($Global::max_lines || 1);
# (?!negative lookahead) is needed to avoid backtracking
# See: https://unix.stackexchange.com/questions/439356/
# (?s).(?-s) = (.|[\n]) but faster
while($buf =~
/(
# Either recstart or at least one char from start
^(?: $recstart | .)
# followed something
(?:(?!$recend$recstart).)*?
# and then recend
$recend
# Then n-1 times recstart.*recend
(?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}
/(?s)^(?-s)(
# n more times recstart.*recend
(?:$recstart(?:(?!$recend$recstart)(?s)(.)(?-s))*?$recend){$n_records}
)
# Followed by recstart
(?=$recstart)/osx) {
(?=$recstart)/mox and length $1 > 0) {
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,length $1);
@ -1050,7 +1065,8 @@ sub spreadstdin() {
sub pass_regexp() {
# Find the last recend-recstart in $buf
$eof and return;
if($buf =~ /^(.*$recend)$recstart.*?$/os) {
# (?s).(?-s) = (.|[\n]) but faster
if($buf =~ /^((?s).(?-s)*$recend)$recstart(?s).(?-s)*?$/mox) {
$anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf,
$recstart,$recend,length $1);
@ -1230,13 +1246,13 @@ sub recstartrecend() {
$recend = $opt::recend;
if($opt::regexp and $recend eq '') {
# --regexp --recend ''
$recend = '.';
$recend = '(?s).(?-s)';
}
}
if($opt::regexp) {
# If $recstart/$recend contains '|'
# this should only apply to the regexp
# the | should only apply to the regexp
$recstart = "(?:".$recstart.")";
$recend = "(?:".$recend.")";
# Quote # and space
@ -2173,7 +2189,7 @@ sub check_invalid_option_combinations() {
sub init_globals() {
# Defaults:
$Global::version = 20210322;
$Global::version = 20210323;
$Global::progname = 'parallel';
$::name = "GNU Parallel";
$Global::infinity = 2**31;
@ -8675,11 +8691,11 @@ sub remove_rec_sep($) {
my ($block_ref,$recstart,$recend) = @_;
# Remove record separator
if($opt::regexp) {
$$block_ref =~ s/$recend$recstart//gos;
$$block_ref =~ s/$recend$recstart//gom;
$$block_ref =~ s/^$recstart//os;
$$block_ref =~ s/$recend$//os;
} else {
$$block_ref =~ s/\Q$recend$recstart\E//gos;
$$block_ref =~ s/\Q$recend$recstart\E//gom;
$$block_ref =~ s/^\Q$recstart\E//os;
$$block_ref =~ s/\Q$recend\E$//os;
}

View file

@ -303,7 +303,7 @@ directory (if any) and extension removed.
To understand positional replacement strings see B<{>I<n>B<}>.
=item B<{=>I<perl expression>B<=}> (beta testing)
=item B<{=>I<perl expression>B<=}>
Replace with calculated I<perl expression>. B<$_> will contain the
same as B<{}>. After evaluating I<perl expression> B<$_> will be used
@ -873,7 +873,7 @@ Implies B<--pipe> unless B<--pipepart> is used.
See also: B<--cat>.
=item B<--filter> I<filter> (beta testing)
=item B<--filter> I<filter>
Only run jobs where I<filter> is true. I<filter> can contain
replacement strings and Perl code. Example:
@ -1373,9 +1373,9 @@ mix. Compare:
See also: B<--group> B<--ungroup>
=item B<--xapply> (beta testing)
=item B<--xapply>
=item B<--link> (beta testing)
=item B<--link>
Link input sources. Read multiple input sources like B<xapply>. If
multiple input sources are given, one argument will be read from each
@ -2638,9 +2638,9 @@ Silent. The job to be run will not be printed. This is the default.
Can be reversed with B<-v>.
=item B<--template> I<file>=I<repl> (beta testing)
=item B<--template> I<file>=I<repl>
=item B<--tmpl> I<file>=I<repl> (beta testing)
=item B<--tmpl> I<file>=I<repl>
Copy I<file> to I<repl>. All replacement strings in the contents of
I<file> will be replaced. All replacement strings in the name I<repl>

View file

@ -121,7 +121,7 @@ GetOptions(
"help" => \$opt::dummy,
) || exit(255);
$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
$Global::version = 20210322;
$Global::version = 20210323;
if($opt::version) { version(); exit 0; }
@Global::sortoptions =
shell_quote(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]);

View file

@ -600,7 +600,7 @@ $Global::Initfile && unlink $Global::Initfile;
exit ($err);
sub parse_options {
$Global::version = 20210322;
$Global::version = 20210323;
$Global::progname = 'sql';
# This must be done first as this may exec myself

View file

@ -16,6 +16,18 @@ export -f stdsort
# Test amount of parallelization
# parallel --shuf --jl /tmp/myjl -j1 'export JOBS={1};'bash tests-to-run/parallel-local-0.3s.sh ::: {1..16} ::: {1..5}
par_env_parallel_pipefail() {
# Test that env_parallel still runs a job when the calling shell has
# 'set -Eeuo pipefail' and 'shopt -s inherit_errexit' enabled after
# 'env_parallel --session' has been called.
# The script below is fed to a separate bash on stdin. The here-document
# delimiter is quoted ('EOF'), so $(which env_parallel.bash) is expanded by
# that child bash and not by the shell running this function.
cat <<'EOF' | bash
echo "### test env_parallel with pipefail + inherit_errexit"
. $(which env_parallel.bash)
env_parallel --session
set -Eeuo pipefail
shopt -s inherit_errexit
env_parallel echo ::: OK
EOF
}
par_crnl() {
echo '### Give a warning if input is DOS-ascii'
printf "b\r\nc\r\nd\r\ne\r\nf\r\n" | stdout parallel -k echo {}a

View file

@ -887,7 +887,7 @@ par_test_cpu_detection_lscpu() {
}
export -f test_one
compgen -A function | grep ^cpu | sort | parallel -j0 -k test_one
rm ~/.parallel/tmp/sshlogin/*/cpuspec
rm ~/.parallel/tmp/sshlogin/*/cpuspec 2>/dev/null
}
par_null_resume() {

View file

@ -8,6 +8,34 @@
# Each should be taking 3-10s and be possible to run in parallel
# I.e.: No race conditions, no logins
par_pipe_regexp() {
    # Split a small fixed input into records with --pipe --regexp, using
    # three different --recstart regexps. Each record is printed with a
    # leading "Record" line. The same three regexps are then run again on
    # the input preceded by a "Garbage" line that matches none of them.
    local pattern
    echo '### --pipe --regexp'
    gen() {
	# Emit the fixed test input: three blocks that each begin with a
	# "Start" line.
	cat <<EOF
A2, Start, 5
A2, 00100, 5
A2, 00200, 6
A2, 00300, 6
A2, Start, 7
A2, 00100, 7
A2, Start, 7
A2, 00200, 8
EOF
	true
    }
    # $p is expanded unquoted on purpose so it word-splits into the
    # command and its options.
    p="parallel --pipe --regexp -k"
    for pattern in 'A\d+, Start' '[A-Z]\d+, Start' '.*, Start'; do
	gen | $p --recstart "$pattern" -N1 'echo Record;cat'
    done
    echo '### Prepend first record with garbage'
    for pattern in 'A\d+, Start' '[A-Z]\d+, Start' '.*, Start'; do
	(echo Garbage; gen) |
	    $p --recstart "$pattern" -N1 'echo Record;cat'
    done
}
par_delay_halt_soon() {
echo "bug #59893: --halt soon doesn't work with --delay"
seq 0 10 |

View file

@ -173,6 +173,8 @@ par_empty_line a
par_empty_line b
par_empty_string_quote bug #37694: Empty string argument skipped when using --quote
par_empty_string_quote 3
par_env_parallel_pipefail ### test env_parallel with pipefail + inherit_errexit
par_env_parallel_pipefail OK
par_exit_val ### Test bug #45619: "--halt" erroneous error exit code (should give 0)
par_exit_val 0
par_exit_val ### Test exit val - true
@ -898,7 +900,8 @@ par_sem_quote echo
par_sem_quote
par_slow_pipe_regexp ### bug #53718: --pipe --regexp -N blocks
par_slow_pipe_regexp This should take a few ms, but took more than 2 hours
par_slow_pipe_regexp 980 981 5881
par_slow_pipe_regexp 0 1 1
par_slow_pipe_regexp 980 981 5880
par_slow_pipe_regexp 25021 25021 150125
par_slow_pipe_regexp ### These should give same output
par_slow_pipe_regexp 6e72d7f86f6a423b9a7fa97630587815 -

View file

@ -208,6 +208,80 @@ par_multiline_commands echo finish 4
par_multiline_commands parallel: Warning: Command lines contain newline. Forcing --null.
par_multiline_commands 4
par_multiline_commands finish 4
par_pipe_regexp ### --pipe --regexp
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_pipe_regexp ### Prepend first record with garbage
par_pipe_regexp Record
par_pipe_regexp Garbage
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_pipe_regexp Record
par_pipe_regexp Garbage
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_pipe_regexp Record
par_pipe_regexp Garbage
par_pipe_regexp Record
par_pipe_regexp A2, Start, 5
par_pipe_regexp A2, 00100, 5
par_pipe_regexp A2, 00200, 6
par_pipe_regexp A2, 00300, 6
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00100, 7
par_pipe_regexp Record
par_pipe_regexp A2, Start, 7
par_pipe_regexp A2, 00200, 8
par_progress ### Test of --progress
par_progress 16
par_progress ### Test of --progress with no jobs