parallel: Fixed bug #53718: --pipe --regexp -N blocks.

Ole Tange 2018-04-23 23:45:16 +02:00
parent 3fdd3a6019
commit 9a1d2e9dad
9 changed files with 42 additions and 41 deletions

View file

@@ -199,36 +199,21 @@ to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20180422 ('Trèbes/Tiangong-1/Winnie Mandela/ASIM/Algeria') released <<[stable]>>
Subject: GNU Parallel 20180522 ('') released <<[stable]>>
GNU Parallel 20180422 ('Trèbes') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
GNU Parallel 20180522 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
<<No new functionality was introduced so this is a good candidate for a stable release.>>
Quote of the month:
Today I discovered GNU Parallel, and I don't know what to do with all this spare time.
--Ryan Booker
<<>>
New in this release:
* --csv makes GNU Parallel parse the input sources as CSV. When used with --pipe it only passes full CSV-records.
Introduction to parallel processing with GNU Parallel, "for making full use of it starting today" (in Japanese)
* Time in --bar is printed as 1d02h03m04s.
* Optimization of --tee: It spawns one fewer process per value.
* The GNU Parallel 2018 book is now available: http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html
* Modern pentest tricks for faster, wider, greater engagement (15) https://conference.hitb.org/hitbsecconf2018ams/materials/D1%20COMMSEC%20-%20Thomas%20Debize%20-%20Modern%20Pentest%20Tricks%20for%20Faster,%20Wider,%20Greater%20Engagements.pdf
* Running many serial jobs efficiently https://userinfo.surfsara.nl/documentation/running-many-serial-jobs-efficiently
* GNU Parallel: how to save the results of several commands into a variable? (in Russian) http://fliplinux.com/gnu-parallel-6.html
* Running Multiple Commands in Parallel on a GNU Linux https://www.youtube.com/watch?v=sHpTywpb4_4
* Klaatu covers the documentation of GNU parallel in episode 12x15 http://gnuworldorder.info/
https://www.slideshare.net/koji_matsuda/gnu-parallel?qid=65d63b1a-1ef4-46fa-82c3-69c9ab7df439&v=&b=&from_search=1
<<Citation not OK: BAMClipper: removing primers from alignments to minimize false-negative mutations in amplicon next-generation sequencing https://www.nature.com/articles/s41598-017-01703-6>>

View file

@@ -24,7 +24,7 @@
use strict;
use Getopt::Long;
$Global::progname="niceload";
$Global::version = 20180422;
$Global::version = 20180423;
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV) || die_usage();
if($opt::version) {

View file

@@ -132,9 +132,9 @@ useful to keep the connection alive.
Sets niceness. See B<nice>(1).
=item B<-p> I<PID>[,I<PID>] (beta testing)
=item B<-p> I<PID>[,I<PID>]
=item B<--pid> I<PID>[,I<PID>] (beta testing)
=item B<--pid> I<PID>[,I<PID>]
Process IDs of processes to suspend. You can specify multiple process
IDs with multiple B<-p> I<PID> or by separating the PIDs with commas.
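
For readers who have not used B<--pid>: the option acts on processes that are already running, and the mechanism behind "suspend" is ordinary job-control signals. A hypothetical, minimal Perl sketch of suspending and resuming a comma-separated PID list (the PIDs are made up and this is not niceload's actual implementation):

    #!/usr/bin/perl
    # Hypothetical sketch: suspend, then resume, the processes named in a
    # comma-separated PID list such as the argument to -p/--pid.
    # The PIDs are placeholders; this is not niceload's real code.
    use strict;
    use warnings;

    my $pidlist = "12345,12346";    # e.g. from: niceload -p 12345,12346
    my @pids    = split /,/, $pidlist;

    kill 'STOP', @pids;             # suspend the processes (SIGSTOP)
    sleep 2;                        # ...while the machine is busy...
    kill 'CONT', @pids;             # resume them (SIGCONT)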

View file

@@ -515,24 +515,29 @@ sub spreadstdin {
     if($Global::max_number_of_args) {
         # -N => (start..*?end){n}
         # -L -N => (start..*?end){n*l}
-        my $read_n_lines =
+        my $read_n_lines = -1+
             $Global::max_number_of_args * ($Global::max_lines || 1);
-        while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) {
-            # Copy to modifiable variable
-            my $b = $1;
+        # (?!negative lookahead) is needed to avoid backtracking
+        # See: https://unix.stackexchange.com/questions/439356/
+        while($buf =~ /# From start up till recend
+                  ^((?:(?!$recend$recstart).)*?$recend
+                  # Then n-1 times recstart.*recend
+                  (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines})
+                  # Followed by recstart
+                  (?=$recstart)/osx) {
             $anything_written +=
-                write_record_to_pipe($chunk_number++,\$header,\$b,
+                write_record_to_pipe($chunk_number++,\$header,\$buf,
                                      $recstart,$recend,length $1);
+            shorten(\$buf,length $1);
         }
     } else {
         eof($in) and last;
         # Find the last recend-recstart in $buf
-        if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
-            # Copy to modifiable variable
-            my $b = $1;
+        if($buf =~ /^(.*$recend)$recstart.*?$/os) {
             $anything_written +=
-                write_record_to_pipe($chunk_number++,\$header,\$b,
+                write_record_to_pipe($chunk_number++,\$header,\$buf,
                                      $recstart,$recend,length $1);
+            shorten(\$buf,length $1);
         }
     }
 } elsif($opt::csv) {
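
This hunk is the actual fix for bug #53718. The old loop cut N records per chunk with a substitution whose quantified group (?:$recstart.*?$recend){$read_n_lines} can backtrack enormously when the buffer holds many short records but the group ultimately cannot match, which is exactly the --pipe --regexp -N case that blocked. The new loop matches one record explicitly and then N-1 further records (hence the -1+ in $read_n_lines), guards each . with a negative lookahead so the engine never tries to re-split records while failing, and removes the consumed chunk with shorten() instead of copying it into $b. Below is a minimal, self-contained sketch of the two strategies, assuming fixed record markers '4' and '5' and a toy buffer in place of GNU parallel's $recstart/$recend and spreadstdin() plumbing:

    #!/usr/bin/perl
    # Illustrative sketch only: '4'/'5' stand in for $recstart/$recend and a
    # print replaces write_record_to_pipe(). Not GNU parallel's actual code.
    use strict;
    use warnings;

    my $recstart = '4';
    my $recend   = '5';
    my $n        = 3;            # corresponds to -N 3
    my $buf      = "4a5" x 7;    # 7 tiny records; 7 is not a multiple of 3

    # Old strategy: consume $n records per iteration with a substitution.
    # When fewer than $n records remain, the quantified group can backtrack
    # heavily before the match finally fails.
    my $old = $buf;
    while($old =~ s/((?:$recstart.*?$recend){$n})($recstart.*)$/$2/os) {
        print "old chunk: $1\n";
    }

    # New strategy: match one record plus $n-1 more (hence the -1), forbid
    # re-splitting records with a negative lookahead, and require that
    # another record follows; then cut the chunk off the front of the buffer.
    my $read_n_lines = -1 + $n;
    my $new = $buf;
    while($new =~ /^((?:(?!$recend$recstart).)*?$recend
                   (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines})
                   (?=$recstart)/osx) {
        print "new chunk: $1\n";
        substr($new, 0, length $1, "");   # roughly what shorten(\$buf,...) does
    }

Both loops emit the same two chunks of three records and leave the trailing record for the next round; the difference is how expensively the final, failing match attempt is rejected once the buffer no longer holds a full group.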
@@ -1444,7 +1449,7 @@ sub check_invalid_option_combinations {
sub init_globals {
# Defaults:
$Global::version = 20180422;
$Global::version = 20180423;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;

View file

@@ -592,7 +592,7 @@ Use I<prg> for (de)compressing temporary files. It is assumed that I<prg
output) unless B<--decompress-program> is given.
=item B<--csv> (alpha testing)
=item B<--csv> (beta testing)
Treat input as CSV-format. B<--colsep> sets the field delimiter. It
works very much like B<--colsep> except it deals correctly with
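
In practice the difference from a plain B<--colsep> split is that a delimiter inside a quoted CSV field is data, not a field boundary, so records must be parsed rather than split. A small illustrative sketch using the CPAN Text::CSV module (illustration only, not parallel's internal parsing code):

    #!/usr/bin/perl
    # Illustration only: the comma inside the quoted field is data, so a
    # naive split on ',' gets the field count wrong while a CSV parser does not.
    use strict;
    use warnings;
    use Text::CSV;

    my $csv  = Text::CSV->new({ binary => 1 }) or die "Text::CSV->new failed";
    my $line = qq{1,"a field, with a comma",3};

    $csv->parse($line) or die "CSV parse failed";
    my @fields = $csv->fields;
    my @naive  = split /,/, $line;

    print "csv parser:  ", scalar(@fields), " fields\n";   # 3
    print "naive split: ", scalar(@naive),  " fields\n";   # 4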
@@ -665,7 +665,7 @@ If I<eof-str> is omitted, there is no end of file string. If neither
B<-E> nor B<-e> is used, no end of file string is used.
=item B<--embed> (beta testing)
=item B<--embed>
Embed GNU B<parallel> in a shell script. If you need to distribute your
script to someone who does not want to install GNU B<parallel> you can

View file

@@ -576,7 +576,7 @@ $Global::Initfile && unlink $Global::Initfile;
exit ($err);
sub parse_options {
$Global::version = 20180422;
$Global::version = 20180423;
$Global::progname = 'sql';
# This must be done first as this may exec myself

View file

@@ -782,6 +782,13 @@ par_csv_pipe() {
stdout parallel --pipe --csv -k --block 100k tail -n1
}
par_slow_pipe_regexp() {
echo "### bug #53718: --pipe --regexp -N blocks"
echo This should take a few ms, but took more than 2 hours
seq 54000 80000 |
timeout -k 1 60 parallel -N1000 --regexp --pipe --recstart 4 --recend 5 -k wc
}
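
A note on why this input is a good reproducer (an informal sketch, not part of the testsuite): seq 54000 80000 fits comfortably in a single default-sized --pipe block, and with --recstart 4 and --recend 5 the markers occur constantly in the digits, so the block splits into a very large number of records only a few bytes long. With -N1000 the old quantified substitution then had a huge space of candidate splits to reject whenever a full group of 1000 records could not be completed, which is how "a few ms" turned into hours. A rough Perl sketch that just builds the same data and counts the potential record boundaries:

    #!/usr/bin/perl
    # Rough illustration (not part of the testsuite): build the data the test
    # pipes into parallel and count how often the record-end marker '5' is
    # immediately followed by the record-start marker '4'.
    use strict;
    use warnings;

    my $buf = join("\n", 54000 .. 80000) . "\n";   # what `seq 54000 80000` emits

    my $boundaries = () = $buf =~ /54/g;           # '5' directly followed by '4'
    printf "block: %d bytes, %d potential record boundaries\n",
           length($buf), $boundaries;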
export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | sort |
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'

View file

@@ -1533,6 +1533,10 @@ par_retries_replacement_string 22
par_retries_replacement_string 33
par_retries_replacement_string 33
par_retries_replacement_string 33
par_slow_pipe_regexp ### bug #53718: --pipe --regexp -N blocks
par_slow_pipe_regexp This should take a few ms, but took more than 2 hours
par_slow_pipe_regexp 979 980 5875
par_slow_pipe_regexp 25022 25022 150131
par_tagstring_pipe bug #50228: --pipe --tagstring broken
par_tagstring_pipe 1 1000 1000 3893
par_tagstring_pipe 2 1000 1000 5000

View file

@@ -444,7 +444,7 @@ par_sh_environment_too_big OK_bigvar_remote
par_sh_environment_too_big OK_bigvar_quote
par_sh_environment_too_big OK_bigvar_quote_remote
par_sh_environment_too_big Rest should fail
par_sh_environment_too_big sh: 135: perl: Argument list too long
par_sh_environment_too_big sh: 136: perl: Argument list too long
par_sh_environment_too_big env_parallel: Error: Your environment is too big.
par_sh_environment_too_big env_parallel: Error: You can try 2 different approaches:
par_sh_environment_too_big env_parallel: Error: 1. Use --env and only mention the names to copy.
@@ -452,7 +452,7 @@ par_sh_environment_too_big env_parallel: Error: 2. Try running this in a clean e
par_sh_environment_too_big env_parallel: Error: env_parallel --record-env
par_sh_environment_too_big env_parallel: Error: And then use '--env _'
par_sh_environment_too_big env_parallel: Error: For details see: man env_parallel
par_sh_environment_too_big sh: 135: perl: Argument list too long
par_sh_environment_too_big sh: 136: perl: Argument list too long
par_sh_environment_too_big env_parallel: Error: Your environment is too big.
par_sh_environment_too_big env_parallel: Error: You can try 2 different approaches:
par_sh_environment_too_big env_parallel: Error: 1. Use --env and only mention the names to copy.
@@ -460,7 +460,7 @@ par_sh_environment_too_big env_parallel: Error: 2. Try running this in a clean e
par_sh_environment_too_big env_parallel: Error: env_parallel --record-env
par_sh_environment_too_big env_parallel: Error: And then use '--env _'
par_sh_environment_too_big env_parallel: Error: For details see: man env_parallel
par_sh_environment_too_big sh: 135: perl: Argument list too long
par_sh_environment_too_big sh: 136: perl: Argument list too long
par_sh_environment_too_big env_parallel: Error: Your environment is too big.
par_sh_environment_too_big env_parallel: Error: You can try 2 different approaches:
par_sh_environment_too_big env_parallel: Error: 1. Use --env and only mention the names to copy.
@@ -468,7 +468,7 @@ par_sh_environment_too_big env_parallel: Error: 2. Try running this in a clean e
par_sh_environment_too_big env_parallel: Error: env_parallel --record-env
par_sh_environment_too_big env_parallel: Error: And then use '--env _'
par_sh_environment_too_big env_parallel: Error: For details see: man env_parallel
par_sh_environment_too_big sh: 135: perl: Argument list too long
par_sh_environment_too_big sh: 136: perl: Argument list too long
par_sh_environment_too_big env_parallel: Error: Your environment is too big.
par_sh_environment_too_big env_parallel: Error: You can try 2 different approaches:
par_sh_environment_too_big env_parallel: Error: 1. Use --env and only mention the names to copy.