From 6921d28f9a32a3422729a96ad34f6a0dfe8d5916 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 26 Nov 2012 00:19:58 +0100 Subject: [PATCH] parallel: Fixed bug #37325: Inefficiency of --pipe -L --- src/parallel | 29 ++++++++++++++------- src/parallel.texi | 8 +++--- testsuite/tests-to-run/parallel-local114.sh | 3 +++ testsuite/tests-to-run/parallel-local5.sh | 4 ++- testsuite/wanted-results/parallel-local114 | 23 ++++++++++++++-- testsuite/wanted-results/parallel-local5 | 11 ++++---- testsuite/wanted-results/test65 | 4 --- 7 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/parallel b/src/parallel index a6398e5d..da6a0872 100755 --- a/src/parallel +++ b/src/parallel @@ -313,19 +313,25 @@ sub spreadstdin { my $force_one_time_through = 0; for my $in (@fhlist) { piperead: while(1) { - if(!$force_one_time_through) { - $force_one_time_through++; - } elsif($Global::max_lines) { - # Read $Global::max_lines lines - eof($in) and last piperead; - for(my $t = 0; !eof($in) and - substr($buf,length $buf,0) = <$in> and $t < $Global::max_lines; - $t++) {} + eof($in) and $force_one_time_through++ and last piperead; + if($Global::max_lines) { + # Read records of $Global::max_lines lines + my @lines; + my $blocksize = length $buf; + do { + for(my $t = 0; !eof($in) and $t < $Global::max_lines; $t++) { + my $l = <$in>; + push @lines, $l; + $blocksize += length($l); + } + } while($blocksize < $opt::blocksize and !eof($in)); + substr($buf,length $buf,0) = join("",@lines); } else { # Read a block - read($in,substr($buf,length $buf,0),$opt::blocksize) or last; + read($in,substr($buf,length $buf,0),$opt::blocksize); # substr above = append to $buf } + if($opt::r) { # Remove empty lines $buf=~s/^\s*\n//gm; @@ -767,7 +773,10 @@ sub parse_options { # Read more than one arg at a time (-L, -N) if(defined $opt::L) { $Global::max_lines = $opt::L; - $Global::max_number_of_args ||= $Global::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } } if(defined $opt::N) { $Global::max_number_of_args = $opt::N; diff --git a/src/parallel.texi b/src/parallel.texi index ee51a114..72b1d4a3 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -1247,7 +1247,7 @@ job sequence number. @item @strong{--hashbang} @anchor{@strong{--hashbang}} -GNU @strong{Parallel} can be called as a shebang (#!) command as the first +GNU @strong{parallel} can be called as a shebang (#!) command as the first line of a script. The content of the file will be treated as inputsource. @@ -1263,10 +1263,10 @@ Like this: @strong{--shebang} must be set as the first option. -@item @strong{--shebang-wrap} -@anchor{@strong{--shebang-wrap}} +@item @strong{--shebang-wrap} (alpha testing) +@anchor{@strong{--shebang-wrap} (alpha testing)} -GNU @strong{Parallel} can parallelize scripts by wrapping the shebang +GNU @strong{parallel} can parallelize scripts by wrapping the shebang line. If the program can be run like this: @verbatim diff --git a/testsuite/tests-to-run/parallel-local114.sh b/testsuite/tests-to-run/parallel-local114.sh index 78493dd3..f12b5dbd 100755 --- a/testsuite/tests-to-run/parallel-local114.sh +++ b/testsuite/tests-to-run/parallel-local114.sh @@ -44,6 +44,9 @@ echo "bug #36657: --load does not work with custom ssh"; echo "bug #34958: --pipe with record size measured in lines"; seq 10 | parallel -k --pipe -L 4 cat\;echo FOO | uniq +echo "bug #37325: Inefficiency of --pipe -L"; + seq 2000 | parallel -k --pipe --block 1k -L 4 wc\;echo FOO | uniq + echo "bug #34958: --pipe with record size measured in lines"; seq 10 | parallel -k --pipe -l 4 cat\;echo FOO | uniq diff --git a/testsuite/tests-to-run/parallel-local5.sh b/testsuite/tests-to-run/parallel-local5.sh index b4ec68f3..7d948445 100644 --- a/testsuite/tests-to-run/parallel-local5.sh +++ b/testsuite/tests-to-run/parallel-local5.sh @@ -35,9 +35,11 @@ seq 1 10 | parallel -j2 -k -N 3 --pipe 'cat;echo jjjjjjjjjj' | uniq echo '### Test -l -N -L and -n with multiple jobslots and multiple args' seq 1 5 | parallel -kj2 -l 2 --pipe "cat; echo a" | uniq seq 1 5 | parallel -kj2 -N 2 --pipe "cat; echo b" | uniq -seq 1 5 | parallel -kj2 -L 2 --pipe "cat; echo c" | uniq seq 1 5 | parallel -kj2 -n 2 --pipe "cat; echo d" | uniq +echo '### Test -L --pipe' +seq 1 5 | parallel -kj2 -L 2 --pipe "cat; echo c" | uniq + echo '### Test output is the same for different block size' echo -n 01a02a0a0a12a34a45a6a | parallel -k -j1 --blocksize 100 --pipe --recend a -N 3 'echo -n "$PARALLEL_SEQ>"; cat; echo; sleep 0.1' diff --git a/testsuite/wanted-results/parallel-local114 b/testsuite/wanted-results/parallel-local114 index 8276e389..712bd454 100644 --- a/testsuite/wanted-results/parallel-local114 +++ b/testsuite/wanted-results/parallel-local114 @@ -93,15 +93,34 @@ bug #34958: --pipe with record size measured in lines 2 3 4 -FOO 5 6 7 8 -FOO 9 10 FOO +bug #37325: Inefficiency of --pipe -L + 280 280 1012 +FOO + 252 252 1008 +FOO + 252 252 1008 +FOO + 244 244 1005 +FOO + 200 200 1000 +FOO + 200 200 1000 +FOO + 200 200 1000 +FOO + 200 200 1000 +FOO + 172 172 860 +FOO + 0 0 0 +FOO bug #34958: --pipe with record size measured in lines 1 2 diff --git a/testsuite/wanted-results/parallel-local5 b/testsuite/wanted-results/parallel-local5 index fee40012..80f7d0a3 100644 --- a/testsuite/wanted-results/parallel-local5 +++ b/testsuite/wanted-results/parallel-local5 @@ -100,20 +100,19 @@ b b 1 2 -c +d 3 4 -c +d 5 -c +d +### Test -L --pipe 1 2 -d 3 4 -d 5 -d +c ### Test output is the same for different block size 1>01a02a0a 2>0a12a34a diff --git a/testsuite/wanted-results/test65 b/testsuite/wanted-results/test65 index 3bf27eb0..4b5e35d8 100644 --- a/testsuite/wanted-results/test65 +++ b/testsuite/wanted-results/test65 @@ -31,10 +31,6 @@ h2 6xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 7xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 8xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -Stop -Start -h1 -h2 9xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 10xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 11xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx