Fixed bug #44614: --pipepart --header off by one.

2024-11-22 14:07:55 +00:00 · 2015-03-25 00:14:50 +01:00 · 2015-03-25 00:14:50 +01:00 · 57c8ca0823
parent be70739bb7
commit 57c8ca0823
6 changed files with 83 additions and 10 deletions
--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -136,12 +136,13 @@ pushd
 git diff

 # Recheck OBS https://build.opensuse.org/package/show?package=parallel&project=home%3Atange
+
+YYYYMMDD=`yyyymmdd`
+TAG=MyTag
+echo "Released as $YYYYMMDD ('$TAG')."
+
 git commit -a

-Released as 20150x22 ('').
-
-TAG=MyTag
-YYYYMMDD=`yyyymmdd`
 git tag -s -m "Released as $YYYYMMDD ('$TAG')" $TAG
 git tag -s -m "Released as $YYYYMMDD ('$TAG')" $YYYYMMDD

@ -218,12 +219,20 @@ Haiku of the month:

 New in this release:

+* GNU Parallel was cited in: Bayesian inference of protein structure from chemical shift data https://peerj.com/articles/861/
+
 * <<afventer opdatering>> CIDER: a pipeline for detecting waves of coordinated transcriptional regulation in gene expression time-course data http://biorxiv.org/content/biorxiv/early/2015/03/17/012518.full.pdf

 * <<afventer opdatering>> GNU Parallel was used (unfortunately without citation) in: MUGBAS: a species free gene-based programme suite for post-GWAS analysis http://www.ncbi.nlm.nih.gov/pubmed/25765345

 taxator-tk http://algbio.cs.uni-duesseldorf.de/webapps/wa-download/ (check it)

+* GNU Parallel was used in: landsat-gifworks https://github.com/KAPPS-/landsat-gifworks
+
+* GNU Parallel (Sebuah Uji Coba) http://kaka.prakasa.my.id/2014/09/04/gnu-parallel-sebuah-uji-coba/
+
+* Bug fixes and man page updates.
+
 GNU Parallel - For people who live life in the parallel lane.


--- a/src/parallel
+++ b/src/parallel
@ -242,20 +242,19 @@ sub find_split_positions {
 	while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
 	    if($opt::regexp) {
 		# If match /$recend$recstart/ => Record position
-		if($buf =~ /(.*$recend)$recstart/os) {
-		    my $i = length($1);
-		    push(@pos,$pos+$i);
+		if($buf =~ /^(.*$recend)$recstart/os) {
 		    # Start looking for next record _after_ this match
-		    $pos += $i;
+		    $pos += length($1);
+		    push(@pos,$pos);
 		    last;
 		}
 	    } else {
 		# If match $recend$recstart => Record position
 		my $i = index64(\$buf,$recendrecstart);
 		if($i != -1) {
-		    push(@pos,$pos+$i);
 		    # Start looking for next record _after_ this match
-		    $pos += $i;
+		    $pos += $i + length($recendrecstart);
+		    push(@pos,$pos);
 		    last;
 		}
 	    }
@ -3134,6 +3133,7 @@ sub bibtex {
 	       " url = {http://www.gnu.org/s/parallel},",
 	       " year = {2011},",
 	       " pages = {42-47}",
+	       " doi = {10.5281/zenodo.16303}",
 	       "}",
 	       "",
 	       "(Feel free to use \\nocite{Tange2011a})",
--- a/src/parallel_tutorial.html
+++ b/src/parallel_tutorial.html
@ -81,6 +81,7 @@
      <li><a href="#Records">Records</a></li>
      <li><a href="#Record-separators">Record separators</a></li>
      <li><a href="#Header">Header</a></li>
+      <li><a href="#pipepart">--pipepart</a></li>
    </ul>
  </li>
  <li><a href="#Shebang">Shebang</a>
@ -1828,6 +1829,18 @@

 <p>Output: Same as above.</p>

+<h2 id="pipepart">--pipepart</h2>
+
+<p>--pipe is not very efficient. It maxes out at around 500 MB/s. --pipepart can easily deliver 5 GB/s. But there are a few limitations. The input has to be a normal file (not a pipe) given by -a or :::: and -L/-l/-N do not work.</p>
+
+<pre><code>  parallel --pipepart -a num1000000 --block 3m wc</code></pre>
+
+<p>Output (the order may be different):</p>
+
+<pre><code> 444443  444444 3000002
+ 428572  428572 3000004
+ 126985  126984  888890</code></pre>
+
 <h1 id="Shebang">Shebang</h1>

 <h2 id="Input-data-and-parallel-command-in-the-same-file">Input data and parallel command in the same file</h2>
--- a/src/parallel_tutorial.pod
+++ b/src/parallel_tutorial.pod
@ -1832,6 +1832,22 @@ If the header is 2 lines, --header 2 will work:

 Output: Same as above.

+=head2 --pipepart
+
+--pipe is not very efficient. It maxes out at around 500
+MB/s. --pipepart can easily deliver 5 GB/s. But there are a few
+limitations. The input has to be a normal file (not a pipe) given by
+-a or :::: and -L/-l/-N do not work.
+
+  parallel --pipepart -a num1000000 --block 3m wc
+
+Output (the order may be different):
+
+ 444443  444444 3000002
+ 428572  428572 3000004
+ 126985  126984  888890
+
+
 =head1 Shebang

 =head2 Input data and parallel command in the same file
--- a/testsuite/tests-to-run/parallel-local-0.3s.sh
+++ b/testsuite/tests-to-run/parallel-local-0.3s.sh
@ -84,4 +84,10 @@ echo '### bug #44546: If --compress-program fails: fail'
  parallel --line-buffer --compress-program false echo \;ls ::: /no-existing; echo $?
  parallel --compress-program false echo \;ls ::: /no-existing; echo $?

+echo '### bug #44614: --pipepart --header off by one'
+  seq 10 >/tmp/parallel_44616; 
+    parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat'; 
+    parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat'; 
+    rm /tmp/parallel_44616
+
 EOF
--- a/testsuite/wanted-results/parallel-local-0.3s
+++ b/testsuite/wanted-results/parallel-local-0.3s
@ -128,3 +128,32 @@ parallel: Error: false -dc failed
 1
 parallel: Error: false -dc failed
 parallel: Error: false -dc failed
+echo '### bug #44614: --pipepart --header off by one'
+### bug #44614: --pipepart --header off by one
+  seq 10 >/tmp/parallel_44616;     parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat';     parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat';     rm /tmp/parallel_44616
+foo
+1
+2
+3
+foo
+4
+5
+6
+foo
+7
+8
+9
+foo
+10
+foo
+1
+2
+3
+foo
+4
+5
+6
+7
+8
+9
+10