From 57c8ca0823f99599f9b4211b1830bcf1e39b839c Mon Sep 17 00:00:00 2001
From: Ole Tange <ole@tange.dk>
Date: Wed, 25 Mar 2015 00:14:50 +0100
Subject: [PATCH] Fixed bug #44614: --pipepart --header off by one.

---
 doc/release_new_version                       | 17 ++++++++---
 src/parallel                                  | 12 ++++----
 src/parallel_tutorial.html                    | 13 +++++++++
 src/parallel_tutorial.pod                     | 16 ++++++++++
 testsuite/tests-to-run/parallel-local-0.3s.sh |  6 ++++
 testsuite/wanted-results/parallel-local-0.3s  | 29 +++++++++++++++++++
 6 files changed, 83 insertions(+), 10 deletions(-)
diff --git a/doc/release_new_version b/doc/release_new_version
index a6d99ac3..533a9792 100644
--- a/doc/release_new_version
+++ b/doc/release_new_version
@@ -136,12 +136,13 @@ pushd
 git diff
 
 # Recheck OBS https://build.opensuse.org/package/show?package=parallel&project=home%3Atange
+
+YYYYMMDD=`yyyymmdd`
+TAG=MyTag
+echo "Released as $YYYYMMDD ('$TAG')."
+
 git commit -a
 
-Released as 20150x22 ('').
-
-TAG=MyTag
-YYYYMMDD=`yyyymmdd`
 git tag -s -m "Released as $YYYYMMDD ('$TAG')" $TAG
 git tag -s -m "Released as $YYYYMMDD ('$TAG')" $YYYYMMDD
 
@@ -218,12 +219,20 @@ Haiku of the month:
 
 New in this release:
 
+* GNU Parallel was cited in: Bayesian inference of protein structure from chemical shift data https://peerj.com/articles/861/
+
 * <<afventer opdatering>> CIDER: a pipeline for detecting waves of coordinated transcriptional regulation in gene expression time-course data http://biorxiv.org/content/biorxiv/early/2015/03/17/012518.full.pdf
 
 * <<afventer opdatering>> GNU Parallel was used (unfortunately without citation) in: MUGBAS: a species free gene-based programme suite for post-GWAS analysis http://www.ncbi.nlm.nih.gov/pubmed/25765345
 
 taxator-tk http://algbio.cs.uni-duesseldorf.de/webapps/wa-download/ (check it)
 
+* GNU Parallel was used in: landsat-gifworks https://github.com/KAPPS-/landsat-gifworks
+
+* GNU Parallel (Sebuah Uji Coba) http://kaka.prakasa.my.id/2014/09/04/gnu-parallel-sebuah-uji-coba/
+
+* Bug fixes and man page updates.
+
 GNU Parallel - For people who live life in the parallel lane.
 
 
diff --git a/src/parallel b/src/parallel
index 5e639f93..301fbfea 100755
--- a/src/parallel
+++ b/src/parallel
@@ -242,20 +242,19 @@ sub find_split_positions {
 	while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
 	    if($opt::regexp) {
 		# If match /$recend$recstart/ => Record position
-		if($buf =~ /(.*$recend)$recstart/os) {
-		    my $i = length($1);
-		    push(@pos,$pos+$i);
+		if($buf =~ /^(.*$recend)$recstart/os) {
 		    # Start looking for next record _after_ this match
-		    $pos += $i;
+		    $pos += length($1);
+		    push(@pos,$pos);
 		    last;
 		}
 	    } else {
 		# If match $recend$recstart => Record position
 		my $i = index64(\$buf,$recendrecstart);
 		if($i != -1) {
-		    push(@pos,$pos+$i);
 		    # Start looking for next record _after_ this match
-		    $pos += $i;
+		    $pos += $i + length($recend);
+		    push(@pos,$pos);
 		    last;
 		}
 	    }
@@ -3134,6 +3133,7 @@ sub bibtex {
 	       " url = {http://www.gnu.org/s/parallel},",
 	       " year = {2011},",
+	       " doi = {10.5281/zenodo.16303},",
 	       " pages = {42-47}"
 	       "}",
 	       "",
 	       "(Feel free to use \\nocite{Tange2011a})",
diff --git a/src/parallel_tutorial.html b/src/parallel_tutorial.html
index 81050788..0ce54260 100644
--- a/src/parallel_tutorial.html
+++ b/src/parallel_tutorial.html
@@ -81,6 +81,7 @@
       <li><a href="#Records">Records</a></li>
       <li><a href="#Record-separators">Record separators</a></li>
       <li><a href="#Header">Header</a></li>
+      <li><a href="#pipepart">--pipepart</a></li>
     </ul>
   </li>
   <li><a href="#Shebang">Shebang</a>
@@ -1828,6 +1829,18 @@
 
 <p>Output: Same as above.</p>
 
+<h2 id="pipepart">--pipepart</h2>
+
+<p>--pipe is not very efficient. It maxes out at around 500 MB/s. --pipepart can easily deliver 5 GB/s. But there are a few limitations. The input has to be a normal file (not a pipe) given by -a or :::: and -L/-l/-N do not work.</p>
+
+<pre><code>  parallel --pipepart -a num1000000 --block 3m wc</code></pre>
+
+<p>Output (the order may be different):</p>
+
+<pre><code> 444444  444444 3000003
+ 428572  428572 3000004
+ 126984  126984  888889</code></pre>
+
 <h1 id="Shebang">Shebang</h1>
 
 <h2 id="Input-data-and-parallel-command-in-the-same-file">Input data and parallel command in the same file</h2>
diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod
index b414d12e..cc054f81 100644
--- a/src/parallel_tutorial.pod
+++ b/src/parallel_tutorial.pod
@@ -1832,6 +1832,22 @@ If the header is 2 lines, --header 2 will work:
 
 Output: Same as above.
 
+=head2 --pipepart
+
+--pipe is not very efficient. It maxes out at around 500
+MB/s. --pipepart can easily deliver 5 GB/s. But there are a few
+limitations. The input has to be a normal file (not a pipe) given by
+-a or :::: and -L/-l/-N do not work.
+
+  parallel --pipepart -a num1000000 --block 3m wc
+
+Output (the order may be different):
+
+ 444444  444444 3000003
+ 428572  428572 3000004
+ 126984  126984  888889
+
+
 =head1 Shebang
 
 =head2 Input data and parallel command in the same file
diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh
index 21319f4f..769beeb1 100644
--- a/testsuite/tests-to-run/parallel-local-0.3s.sh
+++ b/testsuite/tests-to-run/parallel-local-0.3s.sh
@@ -84,4 +84,10 @@ echo '### bug #44546: If --compress-program fails: fail'
   parallel --line-buffer --compress-program false echo \;ls ::: /no-existing; echo $?
   parallel --compress-program false echo \;ls ::: /no-existing; echo $?
 
+echo '### bug #44614: --pipepart --header off by one'
+  seq 10 >/tmp/parallel_44616; 
+    parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat'; 
+    parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat'; 
+    rm /tmp/parallel_44616
+
 EOF
diff --git a/testsuite/wanted-results/parallel-local-0.3s b/testsuite/wanted-results/parallel-local-0.3s
index 24ab3cc5..71a32f5a 100644
--- a/testsuite/wanted-results/parallel-local-0.3s
+++ b/testsuite/wanted-results/parallel-local-0.3s
@@ -128,3 +128,32 @@ parallel: Error: false -dc failed
 1
 parallel: Error: false -dc failed
 parallel: Error: false -dc failed
+echo '### bug #44614: --pipepart --header off by one'
+### bug #44614: --pipepart --header off by one
+  seq 10 >/tmp/parallel_44616;     parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat';     parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat';     rm /tmp/parallel_44616
+foo
+1
+2
+3
+foo
+4
+5
+6
+foo
+7
+8
+9
+foo
+10
+foo
+1
+2
+3
+foo
+4
+5
+6
+7
+8
+9
+10