From 57c8ca0823f99599f9b4211b1830bcf1e39b839c Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Wed, 25 Mar 2015 00:14:50 +0100 Subject: [PATCH] Fixed bug #44614: --pipepart --header off by one. --- doc/release_new_version | 17 ++++++++--- src/parallel | 12 ++++---- src/parallel_tutorial.html | 13 +++++++++ src/parallel_tutorial.pod | 16 ++++++++++ testsuite/tests-to-run/parallel-local-0.3s.sh | 6 ++++ testsuite/wanted-results/parallel-local-0.3s | 29 +++++++++++++++++++ 6 files changed, 83 insertions(+), 10 deletions(-) diff --git a/doc/release_new_version b/doc/release_new_version index a6d99ac3..533a9792 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -136,12 +136,13 @@ pushd git diff # Recheck OBS https://build.opensuse.org/package/show?package=parallel&project=home%3Atange + +YYYYMMDD=`yyyymmdd` +TAG=MyTag +echo "Released as $YYYYMMDD ('$TAG')." + git commit -a -Released as 20150x22 (''). - -TAG=MyTag -YYYYMMDD=`yyyymmdd` git tag -s -m "Released as $YYYYMMDD ('$TAG')" $TAG git tag -s -m "Released as $YYYYMMDD ('$TAG')" $YYYYMMDD @@ -218,12 +219,20 @@ Haiku of the month: New in this release: +* GNU Parallel was cited in: Bayesian inference of protein structure from chemical shift data https://peerj.com/articles/861/ + * <> CIDER: a pipeline for detecting waves of coordinated transcriptional regulation in gene expression time-course data http://biorxiv.org/content/biorxiv/early/2015/03/17/012518.full.pdf * <> GNU Parallel was used (unfortunately without citation) in: MUGBAS: a species free gene-based programme suite for post-GWAS analysis http://www.ncbi.nlm.nih.gov/pubmed/25765345 taxator-tk http://algbio.cs.uni-duesseldorf.de/webapps/wa-download/ (check it) +* GNU Parallel was used in: landsat-gifworks https://github.com/KAPPS-/landsat-gifworks + +* GNU Parallel (Sebuah Uji Coba) http://kaka.prakasa.my.id/2014/09/04/gnu-parallel-sebuah-uji-coba/ + +* Bug fixes and man page updates. 
+ GNU Parallel - For people who live life in the parallel lane. diff --git a/src/parallel b/src/parallel index 5e639f93..301fbfea 100755 --- a/src/parallel +++ b/src/parallel @@ -242,20 +242,19 @@ sub find_split_positions { while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { if($opt::regexp) { # If match /$recend$recstart/ => Record position - if($buf =~ /(.*$recend)$recstart/os) { - my $i = length($1); - push(@pos,$pos+$i); + if($buf =~ /^(.*$recend)$recstart/os) { # Start looking for next record _after_ this match - $pos += $i; + $pos += length($1); + push(@pos,$pos); last; } } else { # If match $recend$recstart => Record position my $i = index64(\$buf,$recendrecstart); if($i != -1) { - push(@pos,$pos+$i); # Start looking for next record _after_ this match - $pos += $i; + $pos += $i + length($recendrecstart); + push(@pos,$pos); last; } } @@ -3134,6 +3133,7 @@ sub bibtex { " url = {http://www.gnu.org/s/parallel},", " year = {2011},", " pages = {42-47}", + " doi = {10.5281/zenodo.16303}", "}", "", "(Feel free to use \\nocite{Tange2011a})", diff --git a/src/parallel_tutorial.html b/src/parallel_tutorial.html index 81050788..0ce54260 100644 --- a/src/parallel_tutorial.html +++ b/src/parallel_tutorial.html @@ -81,6 +81,7 @@
  • Records
  • Record separators
  • Header
  • +
  • --pipepart
  • Shebang @@ -1828,6 +1829,18 @@

    Output: Same as above.

    +

    --pipepart

    + +

    --pipe is not very efficient: it maxes out at around 500 MB/s, whereas --pipepart can easily deliver 5 GB/s. But --pipepart has a few limitations: the input has to be a normal file (not a pipe) given by -a or ::::, and -L/-l/-N do not work.

    + +
      parallel --pipepart -a num1000000 --block 3m wc
    + +

    Output (the order may be different):

    + +
     444443  444444 3000002
    + 428572  428572 3000004
    + 126985  126984  888890
    +

    Shebang

    Input data and parallel command in the same file

    diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod index b414d12e..cc054f81 100644 --- a/src/parallel_tutorial.pod +++ b/src/parallel_tutorial.pod @@ -1832,6 +1832,22 @@ If the header is 2 lines, --header 2 will work: Output: Same as above. +=head2 --pipepart + +--pipe is not very efficient. It maxes out at around 500 +MB/s. --pipepart can easily deliver 5 GB/s. But there are a few +limitations. The input has to be a normal file (not a pipe) given by +-a or :::: and -L/-l/-N do not work. + + parallel --pipepart -a num1000000 --block 3m wc + +Output (the order may be different): + + 444443 444444 3000002 + 428572 428572 3000004 + 126985 126984 888890 + + =head1 Shebang =head2 Input data and parallel command in the same file diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh index 21319f4f..769beeb1 100644 --- a/testsuite/tests-to-run/parallel-local-0.3s.sh +++ b/testsuite/tests-to-run/parallel-local-0.3s.sh @@ -84,4 +84,10 @@ echo '### bug #44546: If --compress-program fails: fail' parallel --line-buffer --compress-program false echo \;ls ::: /no-existing; echo $? parallel --compress-program false echo \;ls ::: /no-existing; echo $? 
+echo '### bug #44614: --pipepart --header off by one' + seq 10 >/tmp/parallel_44616; + parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat'; + parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat'; + rm /tmp/parallel_44616 + EOF diff --git a/testsuite/wanted-results/parallel-local-0.3s b/testsuite/wanted-results/parallel-local-0.3s index 24ab3cc5..71a32f5a 100644 --- a/testsuite/wanted-results/parallel-local-0.3s +++ b/testsuite/wanted-results/parallel-local-0.3s @@ -128,3 +128,32 @@ parallel: Error: false -dc failed 1 parallel: Error: false -dc failed parallel: Error: false -dc failed +echo '### bug #44614: --pipepart --header off by one' +### bug #44614: --pipepart --header off by one + seq 10 >/tmp/parallel_44616; parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat'; parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat'; rm /tmp/parallel_44616 +foo +1 +2 +3 +foo +4 +5 +6 +foo +7 +8 +9 +foo +10 +foo +1 +2 +3 +foo +4 +5 +6 +7 +8 +9 +10