diff --git a/doc/release_new_version b/doc/release_new_version index 610cfbe0..031f53fe 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -228,6 +228,8 @@ Haiku of the month: New in this release: +* GNU Parallel was cited in: Energy Efficient, High-speed Communication in WSNs https://gupea.ub.gu.se/bitstream/2077/35801/1/gupea_2077_35801_1.pdf + * GNU Parallel was cited in: Ferroelectric contributions to anomalous hysteresis in hybrid perovskite solar cells http://arxiv.org/pdf/1405.5810.pdf * Processes Paralleling to Speed up Computing and Tasks Execution in Linux http://kukuruku.co/hub/nix/processes-paralleling-to-speed-up-computing-and-tasks-execution-in-linux diff --git a/src/parallel b/src/parallel index 57547f50..0fbb1340 100755 --- a/src/parallel +++ b/src/parallel @@ -2352,13 +2352,16 @@ sub reaper { } if(not $job->should_be_retried()) { + # The job is done + # Free the jobslot + push @Global::slots, $job->slot(); if($opt::timeout) { # Update average runtime for timeout $Global::timeoutq->update_delta_time($job->runtime()); } # Force printing now if the job failed and we are going to exit - my $print_now = ($job->exitstatus() and - $opt::halt_on_error and $opt::halt_on_error == 2); + my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 + and $job->exitstatus()); if($Global::keeporder and not $print_now) { $Private::print_later{$job->seq()} = $job; $Private::job_end_sequence ||= 1; @@ -4182,6 +4185,11 @@ sub seq { return $self->{'commandline'}->seq(); } +sub slot { + my $self = shift; + return $self->{'commandline'}->slot(); +} + sub cattail { # Returns: # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] @@ -5445,11 +5453,14 @@ sub seq { sub slot { my $self = shift; - # $Global::max_jobs_running is 0 while computing $Global::max_jobs_running - # So assume it is huge - my $mod = ($Global::max_jobs_running || 1000000); - my $add = $Global::max_jobs_running ? 1 : 0; - return ($self->{'seq'} - $add) % $mod + $add; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $Global::max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; } sub populate { diff --git a/src/parallel.pdf b/src/parallel.pdf index e7c19c71..46c8edba 100644 Binary files a/src/parallel.pdf and b/src/parallel.pdf differ diff --git a/src/parallel.pod b/src/parallel.pod index ede3bf20..067e049b 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -168,10 +168,12 @@ The replacement string B<{#}> can be changed with B<--seqreplace>. To understand replacement strings see B<{}>. -=item B<{%}> (experimental) +=item B<{%}> (alpha) Job slot number. This replacement string will be replaced by the job's -sequence number modulo the number of jobs to run in parallel. +slot number between 1 and number of jobs to run in parallel. There +will never be 2 jobs running at the same time with the same job slot +number. The replacement string B<{%}> can be changed with B<--slotreplace>. @@ -1224,10 +1226,11 @@ See also B<--joblog>, B<--resume>. =item B<--retries> I -If a job fails, retry it on another computer. Do this I times. If -there are fewer than I computers in B<--sshlogin> GNU B will -re-use the computers. This is useful if some jobs fail for no apparent -reason (such as network failure). +If a job fails, retry it on another computer on which it has not +failed. Do this I times. If there are fewer than I computers in +B<--sshlogin> GNU B will re-use all the computers. This is +useful if some jobs fail for no apparent reason (such as network +failure). =item B<--return> I diff --git a/src/parallel.texi b/src/parallel.texi index 768a897a..725110f6 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -244,11 +244,13 @@ The replacement string @strong{@{#@}} can be changed with @strong{--seqreplace}. To understand replacement strings see @strong{@{@}}. -@item @strong{@{%@}} (experimental) -@anchor{@strong{@{%@}} (experimental)} +@item @strong{@{%@}} (alpha) +@anchor{@strong{@{%@}} (alpha)} Job slot number. This replacement string will be replaced by the job's -sequence number modulo the number of jobs to run in parallel. +slot number between 1 and number of jobs to run in parallel. There +will never be 2 jobs running at the same time with the same job slot +number. The replacement string @strong{@{%@}} can be changed with @strong{--slotreplace}. @@ -1366,10 +1368,11 @@ See also @strong{--joblog}, @strong{--resume}. @item @strong{--retries} @emph{n} @anchor{@strong{--retries} @emph{n}} -If a job fails, retry it on another computer. Do this @emph{n} times. If -there are fewer than @emph{n} computers in @strong{--sshlogin} GNU @strong{parallel} will -re-use the computers. This is useful if some jobs fail for no apparent -reason (such as network failure). +If a job fails, retry it on another computer on which it has not +failed. Do this @emph{n} times. If there are fewer than @emph{n} computers in +@strong{--sshlogin} GNU @strong{parallel} will re-use all the computers. This is +useful if some jobs fail for no apparent reason (such as network +failure). @item @strong{--return} @emph{filename} @anchor{@strong{--return} @emph{filename}} @@ -2849,6 +2852,13 @@ files are passed to the second @strong{parallel} that runs @strong{sort -m} on t files before it removes the files. The output is saved to @strong{bigfile.sort}. +GNU @strong{parallel}'s @strong{--pipe} maxes out at around 100 MB/s because every +byte has to be copied through GNU @strong{parallel}. But if @strong{bigfile} is a +real (seekable) file GNU @strong{parallel} can by-pass the copying and send +the parts directly to the program: + +@strong{parallel --pipepart --block 100m -a bigfile --files sort | parallel -Xj1 sort -m @{@} ';' rm @{@} }>@strong{bigfile.sort} + @node EXAMPLE: Running more than 500 jobs workaround @chapter EXAMPLE: Running more than 500 jobs workaround diff --git a/src/parallel_tutorial.1 b/src/parallel_tutorial.1 index 6a2f0bb3..050d00c0 100644 --- a/src/parallel_tutorial.1 +++ b/src/parallel_tutorial.1 @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "PARALLEL_TUTORIAL 1" -.TH PARALLEL_TUTORIAL 1 "2014-01-25" "20140522" "parallel" +.TH PARALLEL_TUTORIAL 1 "2014-05-31" "20140522" "parallel" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -483,8 +483,8 @@ Output (the order may be different): .Ve .SS "Replacement strings" .IX Subsection "Replacement strings" -\fIThe 5 replacement strings\fR -.IX Subsection "The 5 replacement strings" +\fIThe 6 replacement strings\fR +.IX Subsection "The 6 replacement strings" .PP \&\s-1GNU\s0 Parallel has several replacement strings. If no replacement strings are used the default is to append {}: @@ -572,6 +572,21 @@ Output (the order may be different): \& 2 \& 3 .Ve +.PP +The replacement string {%} gives the job slot number (between 1 and +number of jobs to run in parallel): +.PP +.Vb 1 +\& parallel \-j 2 echo {%} ::: A B C +.Ve +.PP +Output (the order may be different): +.PP +.Vb 3 +\& 1 +\& 2 +\& 1 +.Ve .SS "Changing the replacement strings" .IX Subsection "Changing the replacement strings" The replacement string {} can be changed with \-I: @@ -647,6 +662,20 @@ Output (the order may be different): \& 2 \& 3 .Ve +.PP +The replacement string {%} can be changed with \-\-slotreplace: +.PP +.Vb 1 +\& parallel \-j2 \-\-slotreplace ,, echo ,, ::: A B C +.Ve +.PP +Output (the order may be different): +.PP +.Vb 3 +\& 1 +\& 2 +\& 1 +.Ve .SS "Positional replacement strings" .IX Subsection "Positional replacement strings" With multiple input sources the argument from the individual input diff --git a/src/parallel_tutorial.html b/src/parallel_tutorial.html index 47e7c4f8..11dab734 100644 --- a/src/parallel_tutorial.html +++ b/src/parallel_tutorial.html @@ -41,7 +41,7 @@
  • Replacement strings
  • Changing the replacement strings
  • @@ -390,7 +390,7 @@ exported using 'export -f':

    Replacement strings

    -

    The 5 replacement strings

    +

    The 6 replacement strings

    GNU Parallel has several replacement strings. If no replacement strings are used the default is to append {}:

    @@ -436,6 +436,15 @@ strings are used the default is to append {}:

    1 2 3
    +

    The replacement string {%} gives the job slot number (between 1 and +number of jobs to run in parallel):

    +
    +  parallel -j 2 echo {%} ::: A B C
    +

    Output (the order may be different):

    +
    +  1
    +  2
    +  1

    Changing the replacement strings

    @@ -477,6 +486,14 @@ strings are used the default is to append {}:

    1 2 3 +

    The replacement string {%} can be changed with --slotreplace:

    +
    +  parallel -j2 --slotreplace ,, echo ,, ::: A B C
    +

    Output (the order may be different):

    +
    +  1
    +  2
    +  1

    Positional replacement strings

    diff --git a/src/parallel_tutorial.pdf b/src/parallel_tutorial.pdf index 5d24b43a..b855fe22 100644 Binary files a/src/parallel_tutorial.pdf and b/src/parallel_tutorial.pdf differ diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod index 35322401..5b4dab0c 100644 --- a/src/parallel_tutorial.pod +++ b/src/parallel_tutorial.pod @@ -15,7 +15,7 @@ To run this tutorial you must have the following: =over 9 -=item parallel >= version 20130814 +=item parallel >= version 20140622 Install the newest version with: @@ -300,7 +300,7 @@ Output (the order may be different): =head2 Replacement strings -=head3 The 5 replacement strings +=head3 The 7 replacement strings GNU Parallel has several replacement strings. If no replacement strings are used the default is to append {}: @@ -361,6 +361,17 @@ Output (the order may be different): 2 3 +The replacement string {%} gives the job slot number (between 1 and +number of jobs to run in parallel): + + parallel -j 2 echo {%} ::: A B C + +Output (the order may be different): + + 1 + 2 + 1 + =head2 Changing the replacement strings The replacement string {} can be changed with -I: @@ -413,6 +424,16 @@ Output (the order may be different): 2 3 +The replacement string {%} can be changed with --slotreplace: + + parallel -j2 --slotreplace ,, echo ,, ::: A B C + +Output (the order may be different): + + 1 + 2 + 1 + =head2 Positional replacement strings With multiple input sources the argument from the individual input @@ -762,7 +783,7 @@ Output: with half a line of another job. That has happend in the second line, where the line '4-middle' is mixed with '2-start'. -To avoid this use --linebuffer (which, however, is much slower): +To avoid this use --linebuffer: parallel -j2 --linebuffer 'printf "%s-start\n%s" {} {};sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1 @@ -2055,7 +2076,7 @@ If GNU Parallel saves you money: =back -(C) 20130822 Ole Tange GPLv3 +(C) 20140622 Ole Tange GPLv3 =cut