diff --git a/doc/release_new_version b/doc/release_new_version index dbe8ca31..76e1fa95 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -224,6 +224,12 @@ New in this release: * GNU Parallel was used in: https://github.com/alexbyrnes/FCC-Political-Ads_The-Code +* GNU Parallel was used in: https://github.com/martymac/fpart + +* GNU Parallel was used in: https://github.com/mehmattski/HybSeqPipeline + +* Using GNU Parallel on a Raspberry Pi cluster: http://www.dcglug.org.uk/cluster-progress/ + * Bug fixes and man page updates. GNU Parallel - For people who live life in the parallel lane. diff --git a/src/parallel b/src/parallel index 5024f12c..9ce96f5e 100755 --- a/src/parallel +++ b/src/parallel @@ -301,7 +301,7 @@ sub spreadstdin { my $recendrecstart = $recend.$recstart; my $chunk_number = 1; my $one_time_through; - my $two_gb = (1<<31)-1; + my $two_gb = 2**31-1; my $blocksize = $opt::blocksize; my $in = *STDIN; my $header = find_header(\$buf,$in); @@ -460,7 +460,7 @@ sub nindex { # the position where the Nth copy is found my ($buf_ref, $str, $n) = @_; my $i = 0; - my $two_gb = (1<<31)-1; + my $two_gb = 2**31-1; for(1..$n) { $i = index64($buf_ref,$str,$i+1); if($i == -1) { last } @@ -514,13 +514,13 @@ sub index64 { # Do index on strings > 2GB. # index in Perl < v5.22 does not work for > 2GB # Input: - # as index + # as index except STR which must be passed as a reference # Output: # as index my $ref = shift; my $match = shift; my $pos = shift || 0; - my $block_size = (1<<31)-1; + my $block_size = 2**31-1; my $strlen = length($$ref); # No point in doing extra work if we don't need to. if($strlen < $block_size) { @@ -546,13 +546,13 @@ sub rindex64 { # Do rindex on strings > 2GB. # rindex in Perl < v5.22 does not work for > 2GB # Input: - # as rindex + # as rindex except STR which must be passed as a reference # Output: # as rindex my $ref = shift; my $match = shift; my $pos = shift; - my $block_size = (1<<31)-1; + my $block_size = 2**31-1; my $strlen = length($$ref); # Default: search from end $pos = defined $pos ? $pos : $strlen; @@ -895,9 +895,9 @@ sub parse_options { not defined $opt::recend) { $opt::recend = "\n"; } if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } $opt::blocksize = multiply_binary_prefix($opt::blocksize); - if($opt::blocksize > (1<<31)-1) { + if($opt::blocksize > 2**31-1) { warning("--blocksize >= 2G causes problems. Using 2G-1\n"); - $opt::blocksize = (1<<31)-1; + $opt::blocksize = 2**31-1; } $opt::memfree = multiply_binary_prefix($opt::memfree); if(defined $opt::controlmaster) { $opt::noctrlc = 1; } @@ -5347,11 +5347,11 @@ sub slot { *IN = *STDIN; } + my $first_round = 1; my $flags; fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle $flags |= O_NONBLOCK; # Add non-blocking to the flags fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle - open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); while(1) { # clear EOF @@ -5361,32 +5361,29 @@ sub slot { if($read) { # We can unlink the file now: The writer has written something -e $unlink_file and unlink $unlink_file; + + if($first_round) { + # Do not start command if there is no input + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + # Blocking print while($buf) { my $bytes_written = syswrite(OUT,$buf); # syswrite may be interrupted by SIGHUP substr($buf,0,$bytes_written) = ""; } - # Something printed: Wait less next time - $sleep /= 2; } else { if(eof(IN) and not $writer_running) { # Writer dead: There will never be more to read => exit + -e $unlink_file and unlink $unlink_file; exit; } - # TODO This could probably be done more efficiently using select(2) - # Nothing read: Wait longer before next read - # Up to 30 milliseconds - $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); - usleep($sleep); + # Wait for something to happen on file handle IN + select(fileno(IN), undef, undef, undef); } } - - sub usleep { - # Sleep this many milliseconds. - my $secs = shift; - select(undef, undef, undef, $secs/1000); - } }; $cattail =~ s/#.*//mg; $cattail =~ s/\s+/ /g; @@ -5487,14 +5484,38 @@ sub grouped { } } +sub empty_input_detector { + # If no input: exec true + # If some input: Pass input as input to pipe + # This avoids starting the $read command if there is no input. + # Returns: + # $cmd = script to prepend to '($real command)' + + # The $tmpfile might exist if run on a remote system - we accept that risk + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); + # Unlink to avoid leaving files if --dry-run or --sshlogin + unlink $tmpfile; + my $cmd = + # Exit value: + # empty input = true + # some input = exit val from command + # sh -c needed as csh cannot hide stderr + qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. + qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. + qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }; + return $cmd; +} + sub filter_through_compress { my $self = shift; # Send stdout to stdin for $opt::compress_program(1) # Send stderr to stdin for $opt::compress_program(2) # cattail get pid: $pid = $self->fh($fdno,'rpid'); my $cattail = cattail(); + for my $fdno (1,2) { - my $wpid = open(my $fdw,"|-","$opt::compress_program >>". + my $wpid = open(my $fdw,"|-", empty_input_detector(). + "($opt::compress_program) >>". $self->fh($fdno,'name')) || die $?; $self->set_fh($fdno,'w',$fdw); $self->set_fh($fdno,'wpid',$wpid); @@ -5931,22 +5952,10 @@ sub wrapped { if(@Global::cat_partials) { # Prepend: # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | - $command = (shift @Global::cat_partials). "|". "(". $command. ")"; + $command = (shift @Global::cat_partials). " | ($command)"; } elsif($opt::pipe) { # Prepend EOF-detector to avoid starting $command if EOF. - # The $tmpfile might exist if run on a remote system - we accept that risk - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); - # Unlink to avoid leaving files if --dry-run or --sshlogin - unlink $tmpfile; - $command = - # Exit value: - # empty input = true - # some input = exit val from command - # sh -c needed as csh cannot hide stderr - qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. - qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. - qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }. - "($command);"; + $command = empty_input_detector(). "($command);"; } if($opt::tmux) { # Wrap command with 'tmux' diff --git a/src/parallel_design.pod b/src/parallel_design.pod index f4de82dd..e6c73b5f 100644 --- a/src/parallel_design.pod +++ b/src/parallel_design.pod @@ -262,6 +262,14 @@ B<--pipepart>/B<--pipe> should be done on the local machine inside B<--tmux> =back +=head2 --block-size adjustment + +Every time GNU B detects a record bigger than +B<--block-size> it increases the block size by 30%. A small +B<--block-size> gives very poor performance; by exponentially +increasing the block size performance will not suffer. + + =head2 Shell shock The shell shock bug in B did not affect GNU B, but the diff --git a/testsuite/tests-to-run/parallel-local9.sh b/testsuite/tests-to-run/parallel-local9.sh index fa6e416d..3c3939d2 100644 --- a/testsuite/tests-to-run/parallel-local9.sh +++ b/testsuite/tests-to-run/parallel-local9.sh @@ -7,7 +7,14 @@ export XAP NICEPAR="nice nice parallel" export NICEPAR -cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj0 -k --joblog /tmp/jl-`basename $0` -L1 +cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj6 -k --joblog /tmp/jl-`basename $0` -L1 +echo 'bug #44250: pxz complains File format not recognized but decompresses anyway' + # The first line dumps core if run from make file. Why?! + stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file + stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{} ::: OK-if-missing-file + stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output + stdout parallel --compress --compress-program pxz true ::: OK-if-no-output + echo 'bug #41613: --compress --line-buffer no newline'; perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo diff --git a/testsuite/wanted-results/parallel-local9 b/testsuite/wanted-results/parallel-local9 index b5143bee..6b3204f9 100644 --- a/testsuite/wanted-results/parallel-local9 +++ b/testsuite/wanted-results/parallel-local9 @@ -1,3 +1,12 @@ +echo 'bug #44250: pxz complains File format not recognized but decompresses anyway' +bug #44250: pxz complains File format not recognized but decompresses anyway + # The first line dumps core if run from make file. Why?! + stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file +Segmentation fault (core dumped) + stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{} ::: OK-if-missing-file +ls: cannot access /OK-if-missing-file: No such file or directory + stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output + stdout parallel --compress --compress-program pxz true ::: OK-if-no-output echo 'bug #41613: --compress --line-buffer no newline'; bug #41613: --compress --line-buffer no newline perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo