Fixed bug #44250: pxz complains File format not recognized but decompresses anyway.

2024-11-22 22:17:54 +00:00 · 2015-03-03 01:05:36 +01:00 · 2015-03-03 01:05:36 +01:00 · 96cfacb830
parent d19de35acb
commit 96cfacb830
5 changed files with 77 additions and 38 deletions
--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -224,6 +224,12 @@ New in this release:
 * GNU Parallel was used in: https://github.com/alexbyrnes/FCC-Political-Ads_The-Code
 * GNU Parallel was used in: https://github.com/martymac/fpart
 * GNU Parallel was used in: https://github.com/mehmattski/HybSeqPipeline
 * Using GNU Parallel on a Raspberry Pi cluster: http://www.dcglug.org.uk/cluster-progress/
 * Bug fixes and man page updates.
 GNU Parallel - For people who live life in the parallel lane.
--- a/src/parallel
+++ b/src/parallel
@ -301,7 +301,7 @@ sub spreadstdin {
    my $recendrecstart = $recend.$recstart;
    my $chunk_number = 1;
    my $one_time_through;
-    my $two_gb = (1<<31)-1;
+    my $two_gb = 2**31-1;
    my $blocksize = $opt::blocksize;
    my $in = *STDIN;
    my $header = find_header(\$buf,$in);
@ -460,7 +460,7 @@ sub nindex {
    #   the position where the Nth copy is found
    my ($buf_ref, $str, $n) = @_;
    my $i = 0;
-    my $two_gb = (1<<31)-1;
+    my $two_gb = 2**31-1;
    for(1..$n) {
 	$i = index64($buf_ref,$str,$i+1);
 	if($i == -1) { last }
@ -514,13 +514,13 @@ sub index64 {
    # Do index on strings > 2GB.
    # index in Perl < v5.22 does not work for > 2GB
    # Input:
-    #   as index
+    #   as index except STR which must be passed as a reference
    # Output:
    #   as index
    my $ref = shift;
    my $match = shift;
    my $pos = shift || 0;
-    my $block_size = (1<<31)-1;
+    my $block_size = 2**31-1;
    my $strlen   = length($$ref);
    # No point in doing extra work if we don't need to.
    if($strlen < $block_size) {
@ -546,13 +546,13 @@ sub rindex64 {
    # Do rindex on strings > 2GB.
    # rindex in Perl < v5.22 does not work for > 2GB
    # Input:
-    #   as rindex
+    #   as rindex except STR which must be passed as a reference
    # Output:
    #   as rindex
    my $ref = shift;
    my $match = shift;
    my $pos = shift;
-    my $block_size = (1<<31)-1;
+    my $block_size = 2**31-1;
    my $strlen = length($$ref);
    # Default: search from end
    $pos = defined $pos ? $pos : $strlen;
@ -895,9 +895,9 @@ sub parse_options {
       not defined $opt::recend) { $opt::recend = "\n"; }
    if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
    $opt::blocksize = multiply_binary_prefix($opt::blocksize);
-    if($opt::blocksize > (1<<31)-1) {
+    if($opt::blocksize > 2**31-1) {
 	warning("--blocksize >= 2G causes problems. Using 2G-1\n");
-	$opt::blocksize = (1<<31)-1;
+	$opt::blocksize = 2**31-1;
    }
    $opt::memfree = multiply_binary_prefix($opt::memfree);
    if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
@ -5347,11 +5347,11 @@ sub slot {
 		    *IN = *STDIN;
 		}
 		my $first_round = 1;
 		my $flags;
 		fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
 		$flags |= O_NONBLOCK; # Add non-blocking to the flags
 		fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
 		open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
 		while(1) {
 		    # clear EOF
@ -5361,32 +5361,29 @@ sub slot {
 		    if($read) {
 			# We can unlink the file now: The writer has written something
 			-e $unlink_file and unlink $unlink_file;
 			if($first_round) {
 			    # Do not start command if there is no input
 			    $first_round = 0;
 			    open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
 			}
 			# Blocking print
 			while($buf) {
 			    my $bytes_written = syswrite(OUT,$buf);
 			    # syswrite may be interrupted by SIGHUP
 			    substr($buf,0,$bytes_written) = "";
 			}
 			# Something printed: Wait less next time
 			$sleep /= 2;
 		    } else {
 			if(eof(IN) and not $writer_running) {
 			    # Writer dead: There will never be more to read => exit
 			    -e $unlink_file and unlink $unlink_file;
 			    exit;
 			}
-			# TODO This could probably be done more efficiently using select(2)
+			# Wait for something to happen on file handle IN
-			# Nothing read: Wait longer before next read
+			select(fileno(IN), undef, undef, undef);
 			# Up to 30 milliseconds
 			$sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
 			usleep($sleep);
 		    }
 		}
 		sub usleep {
 		    # Sleep this many milliseconds.
 		    my $secs = shift;
 		    select(undef, undef, undef, $secs/1000);
 		}
 	    };
 	    $cattail =~ s/#.*//mg;
 	    $cattail =~ s/\s+/ /g;
@ -5487,14 +5484,38 @@ sub grouped {
    }
 }
 sub empty_input_detector {
    # If no input: exec true
    # If some input: Pass input as input to pipe
    # This avoids starting the $read command if there is no input.
    # Returns:
    #   $cmd = script to prepend to '($real command)'
    # The $tmpfile might exist if run on a remote system - we accept that risk
    my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
    # Unlink to avoid leaving files if --dry-run or --sshlogin
    unlink $tmpfile;
    my $cmd =
 	# Exit value:
 	#   empty input = true
 	#   some input = exit val from command
 	# sh -c needed as csh cannot hide stderr
 	qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
 	qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
 	qq{ (cat $tmpfile; rm $tmpfile; cat - ) | };
    return $cmd;
 }
 sub filter_through_compress {
    my $self = shift;
    # Send stdout to stdin for $opt::compress_program(1)
    # Send stderr to stdin for $opt::compress_program(2)
    # cattail get pid:  $pid = $self->fh($fdno,'rpid');
    my $cattail = cattail();
    for my $fdno (1,2) {
-	my $wpid = open(my $fdw,"|-","$opt::compress_program >>".
+	my $wpid = open(my $fdw,"|-", empty_input_detector().
 			"($opt::compress_program) >>".
 			$self->fh($fdno,'name')) || die $?;
 	$self->set_fh($fdno,'w',$fdw);
 	$self->set_fh($fdno,'wpid',$wpid);
@ -5931,22 +5952,10 @@ sub wrapped {
 	if(@Global::cat_partials) {
 	    # Prepend:
 	    # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }'  0 0 0 11 |
-	    $command = (shift @Global::cat_partials). "|". "(". $command. ")";
+	    $command = (shift @Global::cat_partials). " | ($command)";
 	} elsif($opt::pipe) {
 	    # Prepend EOF-detector to avoid starting $command if EOF.
-	    # The $tmpfile might exist if run on a remote system - we accept that risk
+	    $command = empty_input_detector(). "($command);";
 	    my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
 	    # Unlink to avoid leaving files if --dry-run or --sshlogin
 	    unlink $tmpfile;
 	    $command =
 		# Exit value:
 		#   empty input = true
 		#   some input = exit val from command
 		# sh -c needed as csh cannot hide stderr
 		qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
 		qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
 		qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }.
 		"($command);";
 	}
 	if($opt::tmux) {
 	    # Wrap command with 'tmux'
--- a/src/parallel_design.pod
+++ b/src/parallel_design.pod
@ -262,6 +262,14 @@ B<--pipepart>/B<--pipe> should be done on the local machine inside B<--tmux>
 =back
 =head2 --block-size adjustment
 Every time GNU B<parallel> detects a record bigger than
 B<--block-size> it increases the block size by 30%. A small
 B<--block-size> gives very poor performance; by exponentially
 increasing the block size performance will not suffer.
 =head2 Shell shock
 The shell shock bug in B<bash> did not affect GNU B<parallel>, but the
--- a/testsuite/tests-to-run/parallel-local9.sh
+++ b/testsuite/tests-to-run/parallel-local9.sh
@ -7,7 +7,14 @@ export XAP
 NICEPAR="nice nice parallel"
 export NICEPAR
-cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj0 -k --joblog /tmp/jl-`basename $0` -L1
+cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj6 -k --joblog /tmp/jl-`basename $0` -L1
 echo 'bug #44250: pxz complains File format not recognized but decompresses anyway'
  # The first line dumps core if run from make file. Why?!
  stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file
  stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{}  ::: OK-if-missing-file
  stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output
  stdout parallel --compress --compress-program pxz true ::: OK-if-no-output
 echo 'bug #41613: --compress --line-buffer no newline';
  perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo
--- a/testsuite/wanted-results/parallel-local9
+++ b/testsuite/wanted-results/parallel-local9
@ -1,3 +1,12 @@
 echo 'bug #44250: pxz complains File format not recognized but decompresses anyway'
 bug #44250: pxz complains File format not recognized but decompresses anyway
  # The first line dumps core if run from make file. Why?!
  stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file
 Segmentation fault (core dumped)
  stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{}  ::: OK-if-missing-file
 ls: cannot access /OK-if-missing-file: No such file or directory
  stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output
  stdout parallel --compress --compress-program pxz true ::: OK-if-no-output
 echo 'bug #41613: --compress --line-buffer no newline';
 bug #41613: --compress --line-buffer no newline
  perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo