Fixed bug #44250: pxz complains File format not recognized but decompresses anyway.

This commit is contained in:
Ole Tange 2015-03-03 01:05:36 +01:00
parent d19de35acb
commit 96cfacb830
5 changed files with 77 additions and 38 deletions

View file

@ -224,6 +224,12 @@ New in this release:
* GNU Parallel was used in: https://github.com/alexbyrnes/FCC-Political-Ads_The-Code * GNU Parallel was used in: https://github.com/alexbyrnes/FCC-Political-Ads_The-Code
* GNU Parallel was used in: https://github.com/martymac/fpart
* GNU Parallel was used in: https://github.com/mehmattski/HybSeqPipeline
* Using GNU Parallel on a Raspberry Pi cluster: http://www.dcglug.org.uk/cluster-progress/
* Bug fixes and man page updates. * Bug fixes and man page updates.
GNU Parallel - For people who live life in the parallel lane. GNU Parallel - For people who live life in the parallel lane.

View file

@ -301,7 +301,7 @@ sub spreadstdin {
my $recendrecstart = $recend.$recstart; my $recendrecstart = $recend.$recstart;
my $chunk_number = 1; my $chunk_number = 1;
my $one_time_through; my $one_time_through;
my $two_gb = (1<<31)-1; my $two_gb = 2**31-1;
my $blocksize = $opt::blocksize; my $blocksize = $opt::blocksize;
my $in = *STDIN; my $in = *STDIN;
my $header = find_header(\$buf,$in); my $header = find_header(\$buf,$in);
@ -460,7 +460,7 @@ sub nindex {
# the position where the Nth copy is found # the position where the Nth copy is found
my ($buf_ref, $str, $n) = @_; my ($buf_ref, $str, $n) = @_;
my $i = 0; my $i = 0;
my $two_gb = (1<<31)-1; my $two_gb = 2**31-1;
for(1..$n) { for(1..$n) {
$i = index64($buf_ref,$str,$i+1); $i = index64($buf_ref,$str,$i+1);
if($i == -1) { last } if($i == -1) { last }
@ -514,13 +514,13 @@ sub index64 {
# Do index on strings > 2GB. # Do index on strings > 2GB.
# index in Perl < v5.22 does not work for > 2GB # index in Perl < v5.22 does not work for > 2GB
# Input: # Input:
# as index # as index except STR which must be passed as a reference
# Output: # Output:
# as index # as index
my $ref = shift; my $ref = shift;
my $match = shift; my $match = shift;
my $pos = shift || 0; my $pos = shift || 0;
my $block_size = (1<<31)-1; my $block_size = 2**31-1;
my $strlen = length($$ref); my $strlen = length($$ref);
# No point in doing extra work if we don't need to. # No point in doing extra work if we don't need to.
if($strlen < $block_size) { if($strlen < $block_size) {
@ -546,13 +546,13 @@ sub rindex64 {
# Do rindex on strings > 2GB. # Do rindex on strings > 2GB.
# rindex in Perl < v5.22 does not work for > 2GB # rindex in Perl < v5.22 does not work for > 2GB
# Input: # Input:
# as rindex # as rindex except STR which must be passed as a reference
# Output: # Output:
# as rindex # as rindex
my $ref = shift; my $ref = shift;
my $match = shift; my $match = shift;
my $pos = shift; my $pos = shift;
my $block_size = (1<<31)-1; my $block_size = 2**31-1;
my $strlen = length($$ref); my $strlen = length($$ref);
# Default: search from end # Default: search from end
$pos = defined $pos ? $pos : $strlen; $pos = defined $pos ? $pos : $strlen;
@ -895,9 +895,9 @@ sub parse_options {
not defined $opt::recend) { $opt::recend = "\n"; } not defined $opt::recend) { $opt::recend = "\n"; }
if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
$opt::blocksize = multiply_binary_prefix($opt::blocksize); $opt::blocksize = multiply_binary_prefix($opt::blocksize);
if($opt::blocksize > (1<<31)-1) { if($opt::blocksize > 2**31-1) {
warning("--blocksize >= 2G causes problems. Using 2G-1\n"); warning("--blocksize >= 2G causes problems. Using 2G-1\n");
$opt::blocksize = (1<<31)-1; $opt::blocksize = 2**31-1;
} }
$opt::memfree = multiply_binary_prefix($opt::memfree); $opt::memfree = multiply_binary_prefix($opt::memfree);
if(defined $opt::controlmaster) { $opt::noctrlc = 1; } if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
@ -5347,11 +5347,11 @@ sub slot {
*IN = *STDIN; *IN = *STDIN;
} }
my $first_round = 1;
my $flags; my $flags;
fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
$flags |= O_NONBLOCK; # Add non-blocking to the flags $flags |= O_NONBLOCK; # Add non-blocking to the flags
fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
while(1) { while(1) {
# clear EOF # clear EOF
@ -5361,32 +5361,29 @@ sub slot {
if($read) { if($read) {
# We can unlink the file now: The writer has written something # We can unlink the file now: The writer has written something
-e $unlink_file and unlink $unlink_file; -e $unlink_file and unlink $unlink_file;
if($first_round) {
# Do not start command if there is no input
$first_round = 0;
open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
}
# Blocking print # Blocking print
while($buf) { while($buf) {
my $bytes_written = syswrite(OUT,$buf); my $bytes_written = syswrite(OUT,$buf);
# syswrite may be interrupted by SIGHUP # syswrite may be interrupted by SIGHUP
substr($buf,0,$bytes_written) = ""; substr($buf,0,$bytes_written) = "";
} }
# Something printed: Wait less next time
$sleep /= 2;
} else { } else {
if(eof(IN) and not $writer_running) { if(eof(IN) and not $writer_running) {
# Writer dead: There will never be more to read => exit # Writer dead: There will never be more to read => exit
-e $unlink_file and unlink $unlink_file;
exit; exit;
} }
# TODO This could probably be done more efficiently using select(2) # Wait for something to happen on file handle IN
# Nothing read: Wait longer before next read select(fileno(IN), undef, undef, undef);
# Up to 30 milliseconds
$sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
usleep($sleep);
} }
} }
sub usleep {
# Sleep this many milliseconds.
my $secs = shift;
select(undef, undef, undef, $secs/1000);
}
}; };
$cattail =~ s/#.*//mg; $cattail =~ s/#.*//mg;
$cattail =~ s/\s+/ /g; $cattail =~ s/\s+/ /g;
@ -5487,14 +5484,38 @@ sub grouped {
} }
} }
sub empty_input_detector {
# If no input: exec true
# If some input: Pass input as input to pipe
# This avoids starting the $read command if there is no input.
# Returns:
# $cmd = script to prepend to '($real command)'
# The $tmpfile might exist if run on a remote system - we accept that risk
my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
# Unlink to avoid leaving files if --dry-run or --sshlogin
unlink $tmpfile;
my $cmd =
# Exit value:
# empty input = true
# some input = exit val from command
# sh -c needed as csh cannot hide stderr
qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
qq{ (cat $tmpfile; rm $tmpfile; cat - ) | };
return $cmd;
}
sub filter_through_compress { sub filter_through_compress {
my $self = shift; my $self = shift;
# Send stdout to stdin for $opt::compress_program(1) # Send stdout to stdin for $opt::compress_program(1)
# Send stderr to stdin for $opt::compress_program(2) # Send stderr to stdin for $opt::compress_program(2)
# cattail get pid: $pid = $self->fh($fdno,'rpid'); # cattail get pid: $pid = $self->fh($fdno,'rpid');
my $cattail = cattail(); my $cattail = cattail();
for my $fdno (1,2) { for my $fdno (1,2) {
my $wpid = open(my $fdw,"|-","$opt::compress_program >>". my $wpid = open(my $fdw,"|-", empty_input_detector().
"($opt::compress_program) >>".
$self->fh($fdno,'name')) || die $?; $self->fh($fdno,'name')) || die $?;
$self->set_fh($fdno,'w',$fdw); $self->set_fh($fdno,'w',$fdw);
$self->set_fh($fdno,'wpid',$wpid); $self->set_fh($fdno,'wpid',$wpid);
@ -5931,22 +5952,10 @@ sub wrapped {
if(@Global::cat_partials) { if(@Global::cat_partials) {
# Prepend: # Prepend:
# < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 |
$command = (shift @Global::cat_partials). "|". "(". $command. ")"; $command = (shift @Global::cat_partials). " | ($command)";
} elsif($opt::pipe) { } elsif($opt::pipe) {
# Prepend EOF-detector to avoid starting $command if EOF. # Prepend EOF-detector to avoid starting $command if EOF.
# The $tmpfile might exist if run on a remote system - we accept that risk $command = empty_input_detector(). "($command);";
my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
# Unlink to avoid leaving files if --dry-run or --sshlogin
unlink $tmpfile;
$command =
# Exit value:
# empty input = true
# some input = exit val from command
# sh -c needed as csh cannot hide stderr
qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }.
"($command);";
} }
if($opt::tmux) { if($opt::tmux) {
# Wrap command with 'tmux' # Wrap command with 'tmux'

View file

@ -262,6 +262,14 @@ B<--pipepart>/B<--pipe> should be done on the local machine inside B<--tmux>
=back =back
=head2 --block-size adjustment
Every time GNU B<parallel> detects a record bigger than
B<--block-size> it increases the block size by 30%. A small
B<--block-size> gives very poor performance; by exponentially
increasing the block size performance will not suffer.
=head2 Shell shock =head2 Shell shock
The shell shock bug in B<bash> did not affect GNU B<parallel>, but the The shell shock bug in B<bash> did not affect GNU B<parallel>, but the

View file

@ -7,7 +7,14 @@ export XAP
NICEPAR="nice nice parallel" NICEPAR="nice nice parallel"
export NICEPAR export NICEPAR
cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj0 -k --joblog /tmp/jl-`basename $0` -L1 cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | stdout parallel -vj6 -k --joblog /tmp/jl-`basename $0` -L1
echo 'bug #44250: pxz complains File format not recognized but decompresses anyway'
# The first line dumps core if run from make file. Why?!
stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file
stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{} ::: OK-if-missing-file
stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output
stdout parallel --compress --compress-program pxz true ::: OK-if-no-output
echo 'bug #41613: --compress --line-buffer no newline'; echo 'bug #41613: --compress --line-buffer no newline';
perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo

View file

@ -1,3 +1,12 @@
echo 'bug #44250: pxz complains File format not recognized but decompresses anyway'
bug #44250: pxz complains File format not recognized but decompresses anyway
# The first line dumps core if run from make file. Why?!
stdout parallel --compress --compress-program pxz ls /{} ::: OK-if-missing-file
Segmentation fault (core dumped)
stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' ls /{} ::: OK-if-missing-file
ls: cannot access /OK-if-missing-file: No such file or directory
stdout parallel --compress --compress-program pixz --decompress-program 'pixz -d' true ::: OK-if-no-output
stdout parallel --compress --compress-program pxz true ::: OK-if-no-output
echo 'bug #41613: --compress --line-buffer no newline'; echo 'bug #41613: --compress --line-buffer no newline';
bug #41613: --compress --line-buffer no newline bug #41613: --compress --line-buffer no newline
perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo perl -e 'print "It worked"'| $NICEPAR --pipe --compress --line-buffer cat; echo