From a62b5283826cafba776d3cee3ae68b74e61757f5 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 4 Jul 2016 04:26:57 +0200 Subject: [PATCH] Fixed bug #48385: --pipepart does not work on block-devices. --- src/parallel | 35 +++++++++++++++++++-- src/parallel.pod | 82 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 88 insertions(+), 29 deletions(-) diff --git a/src/parallel b/src/parallel index 23ee4a9a..3ccc1f0e 100755 --- a/src/parallel +++ b/src/parallel @@ -126,7 +126,16 @@ if($opt::pipepart) { # --roundrobin => divide equally between jobslots my $size = 0; # Compute size of -a - $size += -s $_ for @opt::a; + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } else { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } + } # Compute $Global::max_jobs_running $Global::dummy_jobs = 1; for my $sshlogin (values %Global::host) { @@ -222,7 +231,7 @@ sub pipe_part_files { # @commands that will cat_partial each part my ($file) = @_; my $buf = ""; - if(not -f $file) { + if(not -f $file and not -b $file) { ::error("$file is not a seekable file."); ::wait_and_exit(255); } @@ -274,6 +283,10 @@ sub find_split_positions { # @positions of block start/end my($file, $block, $headerlen) = @_; my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } $block = int $block; # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 # The optimal dd blocksize for freebsd = 2^15..2^17 @@ -3797,6 +3810,24 @@ sub tmpfifo { return $tmpfifo; } +sub size_of_block_dev { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + if(open(my $fh, "<", $blockdev)) { + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; + } else { + ::error("cannot open $blockdev"); + 
wait_and_exit(255); + } +} + sub qqx { # Like qx but with clean environment (except for @keep) # and STDERR ignored diff --git a/src/parallel.pod b/src/parallel.pod index 10357a88..ec3e51d3 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -1146,7 +1146,9 @@ Pipe parts of a physical file. B<--pipepart> works similar to B<--pipe>, but is much faster. If B<--block> is left out, B<--pipepart> will use a block size that -will result in 10 jobs per jobslot. +will result in 10 jobs per jobslot, except if run with +B<--round-robin> in which case it will result in 1 job per jobslot. + B<--pipepart> has a few limitations: @@ -1154,8 +1156,12 @@ B<--pipepart> has a few limitations: =item Z<>* -The file must be a physical (seekable) file (not a stream) and must be -given using B<-a> or B<::::>. +The file must be a normal file or a block device (technically it must +be seekable) and must be given using B<-a> or B<::::>. The file cannot +be a pipe or a fifo as they are not seekable. + +If using a block device with lot of NUL bytes, remember to set +B<--recend ''>. =item Z<>* @@ -1559,7 +1565,7 @@ time to initialize. B<--keep-order> will not work with B<--round-robin> as it is impossible to track which input block corresponds to which output. -B<--round-robin> implies B<--pipe>. +B<--round-robin> implies B<--pipe>, except if B<--pipepart> is given. =item B<--rpl> 'I I' @@ -3969,29 +3975,9 @@ E1 - - E4 - E6 R1 - - - - R6 - - - S1 - -xjobs: TODO - Please file a bug-report if you know what features xjobs -supports (See REPORTING BUGS). - -prll: TODO - Please file a bug-report if you know what features prll -supports (See REPORTING BUGS). - -dxargs: TODO - Please file a bug-report if you know what features dxargs -supports (See REPORTING BUGS). - -mdm/middelman: TODO - Please file a bug-report if you know what -features mdm/middelman supports (See REPORTING BUGS). - -xapply: TODO - Please file a bug-report if you know what features xapply -supports (See REPORTING BUGS). 
- -paexec: TODO - Please file a bug-report if you know what features paexec -supports (See REPORTING BUGS). - -ladon: TODO - Please file a bug-report if you know what features ladon -supports (See REPORTING BUGS). - -ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH -supports (See REPORTING BUGS). +xjobs, prll, dxargs, mdm/middelman, xapply, paexec, ladon, jobflow, +ClusterSSH: TODO - Please file a bug-report if you know what features +they support (See REPORTING BUGS). =head2 DIFFERENCES BETWEEN xargs AND GNU Parallel @@ -4574,6 +4560,48 @@ B<4> ladon "~/Music/*.wav" -- lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 B<4> parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav +=head2 DIFFERENCES BETWEEN jobflow AND GNU Parallel + +B can run multiple jobs in parallel. + +Just like B output from B jobs running in parallel mix +together by default. B can buffer into files (placed in +/run/shm), but these are not cleaned up - not even if B dies +unexpectedly. If the total output is big (in the order of RAM+swap) +it can cause the system to run out of memory. + +B gives no error if the command is unknown, and like B +redirection requires wrapping with B. + +B makes it possible to set resource limits on the running +jobs. This can be emulated by GNU B using B's B: + + + jobflow -limits=mem=100M,cpu=3,fsize=20M,nofiles=300 myjob + + parallel 'ulimit -v 102400 -t 3 -f 204800 -n 300 myjob' + + +=head3 EXAMPLES FROM jobflow README + +B<1> cat things.list | jobflow -threads=8 -exec ./mytask {} + +B<1> cat things.list | parallel -j8 ./mytask {} + +B<2> seq 100 | jobflow -threads=100 -exec echo {} + +B<2> seq 100 | parallel -j100 echo {} + +B<3> cat urls.txt | jobflow -threads=32 -exec wget {} + +B<3> cat urls.txt | parallel -j32 wget {} + +B<4> find . -name '*.bmp' | jobflow -threads=8 -exec bmp2jpeg {.}.bmp {.}.jpg + +B<4> find . 
-name '*.bmp' | parallel -j8 bmp2jpeg {.}.bmp {.}.jpg + + + =head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel ClusterSSH solves a different problem than GNU B.