Fixed bug #48385: --pipepart does not work on block-devices.

2024-11-22 22:17:54 +00:00 · 2016-07-04 04:26:57 +02:00 · 2016-07-04 04:26:57 +02:00 · a62b528382
parent dd5ade4fbe
commit a62b528382
2 changed files with 88 additions and 29 deletions
--- a/src/parallel
+++ b/src/parallel
@ -126,7 +126,16 @@ if($opt::pipepart) {
 	# --roundrobin => divide equally between jobslots
 	my $size = 0;
 	# Compute size of -a
-	$size += -s $_ for @opt::a;
+	for(@opt::a) {
 	    if(-f $_) {
 		$size += -s $_;
 	    } elsif(-b $_) {
 		$size += size_of_block_dev($_);
 	    } else {
 		::error("$_ is neither a file nor a block device");
 		wait_and_exit(255);
 	    }
 	}
 	# Compute $Global::max_jobs_running
 	$Global::dummy_jobs = 1;
 	for my $sshlogin (values %Global::host) {
@ -222,7 +231,7 @@ sub pipe_part_files {
    #   @commands that will cat_partial each part
    my ($file) = @_;
    my $buf = "";
-    if(not -f $file) {
+    if(not -f $file and not -b $file) {
 	::error("$file is not a seekable file.");
 	::wait_and_exit(255);
    }
@ -274,6 +283,10 @@ sub find_split_positions {
    #   @positions of block start/end
    my($file, $block, $headerlen) = @_;
    my $size = -s $file;
    if(-b $file) {
 	# $file is a blockdevice
 	$size = size_of_block_dev($file);
    }
    $block = int $block;
    # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
    # The optimal dd blocksize for freebsd = 2^15..2^17
@ -3797,6 +3810,24 @@ sub tmpfifo {
    return $tmpfifo;
 }
 sub size_of_block_dev {
    # Like -s but for block devices
    # The size is found by opening the device read-only, seeking to
    # its end and asking for the resulting file offset.
    # Input:
    #   $blockdev = file name of block device
    # Returns:
    #   $size = in bytes
    # On error (no undef is returned by this code):
    #   open fails => ::error() followed by ::wait_and_exit(255)
    #   seek fails => ::die_bug()
    my $blockdev = shift;
    if(open(my $fh, "<", $blockdev)) {
 	# Whence 2 = SEEK_END: move to the end of the device
 	seek($fh,0,2) || ::die_bug("cannot seek $blockdev: $!");
 	# The offset at the end equals the size in bytes
 	my $size = tell($fh);
 	close $fh;
 	return $size;
    } else {
 	# Include the OS reason ($!) - e.g. missing permissions
 	::error("cannot open $blockdev: $!");
 	::wait_and_exit(255);
    }
 }
 sub qqx {
    # Like qx but with clean environment (except for @keep)
    # and STDERR ignored
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -1146,7 +1146,9 @@ Pipe parts of a physical file. B<--pipepart> works similar to
 B<--pipe>, but is much faster.
 If B<--block> is left out, B<--pipepart> will use a block size that
-will result in 10 jobs per jobslot.
+will result in 10 jobs per jobslot, except if run with
 B<--round-robin> in which case it will result in 1 job per jobslot.
 B<--pipepart> has a few limitations:
@ -1154,8 +1156,12 @@ B<--pipepart> has a few limitations:
 =item Z<>*
-The file must be a physical (seekable) file (not a stream) and must be
+The file must be a normal file or a block device (technically it must
-given using B<-a> or B<::::>.
+be seekable) and must be given using B<-a> or B<::::>. The file cannot
 be a pipe or a fifo as they are not seekable.
 If using a block device with a lot of NUL bytes, remember to set
 B<--recend ''>.
 =item Z<>*
@ -1559,7 +1565,7 @@ time to initialize.
 B<--keep-order> will not work with B<--round-robin> as it is
 impossible to track which input block corresponds to which output.
-B<--round-robin> implies B<--pipe>.
+B<--round-robin> implies B<--pipe>, except if B<--pipepart> is given.
 =item B<--rpl> 'I<tag> I<perl expression>'
@ -3969,29 +3975,9 @@ E1 -  -  E4 -  E6
 R1 -  -  -  -  R6 -  -  -
 S1 -
-xjobs: TODO - Please file a bug-report if you know what features xjobs
+xjobs, prll, dxargs, mdm/middleman, xapply, paexec, ladon, jobflow,
-supports (See REPORTING BUGS).
+ClusterSSH: TODO - Please file a bug-report if you know what features
-
+they support (See REPORTING BUGS).
 prll: TODO - Please file a bug-report if you know what features prll
 supports (See REPORTING BUGS).
 dxargs: TODO - Please file a bug-report if you know what features dxargs
 supports (See REPORTING BUGS).
 mdm/middelman: TODO - Please file a bug-report if you know what
 features mdm/middelman supports (See REPORTING BUGS).
 xapply: TODO - Please file a bug-report if you know what features xapply
 supports (See REPORTING BUGS).
 paexec: TODO - Please file a bug-report if you know what features paexec
 supports (See REPORTING BUGS).
 ladon: TODO - Please file a bug-report if you know what features ladon
 supports (See REPORTING BUGS).
 ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH
 supports (See REPORTING BUGS).
 =head2 DIFFERENCES BETWEEN xargs AND GNU Parallel
@ -4574,6 +4560,48 @@ B<4> ladon "~/Music/*.wav" -- lame -V 2 FULLPATH DIRNAME/BASENAME.mp3
 B<4> parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav 
 =head2 DIFFERENCES BETWEEN jobflow AND GNU Parallel
 B<jobflow> can run multiple jobs in parallel.
 Just like B<xargs> output from B<jobflow> jobs running in parallel mix
 together by default. B<jobflow> can buffer into files (placed in
 /run/shm), but these are not cleaned up - not even if B<jobflow> dies
 unexpectedly. If the total output is big (in the order of RAM+swap)
 it can cause the system to run out of memory.
 B<jobflow> gives no error if the command is unknown, and like B<xargs>
 redirection requires wrapping with B<bash -c>.
 B<jobflow> makes it possible to set resource limits on the running
 jobs. This can be emulated by GNU B<parallel> using B<bash>'s B<ulimit>:
  jobflow -limits=mem=100M,cpu=3,fsize=20M,nofiles=300 myjob
  parallel 'ulimit -v 102400 -t 3 -f 204800 -n 300 myjob'
 =head3 EXAMPLES FROM jobflow README
 B<1> cat things.list | jobflow -threads=8 -exec ./mytask {}
 B<1> cat things.list | parallel -j8 ./mytask {}
 B<2> seq 100 | jobflow -threads=100 -exec echo {}
 B<2> seq 100 | parallel -j100 echo {}
 B<3> cat urls.txt | jobflow -threads=32 -exec wget {}
 B<3> cat urls.txt | parallel -j32 wget {}
 B<4> find . -name '*.bmp' | jobflow -threads=8 -exec bmp2jpeg {.}.bmp {.}.jpg
 B<4> find . -name '*.bmp' | parallel -j8 bmp2jpeg {.}.bmp {.}.jpg
 =head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel
 ClusterSSH solves a different problem than GNU B<parallel>.