mirror of https://git.savannah.gnu.org/git/parallel.git
parallel: If --block is left out, --pipepart will use a block size that will result in 10 jobs per jobslot.
This commit is contained in:
parent d570ec2d20
commit 51f212e548

src/parallel: 42 lines changed
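In numbers (a sketch with made-up sizes, not output from this commit): for 1 GB of input and 4 jobslots, the new default aims for 4*10 = 40 parts of roughly 25 MB each:

  # Hypothetical: 1 GB of input (-a file), 4 jobslots (-j4).
  # New default: blocksize = 1 + int(size / jobslots / 10)
  perl -e 'print 1+int(1e9/4/10), "\n"'
  # 25000001

With --roundrobin the divisor is only the number of jobslots, i.e. one block per jobslot.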
@@ -120,17 +120,30 @@ $Global::JobQueue = JobQueue->new(
     $number_of_args,\@Global::transfer_files,\@Global::ret_files);
 
 if($opt::pipepart) {
-    if($opt::roundrobin) {
-        # Compute size of -a
+    if(not $opt::blocksize or $opt::roundrobin) {
+        # --block not set =>
+        #   compute reasonable value giving 10 jobs per jobslot
+        # --roundrobin => divide equally between jobslots
         my $size = 0;
+        # Compute size of -a
         $size += -s $_ for @opt::a;
         # Compute $Global::max_jobs_running
+        $Global::dummy_jobs = 1;
         for my $sshlogin (values %Global::host) {
             $sshlogin->max_jobs_running();
         }
-        $Global::max_jobs_running or ::die_bug("Global::max_jobs_running not set");
-        # Set --blocksize = size / no of proc
-        $opt::blocksize = 1 + $size / $Global::max_jobs_running;
+        $Global::max_jobs_running or
+            ::die_bug("Global::max_jobs_running not set");
+        if($opt::roundrobin) {
+            # Run in total $job_slots jobs
+            # Set --blocksize = size / no of proc
+            $Global::blocksize = 1 + int($size / $Global::max_jobs_running);
+        } else {
+            # Run in total $job_slots*10 jobs
+            # Set --blocksize = size / no of proc / 10
+            $Global::blocksize = 1 +
+                int($size / $Global::max_jobs_running / 10);
+        }
     }
     @Global::cat_partials = map { pipe_part_files($_) } @opt::a;
     # Unget the empty arg as many times as there are parts
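A quick way to observe the new default (this mirrors the testsuite addition further down; the temp file name is illustrative):

  seq 1000 > /tmp/blk$$              # a small input file
  # 2 jobslots, no --block: expect 2*10 = 20 parts
  parallel -j2 -k --pipepart echo {#} :::: /tmp/blk$$ | tail -n1
  # 20
  rm /tmp/blk$$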
@@ -215,7 +228,7 @@ sub pipe_part_files {
     }
     my $header = find_header(\$buf,open_or_exit($file));
     # find positions
-    my @pos = find_split_positions($file,$opt::blocksize,length $header);
+    my @pos = find_split_positions($file,$Global::blocksize,length $header);
     # Make @cat_partials
     my @cat_partials = ();
     for(my $i=0; $i<$#pos; $i++) {
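find_split_positions (unchanged here apart from the variable) walks the file in steps of the block size before aligning to record boundaries; a toy sketch of just the stepping, under that assumption (names are illustrative):

  # Print candidate split offsets of FILE every BLOCK bytes.
  FILE=bigfile BLOCK=25000001 \
  perl -e '$s = -s $ENV{FILE};
           for($p = 0; $p < $s; $p += $ENV{BLOCK}) { print "$p\n" }'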
@@ -230,7 +243,7 @@ sub find_header {
     # $fh = filehandle to read from
     # Uses:
     #   $opt::header
-    #   $opt::blocksize
+    #   $Global::blocksize
     # Returns:
     #   $header string
     my ($buf_ref, $fh) = @_;
@@ -239,7 +252,7 @@ sub find_header {
     if($opt::header eq ":") { $opt::header = "(.*\n)"; }
     # Number = number of lines
     $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
-    while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) {
+    while(read($fh,substr($$buf_ref,length $$buf_ref,0),$Global::blocksize)) {
        if($$buf_ref=~s/^($opt::header)//) {
            $header = $1;
            last;
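The substitution in this hunk turns a numeric --header N into a regexp matching N lines; just that rewrite in isolation:

  # What a numeric --header becomes: "2" -> (.*\n)(.*\n),
  # i.e. a regexp that strips the first 2 lines as the header.
  printf 'h1\nh2\nrest\n' | perl -e '
      $h = "2"; $h =~ s/^(\d+)$/"(.*\n)" x $1/e;
      undef $/; $_ = <STDIN>;
      s/^($h)// and print "header: $1"'
  # header: h1
  # h2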
@@ -318,7 +331,7 @@ sub spreadstdin {
    # read a record
    # Spawn a job and print the record to it.
    # Uses:
-   #   $opt::blocksize
+   #   $Global::blocksize
    #   STDIN
    #   $opt::r
    #   $Global::max_lines
@@ -335,7 +348,7 @@ sub spreadstdin {
    my $chunk_number = 1;
    my $one_time_through;
    my $two_gb = 2**31-1;
-   my $blocksize = $opt::blocksize;
+   my $blocksize = $Global::blocksize;
    my $in = *STDIN;
    my $header = find_header(\$buf,$in);
    while(1) {
@@ -998,11 +1011,10 @@ sub parse_options {
     if(@opt::transfer_files) { push @Global::transfer_files, @opt::transfer_files; }
     if(not defined $opt::recstart and
        not defined $opt::recend) { $opt::recend = "\n"; }
-    if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
-    $opt::blocksize = multiply_binary_prefix($opt::blocksize);
-    if($opt::blocksize > 2**31-1) {
+    $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M");
+    if($Global::blocksize > 2**31-1) {
        warning("--blocksize >= 2G causes problems. Using 2G-1.");
-       $opt::blocksize = 2**31-1;
+       $Global::blocksize = 2**31-1;
     }
     $opt::memfree = multiply_binary_prefix($opt::memfree);
     check_invalid_option_combinations();
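Per the code above, any --block of 2 GB or more is clamped to 2^31-1 after binary-prefix expansion, so a call like the following should warn (message text taken from the warning() above; the exact prefix on the line depends on parallel's warning format):

  parallel --pipe --block 3G wc
  # parallel: Warning: --blocksize >= 2G causes problems. Using 2G-1.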
@@ -5130,7 +5142,7 @@ sub compute_number_of_processes {
        $max_system_proc_reached and last;
 
        my $before_getting_arg = time;
-       if(!$opt::roundrobin) {
+       if(!$Global::dummy_jobs) {
            get_args_or_jobs() or last;
        }
        $wait_time_for_getting_args += time - $before_getting_arg;
@@ -1133,13 +1133,19 @@ B<--files> is often used with B<--pipe>.
 B<--pipe> maxes out at around 1 GB/s input, and 100 MB/s output. If
 performance is important use B<--pipepart>.
 
-See also: B<--recstart>, B<--recend>, B<--fifo>, B<--cat>, B<--pipepart>.
+See also: B<--recstart>, B<--recend>, B<--fifo>, B<--cat>,
+B<--pipepart>, B<--files>.
 
 
 =item B<--pipepart>
 
 Pipe parts of a physical file. B<--pipepart> works similar to
-B<--pipe>, but is much faster. It has a few limitations:
+B<--pipe>, but is much faster.
+
+If B<--block> is left out, B<--pipepart> will use a block size that
+will result in 10 jobs per jobslot.
+
+B<--pipepart> has a few limitations:
 
 =over 3
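With that documented default, the minimal --pipepart invocation needs no --block at all (file name illustrative):

  # Block size is computed from the file size: ~10 jobs per jobslot.
  parallel --pipepart -a bigfile wc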
@@ -2866,7 +2872,7 @@ Or if the regexps are fixed strings:
   grep -F -f regexps.txt bigfile
 
 There are 2 limiting factors: CPU and disk I/O. CPU is easy to
-measure: If the grep takes >90% CPU (e.g. when running top), then the
+measure: If the B<grep> takes >90% CPU (e.g. when running top), then the
 CPU is a limiting factor, and parallelization will speed this up. If
 not, then disk I/O is the limiting factor, and depending on the disk
 system it may be faster or slower to parallelize. The only way to know
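One way to apply the test this paragraph describes (a sketch; file names as in the example above):

  # If user+sys is close to real, grep is CPU-bound and will benefit
  # from parallelization; if real is much larger, the disk is the
  # bottleneck.
  time grep -f regexps.txt bigfile > /dev/null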
@@ -2876,23 +2882,23 @@ If the CPU is the limiting factor parallelization should be done on the regexps:
 
   cat regexp.txt | parallel --pipe -L1000 --round-robin grep -f - bigfile
 
-If a line matches multiple regexps, the line may be duplicated. The command
-will start one grep per CPU and read bigfile one time per CPU,
-but as that is done in parallel, all reads except the first will be
-cached in RAM. Depending on the size of regexp.txt it may be faster to
-use --block 10m instead of -L1000. If regexp.txt is too big to fit in
-RAM, remove --round-robin and adjust -L1000. This will cause bigfile
-to be read more times.
+If a line matches multiple regexps, the line may be duplicated. The
+command will start one B<grep> per CPU and read I<bigfile> one time
+per CPU, but as that is done in parallel, all reads except the first
+will be cached in RAM. Depending on the size of I<regexp.txt> it may
+be faster to use B<--block 10m> instead of B<-L1000>. If I<regexp.txt>
+is too big to fit in RAM, remove B<--round-robin> and adjust
+B<-L1000>. This will cause I<bigfile> to be read more times.
 
 Some storage systems perform better when reading multiple chunks in
 parallel. This is true for some RAID systems and for some network file
-systems. To parallelize the reading of bigfile:
+systems. To parallelize the reading of I<bigfile>:
 
   parallel --pipepart --block 100M -a bigfile -k grep -f regexp.txt
 
-This will split bigfile into 100MB chunks and run grep on each of
-these chunks. To parallelize both reading of bigfile and regexp.txt
-combine the two using --fifo:
+This will split I<bigfile> into 100MB chunks and run B<grep> on each of
+these chunks. To parallelize both reading of I<bigfile> and I<regexp.txt>
+combine the two using B<--fifo>:
 
   parallel --pipepart --block 100M -a bigfile --fifo cat regexp.txt \
   \| parallel --pipe -L1000 --round-robin grep -f - {}
@@ -4860,8 +4866,9 @@ it also uses rsync with ssh.
 
 =head1 SEE ALSO
 
-B<ssh>(1), B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>(1),
-B<make>(1), B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1),
-B<dxargs>(1), B<mdm>(1)
+B<ssh>(1), B<ssh-agent>(1), B<sshpass>(1), B<ssh-copy-id>(1),
+B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>(1), B<make>(1),
+B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1), B<dxargs>(1),
+B<mdm>(1)
 
 =cut
@@ -1819,7 +1819,7 @@
 
 <p>When using <b>--cat</b>, <b>--pipepart</b>, or when a job is run on a remote machine, the command is wrapped with helper scripts. <b>-vv</b> shows all of this.</p>
 
-<pre><code>  parallel -vv --pipepart wc :::: num30000</code></pre>
+<pre><code>  parallel -vv --pipepart --block 1M wc :::: num30000</code></pre>
 
 <p>Output:</p>
 
@@ -1816,7 +1816,7 @@ When using B<--cat>, B<--pipepart>, or when a job is run on a remote
 machine, the command is wrapped with helper scripts. B<-vv> shows all
 of this.
 
-  parallel -vv --pipepart wc :::: num30000
+  parallel -vv --pipepart --block 1M wc :::: num30000
 
 Output:
 
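Both examples now pin --block 1M, presumably because the default block size now depends on input size and jobslot count: a fixed block keeps the documented output, including the byte offsets in the wrapper shown near the end of this diff, reproducible across machines.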
@@ -596,6 +596,16 @@ echo '### bug #34422: parallel -X --eta crashes with div by zero'
 # We do not care how long it took
 seq 2 | stdout parallel -X --eta echo | grep -E -v 'ETA:.*AVG'
 
+echo '**'
+
+echo '### --pipepart autoset --block => 10*jobslots'
+
+seq 1000 > /run/shm/parallel$$;
+parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$;
+rm /run/shm/parallel$$
+
+echo '**'
+
 
 EOF
 echo '### 1 .par file from --files expected'
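With -j2 and the new default block size, the 1000-line file is split into 2 jobslots * 10 = 20 parts, so {#} counts 1 through 20. The expected-output update in the next hunk records exactly that.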
@@ -1638,5 +1638,32 @@ Computers / CPU cores / Max jobs to run
 1:local / 8 / 2
 
 Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
+echo '**'
+**
+echo '### --pipepart autoset --block => 10*jobslots'
+### --pipepart autoset --block => 10*jobslots
+seq 1000 > /run/shm/parallel$$; parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$; rm /run/shm/parallel$$
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+echo '**'
+**
 ### 1 .par file from --files expected
 0
@@ -846,7 +846,7 @@ _
 parallel --env _ -S $SERVER1 'echo $VAR; my_func2' ::: bar
 
 /bin/bash: my_func2: command not found
-parallel -vv --pipepart wc :::: num30000
+parallel -vv --pipepart --block 1M wc :::: num30000
 <num30000 perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 168894 | (wc)
  30000 30000 168894
 my_func3() {
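For readability, the --pipepart wrapper quoted in that last expected-output hunk, reformatted with comments (same code, whitespace and comments only):

  # One (offset, length) pair per block is passed in @ARGV.
  while(@ARGV) {
      sysseek(STDIN, shift, 0) || die;   # jump to the block's start
      $left = shift;                     # bytes remaining in this block
      while($read = sysread(STDIN, $buf,
                            ($left > 131072 ? 131072 : $left))) {
          $left -= $read;                # stream in <=128 KB chunks
          syswrite(STDOUT, $buf);
      }
  }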