Fixed bug #49800: --pipepart --block -1 should behave like --round-robin.

This commit is contained in:
Ole Tange 2016-12-08 02:28:01 +01:00
parent b1d17c6dd2
commit 0388fb5b1e
5 changed files with 109 additions and 44 deletions

View file

@ -132,10 +132,15 @@ $Global::JobQueue = JobQueue->new(
$number_of_args,\@Global::transfer_files,\@Global::ret_files);
if($opt::pipepart) {
if(not $opt::blocksize or $opt::roundrobin) {
# --block not set =>
# compute reasonable value giving 10 jobs per jobslot
# --roundrobin => divide equally between jobslots
if(not $opt::blocksize) {
# --blocksize with 10 jobs per jobslot
$opt::blocksize = -10;
}
if($opt::roundrobin) {
# --blocksize with 1 job per jobslot
$opt::blocksize = -1;
}
if($opt::blocksize < 0) {
my $size = 0;
# Compute size of -a
for(@opt::a) {
@ -155,16 +160,10 @@ if($opt::pipepart) {
}
$Global::max_jobs_running or
::die_bug("Global::max_jobs_running not set");
if($opt::roundrobin) {
# Run in total $job_slots jobs
# Set --blocksize = size / no of proc
$Global::blocksize = 1 + int($size / $Global::max_jobs_running);
} else {
# Run in total $job_slots*10 jobs
# Set --blocksize = size / no of proc / 10
$Global::blocksize = 1 +
int($size / $Global::max_jobs_running / 10);
}
# Run in total $job_slots*(- $blocksize) jobs
# Set --blocksize = size / no of proc / (- $blocksize)
$Global::blocksize = 1 +
int($size / $Global::max_jobs_running / -$opt::blocksize);
}
@Global::cat_partials = map { pipe_part_files($_) } @opt::a;
# Unget the empty arg as many times as there are parts
@ -304,24 +303,28 @@ sub find_split_positions {
push(@pos,$headerlen);
for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
my $buf;
seek($fh, $pos, 0) || die;
while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
if($opt::regexp) {
# If match /$recend$recstart/ => Record position
if($buf =~ /^(.*$recend)$recstart/os) {
# Start looking for next record _after_ this match
$pos += length($1);
push(@pos,$pos);
last;
}
} else {
# If match $recend$recstart => Record position
my $i = index64(\$buf,$recendrecstart);
if($i != -1) {
# Start looking for next record _after_ this match
$pos += $i + length($recendrecstart);
push(@pos,$pos);
last;
if($recendrecstart eq "") {
push(@pos,$pos);
} else {
seek($fh, $pos, 0) || die;
while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
if($opt::regexp) {
# If match /$recend$recstart/ => Record position
if($buf =~ /^(.*$recend)$recstart/os) {
# Start looking for next record _after_ this match
$pos += length($1);
push(@pos,$pos);
last;
}
} else {
# If match $recend$recstart => Record position
my $i = index64(\$buf,$recendrecstart);
if($i != -1) {
# Start looking for next record _after_ this match
$pos += $i + length($recendrecstart);
push(@pos,$pos);
last;
}
}
}
}

View file

@ -499,6 +499,14 @@ If you use B<-N>, B<--block-size> should be bigger than N+1 records.
I<size> defaults to 1M.
When using B<--pipepart> a negative block size is not interpreted as a
blocksize but as the number of blocks each jobslot should have. So
this will run 10*5 = 50 jobs in total:
parallel --pipepart -a myfile --block -10 -j5 wc
This is an efficient alternative to B<--round-robin>.
See B<--pipe> and B<--pipepart> for use of this.
@ -1182,10 +1190,6 @@ B<--pipepart>, B<--files>.
Pipe parts of a physical file. B<--pipepart> works similarly to
B<--pipe>, but is much faster.
If B<--block> is left out, B<--pipepart> will use a block size that
will result in 10 jobs per jobslot, except if run with
B<--round-robin> in which case it will result in 1 job per jobslot.
B<--pipepart> has a few limitations:
=over 3

View file

@ -605,14 +605,6 @@ echo '### bug #34422: parallel -X --eta crashes with div by zero'
echo '**'
echo '### --pipepart autoset --block => 10*joblots'
seq 1000 > /run/shm/parallel$$;
parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$;
rm /run/shm/parallel$$
echo '**'
echo '### bug #48295: --results should be dynamic like --wd'
rm -rf /tmp/parallel-48295;

View file

@ -59,4 +59,17 @@ echo '### bug #47644: Wrong slot number replacement when resuming'
seq 0 20 | parallel -kj 4 --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==10 and exit =}';
seq 0 20 | parallel -kj 4 --resume --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==110 and exit =}'
echo '**'
echo '### --pipepart --block -# (# < 0)'
seq 1000 > /run/shm/parallel$$;
parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$;
parallel -j2 -k --block -1 --pipepart echo {#}-2 :::: /run/shm/parallel$$;
parallel -j2 -k --block -2 --pipepart echo {#}-4 :::: /run/shm/parallel$$;
parallel -j2 -k --block -10 --pipepart echo {#}-20 :::: /run/shm/parallel$$;
rm /run/shm/parallel$$
echo '**'
EOF

View file

@ -65,3 +65,56 @@ echo '### bug #47644: Wrong slot number replacement when resuming'
3 18
4 19
1 20
echo '**'
**
echo '### --pipepart --block -# (# < 0)'
### --pipepart --block -# (# < 0)
seq 1000 > /run/shm/parallel$$; parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$; parallel -j2 -k --block -1 --pipepart echo {#}-1 :::: /run/shm/parallel$$; parallel -j2 -k --block -2 --pipepart echo {#}-2 :::: /run/shm/parallel$$; parallel -j2 -k --block -10 --pipepart echo {#}-10 :::: /run/shm/parallel$$; rm /run/shm/parallel$$
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
1-1
2-1
1-2
2-2
3-2
4-2
1-10
2-10
3-10
4-10
5-10
6-10
7-10
8-10
9-10
10-10
11-10
12-10
13-10
14-10
15-10
16-10
17-10
18-10
19-10
20-10
echo '**'
**