parallel: Code cleanup. Passes testsuite.

This commit is contained in:
Ole Tange 2011-05-05 20:50:53 +02:00
parent ba6af9c3d2
commit 594f630a7f
3 changed files with 93 additions and 34 deletions

View file

@ -1,3 +1,62 @@
Video
So far GNU Parallel has been focused on replacing a single
for-loop. The Pakistan release introduces a way to replace nested
loops.
As an example I will use the image manipulation program 'convert'. This
will convert foo.png to jpg with a size of 800 and JPEG-quality of 95.
convert -size 800 -quality 95 foo.png foo_800_q95.jpg
With a for-loop it can be done on a list of files:
time \
for file in *.png ; do
convert -size 800 -quality 95 $file ${file%.png}_800_q95.jpg
done
Using GNU Parallel it looks like this:
time parallel convert -size 800 -quality 95 {} {.}_800_q95.jpg ::: *.png
To get the images in 3 different JPEG-qualities you can use a nested for-loop:
time \
for qual in 25 50 95 ; do
for file in *.png ; do
convert -size 800 -quality $qual $file ${file%.png}_800_q${qual}.jpg
done
done
With GNU Parallel 'Pakistan' you can do this:
time parallel convert -size 800 -quality {2} {1} {1.}_800_q{2}.jpg ::: *.png ::: 25 50 95
To get the 3 different JPEG-qualities in 2 different sizes you can
nest the for-loops even further:
time \
for size in 800 30 ; do
for qual in 25 50 95 ; do
for file in *.png ; do
convert -size $size -quality $qual $file ${file%.png}_${size}_q${qual}.jpg
done
done
done
With GNU Parallel 'Pakistan' you can do this:
time parallel convert -size {3} -quality {2} {1} {1.}_{3}_q{2}.jpg ::: *.png ::: 25 50 95 ::: 800 30
You can also provide the arguments in a file:
(echo 25; echo 50; echo 95) > qualities
ls *.png > png-files
time parallel convert -size {3} -quality {2} {1} {1.}_{3}_q{2}.jpg :::: png-files :::: qualities ::: 800 30
Test with first argument == END_OF_FILE string
unittest til xapply og uden.

View file

@ -3889,6 +3889,7 @@ sub new {
return bless {
'unget' => \@Global::unget_argv,
'fhs' => $fhs,
'arg_matrix' => undef,
}, ref($class) || $class;
}
@ -3934,8 +3935,8 @@ sub nest_get {
my $prepend = undef;
my $empty = 1;
my $no_of_inputs = $#{$self->{'fhs'}} + 1;
if(not @::arg_matrix) {
# Initialize @::arg_matrix with one arg from each file
if(not $self->{'arg_matrix'}) {
# Initialize @arg_matrix with one arg from each file
# read one line from each file
my @first_arg_set;
my $all_empty = 1;
@ -3944,8 +3945,8 @@ sub nest_get {
if(defined $arg) {
$all_empty = 0;
}
$::arg_matrix[$fhno][0] = $arg || Arg->new("");
push @first_arg_set, $::arg_matrix[$fhno][0];
$self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
}
if($all_empty) {
# All filehandles were at eof or eof-string
@ -3960,12 +3961,12 @@ sub nest_get {
# read one
my $arg = read_arg_from_fh($self->{'fhs'}[$fhno])
|| next; # If we just read an EOF string: Treat this as EOF
my $len = $#{$::arg_matrix[$fhno]} + 1;
$::arg_matrix[$fhno][$len] = $arg;
my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
$self->{'arg_matrix'}[$fhno][$len] = $arg;
# make all new combinations
my @combarg = ();
for (my $fhn = 0; $fhn < $no_of_inputs; $fhn++) {
push @combarg, [0, $#{$::arg_matrix[$fhn]}];
push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}];
}
$combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry
# map combinations
@ -3976,7 +3977,7 @@ sub nest_get {
for my $c (expand_combinations(@combarg)) {
my @a;
for my $n (0 .. $no_of_inputs - 1 ) {
push @a, $::arg_matrix[$n][$$c[$n]];
push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
}
push @mapped, [@a];
}

View file

@ -119,7 +119,7 @@ B<{/.}> can be used the same places as B<{}>. The replacement string
B<{/.}> can be changed with B<--basenameextensionreplace>.
=item B<{#}> (alpha testing)
=item B<{#}> (beta testing)
Sequence number of the job to run. The same as $PARALLEL_SEQ.
@ -159,8 +159,7 @@ B<{.}>. See B<-a> and B<-N>.
B<{>I<n>/.B<}> can be used the same places as B<{>I<n>B<}>.
=item B<:::> I<arguments>
=item B<:::> I<arguments> (alpha testing)
Use arguments from the command line as input instead of from stdin
(standard input). Unlike other options for GNU B<parallel> B<:::> is
@ -194,7 +193,7 @@ B<:::> and B<::::> can be mixed. So these are equivalent:
seq 6 7 | parallel -a - -a <(seq 4 5) echo {1} {2} {3} ::: 1 2 3
seq 4 5 | parallel echo {1} {2} {3} :::: <(seq 6 7) - ::: 1 2 3
=item B<::::> I<argfiles>
=item B<::::> I<argfiles> (alpha testing)
Another way to write B<-a> I<argfile1> B<-a> I<argfile2> ...
@ -274,7 +273,7 @@ basename of input line.
Use the replacement string I<replace-str> instead of B<{/.}> for basename of input line without extension.
=item B<--bg> (beta testing)
=item B<--bg>
Run command in background thus GNU B<parallel> will not wait for
completion of the command before exiting. This is the default if
@ -285,9 +284,9 @@ See also: B<--fg>
Implies B<--semaphore>.
=item B<--block> I<size> (beta testing)
=item B<--block> I<size>
=item B<--block-size> I<size> (beta testing)
=item B<--block-size> I<size>
Size of block in bytes. The size can be postfixed with K, M, G, or T
which would multiply the size with 1024, 1048576, 1073741824, or
@ -384,7 +383,7 @@ If I<eof-str> is omitted, there is no end of file string. If neither
B<-E> nor B<-e> is used, no end of file string is used.
=item B<--eta> (alpha testing)
=item B<--eta> (beta testing)
Show the estimated number of seconds before finishing. This forces GNU
B<parallel> to read all jobs before starting to find the number of
@ -392,7 +391,7 @@ jobs. GNU B<parallel> normally only reads the next job to run.
Implies B<--progress>.
=item B<--fg> (beta testing)
=item B<--fg>
Run command in foreground thus GNU B<parallel> will wait for
completion of the command before exiting.
@ -463,7 +462,7 @@ specified, and for B<-I>{} otherwise. This option is deprecated;
use B<-I> instead.
=item B<--joblog> I<logfile> (beta testing)
=item B<--joblog> I<logfile>
Logfile for executed jobs. Save a list of the executed jobs to
I<logfile> in the following TAB separated format: sequence number,
@ -584,7 +583,7 @@ B<-l 0> is an alias for B<-l 1>.
Implies B<-X> unless B<-m> is set.
=item B<--load> I<max-load> (experimental)
=item B<--load> I<max-load> (alpha testing)
Do not start new jobs on a given computer unless the load is less than
I<max-load>. I<max-load> uses the same syntax as B<--jobs>, so I<100%>
@ -618,19 +617,19 @@ See also B<-X> for context replace. If in doubt use B<-X> as that will
most likely do what is needed.
=item B<--output-as-files> (beta testing)
=item B<--output-as-files>
=item B<--outputasfiles> (beta testing)
=item B<--outputasfiles>
=item B<--files> (beta testing)
=item B<--files>
Instead of printing the output to stdout (standard output) the output
of each job is saved in a file and the filename is then printed.
=item B<--pipe> (beta testing)
=item B<--pipe>
=item B<--spreadstdin> (beta testing)
=item B<--spreadstdin>
Spread input to jobs on stdin. Read a block of data from stdin
(standard input) and give one block of data as input to one job.
@ -773,9 +772,9 @@ default.
If the stdin (standard input) only contains whitespace, do not run the command.
=item B<--recstart> I<startstring> (beta testing)
=item B<--recstart> I<startstring>
=item B<--recend> I<endstring> (beta testing)
=item B<--recend> I<endstring>
If B<--recstart> is given I<startstring> will be used to split at record start.
@ -795,17 +794,17 @@ Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
expressions. This is slow, however.
=item B<--regexp> (beta test)
=item B<--regexp> (beta testing)
Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
expressions. This is slow, however.
=item B<--remove-rec-sep> (beta testing)
=item B<--remove-rec-sep>
=item B<--removerecsep> (beta testing)
=item B<--removerecsep>
=item B<--rrs> (beta testing)
=item B<--rrs>
Remove the text matched by B<--recstart> and B<--recend> before piping
it to the command.
@ -813,7 +812,7 @@ it to the command.
Only used with B<--pipe>.
=item B<--retries> I<n> (beta testing)
=item B<--retries> I<n>
If a job fails, retry it on another computer. Do this I<n> times. If
there are fewer than I<n> computers in B<--sshlogin> GNU parallel will
@ -991,9 +990,9 @@ Silent. The job to be run will not be printed. This is the default.
Can be reversed with B<-v>.
=item B<--tty> (beta testing)
=item B<--tty>
=item B<-T> (beta testing)
=item B<-T>
Open terminal tty. If GNU B<parallel> is used for starting an
interactive program then this option may be needed. It will start only
@ -1140,7 +1139,7 @@ B<~/.parallel/tmp/> on the remote computers and will be removed if
using B<--cleanup>.
=item B<--wait> (beta testing)
=item B<--wait>
Wait for all commands to complete.