parallel: Code cleanup. Passes testsuite.

This commit is contained in:
Ole Tange 2011-05-05 20:50:53 +02:00
parent ba6af9c3d2
commit 594f630a7f
3 changed files with 93 additions and 34 deletions

View file

@ -1,3 +1,62 @@
Video
So far GNU Parallel has been focused on replacing a single
for-loop. The Pakistan release introduces a way to replace nested
loops.
As an example I will use the image manipulation program 'convert'. This
will convert foo.png to jpg with a size of 800 and JPEG-quality of 95.
convert -size 800 -quality 95 foo.png foo_800_q95.jpg
With a for-loop it can be done on a list of files:
time \
for file in *.png ; do
convert -size 800 -quality 95 $file ${file%.png}_800_q95.jpg
done
Using GNU Parallel it looks like this:
time parallel convert -size 800 -quality 95 {} {.}_800_q95.jpg ::: *.png
To get the images in 3 different JPEG-qualities you can use a nested for-loop:
time \
for qual in 25 50 95 ; do
for file in *.png ; do
convert -size 800 -quality $qual $file ${file%.png}_800_q${qual}.jpg
done
done
With GNU Parallel 'Pakistan' you can do this:
time parallel convert -size 800 -quality {2} {1} {1.}_800_q{2}.jpg ::: *.png ::: 25 50 95
To get the 3 different JPEG-qualities in 2 different sizes you can
nest the for-loops even further:
time \
for size in 800 30 ; do
for qual in 25 50 95 ; do
for file in *.png ; do
convert -size $size -quality $qual $file ${file%.png}_${size}_q${qual}.jpg
done
done
done
With GNU Parallel 'Pakistan' you can do this:
time parallel convert -size {3} -quality {2} {1} {1.}_{3}_q{2}.jpg ::: *.png ::: 25 50 95 ::: 800 30
You can also provide the arguments in a file:
(echo 25; echo 50; echo 95) > qualities
ls *.png > png-files
time parallel convert -size {3} -quality {2} {1} {1.}_{3}_q{2}.jpg :::: png-files :::: qualities ::: 800 30
Test with first argument == END_OF_FILE string Test with first argument == END_OF_FILE string
unittest til xapply og uden. unittest til xapply og uden.

View file

@ -3889,6 +3889,7 @@ sub new {
return bless { return bless {
'unget' => \@Global::unget_argv, 'unget' => \@Global::unget_argv,
'fhs' => $fhs, 'fhs' => $fhs,
'arg_matrix' => undef,
}, ref($class) || $class; }, ref($class) || $class;
} }
@ -3934,8 +3935,8 @@ sub nest_get {
my $prepend = undef; my $prepend = undef;
my $empty = 1; my $empty = 1;
my $no_of_inputs = $#{$self->{'fhs'}} + 1; my $no_of_inputs = $#{$self->{'fhs'}} + 1;
if(not @::arg_matrix) { if(not $self->{'arg_matrix'}) {
# Initialize @::arg_matrix with one arg from each file # Initialize @arg_matrix with one arg from each file
# read one line from each file # read one line from each file
my @first_arg_set; my @first_arg_set;
my $all_empty = 1; my $all_empty = 1;
@ -3944,8 +3945,8 @@ sub nest_get {
if(defined $arg) { if(defined $arg) {
$all_empty = 0; $all_empty = 0;
} }
$::arg_matrix[$fhno][0] = $arg || Arg->new(""); $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
push @first_arg_set, $::arg_matrix[$fhno][0]; push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
} }
if($all_empty) { if($all_empty) {
# All filehandles were at eof or eof-string # All filehandles were at eof or eof-string
@ -3960,12 +3961,12 @@ sub nest_get {
# read one # read one
my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]) my $arg = read_arg_from_fh($self->{'fhs'}[$fhno])
|| next; # If we just read an EOF string: Treat this as EOF || next; # If we just read an EOF string: Treat this as EOF
my $len = $#{$::arg_matrix[$fhno]} + 1; my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
$::arg_matrix[$fhno][$len] = $arg; $self->{'arg_matrix'}[$fhno][$len] = $arg;
# make all new combinations # make all new combinations
my @combarg = (); my @combarg = ();
for (my $fhn = 0; $fhn < $no_of_inputs; $fhn++) { for (my $fhn = 0; $fhn < $no_of_inputs; $fhn++) {
push @combarg, [0, $#{$::arg_matrix[$fhn]}]; push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}];
} }
$combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry
# map combinations # map combinations
@ -3976,7 +3977,7 @@ sub nest_get {
for my $c (expand_combinations(@combarg)) { for my $c (expand_combinations(@combarg)) {
my @a; my @a;
for my $n (0 .. $no_of_inputs - 1 ) { for my $n (0 .. $no_of_inputs - 1 ) {
push @a, $::arg_matrix[$n][$$c[$n]]; push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
} }
push @mapped, [@a]; push @mapped, [@a];
} }

View file

@ -119,7 +119,7 @@ B<{/.}> can be used the same places as B<{}>. The replacement string
B<{/.}> can be changed with B<--basenameextensionreplace>. B<{/.}> can be changed with B<--basenameextensionreplace>.
=item B<{#}> (alpha testing) =item B<{#}> (beta testing)
Sequence number of the job to run. The same as $PARALLEL_SEQ. Sequence number of the job to run. The same as $PARALLEL_SEQ.
@ -159,8 +159,7 @@ B<{.}>. See B<-a> and B<-N>.
B<{>I<n>/.B<}> can be used the same places as B<{>I<n>B<}>. B<{>I<n>/.B<}> can be used the same places as B<{>I<n>B<}>.
=item B<:::> I<arguments> (alpha testing)
=item B<:::> I<arguments>
Use arguments from the command line as input instead of from stdin Use arguments from the command line as input instead of from stdin
(standard input). Unlike other options for GNU B<parallel> B<:::> is (standard input). Unlike other options for GNU B<parallel> B<:::> is
@ -194,7 +193,7 @@ B<:::> and B<::::> can be mixed. So these are equivalent:
seq 6 7 | parallel -a - -a <(seq 4 5) echo {1} {2} {3} ::: 1 2 3 seq 6 7 | parallel -a - -a <(seq 4 5) echo {1} {2} {3} ::: 1 2 3
seq 4 5 | parallel echo {1} {2} {3} :::: <(seq 6 7) - ::: 1 2 3 seq 4 5 | parallel echo {1} {2} {3} :::: <(seq 6 7) - ::: 1 2 3
=item B<::::> I<argfiles> =item B<::::> I<argfiles> (alpha testing)
Another way to write B<-a> I<argfile1> B<-a> I<argfile2> ... Another way to write B<-a> I<argfile1> B<-a> I<argfile2> ...
@ -274,7 +273,7 @@ basename of input line.
Use the replacement string I<replace-str> instead of B<{/.}> for basename of input line without extension. Use the replacement string I<replace-str> instead of B<{/.}> for basename of input line without extension.
=item B<--bg> (beta testing) =item B<--bg>
Run command in background thus GNU B<parallel> will not wait for Run command in background thus GNU B<parallel> will not wait for
completion of the command before exiting. This is the default if completion of the command before exiting. This is the default if
@ -285,9 +284,9 @@ See also: B<--fg>
Implies B<--semaphore>. Implies B<--semaphore>.
=item B<--block> I<size> (beta testing) =item B<--block> I<size>
=item B<--block-size> I<size> (beta testing) =item B<--block-size> I<size>
Size of block in bytes. The size can be postfixed with K, M, G, or T Size of block in bytes. The size can be postfixed with K, M, G, or T
which would multiply the size with 1024, 1048576, 1073741824, or which would multiply the size with 1024, 1048576, 1073741824, or
@ -384,7 +383,7 @@ If I<eof-str> is omitted, there is no end of file string. If neither
B<-E> nor B<-e> is used, no end of file string is used. B<-E> nor B<-e> is used, no end of file string is used.
=item B<--eta> (alpha testing) =item B<--eta> (beta testing)
Show the estimated number of seconds before finishing. This forces GNU Show the estimated number of seconds before finishing. This forces GNU
B<parallel> to read all jobs before starting to find the number of B<parallel> to read all jobs before starting to find the number of
@ -392,7 +391,7 @@ jobs. GNU B<parallel> normally only reads the next job to run.
Implies B<--progress>. Implies B<--progress>.
=item B<--fg> (beta testing) =item B<--fg>
Run command in foreground thus GNU B<parallel> will wait for Run command in foreground thus GNU B<parallel> will wait for
completion of the command before exiting. completion of the command before exiting.
@ -463,7 +462,7 @@ specified, and for B<-I>{} otherwise. This option is deprecated;
use B<-I> instead. use B<-I> instead.
=item B<--joblog> I<logfile> (beta testing) =item B<--joblog> I<logfile>
Logfile for executed jobs. Save a list of the executed jobs to Logfile for executed jobs. Save a list of the executed jobs to
I<logfile> in the following TAB separated format: sequence number, I<logfile> in the following TAB separated format: sequence number,
@ -584,7 +583,7 @@ B<-l 0> is an alias for B<-l 1>.
Implies B<-X> unless B<-m> is set. Implies B<-X> unless B<-m> is set.
=item B<--load> I<max-load> (experimental) =item B<--load> I<max-load> (alpha testing)
Do not start new jobs on a given computer unless the load is less than Do not start new jobs on a given computer unless the load is less than
I<max-load>. I<max-load> uses the same syntax as B<--jobs>, so I<100%> I<max-load>. I<max-load> uses the same syntax as B<--jobs>, so I<100%>
@ -618,19 +617,19 @@ See also B<-X> for context replace. If in doubt use B<-X> as that will
most likely do what is needed. most likely do what is needed.
=item B<--output-as-files> (beta testing) =item B<--output-as-files>
=item B<--outputasfiles> (beta testing) =item B<--outputasfiles>
=item B<--files> (beta testing) =item B<--files>
Instead of printing the output to stdout (standard output) the output Instead of printing the output to stdout (standard output) the output
of each job is saved in a file and the filename is then printed. of each job is saved in a file and the filename is then printed.
=item B<--pipe> (beta testing) =item B<--pipe>
=item B<--spreadstdin> (beta testing) =item B<--spreadstdin>
Spread input to jobs on stdin. Read a block of data from stdin Spread input to jobs on stdin. Read a block of data from stdin
(standard input) and give one block of data as input to one job. (standard input) and give one block of data as input to one job.
@ -773,9 +772,9 @@ default.
If the stdin (standard input) only contains whitespace, do not run the command. If the stdin (standard input) only contains whitespace, do not run the command.
=item B<--recstart> I<startstring> (beta testing) =item B<--recstart> I<startstring>
=item B<--recend> I<endstring> (beta testing) =item B<--recend> I<endstring>
If B<--recstart> is given I<startstring> will be used to split at record start. If B<--recstart> is given I<startstring> will be used to split at record start.
@ -795,17 +794,17 @@ Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
expressions. This is slow, however. expressions. This is slow, however.
=item B<--regexp> (beta test) =item B<--regexp> (beta testing)
Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
expressions. This is slow, however. expressions. This is slow, however.
=item B<--remove-rec-sep> (beta testing) =item B<--remove-rec-sep>
=item B<--removerecsep> (beta testing) =item B<--removerecsep>
=item B<--rrs> (beta testing) =item B<--rrs>
Remove the text matched by B<--recstart> and B<--recend> before piping Remove the text matched by B<--recstart> and B<--recend> before piping
it to the command. it to the command.
@ -813,7 +812,7 @@ it to the command.
Only used with B<--pipe>. Only used with B<--pipe>.
=item B<--retries> I<n> (beta testing) =item B<--retries> I<n>
If a job fails, retry it on another computer. Do this I<n> times. If If a job fails, retry it on another computer. Do this I<n> times. If
there are fewer than I<n> computers in B<--sshlogin> GNU parallel will there are fewer than I<n> computers in B<--sshlogin> GNU parallel will
@ -991,9 +990,9 @@ Silent. The job to be run will not be printed. This is the default.
Can be reversed with B<-v>. Can be reversed with B<-v>.
=item B<--tty> (beta testing) =item B<--tty>
=item B<-T> (beta testing) =item B<-T>
Open terminal tty. If GNU B<parallel> is used for starting an Open terminal tty. If GNU B<parallel> is used for starting an
interactive program then this option may be needed. It will start only interactive program then this option may be needed. It will start only
@ -1140,7 +1139,7 @@ B<~/.parallel/tmp/> on the remote computers and will be removed if
using B<--cleanup>. using B<--cleanup>.
=item B<--wait> (beta testing) =item B<--wait>
Wait for all commands to complete. Wait for all commands to complete.