parallel: Prototype of --onall and --nonall

Ole Tange 2011-05-26 23:19:58 +02:00
parent 2db11bf3f2
commit 78824429c8
3 changed files with 104 additions and 29 deletions

View file

@@ -1,6 +1,29 @@
-S - should read --sshloginfile from stdin
--onall
One jobqueue per sshlogin.
parallel '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3
# Should work:
parallel --onall -S eos,iris '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3
parallel -S eos '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3
parallel -S iris '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3
parallel -a /tmp/abc -a /tmp/123 -S eos '(echo {3} {2}) | awk \{print\ \$2}'
parallel -a /tmp/abc -a /tmp/123 -S iris '(echo {3} {2}) | awk \{print\ \$2}'
cat | parallel --onall -S eos,iris '(echo {3} {2}) | awk \{print\ \$2}' :::: - ::: a b c ::: 1 2 3
vmstat\ 1\ 2\ \|\ tail\ -n1\ \|\ awk\ \{print\\\ \\\$7*\\\$8\}
Don't start new job if:
* load is too high
* memory free is too low
Video 30. 36. 41. 48

View file

@@ -50,6 +50,28 @@ if(@ARGV) {
}
}
if($::opt_nonall or $::opt_onall) {
# Copy all @fhlist into tempfiles
my @argfiles = ();
for my $fh (@fhlist) {
my ($outfh,$name) = ::tempfile(SUFFIX => ".all");
print $outfh (<$fh>);
close $outfh;
push @argfiles, $name;
}
# for each sshlogin do:
# parallel -S $sshlogin $command :::: @argfiles
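# The outer parallel below logs into $::opt_P sshlogins at a time;
# each inner 'parallel -j1 -S <sshlogin>' then runs the jobs one at a time on that host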
open(PARALLEL,"| parallel -j $::opt_P") || die;
for my $sshlogin (values %Global::host) {
print PARALLEL "parallel -j1 -S ".
shell_quote_scalar($sshlogin->string())." ".
shell_quote_scalar($command)." :::: @argfiles\n";
}
close PARALLEL;
unlink(@argfiles);
exit;
}
$Global::JobQueue = JobQueue->new(
$command,\@fhlist,$Global::Xargs,$number_of_args,\@Global::ret_files);
if($::opt_eta) {
@@ -335,6 +357,8 @@ sub get_options_from_array {
"number-of-cores" => \$::opt_number_of_cores,
"use-cpus-instead-of-cores" => \$::opt_use_cpus_instead_of_cores,
"nice=i" => \$::opt_nice,
"onall" => \$::opt_onall,
"nonall" => \$::opt_nonall,
"sshlogin|S=s" => \@::opt_sshlogin, "sshlogin|S=s" => \@::opt_sshlogin,
"sshloginfile=s" => \$::opt_sshloginfile, "sshloginfile=s" => \$::opt_sshloginfile,
"controlmaster|M" => \$::opt_controlmaster, "controlmaster|M" => \$::opt_controlmaster,
@@ -493,6 +517,10 @@ sub parse_options {
if(defined $::opt_fg) { $Global::semaphore = 1; }
if(defined $::opt_bg) { $Global::semaphore = 1; }
if(defined $::opt_wait) { $Global::semaphore = 1; }
if(defined $::opt_nonall) {
# Append a dummy empty argument
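# (--nonall supplies no arguments of its own, so this single empty
#  argument makes the command run exactly once per sshlogin)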
push @ARGV, ":::", "";
}
if(defined $::opt_tty) {
# Defaults for --tty: -j1 -u
# Can be overridden with -jXXX -g

View file

@@ -217,9 +217,9 @@ for processing arguments that may contain \n (newline).
=item B<-a> I<input-file>
Use I<input-file> as input source. If you use this option, stdin
(standard input) is given to the first process run. Otherwise, stdin
(standard input) is redirected from /dev/null.
If multiple B<-a> are given, each I<input-file> will be treated as an
input source, and all combinations of input sources will be
@@ -369,11 +369,11 @@ neither B<-E> nor B<-e> is used, no end of file string is used.
=item B<--dry-run>
Print the job to run on stdout (standard output), but do not run the
job. Use B<-v -v> to include the ssh/rsync wrapping if the job would
be run on a remote computer. Do not count on this literally, though,
as the job may be scheduled on another computer or the local computer
if : is in the list.
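For example, B<parallel --dry-run echo ::: A B C> should simply print
the three B<echo> commands instead of running them.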
=item B<--eof>[=I<eof-str>]
@@ -415,8 +415,9 @@ B<--gnu> takes precedence.
=item B<-g>
Group output. Output from each job is grouped together and is only
printed when the command is finished. stderr (standard error) first
followed by stdout (standard output). B<-g> is the default. Can be
reversed with B<-u>.
=item B<--help>
@@ -620,6 +621,14 @@ See also B<-X> for context replace. If in doubt use B<-X> as that will
most likely do what is needed.
=item B<--onall> (unimplemented)
Run all the jobs on all computers given with B<--sshlogin>. GNU
B<parallel> will log into B<--jobs> number of computers in parallel
and run one job at a time on each computer. The order of the jobs will
not be changed, but some computers may finish before others.
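For example, B<parallel --onall -S server1,server2 echo ::: 1 2 3>
should run the three B<echo> jobs on both servers (the same invocation
appears in the EXAMPLE section below).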
=item B<--output-as-files>
=item B<--outputasfiles>
@@ -634,8 +643,9 @@ of each job is saved in a file and the filename is then printed.
=item B<--spreadstdin> (alpha testing)
Spread input to jobs on stdin (standard input). Read a block of data
from stdin (standard input) and give one block of data as input to
one job.
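As a rough sketch (B<bigfile> is a placeholder name):
B<cat bigfile | parallel --spreadstdin wc -l> should give each
B<wc -l> job one block of the file to count.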
The block size is determined by B<--block>. The strings B<--recstart>
and B<--recend> tell GNU B<parallel> how a record starts and/or
@@ -947,6 +957,9 @@ on the local computer.
The sshlogin '..' is special; it reads sshlogins from ~/.parallel/sshloginfile.
The sshlogin '-' is special, too; it reads sshlogins from stdin (standard input).
To specify more sshlogins separate the sshlogins by comma or repeat
the options multiple times.
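A sketch of how '-' is meant to be used, assuming one sshlogin per
line as in a sshloginfile:
B<(echo server1; echo server2) | parallel --nonall -S - uptime>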
@@ -1035,7 +1048,7 @@ override use B<--gnu>.
=item B<-t>
Print the job to be run on stderr (standard error).
See also B<-v> and B<-p>.
@@ -1131,7 +1144,7 @@ not need this option.
=item B<-v>
Verbose. Print the job to be run on stdout (standard output). Can be
reversed with B<--silent>. See also B<-t>.
Use B<-v> B<-v> to print the wrapping ssh command when running remotely.
@@ -1403,6 +1416,9 @@ download images for the past 30 days:
B<parallel wget http://www.example.com/path/to/'$(date -d "today -{1} days" +%Y%m%d)_{2}.jpg' ::: $(seq 30) ::: $(seq -w 10)>
B<$(date -d "today -{1} days" +%Y%m%d)> will give the dates in
YYYYMMDD with {1} days subtracted.
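For instance, with {1}=1 and the current date being 2011-05-26, the
URLs would end in 20110525_01.jpg through 20110525_10.jpg.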
=head1 EXAMPLE: Process files from a tar file while unpacking
@@ -1467,12 +1483,7 @@ Nested for-loops like this:
can be written like this:
B<parallel do_something {1} {2} :::: xlist ylist | process_output>
=head1 EXAMPLE: Group output lines
@@ -1690,6 +1701,18 @@ Convert *.mp3 to *.ogg running one process per CPU core on local computer and se
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg' ::: *.mp3
=head1 EXAMPLE: Running the same command on remote computers (unimplemented)
To run the command B<uptime> on remote computers you can do:
B<parallel --nonall -S server1,server2 uptime>
B<--nonall> reads no arguments. If you have a list of jobs you want
run on each computer you can do:
B<parallel --onall -S server1,server2 echo ::: 1 2 3>
=head1 EXAMPLE: Use multiple inputs in one command
Copy files like foo.es.ext to foo.ext:
@@ -2062,7 +2085,8 @@ If you want a list of the jobs currently running you can run:
B<killall -USR1 parallel>
GNU B<parallel> will then print the currently running jobs on stderr
(standard error).
=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS
@@ -2229,11 +2253,11 @@ Manipulation of input
Outputs
O1. Grouping output so output from different jobs do not mix
O2. Send stderr (standard error) to stderr (standard error)
O3. Send stdout (standard output) to stdout (standard output)
O4. Order of output can be same as order of input
O5. Stdout only contains stdout (standard output) from the command
O6. Stderr only contains stderr (standard error) from the command
Execution
E1. Running jobs in parallel
@@ -2618,9 +2642,9 @@ functions (see why
http://www.perlmonks.org/index.pl?node_id=484296). However, scripts or
composed commands work just fine.
B<prll> generates a lot of status information on stderr (standard
error) which makes it harder to use the stderr (standard error) output
of the job directly as input for another program.
Here is the example from B<prll>'s man page with the equivalent
using GNU B<parallel>: