From 78824429c88df5e2f28f4f14a34427ea4a9abbae Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Thu, 26 May 2011 23:19:58 +0200 Subject: [PATCH] parallel: Prototype of --onall and --nonall --- doc/FUTURE_IDEAS | 25 ++++++++++++++- src/parallel | 28 +++++++++++++++++ src/parallel.pod | 80 +++++++++++++++++++++++++++++++----------------- 3 files changed, 104 insertions(+), 29 deletions(-) diff --git a/doc/FUTURE_IDEAS b/doc/FUTURE_IDEAS index 2b5316d2..810b42d2 100644 --- a/doc/FUTURE_IDEAS +++ b/doc/FUTURE_IDEAS @@ -1,6 +1,29 @@ +-S - should read --sshloginfile from stdin + +--onall + +One jobqueue per sshlogin. + +parallel '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3 + +# Should work: +parallel --onall -S eos,iris '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3 + +parallel -S eos '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3 +parallel -S iris '(echo {3} {2}) | awk \{print\ \$2}' ::: a b c ::: 1 2 3 + +parallel -a /tmp/abc -a /tmp/123 -S eos '(echo {3} {2}) | awk \{print\ \$2}' +parallel -a /tmp/abc -a /tmp/123 -S iris '(echo {3} {2}) | awk \{print\ \$2}' + +cat | parallel --onall -S eos,iris '(echo {3} {2}) | awk \{print\ \$2}' :::: - ::: a b c ::: 1 2 3 + + + +vmstat\ 1\ 2\ \|\ tail\ -n1\ \|\ awk\ \{print\\\ \\\$7*\\\$8\} + + Dont start new job if: -* load is too high * memory free is too low Video 30. 36. 41. 48 diff --git a/src/parallel b/src/parallel index 90bb34e0..78810c2f 100755 --- a/src/parallel +++ b/src/parallel @@ -50,6 +50,28 @@ if(@ARGV) { } } +if($::opt_nonall or $::opt_onall) { + # Copy all @fhlist into tempfiles + my @argfiles = (); + for my $fh (@fhlist) { + my ($outfh,$name) = ::tempfile(SUFFIX => ".all"); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + open(PARALLEL,"| parallel -j $::opt_P") || die; + for my $sshlogin (values %Global::host) { + print PARALLEL "parallel -j1 -S ". 
+ shell_quote_scalar($sshlogin->string())." ". + shell_quote_scalar($command)." :::: @argfiles\n"; + } + close PARALLEL; + unlink(@argfiles); + exit; +} + $Global::JobQueue = JobQueue->new( $command,\@fhlist,$Global::Xargs,$number_of_args,\@Global::ret_files); if($::opt_eta) { @@ -335,6 +357,8 @@ sub get_options_from_array { "number-of-cores" => \$::opt_number_of_cores, "use-cpus-instead-of-cores" => \$::opt_use_cpus_instead_of_cores, "nice=i" => \$::opt_nice, + "onall" => \$::opt_onall, + "nonall" => \$::opt_nonall, "sshlogin|S=s" => \@::opt_sshlogin, "sshloginfile=s" => \$::opt_sshloginfile, "controlmaster|M" => \$::opt_controlmaster, @@ -493,6 +517,10 @@ sub parse_options { if(defined $::opt_fg) { $Global::semaphore = 1; } if(defined $::opt_bg) { $Global::semaphore = 1; } if(defined $::opt_wait) { $Global::semaphore = 1; } + if(defined $::opt_nonall) { + # Append a dummy empty argument + push @ARGV, ":::", ""; + } if(defined $::opt_tty) { # Defaults for --tty: -j1 -u # Can be overridden with -jXXX -g diff --git a/src/parallel.pod b/src/parallel.pod index f5721ea9..c16fde1b 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -217,9 +217,9 @@ for processing arguments that may contain \n (newline). =item B<-a> I -Use I as input source. If you use this option, stdin is -given to the first process run. Otherwise, stdin is redirected from -/dev/null. +Use I as input source. If you use this option, stdin +(standard input) is given to the first process run. Otherwise, stdin +(standard input) is redirected from /dev/null. If multiple B<-a> are given, each I will be treated as an input source, and all combinations of input sources will be @@ -369,11 +369,11 @@ neither B<-E> nor B<-e> is used, no end of file string is used. =item B<--dry-run> -Print the job to run on standard output, but do not run the job. Use -B<-v -v> to include the ssh/rsync wrapping if the job would be run on -a remote computer. 
Do not count on this literaly, though, as the job -may be scheduled on another computer or the local computer if : is in -the list. +Print the job to run on stdout (standard output), but do not run the +job. Use B<-v -v> to include the ssh/rsync wrapping if the job would +be run on a remote computer. Do not count on this literally, though, as +the job may be scheduled on another computer or the local computer if +: is in the list. =item B<--eof>[=I] @@ -415,8 +415,9 @@ B<--gnu> takes precedence. =item B<-g> Group output. Output from each jobs is grouped together and is only -printed when the command is finished. STDERR first followed by STDOUT. -B<-g> is the default. Can be reversed with B<-u>. +printed when the command is finished. stderr (standard error) first +followed by stdout (standard output). B<-g> is the default. Can be +reversed with B<-u>. =item B<--help> @@ -620,6 +621,14 @@ See also B<-X> for context replace. If in doubt use B<-X> as that will most likely do what is needed. +=item B<--onall> (unimplemented) + +Run all the jobs on all computers given with B<--sshlogin>. GNU +B<parallel> will log into B<--jobs> number of computers in parallel +and run one job at a time on the computer. The order of the jobs will +not be changed, but some computers may finish before others. + + =item B<--output-as-files> =item B<--outputasfiles> @@ -634,8 +643,9 @@ of each job is saved in a file and the filename is then printed. =item B<--spreadstdin> (alpha testing) -Spread input to jobs on stdin. Read a block of data from stdin -(standard input) and give one block of data as input to one job. +Spread input to jobs on stdin (standard input). Read a block of data +from stdin (standard input) and give one block of data as input to one +job. The block size is determined by B<--block>. The strings B<--recstart> and B<--recend> tell GNU B<parallel> how a record starts and/or @@ -947,6 +957,9 @@ on the local computer. The sshlogin '..' 
is special, it read sshlogins from ~/.parallel/sshloginfile +The sshlogin '-' is special, too, it reads sshlogins from stdin (standard input). + + To specify more sshlogins separate the sshlogins by comma or repeat the options multiple times. @@ -1035,7 +1048,7 @@ override use B<--gnu>. =item B<-t> -Print the job to be run on standard error. +Print the job to be run on stderr (standard error). See also B<-v> and B<-p>. @@ -1131,7 +1144,7 @@ not need this option. =item B<-v> -Verbose. Print the job to be run on standard output. Can be reversed +Verbose. Print the job to be run on stdout (standard output). Can be reversed with B<--silent>. See also B<-t>. Use B<-v> B<-v> to print the wrapping ssh command when running remotely. @@ -1403,6 +1416,9 @@ download images for the past 30 days: B +B<$(date -d "today -{1} days" +%Y%m%d)> will give the dates in +YYYYMMDD with {1} days subtracted. + =head1 EXAMPLE: Process files from a tar file while unpacking @@ -1467,12 +1483,7 @@ Nested for-loops like this: can be written like this: -B - -The above will run N*N jobs in parallel if parallel normally runs N jobs. To -ensure the output order is the same as the input and only run N jobs do: - -B +B on remote computers you can do: + +B + +B<--nonall> reads no arguments. If you have a list of jobs you want +run on each computer you can do: + +B + + =head1 EXAMPLE: Use multiple inputs in one command Copy files like foo.es.ext to foo.ext: @@ -2062,7 +2085,8 @@ If you want a list of the jobs currently running you can run: B -GNU B<parallel> will then print the currently running jobs on STDERR. +GNU B<parallel> will then print the currently running jobs on stderr +(standard error). =head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS @@ -2229,11 +2253,11 @@ Manipulation of input Outputs O1. Grouping output so output from different jobs do not mix - O2. Send stderr to stderr - O3. Send stdout to stdout + O2. Send stderr (standard error) to stderr (standard error) + O3. 
Send stdout (standard output) to stdout (standard output) O4. Order of output can be same as order of input - O5. Stdout only contains stdout from the command - O6. Stderr only contains stdout from the command + O5. Stdout only contains stdout (standard output) from the command + O6. Stderr only contains stderr (standard error) from the command Execution E1. Running jobs in parallel @@ -2618,9 +2642,9 @@ functions (see why http://www.perlmonks.org/index.pl?node_id=484296). However, scripts or composed commands work just fine. -B generates a lot of status information on STDERR which makes it -harder to use the STDERR output of the job directly as input for -another program. +B generates a lot of status information on stderr (standard +error) which makes it harder to use the stderr (standard error) output +of the job directly as input for another program. Here is the example from B's man page with the equivalent using GNU B: