mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-12-22 20:57:53 +00:00
Renamed back to parallel.
This commit is contained in:
parent
1a0103da04
commit
84cc81a5d8
169
src/parallel
169
src/parallel
|
@ -2,17 +2,17 @@
|
|||
|
||||
=head1 NAME
|
||||
|
||||
par$ - build and execute shell command lines from standard input in parallel
|
||||
parallel - build and execute shell command lines from standard input in parallel
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<par$> [-0cdEfghiIkmnpqrtuUvVX] [-I str] [-j num] [--silent] [command [arguments]] [< list_of_arguments]
|
||||
B<parallel> [-0cdEfghiIkmnpqrtuUvVX] [-I str] [-j num] [--silent] [command [arguments]] [< list_of_arguments]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
For each line of input B<par$> will execute B<command> with the
|
||||
For each line of input B<parallel> will execute B<command> with the
|
||||
line as arguments. If no B<command> is given the line of input is
|
||||
executed. B<par$> can often be used as a substitute for B<xargs>
|
||||
executed. B<parallel> can often be used as a substitute for B<xargs>
|
||||
or B<cat | sh>.
|
||||
|
||||
Several lines will be run in parallel.
|
||||
|
@ -25,8 +25,8 @@ Command to execute. If B<command> or the following arguments contain
|
|||
{} every instance will be substituted with the input line. Setting a
|
||||
command also invokes B<-f>.
|
||||
|
||||
If B<command> is given, B<par$> will behave similar to B<xargs>. If
|
||||
B<command> is not given B<par$> will behave similar to B<cat | sh>.
|
||||
If B<command> is given, B<parallel> will behave similar to B<xargs>. If
|
||||
B<command> is not given B<parallel> will behave similar to B<cat | sh>.
|
||||
|
||||
|
||||
=item B<{}>
|
||||
|
@ -172,13 +172,13 @@ end in the sequence 3 1 4 2 the output will still be 1 2 3 4.
|
|||
|
||||
=item B<--number-of-cpus>
|
||||
|
||||
Print the number of CPUs and exit (used by B<par$> itself to
|
||||
Print the number of CPUs and exit (used by B<parallel> itself to
|
||||
determine the number of CPUs on remote machines).
|
||||
|
||||
|
||||
=item B<--number-of-cores>
|
||||
|
||||
Print the number of cores and exit (used by B<par$> itself to determine the
|
||||
Print the number of cores and exit (used by B<parallel> itself to determine the
|
||||
number of cores on remote machines).
|
||||
|
||||
|
||||
|
@ -197,9 +197,9 @@ default.
|
|||
=item B<--sshlogin> I<[ncpu/]sshlogin[,[ncpu/]sshlogin]> (not implemented)
|
||||
|
||||
Distribute jobs to remote servers. The jobs will be run on a list of
|
||||
remote servers. B<par$> will determine the number of CPU cores on
|
||||
remote servers. B<parallel> will determine the number of CPU cores on
|
||||
the remote servers and run the number of jobs as specified by -j. If
|
||||
the number I<ncpu> is given B<par$> will use this number for
|
||||
the number I<ncpu> is given B<parallel> will use this number for
|
||||
number of CPUs on the host. Normally I<ncpu> will not be needed.
|
||||
|
||||
An I<sshlogin> is the string you would normally pass to SSH to login,
|
||||
|
@ -214,7 +214,7 @@ the options multiple times.
|
|||
|
||||
For examples: see B<--sshloginfile>.
|
||||
|
||||
The remote host must have B<par$> installed.
|
||||
The remote host must have B<parallel> installed.
|
||||
|
||||
|
||||
=item B<--sshloginfile> I<filename> (not implemented)
|
||||
|
@ -246,14 +246,14 @@ B<--sshlogin> when the arguments are files and should be transfered to
|
|||
the remote servers. The files will be transferred using B<rsync> and
|
||||
will be put relative to the default login dir. E.g.
|
||||
|
||||
echo foo/bar.txt | par$ \
|
||||
echo foo/bar.txt | parallel \
|
||||
--sshlogin server.example.com --transfer wc
|
||||
|
||||
This will transfer the file I<foo/bar.txt> to the server
|
||||
I<server.example.com> to the file $HOME/foo/bar.txt before running
|
||||
B<wc foo/bar.txt> on I<server.example.com>.
|
||||
|
||||
echo /tmp/foo/bar.txt | par$ \
|
||||
echo /tmp/foo/bar.txt | parallel \
|
||||
--sshlogin server.example.com --transfer wc
|
||||
|
||||
This will transfer the file I</tmp/foo/bar.txt> to the server
|
||||
|
@ -280,14 +280,14 @@ processing is done the file I<filename> will be transfered
|
|||
from the remote server using B<rsync> and will be put relative to
|
||||
the default login dir. E.g.
|
||||
|
||||
echo foo/bar.txt | par$ \
|
||||
echo foo/bar.txt | parallel \
|
||||
--sshlogin server.example.com --return {}.out touch {}.out
|
||||
|
||||
This will transfer the file I<$HOME/foo/bar.txt.out> from the server
|
||||
I<server.example.com> to the file I<foo/bar.txt.out> after running
|
||||
B<touch foo/bar.txt.out> on I<server.example.com>.
|
||||
|
||||
echo /tmp/foo/bar.txt | par$ \
|
||||
echo /tmp/foo/bar.txt | parallel \
|
||||
--sshlogin server.example.com --return {}.out touch {}.out
|
||||
|
||||
This will transfer the file I</tmp/foo/bar.txt.out> from the server
|
||||
|
@ -298,7 +298,7 @@ Multiple files can be transfered by repeating the options multiple
|
|||
times:
|
||||
|
||||
echo /tmp/foo/bar.txt | \
|
||||
par$ --sshlogin server.example.com \
|
||||
parallel --sshlogin server.example.com \
|
||||
--return {}.out --return {}.out2 touch {}.out {}.out2
|
||||
|
||||
B<--return> is often used with B<--transfer> and B<--cleanup>.
|
||||
|
@ -311,7 +311,7 @@ B<--return> is ignored when used with B<--sshlogin :> or when not used with B<--
|
|||
Remove transferred files. B<--cleanup> will remove the transferred files
|
||||
on the remote server after processing is done.
|
||||
|
||||
find log -name '*gz' | par$ \
|
||||
find log -name '*gz' | parallel \
|
||||
--sshlogin server.example.com --transfer --return {.}.bz2 \
|
||||
--cleanup "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
|
@ -338,7 +338,7 @@ output from different commands to be mixed. Can be reversed with B<-g>.
|
|||
|
||||
Count the number of CPUs instead of cores. When computing how many
|
||||
jobs to run in parallel relative to the number of cores you can ask
|
||||
B<par$> to instead look at the number of CPUs. This will make sense
|
||||
B<parallel> to instead look at the number of CPUs. This will make sense
|
||||
for computers that have hyperthreading as two jobs running on one CPU
|
||||
with hyperthreading will run slower than two jobs running on two CPUs.
|
||||
Normal users will not need this option.
|
||||
|
@ -368,7 +368,7 @@ of a word (like I<pic{}.jpg>) then the whole word will be repeated.
|
|||
|
||||
=head1 EXAMPLE 1: Working as cat | sh. Resource inexpensive jobs and evaluation
|
||||
|
||||
B<par$> can work similar to B<cat | sh>.
|
||||
B<parallel> can work similar to B<cat | sh>.
|
||||
|
||||
A resource inexpensive job is a job that takes very little CPU, disk
|
||||
I/O and network I/O. Ping is an example of a resource inexpensive
|
||||
|
@ -386,18 +386,18 @@ The content of the file jobs_to_run:
|
|||
|
||||
To run 100 processes simultaneously do:
|
||||
|
||||
B<par$ -j 100 < jobs_to_run>
|
||||
B<parallel -j 100 < jobs_to_run>
|
||||
|
||||
As there is not a B<command> the option B<-c> is default because the
|
||||
jobs need to be evaluated by the shell.
|
||||
|
||||
=head1 EXAMPLE 2: Working as xargs -n1. Argument appending
|
||||
|
||||
B<par$> can work similar to B<xargs -n1>.
|
||||
B<parallel> can work similar to B<xargs -n1>.
|
||||
|
||||
To output all html files run:
|
||||
|
||||
B<find . -name '*.html' | par$ cat>
|
||||
B<find . -name '*.html' | parallel cat>
|
||||
|
||||
As there is a B<command> the option B<-f> is default because the
|
||||
filenames need to be protected from the shell in case a filename
|
||||
|
@ -414,11 +414,11 @@ If the system has more than 1 CPU core it can be run with
|
|||
number-of-cpu-cores jobs in parallel (-j +0). This will do that for
|
||||
all jpg files in a directory:
|
||||
|
||||
B<ls *.jpg | par$ -j +0 convert -geometry 120 {} thumb_{}>
|
||||
B<ls *.jpg | parallel -j +0 convert -geometry 120 {} thumb_{}>
|
||||
|
||||
To do it recursively use B<find>:
|
||||
|
||||
B<find . -name '*.jpg' | par$ -j +0 convert -geometry 120 {} {}_thumb.jpg>
|
||||
B<find . -name '*.jpg' | parallel -j +0 convert -geometry 120 {} {}_thumb.jpg>
|
||||
|
||||
Notice how the argument has to start with {} as {} will include path
|
||||
(e.g. running B<convert -geometry 120 ./foo/bar.jpg
|
||||
|
@ -427,30 +427,30 @@ like ./foo/bar.jpg_thumb.jpg. If that is not wanted this can fix it:
|
|||
|
||||
find . -name '*.jpg' | \
|
||||
perl -pe 'chomp; $a=$_; s:/([^/]+)$:/thumb_$1:; $_="convert -geometry 120 $a $_\n"' | \
|
||||
par$ -c -j +0
|
||||
parallel -c -j +0
|
||||
|
||||
Unfortunately this will not work if the filenames contain special
|
||||
characters (such as space or quotes). If you have B<ren> installed this
|
||||
is a better solution:
|
||||
|
||||
find . -name '*.jpg' | par$ -j +0 convert -geometry 120 {} {}_thumb.jpg
|
||||
find . -name '*.jpg' | parallel -j +0 convert -geometry 120 {} {}_thumb.jpg
|
||||
find . -name '*_thumb.jpg' | ren 's:/([^/]+)_thumb.jpg$:/thumb_$1:'
|
||||
|
||||
This will make files like ./foo/bar_thumb.jpg:
|
||||
|
||||
B<find . -name '*.jpg' | par$ -j +0 convert -geometry 120 {} {.}_thumb.jpg>
|
||||
B<find . -name '*.jpg' | parallel -j +0 convert -geometry 120 {} {.}_thumb.jpg>
|
||||
|
||||
=head1 EXAMPLE 4: Substitution and redirection
|
||||
|
||||
This will compare all files in the dir to the file foo and save the
|
||||
diffs in corresponding .diff files:
|
||||
|
||||
B<ls | par$ diff {} foo ">>B<"{}.diff>
|
||||
B<ls | parallel diff {} foo ">>B<"{}.diff>
|
||||
|
||||
Quoting of > is necessary to postpone the redirection. Another
|
||||
solution is to quote the whole command:
|
||||
|
||||
B<ls | par$ "diff {} foo >>B<{}.diff">
|
||||
B<ls | parallel "diff {} foo >>B<{}.diff">
|
||||
|
||||
|
||||
=head1 EXAMPLE 5: Composed commands
|
||||
|
@ -458,22 +458,22 @@ B<ls | par$ "diff {} foo >>B<{}.diff">
|
|||
A job can consist of several commands. This will print the number of
|
||||
files in each directory:
|
||||
|
||||
B<ls | par$ 'echo -n {}" "; ls {}|wc -l'>
|
||||
B<ls | parallel 'echo -n {}" "; ls {}|wc -l'>
|
||||
|
||||
To put the output in a file called <name>.dir:
|
||||
|
||||
B<ls | par$ '(echo -n {}" "; ls {}|wc -l) >> B<{}.dir'>
|
||||
B<ls | parallel '(echo -n {}" "; ls {}|wc -l) >> B<{}.dir'>
|
||||
|
||||
|
||||
=head1 EXAMPLE 6: Context replace
|
||||
|
||||
To remove the files I<pict0000.jpg> .. I<pict9999.jpg> you could do:
|
||||
|
||||
B<seq -f %04g 0 9999 | par$ rm pict{}.jpg>
|
||||
B<seq -f %04g 0 9999 | parallel rm pict{}.jpg>
|
||||
|
||||
You could also do:
|
||||
|
||||
B<seq -f %04g 0 9999 | perl -pe 's/(.*)/pict$1.jpg/' | par$ -m rm>
|
||||
B<seq -f %04g 0 9999 | perl -pe 's/(.*)/pict$1.jpg/' | parallel -m rm>
|
||||
|
||||
The first will run B<rm> 10000 times, while the last will only run
|
||||
B<rm> as many times needed to keep the command line length short
|
||||
|
@ -481,7 +481,7 @@ enough (typically 1-2 times).
|
|||
|
||||
You could also run:
|
||||
|
||||
B<seq -f %04g 0 9999 | par$ -X rm pict{}.jpg>
|
||||
B<seq -f %04g 0 9999 | parallel -X rm pict{}.jpg>
|
||||
|
||||
This will also only run B<rm> as many times needed to keep the command
|
||||
line length short enough.
|
||||
|
@ -489,18 +489,18 @@ line length short enough.
|
|||
=head1 EXAMPLE 7: Group output lines
|
||||
|
||||
When running jobs that output data, you often do not want the output
|
||||
of multiple jobs to run together. B<par$> defaults to grouping the
|
||||
of multiple jobs to run together. B<parallel> defaults to grouping the
|
||||
output of each job, so the output is printed when the job finishes. If
|
||||
you want the output to be printed while the job is running you can use
|
||||
B<-u>.
|
||||
|
||||
Compare the output of:
|
||||
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | par$ traceroute>
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute>
|
||||
|
||||
to the output of:
|
||||
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | par$ -u traceroute>
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -u traceroute>
|
||||
|
||||
|
||||
=head1 EXAMPLE 8: Keep order of output same as order of input
|
||||
|
@ -511,7 +511,7 @@ same as the order of the input. B<-k> will make sure the order of
|
|||
output will be in the same order as input even if later jobs end
|
||||
before earlier jobs.
|
||||
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | par$ traceroute>
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute>
|
||||
|
||||
will give traceroute of foss.org.my, debian.org and
|
||||
freenetproject.org, but it will be sorted according to which job
|
||||
|
@ -519,7 +519,7 @@ completed first.
|
|||
|
||||
To keep the order the same as input run:
|
||||
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | par$ -k traceroute>
|
||||
B<(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -k traceroute>
|
||||
|
||||
This will make sure the traceroute to foss.org.my will be printed
|
||||
first.
|
||||
|
@ -532,20 +532,20 @@ handy).
|
|||
|
||||
To run B<echo> on B<server.example.com>:
|
||||
|
||||
seq 1 10 | par$ --sshlogin server.example.com echo
|
||||
seq 1 10 | parallel --sshlogin server.example.com echo
|
||||
|
||||
To run commands on more than one remote computer run:
|
||||
|
||||
seq 1 10 | par$ --sshlogin server.example.com,server2.example.net echo
|
||||
seq 1 10 | parallel --sshlogin server.example.com,server2.example.net echo
|
||||
|
||||
Or:
|
||||
|
||||
seq 1 10 | par$ --sshlogin server.example.com \
|
||||
seq 1 10 | parallel --sshlogin server.example.com \
|
||||
--sshlogin server2.example.net echo
|
||||
|
||||
If the login username is I<foo> on I<server2.example.net> use:
|
||||
|
||||
seq 1 10 | par$ --sshlogin server.example.com \
|
||||
seq 1 10 | parallel --sshlogin server.example.com \
|
||||
--sshlogin foo@server2.example.net echo
|
||||
|
||||
To distribute the commands to a list of machines, make a file
|
||||
|
@ -557,7 +557,7 @@ I<mymachines> with all the machines:
|
|||
|
||||
Then run:
|
||||
|
||||
seq 1 10 | par$ --sshloginfile mymachines echo
|
||||
seq 1 10 | parallel --sshloginfile mymachines echo
|
||||
|
||||
To include the local machine add the special sshlogin ':' to the list:
|
||||
|
||||
|
@ -570,7 +570,7 @@ If the number of CPU cores on the remote servers is not identified
|
|||
correctly the number of CPU cores can be added in front. Here the
|
||||
server has 8 CPU cores.
|
||||
|
||||
seq 1 10 | par$ --sshlogin 8/server.example.com echo
|
||||
seq 1 10 | parallel --sshlogin 8/server.example.com echo
|
||||
|
||||
|
||||
=head1 EXAMPLE 10: Transferring of files (not implemented)
|
||||
|
@ -578,7 +578,7 @@ server has 8 CPU cores.
|
|||
To recompress gzipped files with bzip2 using a remote server run:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com \
|
||||
parallel --sshlogin server.example.com \
|
||||
--transfer "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
This will list the .gz-files in the I<logs> directory and all
|
||||
|
@ -592,7 +592,7 @@ If you want the file to be transfered back to the local machine add
|
|||
I<--return {.}.bz2>:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com \
|
||||
parallel --sshlogin server.example.com \
|
||||
--transfer --return {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
After the recompressing is done the I<.bz2>-file is transferred back to
|
||||
|
@ -603,14 +603,14 @@ I<--cleanup>. This will remove both the file transfered to the remote
|
|||
machine and the files transferred from the remote machine:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com \
|
||||
parallel --sshlogin server.example.com \
|
||||
--transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
If you want to run on several servers add the servers to I<--sshlogin>
|
||||
either using ',' or separate I<--sshlogin>:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com,server2.example.com \
|
||||
parallel --sshlogin server.example.com,server2.example.com \
|
||||
--sshlogin server3.example.com \
|
||||
--transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
|
@ -618,7 +618,7 @@ You can add the local machine using I<--sshlogin :>. This will disable the
|
|||
removing and transferring for the local machine only:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com,server2.example.com \
|
||||
parallel --sshlogin server.example.com,server2.example.com \
|
||||
--sshlogin server3.example.com \
|
||||
--sshlogin : \
|
||||
--transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
@ -627,14 +627,14 @@ Often I<--transfer>, I<--return> and I<--cleanup> are used together. They can be
|
|||
shortened to I<--trc>:
|
||||
|
||||
find logs/ -name '*.gz' | \
|
||||
par$ --sshlogin server.example.com,server2.example.com \
|
||||
parallel --sshlogin server.example.com,server2.example.com \
|
||||
--sshlogin server3.example.com \
|
||||
--sshlogin : \
|
||||
--trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
With the file I<mymachines> containing the compute machines it becomes:
|
||||
|
||||
find logs/ -name '*.gz' | par$ --sshloginfile mymachines \
|
||||
find logs/ -name '*.gz' | parallel --sshloginfile mymachines \
|
||||
--trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2"
|
||||
|
||||
=head1 QUOTING
|
||||
|
@ -644,21 +644,21 @@ print the filename for each line that has exactly 2 columns:
|
|||
|
||||
B<perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' file>
|
||||
|
||||
This can be done by B<par$> using:
|
||||
This can be done by B<parallel> using:
|
||||
|
||||
B<ls | par$ "perl -ne '/^\\S+\\s+\\S+$/ and print \$ARGV,\"\\n\"'">
|
||||
B<ls | parallel "perl -ne '/^\\S+\\s+\\S+$/ and print \$ARGV,\"\\n\"'">
|
||||
|
||||
Notice how \'s, "'s, and $'s needs to be quoted. B<par$> can do
|
||||
Notice how \'s, "'s, and $'s need to be quoted. B<parallel> can do
|
||||
the quoting by using option B<-q>:
|
||||
|
||||
B<ls | par$ -q perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"'>
|
||||
B<ls | parallel -q perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"'>
|
||||
|
||||
However, this means you cannot make the shell interpret special
|
||||
characters. For example this B<will not work>:
|
||||
|
||||
B<ls | par$ -q "diff {} foo >>B<{}.diff">
|
||||
B<ls | parallel -q "diff {} foo >>B<{}.diff">
|
||||
|
||||
B<ls | par$ -q "ls {} | wc -l">
|
||||
B<ls | parallel -q "ls {} | wc -l">
|
||||
|
||||
because > and | need to be interpreted by the shell.
|
||||
|
||||
|
@ -671,14 +671,14 @@ then you might try using B<-q>.
|
|||
If you are using B<bash> process substitution like B<<(cat foo)> then
|
||||
you may try B<-q> and prepending B<command> with B<bash -c>:
|
||||
|
||||
B<ls | par$ -q bash -c 'wc -c <(echo {})'>
|
||||
B<ls | parallel -q bash -c 'wc -c <(echo {})'>
|
||||
|
||||
Or for substituting output:
|
||||
|
||||
B<ls | par$ -q bash -c 'tar c {} | tee >>B<(gzip >>B<{}.tar.gz) | bzip2 >>B<{}.tar.bz2'>
|
||||
B<ls | parallel -q bash -c 'tar c {} | tee >>B<(gzip >>B<{}.tar.gz) | bzip2 >>B<{}.tar.bz2'>
|
||||
|
||||
B<Conclusion>: To avoid dealing with the quoting problems it may be
|
||||
easier just to write a small script and have B<par$> call that
|
||||
easier just to write a small script and have B<parallel> call that
|
||||
script.
|
||||
|
||||
|
||||
|
@ -686,31 +686,31 @@ script.
|
|||
|
||||
If you want a list of the jobs currently running you can run:
|
||||
|
||||
B<killall -USR1 par$>
|
||||
B<killall -USR1 parallel>
|
||||
|
||||
B<par$> will then print the currently running jobs on STDERR.
|
||||
B<parallel> will then print the currently running jobs on STDERR.
|
||||
|
||||
|
||||
=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS
|
||||
|
||||
If you regret starting a lot of jobs you can simply break B<par$>,
|
||||
If you regret starting a lot of jobs you can simply break B<parallel>,
|
||||
but if you want to make sure you do not have half-completed jobs you
|
||||
should send the signal B<SIGTERM> to B<par$>:
|
||||
should send the signal B<SIGTERM> to B<parallel>:
|
||||
|
||||
B<killall -TERM par$>
|
||||
B<killall -TERM parallel>
|
||||
|
||||
This will tell B<par$> to not start any new jobs, but wait until
|
||||
This will tell B<parallel> to not start any new jobs, but wait until
|
||||
the currently running jobs are finished.
|
||||
|
||||
|
||||
=head1 DIFFERENCES BETWEEN xargs/find -exec AND par$
|
||||
=head1 DIFFERENCES BETWEEN xargs/find -exec AND parallel
|
||||
|
||||
B<xargs> and B<find -exec> offer some of the same possibilities as
|
||||
B<par$>.
|
||||
B<parallel>.
|
||||
|
||||
B<find -exec> only works on files. So processing other input (such as
|
||||
hosts or URLs) will require creating these inputs as files. B<find
|
||||
-exec> has no support for running commands in par$.
|
||||
-exec> has no support for running commands in parallel.
|
||||
|
||||
B<xargs> deals badly with special characters (such as space, ' and
|
||||
"). To see the problem try this:
|
||||
|
@ -728,7 +728,7 @@ B<sed>, B<tar -v>, B<perl> (-0 and \0 instead of \n), B<locate>
|
|||
(requires using -0), B<find> (requires using -print0), B<grep>
|
||||
(requires user to use -z or -Z).
|
||||
|
||||
So B<par$>'s newline separation can be emulated with:
|
||||
So B<parallel>'s newline separation can be emulated with:
|
||||
|
||||
B<cat | xargs -d "\n" -n1 I<command>>
|
||||
|
||||
|
@ -749,10 +749,10 @@ arguments.
|
|||
If you use a replace string in B<xargs> (B<-I>) you can not force
|
||||
B<xargs> to use more than one argument.
|
||||
|
||||
Quoting in B<xargs> works like B<-q> in B<par$>. This means
|
||||
Quoting in B<xargs> works like B<-q> in B<parallel>. This means
|
||||
composed commands and redirection require using B<bash -c>.
|
||||
|
||||
B<ls | par$ "wc {} >> B<{}.wc">
|
||||
B<ls | parallel "wc {} >> B<{}.wc">
|
||||
|
||||
becomes
|
||||
|
||||
|
@ -760,23 +760,23 @@ B<ls | xargs -d "\n" -P10 -I {} bash -c "wc {} >>B< {}.wc">
|
|||
|
||||
and
|
||||
|
||||
B<ls | par$ "echo {}; ls {}|wc">
|
||||
B<ls | parallel "echo {}; ls {}|wc">
|
||||
|
||||
becomes
|
||||
|
||||
B<ls | xargs -d "\n" -P10 -I {} bash -c "echo {}; ls {}|wc">
|
||||
|
||||
|
||||
=head1 DIFFERENCES BETWEEN mdm/middleman AND par$
|
||||
=head1 DIFFERENCES BETWEEN mdm/middleman AND parallel
|
||||
|
||||
middleman(mdm) is also a tool for running jobs in parallel.
|
||||
|
||||
Here are the shellscripts of http://mdm.berlios.de/usage.html ported
|
||||
to par$ use:
|
||||
to parallel use:
|
||||
|
||||
B<seq 1 19 | par$ -j+0 buffon -o - | sort -n >>B< result>
|
||||
B<seq 1 19 | parallel -j+0 buffon -o - | sort -n >>B< result>
|
||||
|
||||
B<cat files | par$ -j+0 cmd>
|
||||
B<cat files | parallel -j+0 cmd>
|
||||
|
||||
|
||||
=head1 BUGS
|
||||
|
@ -787,19 +787,22 @@ unexpected results, as it will often be interpreted as an option.
|
|||
|
||||
=head1 REPORTING BUGS
|
||||
|
||||
Report bugs to <bug-par$@tange.dk>.
|
||||
Report bugs to <bug-parallel@tange.dk>.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk
|
||||
|
||||
Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk
|
||||
Copyright (C) 2008,2009,2010 Ole Tange, http://ole.tange.dk
|
||||
|
||||
Copyright (C) 2010 Ole Tange, http://ole.tange.dk and Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright (C) 2007-2010 Free Software Foundation, Inc.
|
||||
Copyright (C) 2007,2008,2009,2010 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -817,7 +820,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
B<par$> uses Perl, and the Perl modules Getopt::Long, IPC::Open3,
|
||||
B<parallel> uses Perl, and the Perl modules Getopt::Long, IPC::Open3,
|
||||
Symbol, IO::File, POSIX, and File::Temp.
|
||||
|
||||
|
||||
|
@ -885,7 +888,7 @@ GetOptions("debug|D" => \$::opt_D,
|
|||
|
||||
# Defaults:
|
||||
$Global::version = 20100419;
|
||||
$Global::progname = 'par$';
|
||||
$Global::progname = 'parallel';
|
||||
$Global::debug = 0;
|
||||
$Global::processes_to_run = 10;
|
||||
$command = undef;
|
||||
|
|
992
src/parallel.1
992
src/parallel.1
|
@ -1,992 +0,0 @@
|
|||
.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
|
||||
.\"
|
||||
.\" Standard preamble:
|
||||
.\" ========================================================================
|
||||
.de Sp \" Vertical space (when we can't use .PP)
|
||||
.if t .sp .5v
|
||||
.if n .sp
|
||||
..
|
||||
.de Vb \" Begin verbatim text
|
||||
.ft CW
|
||||
.nf
|
||||
.ne \\$1
|
||||
..
|
||||
.de Ve \" End verbatim text
|
||||
.ft R
|
||||
.fi
|
||||
..
|
||||
.\" Set up some character translations and predefined strings. \*(-- will
|
||||
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
|
||||
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
|
||||
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
|
||||
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
|
||||
.\" nothing in troff, for use with C<>.
|
||||
.tr \(*W-
|
||||
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
|
||||
.ie n \{\
|
||||
. ds -- \(*W-
|
||||
. ds PI pi
|
||||
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
|
||||
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
|
||||
. ds L" ""
|
||||
. ds R" ""
|
||||
. ds C` ""
|
||||
. ds C' ""
|
||||
'br\}
|
||||
.el\{\
|
||||
. ds -- \|\(em\|
|
||||
. ds PI \(*p
|
||||
. ds L" ``
|
||||
. ds R" ''
|
||||
'br\}
|
||||
.\"
|
||||
.\" Escape single quotes in literal strings from groff's Unicode transform.
|
||||
.ie \n(.g .ds Aq \(aq
|
||||
.el .ds Aq '
|
||||
.\"
|
||||
.\" If the F register is turned on, we'll generate index entries on stderr for
|
||||
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
|
||||
.\" entries marked with X<> in POD. Of course, you'll have to process the
|
||||
.\" output yourself in some meaningful fashion.
|
||||
.ie \nF \{\
|
||||
. de IX
|
||||
. tm Index:\\$1\t\\n%\t"\\$2"
|
||||
..
|
||||
. nr % 0
|
||||
. rr F
|
||||
.\}
|
||||
.el \{\
|
||||
. de IX
|
||||
..
|
||||
.\}
|
||||
.\"
|
||||
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
|
||||
.\" Fear. Run. Save yourself. No user-serviceable parts.
|
||||
. \" fudge factors for nroff and troff
|
||||
.if n \{\
|
||||
. ds #H 0
|
||||
. ds #V .8m
|
||||
. ds #F .3m
|
||||
. ds #[ \f1
|
||||
. ds #] \fP
|
||||
.\}
|
||||
.if t \{\
|
||||
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
|
||||
. ds #V .6m
|
||||
. ds #F 0
|
||||
. ds #[ \&
|
||||
. ds #] \&
|
||||
.\}
|
||||
. \" simple accents for nroff and troff
|
||||
.if n \{\
|
||||
. ds ' \&
|
||||
. ds ` \&
|
||||
. ds ^ \&
|
||||
. ds , \&
|
||||
. ds ~ ~
|
||||
. ds /
|
||||
.\}
|
||||
.if t \{\
|
||||
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
|
||||
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
|
||||
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
|
||||
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
|
||||
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
|
||||
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
|
||||
.\}
|
||||
. \" troff and (daisy-wheel) nroff accents
|
||||
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
|
||||
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
|
||||
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
|
||||
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
|
||||
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
|
||||
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
|
||||
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
|
||||
.ds ae a\h'-(\w'a'u*4/10)'e
|
||||
.ds Ae A\h'-(\w'A'u*4/10)'E
|
||||
. \" corrections for vroff
|
||||
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
|
||||
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
|
||||
. \" for low resolution devices (crt and lpr)
|
||||
.if \n(.H>23 .if \n(.V>19 \
|
||||
\{\
|
||||
. ds : e
|
||||
. ds 8 ss
|
||||
. ds o a
|
||||
. ds d- d\h'-1'\(ga
|
||||
. ds D- D\h'-1'\(hy
|
||||
. ds th \o'bp'
|
||||
. ds Th \o'LP'
|
||||
. ds ae ae
|
||||
. ds Ae AE
|
||||
.\}
|
||||
.rm #[ #] #H #V #F C
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "PARALLEL 1"
|
||||
.TH PARALLEL 1 "2010-04-16" "20100419" "parallel"
|
||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||
.\" way too many mistakes in technical documents.
|
||||
.if n .ad l
|
||||
.nh
|
||||
.SH "NAME"
|
||||
parallel \- build and execute shell command lines from standard input in parallel
|
||||
.SH "SYNOPSIS"
|
||||
.IX Header "SYNOPSIS"
|
||||
\&\fBparallel\fR [\-0cdEfghiIkmnpqrtuUvX] [\-I str] [\-j num] [\-\-silent] [command [arguments]] [< list_of_arguments]
|
||||
.SH "DESCRIPTION"
|
||||
.IX Header "DESCRIPTION"
|
||||
For each line of input \fBparallel\fR will execute \fBcommand\fR with the
|
||||
line as arguments. If no \fBcommand\fR is given the line of input is
|
||||
executed. \fBparallel\fR can often be used as a substitute for \fBxargs\fR
|
||||
or \fBcat | sh\fR.
|
||||
.PP
|
||||
Several lines will be run in parallel.
|
||||
.IP "\fIcommand\fR" 9
|
||||
.IX Item "command"
|
||||
Command to execute. If \fBcommand\fR or the following arguments contain
|
||||
{} every instance will be substituted with the input line. Setting a
|
||||
command also invokes \fB\-f\fR.
|
||||
.Sp
|
||||
If \fBcommand\fR is given, \fBparallel\fR will behave similar to \fBxargs\fR. If
|
||||
\&\fBcommand\fR is not given \fBparallel\fR will behave similar to \fBcat | sh\fR.
|
||||
.IP "\fB{}\fR" 9
|
||||
.IX Item "{}"
|
||||
Input line. This is the default replacement string and will normally
|
||||
be used for putting the argument in the command line. It can be
|
||||
changed with \fB\-I\fR.
|
||||
.IP "\fB{.}\fR" 9
|
||||
.IX Item "{.}"
|
||||
Input line without extension. This is a specialized replacement string
|
||||
with the extension removed. It will remove from the last \fB.\fR till the
|
||||
end of line of each input line and replace {.} with the
|
||||
remaining. E.g. \fIfoo.jpg\fR becomes \fIfoo\fR. If the input line does
|
||||
not contain \fB.\fR it will remain unchanged.
|
||||
.Sp
|
||||
{.} can be used the same places as {}.
|
||||
.IP "\fB\-\-null\fR" 9
|
||||
.IX Item "--null"
|
||||
.PD 0
|
||||
.IP "\fB\-0\fR" 9
|
||||
.IX Item "-0"
|
||||
.PD
|
||||
Use \s-1NUL\s0 as delimiter. Normally input lines will end in \en
|
||||
(newline). If they end in \e0 (\s-1NUL\s0), then use this option. It is useful
|
||||
for processing filenames that may contain \en (newline).
|
||||
.IP "\fB\-\-command\fR" 9
|
||||
.IX Item "--command"
|
||||
.PD 0
|
||||
.IP "\fB\-c\fR" 9
|
||||
.IX Item "-c"
|
||||
.PD
|
||||
Line is a command. The input line contains more than one argument or
|
||||
the input line needs to be evaluated by the shell. This is the default
|
||||
if \fBcommand\fR is not set. Can be reversed with \fB\-f\fR.
|
||||
.IP "\fB\-\-delimiter\fR \fIdelim\fR" 9
|
||||
.IX Item "--delimiter delim"
|
||||
.PD 0
|
||||
.IP "\fB\-d\fR \fIdelim\fR" 9
|
||||
.IX Item "-d delim"
|
||||
.PD
|
||||
Input items are terminated by the specified character. Quotes and
|
||||
backslash are not special; every character in the input is taken
|
||||
literally. Disables the end-of-file string, which is treated like any
|
||||
other argument. This can be used when the input consists of simply
|
||||
newline-separated items, although it is almost always better to design
|
||||
your program to use \-\-null where this is possible. The specified
|
||||
delimiter may be a single character, a C\-style character escape such
|
||||
as \en, or an octal or hexadecimal escape code. Octal and
|
||||
hexadecimal escape codes are understood as for the printf command.
|
||||
Multibyte characters are not supported.
|
||||
.IP "\fB\-\-file\fR" 9
|
||||
.IX Item "--file"
|
||||
.PD 0
|
||||
.IP "\fB\-f\fR" 9
|
||||
.IX Item "-f"
|
||||
.PD
|
||||
Line is a filename. The input line contains a filename that will be
|
||||
quoted so it is not evaluated by the shell. This is the default if
|
||||
\&\fBcommand\fR is set. Can be reversed with \fB\-c\fR.
|
||||
.IP "\fB\-\-group\fR" 9
|
||||
.IX Item "--group"
|
||||
.PD 0
|
||||
.IP "\fB\-g\fR" 9
|
||||
.IX Item "-g"
|
||||
.PD
|
||||
Group output. Output from each job is grouped together and is only
|
||||
printed when the command is finished. \s-1STDERR\s0 first followed by \s-1STDOUT\s0.
|
||||
\&\fB\-g\fR is the default. Can be reversed with \fB\-u\fR.
|
||||
.IP "\fB\-I\fR \fIstring\fR" 9
|
||||
.IX Item "-I string"
|
||||
Use the replacement string \fIstring\fR instead of {}.
|
||||
.IP "\fB\-U\fR \fIstring\fR" 9
|
||||
.IX Item "-U string"
|
||||
.PD 0
|
||||
.IP "\fB\-\-extensionreplace\fR \fIstring\fR" 9
|
||||
.IX Item "--extensionreplace string"
|
||||
.PD
|
||||
Use the replacement string \fIstring\fR instead of {.} for input line without extension.
|
||||
.IP "\fB\-\-jobs\fR \fIN\fR" 9
|
||||
.IX Item "--jobs N"
|
||||
.PD 0
|
||||
.IP "\fB\-j\fR \fIN\fR" 9
|
||||
.IX Item "-j N"
|
||||
.IP "\fB\-\-max\-procs\fR \fIN\fR" 9
|
||||
.IX Item "--max-procs N"
|
||||
.IP "\fB\-P\fR \fIN\fR" 9
|
||||
.IX Item "-P N"
|
||||
.PD
|
||||
Run up to N jobs in parallel. 0 means as many as possible. Default is 10.
|
||||
.IP "\fB\-\-jobs\fR \fI+N\fR" 9
|
||||
.IX Item "--jobs +N"
|
||||
.PD 0
|
||||
.IP "\fB\-j\fR \fI+N\fR" 9
|
||||
.IX Item "-j +N"
|
||||
.IP "\fB\-\-max\-procs\fR \fI+N\fR" 9
|
||||
.IX Item "--max-procs +N"
|
||||
.IP "\fB\-P\fR \fI+N\fR" 9
|
||||
.IX Item "-P +N"
|
||||
.PD
|
||||
Add N to the number of \s-1CPU\s0 cores. Run this many jobs in parallel. For
|
||||
compute intensive jobs \fI\-j +0\fR is useful as it will run
|
||||
number-of-cpu-cores jobs in parallel. See also
|
||||
\&\-\-use\-cpus\-instead\-of\-cores.
|
||||
.IP "\fB\-\-jobs\fR \fI\-N\fR" 9
|
||||
.IX Item "--jobs -N"
|
||||
.PD 0
|
||||
.IP "\fB\-j\fR \fI\-N\fR" 9
|
||||
.IX Item "-j -N"
|
||||
.IP "\fB\-\-max\-procs\fR \fI\-N\fR" 9
|
||||
.IX Item "--max-procs -N"
|
||||
.IP "\fB\-P\fR \fI\-N\fR" 9
|
||||
.IX Item "-P -N"
|
||||
.PD
|
||||
Subtract N from the number of \s-1CPU\s0 cores. Run this many jobs in parallel.
|
||||
If the evaluated number is less than 1 then 1 will be used. See also
|
||||
\&\-\-use\-cpus\-instead\-of\-cores.
|
||||
.IP "\fB\-\-jobs\fR \fIN\fR%" 9
|
||||
.IX Item "--jobs N%"
|
||||
.PD 0
|
||||
.IP "\fB\-j\fR \fIN\fR%" 9
|
||||
.IX Item "-j N%"
|
||||
.IP "\fB\-\-max\-procs\fR \fIN\fR%" 9
|
||||
.IX Item "--max-procs N%"
|
||||
.IP "\fB\-P\fR \fIN\fR%" 9
|
||||
.IX Item "-P N%"
|
||||
.PD
|
||||
Multiply N% with the number of \s-1CPU\s0 cores. Run this many jobs in parallel.
|
||||
If the evaluated number is less than 1 then 1 will be used. See also
|
||||
\&\-\-use\-cpus\-instead\-of\-cores.
|
||||
.IP "\fB\-\-keeporder\fR" 9
|
||||
.IX Item "--keeporder"
|
||||
.PD 0
|
||||
.IP "\fB\-k\fR" 9
|
||||
.IX Item "-k"
|
||||
.PD
|
||||
Keep sequence of output same as the order of input. If jobs 1 2 3 4
|
||||
end in the sequence 3 1 4 2 the output will still be 1 2 3 4.
|
||||
.IP "\fB\-\-number\-of\-cpus\fR" 9
|
||||
.IX Item "--number-of-cpus"
|
||||
Print the number of CPUs and exit (used by \fBparallel\fR itself to
|
||||
determine the number of CPUs on remote machines).
|
||||
.IP "\fB\-\-number\-of\-cores\fR" 9
|
||||
.IX Item "--number-of-cores"
|
||||
Print the number of cores and exit (used by \fBparallel\fR itself to determine the
|
||||
number of cores on remote machines).
|
||||
.IP "\fB\-\-quote\fR" 9
|
||||
.IX Item "--quote"
|
||||
.PD 0
|
||||
.IP "\fB\-q\fR" 9
|
||||
.IX Item "-q"
|
||||
.PD
|
||||
Quote \fBcommand\fR. This will quote the command line so special
|
||||
characters are not interpreted by the shell. See the section
|
||||
\&\s-1QUOTING\s0. Most people will never need this. Quoting is disabled by
|
||||
default.
|
||||
.IP "\fB\-S\fR \fI[ncpu/]sshlogin[,[ncpu/]sshlogin]\fR (not implemented)" 9
|
||||
.IX Item "-S [ncpu/]sshlogin[,[ncpu/]sshlogin] (not implemented)"
|
||||
.PD 0
|
||||
.IP "\fB\-\-sshlogin\fR \fI[ncpu/]sshlogin[,[ncpu/]sshlogin]\fR (not implemented)" 9
|
||||
.IX Item "--sshlogin [ncpu/]sshlogin[,[ncpu/]sshlogin] (not implemented)"
|
||||
.PD
|
||||
Distribute jobs to remote servers. The jobs will be run on a list of
|
||||
remote servers. \fBparallel\fR will determine the number of \s-1CPU\s0 cores on
|
||||
the remote servers and run the number of jobs as specified by \-j. If
|
||||
the number \fIncpu\fR is given \fBparallel\fR will use this number for
|
||||
number of CPUs on the host. Normally \fIncpu\fR will not be needed.
|
||||
.Sp
|
||||
An \fIsshlogin\fR is the string you would normally pass to \s-1SSH\s0 to login,
|
||||
e.g. \fIserver.example.com\fR, \fIfoo@server.example.com\fR, or \fI\*(L"\-l foo \-p
|
||||
2222 server.example.com\*(R"\fR. The sshlogin must not require a password.
|
||||
.Sp
|
||||
The sshlogin ':' is special, it means 'no ssh' and will therefore run
|
||||
on the local machine.
|
||||
.Sp
|
||||
To specify more sshlogins separate the sshlogins by comma or repeat
|
||||
the options multiple times.
|
||||
.Sp
|
||||
For examples: see \fB\-\-sshloginfile\fR.
|
||||
.Sp
|
||||
The remote host must have \fBparallel\fR installed.
|
||||
.IP "\fB\-\-sshloginfile\fR \fIfilename\fR (not implemented)" 9
|
||||
.IX Item "--sshloginfile filename (not implemented)"
|
||||
File with sshlogins. The file consists of sshlogins on separate
|
||||
lines. Empty lines and lines starting with '#' are ignored. Example:
|
||||
.Sp
|
||||
.Vb 9
|
||||
\& server.example.com
|
||||
\& username@server2.example.com
|
||||
\& 8/my\-8\-core\-server.example.com
|
||||
\& 2/myusername@my\-dualcore.example.net
|
||||
\& # This server has SSH running on port 2222
|
||||
\& \-p 2222 server.example.net
|
||||
\& 4/\-p 2222 quadserver.example.net
|
||||
\& # Assume 16 cores on the local machine
|
||||
\& 16/:
|
||||
.Ve
|
||||
.IP "\fB\-\-silent\fR" 9
|
||||
.IX Item "--silent"
|
||||
Silent. The job to be run will not be printed. This is the default.
|
||||
Can be reversed with \fB\-v\fR.
|
||||
.IP "\fB\-\-transfer\fR (not implemented)" 9
|
||||
.IX Item "--transfer (not implemented)"
|
||||
Transfer files to remote servers. \fB\-\-transfer\fR is used with
|
||||
\&\fB\-\-sshlogin\fR when the arguments are files and should be transferred to
|
||||
the remote servers. The files will be transferred using \fBrsync\fR and
|
||||
will be put relative to the default login dir. E.g.
|
||||
.Sp
|
||||
.Vb 2
|
||||
\& echo foo/bar.txt | parallel \e
|
||||
\& \-\-sshlogin server.example.com \-\-transfer wc
|
||||
.Ve
|
||||
.Sp
|
||||
This will transfer the file \fIfoo/bar.txt\fR to the server
|
||||
\&\fIserver.example.com\fR to the file \f(CW$HOME\fR/foo/bar.txt before running
|
||||
\&\fBwc foo/bar.txt\fR on \fIserver.example.com\fR.
|
||||
.Sp
|
||||
.Vb 2
|
||||
\& echo /tmp/foo/bar.txt | parallel \e
|
||||
\& \-\-sshlogin server.example.com \-\-transfer wc
|
||||
.Ve
|
||||
.Sp
|
||||
This will transfer the file \fI/tmp/foo/bar.txt\fR to the server
|
||||
\&\fIserver.example.com\fR to the file /tmp/foo/bar.txt before running
|
||||
\&\fBwc /tmp/foo/bar.txt\fR on \fIserver.example.com\fR.
|
||||
.Sp
|
||||
\&\fB\-\-transfer\fR is often used with \fB\-\-return\fR and \fB\-\-cleanup\fR.
|
||||
.Sp
|
||||
\&\fB\-\-transfer\fR is ignored when used with \fB\-\-sshlogin :\fR or when not used with \fB\-\-sshlogin\fR.
|
||||
.IP "\fB\-\-trc\fR \fIfilename\fR (not implemented)" 9
|
||||
.IX Item "--trc filename (not implemented)"
|
||||
Transfer, Return, Cleanup. Shorthand for:
|
||||
.Sp
|
||||
.Vb 1
|
||||
\& \-\-transfer \-\-return filename \-\-cleanup
|
||||
.Ve
|
||||
.IP "\fB\-\-return\fR \fIfilename\fR (not implemented)" 9
|
||||
.IX Item "--return filename (not implemented)"
|
||||
Transfer files from remote servers. \fB\-\-return\fR is used with
|
||||
\&\fB\-\-sshlogin\fR when the arguments are files on the remote servers. When
|
||||
processing is done the file \fIfilename\fR will be transferred
|
||||
from the remote server using \fBrsync\fR and will be put relative to
|
||||
the default login dir. E.g.
|
||||
.Sp
|
||||
.Vb 2
|
||||
\& echo foo/bar.txt | parallel \e
|
||||
\& \-\-sshlogin server.example.com \-\-return {}.out touch {}.out
|
||||
.Ve
|
||||
.Sp
|
||||
This will transfer the file \fI\f(CI$HOME\fI/foo/bar.txt.out\fR from the server
|
||||
\&\fIserver.example.com\fR to the file \fIfoo/bar.txt.out\fR after running
|
||||
\&\fBtouch foo/bar.txt.out\fR on \fIserver.example.com\fR.
|
||||
.Sp
|
||||
.Vb 2
|
||||
\& echo /tmp/foo/bar.txt | parallel \e
|
||||
\& \-\-sshlogin server.example.com \-\-return {}.out touch {}.out
|
||||
.Ve
|
||||
.Sp
|
||||
This will transfer the file \fI/tmp/foo/bar.txt.out\fR from the server
|
||||
\&\fIserver.example.com\fR to the file \fI/tmp/foo/bar.txt.out\fR after running
|
||||
\&\fBtouch /tmp/foo/bar.txt.out\fR on \fIserver.example.com\fR.
|
||||
.Sp
|
||||
Multiple files can be transferred by repeating the options multiple
|
||||
times:
|
||||
.Sp
|
||||
.Vb 3
|
||||
\& echo /tmp/foo/bar.txt | \e
|
||||
\& parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-return {}.out \-\-return {}.out2 touch {}.out {}.out2
|
||||
.Ve
|
||||
.Sp
|
||||
\&\fB\-\-return\fR is often used with \fB\-\-transfer\fR and \fB\-\-cleanup\fR.
|
||||
.Sp
|
||||
\&\fB\-\-return\fR is ignored when used with \fB\-\-sshlogin :\fR or when not used with \fB\-\-sshlogin\fR.
|
||||
.IP "\fB\-\-cleanup\fR (not implemented)" 9
|
||||
.IX Item "--cleanup (not implemented)"
|
||||
Remove transferred files. \fB\-\-cleanup\fR will remove the transferred files
|
||||
on the remote server after processing is done.
|
||||
.Sp
|
||||
.Vb 3
|
||||
\& find log \-name \*(Aq*gz\*(Aq | parallel \e
|
||||
\& \-\-sshlogin server.example.com \-\-transfer \-\-return {.}.bz2 \e
|
||||
\& \-\-cleanup "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.Sp
|
||||
With \fB\-\-transfer\fR the file transferred to the remote server will be
|
||||
removed on the remote server. Directories created will not be removed
|
||||
\&\- even if they are empty.
|
||||
.Sp
|
||||
With \fB\-\-return\fR the file transferred from the remote server will be
|
||||
removed on the remote server. Directories created will not be removed
|
||||
\&\- even if they are empty.
|
||||
.Sp
|
||||
\&\fB\-\-cleanup\fR is ignored when not used with \fB\-\-transfer\fR or \fB\-\-return\fR.
|
||||
.IP "\fB\-\-ungroup\fR" 9
|
||||
.IX Item "--ungroup"
|
||||
.PD 0
|
||||
.IP "\fB\-u\fR" 9
|
||||
.IX Item "-u"
|
||||
.PD
|
||||
Ungroup output. Output is printed as soon as possible. This may cause
|
||||
output from different commands to be mixed. Can be reversed with \fB\-g\fR.
|
||||
.IP "\fB\-\-use\-cpus\-instead\-of\-cores\fR (not implemented)" 9
|
||||
.IX Item "--use-cpus-instead-of-cores (not implemented)"
|
||||
Count the number of CPUs instead of cores. When computing how many
|
||||
jobs to run in parallel relative to the number of cores you can ask
|
||||
parallel to instead look at the number of CPUs. This will make sense
|
||||
for computers that have hyperthreading as two jobs running on one \s-1CPU\s0
|
||||
with hyperthreading will run slower than two jobs running on two CPUs.
|
||||
Normal users will not need this option.
|
||||
.IP "\fB\-v\fR" 9
|
||||
.IX Item "-v"
|
||||
Verbose. Print the job to be run on \s-1STDOUT\s0. Can be reversed with
|
||||
\&\fB\-\-silent\fR.
|
||||
.IP "\fB\-\-xargs\fR" 9
|
||||
.IX Item "--xargs"
|
||||
.PD 0
|
||||
.IP "\fB\-m\fR" 9
|
||||
.IX Item "-m"
|
||||
.PD
|
||||
Multiple. Insert as many arguments as the command line length permits. If
|
||||
{} is not used the arguments will be appended to the line. If {} is
|
||||
used multiple times each {} will be replaced with all the arguments.
|
||||
.IP "\fB\-X\fR" 9
|
||||
.IX Item "-X"
|
||||
xargs with context replace. This works like \fB\-m\fR except if {} is part
|
||||
of a word (like \fIpic{}.jpg\fR) then the whole word will be repeated.
|
||||
.SH "EXAMPLE 1: Working as cat | sh. Resource inexpensive jobs and evaluation"
|
||||
.IX Header "EXAMPLE 1: Working as cat | sh. Resource inexpensive jobs and evaluation"
|
||||
\&\fBparallel\fR can work similar to \fBcat | sh\fR.
|
||||
.PP
|
||||
A resource inexpensive job is a job that takes very little \s-1CPU\s0, disk
|
||||
I/O and network I/O. Ping is an example of a resource inexpensive
|
||||
job. wget is too \- if the webpages are small.
|
||||
.PP
|
||||
The content of the file jobs_to_run:
|
||||
.PP
|
||||
.Vb 7
|
||||
\& ping \-c 1 10.0.0.1
|
||||
\& wget http://status\-server/status.cgi?ip=10.0.0.1
|
||||
\& ping \-c 1 10.0.0.2
|
||||
\& wget http://status\-server/status.cgi?ip=10.0.0.2
|
||||
\& ...
|
||||
\& ping \-c 1 10.0.0.255
|
||||
\& wget http://status\-server/status.cgi?ip=10.0.0.255
|
||||
.Ve
|
||||
.PP
|
||||
To run 100 processes simultaneously do:
|
||||
.PP
|
||||
\&\fBparallel \-j 100 < jobs_to_run\fR
|
||||
.PP
|
||||
As there is not a \fBcommand\fR the option \fB\-c\fR is default because the
|
||||
jobs need to be evaluated by the shell.
|
||||
.SH "EXAMPLE 2: Working as xargs \-n1. Argument appending"
|
||||
.IX Header "EXAMPLE 2: Working as xargs -n1. Argument appending"
|
||||
\&\fBparallel\fR can work similar to \fBxargs \-n1\fR.
|
||||
.PP
|
||||
To output all html files run:
|
||||
.PP
|
||||
\&\fBfind . \-name '*.html' | parallel cat\fR
|
||||
.PP
|
||||
As there is a \fBcommand\fR the option \fB\-f\fR is default because the
|
||||
filenames need to be protected from the shell in case a filename
|
||||
contains special characters.
|
||||
.SH "EXAMPLE 3: Compute intensive jobs and substitution"
|
||||
.IX Header "EXAMPLE 3: Compute intensive jobs and substitution"
|
||||
If ImageMagick is installed this will generate a thumbnail of a jpg
|
||||
file:
|
||||
.PP
|
||||
\&\fBconvert \-geometry 120 foo.jpg thumb_foo.jpg\fR
|
||||
.PP
|
||||
If the system has more than 1 \s-1CPU\s0 core it can be run with
|
||||
number-of-cpu-cores jobs in parallel (\-j +0). This will do that for
|
||||
all jpg files in a directory:
|
||||
.PP
|
||||
\&\fBls *.jpg | parallel \-j +0 convert \-geometry 120 {} thumb_{}\fR
|
||||
.PP
|
||||
To do it recursively use \fBfind\fR:
|
||||
.PP
|
||||
\&\fBfind . \-name '*.jpg' | parallel \-j +0 convert \-geometry 120 {} {}_thumb.jpg\fR
|
||||
.PP
|
||||
Notice how the argument has to start with {} as {} will include path
|
||||
(e.g. running \fBconvert \-geometry 120 ./foo/bar.jpg
|
||||
thumb_./foo/bar.jpg\fR would clearly be wrong). It will result in files
|
||||
like ./foo/bar.jpg_thumb.jpg. If that is not wanted this can fix it:
|
||||
.PP
|
||||
.Vb 3
|
||||
\& find . \-name \*(Aq*.jpg\*(Aq | \e
|
||||
\& perl \-pe \*(Aqchomp; $a=$_; s:/([^/]+)$:/thumb_$1:; $_="convert \-geometry 120 $a $_\en"\*(Aq | \e
|
||||
\& parallel \-c \-j +0
|
||||
.Ve
|
||||
.PP
|
||||
Unfortunately this will not work if the filenames contain special
|
||||
characters (such as space or quotes). If you have \fBren\fR installed this
|
||||
is a better solution:
|
||||
.PP
|
||||
.Vb 2
|
||||
\& find . \-name \*(Aq*.jpg\*(Aq | parallel \-j +0 convert \-geometry 120 {} {}_thumb.jpg
|
||||
\& find . \-name \*(Aq*_thumb.jpg\*(Aq | ren \*(Aqs:/([^/]+)_thumb.jpg$:/thumb_$1:\*(Aq
|
||||
.Ve
|
||||
.PP
|
||||
This will make files like ./foo/bar_thumb.jpg:
|
||||
.PP
|
||||
\&\fBfind . \-name '*.jpg' | parallel \-j +0 convert \-geometry 120 {} {.}_thumb.jpg\fR
|
||||
.SH "EXAMPLE 4: Substitution and redirection"
|
||||
.IX Header "EXAMPLE 4: Substitution and redirection"
|
||||
This will compare all files in the dir to the file foo and save the
|
||||
diffs in corresponding .diff files:
|
||||
.PP
|
||||
\&\fBls | parallel diff {} foo "\fR>\fB"{}.diff\fR
|
||||
.PP
|
||||
Quoting of > is necessary to postpone the redirection. Another
|
||||
solution is to quote the whole command:
|
||||
.PP
|
||||
\&\fBls | parallel "diff {} foo \fR>\fB{}.diff"\fR
|
||||
.SH "EXAMPLE 5: Composed commands"
|
||||
.IX Header "EXAMPLE 5: Composed commands"
|
||||
A job can consist of several commands. This will print the number of
|
||||
files in each directory:
|
||||
.PP
|
||||
\&\fBls | parallel 'echo \-n {}\*(L" \*(R"; ls {}|wc \-l'\fR
|
||||
.PP
|
||||
To put the output in a file called <name>.dir:
|
||||
.PP
|
||||
\&\fBls | parallel '(echo \-n {}\*(L" \*(R"; ls {}|wc \-l) \fR> \fB{}.dir'\fR
|
||||
.SH "EXAMPLE 6: Context replace"
|
||||
.IX Header "EXAMPLE 6: Context replace"
|
||||
To remove the files \fIpict0000.jpg\fR .. \fIpict9999.jpg\fR you could do:
|
||||
.PP
|
||||
\&\fBseq \-f \f(CB%04g\fB 0 9999 | parallel rm pict{}.jpg\fR
|
||||
.PP
|
||||
You could also do:
|
||||
.PP
|
||||
\&\fBseq \-f \f(CB%04g\fB 0 9999 | perl \-pe 's/(.*)/pict$1.jpg/' | parallel \-m rm\fR
|
||||
.PP
|
||||
The first will run \fBrm\fR 10000 times, while the last will only run
|
||||
\&\fBrm\fR as many times as needed to keep the command line length short
|
||||
enough (typically 1\-2 times).
|
||||
.PP
|
||||
You could also run:
|
||||
.PP
|
||||
\&\fBseq \-f \f(CB%04g\fB 0 9999 | parallel \-X rm pict{}.jpg\fR
|
||||
.PP
|
||||
This will also only run \fBrm\fR as many times as needed to keep the command
|
||||
line length short enough.
|
||||
.SH "EXAMPLE 7: Group output lines"
|
||||
.IX Header "EXAMPLE 7: Group output lines"
|
||||
When running jobs that output data, you often do not want the output
|
||||
of multiple jobs to run together. \fBparallel\fR defaults to grouping the
|
||||
output of each job, so the output is printed when the job finishes. If
|
||||
you want the output to be printed while the job is running you can use
|
||||
\&\fB\-u\fR.
|
||||
.PP
|
||||
Compare the output of:
|
||||
.PP
|
||||
\&\fB(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute\fR
|
||||
.PP
|
||||
to the output of:
|
||||
.PP
|
||||
\&\fB(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel \-u traceroute\fR
|
||||
.SH "EXAMPLE 8: Keep order of output same as order of input"
|
||||
.IX Header "EXAMPLE 8: Keep order of output same as order of input"
|
||||
Normally the output of a job will be printed as soon as it
|
||||
completes. Sometimes you want the order of the output to remain the
|
||||
same as the order of the input. \fB\-k\fR will make sure the order of
|
||||
output will be in the same order as input even if later jobs end
|
||||
before earlier jobs.
|
||||
.PP
|
||||
\&\fB(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute\fR
|
||||
.PP
|
||||
will give traceroute of foss.org.my, debian.org and
|
||||
freenetproject.org, but it will be sorted according to which job
|
||||
completed first.
|
||||
.PP
|
||||
To keep the order the same as input run:
|
||||
.PP
|
||||
\&\fB(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel \-k traceroute\fR
|
||||
.PP
|
||||
This will make sure the traceroute to foss.org.my will be printed
|
||||
first.
|
||||
.SH "EXAMPLE 9: Using remote computers (not implemented)"
|
||||
.IX Header "EXAMPLE 9: Using remote computers (not implemented)"
|
||||
To run commands on a remote computer \s-1SSH\s0 needs to be set up and you
|
||||
must be able to login without entering a password (\fBssh-agent\fR may be
|
||||
handy).
|
||||
.PP
|
||||
To run \fBecho\fR on \fBserver.example.com\fR:
|
||||
.PP
|
||||
.Vb 1
|
||||
\& seq 1 10 | parallel \-\-sshlogin server.example.com echo
|
||||
.Ve
|
||||
.PP
|
||||
To run commands on more than one remote computer run:
|
||||
.PP
|
||||
.Vb 1
|
||||
\& seq 1 10 | parallel \-\-sshlogin server.example.com,server2.example.net echo
|
||||
.Ve
|
||||
.PP
|
||||
Or:
|
||||
.PP
|
||||
.Vb 2
|
||||
\& seq 1 10 | parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-sshlogin server2.example.net echo
|
||||
.Ve
|
||||
.PP
|
||||
If the login username is \fIfoo\fR on \fIserver2.example.net\fR use:
|
||||
.PP
|
||||
.Vb 2
|
||||
\& seq 1 10 | parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-sshlogin foo@server2.example.net echo
|
||||
.Ve
|
||||
.PP
|
||||
To distribute the commands to a list of machines, make a file
|
||||
\&\fImymachines\fR with all the machines:
|
||||
.PP
|
||||
.Vb 3
|
||||
\& server.example.com
|
||||
\& foo@server2.example.com
|
||||
\& server3.example.com
|
||||
.Ve
|
||||
.PP
|
||||
Then run:
|
||||
.PP
|
||||
.Vb 1
|
||||
\& seq 1 10 | parallel \-\-sshloginfile mymachines echo
|
||||
.Ve
|
||||
.PP
|
||||
To include the local machine add the special sshlogin ':' to the list:
|
||||
.PP
|
||||
.Vb 4
|
||||
\& server.example.com
|
||||
\& foo@server2.example.com
|
||||
\& server3.example.com
|
||||
\& :
|
||||
.Ve
|
||||
.PP
|
||||
If the number of \s-1CPU\s0 cores on the remote servers is not identified
|
||||
correctly the number of \s-1CPU\s0 cores can be added in front. Here the
|
||||
server has 8 \s-1CPU\s0 cores.
|
||||
.PP
|
||||
.Vb 1
|
||||
\& seq 1 10 | parallel \-\-sshlogin 8/server.example.com echo
|
||||
.Ve
|
||||
.SH "EXAMPLE 10: Transferring of files (not implemented)"
|
||||
.IX Header "EXAMPLE 10: Transferring of files (not implemented)"
|
||||
To recompress gzipped files with bzip2 using a remote server run:
|
||||
.PP
|
||||
.Vb 3
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-transfer "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
This will list the .gz\-files in the \fIlogs\fR directory and all
|
||||
directories below. Then it will transfer the files to
|
||||
\&\fIserver.example.com\fR to the corresponding directory in
|
||||
\&\fI\f(CI$HOME\fI/logs\fR. On \fIserver.example.com\fR the file will be recompressed
|
||||
using \fBzcat\fR and \fBbzip2\fR resulting in the corresponding file with
|
||||
\&\fI.gz\fR replaced with \fI.bz2\fR.
|
||||
.PP
|
||||
If you want the file to be transferred back to the local machine add
|
||||
\&\fI\-\-return {.}.bz2\fR:
|
||||
.PP
|
||||
.Vb 3
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-transfer \-\-return {.}.bz2 "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
After the recompressing is done the \fI.bz2\fR\-file is transferred back to
|
||||
the local machine and put next to the original \fI.gz\fR\-file.
|
||||
.PP
|
||||
If you want to delete the transferred files on the remote machine add
|
||||
\&\fI\-\-cleanup\fR. This will remove both the file transferred to the remote
|
||||
machine and the files transferred from the remote machine:
|
||||
.PP
|
||||
.Vb 3
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com \e
|
||||
\& \-\-transfer \-\-return {.}.bz2 \-\-cleanup "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
If you want to run on several servers add the servers to \fI\-\-sshlogin\fR
|
||||
either using ',' or separate \fI\-\-sshlogin\fR:
|
||||
.PP
|
||||
.Vb 4
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com,server2.example.com \e
|
||||
\& \-\-sshlogin server3.example.com \e
|
||||
\& \-\-transfer \-\-return {.}.bz2 \-\-cleanup "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
You can add the local machine using \fI\-\-sshlogin :\fR. This will disable the
|
||||
removing and transferring for the local machine only:
|
||||
.PP
|
||||
.Vb 5
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com,server2.example.com \e
|
||||
\& \-\-sshlogin server3.example.com \e
|
||||
\& \-\-sshlogin : \e
|
||||
\& \-\-transfer \-\-return {.}.bz2 \-\-cleanup "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
Often \fI\-\-transfer\fR, \fI\-\-return\fR and \fI\-\-cleanup\fR are used together. They can be
|
||||
shortened to \fI\-\-trc\fR:
|
||||
.PP
|
||||
.Vb 5
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | \e
|
||||
\& parallel \-\-sshlogin server.example.com,server2.example.com \e
|
||||
\& \-\-sshlogin server3.example.com \e
|
||||
\& \-\-sshlogin : \e
|
||||
\& \-\-trc {.}.bz2 "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.PP
|
||||
With the file \fImymachines\fR containing the compute machines it becomes:
|
||||
.PP
|
||||
.Vb 2
|
||||
\& find logs/ \-name \*(Aq*.gz\*(Aq | parallel \-\-sshloginfile mymachines \e
|
||||
\& \-\-trc {.}.bz2 "zcat {} | bzip2 \-9 >{.}.bz2"
|
||||
.Ve
|
||||
.SH "QUOTING"
|
||||
.IX Header "QUOTING"
|
||||
For more advanced use quoting may be an issue. The following will
|
||||
print the filename for each line that has exactly 2 columns:
|
||||
.PP
|
||||
\&\fBperl \-ne '/^\eS+\es+\eS+$/ and print \f(CB$ARGV\fB,\*(L"\en\*(R"' file\fR
|
||||
.PP
|
||||
This can be done by \fBparallel\fR using:
|
||||
.PP
|
||||
\&\fBls | parallel \*(L"perl \-ne '/^\e\eS+\e\es+\e\eS+$/ and print \e$ARGV,\e\*(R"\e\en\e\*(L"'\*(R"\fR
|
||||
.PP
|
||||
Notice how \e's, "'s, and $'s need to be quoted. \fBparallel\fR can do
|
||||
the quoting by using option \fB\-q\fR:
|
||||
.PP
|
||||
\&\fBls | parallel \-q perl \-ne '/^\eS+\es+\eS+$/ and print \f(CB$ARGV\fB,\*(L"\en\*(R"'\fR
|
||||
.PP
|
||||
However, this means you cannot make the shell interpret special
|
||||
characters. For example this \fBwill not work\fR:
|
||||
.PP
|
||||
\&\fBls | parallel \-q "diff {} foo \fR>\fB{}.diff"\fR
|
||||
.PP
|
||||
\&\fBls | parallel \-q \*(L"ls {} | wc \-l\*(R"\fR
|
||||
.PP
|
||||
because > and | need to be interpreted by the shell.
|
||||
.PP
|
||||
If you get errors like:
|
||||
.PP
|
||||
\&\fBsh: \-c: line 0: syntax error near unexpected token\fR
|
||||
.PP
|
||||
then you might try using \fB\-q\fR.
|
||||
.PP
|
||||
If you are using \fBbash\fR process substitution like \fB<(cat foo)\fR then
|
||||
you may try \fB\-q\fR and prepending \fBcommand\fR with \fBbash \-c\fR:
|
||||
.PP
|
||||
\&\fBls | parallel \-q bash \-c 'wc \-c <(echo {})'\fR
|
||||
.PP
|
||||
Or for substituting output:
|
||||
.PP
|
||||
\&\fBls | parallel \-q bash \-c 'tar c {} | tee \fR>\fB(gzip \fR>\fB{}.tar.gz) | bzip2 \fR>\fB{}.tar.bz2'\fR
|
||||
.PP
|
||||
\&\fBConclusion\fR: To avoid dealing with the quoting problems it may be
|
||||
easier just to write a small script and have \fBparallel\fR call that
|
||||
script.
|
||||
.SH "LIST RUNNING JOBS"
|
||||
.IX Header "LIST RUNNING JOBS"
|
||||
If you want a list of the jobs currently running you can run:
|
||||
.PP
|
||||
\&\fBkillall \-USR1 parallel\fR
|
||||
.PP
|
||||
\&\fBparallel\fR will then print the currently running jobs on \s-1STDERR\s0.
|
||||
.SH "COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS"
|
||||
.IX Header "COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS"
|
||||
If you regret starting a lot of jobs you can simply break \fBparallel\fR,
|
||||
but if you want to make sure you do not have half-completed jobs you
|
||||
should send the signal \fB\s-1SIGTERM\s0\fR to \fBparallel\fR:
|
||||
.PP
|
||||
\&\fBkillall \-TERM parallel\fR
|
||||
.PP
|
||||
This will tell \fBparallel\fR to not start any new jobs, but wait until
|
||||
the currently running jobs are finished.
|
||||
.SH "DIFFERENCES BETWEEN xargs/find \-exec AND parallel"
|
||||
.IX Header "DIFFERENCES BETWEEN xargs/find -exec AND parallel"
|
||||
\&\fBxargs\fR and \fBfind \-exec\fR offer some of the same possibilities as
|
||||
\&\fBparallel\fR.
|
||||
.PP
|
||||
\&\fBfind \-exec\fR only works on files. So processing other input (such as
|
||||
hosts or URLs) will require creating these inputs as files. \fBfind
|
||||
\&\-exec\fR has no support for running commands in parallel.
|
||||
.PP
|
||||
\&\fBxargs\fR deals badly with special characters (such as space, ' and
|
||||
"). To see the problem try this:
|
||||
.PP
|
||||
.Vb 5
|
||||
\& touch important_file
|
||||
\& touch \*(Aqnot important_file\*(Aq
|
||||
\& ls not* | xargs rm
|
||||
\& mkdir \-p \*(Aq12" records\*(Aq
|
||||
\& ls | xargs rmdir
|
||||
.Ve
|
||||
.PP
|
||||
You can specify \fB\-0\fR or \fB\-d \*(L"\en\*(R"\fR, but many input generators are not
|
||||
optimized for using \fB\s-1NUL\s0\fR as separator but are optimized for
|
||||
\&\fBnewline\fR as separator. E.g. \fBhead\fR, \fBtail\fR, \fBawk\fR, \fBls\fR, \fBecho\fR,
|
||||
\&\fBsed\fR, \fBtar \-v\fR, \fBperl\fR (\-0 and \e0 instead of \en), \fBlocate\fR
|
||||
(requires using \-0), \fBfind\fR (requires using \-print0), \fBgrep\fR
|
||||
(requires user to use \-z or \-Z).
|
||||
.PP
|
||||
So \fBparallel\fR's newline separation can be emulated with:
|
||||
.PP
|
||||
\&\fBcat | xargs \-d \*(L"\en\*(R" \-n1 \f(BIcommand\fB\fR
|
||||
.PP
|
||||
\&\fBxargs\fR can run a given number of jobs in parallel, but has no
|
||||
support for running number-of-cpu-cores jobs in parallel.
|
||||
.PP
|
||||
\&\fBxargs\fR has no support for grouping the output, therefore output may
|
||||
run together, e.g. the first half of a line is from one process and
|
||||
the last half of the line is from another process.
|
||||
.PP
|
||||
\&\fBxargs\fR has no support for keeping the order of the output, therefore
|
||||
if running jobs in parallel using \fBxargs\fR the output of the second
|
||||
job cannot be postponed till the first job is done.
|
||||
.PP
|
||||
\&\fBxargs\fR has no support for context replace, so you will have to create the
|
||||
arguments.
|
||||
.PP
|
||||
If you use a replace string in \fBxargs\fR (\fB\-I\fR) you can not force
|
||||
\&\fBxargs\fR to use more than one argument.
|
||||
.PP
|
||||
Quoting in \fBxargs\fR works like \fB\-q\fR in \fBparallel\fR. This means
|
||||
composed commands and redirection require using \fBbash \-c\fR.
|
||||
.PP
|
||||
\&\fBls | parallel "wc {} \fR> \fB{}.wc"\fR
|
||||
.PP
|
||||
becomes
|
||||
.PP
|
||||
\&\fBls | xargs \-d \*(L"\en\*(R" \-P10 \-I {} bash \-c "wc {} \fR>\fB {}.wc"\fR
|
||||
.PP
|
||||
and
|
||||
.PP
|
||||
\&\fBls | parallel \*(L"echo {}; ls {}|wc\*(R"\fR
|
||||
.PP
|
||||
becomes
|
||||
.PP
|
||||
\&\fBls | xargs \-d \*(L"\en\*(R" \-P10 \-I {} bash \-c \*(L"echo {}; ls {}|wc\*(R"\fR
|
||||
.SH "DIFFERENCES BETWEEN mdm/middleman AND parallel"
|
||||
.IX Header "DIFFERENCES BETWEEN mdm/middleman AND parallel"
|
||||
middleman(mdm) is also a tool for running jobs in parallel.
|
||||
.PP
|
||||
Here are the shellscripts of http://mdm.berlios.de/usage.html ported
|
||||
to parallel use:
|
||||
.PP
|
||||
\&\fBseq 1 19 | parallel \-j+0 buffon \-o \- | sort \-n \fR>\fB result\fR
|
||||
.PP
|
||||
\&\fBcat files | parallel \-j+0 cmd\fR
|
||||
.SH "BUGS"
|
||||
.IX Header "BUGS"
|
||||
Filenames beginning with '\-' can cause some commands to give
|
||||
unexpected results, as it will often be interpreted as an option.
|
||||
.SH "REPORTING BUGS"
|
||||
.IX Header "REPORTING BUGS"
|
||||
Report bugs to <bug\-parallel@tange.dk>.
|
||||
.SH "IDEAS"
|
||||
.IX Header "IDEAS"
|
||||
One char options not used: F G J K M P Q Y
|
||||
.PP
|
||||
Test if \-0 works on filenames ending in '\en'
|
||||
.PP
|
||||
xargs drop-in replacement.
|
||||
Implement the missing \-\-features
|
||||
.PP
|
||||
monitor to see which jobs are currently running
|
||||
http://code.google.com/p/ppss/
|
||||
.PP
|
||||
Accept signal \s-1INT\s0 instead of \s-1TERM\s0 to complete current running jobs but
|
||||
do not start new jobs. Print out the number of jobs waiting to
|
||||
complete on \s-1STDERR\s0. Accept signal \s-1INT\s0 again to kill now. This seems to be
|
||||
hard, as all foreground processes get the \s-1INT\s0 from the shell.
|
||||
.PP
|
||||
If there are no more jobs (\s-1STDIN\s0 is closed) then make sure to
|
||||
distribute the arguments evenly if running \-X.
|
||||
.PP
|
||||
Distribute jobs to computers with different speeds/number\-of\-cpu\-cores using ssh.
|
||||
Ask the computers how many cpus they have and spawn appropriately
|
||||
according to \-j setting. Reuse ssh connection (\-M and \-S)
|
||||
.PP
|
||||
SEED=$RANDOM
|
||||
ssh \-MS /tmp/ssh\-%r@%h:%p\-$SEED elvis
|
||||
rsync \-\-rsh=\*(L"ssh \-S /tmp/ssh\-%r@%h:%p\-$SEED\*(R" gitup elvis:/tmp/
|
||||
ssh \-S /tmp/ssh\-%r@%h:%p\-$SEED elvis hostname
|
||||
.PP
|
||||
FILE=gpl\-3.0.txt
|
||||
BASE=gpl\-3.0
|
||||
$ rsync \-z \f(CW$FILE\fR e:$FILE
|
||||
$ ssh e \*(L"cat \f(CW$FILE\fR | bzip2 > \f(CW$BASE\fR.bz2\*(R"
|
||||
$ rsync \-z e:$BASE.bz2 \f(CW$BASE\fR.bz2
|
||||
$ ssh e \*(L"rm \f(CW$FILE\fR \f(CW$BASE\fR\*(R"
|
||||
.PP
|
||||
http://www.semicomplete.com/blog/geekery/distributed\-xargs.html?source=rss20
|
||||
http://code.google.com/p/ppss/wiki/Manual2
|
||||
.PP
|
||||
http://www.gnu.org/software/pexec/
|
||||
.PP
|
||||
Where will '>' be run? Local or remote? Remote.
|
||||
.PP
|
||||
Parallelize so this can be done:
|
||||
mdm.screen find dir \-execdir mdm-run cmd {} \e;
|
||||
Maybe:
|
||||
find dir \-execdir parallel \-\-communication\-file /tmp/comfile cmd {} \e;
|
||||
.SS "Comfile"
|
||||
.IX Subsection "Comfile"
|
||||
This will put a lock on /tmp/comfile. The number of locks is the number of running commands.
|
||||
If the number is smaller than \-j then it will start a process in the background ( cmd & ),
|
||||
otherwise wait.
|
||||
.PP
|
||||
parallel \-\-wait /tmp/comfile will wait until there are no more locks on the file.
|
||||
.SH "AUTHOR"
|
||||
.IX Header "AUTHOR"
|
||||
Copyright (C) 2007\-10\-18 Ole Tange, http://ole.tange.dk
|
||||
.PP
|
||||
Copyright (C) 2008\-2010 Ole Tange, http://ole.tange.dk
|
||||
.SH "LICENSE"
|
||||
.IX Header "LICENSE"
|
||||
Copyright (C) 2007\-2010 Free Software Foundation, Inc.
|
||||
.PP
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the \s-1GNU\s0 General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.PP
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but \s-1WITHOUT\s0 \s-1ANY\s0 \s-1WARRANTY\s0; without even the implied warranty of
|
||||
\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0. See the
|
||||
\&\s-1GNU\s0 General Public License for more details.
|
||||
.PP
|
||||
You should have received a copy of the \s-1GNU\s0 General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
.SH "DEPENDENCIES"
|
||||
.IX Header "DEPENDENCIES"
|
||||
\&\fBparallel\fR uses Perl, and the Perl modules Getopt::Long, IPC::Open3,
|
||||
Symbol, IO::File, \s-1POSIX\s0, and File::Temp.
|
||||
.SH "SEE ALSO"
|
||||
.IX Header "SEE ALSO"
|
||||
\&\fBfind\fR(1), \fBxargs\fR(1)
|
Loading…
Reference in a new issue