parallel/src/parallel_tutorial.1
2014-08-25 07:06:13 +02:00

2802 lines
68 KiB
Groff

.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
. ds PI pi
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
. ds L" ""
. ds R" ""
. ds C` ""
. ds C' ""
'br\}
.el\{\
. ds -- \|\(em\|
. ds PI \(*p
. ds L" ``
. ds R" ''
. ds C`
. ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{
. if \nF \{
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
. if !\nF==2 \{
. nr % 0
. nr F 2
. \}
. \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
.if n \{\
. ds #H 0
. ds #V .8m
. ds #F .3m
. ds #[ \f1
. ds #] \fP
.\}
.if t \{\
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
. ds #V .6m
. ds #F 0
. ds #[ \&
. ds #] \&
.\}
. \" simple accents for nroff and troff
.if n \{\
. ds ' \&
. ds ` \&
. ds ^ \&
. ds , \&
. ds ~ ~
. ds /
.\}
.if t \{\
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
. \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
. \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
. \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
. ds : e
. ds 8 ss
. ds o a
. ds d- d\h'-1'\(ga
. ds D- D\h'-1'\(hy
. ds th \o'bp'
. ds Th \o'LP'
. ds ae ae
. ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "PARALLEL_TUTORIAL 1"
.TH PARALLEL_TUTORIAL 1 "2014-08-23" "20140822" "parallel"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "GNU Parallel Tutorial"
.IX Header "GNU Parallel Tutorial"
This tutorial shows off much of \s-1GNU\s0 Parallel's functionality. The
tutorial is meant to teach the options in \s-1GNU\s0 Parallel. It is not
meant to show realistic examples from the real world.
.PP
Spend an hour walking through the tutorial. Your command line will
love you for it.
.SH "Prerequisites"
.IX Header "Prerequisites"
To run this tutorial you must have the following:
.IP "parallel >= version 20140622" 9
.IX Item "parallel >= version 20140622"
Install the newest version with:
.Sp
.Vb 1
\& (wget \-O \- pi.dk/3 || curl pi.dk/3/) | bash
.Ve
.Sp
This will also install the newest version of the tutorial:
.Sp
.Vb 1
\& man parallel_tutorial
.Ve
.Sp
Most of the tutorial will work on older versions, too.
.IP "abc-file:" 9
.IX Item "abc-file:"
The file can be generated by:
.Sp
.Vb 1
\& parallel \-k echo ::: A B C > abc\-file
.Ve
.IP "def-file:" 9
.IX Item "def-file:"
The file can be generated by:
.Sp
.Vb 1
\& parallel \-k echo ::: D E F > def\-file
.Ve
.IP "abc0\-file:" 9
.IX Item "abc0-file:"
The file can be generated by:
.Sp
.Vb 1
\& perl \-e \*(Aqprintf "A\e0B\e0C\e0"\*(Aq > abc0\-file
.Ve
.IP "abc_\-file:" 9
.IX Item "abc_-file:"
The file can be generated by:
.Sp
.Vb 1
\& perl \-e \*(Aqprintf "A_B_C_"\*(Aq > abc_\-file
.Ve
.IP "tsv\-file.tsv" 9
.IX Item "tsv-file.tsv"
The file can be generated by:
.Sp
.Vb 1
\& perl \-e \*(Aqprintf "f1\etf2\enA\etB\enC\etD\en"\*(Aq > tsv\-file.tsv
.Ve
.IP "num30000" 9
.IX Item "num30000"
The file can be generated by:
.Sp
.Vb 1
\& perl \-e \*(Aqfor(1..30000){print "$_\en"}\*(Aq > num30000
.Ve
.IP "num1000000" 9
.IX Item "num1000000"
The file can be generated by:
.Sp
.Vb 1
\& perl \-e \*(Aqfor(1..1000000){print "$_\en"}\*(Aq > num1000000
.Ve
.IP "num_%header" 9
.IX Item "num_%header"
The file can be generated by:
.Sp
.Vb 1
\& (echo %head1; echo %head2; perl \-e \*(Aqfor(1..10){print "$_\en"}\*(Aq) > num_%header
.Ve
.ie n .IP "For remote running: ssh login on 2 servers with no password in $SERVER1 and $SERVER2" 9
.el .IP "For remote running: ssh login on 2 servers with no password in \f(CW$SERVER1\fR and \f(CW$SERVER2\fR" 9
.IX Item "For remote running: ssh login on 2 servers with no password in $SERVER1 and $SERVER2"
.Vb 2
\& SERVER1=server.example.com
\& SERVER2=server2.example.net
.Ve
.Sp
You must be able to:
.Sp
.Vb 2
\& ssh $SERVER1 echo works
\& ssh $SERVER2 echo works
.Ve
.Sp
It can be set up by running 'ssh\-keygen \-t dsa; ssh\-copy\-id \f(CW$SERVER1\fR'
and using an empty passphrase.
.SH "Input sources"
.IX Header "Input sources"
\&\s-1GNU\s0 Parallel reads input from input sources. These can be files, the
command line, and stdin (standard input or a pipe).
.SS "A single input source"
.IX Subsection "A single input source"
Input can be read from the command line:
.PP
.Vb 1
\& parallel echo ::: A B C
.Ve
.PP
Output (the order may be different because the jobs are run in
parallel):
.PP
.Vb 3
\& A
\& B
\& C
.Ve
.PP
The input source can be a file:
.PP
.Vb 1
\& parallel \-a abc\-file echo
.Ve
.PP
Output: Same as above.
.PP
\&\s-1STDIN \s0(standard input) can be the input source:
.PP
.Vb 1
\& cat abc\-file | parallel echo
.Ve
.PP
Output: Same as above.
.SS "Multiple input sources"
.IX Subsection "Multiple input sources"
\&\s-1GNU\s0 Parallel can take multiple input sources given on the command
line. \s-1GNU\s0 Parallel then generates all combinations of the input
sources:
.PP
.Vb 1
\& parallel echo ::: A B C ::: D E F
.Ve
.PP
Output (the order may be different):
.PP
.Vb 9
\& A D
\& A E
\& A F
\& B D
\& B E
\& B F
\& C D
\& C E
\& C F
.Ve
.PP
The input sources can be files:
.PP
.Vb 1
\& parallel \-a abc\-file \-a def\-file echo
.Ve
.PP
Output: Same as above.
.PP
\&\s-1STDIN \s0(standard input) can be one of the input sources using '\-':
.PP
.Vb 1
\& cat abc\-file | parallel \-a \- \-a def\-file echo
.Ve
.PP
Output: Same as above.
.PP
Instead of \-a files can be given after '::::':
.PP
.Vb 1
\& cat abc\-file | parallel echo :::: \- def\-file
.Ve
.PP
Output: Same as above.
.PP
::: and :::: can be mixed:
.PP
.Vb 1
\& parallel echo ::: A B C :::: def\-file
.Ve
.PP
Output: Same as above.
.PP
\fIMatching arguments from all input sources\fR
.IX Subsection "Matching arguments from all input sources"
.PP
With \-\-xapply you can get one argument from each input source:
.PP
.Vb 1
\& parallel \-\-xapply echo ::: A B C ::: D E F
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& A D
\& B E
\& C F
.Ve
.PP
If one of the input sources is too short, its values will wrap:
.PP
.Vb 1
\& parallel \-\-xapply echo ::: A B C D E ::: F G
.Ve
.PP
Output (the order may be different):
.PP
.Vb 5
\& A F
\& B G
\& C F
\& D G
\& E F
.Ve
.SS "Changing the argument separator."
.IX Subsection "Changing the argument separator."
\&\s-1GNU\s0 Parallel can use other separators than ::: or ::::. This is
typically useful if ::: or :::: is used in the command to run:
.PP
.Vb 1
\& parallel \-\-arg\-sep ,, echo ,, A B C :::: def\-file
.Ve
.PP
Output (the order may be different):
.PP
.Vb 9
\& A D
\& A E
\& A F
\& B D
\& B E
\& B F
\& C D
\& C E
\& C F
.Ve
.PP
Changing the argument file separator:
.PP
.Vb 1
\& parallel \-\-arg\-file\-sep // echo ::: A B C // def\-file
.Ve
.PP
Output: Same as above.
.SS "Changing the argument delimiter"
.IX Subsection "Changing the argument delimiter"
\&\s-1GNU\s0 Parallel will normally treat a full line as a single argument: It
uses \en as argument delimiter. This can be changed with \-d:
.PP
.Vb 1
\& parallel \-d _ echo :::: abc_\-file
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& A
\& B
\& C
.Ve
.PP
\&\s-1NULL\s0 can be given as \e0:
.PP
.Vb 1
\& parallel \-d \*(Aq\e0\*(Aq echo :::: abc0\-file
.Ve
.PP
Output: Same as above.
.PP
A shorthand for \-d '\e0' is \-0 (this will often be used to read files
from find ... \-print0):
.PP
.Vb 1
\& parallel \-0 echo :::: abc0\-file
.Ve
.PP
Output: Same as above.
.SS "End-of-file value for input source"
.IX Subsection "End-of-file value for input source"
\&\s-1GNU\s0 Parallel can stop reading when it encounters a certain value:
.PP
.Vb 1
\& parallel \-E stop echo ::: A B stop C D
.Ve
.PP
Output:
.PP
.Vb 2
\& A
\& B
.Ve
.SS "Skipping empty lines"
.IX Subsection "Skipping empty lines"
Using \-\-no\-run\-if\-empty \s-1GNU\s0 Parallel will skip empty lines.
.PP
.Vb 1
\& (echo 1; echo; echo 2) | parallel \-\-no\-run\-if\-empty echo
.Ve
.PP
Output:
.PP
.Vb 2
\& 1
\& 2
.Ve
.SH "Building the command line"
.IX Header "Building the command line"
.SS "No command means arguments are commands"
.IX Subsection "No command means arguments are commands"
If no command is given after parallel the arguments themselves are
treated as commands:
.PP
.Vb 1
\& parallel ::: ls \*(Aqecho foo\*(Aq pwd
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& [list of files in current dir]
\& foo
\& [/path/to/current/working/dir]
.Ve
.PP
The command can be a script, a binary or a Bash function if the function is
exported using 'export \-f':
.PP
.Vb 6
\& # Only works in Bash and only if $SHELL=.../bash
\& my_func() {
\& echo in my_func $1
\& }
\& export \-f my_func
\& parallel my_func ::: 1 2 3
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& in my_func 1
\& in my_func 2
\& in my_func 3
.Ve
.SS "Replacement strings"
.IX Subsection "Replacement strings"
\fIThe 7 predefined replacement strings\fR
.IX Subsection "The 7 predefined replacement strings"
.PP
\&\s-1GNU\s0 Parallel has several replacement strings. If no replacement
strings are used the default is to append {}:
.PP
.Vb 1
\& parallel echo ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A/B.C
.Ve
.PP
The default replacement string is {}:
.PP
.Vb 1
\& parallel echo {} ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A/B.C
.Ve
.PP
The replacement string {.} removes the extension:
.PP
.Vb 1
\& parallel echo {.} ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A/B
.Ve
.PP
The replacement string {/} removes the path:
.PP
.Vb 1
\& parallel echo {/} ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& B.C
.Ve
.PP
The replacement string {//} keeps only the path:
.PP
.Vb 1
\& parallel echo {//} ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A
.Ve
.PP
The replacement string {/.} removes the path and the extension:
.PP
.Vb 1
\& parallel echo {/.} ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& B
.Ve
.PP
The replacement string {#} gives the job number:
.PP
.Vb 1
\& parallel echo {#} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1
\& 2
\& 3
.Ve
.PP
The replacement string {%} gives the job slot number (between 1 and
number of jobs to run in parallel):
.PP
.Vb 1
\& parallel \-j 2 echo {%} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1
\& 2
\& 1
.Ve
.PP
\fIChanging the replacement strings\fR
.IX Subsection "Changing the replacement strings"
.PP
The replacement string {} can be changed with \-I:
.PP
.Vb 1
\& parallel \-I ,, echo ,, ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A/B.C
.Ve
.PP
The replacement string {.} can be changed with \-\-extensionreplace:
.PP
.Vb 1
\& parallel \-\-extensionreplace ,, echo ,, ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A/B
.Ve
.PP
The replacement string {/} can be changed with \-\-basenamereplace:
.PP
.Vb 1
\& parallel \-\-basenamereplace ,, echo ,, ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& B.C
.Ve
.PP
The replacement string {//} can be changed with \-\-dirnamereplace:
.PP
.Vb 1
\& parallel \-\-dirnamereplace ,, echo ,, ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& A
.Ve
.PP
The replacement string {/.} can be changed with \-\-basenameextensionreplace:
.PP
.Vb 1
\& parallel \-\-basenameextensionreplace ,, echo ,, ::: A/B.C
.Ve
.PP
Output:
.PP
.Vb 1
\& B
.Ve
.PP
The replacement string {#} can be changed with \-\-seqreplace:
.PP
.Vb 1
\& parallel \-\-seqreplace ,, echo ,, ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1
\& 2
\& 3
.Ve
.PP
The replacement string {%} can be changed with \-\-slotreplace:
.PP
.Vb 1
\& parallel \-j2 \-\-slotreplace ,, echo ,, ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1
\& 2
\& 1
.Ve
.PP
\fIPerl expression replacement string\fR
.IX Subsection "Perl expression replacement string"
.PP
When predefined replacement strings are not flexible enough a perl
expression can be used instead. One example is to remove two
extensions: foo.tar.gz \-> foo
.PP
.Vb 1
\& parallel echo \*(Aq{= s:\e.[^.]+$::;s:\e.[^.]+$::; =}\*(Aq ::: foo.tar.gz
.Ve
.PP
Output:
.PP
.Vb 1
\& foo
.Ve
.PP
If the strings \fB{=\fR and \fB=}\fR cause problems they can be replaced with \-\-parens:
.PP
.Vb 1
\& parallel \-\-parens ,,,, echo \*(Aq,, s:\e.[^.]+$::;s:\e.[^.]+$::; ,,\*(Aq ::: foo.tar.gz
.Ve
.PP
Output: Same as above.
.PP
To define a short hand replacement string use \fB\-\-rpl\fR:
.PP
.Vb 1
\& parallel \-\-rpl \*(Aq.. s:\e.[^.]+$::;s:\e.[^.]+$::;\*(Aq echo \*(Aq..\*(Aq ::: foo.tar.gz
.Ve
.PP
Output: Same as above.
.PP
If the short hand starts with '{' it can be used as a positional
replacement string, too:
.PP
.Vb 1
\& parallel \-\-rpl \*(Aq{..} s:\e.[^.]+$::;s:\e.[^.]+$::;\*(Aq echo \*(Aq{..}\*(Aq ::: foo.tar.gz
.Ve
.PP
Output: Same as above.
.PP
\&\s-1GNU \s0\fBparallel\fR's 7 replacement strings are implemented as:
.PP
.Vb 7
\& \-\-rpl \*(Aq{} \*(Aq
\& \-\-rpl \*(Aq{#} $_=$job\->seq()\*(Aq
\& \-\-rpl \*(Aq{%} $_=$job\->slot()\*(Aq
\& \-\-rpl \*(Aq{/} s:.*/::\*(Aq
\& \-\-rpl \*(Aq{//} $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);\*(Aq
\& \-\-rpl \*(Aq{/.} s:.*/::; s:\e.[^/.]+$::;\*(Aq
\& \-\-rpl \*(Aq{.} s:\e.[^/.]+$::\*(Aq
.Ve
.PP
\fIPositional replacement strings\fR
.IX Subsection "Positional replacement strings"
.PP
With multiple input sources the argument from the individual input
sources can be accessed with {number}:
.PP
.Vb 1
\& parallel echo {1} and {2} ::: A B ::: C D
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& A and C
\& A and D
\& B and C
\& B and D
.Ve
.PP
The positional replacement strings can also be modified using / // /. and .:
.PP
.Vb 1
\& parallel echo /={1/} //={1//} /.={1/.} .={1.} ::: A/B.C D/E.F
.Ve
.PP
Output (the order may be different):
.PP
.Vb 2
\& /=B.C //=A /.=B .=A/B
\& /=E.F //=D /.=E .=D/E
.Ve
.PP
If a position is negative, it will refer to the input source counted
from behind:
.PP
.Vb 1
\& parallel echo 1={1} 2={2} 3={3} \-1={\-1} \-2={\-2} \-3={\-3} ::: A B ::: C D ::: E F
.Ve
.PP
Output (the order may be different):
.PP
.Vb 8
\& 1=A 2=C 3=E \-1=E \-2=C \-3=A
\& 1=A 2=C 3=F \-1=F \-2=C \-3=A
\& 1=A 2=D 3=E \-1=E \-2=D \-3=A
\& 1=A 2=D 3=F \-1=F \-2=D \-3=A
\& 1=B 2=C 3=E \-1=E \-2=C \-3=B
\& 1=B 2=C 3=F \-1=F \-2=C \-3=B
\& 1=B 2=D 3=E \-1=E \-2=D \-3=B
\& 1=B 2=D 3=F \-1=F \-2=D \-3=B
.Ve
.PP
\fIPositional perl expression replacement string\fR
.IX Subsection "Positional perl expression replacement string"
.PP
To use a perl expression as a positional replacement string simply
prepend the perl expression with number and space:
.PP
.Vb 1
\& parallel echo \*(Aq{=2 s:\e.[^.]+$::;s:\e.[^.]+$::; =} {1}\*(Aq ::: bar ::: foo.tar.gz
.Ve
.PP
Output:
.PP
.Vb 1
\& foo bar
.Ve
.PP
If a defined short hand starts with '{' it can be used as a positional
replacement string, too:
.PP
.Vb 1
\& parallel \-\-rpl \*(Aq{..} s:\e.[^.]+$::;s:\e.[^.]+$::;\*(Aq echo \*(Aq{2..} {1}\*(Aq ::: bar ::: foo.tar.gz
.Ve
.PP
Output: Same as above.
.PP
\fIInput from columns\fR
.IX Subsection "Input from columns"
.PP
The columns in a file can be bound to positional replacement strings
using \-\-colsep. Here the columns are separated with \s-1TAB \s0(\et):
.PP
.Vb 1
\& parallel \-\-colsep \*(Aq\et\*(Aq echo 1={1} 2={2} :::: tsv\-file.tsv
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1=f1 2=f2
\& 1=A 2=B
\& 1=C 2=D
.Ve
.PP
\fIHeader defined replacement strings\fR
.IX Subsection "Header defined replacement strings"
.PP
With \-\-header \s-1GNU\s0 Parallel will use the first value of the input
source as the name of the replacement string. Only the non-modified
version {} is supported:
.PP
.Vb 1
\& parallel \-\-header : echo f1={f1} f2={f2} ::: f1 A B ::: f2 C D
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& f1=A f2=C
\& f1=A f2=D
\& f1=B f2=C
\& f1=B f2=D
.Ve
.PP
It is useful with \-\-colsep for processing files with \s-1TAB\s0 separated values:
.PP
.Vb 1
\& parallel \-\-header : \-\-colsep \*(Aq\et\*(Aq echo f1={f1} f2={f2} :::: tsv\-file.tsv
.Ve
.PP
Output (the order may be different):
.PP
.Vb 2
\& f1=A f2=B
\& f1=C f2=D
.Ve
.SS "More than one argument"
.IX Subsection "More than one argument"
With \-\-xargs \s-1GNU\s0 Parallel will fit as many arguments as possible on a
single line:
.PP
.Vb 1
\& cat num30000 | parallel \-\-xargs echo | wc \-l
.Ve
.PP
Output:
.PP
.Vb 1
\& 2
.Ve
.PP
The 30000 arguments fitted on 2 lines.
.PP
The maximal length of a single line can be set with \-s. With a maximal
line length of 10000 chars 17 commands will be run:
.PP
.Vb 1
\& cat num30000 | parallel \-\-xargs \-s 10000 echo | wc \-l
.Ve
.PP
Output:
.PP
.Vb 1
\& 17
.Ve
.PP
For better parallelism \s-1GNU\s0 Parallel can distribute the arguments
between all the parallel jobs when end of file is met.
.PP
Below \s-1GNU\s0 Parallel reads the last argument when generating the second
job. When \s-1GNU\s0 Parallel reads the last argument, it spreads all the
arguments for the second job over 4 jobs instead, as 4 parallel jobs
are requested.
.PP
The first job will be the same as the \-\-xargs example above, but the
second job will be split into 4 evenly sized jobs, resulting in a
total of 5 jobs:
.PP
.Vb 1
\& cat num30000 | parallel \-\-jobs 4 \-m echo | wc \-l
.Ve
.PP
Output:
.PP
.Vb 1
\& 5
.Ve
.PP
This is even more visible when running 4 jobs with 10 arguments. The
10 arguments are being spread over 4 jobs:
.PP
.Vb 1
\& parallel \-\-jobs 4 \-m echo ::: {1..10}
.Ve
.PP
Output:
.PP
.Vb 4
\& 1 2 3
\& 4 5 6
\& 7 8 9
\& 10
.Ve
.PP
A replacement string can be part of a word. \-m will not repeat the context:
.PP
.Vb 1
\& parallel \-\-jobs 4 \-m echo pre\-{}\-post ::: A B C D E F G
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& pre\-A B\-post
\& pre\-C D\-post
\& pre\-E F\-post
\& pre\-G\-post
.Ve
.PP
To repeat the context use \-X which otherwise works like \-m:
.PP
.Vb 1
\& parallel \-\-jobs 4 \-X echo pre\-{}\-post ::: A B C D E F G
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& pre\-A\-post pre\-B\-post
\& pre\-C\-post pre\-D\-post
\& pre\-E\-post pre\-F\-post
\& pre\-G\-post
.Ve
.PP
To limit the number of arguments use \-N:
.PP
.Vb 1
\& parallel \-N3 echo ::: A B C D E F G H
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& A B C
\& D E F
\& G H
.Ve
.PP
\&\-N also sets the positional replacement strings:
.PP
.Vb 1
\& parallel \-N3 echo 1={1} 2={2} 3={3} ::: A B C D E F G H
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1=A 2=B 3=C
\& 1=D 2=E 3=F
\& 1=G 2=H 3=
.Ve
.PP
\&\-N0 reads 1 argument but inserts none:
.PP
.Vb 1
\& parallel \-N0 echo foo ::: 1 2 3
.Ve
.PP
Output:
.PP
.Vb 3
\& foo
\& foo
\& foo
.Ve
.SS "Quoting"
.IX Subsection "Quoting"
Command lines that contain special characters may need to be protected from the shell.
.PP
The perl program 'print \*(L"@ARGV\en\*(R"' basically works like echo.
.PP
.Vb 1
\& perl \-e \*(Aqprint "@ARGV\en"\*(Aq A
.Ve
.PP
Output:
.PP
.Vb 1
\& A
.Ve
.PP
To run that in parallel the command needs to be quoted:
.PP
.Vb 1
\& parallel perl \-e \*(Aqprint "@ARGV\en"\*(Aq ::: This wont work
.Ve
.PP
Output:
.PP
.Vb 1
\& [Nothing]
.Ve
.PP
To quote the command use \-q:
.PP
.Vb 1
\& parallel \-q perl \-e \*(Aqprint "@ARGV\en"\*(Aq ::: This works
.Ve
.PP
Output (the order may be different):
.PP
.Vb 2
\& This
\& works
.Ve
.PP
Or you can quote the critical part using \e':
.PP
.Vb 1
\& parallel perl \-e \e\*(Aq\*(Aqprint "@ARGV\en"\*(Aq\e\*(Aq ::: This works, too
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& This
\& works,
\& too
.Ve
.PP
\&\s-1GNU\s0 Parallel can also \e\-quote full lines. Simply run:
.PP
.Vb 4
\& parallel \-\-shellquote
\& parallel: Warning: Input is read from the terminal. Only experts do this on purpose. Press CTRL\-D to exit.
\& perl \-e \*(Aqprint "@ARGV\en"\*(Aq
\& [CTRL\-D]
.Ve
.PP
Output:
.PP
.Vb 1
\& perl\e \-e\e \e\*(Aqprint\e \e"@ARGV\e\en\e"\e\*(Aq
.Ve
.PP
This can then be used as the command:
.PP
.Vb 1
\& parallel perl\e \-e\e \e\*(Aqprint\e \e"@ARGV\e\en\e"\e\*(Aq ::: This also works
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& This
\& also
\& works
.Ve
.SS "Trimming space"
.IX Subsection "Trimming space"
Space can be trimmed on the arguments using \-\-trim:
.PP
.Vb 1
\& parallel \-\-trim r echo pre\-{}\-post ::: \*(Aq A \*(Aq
.Ve
.PP
Output:
.PP
.Vb 1
\& pre\- A\-post
.Ve
.PP
To trim on the left side:
.PP
.Vb 1
\& parallel \-\-trim l echo pre\-{}\-post ::: \*(Aq A \*(Aq
.Ve
.PP
Output:
.PP
.Vb 1
\& pre\-A \-post
.Ve
.PP
To trim on both sides:
.PP
.Vb 1
\& parallel \-\-trim lr echo pre\-{}\-post ::: \*(Aq A \*(Aq
.Ve
.PP
Output:
.PP
.Vb 1
\& pre\-A\-post
.Ve
.SH "Controlling the output"
.IX Header "Controlling the output"
The output can be prefixed with the argument:
.PP
.Vb 1
\& parallel \-\-tag echo foo\-{} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& A foo\-A
\& B foo\-B
\& C foo\-C
.Ve
.PP
To prefix it with another string use \-\-tagstring:
.PP
.Vb 1
\& parallel \-\-tagstring {}\-bar echo foo\-{} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& A\-bar foo\-A
\& B\-bar foo\-B
\& C\-bar foo\-C
.Ve
.PP
To see what commands will be run without running them:
.PP
.Vb 1
\& parallel \-\-dryrun echo {} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& echo A
\& echo B
\& echo C
.Ve
.PP
To print the commands before running them use \-\-verbose:
.PP
.Vb 1
\& parallel \-\-verbose echo {} ::: A B C
.Ve
.PP
Output (the order may be different):
.PP
.Vb 6
\& echo A
\& echo B
\& A
\& echo C
\& B
\& C
.Ve
.PP
\&\s-1GNU\s0 Parallel will postpone the output until the command completes:
.PP
.Vb 1
\& parallel \-j2 \*(Aqprintf "%s\-start\en%s" {} {};sleep {};printf "%s\en" \-middle;echo {}\-end\*(Aq ::: 4 2 1
.Ve
.PP
Output:
.PP
.Vb 9
\& 2\-start
\& 2\-middle
\& 2\-end
\& 1\-start
\& 1\-middle
\& 1\-end
\& 4\-start
\& 4\-middle
\& 4\-end
.Ve
.PP
To get the output immediately use \-\-ungroup:
.PP
.Vb 1
\& parallel \-j2 \-\-ungroup \*(Aqprintf "%s\-start\en%s" {} {};sleep {};printf "%s\en" \-middle;echo {}\-end\*(Aq ::: 4 2 1
.Ve
.PP
Output:
.PP
.Vb 9
\& 4\-start
\& 42\-start
\& 2\-middle
\& 2\-end
\& 1\-start
\& 1\-middle
\& 1\-end
\& \-middle
\& 4\-end
.Ve
.PP
\&\-\-ungroup is fast, but can cause half a line from one job to be mixed
with half a line of another job. That has happened in the second line,
where the line '4\-middle' is mixed with '2\-start'.
.PP
To avoid this use \-\-linebuffer:
.PP
.Vb 1
\& parallel \-j2 \-\-linebuffer \*(Aqprintf "%s\-start\en%s" {} {};sleep {};printf "%s\en" \-middle;echo {}\-end\*(Aq ::: 4 2 1
.Ve
.PP
Output:
.PP
.Vb 9
\& 4\-start
\& 2\-start
\& 2\-middle
\& 2\-end
\& 1\-start
\& 1\-middle
\& 1\-end
\& 4\-middle
\& 4\-end
.Ve
.PP
To force the output in the same order as the arguments use \-\-keep\-order/\-k:
.PP
.Vb 1
\& parallel \-j2 \-k \*(Aqprintf "%s\-start\en%s" {} {};sleep {};printf "%s\en" \-middle;echo {}\-end\*(Aq ::: 4 2 1
.Ve
.PP
Output:
.PP
.Vb 9
\& 4\-start
\& 4\-middle
\& 4\-end
\& 2\-start
\& 2\-middle
\& 2\-end
\& 1\-start
\& 1\-middle
\& 1\-end
.Ve
.SS "Saving output into files"
.IX Subsection "Saving output into files"
\&\s-1GNU\s0 Parallel can save the output of each job into files:
.PP
.Vb 1
\& parallel \-\-files ::: A B C
.Ve
.PP
Output will be similar to:
.PP
.Vb 3
\& /tmp/pAh6uWuQCg.par
\& /tmp/opjhZCzAX4.par
\& /tmp/W0AT_Rph2o.par
.Ve
.PP
By default \s-1GNU\s0 Parallel will cache the output in files in /tmp. This
can be changed by setting \f(CW$TMPDIR\fR or \-\-tmpdir:
.PP
.Vb 1
\& parallel \-\-tmpdir /var/tmp \-\-files ::: A B C
.Ve
.PP
Output will be similar to:
.PP
.Vb 3
\& /var/tmp/N_vk7phQRc.par
\& /var/tmp/7zA4Ccf3wZ.par
\& /var/tmp/LIuKgF_2LP.par
.Ve
.PP
Or:
.PP
.Vb 1
\& TMPDIR=/var/tmp parallel \-\-files ::: A B C
.Ve
.PP
Output: Same as above.
.PP
The output files can be saved in a structured way using \-\-results:
.PP
.Vb 1
\& parallel \-\-results outdir echo ::: A B C
.Ve
.PP
Output:
.PP
.Vb 3
\& A
\& B
\& C
.Ve
.PP
but also these files were generated containing the standard output
(stdout) and standard error (stderr):
.PP
.Vb 6
\& outdir/1/A/stderr
\& outdir/1/A/stdout
\& outdir/1/B/stderr
\& outdir/1/B/stdout
\& outdir/1/C/stderr
\& outdir/1/C/stdout
.Ve
.PP
This is useful if you are running multiple variables:
.PP
.Vb 1
\& parallel \-\-header : \-\-results outdir echo ::: f1 A B ::: f2 C D
.Ve
.PP
Generated files:
.PP
.Vb 8
\& outdir/f1/A/f2/C/stderr
\& outdir/f1/A/f2/C/stdout
\& outdir/f1/A/f2/D/stderr
\& outdir/f1/A/f2/D/stdout
\& outdir/f1/B/f2/C/stderr
\& outdir/f1/B/f2/C/stdout
\& outdir/f1/B/f2/D/stderr
\& outdir/f1/B/f2/D/stdout
.Ve
.PP
The directories are named after the variables and their values.
.SH "Control the execution"
.IX Header "Control the execution"
.SS "Number of simultaneous jobs"
.IX Subsection "Number of simultaneous jobs"
The number of concurrent jobs is given with \-\-jobs/\-j:
.PP
.Vb 1
\& /usr/bin/time parallel \-N0 \-j64 sleep 1 ::: {1..128}
.Ve
.PP
With 64 jobs in parallel the 128 sleeps will take 2\-8 seconds to run \-
depending on how fast your machine is.
.PP
By default \-\-jobs is the same as the number of \s-1CPU\s0 cores. So this:
.PP
.Vb 1
\& /usr/bin/time parallel \-N0 sleep 1 ::: {1..128}
.Ve
.PP
should take twice the time of running 2 jobs per \s-1CPU\s0 core:
.PP
.Vb 1
\& /usr/bin/time parallel \-N0 \-\-jobs 200% sleep 1 ::: {1..128}
.Ve
.PP
\&\-\-jobs 0 will run as many jobs in parallel as possible:
.PP
.Vb 1
\& /usr/bin/time parallel \-N0 \-\-jobs 0 sleep 1 ::: {1..128}
.Ve
.PP
which should take 1\-7 seconds depending on how fast your machine is.
.PP
\&\-\-jobs can read from a file which is re-read when a job finishes:
.PP
.Vb 5
\& echo 50% > my_jobs
\& /usr/bin/time parallel \-N0 \-\-jobs my_jobs sleep 1 ::: {1..128} &
\& sleep 1
\& echo 0 > my_jobs
\& wait
.Ve
.PP
During the first second only 50% of the \s-1CPU\s0 cores will run a job. Then '0' is
put into my_jobs and the rest of the jobs will be started in
parallel.
.PP
Instead of basing the percentage on the number of \s-1CPU\s0 cores
\&\s-1GNU\s0 Parallel can base it on the number of CPUs:
.PP
.Vb 1
\& parallel \-\-use\-cpus\-instead\-of\-cores \-N0 sleep 1 ::: {1..128}
.Ve
.SS "Interactivity"
.IX Subsection "Interactivity"
\&\s-1GNU\s0 Parallel can ask the user if a command should be run using \-\-interactive:
.PP
.Vb 1
\& parallel \-\-interactive echo ::: 1 2 3
.Ve
.PP
Output:
.PP
.Vb 5
\& echo 1 ?...y
\& echo 2 ?...n
\& 1
\& echo 3 ?...y
\& 3
.Ve
.PP
\&\s-1GNU\s0 Parallel can be used to put arguments on the command line for an
interactive command such as emacs to edit one file at a time:
.PP
.Vb 1
\& parallel \-\-tty emacs ::: 1 2 3
.Ve
.PP
Or give multiple arguments in one go to open multiple files:
.PP
.Vb 1
\& parallel \-X \-\-tty vi ::: 1 2 3
.Ve
.SS "Timing"
.IX Subsection "Timing"
Some jobs do heavy I/O when they start. To avoid a thundering herd \s-1GNU\s0
Parallel can delay starting new jobs. \-\-delay X will make sure there is
at least X seconds between each start:
.PP
.Vb 1
\& parallel \-\-delay 2.5 echo Starting {}\e;date ::: 1 2 3
.Ve
.PP
Output:
.PP
.Vb 6
\& Starting 1
\& Thu Aug 15 16:24:33 CEST 2013
\& Starting 2
\& Thu Aug 15 16:24:35 CEST 2013
\& Starting 3
\& Thu Aug 15 16:24:38 CEST 2013
.Ve
.PP
If jobs taking more than a certain amount of time are known to fail,
they can be stopped with \-\-timeout:
.PP
.Vb 1
\& parallel \-\-timeout 2.1 sleep {}\e; echo {} ::: 1 2 3 4
.Ve
.PP
Output:
.PP
.Vb 2
\& 1
\& 2
.Ve
.PP
\&\s-1GNU\s0 Parallel can compute the median runtime for jobs and kill those
that take more than 200% of the median runtime:
.PP
.Vb 1
\& parallel \-\-timeout 200% sleep {}\e; echo {} ::: 2.1 2.2 3 7 2.3
.Ve
.PP
Output:
.PP
.Vb 4
\& 2.1
\& 2.2
\& 3
\& 2.3
.Ve
.PP
Based on the runtime of completed jobs \s-1GNU\s0 Parallel can estimate the
total runtime:
.PP
.Vb 1
\& parallel \-\-eta sleep ::: 1 3 2 2 1 3 3 2 1
.Ve
.PP
Output:
.PP
.Vb 2
\& Computers / CPU cores / Max jobs to run
\& 1:local / 2 / 2
\&
\& Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
\& ETA: 2s 0left 1.11avg local:0/9/100%/1.1s
.Ve
.SS "Progress"
.IX Subsection "Progress"
\&\s-1GNU\s0 Parallel can give progress information with \-\-progress:
.PP
.Vb 1
\& parallel \-\-progress sleep ::: 1 3 2 2 1 3 3 2 1
.Ve
.PP
Output:
.PP
.Vb 2
\& Computers / CPU cores / Max jobs to run
\& 1:local / 2 / 2
\&
\& Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
\& local:0/9/100%/1.1s
.Ve
.PP
A logfile of the jobs completed so far can be generated with \-\-joblog:
.PP
.Vb 2
\& parallel \-\-joblog /tmp/log exit ::: 1 2 3 0
\& cat /tmp/log
.Ve
.PP
Output:
.PP
.Vb 5
\& Seq Host Starttime Runtime Send Receive Exitval Signal Command
\& 1 : 1376577364.974 0.008 0 0 1 0 exit 1
\& 2 : 1376577364.982 0.013 0 0 2 0 exit 2
\& 3 : 1376577364.990 0.013 0 0 3 0 exit 3
\& 4 : 1376577365.003 0.003 0 0 0 0 exit 0
.Ve
.PP
The log contains the job sequence, which host the job was run on, the
start time and run time, how much data was transferred if the job was
run on a remote host, the exit value, the signal that killed the job,
and finally the command being run.
.PP
With a joblog \s-1GNU\s0 Parallel can be stopped and later pick up where it
left off. It is important that the input of the completed jobs is
unchanged.
.PP
.Vb 4
\& parallel \-\-joblog /tmp/log exit ::: 1 2 3 0
\& cat /tmp/log
\& parallel \-\-resume \-\-joblog /tmp/log exit ::: 1 2 3 0 0 0
\& cat /tmp/log
.Ve
.PP
Output:
.PP
.Vb 5
\& Seq Host Starttime Runtime Send Receive Exitval Signal Command
\& 1 : 1376580069.544 0.008 0 0 1 0 exit 1
\& 2 : 1376580069.552 0.009 0 0 2 0 exit 2
\& 3 : 1376580069.560 0.012 0 0 3 0 exit 3
\& 4 : 1376580069.571 0.005 0 0 0 0 exit 0
\&
\& Seq Host Starttime Runtime Send Receive Exitval Signal Command
\& 1 : 1376580069.544 0.008 0 0 1 0 exit 1
\& 2 : 1376580069.552 0.009 0 0 2 0 exit 2
\& 3 : 1376580069.560 0.012 0 0 3 0 exit 3
\& 4 : 1376580069.571 0.005 0 0 0 0 exit 0
\& 5 : 1376580070.028 0.009 0 0 0 0 exit 0
\& 6 : 1376580070.038 0.007 0 0 0 0 exit 0
.Ve
.PP
Note how the start time of the last 2 jobs is clearly from the second run.
.PP
With \-\-resume\-failed \s-1GNU\s0 Parallel will re-run the jobs that failed:
.PP
.Vb 2
\& parallel \-\-resume\-failed \-\-joblog /tmp/log exit ::: 1 2 3 0 0 0
\& cat /tmp/log
.Ve
.PP
Output:
.PP
.Vb 10
\& Seq Host Starttime Runtime Send Receive Exitval Signal Command
\& 1 : 1376580069.544 0.008 0 0 1 0 exit 1
\& 2 : 1376580069.552 0.009 0 0 2 0 exit 2
\& 3 : 1376580069.560 0.012 0 0 3 0 exit 3
\& 4 : 1376580069.571 0.005 0 0 0 0 exit 0
\& 5 : 1376580070.028 0.009 0 0 0 0 exit 0
\& 6 : 1376580070.038 0.007 0 0 0 0 exit 0
\& 1 : 1376580154.433 0.010 0 0 1 0 exit 1
\& 2 : 1376580154.444 0.022 0 0 2 0 exit 2
\& 3 : 1376580154.466 0.005 0 0 3 0 exit 3
.Ve
.PP
Note how seq 1 2 3 have been repeated because they had exit value != 0.
.SS "Termination"
.IX Subsection "Termination"
For certain jobs there is no need to continue if one of the jobs fails
and has an exit code != 0. \s-1GNU\s0 Parallel will stop spawning new jobs
with \-\-halt 1:
.PP
.Vb 1
\& parallel \-j2 \-\-halt 1 echo {}\e; exit {} ::: 0 0 1 2 3
.Ve
.PP
Output:
.PP
.Vb 8
\& 0
\& 0
\& 1
\& parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed:
\& echo 1; exit 1
\& 2
\& parallel: Starting no more jobs. Waiting for 1 jobs to finish. This job failed:
\& echo 2; exit 2
.Ve
.PP
With \-\-halt 2 the running jobs will be killed immediately:
.PP
.Vb 1
\& parallel \-j2 \-\-halt 2 echo {}\e; exit {} ::: 0 0 1 2 3
.Ve
.PP
Output:
.PP
.Vb 5
\& 0
\& 0
\& 1
\& parallel: This job failed:
\& echo 1; exit 1
.Ve
.PP
If \-\-halt is given a percentage this percentage of the jobs must fail
(though minimum 3) before \s-1GNU\s0 Parallel stops spawning more jobs:
.PP
.Vb 1
\& parallel \-j2 \-\-halt 20% echo {}\e; exit {} ::: 0 0 1 2 3 4 5 6 7
.Ve
.PP
Output:
.PP
.Vb 11
\& 0
\& 0
\& 1
\& 2
\& 3
\& 4
\& parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed:
\& echo 4; exit 4
\& 5
\& parallel: Starting no more jobs. Waiting for 1 jobs to finish. This job failed:
\& echo 5; exit 5
.Ve
.PP
\&\s-1GNU\s0 Parallel can retry the command with \-\-retries. This is useful if a
command fails for unknown reasons now and then.
.PP
.Vb 2
\& parallel \-k \-\-retries 3 \*(Aqecho tried {} >>/tmp/runs; echo completed {}; exit {}\*(Aq ::: 1 2 0
\& cat /tmp/runs
.Ve
.PP
Output:
.PP
.Vb 3
\& completed 1
\& completed 2
\& completed 0
\&
\& tried 1
\& tried 2
\& tried 1
\& tried 2
\& tried 1
\& tried 2
\& tried 0
.Ve
.PP
Note how job 1 and 2 were tried 3 times, but 0 was not retried because it had exit code 0.
.SS "Limiting the resources"
.IX Subsection "Limiting the resources"
To avoid overloading systems \s-1GNU\s0 Parallel can look at the system load
before starting another job:
.PP
.Vb 1
\& parallel \-\-load 100% echo load is less than {} job per cpu ::: 1
.Ve
.PP
Output:
.PP
.Vb 2
\& [when the load is less than the number of cpu cores]
\& load is less than 1 job per cpu
.Ve
.PP
\&\s-1GNU\s0 Parallel can also check if the system is swapping.
.PP
.Vb 1
\& parallel \-\-noswap echo the system is not swapping ::: now
.Ve
.PP
Output:
.PP
.Vb 2
\& [when the system is not swapping]
\& the system is not swapping now
.Ve
.PP
\&\s-1GNU\s0 Parallel can run the jobs with a nice value. This will work both
locally and remotely.
.PP
.Vb 1
\& parallel \-\-nice 17 echo this is being run with nice \-n ::: 17
.Ve
.PP
Output:
.PP
.Vb 1
\& this is being run with nice \-n 17
.Ve
.SH "Remote execution"
.IX Header "Remote execution"
\&\s-1GNU\s0 Parallel can run jobs on remote servers. It uses ssh to
communicate with the remote machines.
.SS "Sshlogin"
.IX Subsection "Sshlogin"
The most basic sshlogin is \-S host:
.PP
.Vb 1
\& parallel \-S $SERVER1 echo running on ::: $SERVER1
.Ve
.PP
Output:
.PP
.Vb 1
\& running on [$SERVER1]
.Ve
.PP
To use a different username prepend the server with username@:
.PP
.Vb 1
\& parallel \-S username@$SERVER1 echo running on ::: username@$SERVER1
.Ve
.PP
Output:
.PP
.Vb 1
\& running on [username@$SERVER1]
.Ve
.PP
The special sshlogin ':' is the local machine:
.PP
.Vb 1
\& parallel \-S : echo running on ::: the_local_machine
.Ve
.PP
Output:
.PP
.Vb 1
\& running on the_local_machine
.Ve
.PP
If ssh is not in \f(CW$PATH\fR it can be prepended to \f(CW$SERVER1:\fR
.PP
.Vb 1
\& parallel \-S \*(Aq/usr/bin/ssh \*(Aq$SERVER1 echo custom ::: ssh
.Ve
.PP
Output:
.PP
.Vb 1
\& custom ssh
.Ve
.PP
Several servers can be given using multiple \-S:
.PP
.Vb 1
\& parallel \-S $SERVER1 \-S $SERVER2 echo ::: running on more hosts
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& running
\& on
\& more
\& hosts
.Ve
.PP
Or they can be separated by ,:
.PP
.Vb 1
\& parallel \-S $SERVER1,$SERVER2 echo ::: running on more hosts
.Ve
.PP
Output: Same as above.
.PP
They can also be read from a file (replace user@ with the user on \f(CW$SERVER2\fR):
.PP
.Vb 4
\& echo $SERVER1 > nodefile
\& # Force 4 cores, special ssh\-command, username
\& echo 4//usr/bin/ssh user@$SERVER2 >> nodefile
\& parallel \-\-sshloginfile nodefile echo ::: running on more hosts
.Ve
.PP
Output: Same as above.
.PP
The special \-\-sshloginfile '..' reads from ~/.parallel/sshloginfile.
.PP
To force \s-1GNU\s0 Parallel to treat a server as having a given number of \s-1CPU\s0
cores prepend #/ to the sshlogin:
.PP
.Vb 1
\& parallel \-S 4/$SERVER1 echo force {} cpus on server ::: 4
.Ve
.PP
Output:
.PP
.Vb 1
\& force 4 cpus on server
.Ve
.SS "Transferring files"
.IX Subsection "Transferring files"
\&\s-1GNU\s0 Parallel can transfer the files to be processed to the remote
host. It does that using rsync.
.PP
.Vb 2
\& echo This is input_file > input_file
\& parallel \-S $SERVER1 \-\-transfer cat ::: input_file
.Ve
.PP
Output:
.PP
.Vb 1
\& This is input_file
.Ve
.PP
If the file is processed into another file, the resulting file can be
transferred back:
.PP
.Vb 3
\& echo This is input_file > input_file
\& parallel \-S $SERVER1 \-\-transfer \-\-return {}.out cat {} ">"{}.out ::: input_file
\& cat input_file.out
.Ve
.PP
Output: Same as above.
.PP
To remove the input and output file on the remote server use \-\-cleanup:
.PP
.Vb 3
\& echo This is input_file > input_file
\& parallel \-S $SERVER1 \-\-transfer \-\-return {}.out \-\-cleanup cat {} ">"{}.out ::: input_file
\& cat input_file.out
.Ve
.PP
Output: Same as above.
.PP
There is a shorthand for \-\-transfer \-\-return \-\-cleanup called \-\-trc:
.PP
.Vb 3
\& echo This is input_file > input_file
\& parallel \-S $SERVER1 \-\-trc {}.out cat {} ">"{}.out ::: input_file
\& cat input_file.out
.Ve
.PP
Output: Same as above.
.PP
Some jobs need a common database for all jobs. \s-1GNU\s0 Parallel can
transfer that using \-\-basefile which will transfer the file before the
first job:
.PP
.Vb 2
\& echo common data > common_file
\& parallel \-\-basefile common_file \-S $SERVER1 cat common_file\e; echo {} ::: foo
.Ve
.PP
Output:
.PP
.Vb 2
\& common data
\& foo
.Ve
.PP
To remove it from the remote host after the last job use \-\-cleanup.
.SS "Working dir"
.IX Subsection "Working dir"
The default working dir on the remote machines is the login dir. This
can be changed with \-\-workdir \fImydir\fR.
.PP
Files transferred using \-\-transfer and \-\-return will be relative
to \fImydir\fR on remote computers, and the command will be executed in
the dir \fImydir\fR.
.PP
The special \fImydir\fR value ... will create working dirs under
~/.parallel/tmp/ on the remote computers. If \-\-cleanup is given
these dirs will be removed.
.PP
The special \fImydir\fR value . uses the current working dir. If the
current working dir is beneath your home dir, the value . is
treated as the relative path to your home dir. This means that if your
home dir is different on remote computers (e.g. if your login is
different) the relative path will still be relative to your home dir.
.PP
.Vb 3
\& parallel \-S $SERVER1 pwd ::: ""
\& parallel \-\-workdir . \-S $SERVER1 pwd ::: ""
\& parallel \-\-workdir ... \-S $SERVER1 pwd ::: ""
.Ve
.PP
Output:
.PP
.Vb 3
\& [the login dir on $SERVER1]
\& [current dir relative on $SERVER1]
\& [a dir in ~/.parallel/tmp/...]
.Ve
.SS "Avoid overloading sshd"
.IX Subsection "Avoid overloading sshd"
If many jobs are started on the same server, sshd can be
overloaded. \s-1GNU\s0 Parallel can insert a delay between each job run on
the same server:
.PP
.Vb 1
\& parallel \-S $SERVER1 \-\-sshdelay 0.2 echo ::: 1 2 3
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& 1
\& 2
\& 3
.Ve
.PP
Sshd will be less overloaded if using \-\-controlmaster, which will
multiplex ssh connections:
.PP
.Vb 1
\& parallel \-\-controlmaster \-S $SERVER1 echo ::: 1 2 3
.Ve
.PP
Output: Same as above.
.SS "Ignore hosts that are down"
.IX Subsection "Ignore hosts that are down"
In clusters with many hosts a few of them are often down. \s-1GNU\s0 Parallel
can ignore those hosts. In this case the host 173.194.32.46 is down:
.PP
.Vb 1
\& parallel \-\-filter\-hosts \-S 173.194.32.46,$SERVER1 echo ::: bar
.Ve
.PP
Output:
.PP
.Vb 1
\& bar
.Ve
.SS "Running the same commands on all hosts"
.IX Subsection "Running the same commands on all hosts"
\&\s-1GNU\s0 Parallel can run the same command on all the hosts:
.PP
.Vb 1
\& parallel \-\-onall \-S $SERVER1,$SERVER2 echo ::: foo bar
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& foo
\& bar
\& foo
\& bar
.Ve
.PP
Often you will just want to run a single command on all hosts without
arguments. \-\-nonall is a no argument \-\-onall:
.PP
.Vb 1
\& parallel \-\-nonall \-S $SERVER1,$SERVER2 echo foo bar
.Ve
.PP
Output:
.PP
.Vb 2
\& foo bar
\& foo bar
.Ve
.PP
When \-\-tag is used with \-\-nonall and \-\-onall the \-\-tagstring is the host:
.PP
.Vb 1
\& parallel \-\-nonall \-\-tag \-S $SERVER1,$SERVER2 echo foo bar
.Ve
.PP
Output (the order may be different):
.PP
.Vb 2
\& $SERVER1 foo bar
\& $SERVER2 foo bar
.Ve
.PP
\&\-\-jobs sets the number of servers to log in to in parallel.
.SS "Transfer environment variables and functions"
.IX Subsection "Transfer environment variables and functions"
Using \-\-env \s-1GNU\s0 Parallel can transfer an environment variable to the
remote system.
.PP
.Vb 3
\& MYVAR=\*(Aqfoo bar\*(Aq
\& export MYVAR
\& parallel \-\-env MYVAR \-S $SERVER1 echo \*(Aq$MYVAR\*(Aq ::: baz
.Ve
.PP
Output:
.PP
.Vb 1
\& foo bar baz
.Ve
.PP
This works for functions too if your shell is Bash:
.PP
.Vb 6
\& # This only works in Bash
\& my_func() {
\& echo in my_func $1
\& }
\& export \-f my_func
\& parallel \-\-env my_func \-S $SERVER1 my_func ::: baz
.Ve
.PP
Output:
.PP
.Vb 1
\& in my_func baz
.Ve
.PP
\&\s-1GNU\s0 Parallel can copy all defined variables and functions to the
remote system. It just needs to record which ones to ignore in
~/.parallel/ignored_vars. Do that by running this once:
.PP
.Vb 2
\& parallel \-\-record\-env
\& cat ~/.parallel/ignored_vars
.Ve
.PP
Output:
.PP
.Vb 1
\& [list of variables to ignore \- including $PATH and $HOME]
.Ve
.PP
Now all new variables and functions defined will be copied when using
\&\-\-env _:
.PP
.Vb 7
\& # The function is only copied if using Bash
\& my_func2() {
\& echo in my_func2 $VAR $1
\& }
\& export \-f my_func2
\& VAR=foo
\& export VAR
\&
\& parallel \-\-env _ \-S $SERVER1 \*(Aqecho $VAR; my_func2\*(Aq ::: bar
.Ve
.PP
Output:
.PP
.Vb 2
\& foo
\& in my_func2 foo bar
.Ve
.SS "Showing what is actually run"
.IX Subsection "Showing what is actually run"
\&\-\-verbose will show the command that would be run on the local
machine. When a job is run on a remote machine this is wrapped with
ssh and possibly transferring files and environment variables, setting
the workdir, and setting \-\-nice value. \-vv shows all of this.
.PP
.Vb 1
\& parallel \-vv \-S $SERVER1 echo ::: bar
.Ve
.PP
Output:
.PP
.Vb 2
\& ssh \-tt \-oLogLevel=quiet lo \*(Aqeval \`echo $SHELL | grep "/t\e{0,1\e}csh" > /dev/null && echo setenv PARALLEL_SEQ \*(Aq$PARALLEL_SEQ\*(Aq\e; setenv PARALLEL_PID \*(Aq$PARALLEL_PID\*(Aq || echo PARALLEL_SEQ=\*(Aq$PARALLEL_SEQ\*(Aq\e;export PARALLEL_SEQ\e; PARALLEL_PID=\*(Aq$PARALLEL_PID\*(Aq\e;export PARALLEL_PID\` ;\*(Aq tty\e \e>/dev/null\e \e&\e&\e stty\e isig\e \-onlcr\e \-echo\e;echo\e bar;
\& bar
.Ve
.PP
When the command gets more complex, the output is so hard to read that it is only useful for debugging:
.PP
.Vb 5
\& my_func3() {
\& echo in my_func $1 > $1.out
\& }
\& export \-f my_func3
\& parallel \-vv \-\-workdir ... \-\-nice 17 \-\-env _ \-\-trc {}.out \-S $SERVER1 my_func3 {} ::: abc\-file
.Ve
.PP
Output will be similar to:
.PP
.Vb 5
\& ssh server mkdir \-p .parallel/tmp/hk\-31483\-1; rsync \-rlDzR \-essh ./abc\-file server:.parallel/tmp/hk\-31483\-1;ssh \-tt \-oLogLevel=quiet server \*(Aqeval \`echo $SHELL | grep "/t\e{0,1\e}csh" > /dev/null && echo setenv PARALLEL_SEQ \*(Aq$PARALLEL_SEQ\*(Aq\e; setenv PARALLEL_PID \*(Aq$PARALLEL_PID\*(Aq || echo PARALLEL_SEQ=\*(Aq$PARALLEL_SEQ\*(Aq\e;export PARALLEL_SEQ\e; PARALLEL_PID=\*(Aq$PARALLEL_PID\*(Aq\e;export PARALLEL_PID\` ;\*(Aq tty\e \e>/dev/null\e \e&\e&\e stty\e isig\e \-onlcr\e \-echo\e;mkdir\e \-p\e .parallel/tmp/hk\-31483\-1\e;\e cd\e .parallel/tmp/hk\-31483\-1\e \e&\e&\e echo\e \e$SHELL\e \e|\e grep\e \e"/t\e\e\e{0,1\e\e\e}csh\e"\e \e>\e /dev/null\e \e&\e&\e setenv\e my_func3\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e\e\e \e\e\e>\e\e\e \e\e\e$1.out\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e setenv\e VAR\e foo\e \e&\e&\e setenv\e my_func2\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e|\e|\e export\e my_func3=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e\e\e \e\e\e>\e\e\e \e\e\e$1.out\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e export\e VAR=foo\e \e&\e&\e export\e my_func2=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e eval\e my_func3\e"\e$my_func3\e"\e \e&\e&\e eval\e my_func2\e"\e$my_func2\e"\e;\e\enice\e \-n17\e /bin/bash\e \-c\e my_func3\e\e\e abc\-file;_EXIT_status=$?; mkdir \-p .; rsync \-\-rsync\-path=cd\e .parallel/tmp/hk\-31483\-1/.\e;\e rsync \-rlDzR \-essh server:abc\-file.out .;ssh server rm\e \-f\e .parallel/tmp/hk\-31483\-1/abc\-file\e;rm\e \-f\e .parallel/tmp/hk\-31483\-1/abc\-file.out\e;rm \-rf .parallel/tmp/hk\-31483\-1\e;; exit $_EXIT_status;
.Ve
.SH "\-\-pipe"
.IX Header "--pipe"
The \-\-pipe functionality puts \s-1GNU\s0 Parallel in a different mode:
Instead of treating the data on stdin (standard input) as arguments
for a command to run, the data will be sent to stdin (standard input)
of the command.
.PP
The typical situation is:
.PP
.Vb 1
\& command_A | command_B | command_C
.Ve
.PP
where command_B is slow, and you want to speed up command_B.
.SS "Chunk size"
.IX Subsection "Chunk size"
By default \s-1GNU\s0 Parallel will start an instance of command_B, read a
chunk of 1 \s-1MB,\s0 and pass that to the instance. Then start another
instance, read another chunk, and pass that to the second instance.
.PP
.Vb 1
\& cat num1000000 | parallel \-\-pipe wc
.Ve
.PP
Output (the order may be different):
.PP
.Vb 7
\& 165668 165668 1048571
\& 149797 149797 1048579
\& 149796 149796 1048572
\& 149797 149797 1048579
\& 149797 149797 1048579
\& 149796 149796 1048572
\& 85349 85349 597444
.Ve
.PP
The size of the chunk is not exactly 1 \s-1MB\s0 because \s-1GNU\s0 Parallel only
passes full lines \- never half a line, thus the block size is only
1 \s-1MB\s0 on average. You can change the block size to 2 \s-1MB\s0 with \-\-block:
.PP
.Vb 1
\& cat num1000000 | parallel \-\-pipe \-\-block 2M wc
.Ve
.PP
Output (the order may be different):
.PP
.Vb 4
\& 315465 315465 2097150
\& 299593 299593 2097151
\& 299593 299593 2097151
\& 85349 85349 597444
.Ve
.PP
\&\s-1GNU\s0 Parallel treats each line as a record. If the order of records is
unimportant (e.g. you need all lines processed, but you do not care
which is processed first), then you can use \-\-round\-robin. Without
\&\-\-round\-robin \s-1GNU\s0 Parallel will start a command per block; with
\&\-\-round\-robin only the requested number of jobs will be started
(\-\-jobs). The records will then be distributed between the running
jobs:
.PP
.Vb 1
\& cat num1000000 | parallel \-\-pipe \-j4 \-\-round\-robin wc
.Ve
.PP
Output will be similar to:
.PP
.Vb 4
\& 149797 149797 1048579
\& 299593 299593 2097151
\& 315465 315465 2097150
\& 235145 235145 1646016
.Ve
.PP
One of the 4 instances got a single block, 2 instances got 2 full
blocks each, and one instance got 1 full and 1 partial block.
.SS "Records"
.IX Subsection "Records"
\&\s-1GNU\s0 Parallel sees the input as records. The default record is a single
line.
.PP
Using \-N140000 \s-1GNU\s0 Parallel will read 140000 records at a time:
.PP
.Vb 1
\& cat num1000000 | parallel \-\-pipe \-N140000 wc
.Ve
.PP
Output (the order may be different):
.PP
.Vb 8
\& 140000 140000 868895
\& 140000 140000 980000
\& 140000 140000 980000
\& 140000 140000 980000
\& 140000 140000 980000
\& 140000 140000 980000
\& 140000 140000 980000
\& 20000 20000 140001
.Ve
.PP
Notice that the last job could not get the full 140000 lines, but only
20000 lines.
.PP
If a record is 75 lines \-L can be used:
.PP
.Vb 1
\& cat num1000000 | parallel \-\-pipe \-L75 wc
.Ve
.PP
Output (the order may be different):
.PP
.Vb 8
\& 165600 165600 1048095
\& 149850 149850 1048950
\& 149775 149775 1048425
\& 149775 149775 1048425
\& 149850 149850 1048950
\& 149775 149775 1048425
\& 85350 85350 597450
\& 25 25 176
.Ve
.PP
Notice \s-1GNU\s0 Parallel still reads a block of around 1 \s-1MB\s0; but instead of
passing full lines to 'wc' it passes full 75 lines at a time. This
of course does not hold for the last job (which in this case got 25
lines).
.SS "Record separators"
.IX Subsection "Record separators"
\&\s-1GNU\s0 Parallel uses separators to determine where two records split.
.PP
\&\-\-recstart gives the string that starts a record; \-\-recend gives the
string that ends a record. The default is \-\-recend '\en' (newline).
.PP
If both \-\-recend and \-\-recstart are given, then the record will only
split if the recend string is immediately followed by the recstart
string.
.PP
Here the \-\-recend is set to ', ':
.PP
.Vb 1
\& echo /foo, bar/, /baz, qux/, | parallel \-kN1 \-\-recend \*(Aq, \*(Aq \-\-pipe echo JOB{#}\e;cat\e;echo END
.Ve
.PP
Output:
.PP
.Vb 9
\& JOB1
\& /foo, END
\& JOB2
\& bar/, END
\& JOB3
\& /baz, END
\& JOB4
\& qux/,
\& END
.Ve
.PP
Here the \-\-recstart is set to '/':
.PP
.Vb 1
\& echo /foo, bar/, /baz, qux/, | parallel \-kN1 \-\-recstart \*(Aq/\*(Aq \-\-pipe echo JOB{#}\e;cat\e;echo END
.Ve
.PP
Output:
.PP
.Vb 9
\& JOB1
\& /foo, barEND
\& JOB2
\& /, END
\& JOB3
\& /baz, quxEND
\& JOB4
\& /,
\& END
.Ve
.PP
Here both \-\-recend and \-\-recstart are set:
.PP
.Vb 1
\& echo /foo, bar/, /baz, qux/, | parallel \-kN1 \-\-recend \*(Aq, \*(Aq \-\-recstart \*(Aq/\*(Aq \-\-pipe echo JOB{#}\e;cat\e;echo END
.Ve
.PP
Output:
.PP
.Vb 5
\& JOB1
\& /foo, bar/, END
\& JOB2
\& /baz, qux/,
\& END
.Ve
.PP
Note the difference between setting one string and setting both strings.
.PP
With \-\-regexp the \-\-recend and \-\-recstart will be treated as a regular expression:
.PP
.Vb 1
\& echo foo,bar,_baz,_\|_qux, | parallel \-kN1 \-\-regexp \-\-recend \*(Aq,_+\*(Aq \-\-pipe echo JOB{#}\e;cat\e;echo END
.Ve
.PP
Output:
.PP
.Vb 7
\& JOB1
\& foo,bar,_END
\& JOB2
\& baz,_\|_END
\& JOB3
\& qux,
\& END
.Ve
.PP
\&\s-1GNU\s0 Parallel can remove the record separators with \-\-remove\-rec\-sep/\-\-rrs:
.PP
.Vb 1
\& echo foo,bar,_baz,_\|_qux, | parallel \-kN1 \-\-rrs \-\-regexp \-\-recend \*(Aq,_+\*(Aq \-\-pipe echo JOB{#}\e;cat\e;echo END
.Ve
.PP
Output:
.PP
.Vb 7
\& JOB1
\& foo,barEND
\& JOB2
\& bazEND
\& JOB3
\& qux,
\& END
.Ve
.SS "Header"
.IX Subsection "Header"
If the input data has a header, the header can be repeated for each
job by matching the header with \-\-header. If headers start with \f(CW%:\fR
.PP
.Vb 1
\& cat num_%header | parallel \-\-header \*(Aq(%.*\en)*\*(Aq \-\-pipe \-N3 echo JOB{#}\e;cat
.Ve
.PP
Output (the order may be different):
.PP
.Vb 10
\& JOB1
\& %head1
\& %head2
\& 1
\& 2
\& 3
\& JOB2
\& %head1
\& %head2
\& 4
\& 5
\& 6
\& JOB3
\& %head1
\& %head2
\& 7
\& 8
\& 9
\& JOB4
\& %head1
\& %head2
\& 10
.Ve
.PP
If the header is 2 lines, \-\-header 2 will work:
.PP
.Vb 1
\& cat num_%header | parallel \-\-header 2 \-\-pipe \-N3 echo JOB{#}\e;cat
.Ve
.PP
Output: Same as above.
.SH "Shebang"
.IX Header "Shebang"
.SS "Input data and parallel command in the same file"
.IX Subsection "Input data and parallel command in the same file"
\&\s-1GNU\s0 Parallel is often called as:
.PP
.Vb 1
\& cat input_file | parallel command
.Ve
.PP
With \-\-shebang the input_file and parallel can be combined into the same script.
.PP
UNIX-scripts start with a shebang line like:
.PP
.Vb 1
\& #!/bin/bash
.Ve
.PP
\&\s-1GNU\s0 Parallel can do that, too. With \-\-shebang the arguments can be
listed in the file. The parallel command is the first line of the
script:
.PP
.Vb 1
\& #!/usr/bin/parallel \-\-shebang \-r echo
\&
\& foo
\& bar
\& baz
.Ve
.PP
Output (the order may be different):
.PP
.Vb 3
\& foo
\& bar
\& baz
.Ve
.SS "Parallelizing existing scripts"
.IX Subsection "Parallelizing existing scripts"
\&\s-1GNU\s0 Parallel is often called as:
.PP
.Vb 2
\& cat input_file | parallel command
\& parallel command ::: foo bar
.Ve
.PP
If command is a script, parallel can be combined into a single file so:
.PP
.Vb 2
\& cat input_file | command
\& command foo bar
.Ve
.PP
will run the script in parallel.
.PP
This perl script perl_echo works like echo:
.PP
.Vb 1
\& #!/usr/bin/perl
\&
\& print "@ARGV\en"
.Ve
.PP
It can be called as:
.PP
.Vb 1
\& parallel perl_echo ::: foo bar
.Ve
.PP
By changing the #!\-line it can be run in parallel:
.PP
.Vb 1
\& #!/usr/bin/parallel \-\-shebang\-wrap /usr/bin/perl
\&
\& print "@ARGV\en"
.Ve
.PP
Thus this will work:
.PP
.Vb 1
\& perl_echo foo bar
.Ve
.PP
Output (the order may be different):
.PP
.Vb 2
\& foo
\& bar
.Ve
.PP
This technique can be used for:
.IP "Perl:" 9
.IX Item "Perl:"
#!/usr/bin/parallel \-\-shebang\-wrap /usr/bin/perl
.IP "Python:" 9
.IX Item "Python:"
#!/usr/bin/parallel \-\-shebang\-wrap /usr/bin/python
.IP "Bash:" 9
.IX Item "Bash:"
#!/usr/bin/parallel \-\-shebang\-wrap /bin/bash
.IP "R:" 9
.IX Item "R:"
#!/usr/bin/parallel \-\-shebang\-wrap /usr/bin/Rscript \-\-vanilla \-\-slave
.IP "GNUplot:" 9
.IX Item "GNUplot:"
#!/usr/bin/parallel \-\-shebang\-wrap ARG={} /usr/bin/gnuplot
.IP "Ruby:" 9
.IX Item "Ruby:"
#!/usr/bin/parallel \-\-shebang\-wrap /usr/bin/ruby
.SH "Semaphore"
.IX Header "Semaphore"
\&\s-1GNU\s0 Parallel can work as a counting semaphore. This is slower and less
efficient than its normal mode.
.PP
An alias for 'parallel \-\-semaphore' is 'sem'. The default is to allow
only one program to run at a time (technically called a mutex). The
program is started in the background. Use \-\-wait for all 'sem's to
finish:
.PP
.Vb 5
\& sem \*(Aqsleep 1; echo The first finished\*(Aq &&
\& echo The first is now running in the background &&
\& sem \*(Aqsleep 1; echo The second finished\*(Aq &&
\& echo The second is now running in the background
\& sem \-\-wait
.Ve
.PP
Output:
.PP
.Vb 4
\& The first is now running in the background
\& The first finished
\& The second is now running in the background
\& The second finished
.Ve
.PP
The command can be run in the foreground with \-\-fg:
.PP
.Vb 5
\& sem \-\-fg \*(Aqsleep 1; echo The first finished\*(Aq &&
\& echo The first finished running in the foreground &&
\& sem \-\-fg \*(Aqsleep 1; echo The second finished\*(Aq &&
\& echo The second finished running in the foreground
\& sem \-\-wait
.Ve
.PP
The difference between this and just running the command, is that a
mutex is set, so if other sems were running in the background only one
would run at the same time.
.PP
To tell the difference between which semaphore is used, use
\&\-\-semaphorename/\-\-id. Run this in one terminal:
.PP
.Vb 1
\& sem \-\-id my_id \-u \*(Aqecho First started; sleep 10; echo The first finished\*(Aq
.Ve
.PP
and simultaneously this in another terminal:
.PP
.Vb 1
\& sem \-\-id my_id \-u \*(Aqecho Second started; sleep 10; echo The second finished\*(Aq
.Ve
.PP
Note how the second will only be started when the first has finished.
.SS "Counting semaphore"
.IX Subsection "Counting semaphore"
A mutex is like having a single toilet: When it is in use everyone
else will have to wait. A counting semaphore is like having multiple
toilets: Several people can use the toilets, but when they all are in
use, everyone else will have to wait.
.PP
sem can emulate a counting semaphore. Use \-\-jobs to set the number of
toilets:
.PP
.Vb 5
\& sem \-\-jobs 3 \-\-id my_id \-u \*(Aqecho First started; sleep 5; echo The first finished\*(Aq &&
\& sem \-\-jobs 3 \-\-id my_id \-u \*(Aqecho Second started; sleep 6; echo The second finished\*(Aq &&
\& sem \-\-jobs 3 \-\-id my_id \-u \*(Aqecho Third started; sleep 7; echo The third finished\*(Aq &&
\& sem \-\-jobs 3 \-\-id my_id \-u \*(Aqecho Fourth started; sleep 8; echo The fourth finished\*(Aq &&
\& sem \-\-wait \-\-id my_id
.Ve
.PP
Output:
.PP
.Vb 8
\& First started
\& Second started
\& Third started
\& The first finished
\& Fourth started
\& The second finished
\& The third finished
\& The fourth finished
.Ve
.SH "Informational"
.IX Header "Informational"
\&\s-1GNU\s0 Parallel has some options to give short information about the
configuration.
.PP
\&\-\-help will print a summary of the most important options:
.PP
.Vb 1
\& parallel \-\-help
.Ve
.PP
Output:
.PP
.Vb 4
\& Usage:
\& parallel [options] [command [arguments]] < list_of_arguments
\& parallel [options] [command [arguments]] (::: arguments|:::: argfile(s))...
\& cat ... | parallel \-\-pipe [options] [command [arguments]]
\&
\& \-j n Run n jobs in parallel
\& \-k Keep same order
\& \-X Multiple arguments with context replace
\& \-\-colsep regexp Split input on regexp for positional replacements
\& {} {.} {/} {/.} {#} Replacement strings
\& {3} {3.} {3/} {3/.} Positional replacement strings
\&
\& \-S sshlogin Example: foo@server.example.com
\& \-\-slf .. Use ~/.parallel/sshloginfile as the list of sshlogins
\& \-\-trc {}.bar Shorthand for \-\-transfer \-\-return {}.bar \-\-cleanup
\& \-\-onall Run the given command with argument on all sshlogins
\& \-\-nonall Run the given command with no arguments on all sshlogins
\&
\& \-\-pipe Split stdin (standard input) to multiple jobs.
\& \-\-recend str Record end separator for \-\-pipe.
\& \-\-recstart str Record start separator for \-\-pipe.
\&
\& See \*(Aqman parallel\*(Aq for details
\&
\& When using GNU Parallel for a publication please cite:
\&
\& O. Tange (2011): GNU Parallel \- The Command\-Line Power Tool,
\& ;login: The USENIX Magazine, February 2011:42\-47.
.Ve
.PP
When asking for help, always report the full output of:
.PP
.Vb 1
\& parallel \-\-version
.Ve
.PP
Output:
.PP
.Vb 5
\& GNU parallel 20130822
\& Copyright (C) 2007,2008,2009,2010,2011,2012,2013 Ole Tange and Free Software Foundation, Inc.
\& License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
\& This is free software: you are free to change and redistribute it.
\& GNU parallel comes with no warranty.
\&
\& Web site: http://www.gnu.org/software/parallel
\&
\& When using GNU Parallel for a publication please cite:
\&
\& O. Tange (2011): GNU Parallel \- The Command\-Line Power Tool,
\& ;login: The USENIX Magazine, February 2011:42\-47.
.Ve
.PP
In scripts \-\-minversion can be used to ensure the user has at least
this version:
.PP
.Vb 1
\& parallel \-\-minversion 20130722 && echo Your version is at least 20130722.
.Ve
.PP
Output:
.PP
.Vb 2
\& 20130722
\& Your version is at least 20130722.
.Ve
.PP
If using \s-1GNU\s0 Parallel for research the BibTeX citation can be
generated using \-\-bibtex.
.PP
.Vb 1
\& parallel \-\-bibtex
.Ve
.PP
Output:
.PP
.Vb 12
\& @article{Tange2011a,
\& title = {GNU Parallel \- The Command\-Line Power Tool},
\& author = {O. Tange},
\& address = {Frederiksberg, Denmark},
\& journal = {;login: The USENIX Magazine},
\& month = {Feb},
\& number = {1},
\& volume = {36},
\& url = {http://www.gnu.org/s/parallel},
\& year = {2011},
\& pages = {42\-47}
\& }
.Ve
.PP
With \-\-max\-line\-length\-allowed \s-1GNU\s0 Parallel will report the maximal
size of the command line:
.PP
.Vb 1
\& parallel \-\-max\-line\-length\-allowed
.Ve
.PP
Output (may vary on different systems):
.PP
.Vb 1
\& 131071
.Ve
.PP
\&\-\-number\-of\-cpus and \-\-number\-of\-cores run system specific code to
determine the number of CPUs and \s-1CPU\s0 cores on the system. On
unsupported platforms they will return 1:
.PP
.Vb 2
\& parallel \-\-number\-of\-cpus
\& parallel \-\-number\-of\-cores
.Ve
.PP
Output (may vary on different systems):
.PP
.Vb 2
\& 4
\& 64
.Ve
.SH "Profiles"
.IX Header "Profiles"
The defaults for \s-1GNU\s0 Parallel can be changed systemwide by putting the
command line options in /etc/parallel/config. They can be changed for
a user by putting them in ~/.parallel/config.
.PP
Profiles work the same way, but have to be referred to with \-\-profile:
.PP
.Vb 6
\& echo \*(Aq\-S :,\*(Aq$SERVER1 > ~/.parallel/cluster
\& echo \*(Aq\-\-nice 17\*(Aq >> ~/.parallel/cluster
\& echo \*(Aq\-\-filter\-hosts\*(Aq >> ~/.parallel/cluster
\& echo \*(Aq\-\-timeout 300%\*(Aq >> ~/.parallel/cluster
\& echo \*(Aq\-\-env _\*(Aq >> ~/.parallel/cluster
\& parallel \-\-profile cluster echo ::: A B C
.Ve
.PP
Output:
.PP
.Vb 3
\& A
\& B
\& C
.Ve
.PP
Profiles can be combined:
.PP
.Vb 2
\& echo \*(Aq\-vv \-\-dry\-run\*(Aq > ~/.parallel/dryverbose
\& parallel \-\-profile dryverbose \-\-profile cluster echo ::: A B C
.Ve
.PP
Output:
.PP
.Vb 10
\& ssh \-tt \-oLogLevel=quiet lo \*(Aqeval \`echo $SHELL | grep "/t\e{0,1\e}csh" > /dev/null && echo setenv PARALLEL_SEQ \*(Aq$PARALLEL_SEQ\*(Aq\e; setenv PARALLEL_PID \*(Aq$PARALLEL_PID\*(Aq || echo PARALLEL_SEQ=\*(Aq$PARALLEL_SEQ\*(Aq\e;export PARALLEL_SEQ\e; PARALLEL_PID=\*(Aq$PARALLEL_PID\*(Aq\e;export PARALLEL_PID\` ;\*(Aq tty\e \e>/dev/null\e \e&\e&\e stty\e isig\e \-onlcr\e \-echo\e;echo\e \e$SHELL\e \e|\e grep\e \e"/t\e\e\e{0,1\e\e\e}csh\e"\e \e>\e /dev/null\e \e&\e&\e setenv\e SERVER1\e lo\e \e&\e&\e setenv\e MYVAR\e foo\e\e\e bar\e \e&\e&\e setenv\e VAR\e foo\e \e&\e&\e setenv\e my_func\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e setenv\e my_func2\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e|\e|\e export\e SERVER1=lo\e \e&\e&\e export\e MYVAR=foo\e\e\e bar\e \e&\e&\e export\e VAR=foo\e \e&\e&\e export\e my_func=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e export\e my_func2=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e eval\e my_func\e"\e$my_func\e"\e \e&\e&\e eval\e my_func2\e"\e$my_func2\e"\e;\e\enice\e \-n17\e /bin/bash\e \-c\e echo\e\e\e A;
\& ssh \-tt \-oLogLevel=quiet lo \*(Aqeval \`echo $SHELL | grep "/t\e{0,1\e}csh" > /dev/null && echo setenv PARALLEL_SEQ \*(Aq$PARALLEL_SEQ\*(Aq\e; setenv PARALLEL_PID \*(Aq$PARALLEL_PID\*(Aq || echo PARALLEL_SEQ=\*(Aq$PARALLEL_SEQ\*(Aq\e;export PARALLEL_SEQ\e; PARALLEL_PID=\*(Aq$PARALLEL_PID\*(Aq\e;export PARALLEL_PID\` ;\*(Aq tty\e \e>/dev/null\e \e&\e&\e stty\e isig\e \-onlcr\e \-echo\e;echo\e \e$SHELL\e \e|\e grep\e \e"/t\e\e\e{0,1\e\e\e}csh\e"\e \e>\e /dev/null\e \e&\e&\e setenv\e SERVER1\e lo\e \e&\e&\e setenv\e MYVAR\e foo\e\e\e bar\e \e&\e&\e setenv\e VAR\e foo\e \e&\e&\e setenv\e my_func\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e setenv\e my_func2\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e|\e|\e export\e SERVER1=lo\e \e&\e&\e export\e MYVAR=foo\e\e\e bar\e \e&\e&\e export\e VAR=foo\e \e&\e&\e export\e my_func=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e export\e my_func2=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e eval\e my_func\e"\e$my_func\e"\e \e&\e&\e eval\e my_func2\e"\e$my_func2\e"\e;\e\enice\e \-n17\e /bin/bash\e \-c\e echo\e\e\e B;
\& ssh \-tt \-oLogLevel=quiet lo \*(Aqeval \`echo $SHELL | grep "/t\e{0,1\e}csh" > /dev/null && echo setenv PARALLEL_SEQ \*(Aq$PARALLEL_SEQ\*(Aq\e; setenv PARALLEL_PID \*(Aq$PARALLEL_PID\*(Aq || echo PARALLEL_SEQ=\*(Aq$PARALLEL_SEQ\*(Aq\e;export PARALLEL_SEQ\e; PARALLEL_PID=\*(Aq$PARALLEL_PID\*(Aq\e;export PARALLEL_PID\` ;\*(Aq tty\e \e>/dev/null\e \e&\e&\e stty\e isig\e \-onlcr\e \-echo\e;echo\e \e$SHELL\e \e|\e grep\e \e"/t\e\e\e{0,1\e\e\e}csh\e"\e \e>\e /dev/null\e \e&\e&\e setenv\e SERVER1\e lo\e \e&\e&\e setenv\e MYVAR\e foo\e\e\e bar\e \e&\e&\e setenv\e VAR\e foo\e \e&\e&\e setenv\e my_func\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e setenv\e my_func2\e \e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e|\e|\e export\e SERVER1=lo\e \e&\e&\e export\e MYVAR=foo\e\e\e bar\e \e&\e&\e export\e VAR=foo\e \e&\e&\e export\e my_func=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e export\e my_func2=\e\e\e(\e\e\e)\e\e\e \e\e\e{\e\e\e \e\e\e echo\e\e\e in\e\e\e my_func2\e\e\e \e\e\e$VAR\e\e\e \e\e\e$1\e"\*(Aq
\& \*(Aq\e"\e\e\e}\e \e&\e&\e eval\e my_func\e"\e$my_func\e"\e \e&\e&\e eval\e my_func2\e"\e$my_func2\e"\e;\e\enice\e \-n17\e /bin/bash\e \-c\e echo\e\e\e C;
.Ve
.SH "Spread the word"
.IX Header "Spread the word"
I hope you have learned something from this tutorial.
.PP
If you like \s-1GNU\s0 Parallel:
.IP "\(bu" 2
(Re\-)walk through the tutorial if you have not done so in the past year
(http://www.gnu.org/software/parallel/parallel_tutorial.html)
.IP "\(bu" 2
Give a demo at your local user group/team/colleagues
.IP "\(bu" 2
Post the intro videos and the tutorial on Reddit, Diaspora*,
forums, blogs, Identi.ca, Google+, Twitter, Facebook, LinkedIn,
mailing lists
.IP "\(bu" 2
Request or write a review for your favourite blog or magazine
.IP "\(bu" 2
Invite me for your next conference
.PP
If you use \s-1GNU\s0 Parallel for research:
.IP "\(bu" 2
Please cite \s-1GNU\s0 Parallel in your publications (use \-\-bibtex)
.PP
If \s-1GNU\s0 Parallel saves you money:
.IP "\(bu" 2
(Have your company) donate to \s-1FSF\s0 or become a member
https://my.fsf.org/donate/
.PP
(C) 2013,2014 Ole Tange, GPLv3