diff --git a/src/Makefile.am b/src/Makefile.am index 95766761..56953c71 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -31,7 +31,7 @@ doc_DATA = parallel.html env_parallel.html sem.html sql.html \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ parallel_alternatives.pdf parcat.pdf parset.pdf parsort.pdf \ - parallel_cheat_bw.pdf + parallel_cheat_bw.pdf parallel_options_map.pdf endif web: sphinx @@ -345,6 +345,10 @@ parallel_cheat_bw.pdf: parallel_cheat_bw.fodt libreoffice --headless --convert-to pdf parallel_cheat_bw.fodt \ || echo "Warning: libreoffice failed. Using old parallel_cheat_bw.pdf" +parallel_options_map.pdf: parallel.pod + ./pod2graph parallel.pod > parallel_options_map.pdf \ + || echo "Warning: pod2graph failed. Using old parallel_options_map.pdf" + sem: parallel ln -fs parallel sem @@ -365,7 +369,7 @@ DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ parallel_alternatives.pdf parcat.pdf parset.pdf parsort.pdf \ - parallel_cheat_bw.pdf + parallel_cheat_bw.pdf parallel_options_map.pdf EXTRA_DIST = parallel sem sql niceload parcat parset parsort \ env_parallel env_parallel.ash env_parallel.bash \ diff --git a/src/parallel.pod b/src/parallel.pod index 7ab850c0..e6ab23e2 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -134,7 +134,7 @@ Input line. This replacement string will be replaced by a full line read from the input source. The input source is normally stdin (standard input), but -can also be given with B<-a>, B<:::>, or B<::::>. +can also be given with B<--arg-file>, B<:::>, or B<::::>. The replacement string B<{}> can be changed with B<-I>. @@ -157,13 +157,14 @@ This replacement string will be replaced by the input with the extension removed. 
If the input line contains B<.> after the last B, the last B<.> until the end of the string will be removed and B<{.}> will be replaced with the remaining. E.g. I becomes -I, I becomes I, -I becomes I, I remains I. If the -input line does not contain B<.> it will remain unchanged. +I, I becomes I, +I becomes I, I remains +I. If the input line does not contain B<.> it will remain +unchanged. -The replacement string B<{.}> can be changed with B<--er>. +The replacement string B<{.}> can be changed with B<--extensionreplace> -To understand replacement strings see B<{}>. +See also: B<{}> B<--extensionreplace> =item B<{/}> @@ -173,10 +174,7 @@ Basename of input line. This replacement string will be replaced by the input with the directory part removed. -The replacement string B<{/}> can be changed with -B<--basenamereplace>. - -To understand replacement strings see B<{}>. +See also: B<{}> B<--basenamereplace> =item B<{//}> @@ -186,10 +184,7 @@ Dirname of input line. This replacement string will be replaced by the dir of the input line. See B(1). -The replacement string B<{//}> can be changed with -B<--dirnamereplace>. - -To understand replacement strings see B<{}>. +See also: B<{}> B<--dirnamereplace> =item B<{/.}> @@ -200,10 +195,7 @@ This replacement string will be replaced by the input with the directory and extension part removed. B<{/.}> is a combination of B<{/}> and B<{.}>. -The replacement string B<{/.}> can be changed with -B<--basenameextensionreplace>. - -To understand replacement strings see B<{}>. +See also: B<{}> B<--basenameextensionreplace> =item B<{#}> @@ -213,9 +205,7 @@ Sequence number of the job to run. This replacement string will be replaced by the sequence number of the job being run. It contains the same number as $PARALLEL_SEQ. -The replacement string B<{#}> can be changed with B<--seqreplace>. - -To understand replacement strings see B<{}>. 
+See also: B<{}> B<--seqreplace> =item B<{%}> @@ -226,8 +216,6 @@ This replacement string will be replaced by the job's slot number between 1 and number of jobs to run in parallel. There will never be 2 jobs running at the same time with the same job slot number. -The replacement string B<{%}> can be changed with B<--slotreplace>. - If the job needs to be retried (e.g using B<--retries> or B<--retry-failed>) the job slot is not automatically updated. You should then instead use B<$PARALLEL_JOBSLOT>: @@ -252,7 +240,7 @@ should then instead use B<$PARALLEL_JOBSLOT>: Notice how {%} and $PARALLEL_JOBSLOT differ in the retry run of C and D. -To understand replacement strings see B<{}>. +See also: B<{}> B<--jobs> B<--slotreplace> =item B<{>IB<}> @@ -260,12 +248,10 @@ To understand replacement strings see B<{}>. Argument from input source I or the I'th argument. This positional replacement string will be replaced by the input from -input source I (when used with B<-a> or B<::::>) or with the +input source I (when used with B<--arg-file> or B<::::>) or with the I'th argument (when used with B<-N>). If I is negative it refers to the I'th last argument. -To understand replacement strings see B<{}>. - See also: B<{}> B<{>I.B<}> B<{>I/B<}> B<{>I//B<}> B<{>I/.B<}> @@ -278,11 +264,11 @@ extension. B<{>I.B<}> is a combination of B<{>IB<}> and B<{.}>. This positional replacement string will be replaced by the input from -input source I (when used with B<-a> or B<::::>) or with the +input source I (when used with B<--arg-file> or B<::::>) or with the I'th argument (when used with B<-N>). The input will have the extension removed. -To understand positional replacement strings see B<{>IB<}>. +See also: B<{>IB<}> B<{.}> =item B<{>I/B<}> @@ -292,11 +278,11 @@ Basename of argument from input source I or the I'th argument. B<{>I/B<}> is a combination of B<{>IB<}> and B<{/}>.
This positional replacement string will be replaced by the input from -input source I (when used with B<-a> or B<::::>) or with the +input source I (when used with B<--arg-file> or B<::::>) or with the I'th argument (when used with B<-N>). The input will have the directory (if any) removed. -To understand positional replacement strings see B<{>IB<}>. +See also: B<{>IB<}> B<{/}> =item B<{>I//B<}> @@ -306,10 +292,10 @@ Dirname of argument from input source I or the I'th argument. B<{>I//B<}> is a combination of B<{>IB<}> and B<{//}>. This positional replacement string will be replaced by the dir of the -input from input source I (when used with B<-a> or B<::::>) or with +input from input source I (when used with B<--arg-file> or B<::::>) or with the I'th argument (when used with B<-N>). See B(1). -To understand positional replacement strings see B<{>IB<}>. +See also: B<{>IB<}> B<{//}> =item B<{>I/.B<}> @@ -321,11 +307,11 @@ B<{>I/.B<}> is a combination of B<{>IB<}>, B<{/}>, and B<{.}>. This positional replacement string will be replaced by the input from -input source I (when used with B<-a> or B<::::>) or with the +input source I (when used with B<--arg-file> or B<::::>) or with the I'th argument (when used with B<-N>). The input will have the directory (if any) and extension removed. -To understand positional replacement strings see B<{>IB<}>. +See also: B<{>IB<}> B<{/.}> =item B<{=>IB<=}> @@ -412,7 +398,7 @@ Example: parallel csh -c {= '$_="mkdir ".Q($_)' =} ::: '12" dir' seq 50 | parallel echo job {#} of {= '$_=total_jobs()' =} -See also: B<--rpl> B<--parens> +See also: B<--rpl> B<--parens> B<{}> B<{=>I IB<=}> =item B<{=>I IB<=}> @@ -421,7 +407,7 @@ Positional equivalent to B<{=perl expression=}>. To understand positional replacement strings see B<{>IB<}>. -See also: B<{=perl expression=}> B<{>IB<}> +See also: B<{=>IB<=}> B<{>IB<}> =item B<:::> I @@ -449,7 +435,7 @@ generated. E.g. 
::: 1 2 ::: a b c will result in the combinations (1,a) (1,b) (1,c) (2,a) (2,b) (2,c). This is useful for replacing nested for-loops. -B<:::>, B<::::>, and B<-a> can be mixed. So these are equivalent: +B<:::>, B<::::>, and B<--arg-file> can be mixed. So these are equivalent: parallel echo {1} {2} {3} ::: 6 7 ::: 4 5 ::: 1 2 3 parallel echo {1} {2} {3} :::: <(seq 6 7) <(seq 4 5) \ @@ -463,7 +449,7 @@ B<:::>, B<::::>, and B<-a> can be mixed. So these are equivalent: seq 4 5 | parallel echo {1} {2} {3} :::: <(seq 6 7) - \ ::: 1 2 3 -See also: B<--arg-sep> B<-a> B<::::> B<:::+> B<::::+> B<--link> +See also: B<--arg-sep> B<--arg-file> B<::::> B<:::+> B<::::+> B<--link> =item B<:::+> I @@ -482,11 +468,11 @@ See also: B<::::+> B<--link> =item B<::::> I -Another way to write B<-a> I B<-a> I ... +Another way to write B<--arg-file> I B<--arg-file> I ... B<:::> and B<::::> can be mixed. -See also: B<-a> B<:::> B<--link> +See also: B<--arg-file> B<:::> B<::::+> B<--link> =item B<::::+> I @@ -496,7 +482,7 @@ Like B<::::> but linked like B<--link> to the previous input source. Contrary to B<--link>, values do not wrap: The shortest input source determines the length. -See also: B<-a> B<:::+> B<--link> +See also: B<--arg-file> B<:::+> B<--link> =item B<--null> @@ -509,9 +495,9 @@ Normally input lines will end in \n (newline). If they end in \0 (NUL), then use this option. It is useful for processing arguments that may contain \n (newline). -Shorthand for B<-d '\0'>. +Shorthand for B<--delimiter '\0'>. -See also: B<-d> +See also: B<--delimiter> =item B<--arg-file> I @@ -524,9 +510,9 @@ If you use this option, stdin (standard input) is given to the first process run. Otherwise, stdin (standard input) is redirected from /dev/null. -If multiple B<-a> are given, each I will be treated as an +If multiple B<--arg-file> are given, each I will be treated as an input source, and all combinations of input sources will be -generated. E.g. The file B contains B<1 2>, the file +generated. E.g. 
The file B contains B<1 2>, the file B contains B. B<-a foo> B<-a bar> will result in the combinations (1,a) (1,b) (1,c) (2,a) (2,b) (2,c). This is useful for replacing nested for-loops. @@ -584,7 +570,7 @@ B<--bf> can be specified to transfer more basefiles. The I will be transferred the same way as B<--transferfile>. See also: B<--sshlogin> B<--transfer> B<--return> B<--cleanup> - +B<--workdir> =item B<--basenamereplace> I @@ -630,7 +616,7 @@ I is small (<10), slower if it is big (>100). B<--bin> requires B<--pipe> and a fixed numeric value for B<--jobs>. -See also: SPREADING BLOCKS OF DATA B<--group-by> B<--roundrobin> +See also: SPREADING BLOCKS OF DATA B<--group-by> B<--round-robin> B<--shard> @@ -692,24 +678,24 @@ length of one record. For performance reasons I should be bigger than a two records. GNU B will warn you and automatically increase the size if you choose a I that is too small. -If you use B<-N>, B<--block-size> should be bigger than N+1 records. +If you use B<-N>, B<--block> should be bigger than N+1 records. I defaults to 1M. -When using B<--pipepart> a negative block size is not interpreted as a +When using B<--pipe-part> a negative block size is not interpreted as a blocksize but as the number of blocks each jobslot should have. So this will run 10*5 = 50 jobs in total: - parallel --pipepart -a myfile --block -10 -j5 wc + parallel --pipe-part -a myfile --block -10 -j5 wc -This is an efficient alternative to B<--roundrobin> because data is +This is an efficient alternative to B<--round-robin> because data is never read by GNU B, but you can still have very few jobslots process large amounts of data. -See also: UNIT PREFIX B<-N> B<--pipe> B<--pipepart> B<--roundrobin> +See also: UNIT PREFIX B<-N> B<--pipe> B<--pipe-part> B<--round-robin> +B<--block-timeout> - -=item B<--blocktimeout> I +=item B<--block-timeout> I =item B<--bt> I @@ -720,21 +706,21 @@ partial block read so far. I is in seconds, but can be postfixed with s, m, h, or d. 
-See also: TIME POSTFIXES B<--pipe> +See also: TIME POSTFIXES B<--pipe> B<--block> =item B<--cat> Create a temporary file with content. -Normally B<--pipe>/B<--pipepart> will give data to the program on +Normally B<--pipe>/B<--pipe-part> will give data to the program on stdin (standard input). With B<--cat> GNU B will create a temporary file with the name in B<{}>, so you can do: B. -Implies B<--pipe> unless B<--pipepart> is used. +Implies B<--pipe> unless B<--pipe-part> is used. -See also: B<--pipe> B<--pipepart> B<--fifo> +See also: B<--pipe> B<--pipe-part> B<--fifo> =item B<--cleanup> @@ -756,11 +742,11 @@ With B<--return> the file transferred from the remote computer will be removed on the remote computer. Directories on the remote computer containing the file will be removed if they are empty. -B<--cleanup> is ignored when not used with B<--transferfile> or -B<--return>. +B<--cleanup> is ignored when not used with B<--basefile>, +B<--transfer>, B<--transferfile> or B<--return>. -See also: B<--basefile> B<--transfer> B<--sshlogin> B<--return> -B<--cleanup> +See also: B<--basefile> B<--transfer> B<--transferfile> B<--sshlogin> +B<--return> =item B<--colsep> I @@ -836,7 +822,7 @@ Even quoted newlines are parsed correctly: When used with B<--pipe> only pass full CSV-records. -See also: B<--csv> B<--pipe> +See also: B<--pipe> B<--link> B<{>IB<}> B<--colsep> B<--header> =item B<--ctag> I @@ -857,19 +843,20 @@ Color tagstring. See also: B<--ctag> B<--tagstring> -=item B<--delay> I +=item B<--delay> I -Delay starting next job by I. +Delay starting next job by I. -GNU B will pause I after starting each -job. I is in seconds, but can be postfixed with s, m, h, or d. +GNU B will not start another job for the next I. -If you append 'auto' to I (e.g. 13m3sauto) GNU -B will automatically try to find the optimal value: If a job fails, -I is increased by 30%. If a job succeeds, I is +I is in seconds, but can be postfixed with s, m, h, or d. + +If you append 'auto' to I (e.g. 
13m3sauto) GNU B +will automatically try to find the optimal value: If a job fails, +I is increased by 30%. If a job succeeds, I is decreased by 10%. -See also: TIME POSTFIXES B<--retries> +See also: TIME POSTFIXES B<--retries> B<--ssh-delay> =item B<--delimiter> I @@ -901,7 +888,7 @@ Print the job to run on stdout (standard output), but do not run the job. Use B<-v -v> to include the wrapping that GNU B generates -(for remote jobs, B<--tmux>, B<--nice>, B<--pipe>, B<--pipepart>, +(for remote jobs, B<--tmux>, B<--nice>, B<--pipe>, B<--pipe-part>, B<--fifo> and B<--cat>). Do not count on this literally, though, as the job may be scheduled on another computer or the local computer if : is in the list. @@ -960,7 +947,7 @@ variables except for the ones mentioned in ~/.parallel/ignored_vars. To copy the full environment (both exported and not exported variables, arrays, and functions) use B. -See also: B<--record-env> B<--session> B<--sshlogin> B +See also: B<--record-env> B<--session> B<--sshlogin> I B @@ -998,7 +985,7 @@ See also: B<--bg> B Create a temporary fifo with content. -Normally B<--pipe> and B<--pipepart> will give data to the program on +Normally B<--pipe> and B<--pipe-part> will give data to the program on stdin (standard input). With B<--fifo> GNU B will create a temporary fifo with the name in B<{}>, so you can do: @@ -1009,9 +996,9 @@ Beware: If the fifo is never opened for reading, the job will block forever: seq 1000000 | parallel --fifo echo This will block seq 1000000 | parallel --fifo 'echo This will not block < {}' -Implies B<--pipe> unless B<--pipepart> is used. +Implies B<--pipe> unless B<--pipe-part> is used. -See also: B<--cat> B<--pipe> B<--pipepart> +See also: B<--cat> B<--pipe> B<--pipe-part> =item B<--filter> I @@ -1024,6 +1011,8 @@ I can contain replacement strings and Perl code. 
Example: Outputs: 1,1 1,2 1,3 2,2 2,3 3,3 +See also: B B<--no-run-if-empty> + =item B<--filter-hosts> @@ -1075,7 +1064,7 @@ See also: B<--line-buffer> B<--ungroup> Group input by value. -Combined with B<--pipe>/B<--pipepart> B<--group-by> groups lines with +Combined with B<--pipe>/B<--pipe-part> B<--group-by> groups lines with the same value into a record. The value can be computed from the full line or from a single column. @@ -1093,7 +1082,7 @@ Use the value in the column numbered. Treat the first line as a header and use the value in the column named. -(Not supported with B<--pipepart>). +(Not supported with B<--pipe-part>). =item Z<> perl expression @@ -1107,7 +1096,7 @@ Put the value of the column put in $_, run the perl expression, and use $_ as th Put the value of the column put in $_, run the perl expression, and use $_ as the value. -(Not supported with B<--pipepart>). +(Not supported with B<--pipe-part>). =back @@ -1143,8 +1132,8 @@ UserID when grouping: cat table.csv | parallel --pipe --colsep , --header : \ --group-by 'UserID s/\D//g' -kN1 wc -See also: SPREADING BLOCKS OF DATA B<--pipe> B<--pipepart> B<--bin> -B<--shard> B<--roundrobin> +See also: SPREADING BLOCKS OF DATA B<--pipe> B<--pipe-part> B<--bin> +B<--shard> B<--round-robin> =item B<--help> @@ -1266,7 +1255,7 @@ B<--header :> is an alias for B<--header '.*\n'>. If I is a number, it is a fixed number of lines. -See also: B<--colsep> B<--pipe> B<--pipepart> +See also: B<--colsep> B<--pipe> B<--pipe-part> =item B<--hostgroups> @@ -1298,16 +1287,20 @@ See also: B<--sshlogin> B<$PARALLEL_HOSTGROUPS> B<$PARALLEL_ARGHOSTGROUPS> Use the replacement string I instead of B<{}>. +See also: B<{}> -=item B<--replace>[=I] -=item B<-i>[I] +=item B<--replace> [I] + +=item B<-i> [I] This option is deprecated; use B<-I> instead. This option is a synonym for B<-I>I if I is specified, and for B<-I {}> otherwise. +See also: B<{}> + =item B<--joblog> I @@ -1373,8 +1366,8 @@ Add N to the number of CPU threads. 
Run this many jobs in parallel. -See also: B<--use-cores-instead-of-threads> -B<--use-sockets-instead-of-threads>. +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> =item B<--jobs> I<-N> @@ -1390,8 +1383,8 @@ Subtract N from the number of CPU threads. Run this many jobs in parallel. If the evaluated number is less than 1 then 1 will be used. -See also: B<--use-cores-instead-of-threads> -B<--use-sockets-instead-of-threads> +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> =item B<--jobs> I% @@ -1406,8 +1399,8 @@ Multiply N% with the number of CPU threads. Run this many jobs in parallel. -See also: B<--use-cores-instead-of-threads> -B<--use-sockets-instead-of-threads> +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> =item B<--jobs> I @@ -1446,12 +1439,12 @@ If used with B<--onall> or B<--nonall> the output will grouped by sshlogin in sorted order. B<--keep-order> cannot keep the output order when used with B<--pipe ---roundrobin>. Here it instead means, that the jobslots will get the +--round-robin>. Here it instead means, that the jobslots will get the same blocks as input in the same order in every run if the input is kept the same. Run each of these twice and compare: - seq 10000000 | parallel --pipe --roundrobin 'sleep 0.$RANDOM; wc' - seq 10000000 | parallel --pipe -k --roundrobin 'sleep 0.$RANDOM; wc' + seq 10000000 | parallel --pipe --round-robin 'sleep 0.$RANDOM; wc' + seq 10000000 | parallel --pipe -k --round-robin 'sleep 0.$RANDOM; wc' B<-k> only affects the order in which the output is printed - not the order in which jobs are run. @@ -1474,11 +1467,11 @@ I can be postfixed with K, M, G, T, P, k, m, g, t, or p. Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. 
-See also: UNIT PREFIX B<-N> B<-l> B<--block-size> B<-X> B<-m> +See also: UNIT PREFIX B<-N> B<--max-lines> B<--block> B<-X> B<-m> B<--xargs> B<--pipe> -=item B<--max-lines>[=I] +=item B<--max-lines> [I] =item B<-l>[I] @@ -1493,7 +1486,7 @@ B<-l 0> is an alias for B<-l 1>. Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. -See also: UNIT PREFIX B<-N> B<-l> B<--block-size> B<-X> B<-m> +See also: UNIT PREFIX B<-N> B<--block> B<-X> B<-m> B<--xargs> B<--pipe> @@ -1577,10 +1570,10 @@ mix. Compare: See also: B<--group> B<--ungroup> B<--keep-order> -=item B<--xapply> - =item B<--link> +=item B<--xapply> + Link input sources. Read multiple input sources like the command B. If multiple @@ -1655,13 +1648,13 @@ The I can be postfixed with K, M, G, T, P, k, m, g, t, or p. If the jobs take up very different amount of RAM, GNU B will only start as many as there is memory for. If less than I bytes are free, no more jobs will be started. If less than 50% I bytes -are free, the youngest job will be killed (as per B<--termseq>), and +are free, the youngest job will be killed (as per B<--term-seq>), and put back on the queue to be run later. B<--retries> must be set to determine how many times GNU B should retry a given job. -See also: UNIT PREFIX B<--termseq> B<--retries> B<--memsuspend> +See also: UNIT PREFIX B<--term-seq> B<--retries> B<--memsuspend> =item B<--memsuspend> I @@ -1702,8 +1695,10 @@ a certain version of GNU B: echo halt done=50% supported from version 20170422 && parallel --halt now,done=50% echo ::: {1..100} +See also: B<--version> -=item B<--max-args>=I + +=item B<--max-args> I =item B<-n> I @@ -1721,8 +1716,10 @@ UNIT PREFIX). Implies B<-X> unless B<-m> is set. +See also: B<-X> B<-m> B<--xargs> B<--max-replace-args> -=item B<--max-replace-args>=I + +=item B<--max-replace-args> I =item B<-N> I @@ -1746,7 +1743,7 @@ I can be postfixed with K, M, G, T, P, k, m, g, t, or p. When used with B<--pipe> B<-N> is the number of records to read. 
This is somewhat slower than B<--block>. -See also: UNIT PREFIX B<--pipe> B<--block> B<-m> B<-X> +See also: UNIT PREFIX B<--pipe> B<--block> B<-m> B<-X> B<--max-args> =item B<--nonall> @@ -1761,7 +1758,7 @@ computers to log into in parallel. This is useful for running the same command (e.g. uptime) on a list of servers. -See also: B<--onall> +See also: B<--onall> B<--sshlogin> =item B<--onall> @@ -1778,7 +1775,7 @@ all the output from one server will be grouped together. B<--joblog> will contain an entry for each job on each server, so there will be several job sequence 1. -See also: B<--nonall> +See also: B<--nonall> B<--sshlogin> =item B<--output-as-files> @@ -1787,6 +1784,8 @@ See also: B<--nonall> =item B<--files> +Save output to files. + Instead of printing the output to stdout (standard output) the output of each job is saved in a file and the filename is then printed. @@ -1812,36 +1811,36 @@ You can limit the number of records to be passed with B<-N>, and set the record size with B<-L>. B<--pipe> maxes out at around 1 GB/s input, and 100 MB/s output. If -performance is important use B<--pipepart>. +performance is important use B<--pipe-part>. B<--fifo> and B<--cat> will give stdin (standard input) on a fifo or a temporary file. -If data is arriving slowly, you can use B<--blocktimeout> to finish +If data is arriving slowly, you can use B<--block-timeout> to finish reading a block early. The data can be spread between the jobs in specific ways using B<--round-robin>, B<--bin>, B<--shard>, B<--group-by>. See the section: SPREADING BLOCKS OF DATA -See also: B<--block> B<--blocktimeout> B<--recstart> B<--recend> -B<--fifo> B<--cat> B<--pipepart> B<-N> B<-L> +See also: B<--block> B<--block-timeout> B<--recstart> B<--recend> +B<--fifo> B<--cat> B<--pipe-part> B<-N> B<-L> B<--round-robin> -=item B<--pipepart> +=item B<--pipe-part> Pipe parts of a physical file. -B<--pipepart> works similar to B<--pipe>, but is much faster. 
+B<--pipe-part> works similar to B<--pipe>, but is much faster. -B<--pipepart> has a few limitations: +B<--pipe-part> has a few limitations: =over 3 =item * The file must be a normal file or a block device (technically it must -be seekable) and must be given using B<-a> or B<::::>. The file cannot +be seekable) and must be given using B<--arg-file> or B<::::>. The file cannot be a pipe, a fifo, or a stream as they are not seekable. If using a block device with lot of NUL bytes, remember to set @@ -1855,7 +1854,7 @@ where records end. =back -See also: <--pipe> +See also: B<--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::> =item B<--plain> @@ -1866,6 +1865,8 @@ Ignore any B<--profile>, $PARALLEL, and ~/.parallel/config to get full control on the command line (used by GNU B internally when called with B<--sshlogin>). +See also: B<--profile> + =item B<--plus> @@ -1912,7 +1913,7 @@ inspired by bash's parameter expansion: {,str} lowercase str if found at the start {,,str} lowercase str -See also: B<--rpl> +See also: B<--rpl> B<{}> =item B<--progress> @@ -1935,10 +1936,14 @@ See also: B<--eta> B<--bar> =item B<--max-line-length-allowed> +Print maximal command line length. + Print the maximal number of characters allowed on the command line and exit (used by GNU B itself to determine the line length on remote computers). +See also: B<--show-limits> + =item B<--number-of-cpus> (obsolete) @@ -1950,6 +1955,9 @@ Print the number of physical CPU cores and exit. Print the number of physical CPU cores and exit (used by GNU B itself to determine the number of physical CPU cores on remote computers). +See also: B<--number-of-sockets> B<--number-of-threads> +B<--use-cores-instead-of-threads> B<--jobs> + =item B<--number-of-sockets> @@ -1957,6 +1965,9 @@ Print the number of filled CPU sockets and exit (used by GNU B itself to determine the number of filled CPU sockets on remote computers).
+See also: B<--number-of-cores> B<--number-of-threads> +B<--use-sockets-instead-of-threads> B<--jobs> + =item B<--number-of-threads> @@ -1964,6 +1975,8 @@ Print the number of hyperthreaded CPU cores and exit (used by GNU B itself to determine the number of hyperthreaded CPU cores on remote computers). +See also: B<--number-of-cores> B<--number-of-sockets> B<--jobs> + =item B<--no-keep-order> @@ -1989,6 +2002,8 @@ be run at level 10, but remote jobs run at nice level 5). =item B<-p> +Ask user before running a job. + Prompt the user about whether to run each command line and read a line from the terminal. Only run the command line if the response starts with 'y' or 'Y'. Implies B<-t>. @@ -1996,7 +2011,9 @@ with 'y' or 'Y'. Implies B<-t>. =item B<--parens> I -Define start and end parenthesis for B<{= perl expression =}>. The +Use I instead of B<{==}>. + +Define start and end parenthesis for B<{=perl expression=}>. The left and the right parenthesis can be multiple characters and are assumed to be the same length. The default is B<{==}> giving B<{=> as the start parenthesis and B<=}> as the end parenthesis. @@ -2006,17 +2023,18 @@ B<,,>: parallel --parens ,,,, echo foo is ,,s/I/O/g,, ::: FII -See also: B<--rpl> B<{= perl expression =}> +See also: B<--rpl> B<{=>IB<=}> =item B<--profile> I =item B<-J> I -Use profile I for options. This is useful if you want to -have multiple profiles. You could have one profile for running jobs in -parallel on the local computer and a different profile for running jobs -on remote computers. See the section PROFILE FILES for examples. +Use profile I for options. + +This is useful if you want to have multiple profiles. You could have +one profile for running jobs in parallel on the local computer and a +different profile for running jobs on remote computers. I corresponds to the file ~/.parallel/I. @@ -2025,6 +2043,8 @@ the profiles conflict, the later ones will be used. 
Default: ~/.parallel/config +See also: PROFILE FILES + =item B<--quote> @@ -2037,22 +2057,29 @@ interpreted by the shell (e.g. ; \ | *), use B<--quote> to escape these. The command must be a simple command (see B) without redirections and without variable assignments. -See the section QUOTING. Most people will not need this. Quoting is -disabled by default. +Most people will not need this. Quoting is disabled by default. + +See also: QUOTING I B<--shell-quote> B =item B<--no-run-if-empty> =item B<-r> +Do not run empty input. + If the stdin (standard input) only contains whitespace, do not run the command. If used with B<--pipe> this is slow. +See also: I B<--pipe> B<--interactive> + =item B<--noswap> +Do not start job if computer is swapping. + Do not start new jobs on a given computer if there is both swap-in and swap-out activity. @@ -2071,16 +2098,20 @@ See also: B<--memfree> B<--memsuspend> =item B<--record-env> +Record environment. + Record current environment variables in ~/.parallel/ignored_vars. This is useful before using B<--env _>. -See also: B<--env> B<--session> +See also: B<--env> B<--session> B =item B<--recstart> I =item B<--recend> I +Split record between I and I. + If B<--recstart> is given I will be used to split at record start. If B<--recend> is given I will be used to split at record end. @@ -2110,6 +2141,9 @@ See also: B<--pipe> B<--regexp> B<--remove-rec-sep> Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular expressions. This is slow, however. +See also: B<--pipe> B<--regexp> B<--remove-rec-sep> B<--recstart> +B<--recend> + =item B<--remove-rec-sep> @@ -2117,10 +2151,15 @@ expressions. This is slow, however. =item B<--rrs> +Remove record separator. + Remove the text matched by B<--recstart> and B<--recend> before piping it to the command. -Only used with B<--pipe>/B<--pipepart>. +Only used with B<--pipe>/B<--pipe-part>.
+ +See also: B<--pipe> B<--regexp> B<--pipe-part> B<--recstart> +B<--recend> =item B<--results> I @@ -2360,7 +2399,7 @@ B<--sshlogin> GNU B will re-use all the computers. This is useful if some jobs fail for no apparent reason (such as network failure). -See also: B<--termseq> +See also: B<--term-seq> B<--sshlogin> =item B<--return> I @@ -2400,13 +2439,14 @@ times: --sshlogin server.example.com \ --return {.}.out --return {.}.out2 touch {.}.out {.}.out2 -B<--return> is often used with B<--transferfile> and B<--cleanup>. - B<--return> is ignored when used with B<--sshlogin :> or when not used with B<--sshlogin>. For details on transferring see B<--transferfile>. +See also: B<--transfer> B<--transferfile> B<--sshlogin> B<--cleanup> +B<--workdir> + =item B<--round-robin> @@ -2415,14 +2455,14 @@ For details on transferring see B<--transferfile>. Distribute chunks of standard input in a round robin fashion. Normally B<--pipe> will give a single block to each instance of the -command. With B<--roundrobin> all blocks will at random be written to +command. With B<--round-robin> all blocks will at random be written to commands already running. This is useful if the command takes a long time to initialize. -B<--keep-order> will not work with B<--roundrobin> as it is +B<--keep-order> will not work with B<--round-robin> as it is impossible to track which input block corresponds to which output. -B<--roundrobin> implies B<--pipe>, except if B<--pipepart> is given. +B<--round-robin> implies B<--pipe>, except if B<--pipe-part> is given. See the section: SPREADING BLOCKS OF DATA. @@ -2431,6 +2471,8 @@ See also: B<--bin> B<--group-by> B<--shard> =item B<--rpl> 'I I' +Define replacement string. + Use I as a replacement string for I. This makes it possible to define your own replacement strings. 
GNU B's 7 replacement strings are implemented as: @@ -2521,19 +2563,23 @@ You can even use multiple matches: parallel --rpl '{(.*?)/(.*?)} $_="$$2$_$$1"' \ echo {swap/these} ::: -middle- -See also: B<{= perl expression =}> B<--parens> +See also: B<{=>IB<=}> B<--parens> =item B<--rsync-opts> I -Options to pass on to B. Setting B<--rsync-opts> takes -precedence over setting the environment variable $PARALLEL_RSYNC_OPTS. +Options to pass on to B. + +Setting B<--rsync-opts> takes precedence over setting the environment +variable $PARALLEL_RSYNC_OPTS. -=item B<--max-chars>=I +=item B<--max-chars> I =item B<-s> I +Limit length of command. + Use at most I characters per command line, including the command and initial-arguments and the terminating nulls at the ends of the argument strings. The largest allowed value is system-dependent, @@ -2543,33 +2589,42 @@ of your environment. The default value is the maximum. I can be postfixed with K, M, G, T, P, k, m, g, t, or p (see UNIT PREFIX). -Implies B<-X> unless B<-m> is set. +Implies B<-X> unless B<-m> or B<--xargs> is set. + +See also: B<-X> B<-m> B<--xargs> B<--max-line-length-allowed> +B<--show-limits> =item B<--show-limits> +Display limits given by the operating system. + Display the limits on the command-line length which are imposed by the operating system and the B<-s> option. Pipe the input from /dev/null (and perhaps specify --no-run-if-empty) if you don't want GNU B to do anything. +See also: B<--max-chars> B<--max-line-length-allowed> B<--version> + =item B<--semaphore> -Work as a counting semaphore. B<--semaphore> will cause GNU -B to start I in the background. When the number of -jobs given by B<--jobs> is reached, GNU B will wait for one of -these to complete before starting another command. +Work as a counting semaphore. + +B<--semaphore> will cause GNU B to start I in the +background. 
When the number of jobs given by B<--jobs> is reached, GNU +B will wait for one of these to complete before starting +another command. B<--semaphore> implies B<--bg> unless B<--fg> is specified. The command B is an alias for B. -See also: B B<--bg> B<--fg> B<--semaphorename> -B<--semaphoretimeout> B<--wait> +See also: B B<--bg> B<--fg> B<--semaphore-name> +B<--semaphore-timeout> B<--wait> -=item B<--semaphorename> I +=item B<--semaphore-name> I =item B<--id> I @@ -2588,7 +2643,7 @@ Implies B<--semaphore>. See also: B B<--semaphore> -=item B<--semaphoretimeout> I +=item B<--semaphore-timeout> I =item B<--st> I @@ -2611,6 +2666,8 @@ See also: B Use the replacement string I instead of B<{#}> for job sequence number. +See also: B<{#}> + =item B<--session> @@ -2620,7 +2677,7 @@ variables with names in B<$PARALLEL_IGNORED_NAMES> will not be copied. Only supported in B. -See also: B<--env> B<--record-env> +See also: B<--env> B<--record-env> B =item B<--shard> I @@ -2648,7 +2705,7 @@ B<--shard> requires B<--pipe> and a fixed numeric value for B<--jobs>. See the section: SPREADING BLOCKS OF DATA. -See also: B<--bin> B<--group-by> B<--roundrobin> +See also: B<--bin> B<--group-by> B<--round-robin> =item B<--shebang> @@ -2680,6 +2737,8 @@ On FreeBSD B is needed: There are many limitations of shebang (#!) depending on your operating system. See details on https://www.in-ulm.de/~mascheck/various/shebang/ +See also: B<--shebang-wrap> + =item B<--shebang-wrap> @@ -2710,24 +2769,37 @@ E.g. B<--shebang-wrap> must be set as the first option. +See also: B<--shebang> -=item B<--shellquote> + +=item B<--shell-quote> Does not run the command but quotes it. Useful for making quoted composed commands for GNU B. -Multiple B<--shellquote> with quote the string multiple times, so -B can be written as -B. +Multiple B<--shell-quote> will quote the string multiple times, so +B can be written as +B. + +See also: B<--quote> =item B<--shuf> -Shuffle jobs.
When having multiple input sources it is hard to -randomize jobs. --shuf will generate all jobs, and shuffle them before +Shuffle jobs. + +When having multiple input sources it is hard to randomize +jobs. B<--shuf> will generate all jobs, and shuffle them before running them. This is useful to get a quick preview of the results before running the full batch. +Combined with B<--halt soon,done=1%> you can run a random 1% sample of +all jobs: + + seq 1000 | parallel --shuf --halt soon,done=1% echo + +See also: B<--halt> + =item B<--skip-first-line> @@ -2737,10 +2809,10 @@ when called with B<--shebang>). =item B<--sql> I (obsolete) -Use B<--sqlmaster> instead. +Use B<--sql-master> instead. -=item B<--sqlmaster> I +=item B<--sql-master> I Submit jobs via SQL server. I must point to a table, which will contain the same information as B<--joblog>, the values from the input @@ -2781,13 +2853,17 @@ It can also be an alias from ~/.sql/aliases: :myalias mysql:///mydb/paralleljobs - -=item B<--sqlandworker> I - -Shorthand for: B<--sqlmaster> I B<--sqlworker> I. +See also: B<--sql-and-worker> B<--sql-worker> B<--joblog> -=item B<--sqlworker> I +=item B<--sql-and-worker> I + +Shorthand for: B<--sql-master> I B<--sql-worker> I. + +See also: B<--sql-master> B<--sql-worker> + + +=item B<--sql-worker> I Execute jobs via SQL server. Read the input sources variables from the table pointed to by I. The I on the command line @@ -2799,30 +2875,37 @@ once. If B<--sqlworker> runs on the local machine, the hostname in the SQL table will not be ':' but instead the hostname of the machine. +See also: B<--sql-master> B<--sql-and-worker> + =item B<--ssh> I GNU B defaults to using B for remote access. This can be overridden with B<--ssh>. It can also be set on a per server -basis (see B<--sshlogin>). +basis with B<--sshlogin>. + +See also: B<--sshlogin> -=item B<--sshdelay> I +=item B<--ssh-delay> I -Delay starting next ssh by I. GNU B will not start -another ssh for the next I. 
+Delay starting next ssh by I. -For details on I see B<--delay>. +GNU B will not start another ssh for the next I. +I is in seconds, but can be postfixed with s, m, h, or d. -=item B<-S> I<[@hostgroups/][ncpus/]sshlogin[,[@hostgroups/][ncpus/]sshlogin[,...]]> +See also: TIME POSTFIXES B<--sshlogin> B<--delay> -=item B<-S> I<@hostgroup> =item B<--sshlogin> I<[@hostgroups/][ncpus/]sshlogin[,[@hostgroups/][ncpus/]sshlogin[,...]]> =item B<--sshlogin> I<@hostgroup> +=item B<-S> I<[@hostgroups/][ncpus/]sshlogin[,[@hostgroups/][ncpus/]sshlogin[,...]]> + +=item B<-S> I<@hostgroup> + Distribute jobs to remote computers. The jobs will be run on a list of remote computers. @@ -2867,6 +2950,7 @@ B<--sshlogin> is known to cause problems with B<-m> and B<-X>. See also: B<--basefile> B<--transferfile> B<--return> B<--cleanup> B<--trc> B<--sshloginfile> B<--workdir> B<--filter-hosts> +B<--ssh> =item B<--sshloginfile> I @@ -2940,20 +3024,28 @@ See also: B<--filter-hosts> Use the replacement string I instead of B<{%}> for job slot number. +See also: B<{%}> + =item B<--silent> -Silent. The job to be run will not be printed. This is the default. -Can be reversed with B<-v>. +Silent. + +The job to be run will not be printed. This is the default. Can be +reversed with B<-v>. + +See also: B<-v> =item B<--template> I=I =item B<--tmpl> I=I -Copy I to I. All replacement strings in the contents of -I will be replaced. All replacement strings in the name I -will be replaced. +Replace replacement strings in I and save it in I. + +All replacement strings in the contents of I will be +replaced. All replacement strings in the name I will be +replaced. With B<--cleanup> the new file will be removed when the job is done. @@ -2974,14 +3066,18 @@ it can be used like this: parallel --cleanup --header : --tmpl my.tmpl={#}.t myprog {#}.t \ ::: x 1.234 2.345 3.45678 ::: y 1 2 3 +See also: B<{}> B<--cleanup> + =item B<--tty> -Open terminal tty. 
If GNU B is used for starting a program -that accesses the tty (such as an interactive program) then this -option may be needed. It will default to starting only one job at a -time (i.e. B<-j1>), not buffer the output (i.e. B<-u>), and it will -open a tty for the job. +Open terminal tty. + +If GNU B is used for starting a program that accesses the +tty (such as an interactive program) then this option may be +needed. It will default to starting only one job at a time +(i.e. B<-j1>), not buffer the output (i.e. B<-u>), and it will open a +tty for the job. You can of course override B<-j1> and B<-u>. @@ -2990,6 +3086,8 @@ the jobs (with B<--timeout>, B<--memfree>, or B<--halt>). This is due to GNU B giving each child its own process group, which is then killed. Process groups are dependant on the tty. +See also: B<--ungroup> + =item B<--tag> @@ -3001,24 +3099,26 @@ prepended with the sshlogin instead. B<--tag> is ignored when using B<-u>. -B<--ctag> gives the tag a color. +See also: B<--tagstring> B<--ctag> =item B<--tagstring> I -Tag lines with a string. Each output line will be prepended with -I and TAB (\t). I can contain replacement strings such as -B<{}>. +Tag lines with a string. + +Each output line will be prepended with I and TAB (\t). I +can contain replacement strings such as B<{}>. B<--tagstring> is ignored when using B<-u>, B<--onall>, and B<--nonall>. -B<--ctagstring> gives the tag a color. +See also: B<--tag> B<--ctagstring> =item B<--tee> -Pipe all data to all jobs. Used with B<--pipe>/B<--pipepart> and -B<:::>. +Pipe all data to all jobs. + +Used with B<--pipe>/B<--pipe-part> and B<:::>. seq 1000 | parallel --pipe --tee -v wc {} ::: -w -l -c @@ -3030,11 +3130,13 @@ fill: How many words contain a..z and how many bytes do they fill? 
- parallel -a /usr/share/dict/words --pipepart --tee --tag \ + parallel -a /usr/share/dict/words --pipe-part --tee --tag \ 'grep {1} | wc {2}' ::: {a..z} ::: -l -c +See also: B<:::> B<--pipe> B<--pipe-part> -=item B<--termseq> I + +=item B<--term-seq> I Termination sequence. @@ -3049,6 +3151,8 @@ waits 100 ms, sends another TERM signal, waits 50 ms, sends a KILL signal, waits 25 ms, and exits. GNU B detects if a process dies before the waiting time is up. +See also: B<--halt> B<--timeout> B<--memfree> + =item B<--tmpdir> I @@ -3058,7 +3162,7 @@ GNU B normally buffers output into temporary files in /tmp. By setting B<--tmpdir> you can use a different dir for the files. Setting B<--tmpdir> is equivalent to setting $TMPDIR. -See also: B<--compress> +See also: B<--compress> B<$TMPDIR> B<$PARALLEL_REMOTE_TMPDIR> =item B<--tmux> (Long beta testing) @@ -3066,6 +3170,8 @@ See also: B<--compress> Use B for output. Start a B session and run each job in a window in that session. No other output will be produced. +See also: B<--tmuxpane> + =item B<--tmuxpane> (Long beta testing) @@ -3073,20 +3179,21 @@ Use B for output but put output into panes in the first window. Useful if you want to monitor the progress of less than 100 concurrent jobs. +See also: B<--tmux> + =item B<--timeout> I Time out for command. If the command runs for longer than I -seconds it will get killed as per B<--termseq>. +seconds it will get killed as per B<--term-seq>. If I is followed by a % then the timeout will dynamically be computed as a percentage of the median average runtime of successful jobs. Only values > 100% will make sense. -I is in seconds, but can be postfixed with s, m, h, or d -(see the section TIME POSTFIXES). +I is in seconds, but can be postfixed with s, m, h, or d. -See also: B<--termseq> +See also: TIME POSTFIXES B<--term-seq> B<--retries> =item B<--verbose> @@ -3095,7 +3202,7 @@ See also: B<--termseq> Print the job to be run on stderr (standard error). 
-See also: B<-v> B<-p> +See also: B<-v> B<--interactive> =item B<--transfer> @@ -3104,6 +3211,8 @@ Transfer files to remote computers. Shorthand for: B<--transferfile {}>. +See also: B<--transferfile> + =item B<--transferfile> I @@ -3161,9 +3270,10 @@ B<--cleanup> =item B<--trc> I -Transfer, Return, Cleanup. Shorthand for: +Transfer, Return, Cleanup. Shorthand for: B<--transfer> B<--return> +I B<--cleanup> -B<--transferfile {}> B<--return> I B<--cleanup> +See also: B<--transfer> B<--return> B<--cleanup> =item B<--trim> @@ -3193,6 +3303,8 @@ a bc " -> "a bc". This is the default if B<--colsep> is used. =back +See also: B<--no-run-if-empty> B<{}> B<--colsep> + =item B<--ungroup> @@ -3227,16 +3339,22 @@ See also: B<--line-buffer> B<--group> Use the replacement string I instead of B<{.}> for input line without extension. +See also: B<{.}> + =item B<--use-sockets-instead-of-threads> +See also: B<--use-cores-instead-of-threads> + + =item B<--use-cores-instead-of-threads> =item B<--use-cpus-instead-of-cores> (obsolete) -Determine how GNU B counts the number of CPUs. GNU -B uses this number when the number of jobslots is computed -relative to the number of CPUs (e.g. 100% or +1). +Determine how GNU B counts the number of CPUs. + +GNU B uses this number when the number of jobslots is +computed relative to the number of CPUs (e.g. 100% or +1). CPUs can be counted in three different ways: @@ -3269,10 +3387,15 @@ B<--use-cpus-instead-of-cores> is a (misleading) alias for B<--use-sockets-instead-of-threads> and is kept for backwards compatibility. +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> + =item B<-v> -Verbose. Print the job to be run on stdout (standard output). Can be reversed +Verbose. + +Print the job to be run on stdout (standard output). Can be reversed with B<--silent>. Use B<-v> B<-v> to print the wrapping ssh command when running remotely. @@ -4286,7 +4409,7 @@ is much faster.
If it still does not fit in memory you can do this: - parallel --pipepart -a regexps.txt --block 1M grep -F -f - -n bigfile | \ + parallel --pipe-part -a regexps.txt --block 1M grep -F -f - -n bigfile | \ sort -un | perl -pe 's/^\d+://' The 1M should be your free memory divided by the number of CPU threads and @@ -4297,13 +4420,13 @@ GNU/Linux you can do: END { print sum }' /proc/meminfo) percpu=$((free / 200 / $(parallel --number-of-threads)))k - parallel --pipepart -a regexps.txt --block $percpu --compress \ + parallel --pipe-part -a regexps.txt --block $percpu --compress \ grep -F -f - -n bigfile | \ sort -un | perl -pe 's/^\d+://' If you can live with duplicated lines and wrong order, it is faster to do: - parallel --pipepart -a regexps.txt --block $percpu --compress \ + parallel --pipe-part -a regexps.txt --block $percpu --compress \ grep -F -f - bigfile =head3 Limiting factor: CPU @@ -4311,7 +4434,7 @@ If you can live with duplicated lines and wrong order, it is faster to do: If the CPU is the limiting factor parallelization should be done on the regexps: - cat regexps.txt | parallel --pipe -L1000 --roundrobin --compress \ + cat regexps.txt | parallel --pipe -L1000 --round-robin --compress \ grep -f - -n bigfile | \ sort -un | perl -pe 's/^\d+://' @@ -4324,15 +4447,15 @@ Some storage systems perform better when reading multiple chunks in parallel. This is true for some RAID systems and for some network file systems. To parallelize the reading of I: - parallel --pipepart --block 100M -a bigfile -k --compress \ + parallel --pipe-part --block 100M -a bigfile -k --compress \ grep -f regexps.txt This will split I into 100MB chunks and run B on each of these chunks. 
To parallelize both reading of I and I combine the two using B<--cat>: - parallel --pipepart --block 100M -a bigfile --cat cat regexps.txt \ - \| parallel --pipe -L1000 --roundrobin grep -f - {} + parallel --pipe-part --block 100M -a bigfile --cat cat regexps.txt \ + \| parallel --pipe -L1000 --round-robin grep -f - {} If a line matches multiple regexps, the line may be duplicated. @@ -4867,7 +4990,7 @@ where '/1' and ' 1:' determines this is read 1. This will cut big.fq into one chunk per CPU core and pass it on stdin (standard input) to the program fastq-reader: - parallel --pipepart -a big.fq --block -1 --regexp \ + parallel --pipe-part -a big.fq --block -1 --regexp \ --recend '\n' --recstart '@.*(/1| 1:.*)\n[A-Za-z\n\.~]' \ fastq-reader @@ -4905,7 +5028,7 @@ byte has to be copied through GNU B. But if B is a real (seekable) file GNU B can by-pass the copying and send the parts directly to the program: - parallel --pipepart --block 100m -a bigfile --files sort |\ + parallel --pipe-part --block 100m -a bigfile --files sort |\ parallel -Xj1 sort -m {} ';' rm {} >bigfile.sort @@ -4958,13 +5081,13 @@ process. That means that if you just spawn more GNU Bs then each of them can run 250 jobs. This will spawn up to 2500 jobs: cat myinput |\ - parallel --pipe -N 50 --roundrobin -j50 parallel -j50 your_prg + parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg This will spawn up to 62500 jobs (use with caution - you need 64 GB RAM to do this, and you may need to increase /proc/sys/kernel/pid_max): cat myinput |\ - parallel --pipe -N 250 --roundrobin -j250 parallel -j250 your_prg + parallel --pipe -N 250 --round-robin -j250 parallel -j250 your_prg =head2 EXAMPLE: Working as mutex and counting semaphore @@ -5523,12 +5646,16 @@ Path to B. If unset the B in $PATH is used. =item $TMPDIR -Directory for temporary files. See: B<--tmpdir>. +Directory for temporary files. 
+ +See also: B<--tmpdir> =item $PARALLEL_REMOTE_TMPDIR -Directory for temporary files on remote servers See: B<--tmpdir>. +Directory for temporary files on remote servers. + +See also: B<--tmpdir> =item $PARALLEL @@ -5551,6 +5678,8 @@ can be written as: Notice the \ after 'myssh' is needed because 'myssh' and 'user@server' must be one argument. +See also: B<--profile> + =back diff --git a/src/pod2graph b/src/pod2graph new file mode 100755 index 00000000..65ad8361 --- /dev/null +++ b/src/pod2graph @@ -0,0 +1,101 @@ +#!/usr/bin/perl + +# Convert .pod file containing: +# +# =item --option +# +# See also: --other-option +# +# to a graph.pdf with link between --option and --other-option + +$pod=join("",<>); +$pod=~s/^.*=head1 OPTIONS//s; +$pod=~s/=head1 EXAMPLES.*//s; +$pod=~s/^.*?=over//s; +$pod=~s/=back\s*$//s; +$pod=~s/=over.*?=back//sg; + +$in_text = 0; +$in_item = 0; +$in_see_also = 0; + + +for(split(/\n\n+/,$pod)) { + if(/^See also:\s+(\S.*)/s) { + $lex = "seealso"; + $in_text = 0; + $in_item = 0; + $in_see_only = 1; + } elsif(/^=item\s+(B<[{]=.*?perl expression.*?=[}]>|[IB]<.*?>)(\s|$)/s) { + $lex = "item"; + $in_text = 0; + $in_item = 1; + $in_see_only = 0; + } elsif(/\S/) { + $lex = "text"; + $in_text = 1; + $in_item = 0; + $in_see_only = 0; + } + + if($lex eq "seealso") { + if($lastlex eq "item") { + @saveditems = @items; + @items = (); + } + my $to = $1; + my $from = (join "/", + map { + s/I<(.*?)>/$1/g; + s/B<(.*?)>/$1/g; + $_ } + @saveditems[0]); + my @to; + while($to =~ s/(B<[{]=.*?perl expression.*?=[}]>|[BI]<.*?>)(\s|$)//) { + my $v = $1; + push @to, map { + s/I<(.*?)>/$1/g; + s/B<(.*?)>/$1/g; + $_; + } $v; + } + map { + if(not $seen{$from,$_}++ + and + not $seen{$_,$from}++) { + push @nodelines, "\"$from\" -- \"$_\"\n" + } + } @to; + + } elsif($lex eq "text") { + if($lastlex eq "item") { + @saveditems = @items; + @items = (); + } + } elsif($lex eq "item") { + push(@items,$1); + } + $lastlex=$lex; +} + + +sub header() { + return q[ + graph test123 { + 
graph [splines=true overlap=false;nodesep=2; + ]; + labelloc="t"; + label="Related map for options for GNU Parallel\nFind which options relate to which";fontsize=33; + + "{}"[margin=0.3;] + "--sshlogin"[margin=0.3] + "--pipe"[margin=0.3;] + ":::"[margin=0.3;] + "-N"[margin=0.3] + ]; +} + +open(GRAPHVIZ,"|-","sfdp -Tpdf") || die; +print GRAPHVIZ header(), (sort { rand()*3 -1 } @nodelines), "}"; +close GRAPHVIZ; + diff --git a/testsuite/tests-to-run/parallel-local-10s.sh b/testsuite/tests-to-run/parallel-local-10s.sh index 615dff15..c1539f6d 100644 --- a/testsuite/tests-to-run/parallel-local-10s.sh +++ b/testsuite/tests-to-run/parallel-local-10s.sh @@ -8,21 +8,6 @@ # Each should be taking 10-30s and be possible to run in parallel # I.e.: No race conditions, no logins -par_bin() { - echo '### Test --bin' - seq 10 | parallel --pipe --bin 1 -j4 wc | sort - paste <(seq 10) <(seq 10 -1 1) | - parallel --pipe --colsep '\t' --bin 2 -j4 wc | sort - echo '### Test --bin with expression that gives 1..n' - paste <(seq 10) <(seq 10 -1 1) | - parallel --pipe --colsep '\t' --bin '2 $_=$_%2+1' -j4 wc | sort - echo '### Test --bin with expression that gives 0..n-1' - paste <(seq 10) <(seq 10 -1 1) | - parallel --pipe --colsep '\t' --bin '2 $_%=2' -j4 wc | sort - # Fails - blocks! - # paste <(seq 10) <(seq 10 -1 1) | parallel --pipe --colsep '\t' --bin 2 wc -} - par_load_blocks() { echo "### Test if --load blocks. 
Bug."; export PARALLEL="--load 300%" diff --git a/testsuite/tests-to-run/parallel-local-30s.sh b/testsuite/tests-to-run/parallel-local-30s.sh index b5d2b29e..262b6098 100755 --- a/testsuite/tests-to-run/parallel-local-30s.sh +++ b/testsuite/tests-to-run/parallel-local-30s.sh @@ -8,6 +8,22 @@ # Each should be taking 30-100s and be possible to run in parallel # I.e.: No race conditions, no logins +par_bin() { + echo '### Test --bin' + seq 10 | parallel --pipe --bin 1 -j4 wc | sort + paste <(seq 10) <(seq 10 -1 1) | + parallel --pipe --colsep '\t' --bin 2 -j4 wc | sort + echo '### Test --bin with expression that gives 1..n' + paste <(seq 10) <(seq 10 -1 1) | + parallel --pipe --colsep '\t' --bin '2 $_=$_%2+1' -j4 wc | sort + echo '### Test --bin with expression that gives 0..n-1' + paste <(seq 10) <(seq 10 -1 1) | + parallel --pipe --colsep '\t' --bin '2 $_%=2' -j4 wc | sort + echo '### Blocks in version 20220122' + echo 10 | ppar --pipe --bin 1 -j100% wc + paste <(seq 10) <(seq 10 -1 1) | parallel --pipe --colsep '\t' --bin 2 wc +} + par_shard_a() { echo '### --shard' # Each of the 5 lines should match: diff --git a/testsuite/wanted-results/parallel-local-10s b/testsuite/wanted-results/parallel-local-10s index 8e398f33..05338615 100644 --- a/testsuite/wanted-results/parallel-local-10s +++ b/testsuite/wanted-results/parallel-local-10s @@ -24,25 +24,6 @@ par__pipepart_spawn 1:local / 2+ / 2+ par__pipepart_tee bug #45479: --pipe/--pipepart --tee par__pipepart_tee --pipepart --tee par__pipepart_tee 314572800 -par_bin ### Test --bin -par_bin 2 2 4 -par_bin 2 2 4 -par_bin 3 3 6 -par_bin 3 3 7 -par_bin 2 4 8 -par_bin 2 4 8 -par_bin 3 6 13 -par_bin 3 6 13 -par_bin ### Test --bin with expression that gives 1..n -par_bin 0 0 0 -par_bin 0 0 0 -par_bin 5 10 21 -par_bin 5 10 21 -par_bin ### Test --bin with expression that gives 0..n-1 -par_bin 0 0 0 -par_bin 0 0 0 -par_bin 5 10 21 -par_bin 5 10 21 par_colsep ### Test of --colsep par_colsep a b c par_colsep a b c diff --git 
a/testsuite/wanted-results/parallel-local-30s b/testsuite/wanted-results/parallel-local-30s index 93394036..2adaf324 100644 --- a/testsuite/wanted-results/parallel-local-30s +++ b/testsuite/wanted-results/parallel-local-30s @@ -1,3 +1,39 @@ +par_bin ### Test --bin +par_bin 2 2 4 +par_bin 2 2 4 +par_bin 3 3 6 +par_bin 3 3 7 +par_bin 2 4 8 +par_bin 2 4 8 +par_bin 3 6 13 +par_bin 3 6 13 +par_bin ### Test --bin with expression that gives 1..n +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 5 10 21 +par_bin 5 10 21 +par_bin ### Test --bin with expression that gives 0..n-1 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 5 10 21 +par_bin 5 10 21 +par_bin ### Blocks in version 20220122 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 1 1 3 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 0 0 0 +par_bin 1 2 4 +par_bin 1 2 4 +par_bin 1 2 4 +par_bin 2 4 9 +par_bin 1 2 4 +par_bin 2 4 9 +par_bin 1 2 4 +par_bin 1 2 4 par_exit_code bug #52207: Exit status 0 when child job is killed, even with "now,fail=1" par_exit_code # Ideally the command should return the same par_exit_code # with or without parallel @@ -1640,95 +1676,97 @@ par_race_condition1 9 par_race_condition1 10 par_round_robin_blocks bug #49664: --round-robin does not complete par_round_robin_blocks 8 -par_shard ### --shard -par_shard OK -par_shard OK -par_shard OK -par_shard OK -par_shard OK -par_shard 10 1 -par_shard 10 2 -par_shard 10 3 -par_shard 10 4 -par_shard 10 5 -par_shard 10 6 -par_shard 10 7 -par_shard 10 8 -par_shard 10 9 -par_shard 9 0 -par_shard 9 1 -par_shard 9 2 -par_shard 9 3 -par_shard 9 4 -par_shard 9 5 -par_shard 9 6 -par_shard 9 7 -par_shard 9 8 -par_shard 9 9 -par_shard 10 1 -par_shard 10 2 -par_shard 10 3 -par_shard 10 4 -par_shard 10 5 -par_shard 10 6 -par_shard 10 7 -par_shard 10 8 -par_shard 10 9 -par_shard 9 0 -par_shard 9 1 -par_shard 9 2 -par_shard 9 3 -par_shard 9 4 -par_shard 9 5 -par_shard 9 6 -par_shard 9 7 -par_shard 9 8 -par_shard 9 9 -par_shard 10 1 -par_shard 10 2 -par_shard 10 3 -par_shard 
10 4 -par_shard 10 5 -par_shard 10 6 -par_shard 10 7 -par_shard 10 8 -par_shard 10 9 -par_shard 2 c1 -par_shard 9 0 -par_shard 9 1 -par_shard 9 2 -par_shard 9 3 -par_shard 9 4 -par_shard 9 5 -par_shard 9 6 -par_shard 9 7 -par_shard 9 8 -par_shard 9 9 -par_shard 2 c2 -par_shard 10 1 -par_shard 10 2 -par_shard 10 3 -par_shard 10 4 -par_shard 10 5 -par_shard 10 6 -par_shard 10 7 -par_shard 10 8 -par_shard 10 9 -par_shard 2 c1 -par_shard 9 0 -par_shard 9 1 -par_shard 9 2 -par_shard 9 3 -par_shard 9 4 -par_shard 9 5 -par_shard 9 6 -par_shard 9 7 -par_shard 9 8 -par_shard 9 9 -par_shard 2 c2 -par_shard *** broken -par_shard parallel: Error: --shard requires --jobs to be higher than the number of -par_shard parallel: Error: arguments. Increase --jobs. +par_shard_a ### --shard +par_shard_a OK +par_shard_a OK +par_shard_a OK +par_shard_a OK +par_shard_a OK +par_shard_a 10 1 +par_shard_a 10 2 +par_shard_a 10 3 +par_shard_a 10 4 +par_shard_a 10 5 +par_shard_a 10 6 +par_shard_a 10 7 +par_shard_a 10 8 +par_shard_a 10 9 +par_shard_a 9 0 +par_shard_a 9 1 +par_shard_a 9 2 +par_shard_a 9 3 +par_shard_a 9 4 +par_shard_a 9 5 +par_shard_a 9 6 +par_shard_a 9 7 +par_shard_a 9 8 +par_shard_a 9 9 +par_shard_b ### --shard +par_shard_b 10 1 +par_shard_b 10 2 +par_shard_b 10 3 +par_shard_b 10 4 +par_shard_b 10 5 +par_shard_b 10 6 +par_shard_b 10 7 +par_shard_b 10 8 +par_shard_b 10 9 +par_shard_b 9 0 +par_shard_b 9 1 +par_shard_b 9 2 +par_shard_b 9 3 +par_shard_b 9 4 +par_shard_b 9 5 +par_shard_b 9 6 +par_shard_b 9 7 +par_shard_b 9 8 +par_shard_b 9 9 +par_shard_c ### --shard +par_shard_c 10 1 +par_shard_c 10 2 +par_shard_c 10 3 +par_shard_c 10 4 +par_shard_c 10 5 +par_shard_c 10 6 +par_shard_c 10 7 +par_shard_c 10 8 +par_shard_c 10 9 +par_shard_c 2 c1 +par_shard_c 9 0 +par_shard_c 9 1 +par_shard_c 9 2 +par_shard_c 9 3 +par_shard_c 9 4 +par_shard_c 9 5 +par_shard_c 9 6 +par_shard_c 9 7 +par_shard_c 9 8 +par_shard_c 9 9 +par_shard_c 2 c2 +par_shard_d 10 1 +par_shard_d 10 2 +par_shard_d 10 3 
+par_shard_d 10 4 +par_shard_d 10 5 +par_shard_d 10 6 +par_shard_d 10 7 +par_shard_d 10 8 +par_shard_d 10 9 +par_shard_d 2 c1 +par_shard_d 9 0 +par_shard_d 9 1 +par_shard_d 9 2 +par_shard_d 9 3 +par_shard_d 9 4 +par_shard_d 9 5 +par_shard_d 9 6 +par_shard_d 9 7 +par_shard_d 9 8 +par_shard_d 9 9 +par_shard_d 2 c2 +par_shard_d *** broken +par_shard_d parallel: Error: --shard requires --jobs to be higher than the number of +par_shard_d parallel: Error: arguments. Increase --jobs. par_sighup ### Test SIGHUP par_sighup 1 par_sighup 10