From 05e66ecaa5f386af4c71dc1dbc5bed2d1743111c Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Thu, 14 Jun 2012 23:13:11 +0200 Subject: [PATCH] Fixed bug #34958: --pipe with --record size measured in lines. --- src/parallel | 80 +- src/parallel.pod | 20 +- src/parallel.texi | 2130 +------------------ testsuite/tests-to-run/parallel-local114.sh | 6 + testsuite/wanted-results/parallel-local114 | 15 + 5 files changed, 92 insertions(+), 2159 deletions(-) diff --git a/src/parallel b/src/parallel index f8afb8bc..e173e1db 100755 --- a/src/parallel +++ b/src/parallel @@ -295,49 +295,61 @@ sub spreadstdin { # Force the while-loop once if everything was read by header reading my $force_one_time_through = 0; for my $in (@fhlist) { - while(!$force_one_time_through++ or read($in,substr($buf,length $buf,0),$::opt_blocksize)) { - # substr above = append to $buf - if($::opt_r) { - # Remove empty lines - $buf=~s/^\s*\n//gm; - if(length $buf == 0) { - next; - } - } - if($::opt_regexp) { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) { - write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1); - } + piperead: while(1) { + if(!$force_one_time_through) { + $force_one_time_through++; + } elsif($Global::max_lines) { + # Read $Global::max_lines lines + eof($in) and last piperead; + for(my $t = 0; !eof($in) and + substr($buf,length $buf,0) = <$in> and $t < $Global::max_lines; + $t++) {} } else { - # Find the last recend-recstart in $buf - if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { - write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1); + # Read a block + read($in,substr($buf,length $buf,0),$::opt_blocksize) or last; + # substr above = append to $buf + } + if($::opt_r) { + # Remove empty lines + $buf=~s/^\s*\n//gm; + if(length $buf == 0) { + next; } } - } else { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - my $i = 0; - while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) { - $i += length $recend; # find the actual splitting location - write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i); - substr($buf,0,$i) = ""; + if($::opt_regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) { + write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1); + } + } else { + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1); + } } } else { - # Find the last recend-recstart in $buf - my $i = rindex($buf,$recendrecstart); - if($i != -1) { - $i += length $recend; # find the actual splitting location - write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i); - substr($buf,0,$i) = ""; + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) { + $i += length $recend; # find the actual splitting location + write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i); + substr($buf,0,$i) = ""; + } + } else { + # Find the last recend-recstart in $buf + my $i = rindex($buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i); + substr($buf,0,$i) = ""; + } } } } } -} - + # If there is anything left in the buffer write it substr($buf,0,0) = ""; 
write_record_to_pipe(\$header,\$buf,$recstart,$recend,length $buf); diff --git a/src/parallel.pod b/src/parallel.pod index 40b9cf02..452a9a5d 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -636,24 +636,28 @@ to see the difference: =item B<-L> I<max-lines> -Use at most I<max-lines> nonblank input lines per command line. -Trailing blanks cause an input line to be logically continued on the -next input line. +When used with B<--pipe>: Read records of I<max-lines> lines. + +When used otherwise: Use at most I<max-lines> nonblank input lines per +command line. Trailing blanks cause an input line to be logically +continued on the next input line. B<-L 0> means read one line, but insert 0 arguments on the command line. -Implies B<-X> unless B<-m> or B<--xargs> is set. +Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. =item B<--max-lines>[=I<max-lines>] =item B<-l>[I<max-lines>] -Synonym for the B<-L> option. Unlike B<-L>, the I<max-lines> argument -is optional. If I<max-lines> is not specified, it defaults to one. -The B<-l> option is deprecated since the POSIX standard specifies -B<-L> instead. +When used with B<--pipe>: Read records of I<max-lines> lines. + +When used otherwise: Synonym for the B<-L> option. Unlike B<-L>, the +I<max-lines> argument is optional. If I<max-lines> is not specified, +it defaults to one. The B<-l> option is deprecated since the POSIX +standard specifies B<-L> instead. B<-l 0> is an alias for B<-l 1>. diff --git a/src/parallel.texi b/src/parallel.texi index c614bcb5..863a3742 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -688,14 +688,16 @@ to see the difference: @item @strong{-L} @emph{max-lines} @anchor{@strong{-L} @emph{max-lines}} -Use at most @emph{max-lines} nonblank input lines per command line. -Trailing blanks cause an input line to be logically continued on the -next input line. +When used with @strong{--pipe}: Read records of @emph{max-lines} lines. + +When used otherwise: Use at most @emph{max-lines} nonblank input lines per +command line. Trailing blanks cause an input line to be logically +continued on the next input line. @strong{-L 0} means read one line, but insert 0 arguments on the command line. -Implies @strong{-X} unless @strong{-m} or @strong{--xargs} is set. +Implies @strong{-X} unless @strong{-m}, @strong{--xargs}, or @strong{--pipe} is set. @item @strong{--max-lines}[=@emph{max-lines}] @anchor{@strong{--max-lines}[=@emph{max-lines}]} @item @strong{-l}[@emph{max-lines}] @anchor{@strong{-l}[@emph{max-lines}]} -Synonym for the @strong{-L} option. Unlike @strong{-L}, the @emph{max-lines} argument -is optional. If @emph{max-lines} is not specified, it defaults to one. -The @strong{-l} option is deprecated since the POSIX standard specifies -@strong{-L} instead. +When used with @strong{--pipe}: Read records of @emph{max-lines} lines. + +When used otherwise: Synonym for the @strong{-L} option. Unlike @strong{-L}, the +@emph{max-lines} argument is optional. If @emph{max-lines} is not specified, +it defaults to one. The @strong{-l} option is deprecated since the POSIX +standard specifies @strong{-L} instead. @strong{-l 0} is an alias for @strong{-l 1}. @@ -1520,2112 +1524,4 @@ Compare these two: @verbatim parallel echo {1} {2} ::: 1 2 3 ::: a b c - parallel --xapply echo {1} {2} ::: 1 2 3 ::: a b c -@end verbatim - -See also @strong{--header}. - -@item @strong{--shebang} -@anchor{@strong{--shebang}} - -@item @strong{--hashbang} -@anchor{@strong{--hashbang}} - -GNU @strong{Parallel} can be called as a shebang (#!) command as the first line of a script.
Like this: - -@verbatim - #!/usr/bin/parallel --shebang -r traceroute - - foss.org.my - debian.org - freenetproject.org -@end verbatim - -For this to work @strong{--shebang} must be set as the first option. - -@end table - -@chapter EXAMPLE: Working as xargs -n1. Argument appending -@anchor{EXAMPLE: Working as xargs -n1. Argument appending} - -GNU @strong{parallel} can work similarly to @strong{xargs -n1}. - -To compress all html files using @strong{gzip} run: - -@strong{find . -name '*.html' | parallel gzip} - -If the file names may contain a newline use @strong{-0}. Substitute FOO BAR with -FUBAR in all files in this dir and subdirs: - -@strong{find . -type f -print0 | parallel -q0 perl -i -pe 's/FOO BAR/FUBAR/g'} - -Note @strong{-q} is needed because of the space in 'FOO BAR'. - -@chapter EXAMPLE: Reading arguments from command line -@anchor{EXAMPLE: Reading arguments from command line} - -GNU @strong{parallel} can take the arguments from the command line instead of -stdin (standard input). To compress all html files in the current dir -using @strong{gzip} run: - -@strong{parallel gzip ::: *.html} - -To convert *.wav to *.mp3 using LAME running one process per CPU core -run: - -@strong{parallel lame @{@} -o @{.@}.mp3 ::: *.wav} - -@chapter EXAMPLE: Inserting multiple arguments -@anchor{EXAMPLE: Inserting multiple arguments} - -When moving a lot of files like this: @strong{mv *.log destdir} you will -sometimes get the error: - -@strong{bash: /bin/mv: Argument list too long} - -because there are too many files. You can instead do: - -@strong{ls | grep -E '\.log$' | parallel mv @{@} destdir} - -This will run @strong{mv} for each file. It can be done faster if @strong{mv} gets -as many arguments as will fit on the line: - -@strong{ls | grep -E '\.log$' | parallel -m mv @{@} destdir} - -@chapter EXAMPLE: Context replace -@anchor{EXAMPLE: Context replace} - -To remove the files @emph{pict0000.jpg} .. @emph{pict9999.jpg} you could do: - -@strong{seq -w 0 9999 | parallel rm pict@{@}.jpg} - -You could also do: - -@strong{seq -w 0 9999 | perl -pe 's/(.*)/pict$1.jpg/' | parallel -m rm} - -The first will run @strong{rm} 10000 times, while the last will only run -@strong{rm} as many times as needed to keep the command line length short -enough to avoid @strong{Argument list too long} (it typically runs 1-2 times). - -You could also run: - -@strong{seq -w 0 9999 | parallel -X rm pict@{@}.jpg} - -This will also only run @strong{rm} as many times as needed to keep the command -line length short enough. - -@chapter EXAMPLE: Compute intensive jobs and substitution -@anchor{EXAMPLE: Compute intensive jobs and substitution} - -If ImageMagick is installed this will generate a thumbnail of a jpg -file: - -@strong{convert -geometry 120 foo.jpg thumb_foo.jpg} - -This will run with number-of-cpu-cores jobs in parallel for all jpg -files in a directory: - -@strong{ls *.jpg | parallel convert -geometry 120 @{@} thumb_@{@}} - -To do it recursively use @strong{find}: - -@strong{find . -name '*.jpg' | parallel convert -geometry 120 @{@} @{@}_thumb.jpg} - -Notice how the argument has to start with @strong{@{@}} as @strong{@{@}} will include the path -(e.g. running @strong{convert -geometry 120 ./foo/bar.jpg -thumb_./foo/bar.jpg} would clearly be wrong). The command will -generate files like ./foo/bar.jpg_thumb.jpg. - -Use @strong{@{.@}} to avoid the extra .jpg in the file name. This command will -make files like ./foo/bar_thumb.jpg: - -@strong{find .
-name '*.jpg' | parallel convert -geometry 120 @{@} @{.@}_thumb.jpg} - -@chapter EXAMPLE: Substitution and redirection -@anchor{EXAMPLE: Substitution and redirection} - -This will generate an uncompressed version of .gz-files next to the .gz-file: - -@strong{parallel zcat @{@} "}>@strong{"@{.@} ::: *.gz} - -Quoting of > is necessary to postpone the redirection. Another -solution is to quote the whole command: - -@strong{parallel "zcat @{@} }>@strong{@{.@}" ::: *.gz} - -Other special shell characters (such as * ; $ > < | >> <<) also need -to be put in quotes, as they may otherwise be interpreted by the shell -and not given to GNU @strong{parallel}. - -@chapter EXAMPLE: Composed commands -@anchor{EXAMPLE: Composed commands} - -A job can consist of several commands. This will print the number of -files in each directory: - -@strong{ls | parallel 'echo -n @{@}" "; ls @{@}|wc -l'} - -To put the output in a file called .dir: - -@strong{ls | parallel '(echo -n @{@}" "; ls @{@}|wc -l) }> @strong{@{@}.dir'} - -Even small shell scripts can be run by GNU @strong{parallel}: - -@strong{find . | parallel 'a=@{@}; name=$@{a##*/@}; upper=$(echo "$name" | tr "[:lower:]" "[:upper:]"); echo "$name - $upper"'} - -@strong{ls | parallel 'mv @{@} "$(echo @{@} | tr "[:upper:]" "[:lower:]")"'} - -Given a list of URLs, list all URLs that fail to download. Print the -line number and the URL. - -@strong{cat urlfile | parallel "wget @{@} 2}>@strong{/dev/null || grep -n @{@} urlfile"} - -Create a mirror directory with the same filenames except all files and -symlinks are empty files. - -@strong{cp -rs /the/source/dir mirror_dir; find mirror_dir -type l | parallel -m rm @{@} '&&' touch @{@}} - -Find the files in a list that do not exist: - -@strong{cat file_list | parallel 'if [ ! -e @{@} ] ; then echo @{@}; fi'} - -@chapter EXAMPLE: Removing file extension when processing files -@anchor{EXAMPLE: Removing file extension when processing files} - -When processing files, removing the file extension using @strong{@{.@}} is -often useful. - -Create a directory for each zip-file and unzip it in that dir: - -@strong{parallel 'mkdir @{.@}; cd @{.@}; unzip ../@{@}' ::: *.zip} - -Recompress all .gz files in the current directory using @strong{bzip2} running 1 -job per CPU core in parallel: - -@strong{parallel "zcat @{@} | bzip2 }>@strong{@{.@}.bz2 && rm @{@}" ::: *.gz} - -Convert all WAV files to MP3 using LAME: - -@strong{find sounddir -type f -name '*.wav' | parallel lame @{@} -o @{.@}.mp3} - -Put all the converted files in the same directory: - -@strong{find sounddir -type f -name '*.wav' | parallel lame @{@} -o mydir/@{/.@}.mp3} - -@chapter EXAMPLE: Removing two file extensions when processing files and calling GNU Parallel from itself -@anchor{EXAMPLE: Removing two file extensions when processing files and calling GNU Parallel from itself} - -If you have a directory with tar.gz files and want these extracted in -the corresponding dir (e.g. foo.tar.gz will be extracted in the dir -foo) you can do: - -@strong{ls *.tar.gz| parallel --er @{tar@} 'echo @{tar@}|parallel "mkdir -p @{.@} ; tar -C @{.@} -xf @{.@}.tar.gz"'} - -@chapter EXAMPLE: Download 10 images for each of the past 30 days -@anchor{EXAMPLE: Download 10 images for each of the past 30 days} - -Let us assume a website stores images like: - -@verbatim - http://www.example.com/path/to/YYYYMMDD_##.jpg -@end verbatim - -where YYYYMMDD is the date and ## is the number 01-10.
This will -download images for the past 30 days: - -@strong{parallel wget http://www.example.com/path/to/'$(date -d "today -@{1@} days" +%Y%m%d)_@{2@}.jpg' ::: $(seq 30) ::: $(seq -w 10)} - -@strong{$(date -d "today -@{1@} days" +%Y%m%d)} will give the dates in -YYYYMMDD with @{1@} days subtracted. - -@chapter EXAMPLE: Breadth first parallel web crawler/mirrorer -@anchor{EXAMPLE: Breadth first parallel web crawler/mirrorer} - -The script below will crawl and mirror a URL in parallel. It first -downloads pages that are 1 click down, then 2 clicks down, then -3; instead of the normal depth-first order, where the first link on -each page is fetched first. - -Run like this: - -@strong{PARALLEL=-j100 ./parallel-crawl http://gatt.org.yeslab.org/} - -Remove the @strong{wget} part if you only want a web crawler. - -It works by fetching a page from a list of URLs and looking for links -in that page that are within the same starting URL and that have not -already been seen. These links are added to a new queue. When all the -pages from the list are done, the new queue is moved to the list of -URLs and the process is started over until no unseen links are found. - -@verbatim - #!/bin/bash - - # E.g. http://gatt.org.yeslab.org/ - URL=$1 - # Stay inside the start dir - BASEURL=$(echo $URL | perl -pe 's:#.*::; s:(//.*/)[^/]*:$1:') - URLLIST=$(mktemp urllist.XXXX) - URLLIST2=$(mktemp urllist.XXXX) - SEEN=$(mktemp seen.XXXX) - - # Spider to get the URLs - echo $URL >$URLLIST - cp $URLLIST $SEEN - - while [ -s $URLLIST ] ; do - cat $URLLIST | - parallel lynx -listonly -image_links -dump {} \; wget -qm -l1 -Q1 {} \; echo Spidered: {} \>\&2 | - perl -ne 's/#.*//; s/\s+\d+.\s(\S+)$/$1/ and do { $seen{$1}++ or print }' | - grep -F $BASEURL | - grep -v -x -F -f $SEEN | tee -a $SEEN > $URLLIST2 - mv $URLLIST2 $URLLIST - done - - rm -f $URLLIST $URLLIST2 $SEEN -@end verbatim - -@chapter EXAMPLE: Process files from a tar file while unpacking -@anchor{EXAMPLE: Process files from a tar file while unpacking} - -If the files to be processed are in a tar file then unpacking one file -and processing it immediately may be faster than first unpacking all -files. - -@strong{tar xvf foo.tgz | perl -ne 'print $l;$l=$_;END@{print $l@}' | -parallel echo} - -The Perl one-liner is needed to avoid a race condition.
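To see why the one-liner matters: @strong{tar -v} prints each file name as it starts extracting that file, so a name may appear before the file is complete. The one-liner delays every name by one line, so a name is only passed on once the next file has started extracting. A minimal sketch of the delay (the file names are made up):

@verbatim
  printf '%s\n' file1 file2 file3 | perl -ne 'print $l;$l=$_;END{print $l}'
  # prints file1 file2 file3 unchanged, but each name is
  # only emitted after the following line has been read
@end verbatim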
- -@chapter EXAMPLE: Rewriting a for-loop and a while-read-loop -@anchor{EXAMPLE: Rewriting a for-loop and a while-read-loop} - -for-loops like this: - -@verbatim - (for x in `cat list` ; do - do_something $x - done) | process_output -@end verbatim - -and while-read-loops like this: - -@verbatim - cat list | (while read x ; do - do_something $x - done) | process_output -@end verbatim - -can be written like this: - -@strong{cat list | parallel do_something | process_output} - -If the processing requires more steps, a for-loop like this: - -@verbatim - (for x in `cat list` ; do - no_extension=${x%.*}; - do_something $x scale $no_extension.jpg - do_step2 <$x $no_extension - done) | process_output -@end verbatim - -and while-loops like this: - -@verbatim - cat list | (while read x ; do - no_extension=${x%.*}; - do_something $x scale $no_extension.jpg - do_step2 <$x $no_extension - done) | process_output -@end verbatim - -can be written like this: - -@strong{cat list | parallel "do_something @{@} scale @{.@}.jpg ; do_step2 <@{@} @{.@}" | process_output} - -@chapter EXAMPLE: Rewriting nested for-loops -@anchor{EXAMPLE: Rewriting nested for-loops} - -Nested for-loops like this: - -@verbatim - (for x in `cat xlist` ; do - for y in `cat ylist` ; do - do_something $x $y - done - done) | process_output -@end verbatim - -can be written like this: - -@strong{parallel do_something @{1@} @{2@} :::: xlist ylist | process_output} - -Nested for-loops like this: - -@verbatim - (for gender in M F ; do - for size in S M L XL XXL ; do - echo $gender $size - done - done) | sort -@end verbatim - -can be written like this: - -@strong{parallel echo @{1@} @{2@} ::: M F ::: S M L XL XXL | sort} - -@chapter EXAMPLE: for-loops with column names -@anchor{EXAMPLE: for-loops with column names} - -When doing multiple nested for-loops it can be easier to keep track of -the loop variable if it is named instead of just having a number. Use -@strong{--header :} to let the first argument be a named alias for the -positional replacement string: - -@verbatim - parallel --header : echo {gender} {size} ::: gender M F ::: size S M L XL XXL -@end verbatim - -This also works if the input file is a file with columns: - -@verbatim - cat addressbook.tsv | parallel --colsep '\t' --header : echo {Name} {E-mail address} -@end verbatim - -@chapter EXAMPLE: Using shell variables -@anchor{EXAMPLE: Using shell variables} - -When using shell variables you need to quote them correctly as they -may otherwise be split on spaces. - -Notice the difference between: - -@verbatim - V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar) - parallel echo ::: ${V[@]} # This is probably not what you want -@end verbatim - -and: - -@verbatim - V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar) - parallel echo ::: "${V[@]}" -@end verbatim - -When using variables in the actual command that contain special -characters (e.g. space) you can quote them using @strong{'"$VAR"'} or using -"'s and @strong{-q}: - -@verbatim - V="Here are two " - parallel echo "'$V'" ::: spaces - parallel -q echo "$V" ::: spaces -@end verbatim - -@chapter EXAMPLE: Group output lines -@anchor{EXAMPLE: Group output lines} - -When running jobs that output data, you often do not want the output -of multiple jobs to run together. GNU @strong{parallel} defaults to grouping the -output of each job, so the output is printed when the job finishes. If -you want the output to be printed while the job is running you can use -@strong{-u}.
- -Compare the output of: - -@strong{parallel traceroute ::: foss.org.my debian.org freenetproject.org} - -to the output of: - -@strong{parallel -u traceroute ::: foss.org.my debian.org freenetproject.org} - -@chapter EXAMPLE: Tag output lines -@anchor{EXAMPLE: Tag output lines} - -GNU @strong{parallel} groups the output lines, but it can be hard to see -where the different jobs begin. @strong{--tag} prepends the argument to make -that more visible: - -@strong{parallel --tag traceroute ::: foss.org.my debian.org freenetproject.org} - -Check the uptime of the servers in @emph{~/.parallel/sshloginfile}: - -@strong{parallel --tag -S .. --nonall uptime} - -@chapter EXAMPLE: Keep order of output same as order of input -@anchor{EXAMPLE: Keep order of output same as order of input} - -Normally the output of a job will be printed as soon as it -completes. Sometimes you want the order of the output to remain the -same as the order of the input. This is often important, if the output -is used as input for another system. @strong{-k} will make sure the order of -output will be in the same order as input even if later jobs end -before earlier jobs. - -Append a string to every line in a text file: - -@strong{cat textfile | parallel -k echo @{@} append_string} - -If you remove @strong{-k} some of the lines may come out in the wrong order. - -Another example is @strong{traceroute}: - -@strong{parallel traceroute ::: foss.org.my debian.org freenetproject.org} - -will give the traceroute of foss.org.my, debian.org and -freenetproject.org, but it will be sorted according to which job -completed first. - -To keep the order the same as the input run: - -@strong{parallel -k traceroute ::: foss.org.my debian.org freenetproject.org} - -This will make sure the traceroute to foss.org.my will be printed -first. - -A slightly more complex example is downloading a huge file in chunks in -parallel: Some internet connections will deliver more data if you -download files in parallel. For downloading files in parallel see: -"EXAMPLE: Download 10 images for each of the past 30 days". But if you -are downloading a big file you can download the file in chunks in -parallel. - -To download byte 10000000-19999999 you can use @strong{curl}: - -@strong{curl -r 10000000-19999999 http://example.com/the/big/file} > @strong{file.part} - -To download a 1 GB file we need 100 10MB chunks downloaded and -combined in the correct order. - -@strong{seq 0 99 | parallel -k curl -r \ - @{@}0000000-@{@}9999999 http://example.com/the/big/file} > @strong{file} - -@chapter EXAMPLE: Parallel grep -@anchor{EXAMPLE: Parallel grep} - -@strong{grep -r} greps recursively through directories. On multicore CPUs -GNU @strong{parallel} can often speed this up. - -@strong{find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING @{@}} - -This will run 1.5 jobs per core, and give 1000 arguments to @strong{grep}. - -To grep a big file in parallel use @strong{--pipe}: - -@strong{cat bigfile | parallel --pipe grep foo} - -Depending on your disks and CPUs it may be faster to read larger blocks: - -@strong{cat bigfile | parallel --pipe --block 10M grep foo} - -@chapter EXAMPLE: Using remote computers -@anchor{EXAMPLE: Using remote computers} - -To run commands on a remote computer SSH needs to be set up and you -must be able to log in without entering a password (the commands -@strong{ssh-copy-id} and @strong{ssh-agent} may help you do that).
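A minimal setup sketch, assuming the remote computer is @strong{server.example.com} and you do not already have an SSH key:

@verbatim
  ssh-keygen -t rsa                   # create a key pair (if needed)
  ssh-copy-id server.example.com      # install the public key remotely
  ssh server.example.com echo Works   # should not ask for a password
@end verbatim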
- -To run @strong{echo} on @strong{server.example.com}: - -@verbatim - seq 10 | parallel --sshlogin server.example.com echo -@end verbatim - -To run commands on more than one remote computer run: - -@verbatim - seq 10 | parallel --sshlogin server.example.com,server2.example.net echo -@end verbatim - -Or: - -@verbatim - seq 10 | parallel --sshlogin server.example.com \ - --sshlogin server2.example.net echo -@end verbatim - -If the login username is @emph{foo} on @emph{server2.example.net} use: - -@verbatim - seq 10 | parallel --sshlogin server.example.com \ - --sshlogin foo@server2.example.net echo -@end verbatim - -To distribute the commands to a list of computers, make a file -@emph{mycomputers} with all the computers: - -@verbatim - server.example.com - foo@server2.example.com - server3.example.com -@end verbatim - -Then run: - -@verbatim - seq 10 | parallel --sshloginfile mycomputers echo -@end verbatim - -To include the local computer add the special sshlogin ':' to the list: - -@verbatim - server.example.com - foo@server2.example.com - server3.example.com - : -@end verbatim - -GNU @strong{parallel} will try to determine the number of CPU cores on each -of the remote computers, and run one job per CPU core - even if the -remote computers do not have the same number of CPU cores. - -If the number of CPU cores on the remote computers is not identified -correctly, the number of CPU cores can be added in front. Here the -computer has 8 CPU cores. - -@verbatim - seq 10 | parallel --sshlogin 8/server.example.com echo -@end verbatim - -@chapter EXAMPLE: Transferring of files -@anchor{EXAMPLE: Transferring of files} - -To recompress gzipped files with @strong{bzip2} using a remote computer run: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -This will list the .gz-files in the @emph{logs} directory and all -directories below. Then it will transfer the files to -@emph{server.example.com} to the corresponding directory in -@emph{$HOME/logs}. On @emph{server.example.com} the file will be recompressed -using @strong{zcat} and @strong{bzip2} resulting in the corresponding file with -@emph{.gz} replaced with @emph{.bz2}. - -If you want the resulting bz2-file to be transferred back to the local -computer add @emph{--return @{.@}.bz2}: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer --return {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -After the recompression is done, the @emph{.bz2}-file is transferred back to -the local computer and put next to the original @emph{.gz}-file. - -If you want to delete the transferred files on the remote computer add -@emph{--cleanup}. This will remove both the file transferred to the remote -computer and the files transferred from the remote computer: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -If you want to run on several computers add the computers to @emph{--sshlogin} -either using ',' or multiple @emph{--sshlogin}: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -You can add the local computer using @emph{--sshlogin :}.
This will disable the -removing and transferring for the local computer only: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --sshlogin : \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -Often @emph{--transfer}, @emph{--return} and @emph{--cleanup} are used together. They can be -shortened to @emph{--trc}: - -@verbatim - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --sshlogin : \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -With the file @emph{mycomputers} containing the list of computers it becomes: - -@verbatim - find logs/ -name '*.gz' | parallel --sshloginfile mycomputers \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -If the file @emph{~/.parallel/sshloginfile} contains the list of computers -the special shorthand @emph{-S ..} can be used: - -@verbatim - find logs/ -name '*.gz' | parallel -S .. \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" -@end verbatim - -@chapter EXAMPLE: Distributing work to local and remote computers -@anchor{EXAMPLE: Distributing work to local and remote computers} - -Convert *.mp3 to *.ogg running one process per CPU core on the local computer and server2: - -@verbatim - parallel --trc {.}.ogg -S server2,: \ - 'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg' ::: *.mp3 -@end verbatim - -@chapter EXAMPLE: Running the same command on remote computers -@anchor{EXAMPLE: Running the same command on remote computers} - -To run the command @strong{uptime} on remote computers you can do: - -@strong{parallel --tag --nonall -S server1,server2 uptime} - -@strong{--nonall} reads no arguments. If you have a list of jobs you want -run on each computer you can do: - -@strong{parallel --tag --onall -S server1,server2 echo ::: 1 2 3} - -Remove @strong{--tag} if you do not want the sshlogin added before the -output. - -If you have a lot of hosts use '-j0' to access more hosts in parallel. - -@chapter EXAMPLE: Parallelizing rsync -@anchor{EXAMPLE: Parallelizing rsync} - -@strong{rsync} is a great tool, but sometimes it will not fill up the -available bandwidth. This is often a problem when copying several big -files over high speed connections. - -The following will start one @strong{rsync} per big file in @emph{src-dir} to -@emph{dest-dir} on the server @emph{fooserver}: - -@strong{find src-dir -type f -size +100000 | parallel -v ssh fooserver -mkdir -p /dest-dir/@{//@}\;rsync -Havessh @{@} fooserver:/dest-dir/@{@}} - -The dirs created may end up with wrong permissions, and smaller files -are not transferred. To fix those run @strong{rsync} a final time: - -@strong{rsync -Havessh src-dir/ fooserver:/dest-dir/} - -@chapter EXAMPLE: Use multiple inputs in one command -@anchor{EXAMPLE: Use multiple inputs in one command} - -Copy files like foo.es.ext to foo.ext: - -@strong{ls *.es.* | perl -pe 'print; s/\.es//' | parallel -N2 cp @{1@} @{2@}} - -The perl command spits out 2 lines for each input. GNU @strong{parallel} -takes 2 inputs (using @strong{-N2}) and replaces @{1@} and @{2@} with the inputs.
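To see the pairs that @strong{-N2} receives, run the @strong{perl} stage on its own (the file name is made up):

@verbatim
  echo foo.es.ext | perl -pe 'print; s/\.es//'
  # foo.es.ext
  # foo.ext
@end verbatim

GNU @strong{parallel} then turns each pair of lines into one @strong{cp foo.es.ext foo.ext}.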
- -Count in binary: - -@strong{parallel -k echo ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1} - -Print the number on the opposing sides of a six-sided die: - -@strong{parallel --xapply -a <(seq 6) -a <(seq 6 -1 1) echo} - -@strong{parallel --xapply echo :::: <(seq 6) <(seq 6 -1 1)} - -Convert files from all subdirs to PNG-files with consecutive numbers -(useful for making input PNGs for @strong{ffmpeg}): - -@strong{parallel --xapply -a <(find . -type f | sort) -a <(seq $(find . -type f|wc -l)) convert @{1@} @{2@}.png} - -Alternative version: - -@strong{find . -type f | sort | parallel convert @{@} @{#@}.png} - -@chapter EXAMPLE: Use a table as input -@anchor{EXAMPLE: Use a table as input} - -Content of table_file.tsv: - -@verbatim - foo<TAB>bar - baz <TAB>quux -@end verbatim - -To run: - -@verbatim - cmd -o bar -i foo - cmd -o quux -i baz -@end verbatim - -you can run: - -@strong{parallel -a table_file.tsv --colsep '\t' cmd -o @{2@} -i @{1@}} - -Note: The default for GNU @strong{parallel} is to remove the spaces around the columns. To keep the spaces: - -@strong{parallel -a table_file.tsv --trim n --colsep '\t' cmd -o @{2@} -i @{1@}} - -@chapter EXAMPLE: Run the same command 10 times -@anchor{EXAMPLE: Run the same command 10 times} - -If you want to run the same command with the same arguments 10 times -in parallel you can do: - -@strong{seq 10 | parallel -n0 my_command my_args} - -@chapter EXAMPLE: Working as cat | sh. Resource inexpensive jobs and evaluation -@anchor{EXAMPLE: Working as cat | sh. Resource inexpensive jobs and evaluation} - -GNU @strong{parallel} can work similarly to @strong{cat | sh}. - -A resource inexpensive job is a job that takes very little CPU, disk -I/O and network I/O. Ping is an example of a resource inexpensive -job. wget is too - if the webpages are small. - -The content of the file jobs_to_run: - -@verbatim - ping -c 1 10.0.0.1 - wget http://example.com/status.cgi?ip=10.0.0.1 - ping -c 1 10.0.0.2 - wget http://example.com/status.cgi?ip=10.0.0.2 - ... - ping -c 1 10.0.0.255 - wget http://example.com/status.cgi?ip=10.0.0.255 -@end verbatim - -To run 100 processes simultaneously do: - -@strong{parallel -j 100 < jobs_to_run} - -As there is not a @emph{command} the jobs will be evaluated by the shell. - -@chapter EXAMPLE: Processing a big file using more cores -@anchor{EXAMPLE: Processing a big file using more cores} - -To process a big file or some output you can use @strong{--pipe} to split up -the data into blocks and pipe the blocks into the processing program. - -If the program is @strong{gzip -9} you can do: - -@strong{cat bigfile | parallel --pipe --recend '' -k gzip -9 }>@strong{bigfile.gz} - -This will split @strong{bigfile} into blocks of 1 MB and pass them to @strong{gzip -9} in parallel. One @strong{gzip} will be run per CPU core. The output of -@strong{gzip -9} will be kept in order and saved to @strong{bigfile.gz} - -@strong{gzip} works fine if the output is appended, but some processing does -not work like that - for example sorting. For this GNU @strong{parallel} can -put the output of each command into a file. This will sort a big file -in parallel: - -@strong{cat bigfile | parallel --pipe --files sort | parallel -Xj1 sort -m @{@} ';' rm @{@} }>@strong{bigfile.sort} - -Here @strong{bigfile} is split into blocks of around 1MB, each block ending -in '\n' (which is the default for @strong{--recend}). Each block is passed -to @strong{sort} and the output from @strong{sort} is saved into files.
These -files are passed to the second @strong{parallel} that runs @strong{sort -m} on the -files before it removes the files. The output is saved to -@strong{bigfile.sort}. - -@chapter EXAMPLE: Working as mutex and counting semaphore -@anchor{EXAMPLE: Working as mutex and counting semaphore} - -The command @strong{sem} is an alias for @strong{parallel --semaphore}. - -A counting semaphore will allow a given number of jobs to be started -in the background. When that number of jobs is running in the -background, GNU @strong{sem} will wait for one of them to complete before -starting another command. @strong{sem --wait} will wait for all jobs to -complete. - -Run 10 jobs concurrently in the background: - -@verbatim - for i in *.log ; do - echo $i - sem -j10 gzip $i ";" echo done - done - sem --wait -@end verbatim - -A mutex is a counting semaphore allowing only one job to run. This -will edit the file @emph{myfile} and prepend it with lines containing the -numbers 1 to 3. - -@verbatim - seq 3 | parallel sem sed -i -e 'i{}' myfile -@end verbatim - -As @emph{myfile} can be very big it is important that only one process edits -the file at a time. - -Name the semaphore to have multiple different semaphores active at the -same time: - -@verbatim - seq 3 | parallel sem --id mymutex sed -i -e 'i{}' myfile -@end verbatim - -@chapter EXAMPLE: Start editor with filenames from stdin (standard input) -@anchor{EXAMPLE: Start editor with filenames from stdin (standard input)} - -You can use GNU @strong{parallel} to start interactive programs like emacs or vi: - -@strong{cat filelist | parallel --tty -X emacs} - -@strong{cat filelist | parallel --tty -X vi} - -If there are more files than will fit on a single command line, the -editor will be started again with the remaining files. - -@chapter EXAMPLE: Running sudo -@anchor{EXAMPLE: Running sudo} - -@strong{sudo} requires a password to run a command as root. It caches the -access, so you only need to enter the password again if you have not -used @strong{sudo} for a while. - -The command: - -@verbatim - parallel sudo echo ::: This is a bad idea -@end verbatim - -is no good, as you would be prompted for the sudo password for each of -the jobs. You can either do: - -@verbatim - sudo echo This - parallel sudo echo ::: is a good idea -@end verbatim - -or: - -@verbatim - sudo parallel echo ::: This is a good idea -@end verbatim - -This way you only have to enter the sudo password once. - -@chapter EXAMPLE: GNU Parallel as queue system/batch manager -@anchor{EXAMPLE: GNU Parallel as queue system/batch manager} - -GNU @strong{parallel} can work as a simple job queue system or batch manager. -The idea is to put the jobs into a file and have GNU @strong{parallel} read -from that continuously. As GNU @strong{parallel} will stop at end of file we -use @strong{tail} to continue reading: - -@strong{echo }>@strong{jobqueue}; @strong{tail -f jobqueue | parallel} - -To submit your jobs to the queue: - -@strong{echo my_command my_arg }>>@strong{ jobqueue} - -You can of course use @strong{-S} to distribute the jobs to remote -computers: - -@strong{echo }>@strong{jobqueue}; @strong{tail -f jobqueue | parallel -S ..} - -There are two small issues when using GNU @strong{parallel} as a queue -system/batch manager: - -@itemize -@item You will get a warning if you do not submit JobSlots jobs within the -first second. E.g. if you have 8 cores and use @strong{-j+2} you have to submit -10 jobs. These can be dummy jobs (e.g. @strong{echo foo}).
You can also simply -ignore the warning. - -@item Jobs will be run immediately, but output from jobs will only be -printed when JobSlots more jobs have been started. E.g. if you have 10 -jobslots then the output from the first completed job will only be -printed when job 11 is started. - -@end itemize - -@chapter EXAMPLE: GNU Parallel as dir processor -@anchor{EXAMPLE: GNU Parallel as dir processor} - -If you have a dir in which users drop files that need to be processed -you can do this on GNU/Linux (If you know what @strong{inotifywait} is -called on other platforms file a bug report): - -@strong{inotifywait -q -m -r -e MOVED_TO -e CLOSE_WRITE --format %w%f my_dir | parallel -u echo} - -This will run the command @strong{echo} on each file put into @strong{my_dir} or -subdirs of @strong{my_dir}. - -The @strong{-u} is needed because of a small bug in GNU @strong{parallel}. If that -proves to be a problem, file a bug report. - -You can of course use @strong{-S} to distribute the jobs to remote -computers: - -@strong{inotifywait -q -m -r -e MOVED_TO -e CLOSE_WRITE --format %w%f my_dir -| parallel -S .. -u echo} - -If the files to be processed are in a tar file then unpacking one file -and processing it immediately may be faster than first unpacking all -files. Set up the dir processor as above and unpack into the dir. - -@chapter QUOTING -@anchor{QUOTING} - -GNU @strong{parallel} is very liberal in quoting. You only need to quote -characters that have special meaning in shell: - -( ) $ ` ' " < > ; | \ - -and depending on context these need to be quoted, too: - -* ~ & # ! ? space * @{ - -Therefore most people will never need more quoting than putting '\' -in front of the special characters. - -However, when you want to use a shell variable you need to quote the -$-sign. Here is an example using $PARALLEL_SEQ. This variable is set -by GNU @strong{parallel} itself, so the evaluation of the $ must be done by -the sub shell started by GNU @strong{parallel}: - -@strong{seq 10 | parallel -N2 echo seq:\$PARALLEL_SEQ arg1:@{1@} arg2:@{2@}} - -If the variable is set before GNU @strong{parallel} starts you can do this: - -@strong{VAR=this_is_set_before_starting} - -@strong{echo test | parallel echo @{@} $VAR} - -Prints: @strong{test this_is_set_before_starting} - -It is a little more tricky if the variable contains more than one space in a row: - -@strong{VAR="two spaces between each word"} - -@strong{echo test | parallel echo @{@} \'"$VAR"\'} - -Prints: @strong{test two spaces between each word} - -If the variable should not be evaluated by the shell starting GNU -@strong{parallel} but be evaluated by the sub shell started by GNU -@strong{parallel}, then you need to quote it: - -@strong{echo test | parallel VAR=this_is_set_after_starting \; echo @{@} \$VAR} - -Prints: @strong{test this_is_set_after_starting} - -It is a little more tricky if the variable contains spaces: - -@strong{echo test | parallel VAR='"two spaces between each word"' echo @{@} \'"$VAR"\'} - -Prints: @strong{test two spaces between each word} - -$$ is the shell variable containing the process id of the shell. This -will print the process id of the shell running GNU @strong{parallel}: - -@strong{seq 10 | parallel echo $$} - -And this will print the process ids of the sub shells started by GNU -@strong{parallel}.
- -@strong{seq 10 | parallel echo \$\$} - -If the special characters should not be evaluated by the sub shell -then you need to protect them against evaluation from both the shell -starting GNU @strong{parallel} and the sub shell: - -@strong{echo test | parallel echo @{@} \\\$VAR} - -Prints: @strong{test $VAR} - -GNU @strong{parallel} can protect against evaluation by the sub shell by -using -q: - -@strong{echo test | parallel -q echo @{@} \$VAR} - -Prints: @strong{test $VAR} - -This is particularly useful if you have lots of quoting. If you want to run a perl script like this: - -@strong{perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' file} - -It needs to be quoted like this: - -@strong{ls | parallel perl -ne '/^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\"'} - -Notice how spaces, \'s, "'s, and $'s need to be quoted. GNU @strong{parallel} -can do the quoting by using option -q: - -@strong{ls | parallel -q perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"'} - -However, this means you cannot make the sub shell interpret special -characters. For example, because of @strong{-q} this WILL NOT WORK: - -@strong{ls *.gz | parallel -q "zcat @{@} }>@strong{@{.@}"} - -@strong{ls *.gz | parallel -q "zcat @{@} | bzip2 }>@strong{@{.@}.bz2"} - -because > and | need to be interpreted by the sub shell. - -If you get errors like: - -@verbatim - sh: -c: line 0: syntax error near unexpected token - sh: Syntax error: Unterminated quoted string - sh: -c: line 0: unexpected EOF while looking for matching `'' - sh: -c: line 1: syntax error: unexpected end of file -@end verbatim - -then you might try using @strong{-q}. - -If you are using @strong{bash} process substitution like @strong{<(cat foo)} then -you may try @strong{-q} and prepending @emph{command} with @strong{bash -c}: - -@strong{ls | parallel -q bash -c 'wc -c <(echo @{@})'} - -Or for substituting output: - -@strong{ls | parallel -q bash -c 'tar c @{@} | tee }>@strong{(gzip }>@strong{@{@}.tar.gz) | bzip2 }>@strong{@{@}.tar.bz2'} - -@strong{Conclusion}: To avoid dealing with the quoting problems it may be -easier just to write a small script and have GNU @strong{parallel} call that -script. - -@chapter LIST RUNNING JOBS -@anchor{LIST RUNNING JOBS} - -If you want a list of the jobs currently running you can run: - -@strong{killall -USR1 parallel} - -GNU @strong{parallel} will then print the currently running jobs on stderr -(standard error). - -@chapter COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS -@anchor{COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS} - -If you regret starting a lot of jobs you can simply break GNU @strong{parallel}, -but if you want to make sure you do not have half-completed jobs you -should send the signal @strong{SIGTERM} to GNU @strong{parallel}: - -@strong{killall -TERM parallel} - -This will tell GNU @strong{parallel} to not start any new jobs, but wait until -the currently running jobs are finished before exiting. - -@chapter ENVIRONMENT VARIABLES -@anchor{ENVIRONMENT VARIABLES} - -@table @asis -@item $PARALLEL_PID -@anchor{$PARALLEL_PID} - -The environment variable $PARALLEL_PID is set by GNU @strong{parallel} and -is visible to the jobs started from GNU @strong{parallel}. This makes it -possible for the jobs to communicate directly with GNU @strong{parallel}. -Remember to quote the $, so it gets evaluated by the correct -shell. - -@strong{Example:} If each of the jobs tests a solution and one of the jobs finds -the solution the job can tell GNU @strong{parallel} not to start more jobs -by: @strong{kill -TERM $PARALLEL_PID}.
This only works on the local -computer. - -@item $PARALLEL_SEQ -@anchor{$PARALLEL_SEQ} - -$PARALLEL_SEQ will be set to the sequence number of the job -running. Remember to quote the $, so it gets evaluated by the correct -shell. - -@strong{Example:} - -@strong{seq 10 | parallel -N2 echo seq:'$'PARALLEL_SEQ arg1:@{1@} arg2:@{2@}} - -@item $TMPDIR -@anchor{$TMPDIR} - -Directory for temporary files. See: @strong{--tmpdir}. - -@item $PARALLEL -@anchor{$PARALLEL} - -The environment variable $PARALLEL will be used as default options for -GNU @strong{parallel}. If the variable contains special shell characters -(e.g. $, *, or space) then these need to be escaped with \. - -@strong{Example:} - -@strong{cat list | parallel -j1 -k -v ls} - -can be written as: - -@strong{cat list | PARALLEL="-kvj1" parallel ls} - -@strong{cat list | parallel -j1 -k -v -S"myssh user@@server" ls} - -can be written as: - -@strong{cat list | PARALLEL='-kvj1 -S myssh\ user@@server' parallel echo} - -Notice the \ in the middle is needed because 'myssh' and 'user@@server' -must be one argument. - -@end table - -@chapter DEFAULT PROFILE (CONFIG FILE) -@anchor{DEFAULT PROFILE (CONFIG FILE)} - -The file ~/.parallel/config (formerly known as .parallelrc) will be -read if it exists. Lines starting with '#' will be ignored. It can be -formatted like the environment variable $PARALLEL, but it is often -easier to simply put each option on its own line. - -Options on the command line take precedence over the environment -variable $PARALLEL which takes precedence over the file -~/.parallel/config. - -@chapter PROFILE FILES -@anchor{PROFILE FILES} - -If @strong{--profile} is set, GNU @strong{parallel} will read the profile from that file instead of -~/.parallel/config. You can have multiple @strong{--profiles}. - -Example: Profile for running a command on every sshlogin in -~/.parallel/sshloginfile and prepending the output with the sshlogin: - -@verbatim - echo --tag -S .. --nonall > ~/.parallel/n - parallel -Jn uptime -@end verbatim - -Example: Profile for running every command with @strong{-j-1} and @strong{nice}: - -@verbatim - echo -j-1 nice > ~/.parallel/nice_profile - parallel -J nice_profile bzip2 -9 ::: * -@end verbatim - -Example: Profile for running a perl script before every command: - -@verbatim - echo "perl -e '\$a=\$\$; print \$a,\" \",'\$PARALLEL_SEQ',\" \";';" > ~/.parallel/pre_perl - parallel -J pre_perl echo ::: * -@end verbatim - -Note how the $ and " need to be quoted using \. - -Example: Profile for running distributed jobs with @strong{nice} on the -remote computers: - -@verbatim - echo -S .. nice > ~/.parallel/dist - parallel -J dist --trc {.}.bz2 bzip2 -9 ::: * -@end verbatim - -@chapter EXIT STATUS -@anchor{EXIT STATUS} - -If @strong{--halt-on-error} 0 or not specified: - -@table @asis -@item 0 -@anchor{0 1} - -All jobs ran without error. - -@item 1-253 -@anchor{1-253} - -Some of the jobs failed. The exit status gives the number of failed jobs. - -@item 254 -@anchor{254} - -More than 253 jobs failed. - -@item 255 -@anchor{255} - -Other error. - -@end table - -If @strong{--halt-on-error} 1 or 2: Exit status of the failing job. - -@chapter DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES -@anchor{DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES} - -There are a lot of programs with some of the functionality of GNU -@strong{parallel}. GNU @strong{parallel} strives to include the best of the -functionality without sacrificing ease of use.
- -@section SUMMARY TABLE -@anchor{SUMMARY TABLE} - -The following features are in some of the comparable tools: - -Inputs - I1. Arguments can be read from stdin - I2. Arguments can be read from a file - I3. Arguments can be read from multiple files - I4. Arguments can be read from command line - I5. Arguments can be read from a table - I6. Arguments can be read from the same file using #! (shebang) - I7. Line oriented input as default (Quoting of special chars not needed) - -Manipulation of input - M1. Composed command - M2. Multiple arguments can fill up an execution line - M3. Arguments can be put anywhere in the execution line - M4. Multiple arguments can be put anywhere in the execution line - M5. Arguments can be replaced with context - M6. Input can be treated as complete execution line - -Outputs - O1. Grouping output so output from different jobs do not mix - O2. Send stderr (standard error) to stderr (standard error) - O3. Send stdout (standard output) to stdout (standard output) - O4. Order of output can be same as order of input - O5. Stdout only contains stdout (standard output) from the command - O6. Stderr only contains stderr (standard error) from the command - -Execution - E1. Running jobs in parallel - E2. List running jobs - E3. Finish running jobs, but do not start new jobs - E4. Number of running jobs can depend on number of cpus - E5. Finish running jobs, but do not start new jobs after first failure - E6. Number of running jobs can be adjusted while running - -Remote execution - R1. Jobs can be run on remote computers - R2. Basefiles can be transferred - R3. Argument files can be transferred - R4. Result files can be transferred - R5. Cleanup of transferred files - R6. No config files needed - R7. Do not run more than SSHD's MaxStartup can handle - R8. Configurable SSH command - R9. Retry if connection breaks occasionally - -Semaphore - S1. Possibility to work as a mutex - S2. Possibility to work as a counting semaphore - -Legend - - = no - x = not applicable - ID = yes - -As not every new version of the programs is tested, the table may be -outdated. Please file a bug-report if you find errors (See REPORTING -BUGS). - -parallel: -I1 I2 I3 I4 I5 I6 I7 -M1 M2 M3 M4 M5 M6 -O1 O2 O3 O4 O5 O6 -E1 E2 E3 E4 E5 E6 -R1 R2 R3 R4 R5 R6 R7 R8 R9 -S1 S2 - -xargs: -I1 I2 - - - - - -- M2 M3 - - - -- O2 O3 - O5 O6 -E1 - - - - - -- - - - - x - - - -- - - -find -exec: -- - - x - x - -- M2 M3 - - - - -- O2 O3 O4 O5 O6 -- - - - - - - -- - - - - - - - - -x x - -make -j: -- - - - - - - -- - - - - - -O1 O2 O3 - x O6 -E1 - - - E5 - -- - - - - - - - - -- - - -ppss: -I1 I2 - - - - I7 -M1 - M3 - - M6 -O1 - - x - - -E1 E2 ?E3 E4 - - -R1 R2 R3 R4 - - ?R7 ? ? -- - - -pexec: -I1 I2 - I4 I5 - - -M1 - M3 - - M6 -O1 O2 O3 - O5 O6 -E1 - - E4 - E6 -R1 - - - - R6 - - - -S1 - - -xjobs: TODO - Please file a bug-report if you know what features xjobs -supports (See REPORTING BUGS). - -prll: TODO - Please file a bug-report if you know what features prll -supports (See REPORTING BUGS). - -dxargs: TODO - Please file a bug-report if you know what features dxargs -supports (See REPORTING BUGS). - -mdm/middleman: TODO - Please file a bug-report if you know what -features mdm/middleman supports (See REPORTING BUGS). - -xapply: TODO - Please file a bug-report if you know what features xapply -supports (See REPORTING BUGS). - -paexec: TODO - Please file a bug-report if you know what features paexec -supports (See REPORTING BUGS).
- -ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH -supports (See REPORTING BUGS). - -@section DIFFERENCES BETWEEN xargs AND GNU Parallel -@anchor{DIFFERENCES BETWEEN xargs AND GNU Parallel} - -@strong{xargs} offers some of the same possibilities as GNU @strong{parallel}. - -@strong{xargs} deals badly with special characters (such as space, ' and -"). To see the problem try this: - -@verbatim - touch important_file - touch 'not important_file' - ls not* | xargs rm - mkdir -p "My brother's 12\" records" - ls | xargs rmdir -@end verbatim - -You can specify @strong{-0} or @strong{-d "\n"}, but many input generators are not -optimized for using @strong{NUL} as separator but are optimized for -@strong{newline} as separator. E.g. @strong{head}, @strong{tail}, @strong{awk}, @strong{ls}, @strong{echo}, -@strong{sed}, @strong{tar -v}, @strong{perl} (@strong{-0} and \0 instead of \n), @strong{locate} -(requires using @strong{-0}), @strong{find} (requires using @strong{-print0}), @strong{grep} -(requires user to use @strong{-z} or @strong{-Z}), @strong{sort} (requires using @strong{-z}). - -So GNU @strong{parallel}'s newline separation can be emulated with: - -@strong{cat | xargs -d "\n" -n1 @emph{command}} - -@strong{xargs} can run a given number of jobs in parallel, but has no -support for running number-of-cpu-cores jobs in parallel. - -@strong{xargs} has no support for grouping the output, therefore output may -run together, e.g. the first half of a line is from one process and -the last half of the line is from another process. The example -@strong{Parallel grep} cannot be done reliably with @strong{xargs} because of -this. To see this in action try: - -@verbatim - parallel perl -e '\$a=\"1{}\"x10000000\;print\ \$a,\"\\n\"' '>' {} ::: a b c d e f - ls -l a b c d e f - parallel -kP4 -n1 grep 1 > out.par ::: a b c d e f - echo a b c d e f | xargs -P4 -n1 grep 1 > out.xargs-unbuf - echo a b c d e f | xargs -P4 -n1 grep --line-buffered 1 > out.xargs-linebuf - echo a b c d e f | xargs -n1 grep --line-buffered 1 > out.xargs-serial - ls -l out* - md5sum out* -@end verbatim - -@strong{xargs} has no support for keeping the order of the output, therefore -if running jobs in parallel using @strong{xargs} the output of the second -job cannot be postponed till the first job is done. - -@strong{xargs} has no support for running jobs on remote computers. - -@strong{xargs} has no support for context replace, so you will have to create the -arguments. - -If you use a replace string in @strong{xargs} (@strong{-I}) you cannot force -@strong{xargs} to use more than one argument. - -Quoting in @strong{xargs} works like @strong{-q} in GNU @strong{parallel}. This means -composed commands and redirection require using @strong{bash -c}. - -@strong{ls | parallel "wc @{@} }> @strong{@{@}.wc"} - -becomes (assuming you have 8 cores) - -@strong{ls | xargs -d "\n" -P8 -I @{@} bash -c "wc @{@} }>@strong{ @{@}.wc"} - -and - -@strong{ls | parallel "echo @{@}; ls @{@}|wc"} - -becomes (assuming you have 8 cores) - -@strong{ls | xargs -d "\n" -P8 -I @{@} bash -c "echo @{@}; ls @{@}|wc"} - -@section DIFFERENCES BETWEEN find -exec AND GNU Parallel -@anchor{DIFFERENCES BETWEEN find -exec AND GNU Parallel} - -@strong{find -exec} offers some of the same possibilities as GNU @strong{parallel}. - -@strong{find -exec} only works on files. So processing other input (such as -hosts or URLs) will require creating these inputs as files. @strong{find --exec} has no support for running commands in parallel.
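A minimal side-by-side sketch (assuming a dir of log files):

@verbatim
  find . -name '*.log' -exec gzip {} \;   # runs one gzip at a time
  find . -name '*.log' | parallel gzip    # runs one gzip per CPU core
@end verbatim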
- -@section DIFFERENCES BETWEEN make -j AND GNU Parallel -@anchor{DIFFERENCES BETWEEN make -j AND GNU Parallel} - -@strong{make -j} can run jobs in parallel, but requires a crafted Makefile -to do this. That results in extra quoting to get filenames containing -newlines to work correctly. - -@strong{make -j} has no support for grouping the output, therefore output -may run together, e.g. the first half of a line is from one process -and the last half of the line is from another process. The example -@strong{Parallel grep} cannot be done reliably with @strong{make -j} because of -this. - -(Very early versions of GNU @strong{parallel} were coincidentally implemented -using @strong{make -j}). - -@section DIFFERENCES BETWEEN ppss AND GNU Parallel -@anchor{DIFFERENCES BETWEEN ppss AND GNU Parallel} - -@strong{ppss} is also a tool for running jobs in parallel. - -The output of @strong{ppss} is status information and thus not useful for -using as input for another command. The output from the jobs is put -into files. - -The argument replace string ($ITEM) cannot be changed. Arguments must -be quoted - thus arguments containing special characters (space '"&!*) -may cause problems. More than one argument is not supported. File -names containing newlines are not processed correctly. When reading -input from a file, null cannot be used as a terminator. @strong{ppss} needs -to read the whole input file before starting any jobs. - -Output and status information is stored in ppss_dir and thus requires -cleanup when completed. If the dir is not removed before running -@strong{ppss} again it may cause nothing to happen as @strong{ppss} thinks the -task is already done. GNU @strong{parallel} will normally not need cleaning -up if running locally and will only need cleaning up if stopped -abnormally and running remotely (@strong{--cleanup} may not complete if -stopped abnormally). The example @strong{Parallel grep} would require extra -postprocessing if written using @strong{ppss}. - -For remote systems PPSS requires 3 steps: config, deploy, and -start. GNU @strong{parallel} only requires one step. - -@subsection EXAMPLES FROM ppss MANUAL -@anchor{EXAMPLES FROM ppss MANUAL} - -Here are the examples from @strong{ppss}'s manual page with the equivalent -using GNU @strong{parallel}: - -@strong{1} ./ppss.sh standalone -d /path/to/files -c 'gzip ' - -@strong{1} find /path/to/files -type f | parallel gzip - -@strong{2} ./ppss.sh standalone -d /path/to/files -c 'cp "$ITEM" /destination/dir ' - -@strong{2} find /path/to/files -type f | parallel cp @{@} /destination/dir - -@strong{3} ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q ' - -@strong{3} parallel -a list-of-urls.txt wget -q - -@strong{4} ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q "$ITEM"' - -@strong{4} parallel -a list-of-urls.txt wget -q @{@} - -@strong{5} ./ppss config -C config.cfg -c 'encode.sh ' -d /source/dir -m -192.168.1.100 -u ppss -k ppss-key.key -S ./encode.sh -n nodes.txt -o -/some/output/dir --upload --download ; ./ppss deploy -C config.cfg ; -./ppss start -C config - -@strong{5} # parallel does not use configs.
-
-@section DIFFERENCES BETWEEN pexec AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN pexec AND GNU Parallel}
-
-@strong{pexec} is also a tool for running jobs in parallel.
-
-Here are the examples from @strong{pexec}'s info page with the equivalent
-using GNU @strong{parallel}:
-
-@strong{1} pexec -o sqrt-%s.dat -p "$(seq 10)" -e NUM -n 4 -c -- \
-  'echo "scale=10000;sqrt($NUM)" | bc'
-
-@strong{1} seq 10 | parallel -j4 'echo "scale=10000;sqrt(@{@})" | bc > sqrt-@{@}.dat'
-
-@strong{2} pexec -p "$(ls myfiles*.ext)" -i %s -o %s.sort -- sort
-
-@strong{2} ls myfiles*.ext | parallel sort @{@} ">@{@}.sort"
-
-@strong{3} pexec -f image.list -n auto -e B -u star.log -c -- \
-  'fistar $B.fits -f 100 -F id,x,y,flux -o $B.star'
-
-@strong{3} parallel -a image.list \
-  'fistar @{@}.fits -f 100 -F id,x,y,flux -o @{@}.star' 2>star.log
-
-@strong{4} pexec -r *.png -e IMG -c -o - -- \
-  'convert $IMG $@{IMG%.png@}.jpeg ; "echo $IMG: done"'
-
-@strong{4} ls *.png | parallel 'convert @{@} @{.@}.jpeg; echo @{@}: done'
-
-@strong{5} pexec -r *.png -i %s -o %s.jpg -c 'pngtopnm | pnmtojpeg'
-
-@strong{5} ls *.png | parallel 'pngtopnm < @{@} | pnmtojpeg > @{@}.jpg'
-
-@strong{6} for p in *.png ; do echo $@{p%.png@} ; done | \
-  pexec -f - -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg'
-
-@strong{6} ls *.png | parallel 'pngtopnm < @{@} | pnmtojpeg > @{.@}.jpg'
-
-@strong{7} LIST=$(for p in *.png ; do echo $@{p%.png@} ; done)
-  pexec -r $LIST -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg'
-
-@strong{7} ls *.png | parallel 'pngtopnm < @{@} | pnmtojpeg > @{.@}.jpg'
-
-@strong{8} pexec -n 8 -r *.jpg -y unix -e IMG -c \
-  'pexec -j -m blockread -d $IMG | \
-  jpegtopnm | pnmscale 0.5 | pnmtojpeg | \
-  pexec -j -m blockwrite -s th_$IMG'
-
-@strong{8} Combining GNU @strong{parallel} and GNU @strong{sem}.
-
-@strong{8} ls *jpg | parallel -j8 'sem --id blockread cat @{@} | jpegtopnm |' \
-  'pnmscale 0.5 | pnmtojpeg | sem --id blockwrite cat > th_@{@}'
-
-@strong{8} If reading and writing are done to the same disk, this may be
-faster, as only one process will be either reading or writing:
-
-@strong{8} ls *jpg | parallel -j8 'sem --id diskio cat @{@} | jpegtopnm |' \
-  'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_@{@}'
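-
-Building on the use of GNU @strong{sem} in example 8, here is a minimal
-sketch of @strong{sem} as a mutex (the semaphore name and the file names
-are arbitrary):
-
-@verbatim
-  for f in *.log ; do
-    sem --id diskio -j1 gzip "$f"
-  done
-  sem --id diskio --wait
-@end verbatim
-
-Each @strong{sem} call starts its job as soon as the semaphore allows;
-@strong{--wait} blocks until all jobs started under that id have finished.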
-
-@section DIFFERENCES BETWEEN xjobs AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN xjobs AND GNU Parallel}
-
-@strong{xjobs} is also a tool for running jobs in parallel. It only supports
-running jobs on your local computer.
-
-@strong{xjobs} deals badly with special characters just like @strong{xargs}. See
-the section @strong{DIFFERENCES BETWEEN xargs AND GNU Parallel}.
-
-Here are the examples from @strong{xjobs}'s man page with the equivalent
-using GNU @strong{parallel}:
-
-@strong{1} ls -1 *.zip | xjobs unzip
-
-@strong{1} ls *.zip | parallel unzip
-
-@strong{2} ls -1 *.zip | xjobs -n unzip
-
-@strong{2} ls *.zip | parallel unzip >/dev/null
-
-@strong{3} find . -name '*.bak' | xjobs gzip
-
-@strong{3} find . -name '*.bak' | parallel gzip
-
-@strong{4} ls -1 *.jar | sed 's/\(.*\)/\1 > \1.idx/' | xjobs jar tf
-
-@strong{4} ls *.jar | parallel jar tf @{@} '>' @{@}.idx
-
-@strong{5} xjobs -s script
-
-@strong{5} cat script | parallel
-
-@strong{6} mkfifo /var/run/my_named_pipe;
-xjobs -s /var/run/my_named_pipe &
-echo unzip 1.zip >> /var/run/my_named_pipe;
-echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe
-
-@strong{6} mkfifo /var/run/my_named_pipe;
-cat /var/run/my_named_pipe | parallel &
-echo unzip 1.zip >> /var/run/my_named_pipe;
-echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe
-
-@section DIFFERENCES BETWEEN prll AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN prll AND GNU Parallel}
-
-@strong{prll} is also a tool for running jobs in parallel. It does not
-support running jobs on remote computers.
-
-@strong{prll} encourages using BASH aliases and BASH functions instead of
-scripts. GNU @strong{parallel} will never support running aliases (see why
-http://www.perlmonks.org/index.pl?node_id=484296). However, scripts,
-composed commands, or functions exported with @strong{export -f} work just
-fine.
-
-@strong{prll} generates a lot of status information on stderr (standard
-error), which makes it harder to use the stderr output of the job
-directly as input for another program.
-
-Here is the example from @strong{prll}'s man page with the equivalent
-using GNU @strong{parallel}:
-
-prll -s 'mogrify -flip $1' *.jpg
-
-parallel mogrify -flip ::: *.jpg
-
-@section DIFFERENCES BETWEEN dxargs AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN dxargs AND GNU Parallel}
-
-@strong{dxargs} is also a tool for running jobs in parallel.
-
-@strong{dxargs} does not deal well with more simultaneous jobs than sshd's
-MaxStartups. @strong{dxargs} is built only for running jobs remotely, and it
-does not support transferring files.
-
-@section DIFFERENCES BETWEEN mdm/middleman AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN mdm/middleman AND GNU Parallel}
-
-middleman (mdm) is also a tool for running jobs in parallel.
-
-Here are the shell scripts of http://mdm.berlios.de/usage.html ported
-to GNU @strong{parallel}:
-
-@strong{seq 19 | parallel buffon -o - | sort -n }>@strong{ result}
-
-@strong{cat files | parallel cmd}
-
-@strong{find dir -execdir sem cmd @{@} \;}
-
-@section DIFFERENCES BETWEEN xapply AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN xapply AND GNU Parallel}
-
-@strong{xapply} can run jobs in parallel on the local computer.
-
-Here are the examples from @strong{xapply}'s man page with the equivalent
-using GNU @strong{parallel}:
-
-@strong{1} xapply '(cd %1 && make all)' */
-
-@strong{1} parallel 'cd @{@} && make all' ::: */
-
-@strong{2} xapply -f 'diff %1 ../version5/%1' manifest | more
-
-@strong{2} parallel diff @{@} ../version5/@{@} < manifest | more
-
-@strong{3} xapply -p/dev/null -f 'diff %1 %2' manifest1 checklist1
-
-@strong{3} parallel --xapply diff @{1@} @{2@} :::: manifest1 checklist1
-
-@strong{4} xapply 'indent' *.c
-
-@strong{4} parallel indent ::: *.c
-
-@strong{5} find ~ksb/bin -type f ! -perm -111 -print | xapply -f -v 'chmod a+x' -
-
-@strong{5} find ~ksb/bin -type f ! -perm -111 -print | parallel -v chmod a+x
-
-@strong{6} find */ -... | fmt 960 1024 | xapply -f -i /dev/tty 'vi' -
-
-@strong{6} sh <(find */ -... | parallel -s 1024 echo vi)
-
-@strong{6} find */ -... | parallel -s 1024 -Xuj1 vi
-
-@strong{7} find ... | xapply -f -5 -i /dev/tty 'vi' - - - - -
-
-@strong{7} sh <(find ... |parallel -n5 echo vi)
-
-@strong{7} find ... |parallel -n5 -uj1 vi
-
-@strong{8} xapply -fn "" /etc/passwd
-
-@strong{8} parallel -k echo < /etc/passwd
-
-@strong{9} tr ':' '\012' < /etc/passwd | xapply -7 -nf 'chown %1 %6' - - - - - - -
-
-@strong{9} tr ':' '\012' < /etc/passwd | parallel -N7 chown @{1@} @{6@}
-
-@strong{10} xapply '[ -d %1/RCS ] || echo %1' */
-
-@strong{10} parallel '[ -d @{@}/RCS ] || echo @{@}' ::: */
-
-@strong{11} xapply -f '[ -f %1 ] && echo %1' List | ...
-
-@strong{11} parallel '[ -f @{@} ] && echo @{@}' < List | ...
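-
-As in example 9 above, @strong{-N} groups the input so that @{1@} through
-@{@emph{n}@} refer to positions within each group of arguments. A
-minimal sketch (the input is arbitrary and not from the xapply manual):
-
-@verbatim
-  seq 9 | parallel -N3 echo first={1} second={2} third={3}
-@end verbatim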
-
-@section DIFFERENCES BETWEEN paexec AND GNU Parallel
-@anchor{DIFFERENCES BETWEEN paexec AND GNU Parallel}
-
-@strong{paexec} can run jobs in parallel on both local and remote computers.
-
-@strong{paexec} requires commands to print a blank line as the last
-output. This means you will have to write a wrapper for most programs.
-
-@strong{paexec} has a job dependency facility, so a job can depend on another
-job being executed successfully. Sort of a poor man's @strong{make}.
-
-Here are the examples from @strong{paexec}'s example catalog with the equivalent
-using GNU @strong{parallel}:
-
-@table @asis
-@item 1_div_X_run:
-@anchor{1_div_X_run:}
-
-@verbatim
-  ../../paexec -s -l -c "`pwd`/1_div_X_cmd" -n +1 < file
-@end verbatim
-
-@end table
-
-If the jobs are of the form read-compute-write, where writing starts
-before all reading is done, it may be faster to force only one reader
-and one writer at a time:
-
-@verbatim
-  sem --id read cat file | compute | sem --id write cat > file
-@end verbatim
-
-If the jobs are of the form read-compute-read-compute, it may be
-faster to run more jobs in parallel than the system has CPUs, as some
-of the jobs will be stuck waiting for disk access.
-
-@section --nice limits command length
-@anchor{--nice limits command length}
-
-The current implementation of @strong{--nice} is too pessimistic in the max
-allowed command length. It only uses a little more than half of what
-it could. This affects @strong{-X} and @strong{-m}. If this becomes a real
-problem for you, file a bug report.
-
-@section Aliases and functions do not work
-@anchor{Aliases and functions do not work}
-
-If you get:
-
-@strong{Can't exec "@emph{command}": No such file or directory}
-
-or:
-
-@strong{open3: exec of by @emph{command} failed}
-
-it may be because @emph{command} is not known, but it could also be
-because @emph{command} is an alias or a function. If it is a function,
-you need to @strong{export -f} the function first. An alias will, however,
-not work (see why http://www.perlmonks.org/index.pl?node_id=484296), so
-change your alias to a script.
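-
-A minimal sketch of the @strong{export -f} workaround (the function name
-and file names are arbitrary; a shell that supports @strong{export -f},
-such as bash, is assumed):
-
-@verbatim
-  doit() { echo "Processing $1"; wc -l "$1"; }
-  export -f doit
-  ls *.log | parallel doit
-@end verbatim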
-
-@chapter REPORTING BUGS
-@anchor{REPORTING BUGS}
-
-Report bugs to <bug-parallel@@gnu.org> or
-https://savannah.gnu.org/bugs/?func=additem&group=parallel
-
-Your bug report should always include:
-
-@itemize
-@item The output of @strong{parallel --version}. If you are not running the latest
-released version you should specify why you believe the problem is not
-fixed in that version.
-
-@item A complete example that others can run that shows the problem. A
-combination of @strong{seq}, @strong{cat}, @strong{echo}, and @strong{sleep} can reproduce
-most errors. If your example requires large files, see if you can make
-them with something like @strong{seq 1000000} > @strong{file}.
-
-@end itemize
-
-If you suspect the error depends on your distribution, please see
-if you can reproduce the error on one of these VirtualBox images:
-http://sourceforge.net/projects/virtualboximage/files/
-
-Specifying the name of your distribution is not enough, as you may have
-installed software that is not in the VirtualBox images.
-
-@chapter AUTHOR
-@anchor{AUTHOR}
-
-When using GNU @strong{parallel} for a publication, please cite:
-
-O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login:
-The USENIX Magazine, February 2011:42-47.
-
-Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk
-
-Copyright (C) 2008,2009,2010 Ole Tange, http://ole.tange.dk
-
-Copyright (C) 2010,2011,2012 Ole Tange, http://ole.tange.dk and Free
-Software Foundation, Inc.
-
-Parts of the manual concerning @strong{xargs} compatibility are inspired by
-the manual of @strong{xargs} from GNU findutils 4.4.2.
-
-@chapter LICENSE
-@anchor{LICENSE}
-
-Copyright (C) 2007,2008,2009,2010,2011,2012 Free Software Foundation,
-Inc.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-@section Documentation license I
-@anchor{Documentation license I}
-
-Permission is granted to copy, distribute and/or modify this documentation
-under the terms of the GNU Free Documentation License, Version 1.3 or
-any later version published by the Free Software Foundation; with no
-Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
-Texts. A copy of the license is included in the file fdl.txt.
-
-@section Documentation license II
-@anchor{Documentation license II}
-
-You are free:
-
-@table @asis
-@item @strong{to Share}
-@anchor{@strong{to Share}}
-
-to copy, distribute and transmit the work
-
-@item @strong{to Remix}
-@anchor{@strong{to Remix}}
-
-to adapt the work
-
-@end table
-
-Under the following conditions:
-
-@table @asis
-@item @strong{Attribution}
-@anchor{@strong{Attribution}}
-
-You must attribute the work in the manner specified by the author or
-licensor (but not in any way that suggests that they endorse you or
-your use of the work).
-
-@item @strong{Share Alike}
-@anchor{@strong{Share Alike}}
-
-If you alter, transform, or build upon this work, you may distribute
-the resulting work only under the same, similar or a compatible
-license.
-
-@end table
-
-With the understanding that:
-
-@table @asis
-@item @strong{Waiver}
-@anchor{@strong{Waiver}}
-
-Any of the above conditions can be waived if you get permission from
-the copyright holder.
-
-@item @strong{Public Domain}
-@anchor{@strong{Public Domain}}
-
-Where the work or any of its elements is in the public domain under
-applicable law, that status is in no way affected by the license.
-
-@item @strong{Other Rights}
-@anchor{@strong{Other Rights}}
-
-In no way are any of the following rights affected by the license:
-
-@itemize
-@item Your fair dealing or fair use rights, or other applicable
-copyright exceptions and limitations;
-
-@item The author's moral rights;
-
-@item Rights other persons may have either in the work itself or in
-how the work is used, such as publicity or privacy rights.
-
-@end itemize
-
-@end table
-
-@table @asis
-@item @strong{Notice}
-@anchor{@strong{Notice}}
-
-For any reuse or distribution, you must make clear to others the
-license terms of this work.
-
-@end table
-
-A copy of the full license is included in the file cc-by-sa.txt.
-
-@chapter DEPENDENCIES
-@anchor{DEPENDENCIES}
-
-GNU @strong{parallel} uses Perl, and the Perl modules Getopt::Long,
-IPC::Open3, Symbol, IO::File, POSIX, and File::Temp. For remote usage
-it also uses rsync with ssh.
-
-@chapter SEE ALSO
-@anchor{SEE ALSO}
-
-@strong{ssh}(1), @strong{rsync}(1), @strong{find}(1), @strong{xargs}(1), @strong{dirname}(1),
-@strong{make}(1), @strong{pexec}(1), @strong{ppss}(1), @strong{xjobs}(1), @strong{prll}(1),
-@strong{dxargs}(1), @strong{mdm}(1)
-
-@bye
+ parallel --xapply e
\ No newline at end of file
diff --git a/testsuite/tests-to-run/parallel-local114.sh b/testsuite/tests-to-run/parallel-local114.sh
index b300b795..092bf327 100755
--- a/testsuite/tests-to-run/parallel-local114.sh
+++ b/testsuite/tests-to-run/parallel-local114.sh
@@ -41,4 +41,10 @@ echo "bug #36657: --load does not work with custom ssh"
 export -f ssh; parallel --load=1000% -S "/usr/bin/ssh localhost" echo ::: OK
+echo "bug #34958: --pipe with record size measured in lines"
+  seq 10 | parallel --pipe -L 4 cat\;echo FOO
+
+echo "bug #34958: --pipe with record size measured in lines"
+  seq 10 | parallel --pipe -l 4 cat\;echo FOO
+
 EOF
diff --git a/testsuite/wanted-results/parallel-local114 b/testsuite/wanted-results/parallel-local114
index f8b99bdc..c3d74c28 100644
--- a/testsuite/wanted-results/parallel-local114
+++ b/testsuite/wanted-results/parallel-local114
@@ -88,3 +88,18 @@ OK
 OK
 bug #36657: --load does not work with custom ssh
 OK
+bug #34958: --pipe with --record size measured in lines
+1
+2
+3
+4
+FOO
+5
+6
+7
+8
+FOO
+9
+10
+FOO
+FOO