diff --git a/doc/release_new_version b/doc/release_new_version index bb9bd81a..2a646a56 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -208,6 +208,29 @@ available for download at: http://ftp.gnu.org/gnu/parallel/ New in this release: +* Using GNU Parallel to roll-your-own Map Reduce! + http://www.rankfocus.com/hello-world/ + +* 平行化你的工作 + http://www.slideshare.net/drakeguan/part1-23705978 + +* Best Practices for Amazon EMR + http://media.amazonwebservices.com/AWS_Amazon_EMR_Best_Practices.pdf + +* Using GNU Parallel at HPC @ Uni.lu + https://hpc.uni.lu/users/use_cases/ + +* Scaling up with parallelization + https://www.msi.umn.edu/sites/default/files/AdvPython_1.pdf + +* Optimizing translated file downloads + http://www.smartling.com/blog/2013/05/20/optimizing-translated-file-downloads/ + +https://identi.ca/evan/note/6yf1GzAARtyBhj__xzMvAg + +* Faster Rasters For All + http://2013.foss4g.org/conf/programme/presentations/52/ + * Bug fixes and man page updates. diff --git a/src/parallel b/src/parallel index 7ee1cc49..3c9eccae 100755 --- a/src/parallel +++ b/src/parallel @@ -5465,8 +5465,10 @@ sub max_length { close $fh; } else { $cached_limit = real_max_length(); - Semaphore::mkdir_or_die($ENV{'HOME'} . "/.parallel/tmp"); - open(my $fh, ">", $len_cache) || ::die_bug("Cannot write $len_cache"); + # If $HOME is write protected: Do not fail + mkdir($ENV{'HOME'} . "/.parallel"); + mkdir($ENV{'HOME'} . "/.parallel/tmp"); + open(my $fh, ">", $len_cache); print $fh $cached_limit; close $fh; } diff --git a/src/parallel.pdf b/src/parallel.pdf index bf5fdb0c..df256241 100644 Binary files a/src/parallel.pdf and b/src/parallel.pdf differ diff --git a/src/parallel.pod b/src/parallel.pod index efc21ad3..35e65000 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -3542,23 +3542,32 @@ The error message you get (if any). =item * -The output of B. 
If you are not running the latest -released version you should specify why you believe the problem is not -fixed in that version. +The complete output of B. If you are not running +the latest released version you should specify why you believe the +problem is not fixed in that version. =item * A complete example that others can run that shows the problem. This -should preferably be small and simple. A combination of B, B, -B, B, and B can reproduce most errors. If your -example requires large files, see if you can make them by something -like B > B or B > B. +should preferably be small and simple. A combination of B, +B, B, B, and B can reproduce most errors. If +your example requires large files, see if you can make them by +something like B > B or B > +B. If your example requires remote execution, see if you can +use B. =item * The output of your example. If your problem is not easily reproduced by others, the output might help them figure out the problem. +=item * + +Whether you have watched the intro videos +(http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1), walked +through the tutorial (man parallel_tutorial), and read the EXAMPLE +section in the man page (man parallel - search for EXAMPLE:). + =back If you suspect the error is dependent on your environment or diff --git a/src/parallel.texi b/src/parallel.texi index 144c767f..0998aabb 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -456,8 +456,8 @@ Set the end of file string to eof-str. If the end of file string occurs as a line of input, the rest of the input is ignored. If neither @strong{-E} nor @strong{-e} is used, no end of file string is used. -@item @strong{--delay} @emph{secs} (alpha testing) -@anchor{@strong{--delay} @emph{secs} (alpha testing)} +@item @strong{--delay} @emph{secs} (beta testing) +@anchor{@strong{--delay} @emph{secs} (beta testing)} Delay starting next job @emph{secs} seconds. GNU @strong{parallel} will pause @emph{secs} seconds after starting each job. 
@emph{secs} can be less than 1 @@ -483,8 +483,8 @@ because it is POSIX compliant for @strong{xargs} while this option is not. If @emph{eof-str} is omitted, there is no end of file string. If neither @strong{-E} nor @strong{-e} is used, no end of file string is used. -@item @strong{--env} @emph{var} (alpha testing) -@anchor{@strong{--env} @emph{var} (alpha testing)} +@item @strong{--env} @emph{var} (beta testing) +@anchor{@strong{--env} @emph{var} (beta testing)} Copy environment variable @emph{var}. This will copy @emph{var} to the environment that the command is run in. This is especially useful for @@ -516,8 +516,8 @@ See also @strong{--bg}, @strong{man sem}. Implies @strong{--semaphore}. -@item @strong{--filter-hosts} (alpha testing) -@anchor{@strong{--filter-hosts} (alpha testing)} +@item @strong{--filter-hosts} (beta testing) +@anchor{@strong{--filter-hosts} (beta testing)} Remove down hosts. For each remote host: check that login through ssh works. If not: do not use this host. @@ -582,8 +582,8 @@ status will be the exit status from the failing job. @end table -@item @strong{--header} @emph{regexp} (alpha testing) -@anchor{@strong{--header} @emph{regexp} (alpha testing)} +@item @strong{--header} @emph{regexp} (beta testing) +@anchor{@strong{--header} @emph{regexp} (beta testing)} Use regexp as header. For normal usage the matched header (typically the first line: @strong{--header '.*\n'}) will be split using @strong{--colsep} @@ -757,8 +757,8 @@ standard specifies @strong{-L} instead. Implies @strong{-X} unless @strong{-m}, @strong{--xargs}, or @strong{--pipe} is set. -@item @strong{--line-buffer} (alpha testing) -@anchor{@strong{--line-buffer} (alpha testing)} +@item @strong{--line-buffer} (beta testing) +@anchor{@strong{--line-buffer} (beta testing)} Buffer output on line basis. @strong{--group} will keep the output together for a whole job. 
@strong{--ungroup} allows output to mixup with half a line @@ -827,8 +827,8 @@ GNU @strong{parallel} is less than @emph{version} the exit code is This is useful for scripts that depend on features only available from a certain version of GNU @strong{parallel}. -@item @strong{--nonall} -@anchor{@strong{--nonall}} +@item @strong{--nonall} (alpha testing) +@anchor{@strong{--nonall} (alpha testing)} @strong{--onall} with no arguments. Run the command on all computers given with @strong{--sshlogin} but take no arguments. GNU @strong{parallel} will log @@ -838,8 +838,8 @@ computer. @strong{-j} adjusts how many computers to log into in parallel. This is useful for running the same command (e.g. uptime) on a list of servers. -@item @strong{--onall} -@anchor{@strong{--onall}} +@item @strong{--onall} (alpha testing) +@anchor{@strong{--onall} (alpha testing)} Run all the jobs on all computers given with @strong{--sshlogin}. GNU @strong{parallel} will log into @strong{--jobs} number of computers in parallel @@ -862,11 +862,11 @@ all the output from one server will be grouped together. Instead of printing the output to stdout (standard output) the output of each job is saved in a file and the filename is then printed. -@item @strong{--pipe} (alpha testing) -@anchor{@strong{--pipe} (alpha testing)} +@item @strong{--pipe} (beta testing) +@anchor{@strong{--pipe} (beta testing)} -@item @strong{--spreadstdin} (alpha testing) -@anchor{@strong{--spreadstdin} (alpha testing)} +@item @strong{--spreadstdin} (beta testing) +@anchor{@strong{--spreadstdin} (beta testing)} Spread input to jobs on stdin (standard input). Read a block of data from stdin (standard input) and give one block of data as input to one @@ -1029,8 +1029,8 @@ If the stdin (standard input) only contains whitespace, do not run the command. If used with @strong{--pipe} this is slow. 
-@item @strong{--record-env} (alpha testing) -@anchor{@strong{--record-env} (alpha testing)} +@item @strong{--record-env} (beta testing) +@anchor{@strong{--record-env} (beta testing)} Record current environment variables in ~/.parallel/ignored_vars. This is useful before using @strong{--env _}. @@ -1080,11 +1080,11 @@ it to the command. Only used with @strong{--pipe}. -@item @strong{--results} @emph{prefix} (alpha testing) -@anchor{@strong{--results} @emph{prefix} (alpha testing)} +@item @strong{--results} @emph{prefix} (beta testing) +@anchor{@strong{--results} @emph{prefix} (beta testing)} -@item @strong{--res} @emph{prefix} (alpha testing) -@anchor{@strong{--res} @emph{prefix} (alpha testing)} +@item @strong{--res} @emph{prefix} (beta testing) +@anchor{@strong{--res} @emph{prefix} (beta testing)} Save the output into files. The files will be stored in a directory tree rooted at @emph{prefix}. Within this directory tree, each command will result @@ -1206,11 +1206,11 @@ times: @strong{--return} is ignored when used with @strong{--sshlogin :} or when not used with @strong{--sshlogin}. -@item @strong{--round-robin} (alpha testing) -@anchor{@strong{--round-robin} (alpha testing)} +@item @strong{--round-robin} (beta testing) +@anchor{@strong{--round-robin} (beta testing)} -@item @strong{--round} (alpha testing) -@anchor{@strong{--round} (alpha testing)} +@item @strong{--round} (beta testing) +@anchor{@strong{--round} (beta testing)} Normally @strong{--pipe} will give a single block to each instance of the command. With @strong{--round-robin} all blocks will at random be written to @@ -1419,11 +1419,11 @@ The remote host must have GNU @strong{parallel} installed. @strong{--sshlogin} is often used with @strong{--transfer}, @strong{--return}, @strong{--cleanup}, and @strong{--trc}. 
-@item @strong{--sshloginfile} @emph{filename} (alpha testing) -@anchor{@strong{--sshloginfile} @emph{filename} (alpha testing)} +@item @strong{--sshloginfile} @emph{filename} (beta testing) +@anchor{@strong{--sshloginfile} @emph{filename} (beta testing)} -@item @strong{--slf} @emph{filename} (alpha testing) -@anchor{@strong{--slf} @emph{filename} (alpha testing)} +@item @strong{--slf} @emph{filename} (beta testing) +@anchor{@strong{--slf} @emph{filename} (beta testing)} File with sshlogins. The file consists of sshlogins on separate lines. Empty lines and lines starting with '#' are ignored. Example: @@ -1516,8 +1516,8 @@ into temporary files in /tmp. By setting @strong{--tmpdir} you can use a different dir for the files. Setting @strong{--tmpdir} is equivalent to setting $TMPDIR. -@item @strong{--timeout} @emph{val} (alpha testing) -@anchor{@strong{--timeout} @emph{val} (alpha testing)} +@item @strong{--timeout} @emph{val} (beta testing) +@anchor{@strong{--timeout} @emph{val} (beta testing)} Time out for command. If the command runs for longer than @emph{val} seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms @@ -1673,11 +1673,11 @@ Use @strong{-v} @strong{-v} to print the wrapping ssh command when running remot Print the version GNU @strong{parallel} and exit. -@item @strong{--workdir} @emph{mydir} (alpha testing) -@anchor{@strong{--workdir} @emph{mydir} (alpha testing)} +@item @strong{--workdir} @emph{mydir} (beta testing) +@anchor{@strong{--workdir} @emph{mydir} (beta testing)} -@item @strong{--wd} @emph{mydir} (alpha testing) -@anchor{@strong{--wd} @emph{mydir} (alpha testing)} +@item @strong{--wd} @emph{mydir} (beta testing) +@anchor{@strong{--wd} @emph{mydir} (beta testing)} Files transferred using @strong{--transfer} and @strong{--return} will be relative to @emph{mydir} on remote computers, and the command will be executed in @@ -3786,19 +3786,26 @@ Your bug report should always include: @itemize @item The error message you get (if any). 
-@item The output of @strong{parallel --version}. If you are not running the latest -released version you should specify why you believe the problem is not -fixed in that version. +@item The complete output of @strong{parallel --version}. If you are not running +the latest released version you should specify why you believe the +problem is not fixed in that version. @item A complete example that others can run that shows the problem. This -should preferably be small and simple. A combination of @strong{yes}, @strong{seq}, -@strong{cat}, @strong{echo}, and @strong{sleep} can reproduce most errors. If your -example requires large files, see if you can make them by something -like @strong{seq 1000000} > @strong{file} or @strong{yes | head -n 10000000} > @strong{file}. +should preferably be small and simple. A combination of @strong{yes}, +@strong{seq}, @strong{cat}, @strong{echo}, and @strong{sleep} can reproduce most errors. If +your example requires large files, see if you can make them by +something like @strong{seq 1000000} > @strong{file} or @strong{yes | head -n 10000000} > +@strong{file}. If your example requires remote execution, see if you can +use @strong{localhost}. @item The output of your example. If your problem is not easily reproduced by others, the output might help them figure out the problem. +@item Whether you have watched the intro videos +(http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1), walked +through the tutorial (man parallel_tutorial), and read the EXAMPLE +section in the man page (man parallel - search for EXAMPLE:). 
+ @end itemize If you suspect the error is dependent on your environment or diff --git a/src/parallel_tutorial.1 b/src/parallel_tutorial.1 index fd3de0c3..92ad88ec 100644 --- a/src/parallel_tutorial.1 +++ b/src/parallel_tutorial.1 @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "PARALLEL_TUTORIAL 1" -.TH PARALLEL_TUTORIAL 1 "2013-09-18" "20130922" "parallel" +.TH PARALLEL_TUTORIAL 1 "2013-09-28" "20130922" "parallel" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -465,7 +465,8 @@ Output (the order may be different): The command can be a script, a binary or a Bash function if the function is exported using 'export \-f': .PP -.Vb 5 +.Vb 6 +\& # Only works in Bash and only if $SHELL=.../bash \& my_func() { \& echo in my_func $1 \& } @@ -1516,7 +1517,7 @@ Output: \& tried 0 .Ve .PP -Note how job 1 and 2 was tried 3 times, but 0 was not retried because it had exit code 0. +Note how job 1 and 2 were tried 3 times, but 0 was not retried because it had exit code 0. 
.SS "Limiting the ressources" .IX Subsection "Limiting the ressources" To avoid overloading systems \s-1GNU\s0 Parallel can look at the system load @@ -1858,9 +1859,10 @@ Output: \& foo bar baz .Ve .PP -This works for functions too: +This works for functions too if your shell is Bash: .PP -.Vb 5 +.Vb 6 +\& # This only works in Bash \& my_func() { \& echo in my_func $1 \& } @@ -1892,7 +1894,8 @@ Output: Now all new variables and functions defined will be copied when using \&\-\-env _: .PP -.Vb 6 +.Vb 7 +\& # The function is only copied if using Bash \& my_func2() { \& echo in my_func2 $VAR $1 \& } @@ -1900,12 +1903,13 @@ Now all new variables and functions defined will be copied when using \& VAR=foo \& export VAR \& -\& parallel \-\-env _ \-S $SERVER1 my_func2 ::: bar +\& parallel \-\-env _ \-S $SERVER1 \*(Aqecho $VAR; my_func2\*(Aq ::: bar .Ve .PP Output: .PP -.Vb 1 +.Vb 2 +\& foo \& in my_func2 foo bar .Ve .SS "Showing what is actually run" diff --git a/src/parallel_tutorial.html b/src/parallel_tutorial.html index 3a86065f..806e6484 100644 --- a/src/parallel_tutorial.html +++ b/src/parallel_tutorial.html @@ -374,6 +374,7 @@ treated as commands:

The command can be a script, a binary or a Bash function if the function is exported using 'export -f':

+  # Only works in Bash and only if $SHELL=.../bash
   my_func() {
     echo in my_func $1
   }
@@ -1051,7 +1052,7 @@ command fails for unkown reasons now and then.

tried 1 tried 2 tried 0
-

Note how job 1 and 2 was tried 3 times, but 0 was not retried because it had exit code 0.

+

Note how job 1 and 2 were tried 3 times, but 0 was not retried because it had exit code 0.

Limiting the ressources

@@ -1274,8 +1275,9 @@ remote system.

Output:

   foo bar baz
-

This works for functions too:

+

This works for functions too if your shell is Bash:

+  # This only works in Bash
   my_func() {
     echo in my_func $1
   }
@@ -1296,6 +1298,7 @@ remote system. It just need to record which ones to ignore in
 

Now all new variables and functions defined will be copied when using --env _:

+  # The function is only copied if using Bash
   my_func2() {
     echo in my_func2 $VAR $1
   }
@@ -1303,9 +1306,10 @@ remote system. It just need to record which ones to ignore in
   VAR=foo
   export VAR
-  parallel --env _ -S $SERVER1 my_func2 ::: bar
+  parallel --env _ -S $SERVER1 'echo $VAR; my_func2' ::: bar

Output:

+  foo
   in my_func2 foo bar

diff --git a/src/parallel_tutorial.pdf b/src/parallel_tutorial.pdf index 4fe6034d..cf0a1154 100644 Binary files a/src/parallel_tutorial.pdf and b/src/parallel_tutorial.pdf differ diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod index fd3d3b5c..99f69b86 100644 --- a/src/parallel_tutorial.pod +++ b/src/parallel_tutorial.pod @@ -285,6 +285,7 @@ Output (the order may be different): The command can be a script, a binary or a Bash function if the function is exported using 'export -f': + # Only works in Bash and only if $SHELL=.../bash my_func() { echo in my_func $1 } @@ -1089,7 +1090,7 @@ Output: tried 2 tried 0 -Note how job 1 and 2 was tried 3 times, but 0 was not retried because it had exit code 0. +Note how job 1 and 2 were tried 3 times, but 0 was not retried because it had exit code 0. =head2 Limiting the ressources @@ -1358,8 +1359,9 @@ Output: foo bar baz -This works for functions too: +This works for functions too if your shell is Bash: + # This only works in Bash my_func() { echo in my_func $1 } @@ -1384,6 +1386,7 @@ Output: Now all new variables and functions defined will be copied when using --env _: + # The function is only copied if using Bash my_func2() { echo in my_func2 $VAR $1 } @@ -1391,10 +1394,11 @@ Now all new variables and functions defined will be copied when using VAR=foo export VAR - parallel --env _ -S $SERVER1 my_func2 ::: bar + parallel --env _ -S $SERVER1 'echo $VAR; my_func2' ::: bar Output: + foo in my_func2 foo bar =head2 Showing what is actually run diff --git a/testsuite/tests-to-run/parallel-local-ssh1.sh b/testsuite/tests-to-run/parallel-local-ssh1.sh index 67aeba14..9b4c99fc 100644 --- a/testsuite/tests-to-run/parallel-local-ssh1.sh +++ b/testsuite/tests-to-run/parallel-local-ssh1.sh @@ -12,4 +12,9 @@ echo '### --env _ with explicit mentioning of normally ignored var $DISPLAY' echo '### --filter-hosts --slf <()' parallel --nonall --filter-hosts --slf <(echo localhost) echo OK +echo '### bug #40002: --files and 
--nonall seem not to work together:' + parallel --files --nonall -S localhost true | tee >(parallel rm) | wc -l + +echo '### bug #40001: --joblog and --nonall seem not to work together:' + parallel --joblog - --nonall -S lo,localhost true | wc -l EOF diff --git a/testsuite/wanted-results/parallel-local-ssh1 b/testsuite/wanted-results/parallel-local-ssh1 index 31a695c9..7f3579e0 100644 --- a/testsuite/wanted-results/parallel-local-ssh1 +++ b/testsuite/wanted-results/parallel-local-ssh1 @@ -9,6 +9,10 @@ Block_end ### --env _ DISPLAY: Undefined variable. ### --env _ with explicit mentioning of normally ignored var $DISPLAY -OK FUBAR :0 test +OK FUBAR :0.0 test ### --filter-hosts --slf <() OK +### bug #40002: --files and --nonall seem not to work together: +1 +### bug #40001: --joblog and --nonall seem not to work together: +3