From 763dd12caa80c1c279277f0dfb054563eba6310b Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Sat, 15 Nov 2014 15:25:19 +0100 Subject: [PATCH] Fixed env_parallel so it works post-shell-shock. --- doc/release_new_version | 9 +- src/parallel | 5 +- src/parallel.pod | 111 +++++++++--------- testsuite/tests-to-run/parallel-local-ssh1.sh | 12 ++ testsuite/wanted-results/parallel-local-ssh1 | 8 ++ 5 files changed, 87 insertions(+), 58 deletions(-) diff --git a/doc/release_new_version b/doc/release_new_version index 1e6ff6b1..88df3ead 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -238,13 +238,18 @@ A central piece of command generation was rewritten making this release beta qua New in this release: -* --hostgroup - Thanks to Michel Courtine for developing a prototype. +* Remote systems can be divided into hostgroups (e.g. web and db) by prepending '@groupname/' to the sshlogin. Multiple groups can be given by separating groups with '+'. E.g. @web/www1 @web+db/www2 @db/mariadb + +* Remote execution can be restricted to servers that are part of one or more groups by '@groupname' as an sshlogin. Multiple groups can be given by separating groups with '+'. E.g. -S @web or -S @db+web + +* With --hostgroup you can restrict arguments to certain hostgroups by appending '@groupname' to the argument. Multiple groups can be given by separating groups with '+'. E.g. my_web_arg@web db-or-web-arg@db+web db-only-arg@db Thanks to Michel Courtine for developing a prototype for this. 
* GNU Parallel was cited in: SlideToolkit: An Assistive Toolset for the Histological Quantification of Whole Slide Images http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0110289#close * GNU Parallel was cited in: Exploring a multiprocessor design space to analyze the impact of using STT-RAM in the memory hierarchy http://conservancy.umn.edu/bitstream/handle/11299/167286/Borse_umn_0130M_15431.pdf +* Command-Line OCR with Tesseract on Mac OS X https://ryanfb.github.io/etc/2014/11/13/command_line_ocr_on_mac_os_x.html + * Bug fixes and man page updates. GNU Parallel - For people who live life in the parallel lane. diff --git a/src/parallel b/src/parallel index 9851b9d3..537b8d9f 100755 --- a/src/parallel +++ b/src/parallel @@ -507,6 +507,7 @@ sub nindex { } $sleep = ::reap_usleep($sleep); } +# TODO Why is needed? # start_more_jobs(); return $something_written; } @@ -1159,7 +1160,7 @@ sub parse_env_var { my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) } grep { not /^parallel_bash_environment$/ } @non_functions); my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) } - grep { not /^parallel_bash_environment$/ } @non_functions, @bash_pre_shellshock); + @non_functions, @bash_pre_shellshock); push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock; push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock; @@ -1187,7 +1188,7 @@ sub parse_env_var { . join(" && ", @qbash) .q{;}); if($ENV{'parallel_bash_environment'}) { - $Global::envvar .= "parallel_bash_environment;\n"; + $Global::envvar .= 'eval "$parallel_bash_environment";'."\n"; } } $Global::envvarlen = length $Global::envvar; diff --git a/src/parallel.pod b/src/parallel.pod index 4452a01c..7d20b1c8 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -80,20 +80,22 @@ If it is a Bash function you need to B the function first. To use aliases copy the full environment as described under B<--env> and use B instead of B. 
-If it is a zsh function you will need to use this helper function -B to export and to set $PARALLEL_SHELL to bash: - - function exportf (){ - export $(echo $1)="`whence -f $1 | sed -e "s/$1 //" `" - } - - function my_func(){ - echo $1; - echo "hello"; - } - - exportf my_func - PARALLEL_SHELL=/bin/bash parallel "my_func {}" ::: 1 2 +=cut +# If it is a zsh function you will need to use this helper function +# B to export and to set $PARALLEL_SHELL to bash: +# +# function exportf (){ +# export $(echo $1)="`whence -f $1 | sed -e "s/$1 //" `" +# } +# +# function my_func(){ +# echo $1; +# echo "hello"; +# } +# +# exportf my_func +# PARALLEL_SHELL=/bin/bash parallel "my_func {}" ::: 1 2 +=pod The command cannot contain the character \257 (macron: ¯). @@ -434,15 +436,15 @@ than a single record. I defaults to 1M. -See B<--pipe> for use of this. +See B<--pipe> and B<--pipepart> for use of this. =item B<--cat> -Create a temporary file with content. Normally B<--pipe> will give -data to the program on stdin (standard input). With B<--cat> GNU -B will create a temporary file with the name in {}, so you -can do: B. +Create a temporary file with content. Normally B<--pipe>/B<--pipepart> +will give data to the program on stdin (standard input). With B<--cat> +GNU B will create a temporary file with the name in {}, so +you can do: B. See also B<--fifo>. @@ -503,23 +505,21 @@ output) unless B<--decompress-program> is given. =item B<--ctrlc> -Sends SIGINT to tasks running on remote computers thus killing them. +If receiving SIGINT, GNU B will send SIGINT to tasks +running on remote computers thus killing them. =item B<--delimiter> I =item B<-d> I -Input items are terminated by the specified character. Quotes and -backslash are not special; every character in the input is taken -literally. Disables the end-of-file string, which is treated like any -other argument. 
This can be used when the input consists of simply -newline-separated items, although it is almost always better to design -your program to use --null where this is possible. The specified -delimiter may be a single character, a C-style character escape such -as \n, or an octal or hexadecimal escape code. Octal and -hexadecimal escape codes are understood as for the printf command. -Multibyte characters are not supported. +Input items are terminated by I. Quotes and backslash are not +special; every character in the input is taken literally. Disables +the end-of-file string, which is treated like any other argument. The +specified delimiter may be characters, C-style character escapes such +as \n, or octal or hexadecimal escape codes. Octal and hexadecimal +escape codes are understood as for the printf command. Multibyte +characters are not supported. =item B<--dirnamereplace> I @@ -531,8 +531,8 @@ dirname of input line. =item B<-E> I -Set the end of file string to eof-str. If the end of file string -occurs as a line of input, the rest of the input is ignored. If +Set the end of file string to I. If the end of file string +occurs as a line of input, the rest of the input is not read. If neither B<-E> nor B<-e> is used, no end of file string is used. @@ -540,16 +540,17 @@ neither B<-E> nor B<-e> is used, no end of file string is used. Delay starting next job I seconds. GNU B will pause I seconds after starting each job. I can be less than 1 -seconds. +second. =item B<--dry-run> Print the job to run on stdout (standard output), but do not run the -job. Use B<-v -v> to include the ssh/rsync wrapping if the job would -be run on a remote computer. Do not count on this literaly, though, as -the job may be scheduled on another computer or the local computer if -: is in the list. +job. Use B<-v -v> to include the wrapping that GNU Parallel generates +(for remote jobs, B<--tmux>, B<--nice>, B<--pipe>, B<--pipepart>, +B<--fifo> and B<--cat>). 
Do not count on this literally, though, as the +job may be scheduled on another computer or the local computer if : is +in the list. =item B<--eof>[=I] @@ -571,8 +572,8 @@ remote execution. In Bash I can also be a Bash function - just remember to B the function, see B. -The variable '_' is special. It will copy all environment variables -except for the ones mentioned in ~/.parallel/ignored_vars. +The variable '_' is special. It will copy all exported environment +variables except for the ones mentioned in ~/.parallel/ignored_vars. To copy Bash arrays you need an importer function, as Bash arrays cannot be exported: @@ -594,15 +595,13 @@ cannot be exported: parallel --env my_importer \ 'my_importer; echo "{}" "${indexed[{}]}" "${assoc[${indexed[{}]}]}"' ::: "${!indexed[@]}" -To copy the full environment use this function (e.g. by putting it in .bashrc): +To copy the full environment (both exported and not exported variables +and functions) use this function (e.g. by putting it in .bashrc): env_parallel() { - export parallel_bash_environment='() { - '"$(echo "shopt -s expand_aliases 2>/dev/null"; alias;typeset -p | grep -vFf <(readonly; echo GROUPS; echo FUNCNAME; echo DIRSTACK; echo _; echo PIPESTATUS; echo USERNAME) | grep -v BASH_;typeset -f)"' - }' - # Run as: env_parallel [normal parallel options] - `which parallel` "$@" - unset parallel_bash_environment + export parallel_bash_environment="$(echo "shopt -s expand_aliases 2>/dev/null"; alias;typeset -p | grep -vFf <(readonly; echo GROUPS; echo FUNCNAME; echo DIRSTACK; echo _; echo PIPESTATUS; echo USERNAME) | grep -v BASH_;typeset -f)"; + `which parallel` "$@"; + unset parallel_bash_environment; } # call as: env_parallel [normal parallel options] @@ -615,25 +614,28 @@ See also: B<--record-env>. Show the estimated number of seconds before finishing. This forces GNU B to read all jobs before starting to find the number of jobs. GNU B normally only reads the next job to run. + Implies B<--progress>. 
+See also: B<--bar>, B<--progress>. + =item B<--fg> Run command in foreground thus GNU B will wait for completion of the command before exiting. -See also B<--bg>, B. - Implies B<--semaphore>. +See also B<--bg>, B. + =item B<--fifo> -Create a temporary fifo with content. Normally B<--pipe> will give -data to the program on stdin (standard input). With B<--fifo> GNU -B will create a temporary fifo with the name in {}, so you -can do: B. +Create a temporary fifo with content. Normally B<--pipe> and +B<--pipepart> will give data to the program on stdin (standard +input). With B<--fifo> GNU B will create a temporary fifo +with the name in {}, so you can do: B. Beware: If data is not read from the fifo, the job will block forever. @@ -724,7 +726,7 @@ For B<--pipe> the matched header will be prepended to each output. B<--header :> is an alias for B<--header '.*\n'>. -If I is a number, it will match that many lines. +If I is a number, it is a fixed number of lines. =item B<--hostgroups> (alpha testing) @@ -775,7 +777,7 @@ To convert the times into ISO-8601 strict do: perl -a -F"\t" -ne \ 'chomp($F[2]=`date -d \@$F[2] +%FT%T`); print join("\t",@F)' -See also B<--resume>. +See also B<--resume>, B<--resume-failed>. =item B<--jobs> I @@ -997,6 +999,7 @@ all the output from one server will be grouped together. Instead of printing the output to stdout (standard output) the output of each job is saved in a file and the filename is then printed. 
+See also: B<--results> =item B<--pipe> diff --git a/testsuite/tests-to-run/parallel-local-ssh1.sh b/testsuite/tests-to-run/parallel-local-ssh1.sh index 8ab3acfa..6e3bd99d 100644 --- a/testsuite/tests-to-run/parallel-local-ssh1.sh +++ b/testsuite/tests-to-run/parallel-local-ssh1.sh @@ -29,6 +29,7 @@ echo '### Test bug #34241: --pipe should not spawn unneeded processes' echo '### --env _' fUbAr="OK FUBAR" parallel -S parallel@lo --env _ echo '$fUbAr $DEBEMAIL' ::: test fUbAr="OK FUBAR" parallel -S csh@lo --env _ echo '$fUbAr $DEBEMAIL' ::: test + echo '### --env _ with explicit mentioning of normally ignored var $DEBEMAIL' fUbAr="OK FUBAR" parallel -S parallel@lo --env DEBEMAIL,_ echo '$fUbAr $DEBEMAIL' ::: test fUbAr="OK FUBAR" parallel -S csh@lo --env DEBEMAIL,_ echo '$fUbAr $DEBEMAIL' ::: test @@ -37,6 +38,17 @@ echo 'bug #40137: SHELL not bash: Warning when exporting funcs' . <(printf 'myfunc() {\necho $1\n}'); export -f myfunc; parallel --env myfunc -S lo myfunc ::: no_warning . <(printf 'myfunc() {\necho $1\n}'); export -f myfunc; SHELL=/bin/sh parallel --env myfunc -S lo myfunc ::: warning +echo 'env_parallel from man page - transfer non-exported var' + env_parallel() { + export parallel_bash_environment="$(echo "shopt -s expand_aliases 2>/dev/null"; alias;typeset -p | grep -vFf <(readonly; echo GROUPS; echo FUNCNAME; echo DIRSTACK; echo _; echo PIPESTATUS; echo USERNAME) | grep -v BASH_;typeset -f)"; + `which parallel` "$@"; + unset parallel_bash_environment; + }; + var=nonexported env_parallel -S parallel@lo echo '$var' ::: variable + +echo 'compared to parallel - no transfer non-exported var' + var=nonexported parallel -S parallel@lo echo '$var' ::: variable + echo '### bug #40002: --files and --nonall seem not to work together:' parallel --files --nonall -S localhost true | tee >(parallel rm) | wc -l diff --git a/testsuite/wanted-results/parallel-local-ssh1 b/testsuite/wanted-results/parallel-local-ssh1 index 1a87138e..9a6a801c 100644 --- 
a/testsuite/wanted-results/parallel-local-ssh1 +++ b/testsuite/wanted-results/parallel-local-ssh1 @@ -59,6 +59,14 @@ bug #40137: SHELL not bash: Warning when exporting funcs no_warning . <(printf 'myfunc() {\necho $1\n}'); export -f myfunc; SHELL=/bin/sh parallel --env myfunc -S lo myfunc ::: warning warning +echo 'env_parallel from man page - transfer non-exported var' +env_parallel from man page - transfer non-exported var + env_parallel() { export parallel_bash_environment="$(echo "shopt -s expand_aliases 2>/dev/null"; alias;typeset -p | grep -vFf <(readonly; echo GROUPS; echo FUNCNAME; echo DIRSTACK; echo _; echo PIPESTATUS; echo USERNAME) | grep -v BASH_;typeset -f)"; `which parallel` "$@"; unset parallel_bash_environment; }; var=nonexported env_parallel -S parallel@lo echo '$var' ::: variable +nonexported variable +echo 'compared to parallel - no transfer non-exported var' +compared to parallel - no transfer non-exported var + var=nonexported parallel -S parallel@lo echo '$var' ::: variable +variable echo '### bug #40002: --files and --nonall seem not to work together:' ### bug #40002: --files and --nonall seem not to work together: parallel --files --nonall -S localhost true | tee >(parallel rm) | wc -l