From d08c8ac41750663c772f3886e2ac23cce81f1c7d Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Sat, 11 Mar 2017 21:19:10 +0100 Subject: [PATCH] parallel: --plus enables most bashisms for replacement strings. --- doc/release_new_version | 13 +++++ src/parallel | 21 +++++++ src/parallel.pod | 16 ++++- testsuite/tests-to-run/parallel-local-0.3s.sh | 58 ++++++++++++++++++- testsuite/wanted-results/parallel-local-0.3s | 42 +++++++++++++- 5 files changed, 145 insertions(+), 5 deletions(-) diff --git a/doc/release_new_version b/doc/release_new_version index 1eb21308..7eda6ec0 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -206,6 +206,19 @@ Haiku of the month: New in this release: +* --rpl can now take arguments by adding '(regexp)' in the replacement string. + +https://joss.theoj.org/papers/3cde54de7dfbcada7c0fc04f569b36c7 +https://link.springer.com/article/10.1134/S0016793217010108 +http://biorxiv.org/content/biorxiv/early/2017/02/17/109280.full.pdf +https://arxiv.org/pdf/1612.08239.pdf +http://pubs.acs.org/doi/pdfplus/10.1021/acs.jctc.6b00506 +https://link.springer.com/article/10.1007/s11042-016-4185-5 +https://arxiv.org/pdf/1611.08003.pdf +https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8 + +https://dspace.library.colostate.edu/bitstream/handle/11124/170687/Johnson_mines_0052E_11207.pdf?sequence=1&isAllowed=y + http://www.blopig.com/blog/2017/02/parallel-computing-gnu-parallel/ http://garf.us/2017/02/stig-sandbeck-mathisen-change-all-the-passwords-again/ diff --git a/src/parallel b/src/parallel index 7eee3104..21ecbab6 100755 --- a/src/parallel +++ b/src/parallel @@ -1397,6 +1397,26 @@ sub init_globals { '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', # {##} = number of jobs '{##}' => '$_=total_jobs()', + # Bash ${a:-myval} + '{:-(.+?)}' => '$_ ||= $$1', + # Bash ${a:2} + '{:(\d+?)}' => 'substr($_,0,$$1) = ""', + # Bash ${a:2:3} + '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', + # Bash ${a#bc} + 
'{#([^#].*?)}' => 's/^$$1//;', + # Bash ${a%def} + '{%(.+?)}' => 's/$$1$//;', + # Bash ${a/def/ghi} + '{/(.+?)/(.+?)}' => 's/$$1/$$2/;', + # Bash ${a^a} + '{^(.+?)}' => 's/($$1)/uc($1)/e;', + # Bash ${a^^a} + '{^^(.+?)}' => 's/($$1)/uc($1)/eg;', + # Bash ${a,A} + '{,(.+?)}' => 's/($$1)/lc($1)/e;', + # Bash ${a,,A} + '{,,(.+?)}' => 's/($$1)/lc($1)/eg;', ); # Modifiable copy of %Global::replace %Global::rpl = %Global::replace; @@ -9477,6 +9497,7 @@ sub new { ( \(.*\) )? # Group capture regexp - e.g (.*) ( [^)]* )$ # Postfix - e.g } /x; + $grp_regexp ||= ''; my $rplval = $Global::rpl{$rpl}; while(s{( (?: ^|\257> ) [^\257]*? ) # Don't replace after \257 unless \257> \Q$prefix\E $grp_regexp \Q$postfix\E} diff --git a/src/parallel.pod b/src/parallel.pod index 7670ba45..0ad23086 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -727,9 +727,9 @@ may not be used. B<--gnu> is kept for compatibility. =item B<--group> Group output. Output from each jobs is grouped together and is only -printed when the command is finished. stderr (standard error) first -followed by stdout (standard output). This takes some CPU time. In -rare situations GNU B<parallel> takes up lots of CPU time and if it is +printed when the command is finished. stdout (standard output) first +followed by stderr (standard error). This takes some CPU time. In rare +situations GNU B<parallel> takes up lots of CPU time and if it is acceptable that the outputs from different commands are mixed together, then disabling grouping with B<-u> can speedup GNU B<parallel> by a factor of 10. 
@@ -1759,6 +1759,16 @@ The B<--plus> replacement strings are implemented as: --rpl '{/..} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::' --rpl '{/...} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::' --rpl '{##} $_=total_jobs()' + --rpl '{:-(.+?)} $_ ||= $$1' + --rpl '{:(\d+?)} substr($_,0,$$1) = ""' + --rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);' + --rpl '{#([^#].*?)} s/^$$1//;' + --rpl '{%(.+?)} s/$$1$//;' + --rpl '{/(.+?)/(.+?)} s/$$1/$$2/;' + --rpl '{^(.+?)} s/($$1)/uc($1)/e;' + --rpl '{^^(.+?)} s/($$1)/uc($1)/eg;' + --rpl '{,(.+?)} s/($$1)/lc($1)/e;' + --rpl '{,,(.+?)} s/($$1)/lc($1)/eg;' If the user defined replacement string starts with '{' it can also be diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh index f9f7b8c8..c1f31ef2 100644 --- a/testsuite/tests-to-run/parallel-local-0.3s.sh +++ b/testsuite/tests-to-run/parallel-local-0.3s.sh @@ -96,7 +96,7 @@ echo '### bug #43817: Some JP char cause problems in positional replacement stri echo '**' echo '### --rpl % that is a substring of longer --rpl %D' -parallel --plus --rpl '%' +parallel --rpl '{+.} s:.*\.::' --rpl '%' --rpl '%D $_=::shell_quote(::dirname($_));' --rpl '%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::' 'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f @@ -683,6 +683,62 @@ par_tagstring_pipe() { seq 3000 | parallel -j4 --pipe -N1000 -k --tagstring {%} LANG=C wc } +par_plus_dyn_repl() { + echo "Dynamic replacement strings defined by --plus" + + unset a + echo ${a:-myval} + parallel --rpl '{:-(.+)} $_ ||= $$1' echo {:-myval} ::: "$a" + parallel --plus echo {:-myval} ::: "$a" + + a=abcAaAdef + echo ${a:2} + parallel --rpl '{:(\d+)} substr($_,0,$$1) = ""' echo {:2} ::: "$a" + parallel --plus echo {:2} ::: "$a" + + echo ${a:2:3} + parallel --rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);' echo {:2:3} ::: "$a" + parallel --plus echo {:2:3} ::: "$a" + + echo ${#a} + parallel --rpl '{#} $_ = length $_;' echo {#} 
::: "$a" + # {#} used for job number + parallel --plus echo {#} ::: "$a" + + echo ${a#bc} + parallel --rpl '{#(.+?)} s/^$$1//;' echo {#bc} ::: "$a" + parallel --plus echo {#bc} ::: "$a" + echo ${a#abc} + parallel --rpl '{#(.+?)} s/^$$1//;' echo {#abc} ::: "$a" + parallel --plus echo {#abc} ::: "$a" + + echo ${a%de} + parallel --rpl '{%(.+?)} s/$$1$//;' echo {%de} ::: "$a" + parallel --plus echo {%de} ::: "$a" + echo ${a%def} + parallel --rpl '{%(.+?)} s/$$1$//;' echo {%def} ::: "$a" + parallel --plus echo {%def} ::: "$a" + + echo ${a/def/ghi} + parallel --rpl '{/(.+?)/(.+?)} s/$$1/$$2/;' echo {/def/ghi} ::: "$a" + parallel --plus echo {/def/ghi} ::: "$a" + + echo ${a^a} + parallel --rpl '{^(.+?)} s/($$1)/uc($1)/e;' echo {^a} ::: "$a" + parallel --plus echo {^a} ::: "$a" + echo ${a^^a} + parallel --rpl '{^^(.+?)} s/($$1)/uc($1)/eg;' echo {^^a} ::: "$a" + parallel --plus echo {^^a} ::: "$a" + + a=AbcAaAdef + echo ${a,A} + parallel --rpl '{,(.+?)} s/($$1)/lc($1)/e;' echo '{,A}' ::: "$a" + parallel --plus echo '{,A}' ::: "$a" + echo ${a,,A} + parallel --rpl '{,,(.+?)} s/($$1)/lc($1)/eg;' echo '{,,A}' ::: "$a" + parallel --plus echo '{,,A}' ::: "$a" +} + export -f $(compgen -A function | grep par_) compgen -A function | grep par_ | sort | parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1' diff --git a/testsuite/wanted-results/parallel-local-0.3s b/testsuite/wanted-results/parallel-local-0.3s index 92334e7d..c63c648f 100644 --- a/testsuite/wanted-results/parallel-local-0.3s +++ b/testsuite/wanted-results/parallel-local-0.3s @@ -101,7 +101,7 @@ echo '**' ** echo '### --rpl % that is a substring of longer --rpl %D' ### --rpl % that is a substring of longer --rpl %D -parallel --plus --rpl '%' --rpl '%D $_=::shell_quote(::dirname($_));' --rpl '%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::' 'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f +parallel --rpl '{+.} s:.*\.::' --rpl '%' --rpl '%D $_=::shell_quote(::dirname($_));' --rpl 
'%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::' 'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f a.b/c.d/e.f=a.b/c.d/e.f a.b/c.d=a.b/c.d e=e @@ -1623,6 +1623,46 @@ par_file_ending_in_newline gzip /tmp/parallel_f2' par_file_ending_in_newline ' par_pipepart_block_bigger_2G ### Test that --pipepart can have blocks > 2GB par_pipepart_block_bigger_2G 1 1 4 +par_plus_dyn_repl Dynamic replacement strings defined by --plus +par_plus_dyn_repl myval +par_plus_dyn_repl myval +par_plus_dyn_repl myval +par_plus_dyn_repl cAaAdef +par_plus_dyn_repl cAaAdef +par_plus_dyn_repl cAaAdef +par_plus_dyn_repl cAa +par_plus_dyn_repl cAa +par_plus_dyn_repl cAa +par_plus_dyn_repl 9 +par_plus_dyn_repl 9 +par_plus_dyn_repl 1 +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl AaAdef +par_plus_dyn_repl AaAdef +par_plus_dyn_repl AaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaA +par_plus_dyn_repl abcAaA +par_plus_dyn_repl abcAaA +par_plus_dyn_repl abcAaAghi +par_plus_dyn_repl abcAaAghi +par_plus_dyn_repl abcAaAghi +par_plus_dyn_repl AbcAaAdef +par_plus_dyn_repl AbcAaAdef +par_plus_dyn_repl AbcAaAdef +par_plus_dyn_repl AbcAAAdef +par_plus_dyn_repl AbcAAAdef +par_plus_dyn_repl AbcAAAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcAaAdef +par_plus_dyn_repl abcaaadef +par_plus_dyn_repl abcaaadef +par_plus_dyn_repl abcaaadef par_python_children ### bug #49970: Python child process dies if --env is used par_retries_replacement_string 11 par_retries_replacement_string 22