parallel: --plus enables most bashisms for replacement strings.

This commit is contained in:
Ole Tange 2017-03-11 21:19:10 +01:00
parent 2724941a91
commit d08c8ac417
5 changed files with 145 additions and 5 deletions

View file

@ -206,6 +206,19 @@ Haiku of the month:
New in this release:
* --rpl can now take arguments by adding '(regexp)' in the replacement string.
https://joss.theoj.org/papers/3cde54de7dfbcada7c0fc04f569b36c7
https://link.springer.com/article/10.1134/S0016793217010108
http://biorxiv.org/content/biorxiv/early/2017/02/17/109280.full.pdf
https://arxiv.org/pdf/1612.08239.pdf
http://pubs.acs.org/doi/pdfplus/10.1021/acs.jctc.6b00506
https://link.springer.com/article/10.1007/s11042-016-4185-5
https://arxiv.org/pdf/1611.08003.pdf
https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0208-8
https://dspace.library.colostate.edu/bitstream/handle/11124/170687/Johnson_mines_0052E_11207.pdf?sequence=1&isAllowed=y
http://www.blopig.com/blog/2017/02/parallel-computing-gnu-parallel/
http://garf.us/2017/02/stig-sandbeck-mathisen-change-all-the-passwords-again/

View file

@ -1397,6 +1397,26 @@ sub init_globals {
'{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
# {##} = number of jobs
'{##}' => '$_=total_jobs()',
# Bash ${a:-myval}
'{:-(.+?)}' => '$_ ||= $$1',
# Bash ${a:2}
'{:(\d+?)}' => 'substr($_,0,$$1) = ""',
# Bash ${a:2:3}
'{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
# Bash ${a#bc}
'{#([^#].*?)}' => 's/^$$1//;',
# Bash ${a%def}
'{%(.+?)}' => 's/$$1$//;',
# Bash ${a/def/ghi}
'{/(.+?)/(.+?)}' => 's/$$1/$$2/;',
# Bash ${a^a}
'{^(.+?)}' => 's/($$1)/uc($1)/e;',
# Bash ${a^^a}
'{^^(.+?)}' => 's/($$1)/uc($1)/eg;',
# Bash ${a,A}
'{,(.+?)}' => 's/($$1)/lc($1)/e;',
# Bash ${a,,A}
'{,,(.+?)}' => 's/($$1)/lc($1)/eg;',
);
# Modifiable copy of %Global::replace
%Global::rpl = %Global::replace;
@ -9477,6 +9497,7 @@ sub new {
( \(.*\) )? # Group capture regexp - e.g (.*)
( [^)]* )$ # Postfix - e.g }
/x;
$grp_regexp ||= '';
my $rplval = $Global::rpl{$rpl};
while(s{( (?: ^|\257> ) [^\257]*? ) # Don't replace after \257 unless \257>
\Q$prefix\E $grp_regexp \Q$postfix\E}

View file

@ -727,9 +727,9 @@ may not be used. B<--gnu> is kept for compatibility.
=item B<--group>
Group output. Output from each job is grouped together and is only
printed when the command is finished. stderr (standard error) first
followed by stdout (standard output). This takes some CPU time. In
rare situations GNU B<parallel> takes up lots of CPU time and if it is
printed when the command is finished. stdout (standard output) first
followed by stderr (standard error). This takes some CPU time. In rare
situations GNU B<parallel> takes up lots of CPU time and if it is
acceptable that the outputs from different commands are mixed
together, then disabling grouping with B<-u> can speed up GNU
B<parallel> by a factor of 10.
@ -1759,6 +1759,16 @@ The B<--plus> replacement strings are implemented as:
--rpl '{/..} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{/...} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{##} $_=total_jobs()'
--rpl '{:-(.+?)} $_ ||= $$1'
--rpl '{:(\d+?)} substr($_,0,$$1) = ""'
--rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);'
--rpl '{#([^#].*?)} s/^$$1//;'
--rpl '{%(.+?)} s/$$1$//;'
--rpl '{/(.+?)/(.+?)} s/$$1/$$2/;'
--rpl '{^(.+?)} s/($$1)/uc($1)/e;'
--rpl '{^^(.+?)} s/($$1)/uc($1)/eg;'
--rpl '{,(.+?)} s/($$1)/lc($1)/e;'
--rpl '{,,(.+?)} s/($$1)/lc($1)/eg;'
If the user defined replacement string starts with '{' it can also be

View file

@ -96,7 +96,7 @@ echo '### bug #43817: Some JP char cause problems in positional replacement stri
echo '**'
echo '### --rpl % that is a substring of longer --rpl %D'
parallel --plus --rpl '%'
parallel --rpl '{+.} s:.*\.::' --rpl '%'
--rpl '%D $_=::shell_quote(::dirname($_));' --rpl '%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::'
'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f
@ -683,6 +683,62 @@ par_tagstring_pipe() {
seq 3000 | parallel -j4 --pipe -N1000 -k --tagstring {%} LANG=C wc
}
# Regression test for the bash-like replacement strings provided by --plus.
# Most cases print three lines that are expected to be identical:
#   1. the result of bash's own parameter expansion,
#   2. GNU parallel with the replacement string spelled out via --rpl,
#   3. GNU parallel relying on the definition built into --plus.
# The {#} case is the exception (see the note there).
par_plus_dyn_repl() {
echo "Dynamic replacement strings defined by --plus"
# ${a:-myval}: default value. $a is unset, so parallel gets an empty argument.
unset a
echo ${a:-myval}
parallel --rpl '{:-(.+)} $_ ||= $$1' echo {:-myval} ::: "$a"
parallel --plus echo {:-myval} ::: "$a"
a=abcAaAdef
# ${a:2}: drop the first 2 characters.
echo ${a:2}
parallel --rpl '{:(\d+)} substr($_,0,$$1) = ""' echo {:2} ::: "$a"
parallel --plus echo {:2} ::: "$a"
# ${a:2:3}: 3 characters starting at offset 2.
echo ${a:2:3}
parallel --rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);' echo {:2:3} ::: "$a"
parallel --plus echo {:2:3} ::: "$a"
# ${#a}: string length. Only the explicit --rpl can mimic this.
echo ${#a}
parallel --rpl '{#} $_ = length $_;' echo {#} ::: "$a"
# {#} is already used for the job number, so with --plus this prints the
# job number and not the length of the argument.
parallel --plus echo {#} ::: "$a"
# ${a#bc}: remove a leading 'bc'. The value starts with 'abc', so no change.
echo ${a#bc}
parallel --rpl '{#(.+?)} s/^$$1//;' echo {#bc} ::: "$a"
parallel --plus echo {#bc} ::: "$a"
# ${a#abc}: remove a leading 'abc'.
echo ${a#abc}
parallel --rpl '{#(.+?)} s/^$$1//;' echo {#abc} ::: "$a"
parallel --plus echo {#abc} ::: "$a"
# ${a%de}: remove a trailing 'de'. The value ends in 'def', so no change.
echo ${a%de}
parallel --rpl '{%(.+?)} s/$$1$//;' echo {%de} ::: "$a"
parallel --plus echo {%de} ::: "$a"
# ${a%def}: remove a trailing 'def'.
echo ${a%def}
parallel --rpl '{%(.+?)} s/$$1$//;' echo {%def} ::: "$a"
parallel --plus echo {%def} ::: "$a"
# ${a/def/ghi}: replace 'def' with 'ghi'.
echo ${a/def/ghi}
parallel --rpl '{/(.+?)/(.+?)} s/$$1/$$2/;' echo {/def/ghi} ::: "$a"
parallel --plus echo {/def/ghi} ::: "$a"
# ${a^a}: upper-case a single 'a'.
echo ${a^a}
parallel --rpl '{^(.+?)} s/($$1)/uc($1)/e;' echo {^a} ::: "$a"
parallel --plus echo {^a} ::: "$a"
# ${a^^a}: upper-case every 'a'.
echo ${a^^a}
parallel --rpl '{^^(.+?)} s/($$1)/uc($1)/eg;' echo {^^a} ::: "$a"
parallel --plus echo {^^a} ::: "$a"
# New value starting with 'A' for the lower-casing cases.
a=AbcAaAdef
# ${a,A}: lower-case a single 'A'.
# The replacement string is quoted so the shell does not brace-expand {,A}.
echo ${a,A}
parallel --rpl '{,(.+?)} s/($$1)/lc($1)/e;' echo '{,A}' ::: "$a"
parallel --plus echo '{,A}' ::: "$a"
# ${a,,A}: lower-case every 'A'. Quoted for the same reason as above.
echo ${a,,A}
parallel --rpl '{,,(.+?)} s/($$1)/lc($1)/eg;' echo '{,,A}' ::: "$a"
parallel --plus echo '{,,A}' ::: "$a"
}
# Export every function whose name contains par_ so that the shells started
# by GNU parallel can call them.
export -f $(compgen -A function | grep par_)
# Feed the sorted list of par_ functions to GNU parallel, which runs each one
# with stderr merged into stdout. The job log is written under /tmp and is
# named after this script.
compgen -A function |
grep par_ |
sort |
parallel -j6 --tag -k --joblog +/tmp/jl-$(basename $0) '{} 2>&1'

View file

@ -101,7 +101,7 @@ echo '**'
**
echo '### --rpl % that is a substring of longer --rpl %D'
### --rpl % that is a substring of longer --rpl %D
parallel --plus --rpl '%' --rpl '%D $_=::shell_quote(::dirname($_));' --rpl '%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::' 'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f
parallel --rpl '{+.} s:.*\.::' --rpl '%' --rpl '%D $_=::shell_quote(::dirname($_));' --rpl '%B s:.*/::;s:\.[^/.]+$::;' --rpl '%E s:.*\.::' 'echo {}=%;echo %D={//};echo %B={/.};echo %E={+.};echo %D/%B.%E={}' ::: a.b/c.d/e.f
a.b/c.d/e.f=a.b/c.d/e.f
a.b/c.d=a.b/c.d
e=e
@ -1623,6 +1623,46 @@ par_file_ending_in_newline gzip /tmp/parallel_f2'
par_file_ending_in_newline '
par_pipepart_block_bigger_2G ### Test that --pipepart can have blocks > 2GB
par_pipepart_block_bigger_2G 1 1 4
par_plus_dyn_repl Dynamic replacement strings defined by --plus
par_plus_dyn_repl myval
par_plus_dyn_repl myval
par_plus_dyn_repl myval
par_plus_dyn_repl cAaAdef
par_plus_dyn_repl cAaAdef
par_plus_dyn_repl cAaAdef
par_plus_dyn_repl cAa
par_plus_dyn_repl cAa
par_plus_dyn_repl cAa
par_plus_dyn_repl 9
par_plus_dyn_repl 9
par_plus_dyn_repl 1
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl AaAdef
par_plus_dyn_repl AaAdef
par_plus_dyn_repl AaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaA
par_plus_dyn_repl abcAaA
par_plus_dyn_repl abcAaA
par_plus_dyn_repl abcAaAghi
par_plus_dyn_repl abcAaAghi
par_plus_dyn_repl abcAaAghi
par_plus_dyn_repl AbcAaAdef
par_plus_dyn_repl AbcAaAdef
par_plus_dyn_repl AbcAaAdef
par_plus_dyn_repl AbcAAAdef
par_plus_dyn_repl AbcAAAdef
par_plus_dyn_repl AbcAAAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcAaAdef
par_plus_dyn_repl abcaaadef
par_plus_dyn_repl abcaaadef
par_plus_dyn_repl abcaaadef
par_python_children ### bug #49970: Python child process dies if --env is used
par_retries_replacement_string 11
par_retries_replacement_string 22