parallel: {%...} / {#...} are now non-greedy; {%%...} / {##...} are the greedy forms.

This commit is contained in:
Ole Tange 2021-05-14 23:35:03 +02:00
parent 06388c8dd3
commit 9654be02dc
7 changed files with 117 additions and 26 deletions

View file

@ -241,9 +241,9 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm> stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20210422 ('Ever Given') released <<[stable]>> Subject: GNU Parallel 20210522 ('Palestine/Lag Ba'Omer celebrations at Mt. Meron Israel/Michael Collins <<>>') released <<[stable]>>
GNU Parallel 20210322 ('Ever Given') <<[stable]>> has been released. It is available for download at: lbry://@GnuParallel:4 GNU Parallel 20210522 ('<<>>') <<[stable]>> has been released. It is available for download at: lbry://@GnuParallel:4
<<No new functionality was introduced so this is a good candidate for a stable release.>> <<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -253,9 +253,7 @@ It does not have to be as detailed as Juan's. It is perfectly fine if you just s
Quote of the month: Quote of the month:
GNU Parallel is your friend. <<>>
Can shorten that time by X cores.
-- iRODS @irods@twitter
New in this release: New in this release:
@ -263,19 +261,17 @@ New in this release:
News about GNU Parallel: News about GNU Parallel:
* Bioinformatics tutorials - linux and shell advanced - parallel https://www.youtube.com/watch?v=5leL8pyl0XA * Batch Calculate and Verify MD5 Checksum With GNU Parallel https://omicx.cc/posts/2021-04-28-calculate-and-verify-md5-checksum-with-gnu-parallel/
* GNU Parallel for quick gains https://edbennett.github.io/high-performance-python/04-gnu-parallel/index.html * HerrComp Gnu parallel, c++11 threads 2021 04 28 https://www.youtube.com/watch?v=wDd9F9nn0qA
* Processing Linux Commands in Parallel https://www.baeldung.com/linux/processing-commands-in-parallel news??https://ulhpc-tutorials.readthedocs.io/en/latest/sequential/gnu-parallel/
news?https://edbennett.github.io/high-performance-python/04-gnu-parallel/index.html
https://www.maryamdaryalal.com/post/job-parallelization-on-niagara
https://omicx.cc/posts/2021-04-28-calculate-and-verify-md5-checksum-with-gnu-parallel/
https://madflex.de/use-parallel-to-split-by-line/
* GNU parallel https://docs-research-it.berkeley.edu/services/high-performance-computing/user-guide/running-your-jobs/gnu-parallel/ <<>>
* GNU Parallel 활용 가이드https://genoglobe.com/kribb/gnu_parallel
* Parallel Grep and Awk https://www.highonscience.com/blog/2021/03/21/parallel-grep/
* Getting things done with shell scripting https://doma.dev/blog/get-things-done-with-bash/
Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html

View file

@ -2242,12 +2242,18 @@ sub init_globals() {
'{:(\d+?)}' => 'substr($_,0,$$1) = ""', '{:(\d+?)}' => 'substr($_,0,$$1) = ""',
# Bash ${a:2:3} # Bash ${a:2:3}
'{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);',
# echo {#z.*z.} ::: z.z.z.foo => z.foo
# echo {##z.*z.} ::: z.z.z.foo => foo
# Bash ${a#bc} # Bash ${a#bc}
'{#([^#}][^}]*?)}' => 's/^$$1//;', '{#([^#}][^}]*?)}' =>
'$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;',
# Bash ${a##bc} # Bash ${a##bc}
'{##([^#}][^}]*?)}' => 's/^$$1//;', '{##([^#}][^}]*?)}' => 's/^$$1//;',
# echo {%.z.*z} ::: foo.z.z.z => foo.z
# echo {%%.z.*z} ::: foo.z.z.z => foo
# Bash ${a%def} # Bash ${a%def}
'{%([^}]+?)}' => 's/$$1$//;', '{%([^}]+?)}' =>
'$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;',
# Bash ${a%%def} # Bash ${a%%def}
'{%%([^}]+?)}' => 's/$$1$//;', '{%%([^}]+?)}' => 's/$$1$//;',
# Bash ${a/def/ghi} ${a/def/} # Bash ${a/def/ghi} ${a/def/}
@ -5931,6 +5937,37 @@ sub usleep($) {
select(undef, undef, undef, $ms/1000); select(undef, undef, undef, $ms/1000);
} }
sub make_regexp_ungreedy {
    # Turn every greedy quantifier (* + {n,m}) in a regexp into its
    # non-greedy form by appending '?' to it.
    # Quantifiers that are escaped with a backslash, that are inside a
    # [character class], or that are already followed by '?' are left
    # alone. The braces of a braced escape (\p{..} \P{..} \x{..} \N{..}
    # \o{..} \g{..} \k{..} \b{..}) are not quantifiers and are left
    # alone, too.
    # The '?' quantifier itself is not changed.
    # Input:
    #   $regexp = regexp as a string
    # Returns:
    #   $ungreedy = the regexp with non-greedy quantifiers
    my $regexp = shift;
    my $class_state = 0;
    my $escape_state = 0;
    # 0 = not at a braced escape
    # 1 = the previous char was the letter of \p \x \N ...
    # 2 = inside the {...} belonging to such an escape
    my $escape_brace = 0;
    my $found = 0;
    my $ungreedy = "";

    for my $c (split (//, $regexp)) {
	if ($found) {
	    # Previous char ended a quantifier:
	    # Add '?' unless the regexp already has one here
	    if($c ne "?") { $ungreedy .= "?"; }
	    $found = 0;
	}
	$ungreedy .= $c;

	if ($escape_state) {
	    # This char is escaped, so it is never a quantifier.
	    # Remember if it may be followed by {...}
	    $escape_state = 0;
	    $escape_brace = ($c =~ /[pPxNogkb]/) ? 1 : 0;
	    next;
	}
	if ($escape_brace == 2) {
	    # Inside \p{...}: The closing } is not a quantifier
	    if($c eq '}') { $escape_brace = 0; }
	    next;
	}
	if ($escape_brace == 1) {
	    $escape_brace = 0;
	    if($c eq '{') { $escape_brace = 2; next; }
	    # No brace: treat this char as any other char below
	}
	if ($c eq "\\") { $escape_state = 1; next; }
	if ($c eq '[') { $class_state = 1; next; }
	if ($class_state) {
	    # Chars in a [class] are literal
	    if($c eq ']') { $class_state = 0; }
	    next;
	}
	# Quantifiers: + * {...}
	if ($c =~ /[*}+]/) { $found = 1; }
    }
    # Quantifier as the very last char
    if($found) { $ungreedy .= '?'; }
    return $ungreedy;
}
sub __KILLER_REAPER__() {} sub __KILLER_REAPER__() {}
sub reap_usleep() { sub reap_usleep() {
@ -6481,16 +6518,15 @@ sub limit($) {
limit=$1; limit=$1;
io_file=$2; io_file=$2;
# Do the measurement in the background # Do the measurement in the background
(tmp=$(tempfile); ((tmp=$(tempfile);
LANG=C iostat -x 1 2 > $tmp; LANG=C iostat -x 1 2 > $tmp;
mv $tmp $io_file) & mv $tmp $io_file) </dev/null >/dev/null & );
perl -e '-e $ARGV[0] or exit(1); perl -e '-e $ARGV[0] or exit(1);
for(reverse <>) { for(reverse <>) {
/Device/ and last; /Device/ and last;
/(\S+)$/ and $max = $max > $1 ? $max : $1; } /(\S+)$/ and $max = $max > $1 ? $max : $1; }
exit ($max < '$limit')' $io_file; exit ('$limit' < $max)' $io_file;
}; };
export -f io;
io %s %s io %s %s
!, !,
"mem" => q! "mem" => q!
@ -6533,6 +6569,7 @@ sub limit($) {
local %ENV = %env; local %ENV = %env;
$ENV{'SSHLOGIN'} = $self->string(); $ENV{'SSHLOGIN'} = $self->string();
system($Global::shell,"-c",$self->{'limitscript'}); system($Global::shell,"-c",$self->{'limitscript'});
#::qqx($self->{'limitscript'});
::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n"); ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n");
return $?>>8; return $?>>8;
} }

View file

@ -1662,9 +1662,11 @@ inspired by bash's parameter expansion:
{:-str} str if the value is empty {:-str} str if the value is empty
{:num} remove the first num characters {:num} remove the first num characters
{:num1:num2} characters from num1 to num2 {:num1:num2} characters from num1 to num2
{#str} remove prefix str {#regexp} remove prefix regexp (non-greedy)
{%str} remove postfix str {##regexp} remove prefix regexp (greedy)
{/str1/str2} replace str1 with str2 {%regexp} remove postfix regexp (non-greedy)
{%%regexp} remove postfix regexp (greedy)
{/regexp/str} replace regexp with str
{^str} uppercase str if found at the start {^str} uppercase str if found at the start
{^^str} uppercase str {^^str} uppercase str
{,str} lowercase str if found at the start {,str} lowercase str if found at the start
@ -4284,6 +4286,38 @@ If not all hosts are accessible through TOR:
See more B<ssh> tricks on https://en.wikibooks.org/wiki/OpenSSH/Cookbook/Proxies_and_Jump_Hosts See more B<ssh> tricks on https://en.wikibooks.org/wiki/OpenSSH/Cookbook/Proxies_and_Jump_Hosts
=head2 EXAMPLE: Use outrun instead of ssh
B<outrun> lets you run a command on a remote server. B<outrun> sets up
a connection to access files at the source server, and automatically
transfers files. B<outrun> must be installed on the remote system.
You can use B<outrun> in an sshlogin this way:
parallel -S 'outrun user@server eval' command
=head2 EXAMPLE: Slurm cluster
The Slurm Workload Manager is used in many clusters.
Here is a simple example of using GNU B<parallel> to call B<srun>:
#!/bin/bash
#SBATCH --time 00:02:00
#SBATCH --ntasks=4
#SBATCH --job-name GnuParallelDemo
#SBATCH --output gnuparallel.out
module purge
module load gnu_parallel
my_parallel="parallel --delay .2 -j $SLURM_NTASKS"
my_srun="srun --export=all --exclusive -n1 --cpus-per-task=1 --cpu-bind=cores"
$my_parallel "$my_srun" echo This is job {} ::: {1..20}
=head2 EXAMPLE: Parallelizing rsync =head2 EXAMPLE: Parallelizing rsync
B<rsync> is a great tool, but sometimes it will not fill up the B<rsync> is a great tool, but sometimes it will not fill up the

View file

@ -16,6 +16,21 @@ export -f stdsort
# Test amount of parallelization # Test amount of parallelization
# parallel --shuf --jl /tmp/myjl -j1 'export JOBS={1};'bash tests-to-run/parallel-local-0.3s.sh ::: {1..16} ::: {1..5} # parallel --shuf --jl /tmp/myjl -j1 'export JOBS={1};'bash tests-to-run/parallel-local-0.3s.sh ::: {1..16} ::: {1..5}
par_pct() {
# Test the --plus replacement strings {#...} {##...} {%...} {%%...}.
# Each bash parameter expansion is followed by the corresponding
# GNU parallel call; the expected output lists the same value for
# both lines of every pair.
# Bash uses glob patterns (z*z.) while parallel uses regexps (z.*z.).
echo '### Test {%...} {%%...} {#...} {##...}'
a=z.z.z.foo
# Shortest matching prefix removed (expected: z.foo)
echo ${a#z*z.}
parallel --plus echo {#z.*z.} ::: z.z.z.foo
# Longest matching prefix removed (expected: foo)
echo ${a##z*z.}
parallel --plus echo {##z.*z.} ::: z.z.z.foo
a=foo.z.z.z
# Postfix removed, pattern without a wildcard repeat (expected: foo.z)
echo ${a%.z.z}
parallel --plus echo {%.z.z} ::: foo.z.z.z
# Longest matching postfix removed (expected: foo)
echo ${a%%.z*z}
parallel --plus echo {%%.z.*z} ::: foo.z.z.z
}
par_env_parallel_pipefail() { par_env_parallel_pipefail() {
cat <<'EOF' | bash cat <<'EOF' | bash
echo "### test env_parallel with pipefail + inherit_errexit" echo "### test env_parallel with pipefail + inherit_errexit"

View file

@ -555,6 +555,15 @@ par_parcat_args_stdin OK2
par_parcat_rm bug #51691: parcat --rm remove fifo when opened par_parcat_rm bug #51691: parcat --rm remove fifo when opened
par_parcat_rm OK1 par_parcat_rm OK1
par_parcat_rm OK file removed par_parcat_rm OK file removed
par_pct ### Test {%...} {%%...} {#...} {##...}
par_pct z.foo
par_pct z.foo
par_pct foo
par_pct foo
par_pct foo.z
par_pct foo.z
par_pct foo
par_pct foo
par_perlexpr_with_newline Perl expression spanning 2 lines par_perlexpr_with_newline Perl expression spanning 2 lines
par_pipe_N1_regexp bug #55131: --regexp --recstart hangs par_pipe_N1_regexp bug #55131: --regexp --recstart hangs
par_pipe_N1_regexp These should give the same par_pipe_N1_regexp These should give the same

View file

@ -40,5 +40,5 @@ par_space_quote a \'"b 4
par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5 par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5
par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5 par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5
par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5 par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5
par_special_char a' * ? >o <i*? ][!#¤%=( ) | }b 5 par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5
par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5 par_special_char a' * ? >o <i*? ][\!#¤%=( ) | }b 5

View file

@ -1285,4 +1285,4 @@ mentioned in the release notes of next version of GNU Parallel.
echo A echo A
echo B echo B
echo C echo C
7 8