Fixed newlines and positional replacement strings

https://lists.gnu.org/archive/html/parallel/2017-07/msg00001.html
This commit is contained in:
Ole Tange 2017-07-06 14:36:06 +02:00
parent e0baea9a4a
commit 8e22009706
4 changed files with 85 additions and 19 deletions

View file

@ -1791,27 +1791,35 @@ sub find_compression_program {
# $compress_program = compress program with options # $compress_program = compress program with options
# $decompress_program = decompress program with options # $decompress_program = decompress program with options
# Search for these. Sorted by speed on 32 core # Search for these. Sorted by speed on 128 core
# apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip
# seq 120000000|shuf > 1gb &
# apt-get update
# apt install make g++ htop
# wget -O - pi.dk/3 | bash
# apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
# git clone https://github.com/facebook/zstd.git # git clone https://github.com/facebook/zstd.git
# cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
# echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz
# chmod +x /usr/local/bin/lrz # chmod +x /usr/local/bin/lrz
# seq 120000000|shuf > 1gb # wait
# onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
# multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz" # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
# parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
# parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread
# parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread
# sort -nk4 jl-? # sort -nk4 jl-?
# 1-core: # 1-core:
# 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
# 4-cores: # 4-cores:
# 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
# 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
# 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
# 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
# 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
my @prg = qw(pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip pixz lzip lrz my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
gzip pxz lzma bzip2 xz clzip); lrz pxz bzip2 lzma xz clzip);
for my $p (@prg) { for my $p (@prg) {
if(which($p)) { if(which($p)) {
return ("$p -c -1","$p -dc"); return ("$p -c -1","$p -dc");
@ -9418,12 +9426,12 @@ sub replaced {
(?: (?! \257[<>]). )* # The perl expression (?: (?! \257[<>]). )* # The perl expression
\257> # =} \257> # =}
[^\s\257]* # after =} [^\s\257]* # after =}
)+)/ /x) { )+)/ /xs) {
# $1 = pre \257< perlexpr \257> post # $1 = pre \257< perlexpr \257> post
$word{"$1"} ||= 1; $word{"$1"} ||= 1;
} }
} else { } else {
while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//x) { while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//xs) {
# $1 = \257< perlexpr \257> # $1 = \257< perlexpr \257>
$word{$1} ||= 1; $word{$1} ||= 1;
} }
@ -9622,7 +9630,7 @@ sub new {
$rpl =~ /^( [^(]* ) # Prefix - e.g. {%% $rpl =~ /^( [^(]* ) # Prefix - e.g. {%%
( \(.*\) )? # Group capture regexp - e.g (.*) ( \(.*\) )? # Group capture regexp - e.g (.*)
( [^)]* )$ # Postfix - e.g } ( [^)]* )$ # Postfix - e.g }
/x; /xs;
$grp_regexp ||= ''; $grp_regexp ||= '';
my $rplval = $Global::rpl{$rpl}; my $rplval = $Global::rpl{$rpl};
while(s{( (?: ^|\257> ) (?: (?! \257[<>])(?:.|\n) )*? ) while(s{( (?: ^|\257> ) (?: (?! \257[<>])(?:.|\n) )*? )
@ -9650,7 +9658,7 @@ sub new {
::perl_quote_scalar($grp[$i]) . "\";"; ::perl_quote_scalar($grp[$i]) . "\";";
} }
$unchanged . "\257<" . $set_args . $rv . "\257>" $unchanged . "\257<" . $set_args . $rv . "\257>"
}gxe) { }gxes) {
} }
# Do the same for the positional replacement strings # Do the same for the positional replacement strings
$posrpl = $rpl; $posrpl = $rpl;
@ -9682,7 +9690,7 @@ sub new {
::perl_quote_scalar($grp[$i]) . "\";"; ::perl_quote_scalar($grp[$i]) . "\";";
} }
$unchanged . "\257<" . $position . $set_args . $rv . "\257>" $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
}gxe) { }gxes) {
} }
} }
} }
@ -9777,7 +9785,7 @@ sub replacement_counts_and_lengths {
my $noncontextlen = 0; my $noncontextlen = 0;
my $contextgroups = 0; my $contextgroups = 0;
for my $c (@cmd) { for my $c (@cmd) {
while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/x) { while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/xs) {
# %replacecount = { "perlexpr" => number of times seen } # %replacecount = { "perlexpr" => number of times seen }
# e.g { "s/a/b/" => 2 } # e.g { "s/a/b/" => 2 }
$replacecount{$1}++; $replacecount{$1}++;
@ -9786,7 +9794,7 @@ sub replacement_counts_and_lengths {
# Measure the length of the context around the {= perl expr =} # Measure the length of the context around the {= perl expr =}
# Use that {=...=} has been replaced with \000 above # Use that {=...=} has been replaced with \000 above
# So there is no need to deal with \257< # So there is no need to deal with \257<
while($c =~ s/ (\S*\000\S*) //x) { while($c =~ s/ (\S*\000\S*) //xs) {
my $w = $1; my $w = $1;
$w =~ tr/\000//d; # Remove all \000's $w =~ tr/\000//d; # Remove all \000's
$contextlen += length($w); $contextlen += length($w);
@ -9800,7 +9808,7 @@ sub replacement_counts_and_lengths {
# Options that can contain replacement strings # Options that can contain replacement strings
$_ or next; $_ or next;
my $t = $_; my $t = $_;
while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //x) { while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //xs) {
# %replacecount = { "perlexpr" => number of times seen } # %replacecount = { "perlexpr" => number of times seen }
# e.g { "$_++" => 2 } # e.g { "$_++" => 2 }
# But for tagstring we just need to mark it as seen # But for tagstring we just need to mark it as seen

View file

@ -557,7 +557,14 @@ Column separator. The input will be treated as a table with I<regexp>
separating the columns. The n'th column can be accessed using separating the columns. The n'th column can be accessed using
B<{>I<n>B<}> or B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column. B<{>I<n>B<}> or B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column.
B<--colsep> implies B<--trim rl>. If there are more input sources, each input source will be separated,
but the columns from each input source will be linked (see B<--link>).
parallel --colsep '-' echo {4} {3} {2} {1} \
::: A-B C-D ::: e-f g-h
B<--colsep> implies B<--trim rl>, which can be overridden with
B<--trim n>.
I<regexp> is a Perl Regular Expression: I<regexp> is a Perl Regular Expression:
http://perldoc.perl.org/perlre.html http://perldoc.perl.org/perlre.html
@ -3024,6 +3031,33 @@ This also works if the input file is a file with columns:
parallel --colsep '\t' --header : echo {Name} {E-mail address} parallel --colsep '\t' --header : echo {Name} {E-mail address}
=head1 EXAMPLE: From a to b and b to c
Assume you have input like:
aardvark
babble
cab
dab
each
and want to run combinations like:
aardvark babble
babble cab
cab dab
dab each
If the input is in the file in.txt:
parallel echo {1} - {2} ::::+ <(head -n -1 in.txt) <(tail -n +2 in.txt)
If the input is in the array $a here are two solutions:
seq $((${#a[@]}-1)) | env_parallel --env a echo '${a[{=$_--=}]} - ${a[{}]}'
parallel echo {1} - {2} ::: "${a[@]::${#a[@]}-1}" :::+ "${a[@]:1}"
=head1 EXAMPLE: Count the differences between all files in a dir =head1 EXAMPLE: Count the differences between all files in a dir
Using B<--results> the results are saved in /tmp/diffcount*. Using B<--results> the results are saved in /tmp/diffcount*.
@ -3137,6 +3171,20 @@ Check the uptime of the servers in I<~/.parallel/sshloginfile>:
parallel --tag -S .. --nonall uptime parallel --tag -S .. --nonall uptime
=head1 EXAMPLE: Colorize output
Give each job a new color. Most terminals support ANSI colors with the
escape code "\033[30;XXm" where 30 <= XX <= 37:
parallel --tagstring '\033[30;{=$_=++$::color%8+30=}m' seq {} ::: {1..10}
parallel --rpl '{color} $_="\033[30;".(++$::color%8+30)."m"' \
--tagstring {color} seq {} ::: {1..10}
To get rid of the initial \t (which comes from B<--tagstring>):
... | perl -pe 's/\t//'
=head1 EXAMPLE: Keep order of output same as order of input =head1 EXAMPLE: Keep order of output same as order of input
Normally the output of a job will be printed as soon as it Normally the output of a job will be printed as soon as it
@ -4190,7 +4238,7 @@ computer.
=item $PARALLEL_SHELL =item $PARALLEL_SHELL
Use this shell the shell for the commands run by GNU Parallel: Use this shell for the commands run by GNU Parallel:
=over 2 =over 2

View file

@ -716,6 +716,14 @@ par_basic_halt() {
parallel --halt now echo ::: should not print parallel --halt now echo ::: should not print
} }
# Test case: run GNU Parallel with a command template that spans several
# lines, where a newline also occurs inside the positional replacement
# string {1 ... }. The two input sources supply O and K.
par_newline_in_command() {
echo Command with newline and positional replacement strings
# The line breaks inside the quoted template (including the one between
# "{1" and "}") are the point of the test; do not reflow the string.
parallel "
echo {1
} {2}
" ::: O ::: K
}
export -f $(compgen -A function | grep par_) export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | sort | compgen -A function | grep par_ | sort |
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1' parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'

View file

@ -1638,6 +1638,8 @@ par_macron
par_macron ¯<¯<¯>¯> par_macron ¯<¯<¯>¯>
par_macron ¯<¯<¯>¯> ¯<¯<¯>¯> par_macron ¯<¯<¯>¯> ¯<¯<¯>¯>
par_macron "¯<¯<¯>¯>" ¯<¯<¯>¯> par_macron "¯<¯<¯>¯>" ¯<¯<¯>¯>
par_newline_in_command Command with newline and positional replacement strings
par_newline_in_command O K
par_pipepart_block_bigger_2G ### Test that --pipepart can have blocks > 2GB par_pipepart_block_bigger_2G ### Test that --pipepart can have blocks > 2GB
par_pipepart_block_bigger_2G 1 1 4 par_pipepart_block_bigger_2G 1 1 4
par_python_children ### bug #49970: Python child process dies if --env is used par_python_children ### bug #49970: Python child process dies if --env is used