From 8e22009706a53922710ebdf732d942febfdce52c Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Thu, 6 Jul 2017 14:36:06 +0200 Subject: [PATCH] Fixed newlines and positional replacement strings https://lists.gnu.org/archive/html/parallel/2017-07/msg00001.html --- src/parallel | 42 +++++++++------ src/parallel.pod | 52 ++++++++++++++++++- testsuite/tests-to-run/parallel-local-0.3s.sh | 8 +++ testsuite/wanted-results/parallel-local-0.3s | 2 + 4 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/parallel b/src/parallel index 6f205a18..398d6008 100755 --- a/src/parallel +++ b/src/parallel @@ -1791,27 +1791,35 @@ sub find_compression_program { # $compress_program = compress program with options # $decompress_program = decompress program with options - # Search for these. Sorted by speed on 32 core - # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip + # Search for these. Sorted by speed on 128 core + + # seq 120000000|shuf > 1gb & + # apt-get update + # apt install make g++ htop + # wget -O - pi.dk/3 | bash + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz # git clone https://github.com/facebook/zstd.git - # cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin + # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin) # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz # chmod +x /usr/local/bin/lrz - # seq 120000000|shuf > 1gb + # wait # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" - # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz" - # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz" # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # parallel --shuf 
-j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread # sort -nk4 jl-? + # 1-core: # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip # 4-cores: # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip - - my @prg = qw(pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip pixz lzip lrz - gzip pxz lzma bzip2 xz clzip); + # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip + # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip + lrz pxz bzip2 lzma xz clzip); for my $p (@prg) { if(which($p)) { return ("$p -c -1","$p -dc"); @@ -9418,12 +9426,12 @@ sub replaced { (?: (?! \257[<>]). )* # The perl expression \257> # =} [^\s\257]* # after =} - )+)/ /x) { + )+)/ /xs) { # $1 = pre \257< perlexpr \257> post $word{"$1"} ||= 1; } } else { - while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//x) { + while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//xs) { # $1 = \257< perlexpr \257> $word{$1} ||= 1; } @@ -9622,7 +9630,7 @@ sub new { $rpl =~ /^( [^(]* ) # Prefix - e.g. {%% ( \(.*\) )? # Group capture regexp - e.g (.*) ( [^)]* )$ # Postfix - e.g } - /x; + /xs; $grp_regexp ||= ''; my $rplval = $Global::rpl{$rpl}; while(s{( (?: ^|\257> ) (?: (?! \257[<>])(?:.|\n) )*? ) @@ -9650,7 +9658,7 @@ sub new { ::perl_quote_scalar($grp[$i]) . "\";"; } $unchanged . "\257<" . $set_args . $rv . "\257>" - }gxe) { + }gxes) { } # Do the same for the positional replacement strings $posrpl = $rpl; @@ -9682,7 +9690,7 @@ sub new { ::perl_quote_scalar($grp[$i]) . "\";"; } $unchanged . "\257<" . 
$position . $set_args . $rv . "\257>" - }gxe) { + }gxes) { } } } @@ -9777,7 +9785,7 @@ sub replacement_counts_and_lengths { my $noncontextlen = 0; my $contextgroups = 0; for my $c (@cmd) { - while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/x) { + while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/xs) { # %replacecount = { "perlexpr" => number of times seen } # e.g { "s/a/b/" => 2 } $replacecount{$1}++; @@ -9786,7 +9794,7 @@ sub replacement_counts_and_lengths { # Measure the length of the context around the {= perl expr =} # Use that {=...=} has been replaced with \000 above # So there is no need to deal with \257< - while($c =~ s/ (\S*\000\S*) //x) { + while($c =~ s/ (\S*\000\S*) //xs) { my $w = $1; $w =~ tr/\000//d; # Remove all \000's $contextlen += length($w); @@ -9800,7 +9808,7 @@ sub replacement_counts_and_lengths { # Options that can contain replacement strings $_ or next; my $t = $_; - while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //x) { + while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //xs) { # %replacecount = { "perlexpr" => number of times seen } # e.g { "$_++" => 2 } # But for tagstring we just need to mark it as seen diff --git a/src/parallel.pod b/src/parallel.pod index dae3b3b7..155d366f 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -557,7 +557,14 @@ Column separator. The input will be treated as a table with I<regexp> separating the columns. The n'th column can be access using B<{>I<n>B<}> or B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column. -B<--colsep> implies B<--trim rl>. +If there are more input sources, each input source will be separated, +but the columns from each input source will be linked (see B<--link>). + + parallel --colsep '-' echo {4} {3} {2} {1} \ + ::: A-B C-D ::: e-f g-h + +B<--colsep> implies B<--trim rl>, which can be overridden with +B<--trim n>. 
I<regexp> is a Perl Regular Expression: http://perldoc.perl.org/perlre.html @@ -3024,6 +3031,33 @@ This also works if the input file is a file with columns: parallel --colsep '\t' --header : echo {Name} {E-mail address} +=head1 EXAMPLE: From a to b and b to c + +Assume you have input like: + + aardvark + babble + cab + dab + each + +and want to run combinations like: + + aardvark babble + babble cab + cab dab + dab each + +If the input is in the file in.txt: + + parallel echo {1} - {2} ::::+ <(head -n -1 in.txt) <(tail -n +2 in.txt) + +If the input is in the array $a here are two solutions: + + seq $((${#a[@]}-1)) | env_parallel --env a echo '${a[{=$_--=}]} - ${a[{}]}' + parallel echo {1} - {2} ::: "${a[@]::${#a[@]}-1}" :::+ "${a[@]:1}" + + =head1 EXAMPLE: Count the differences between all files in a dir Using B<--results> the results are saved in /tmp/diffcount*. @@ -3137,6 +3171,20 @@ Check the uptime of the servers in I<~/.parallel/sshloginfile>: parallel --tag -S .. --nonall uptime +=head1 EXAMPLE: Colorize output + +Give each job a new color. Most terminals support ANSI colors with the +escape code "\033[30;XXm" where 30 <= XX <= 37: + + parallel --tagstring '\033[30;{=$_=++$::color%8+30=}m' seq {} ::: {1..10} + parallel --rpl '{color} $_="\033[30;".(++$::color%8+30)."m"' \ + --tagstring {color} seq {} ::: {1..10} + +To get rid of the initial \t (which comes from B<--tagstring>): + + ... | perl -pe 's/\t//' + + =head1 EXAMPLE: Keep order of output same as order of input Normally the output of a job will be printed as soon as it @@ -4190,7 +4238,7 @@ computer. 
=item $PARALLEL_SHELL -Use this shell the shell for the commands run by GNU Parallel: +Use this shell for the commands run by GNU Parallel: =over 2 diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh index 615aa6d2..8ee48e11 100644 --- a/testsuite/tests-to-run/parallel-local-0.3s.sh +++ b/testsuite/tests-to-run/parallel-local-0.3s.sh @@ -716,6 +716,14 @@ par_basic_halt() { parallel --halt now echo ::: should not print } +par_newline_in_command() { + echo Command with newline and positional replacement strings + parallel " + echo {1 + } {2} + " ::: O ::: K +} + export -f $(compgen -A function | grep par_) compgen -A function | grep par_ | sort | parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1' diff --git a/testsuite/wanted-results/parallel-local-0.3s b/testsuite/wanted-results/parallel-local-0.3s index 02c19fa5..1b9cd955 100644 --- a/testsuite/wanted-results/parallel-local-0.3s +++ b/testsuite/wanted-results/parallel-local-0.3s @@ -1638,6 +1638,8 @@ par_macron par_macron ¯<¯<¯>¯> par_macron ¯<¯<¯>¯> ¯<¯<¯>¯> par_macron "¯<¯<¯>¯>" ¯<¯<¯>¯> +par_newline_in_command Command with newline and positional replacement strings +par_newline_in_command O K par_pipepart_block_bigger_2G ### Test that --pipepart can have blocks > 2GB par_pipepart_block_bigger_2G 1 1 4 par_python_children ### bug #49970: Python child process dies if --env is used