Fixed newlines and positional replacement strings

https://lists.gnu.org/archive/html/parallel/2017-07/msg00001.html
2024-12-23 05:07:54 +00:00 · 2017-07-06 14:36:06 +02:00 · 2017-07-06 14:36:06 +02:00 · 8e22009706
parent e0baea9a4a
commit 8e22009706
4 changed files with 85 additions and 19 deletions
--- a/src/parallel
+++ b/src/parallel
@ -1791,27 +1791,35 @@ sub find_compression_program {
    #   $compress_program = compress program with options
    #   $decompress_program = decompress program with options

-    # Search for these. Sorted by speed on 32 core
-    # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip
+    # Search for these. Sorted by speed on 128 core
+
+    # seq 120000000|shuf > 1gb &
+    # apt-get update
+    # apt install make g++ htop
+    # wget -O - pi.dk/3 | bash
+    # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz
    # git clone https://github.com/facebook/zstd.git
-    # cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin
+    # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin)
    # echo 'lrzip -L $((-$1))'  >/usr/local/bin/lrz
    # chmod +x /usr/local/bin/lrz
-    # seq 120000000|shuf > 1gb
+    # wait
    # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2"
-    # multithread="pzstd pigz pxz plzip pbzip2 lzip  lbzip2 lrz"
-    # parallel --shuf -j50% --delay 1  --joblog jl-s --arg-sep ,  parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 ,  {1..3} , $onethread
+    # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz"
    # parallel --shuf -j1  --joblog jl-m --arg-sep ,  parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 ,  {1..3} , $multithread
+    # parallel --shuf -j50% --delay 1  --joblog jl-s --arg-sep ,  parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 ,  {1..3} , $onethread
    # sort -nk4 jl-?
+
    # 1-core:
    # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip
    # 4-cores:
    # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip
    # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2
    # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip
-
-    my @prg = qw(pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip pixz lzip lrz
-                 gzip pxz lzma bzip2 xz clzip);
+    # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip
+    # 128-cores: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip
+    
+    my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip
+                 lrz pxz bzip2 lzma xz clzip);
    for my $p (@prg) {
 	if(which($p)) {
 	    return ("$p -c -1","$p -dc");
@ -9418,12 +9426,12 @@ sub replaced {
                      (?: (?! \257[<>]). )* # The perl expression
                      \257>       # =}
                      [^\s\257]*  # after =}
-                     )+)/ /x) {
+                     )+)/ /xs) {
 			# $1 = pre \257< perlexpr \257> post
 			$word{"$1"} ||= 1;
 		    }
 		} else {
-		    while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//x) {
+		    while($tt =~ s/( \257<(?: (?! \257[<>]). )*\257> )//xs) {
 			# $1 = \257< perlexpr \257>
 			$word{$1} ||= 1;
 		    }
@ -9622,7 +9630,7 @@ sub new {
 		$rpl =~ /^( [^(]*  )    # Prefix - e.g. {%%
                          ( \(.*\) )?   # Group capture regexp - e.g (.*)
                          ( [^)]*  )$   # Postfix - e.g }
-                        /x;
+                        /xs;
 	    $grp_regexp ||= '';
 	    my $rplval = $Global::rpl{$rpl};
 	    while(s{( (?: ^|\257> ) (?: (?! \257[<>])(?:.|\n) )*? )
@ -9650,7 +9658,7 @@ sub new {
 			      ::perl_quote_scalar($grp[$i]) . "\";";
 		      }
 		      $unchanged . "\257<" . $set_args . $rv . "\257>"
-		  }gxe) {
+		  }gxes) {
 	    }
 	    # Do the same for the positional replacement strings
 	    $posrpl = $rpl;
@ -9682,7 +9690,7 @@ sub new {
 			      ::perl_quote_scalar($grp[$i]) . "\";";
 		      }
 		      $unchanged . "\257<" . $position . $set_args . $rv . "\257>"
-		  }gxe) {
+		  }gxes) {
 		}
 	    }
 	}
@ -9777,7 +9785,7 @@ sub replacement_counts_and_lengths {
 	my $noncontextlen = 0;
 	my $contextgroups = 0;
 	for my $c (@cmd) {
-	    while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/x) {
+	    while($c =~ s/ \257<( (?: (?! \257[<>]). )*?)\257> /\000/xs) {
 		# %replacecount = { "perlexpr" => number of times seen }
 		# e.g { "s/a/b/" => 2 }
 		$replacecount{$1}++;
@ -9786,7 +9794,7 @@ sub replacement_counts_and_lengths {
 	    # Measure the length of the context around the {= perl expr =}
 	    # Use that {=...=} has been replaced with \000 above
 	    # So there is no need to deal with \257<
-	    while($c =~ s/ (\S*\000\S*) //x) {
+	    while($c =~ s/ (\S*\000\S*) //xs) {
 		my $w = $1;
 		$w =~ tr/\000//d; # Remove all \000's
 		$contextlen += length($w);
@ -9800,7 +9808,7 @@ sub replacement_counts_and_lengths {
 	    # Options that can contain replacement strings
 	    $_ or next;
 	    my $t = $_;
-	    while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //x) {
+	    while($t =~ s/ \257<( (?: (?! \257[<>]). )* )\257> //xs) {
 		# %replacecount = { "perlexpr" => number of times seen }
 		# e.g { "$_++" => 2 }
 		# But for tagstring we just need to mark it as seen
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -557,7 +557,14 @@ Column separator. The input will be treated as a table with I<regexp>
 separating the columns. The n'th column can be accessed using
 B<{>I<n>B<}> or B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column.

-B<--colsep> implies B<--trim rl>.
+If there are more input sources, each input source will be separated,
+but the columns from each input source will be linked (see B<--link>).
+
+  parallel --colsep '-' echo {4} {3} {2} {1} \
+    ::: A-B C-D ::: e-f g-h
+
+B<--colsep> implies B<--trim rl>, which can be overridden with
+B<--trim n>.

 I<regexp> is a Perl Regular Expression:
 http://perldoc.perl.org/perlre.html
@ -3024,6 +3031,33 @@ This also works if the input file is a file with columns:
    parallel --colsep '\t' --header : echo {Name} {E-mail address}


+=head1 EXAMPLE: From a to b and b to c
+
+Assume you have input like:
+
+  aardvark
+  babble
+  cab
+  dab
+  each
+
+and want to run combinations like:
+
+  aardvark babble
+  babble cab
+  cab dab
+  dab each
+
+If the input is in the file in.txt:
+
+  parallel echo {1} - {2} ::::+ <(head -n -1 in.txt) <(tail -n +2 in.txt)
+
+If the input is in the array $a here are two solutions:
+
+  seq $((${#a[@]}-1)) | env_parallel --env a echo '${a[{=$_--=}]} - ${a[{}]}'
+  parallel echo {1} - {2} ::: "${a[@]::${#a[@]}-1}" :::+ "${a[@]:1}"
+
+
 =head1 EXAMPLE: Count the differences between all files in a dir

 Using B<--results> the results are saved in /tmp/diffcount*.
@ -3137,6 +3171,20 @@ Check the uptime of the servers in I<~/.parallel/sshloginfile>:
  parallel --tag -S .. --nonall uptime


+=head1 EXAMPLE: Colorize output
+
+Give each job a new color. Most terminals support ANSI colors with the
+escape code "\033[30;XXm" where 30 <= XX <= 37:
+
+    parallel --tagstring '\033[30;{=$_=++$::color%8+30=}m' seq {} ::: {1..10}    
+    parallel --rpl '{color} $_="\033[30;".(++$::color%8+30)."m"' \
+      --tagstring {color} seq {} ::: {1..10}
+
+To get rid of the initial \t (which comes from B<--tagstring>):
+
+    ... | perl -pe 's/\t//'
+
+
 =head1 EXAMPLE: Keep order of output same as order of input

 Normally the output of a job will be printed as soon as it
@ -4190,7 +4238,7 @@ computer.

 =item $PARALLEL_SHELL

-Use this shell the shell for the commands run by GNU Parallel:
+Use this shell for the commands run by GNU Parallel:

 =over 2

--- a/testsuite/tests-to-run/parallel-local-0.3s.sh
+++ b/testsuite/tests-to-run/parallel-local-0.3s.sh
@ -716,6 +716,14 @@ par_basic_halt() {
    parallel --halt now echo ::: should not print
 }

+par_newline_in_command() {
+    echo Command with newline and positional replacement strings
+    parallel "
+      echo {1
+      } {2}
+    " ::: O ::: K
+}
+
 export -f $(compgen -A function | grep par_)
 compgen -A function | grep par_ | sort |
    parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
--- a/testsuite/wanted-results/parallel-local-0.3s
+++ b/testsuite/wanted-results/parallel-local-0.3s
@ -1638,6 +1638,8 @@ par_macron
 par_macron	¯<¯<¯>¯>
 par_macron	¯<¯<¯>¯> ¯<¯<¯>¯>
 par_macron	"¯<¯<¯>¯>" ¯<¯<¯>¯>
+par_newline_in_command	Command with newline and positional replacement strings
+par_newline_in_command	O K
 par_pipepart_block_bigger_2G	### Test that --pipepart can have blocks > 2GB
 par_pipepart_block_bigger_2G	      1       1       4
 par_python_children	### bug #49970: Python child process dies if --env is used