parallel: -X {1}-{2} with multiple input sources. Passes testsuite.

2024-11-22 05:57:54 +00:00 · 2011-06-01 00:02:29 +02:00 · 2011-06-01 00:02:29 +02:00 · 64f0579195
parent f05105ab62
commit 64f0579195
6 changed files with 224 additions and 136 deletions
--- a/doc/FUTURE_IDEAS
+++ b/doc/FUTURE_IDEAS
@ -1,18 +1,40 @@
+parallel echo {} ::: 1 2 3
+src/parallel echo {} ::: 1 2 3
+parallel echo a{}b ::: 1 2 3
+src/parallel echo a{}b ::: 1 2 3
+parallel echo a{}b ::: 1/Q.e 2/W.t 3/E.f
+src/parallel echo a{}b ::: 1/Q.e 2/W.t 3/E.f
+parallel echo a{.}b ::: 1/Q.e 2/W.t 3/E.f
+src/parallel echo a{.}b ::: 1/Q.e 2/W.t 3/E.f
+parallel echo a{.}b {//} ::: 1/Q.e 2/W.t 3/E.f
+src/parallel echo a{.}b {//} ::: 1/Q.e 2/W.t 3/E.f
+parallel echo a{.}b {//}-{/.} ::: 1/Q.e 2/W.t 3/E.f
+src/parallel echo a{.}b {//}-{/.} ::: 1/Q.e 2/W.t 3/E.f
+parallel --xapply echo a{3.}b {//}-{/.} ::: 1/Q.e 2/W.t 3/E.f
+src/parallel --xapply echo a{3.}b {//}-{/.} ::: 1/Q.e 2/W.t 3/E.f
+parallel --xapply echo a{3.}b {//}-{/.} ::: 1/Q.e ::: 2/W.t ::: 3/E.f
+src/parallel --xapply echo a{3.}b {//}-{/.} ::: 1/Q.e ::: 2/W.t ::: 3/E.f
+
+
+
+Fixed bug in {n//}
+
 -S - should read --sshloginfile from stdin

--onall
+-X for multiple input sources:
+parallel -X echo mkdir -p dir-{1} dir-{1}/subdir-{2} :::: <(seq 1 5) <(seq 5 8)
+parallel -X echo mkdir -p dir-{1//} dir-{1.}/subdir-{2} ::: a/foo.d b/bar.d :::: <(seq 5 8)

-One jobqueue per sshlogin.
+Don't start:

-cat | parallel --onall -S eos,iris '(echo {3} {2}) | awk \{print\ \$2}' :::: - ::: a b c ::: 1 2 3
+* load
+* memory free | head -n 2 | awk \{print\ \$4+\$6+\$7\}
+* swap
+* seek

 vmstat\ 1\ 2\ \|\ tail\ -n1\ \|\ awk\ \{print\\\ \\\$7*\\\$8\}


-Dont start new job if:
-
-* memory free is too low
-
 Video 30. 36. 41. 48


--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -168,14 +168,29 @@ Subject: GNU Parallel 20110622 ('XXX') released
 GNU Parallel 20116022 ('XXX') has been released. It is
 available for download at: http://ftp.gnu.org/gnu/parallel/

+For sysadmins this is a major release, as --onall makes it easy to run
+the same commands on a list of computers.
+
 New in this release:

-* Parallel processing without Hadoop!
+* --onall will run all the jobs on all the computers. This is useful
+  for system administrators who have multiple servers.
+
+* --nonall runs like --onall but reads no arguments from stdin
+  (standard input), so it is possible to do:
+  parallel --nonall -S computer1,computer2 uptime
+
+* --noswap will not start a job on a server that is swapping.
+
+* Article: Parallel processing without Hadoop!
  http://www.solutionhacker.com/parallel-processing-without-hadoop/

 * Article in Linux Magazine (Spanish). Thanks to Ben Martin.
  http://www.linux-magazine.es/issue/67/

+* Blog post in English. Thanks to Dagon.
+  http://hekate.homeip.net/2011/05/29/
+
 * Bug fixes and man page updates.


--- a/src/parallel
+++ b/src/parallel
@ -3382,17 +3382,6 @@ sub populate {
 	    }
 	}
    }
-
-    if($self->number_of_args() > 0) {
-	# Fill up if we have a half completed line
-	if(defined $self->{'max_number_of_args'}) {
-	    # If you want a number of args and do not have it then fill out the rest with empties
-	    # so magic strings like '{2}' will be replaced with empty.
-	    while($self->number_of_args() < $self->{'max_number_of_args'}) {
-		$self->push([Arg->new("")]);
-	    }
-	}
-    }
 }

 sub push {
@ -3408,7 +3397,7 @@ sub push {
 	if(defined $arg) {
 	    if($self->{'positional_replace'}{$arg_no}) {
 		for my $used (keys %{$self->{'replacecount'}}) {
-		    my $replacementfunction = $self->{'positional_replace'}{$arg_no}; # {} {/} {.} or {/.}
+		    my $replacementfunction = $self->{'positional_replace'}{$arg_no}; # {} {/} {//} {.} or {/.}
 		    # Find the single replacements
 		    $self->{'len'}{$used} += length $arg->replace($replacementfunction);
 		}
@ -3427,7 +3416,7 @@ sub pop {
    my $record = pop @{$self->{'arg_list'}};
    for my $arg (@$record) {
 	if(defined $arg) {
-	    for my $replacement_string qw(keys %{$self->{'replacecount'}}) {
+	    for my $replacement_string (keys %{$self->{'replacecount'}}) {
 		$self->{'len'}{$replacement_string} -= length $arg->replace($replacement_string);
 	    }
 	}
@ -3439,7 +3428,7 @@ sub pop_all {
    # Remove all arguments
    my $self = shift;
    my @popped = @{$self->{'arg_list'}};
-    for my $replacement_string qw(keys %{$self->{'replacecount'}}) {
+    for my $replacement_string (keys %{$self->{'replacecount'}}) {
 	$self->{'len'}{$replacement_string} = 0;
    }
    $self->{'arg_list'} = [];
@ -3475,7 +3464,7 @@ sub len {
 		    $self->{'replacecount'}{$replstring};
 	    }
 	    if($Global::replace{$replstring}) {
-		# This is a multi replacestring ({} {/} {.} {/.} {//})
+		# This is a multi replacestring ({} {/} {//} {.} {/.})
 		# Add each space between two arguments
 		my $number_of_args = ($#{$self->{'arg_list'}[0]}+1)*$self->number_of_args();
 		$len += ($number_of_args-1) * $self->{'replacecount'}{$replstring};
@ -3516,7 +3505,7 @@ sub number_of_replacements {
    my $multi_regexp = multi_regexp();
    my $replacement_regexp =
 	"(?:".
-	'\{\d+/?\.?\}'. # {n}, {n.} {n/.} {n/}
+	'\{\d+(?:|\.|/\.|/|//)?\}'. # {n} {n.} {n/.} {n/} {n//}
 	'|'.
 	join("|",map {$a=$_;$a=~s/(\W)/\\$1/g; $a} values %Global::replace).
 	")";
@ -3579,125 +3568,176 @@ sub replaced {
 sub replace_placeholders {
    my $self = shift;
    my $target = shift;
-    my $quote_special_chars = shift;
+    my $quoteall = shift;
    my $context_replace = $self->{'context_replace'};
-    my $context_regexp = $context_replace ? '\S*' : ''; # Regexp to match surrounding context
+    my $replaced;

-    if($self->number_of_args() == 0) {
-	Carp::confess("0 args should never call replaced");
+    if($self->{'context_replace'}) {
+	$replaced = $self->context_replace_placeholders($target,$quoteall);
+    } else {
+	$replaced = $self->simple_replace_placeholders($target,$quoteall);
    }
+    return $replaced;
+}
+
+sub context_replace_placeholders {
+    my $self = shift;
+    my $target = shift;
+    my $quoteall = shift;
+    # -X = context replace
+    # maybe multiple input sources
+    # maybe --xapply
+    # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]

-    my %replace;
-    my %replace_single;
-    my %replace_multi;
-    my @replace_context;
    my @args=();
    my @used_multi;
+    my %replace;

    for my $record (@{$self->{'arg_list'}}) {
-	# Merge arguments from records into args
+	# Merge arguments from records into args for easy access
 	CORE::push @args, @$record;
    }
+
+    # Replacement functions
+    my @rep = qw({} {/} {//} {.} {/.});
+    # Inner part of replacement functions
+    my @rep_inner = ('', '/', '//', '.', '/.');
+    # Regexp for replacement functions
+    my $rep_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep) . ")";
+    # Regexp for inner replacement functions
+    my $rep_inner_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep_inner) . ")";
+    # Seq replace string: {#}
+    my $rep_seq_regexp = '(?:\{\#\})';
+    # Normal replace strings
+    my $rep_str_regexp = multi_regexp();
+
+    # Fish out the words that have replacement strings in them
+    my $tt = $target;
+    my %word;
+    while($tt =~ s/(\S*(?:$rep_str_regexp|\{\d+$rep_inner_regexp\}|$rep_seq_regexp)\S*)/\0/o) {
+	$word{$1}++;
+    }
+    # For each word: Generate the replacement string for that word.
+    for my $origword (keys %word) {
+	my @pos_replacements=();
+	my @replacements=();
+	my $w;
+	my $word = $origword; # Make a local modifiable copy
+
+	# replace {#} if it exists
+	$word =~ s/\{\#\}/$self->seq()/geo;
+	if($word =~ m:\{\d+$rep_inner_regexp\}:o) {
+	    # There are positional replacement strings
+	    my @argset;
+	    if($#{$self->{'arg_list'}->[0]} == 0) {
+		# Only one input source: Treat it as a set
+		@argset = [ @args ];
+	    } else {
+		@argset = @{$self->{'arg_list'}};
+	    }
+	    # Match 1..n where n = max args in an argset
+	    my $pos_regexp = "(?:".join("|", 1 .. $#{$argset[0]}+1).")";
+	    for my $argset (@argset) {
+		# Replace all positional arguments - e.g. {7/.}
+		# with the replacement function - e.g. {/.}
+		# of that argument
+		if(defined $self->{'max_number_of_args'}) {
+		    # Fill up if we have a half completed line, so {n} will be empty
+		    while($#$argset < $self->{'max_number_of_args'}-1) {
+			CORE::push @$argset, Arg->new("");
+		    }
+		}
+		$w = $word;
+		$w =~ s/\{($pos_regexp)($rep_inner_regexp)\}/$argset->[$1-1]->replace('{'.$2.'}')/geo;
+		CORE::push @pos_replacements, $w;
+	    }
+	}
+	if(not @pos_replacements) {
+	    @pos_replacements = ($word);
+	}
+
+	if($word =~ m:$rep_str_regexp:) {
+	    # There are normal replacement strings
+	    for my $w (@pos_replacements) {
+		for my $arg (@args) {
+		    my $wmulti = $w;
+		    $wmulti =~ s/($rep_str_regexp)/$arg->replace($Global::replace_rev{$1})/geo;
+		    CORE::push @replacements, $wmulti;
+		}
+	    }
+	}
+	if(@replacements) {
+	    CORE::push @{$replace{$origword}}, @replacements;
+	} else {
+	    CORE::push @{$replace{$origword}}, @pos_replacements;
+	}
+    }
+    # Substitute the replace strings with the replacement values
+    # Must be sorted by length if a short word is a substring of a long word
+    my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_}
+		      sort { length $b <=> length $a } keys %word);
+    $target =~ s/($regexp)/join(" ",@{$replace{$1}})/ge;
+    return $target;
+}
+
+sub simple_replace_placeholders {
+    # no context (no -X)
+    # maybe multiple input sources
+    # maybe --xapply
+    my $self = shift;
+    my $target = shift;
+    my $quoteall = shift;
+    my @args=();
+    my @used_multi;
+    my %replace;
+
+    for my $record (@{$self->{'arg_list'}}) {
+	# Merge arguments from records into args for easy access
+	CORE::push @args, @$record;
+    }
+    # Which replace strings are used?
+    # {#} {} {/} {//} {.} {/.} {n} {n/} {n//} {n.} {n/.}
    for my $used (keys %{$self->{'replacecount'}}) {
-	if($used =~ /^{(\d+)(\D*)}$/) {
+	# What are the replacement values for the replace strings?
+	if(grep { $used eq $_ } qw({} {/} {//} {.} {/.})) {
+	    # {} {/} {//} {.} {/.}
+	    $replace{$Global::replace{$used}} =
+		join(" ", map { $_->replace($used) } @args);
+	} elsif($used =~ /^\{(\d+)(|\/|\/\/|\.|\/\.)\}$/) {
+	    # {n} {n/} {n//} {n.} {n/.}
 	    my $positional = $1; # number if any
-	    my $replacementfunction = "{".::undef_as_empty($2)."}"; # {} {/} {.} or {/.}
-	    # Find the single replacements
+	    my $replacementfunction = "{".::undef_as_empty($2)."}"; # {} {/} {//} {.} or {/.}
 	    if(defined $args[$positional-1]) {
 		# we have a matching argument for {n}
-		$replace_single{$used} = $args[$positional-1]->replace($replacementfunction);
-	    }
-	} elsif($used =~ /^(\{\}|\{\/\}|\{\/\/\}|\{\.\}|\{\/\.\})$/) {
-	    # Add to the multireplacement
-	    my $replacementfunction = $used; # {} {/} {//} {.} or {/.}
-	    CORE::push @used_multi, $replacementfunction;
-	    if($self->{'context_replace'}) {
-		for my $n (0 .. $#args) {
-		    $replace_context[$n]{$replacementfunction} =
-			$args[$n]->replace($replacementfunction);
-		}
+		$replace{$used} = $args[$positional-1]->replace($replacementfunction);
 	    } else {
-		CORE::push(@{$replace_multi{$replacementfunction}},
-			   map { $args[$_]->replace($replacementfunction) }
-			   0 .. $#args);
+		if($positional <= $self->{'max_number_of_args'}) {
+		    # Fill up if we have a half completed line
+		    $replace{$used} = "";
 		}
-	} elsif($used eq '{#}') {
-	    $replace_single{$Global::replace{$used}} = $self->seq();
+	    }
+	} elsif($used eq "{#}") {
+	    # {#}
+	    $replace{$Global::replace{$used}} = $self->seq();
 	} else {
-	    ::die_bug('replace_placeholders20110309');
+	    ::die_bug('simple_replace_placeholders_20110530');
 	}
    }

-    my $replacements = 0;
-    if(%replace_single) {
-	my $single_regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_} sort keys %replace_single);
-	$replacements += ($target =~ s/($single_regexp)/$replace_single{$1}/ge);
-    }
-    my $orig_target = $target;
-    if(@used_multi) {
-	my $multi_regexp = join('|', map {
-	    $a=$Global::replace{$_};
-	    $a=~s/(\W)/\\$1/g; $a
-				} @used_multi);
-	my %wordargs;
-	if($quote_special_chars) {
-	    while($target =~ s/(.*($multi_regexp).*)/\0/o) {
-		my $wordarg = $1;
-		my $pattern = $2;
-		if($self->{'context_replace'}) {
-		    my $substituted = $wordarg;
-		    my @all=();
-		    for my $argref (@replace_context) {
-			# for each argument convert a{}b to a1b a2b
-			my $substituted = $wordarg;
-			$substituted =~ s/($multi_regexp)/$argref->{$Global::replace_rev{$1}}/g;
-			CORE::push @all,$substituted;
-		    }
-		    $wordargs{$wordarg} = join" ",@all;
-		    return @all;
+    # Substitute the replace strings with the replacement values
+    my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_} keys %replace);
+    if($regexp) {
+	if($quoteall) {
+	    # This is for --return: The whole expression must be
+	    # quoted - not just the replacements
+	    %replace = map { $_ => ::shell_unquote($replace{$_}) } keys %replace;
+	    $target =~ s/($regexp)/$replace{$1}/g;
+	    $target = ::shell_quote_scalar($target);
 	} else {
-		    my $substituted = $wordarg;
-		    $substituted =~ s/($multi_regexp)/join(" ",map {$_} @{$replace_multi{$Global::replace_rev{$1}}})/eg;
-		    $wordargs{$wordarg} = $substituted;
+	    $target =~ s/($regexp)/$replace{$1}/g;
 	}
    }
-	} else {
-	    while($target =~ s/(\S*($multi_regexp)\S*)/\0/o) {
-		my $wordarg = $1;
-		my $pattern = $2;
-		if($self->{'context_replace'}) {
-		    my $substituted = $wordarg;
-		    my @all=();
-		    for my $argref (@replace_context) {
-			# for each argument convert a{}b to a1b a2b
-			my $substituted = $wordarg;
-			$substituted =~ s/($multi_regexp)/$argref->{$Global::replace_rev{$1}}/g;
-			CORE::push @all,$substituted;
-		    }
-		    $wordargs{$wordarg} = join" ",@all;
-		} else {
-		    my $substituted = $wordarg;
-		    $substituted =~ s/($multi_regexp)/join(" ",map {$_} @{$replace_multi{$Global::replace_rev{$1}}})/eg;
-		    $wordargs{$wordarg} = $substituted;
-		}
-	    }
-	}
-
-	my @k=keys %wordargs;
-	for(@k) {s/(\W)/\\$1/g};
-	my $regexp=join("|",@k);
-	if($quote_special_chars) {
-	    # When --return'ing a file with added special chars
-	    # they need to be quoted.
-	    # E.g. --trc 'a {}'
-	    # Not really pretty. Can this be done better?
-	    $orig_target =~s/($regexp)/::shell_unquote($wordargs{$1})/ge;
-	    $orig_target = ::shell_quote_scalar($orig_target);
-	} else {
-	    $orig_target =~s/($regexp)/$wordargs{$1}/g;
-	}
-    }
-    return $orig_target;
+    return $target;
 }


@ -4214,7 +4254,7 @@ sub new {

 sub replace {
    my $self = shift;
-    my $replacement_string = shift; # {} {/} {.} {/.}
+    my $replacement_string = shift; # {} {/} {//} {.} {/.}
    if(not defined $self->{$replacement_string}) {
 	my $s;
 	if($Global::trim eq "n") {
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -70,14 +70,14 @@ alias or a function will not work (see why
 http://www.perlmonks.org/index.pl?node_id=484296).


-=item B<{}>
+=item B<{}> (still alpha testing)

 Input line. This is the default replacement string and will normally
 be used for putting the argument in the command line. It can be
 changed with B<-I>.


-=item B<{.}>
+=item B<{.}> (still alpha testing)

 Input line without extension. This is a specialized replacement string
 with the extension removed. If the input line contains B<.> after the
@ -91,7 +91,7 @@ B<{.}> can be used the same places as B<{}>. The replacement string
 B<{.}> can be changed with B<-U>.


-=item B<{/}>
+=item B<{/}> (still alpha testing)

 Basename of input line. This is a specialized replacement string
 with the directory part removed.
@ -100,7 +100,7 @@ B<{/}> can be used the same places as B<{}>. The replacement string
 B<{/}> can be changed with B<--basenamereplace>.


-=item B<{//}> (alpha testing)
+=item B<{//}> (still alpha testing)

 Dirname of input line. This is a specialized replacement string
 containing the dir of the input. See B<dirname>(1).
@ -109,7 +109,7 @@ B<{//}> can be used the same places as B<{}>. The replacement string
 B<{//}> can be changed with B<--dirnamereplace>.


-=item B<{/.}>
+=item B<{/.}> (still alpha testing)

 Basename of input line without extension. This is a specialized
 replacement string with the directory and extension part removed. It
@ -119,14 +119,14 @@ B<{/.}> can be used the same places as B<{}>. The replacement string
 B<{/.}> can be changed with B<--basenameextensionreplace>.


-=item B<{#}> (beta testing)
+=item B<{#}> (still alpha testing)

 Sequence number of the job to run. The same as $PARALLEL_SEQ.

 The replacement string B<{#}> can be changed with B<--seqreplace>.


-=item B<{>I<n>B<}>
+=item B<{>I<n>B<}> (still alpha testing)

 Argument from input source I<n> or the I<n>'th argument. See B<-a>
 and B<-N>.
@ -134,7 +134,7 @@ and B<-N>.
 B<{>I<n>B<}> can be used the same places as B<{}>.


-=item B<{>I<n>.B<}>
+=item B<{>I<n>.B<}> (still alpha testing)

 Argument from input source I<n> or the I<n>'th argument without
 extension. It is a combination of B<{>I<n>B<}> and B<{.}>.
@ -142,7 +142,7 @@ extension. It is a combination of B<{>I<n>B<}> and B<{.}>.
 B<{>I<n>.B<}> can be used the same places as B<{>I<n>B<}>.


-=item B<{>I<n>/B<}>
+=item B<{>I<n>/B<}> (still alpha testing)

 Basename of argument from input source I<n> or the I<n>'th argument.
 It is a combination of B<{>I<n>B<}> and B<{/}>.  See B<-a> and B<-N>.
@ -150,7 +150,7 @@ It is a combination of B<{>I<n>B<}> and B<{/}>.  See B<-a> and B<-N>.
 B<{>I<n>/B<}> can be used the same places as B<{>I<n>B<}>.


-=item B<{>I<n>/.B<}>
+=item B<{>I<n>/.B<}> (still alpha testing)

 Basename of argument from input source I<n> or the I<n>'th argument
 without extension.  It is a combination of B<{>I<n>B<}>, B<{/}>, and
--- a/testsuite/wanted-results/test55
+++ b/testsuite/wanted-results/test55
@ -0,0 +1,11 @@
+### Test race condition on 8 CPU (my laptop)
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10