--extensionreplace implemented

A few bugs around {} fixed
This commit is contained in:
Ole Tange 2010-04-13 22:59:39 +02:00
parent 8873273a92
commit a8c29c8f33
10 changed files with 161 additions and 111 deletions

126
parallel
View file

@ -36,7 +36,7 @@ be used for putting the argument in the command line. It can be
changed with B<-I>.
=item B<{.}> (not implemented)
=item B<{.}>
Input line without extension. This is a specialized replacement string
with the extension removed. It will remove from the last B<.> till the
@ -103,7 +103,9 @@ B<-g> is the default. Can be reversed with B<-u>.
Use the replacement string I<string> instead of {}.
=item B<-U> I<string> (not implemented)
=item B<-U> I<string>
=item B<--extensionreplace> I<string>
Use the replacement string I<string> instead of {.} for input line without extension.
@ -782,7 +784,7 @@ hard, as all foreground processes get the INT from the shell.
If there are no more jobs (STDIN is closed) then make sure to
distribute the arguments evenly if running -X.
Distibute jobs to computers with different speeds/number-of-cpu-cores using ssh
Distribute jobs to computers with different speeds/number-of-cpu-cores using ssh
ask the computers how many cpus they have and spawn appropriately
according to -j setting. Reuse ssh connection (-M and -S)
http://www.semicomplete.com/blog/geekery/distributed-xargs.html?source=rss20
@ -790,9 +792,7 @@ http://code.google.com/p/ppss/wiki/Manual2
http://www.gnu.org/software/pexec/
Where will '>' be run? Local or remote? Where ever is easier.
Where will '>' be run? Local or remote? Remote.
Parallelize so this can be done:
@ -870,6 +870,7 @@ GetOptions("debug|D" => \$::opt_D,
"null|0" => \$::opt_0,
"quote|q" => \$::opt_q,
"I=s" => \$::opt_I,
"extensionreplace|U=s" => \$::opt_U,
"jobs|j=s" => \$::opt_P,
"number-of-cpus" => \$::opt_number_of_cpus,
"number-of-cores" => \$::opt_number_of_cores,
@ -910,6 +911,7 @@ $Global::grouped = 1;
$Global::keeporder = 0;
$Global::quoting = 0;
$Global::replacestring = '{}';
$Global::replace_no_ext = '{.}';
$Global::input_is_filename = (@ARGV);
$/="\n";
$Global::ignore_empty = 0;
@ -934,6 +936,7 @@ if(defined $::opt_q) { $Global::quoting = 1; }
if(defined $::opt_r) { $Global::ignore_empty = 1; }
if(defined $::opt_t) { $Global::stderr_verbose = 1; }
if(defined $::opt_I) { $Global::replacestring = $::opt_I; }
if(defined $::opt_U) { $Global::replace_no_ext = $::opt_U; }
if(defined $::opt_i and $::opt_i) { $Global::replacestring = $::opt_i; }
if(defined $::opt_E and $::opt_E) { $Global::end_of_file_string = $::opt_E; }
if(defined $::opt_n and $::opt_n) { $Global::max_number_of_args = $::opt_n; }
@ -976,8 +979,9 @@ drain_job_queue();
sub generate_command_line {
my $command = shift;
my ($job_line,$last_good);
my ($next_arg,@quoted_args,$arg_length);
my ($next_arg,@quoted_args,@quoted_args_no_ext,$arg_length);
my ($number_of_substitution) = 1;
my ($number_of_substitution_no_ext) = 0;
my ($length_of_context) = 0;
my ($length_of_command_no_args);
if($Global::xargs or $Global::Xargs) {
@ -985,48 +989,76 @@ sub generate_command_line {
$number_of_substitution = ($command =~ s/\Q$Global::replacestring\E/$Global::replacestring/go);
$number_of_substitution ||= 1;
}
if($Global::xargs) {
my $c = $command;
# remove all {}s
$c =~ s/\Q$Global::replacestring\E//go;
$length_of_command_no_args = length($c);
if($Global::xargs or $Global::Xargs) {
# Count number of {.}'s on the command line
$number_of_substitution_no_ext =
($command =~ s/\Q$Global::replace_no_ext\E/$Global::replace_no_ext/go);
$number_of_substitution_no_ext ||= 0;
}
if($Global::Xargs) {
my $spaces=0;
if($Global::xargs or $Global::Xargs) {
my $c = $command;
while($c =~ s/(\S*\Q$Global::replacestring\E\S*)//o) {
# Length of context minus the {}
$length_of_context += length($1) - 2;
# count number of replacements
my $no_of_replace = 0;
$no_of_replace++ while ($c =~ m/\Q$Global::replacestring\E/g);
my $no_of_no_ext = 0;
$no_of_no_ext++ while ($c =~ m/\Q$Global::replace_no_ext\E/g);
if($Global::xargs) {
# remove all {}s
$c =~ s/\Q$Global::replacestring\E|\Q$Global::replace_no_ext\E//og;
$length_of_command_no_args = length($c) - $no_of_replace - $no_of_no_ext;
$length_of_context = 0;
$spaces = 1;
}
if($Global::Xargs) {
$c =~ s/\S*\Q$Global::replacestring\E\S*//go;
$c =~ s/\S*\Q$Global::replace_no_ext\E\S*//go;
$length_of_command_no_args = length($c) - 1;
$length_of_context = length($command) - $length_of_command_no_args
- $no_of_replace * length($Global::replacestring)
- $no_of_no_ext * length($Global::replace_no_ext);
}
$length_of_command_no_args = length($c);
}
my $number_of_args = 0;
# max number of lines (-L) =
# number_of_read_lines = 0
while (defined($next_arg = get_next_arg())) {
# if defined max_number_of_lines {
my $next_arg_no_ext = $next_arg;
$next_arg_no_ext =~ s/\.[^\.]*$//; # Remove .ext from argument
# if defined max_number_of_lines
# number_of_read_lines++
# if $next_arg =~ /\w$/ number_of_read_lines-- (Trailing blanks cause an
# input line to be logically continued on the next input line.)
# if $next_arg =~ /\w$/ then number_of_read_lines--
# Trailing blanks cause an
# input line to be logically continued on the next input line.
# if number_of_read_lines > max_number_of_lines
# last
push (@quoted_args, $next_arg);
push (@quoted_args_no_ext, $next_arg_no_ext);
$number_of_args++;
if(not $Global::xargs and not $Global::Xargs) {
last;
} else {
# Emulate xargs if there is a command and -x or -X is set
$arg_length += $number_of_substitution * (1 + length ($next_arg))
my $next_arg_len = $number_of_substitution * (length ($next_arg) + $spaces) +
+ $number_of_substitution_no_ext * (length ($next_arg_no_ext) + $spaces)
+ $length_of_context;
$arg_length += $next_arg_len;
# debug("arglen $arg_length = $number_of_substitution * (1 + length ($next_arg)) + $length_of_context\n");
my $job_line_length = $length_of_command_no_args + 1 + $arg_length;
my $job_line_length = $length_of_command_no_args + $arg_length;
# debug("linelen $job_line_length = $length_of_command_no_args + 1 + $arg_length\n");
# print STDERR "1234567890123456789012345678901234567890\n";
#print STDERR "LENcalc $number_of_args CON$length_of_context $length_of_command_no_args ".length ($next_arg)." LL$job_line_length NAL$next_arg_len ",$job_line_length-$next_arg_len-1,"\n";
if($job_line_length >= max_length_of_command_line()) {
unget_arg(pop @quoted_args);
if($quoted_args[0]) {
if(defined $quoted_args[0]) {
last;
} else {
die ("Command line too long at: $next_arg");
die ("Command line too long ($job_line_length >= "
. max_length_of_command_line() . ") at number $number_of_args: $next_arg");
}
}
if($Global::max_number_of_args and $number_of_args >= $Global::max_number_of_args) {
@ -1036,27 +1068,18 @@ sub generate_command_line {
}
if(@quoted_args) {
$job_line = $command;
if(defined $job_line and $job_line =~/\Q$Global::replacestring\E/o) {
# substitute {} with args
if(defined $job_line and
($job_line =~/\Q$Global::replacestring\E/o or $job_line =~/\Q$Global::replace_no_ext\E/o)) {
# substitute {} and {.} with args
if($Global::Xargs) {
# Context sensitive replace (foo{}bar with fooargsbar)
while($job_line =~/\Q$Global::replacestring\E/o) {
$job_line =~ /(\S*\Q$Global::replacestring\E\S*)/ or die ("This should never happen");
my $wordarg = $1;
my @all_word_arg;
for my $arg (@quoted_args) {
my $substituted = $wordarg;
$substituted=~s/\Q$Global::replacestring\E/$arg/go;
push @all_word_arg, $substituted;
}
my $all_word_arg = join(" ",@all_word_arg);
my ($quoted_wordarg) = shell_quote($wordarg);
$job_line =~ s/$quoted_wordarg/$all_word_arg/;
}
$job_line = context_replace($job_line, \@quoted_args, \@quoted_args_no_ext);
} else {
# Normal replace {} with args
# Normal replace {} with args and {.} with args without extension
my $arg=join(" ",@quoted_args);
my $arg_no_ext=join(" ",@quoted_args_no_ext);
$job_line =~ s/\Q$Global::replacestring\E/$arg/go;
$job_line =~ s/\Q$Global::replace_no_ext\E/$arg_no_ext/go;
}
} else {
# append args
@ -1085,6 +1108,28 @@ sub shell_quote {
return (@strings);
}
# Context sensitive replace (foo{}bar with fooARG1bar fooARG2bar).
#
# Every whitespace delimited word of the command that contains the
# replacement string ({}) or the no-extension replacement string ({.})
# is repeated once per argument, joined by a single space.
#
# Input:
#   $job_line = command line template
#   $quoted   = ref to array of arguments that replace {}
#   $no_ext   = ref to array of arguments that replace {.};
#               indexed in parallel with $quoted
# Returns:
#   $job_line with all words containing a replacement string expanded
#
# The template is scanned exactly once and both replacement strings are
# substituted in the same pass, so text inserted from an argument is
# never searched again. Rescanning the result made an argument that
# itself contains a replacement string loop forever, and let the {.}
# substitution rewrite text that was just inserted for {}.
sub context_replace {
    my ($job_line,$quoted,$no_ext) = (@_);
    my $replace = defined $Global::replacestring ? $Global::replacestring : "";
    my $replace_no_ext = defined $Global::replace_no_ext ? $Global::replace_no_ext : "";
    # An empty replacement string would match at every position: ignore it
    my @tokens = grep { length $_ } ($replace, $replace_no_ext);
    @tokens or return $job_line;
    # {} is listed first so it wins if both strings match at the same position
    my $token_re = join("|", map { quotemeta($_) } @tokens);

    # Expand one context word into one copy per argument
    my $expand_word = sub {
	my ($wordarg) = @_; # This is the context that needs to be substituted
	my @all_word_arg;
	for my $n (0 .. $#$quoted) {
	    my $arg = $quoted->[$n];
	    my $arg_no_ext = $no_ext->[$n];
	    my $substituted = $wordarg;
	    # Single pass: inserted argument text is not substituted again
	    $substituted =~ s/($token_re)/$1 eq $replace ? $arg : $arg_no_ext/ge;
	    push @all_word_arg, $substituted;
	}
	return join(" ",@all_word_arg);
    };

    $job_line =~ s/(\S*(?:$token_re)\S*)/$expand_word->($1)/ge;
    return $job_line;
}
#
# Number of processes, filehandles, max length of command line
#
@ -1484,6 +1529,7 @@ sub start_job {
}
$Global::running_jobs++;
debug("$Global::running_jobs processes. Starting: $command\n");
#print STDERR "LEN".length($command)."\n";
$pid = open3(gensym, ">&STDOUT", ">&STDERR", $command) ||
die("open3 failed. Report a bug to <parallel\@tange.dk>\n");
debug("started: $command\n");

View file

@ -155,8 +155,8 @@ If \fBcommand\fR is given, \fBparallel\fR will behave similar to \fBxargs\fR. If
Input line. This is the default replacement string and will normally
be used for putting the argument in the command line. It can be
changed with \fB\-I\fR.
.IP "\fB{.}\fR (not implemented)" 9
.IX Item "{.} (not implemented)"
.IP "\fB{.}\fR" 9
.IX Item "{.}"
Input line without extension. This is a specialized replacement string
with the extension removed. It will remove from the last \fB.\fR till the
end of line of each input line and replace {.} with the
@ -219,8 +219,12 @@ printed when the command is finished. \s-1STDERR\s0 first followed by \s-1STDOUT
.IP "\fB\-I\fR \fIstring\fR" 9
.IX Item "-I string"
Use the replacement string \fIstring\fR instead of {}.
.IP "\fB\-U\fR \fIstring\fR (not implemented)" 9
.IX Item "-U string (not implemented)"
.IP "\fB\-U\fR \fIstring\fR" 9
.IX Item "-U string"
.PD 0
.IP "\fB\-\-extensionreplace\fR \fIstring\fR" 9
.IX Item "--extensionreplace string"
.PD
Use the replacement string \fIstring\fR instead of {.} for input line without extension.
.IP "\fB\-\-jobs\fR \fIN\fR" 9
.IX Item "--jobs N"
@ -896,7 +900,7 @@ hard, as all foreground processes get the \s-1INT\s0 from the shell.
If there are no more jobs (\s-1STDIN\s0 is closed) then make sure to
distribute the arguments evenly if running \-X.
.PP
Distibute jobs to computers with different speeds/number\-of\-cpu\-cores using ssh
Distribute jobs to computers with different speeds/number\-of\-cpu\-cores using ssh
ask the computers how many cpus they have and spawn appropriately
according to \-j setting. Reuse ssh connection (\-M and \-S)
http://www.semicomplete.com/blog/geekery/distributed\-xargs.html?source=rss20
@ -904,7 +908,7 @@ http://code.google.com/p/ppss/wiki/Manual2
.PP
http://www.gnu.org/software/pexec/
.PP
Where will '>' be run? Local or remote? Where ever is easier.
Where will '>' be run? Local or remote? Remote.
.PP
Parallelize so this can be done:
mdm.screen find dir \-execdir mdm-run cmd {} \e;

View file

@ -4,9 +4,9 @@ b35d8e49be8d94899b719c40d3f1f4bb -
3 60000 348894
1foo bar2foo bar3 Afoo barBfoo barC
1foo2foo3 1bar2bar3 AfooBfooC AbarBbarC
51736abdee4738369ce04b354d40c887 -
31d9274be5fdc2de59487cb05ba57776 -
6 119994 697800
51736abdee4738369ce04b354d40c887 -
31d9274be5fdc2de59487cb05ba57776 -
Chars per line: 116300
'a'
'a'

View file

@ -73,7 +73,7 @@ a7 b1 2 3 4 5 6 7
a8 b1 2 3 4 5 6 7 8
a9 b1 2 3 4 5 6 7 8 9
a10 b1 2 3 4 5 6 7 8 9 10
51736abdee4738369ce04b354d40c887 -
Chars per line: 116300
1471045299517233a8dc29b1c3227f2e -
Chars per line: 102223
31d9274be5fdc2de59487cb05ba57776 -
Chars per line (697800/6): 116300
22074f9acada52462defb18ba912d744 -
Chars per line (817788/7): 116826

View file

@ -53,26 +53,26 @@ line 1 line 1
line 2
line 1 line 1
line 2
1
2
3
4
5
6
7
8
9
10
1
2
3
4
5
6
7
8
9
10
max proc 1
max proc 2
max proc 3
max proc 4
max proc 5
max proc 6
max proc 7
max proc 8
max proc 9
max proc 10
200% proc 1
200% proc 2
200% proc 3
200% proc 4
200% proc 5
200% proc 6
200% proc 7
200% proc 8
200% proc 9
200% proc 10
This is line 1
This is line 2
This is line 3
@ -81,10 +81,10 @@ This is line 1
This is line 2
This is line 3
line 1
delimiter NUL line 1
line 2
line 3
line 1
delimiter TAB line 1
line 2
line 3
line 1 line 1

View file

@ -9,14 +9,14 @@ seq 1 10 | parallel -k 'seq 1 {} | parallel -m -k -I :: echo a{} b::'
seq 1 60000 | parallel -I :: -m echo a::b::c | \
mop -q "|sort |md5sum" :parallel
echo -n "Chars per line: "
CHAR=$(cat ~/.mop/:parallel | wc -c)
LINES=$(cat ~/.mop/:parallel | wc -l)
echo -n "Chars per line ($CHAR/$LINES): "
echo "$CHAR/$LINES" | bc
seq 1 60000 | parallel -I :: -X echo a::b::c | \
mop -q "|sort |md5sum" :parallel
echo -n "Chars per line: "
CHAR=$(cat ~/.mop/:parallel | wc -c)
LINES=$(cat ~/.mop/:parallel | wc -l)
echo -n "Chars per line ($CHAR/$LINES): "
echo "$CHAR/$LINES" | bc

View file

@ -40,15 +40,15 @@ parallel --arg-file /tmp/$$ echo
(echo line 1;echo line 1;echo line 2) | parallel -k --max-args 2 -X echo
# Test --max-procs and -P: Number of processes
seq 1 10 | parallel -k --max-procs +0 echo
seq 1 10 | parallel -k -P 200% echo
seq 1 10 | parallel -k --max-procs +0 echo max proc
seq 1 10 | parallel -k -P 200% echo 200% proc
# Test --delimiter and -d: Delimiter instead of newline
# Yes there is supposed to be an extra newline for -d N
echo line 1Nline 2Nline 3 | parallel -k -d N echo This is
echo line 1Nline 2Nline 3 | parallel -k --delimiter N echo This is
printf "line 1\0line 2\0line 3" | parallel -d '\0' echo
printf "line 1\tline 2\tline 3" | parallel --delimiter '\t' echo
printf "delimiter NUL line 1\0line 2\0line 3" | parallel -k -d '\0' echo
printf "delimiter TAB line 1\tline 2\tline 3" | parallel -k --delimiter '\t' echo
# Test --max-chars and -s: Max number of chars in a line
(echo line 1;echo line 1;echo line 2) | parallel -k --max-chars 25 -X echo

View file

@ -4,9 +4,9 @@ b35d8e49be8d94899b719c40d3f1f4bb -
3 60000 348894
1foo bar2foo bar3 Afoo barBfoo barC
1foo2foo3 1bar2bar3 AfooBfooC AbarBbarC
51736abdee4738369ce04b354d40c887 -
31d9274be5fdc2de59487cb05ba57776 -
6 119994 697800
51736abdee4738369ce04b354d40c887 -
31d9274be5fdc2de59487cb05ba57776 -
Chars per line: 116300
'a'
'a'

View file

@ -73,7 +73,7 @@ a7 b1 2 3 4 5 6 7
a8 b1 2 3 4 5 6 7 8
a9 b1 2 3 4 5 6 7 8 9
a10 b1 2 3 4 5 6 7 8 9 10
51736abdee4738369ce04b354d40c887 -
Chars per line: 116300
1471045299517233a8dc29b1c3227f2e -
Chars per line: 102223
31d9274be5fdc2de59487cb05ba57776 -
Chars per line (697800/6): 116300
22074f9acada52462defb18ba912d744 -
Chars per line (817788/7): 116826

View file

@ -53,26 +53,26 @@ line 1 line 1
line 2
line 1 line 1
line 2
1
2
3
4
5
6
7
8
9
10
1
2
3
4
5
6
7
8
9
10
max proc 1
max proc 2
max proc 3
max proc 4
max proc 5
max proc 6
max proc 7
max proc 8
max proc 9
max proc 10
200% proc 1
200% proc 2
200% proc 3
200% proc 4
200% proc 5
200% proc 6
200% proc 7
200% proc 8
200% proc 9
200% proc 10
This is line 1
This is line 2
This is line 3
@ -81,10 +81,10 @@ This is line 1
This is line 2
This is line 3
line 1
delimiter NUL line 1
line 2
line 3
line 1
delimiter TAB line 1
line 2
line 3
line 1 line 1