From d78f8539f9a8be447315f6e399a69b0718acfe24 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 16 Mar 2009 08:58:39 +0100 Subject: [PATCH] Implemented -X. Fixed bug with -x: If the command line had more than one {} it could result in a too long line --- parallel | 85 +++++++++++++++++++++++++++------ parallel.1 | 16 +++++-- unittest/actual-results/test10 | 10 +++- unittest/tests-to-run/test10.sh | 11 ++++- unittest/wanted-results/test10 | 10 +++- 5 files changed, 107 insertions(+), 25 deletions(-) diff --git a/parallel b/parallel index e8c29c0c..761489f7 100755 --- a/parallel +++ b/parallel @@ -28,7 +28,7 @@ command also invokes B<-f>. If B<command> is given, B<parallel> will behave similar to B<xargs>. If B<command> is not given B<parallel> will behave similar to B<cat | sh>. -=item I<-0> +=item B<-0> Use NUL as delimiter. Normally input lines will end in \n (newline). If they end in \0 (NUL), then use this option. It is useful @@ -56,13 +56,13 @@ B<-g> is the default. Can be reversed with B<-u>. Run N jobs in parallel. 0 means as many as possible. Default is 10. -=item B<-j> +I<N> +=item B<-j> I<+N> Add N to the number of CPUs. Run this many jobs in parallel. For compute intensive jobs I<-j +0> is useful as it will run number-of-cpus jobs in parallel. -=item B<-j> -I<N> +=item B<-j> I<-N> Subtract N from the number of CPUs. Run this many jobs in parallel. If the evaluated number is less than 1 then 1 will be used. @@ -95,7 +95,14 @@ Verbose. Print the job to be run. Can be reversed with B<-s>. =item B<-x> -xargs. Insert as many arguments as the command line length permits. +xargs. Insert as many arguments as the command line length permits. If +{} is not used the arguments will be appended to the line. If {} is +used multiple times each {} will be replaced with all the arguments. + +=item B<-X> + +xargs with context replace. This works like B<-x> except if {} is part +of a word (like I<pic{}.jpg>) then the whole word will be repeated. =back @@ -264,6 +271,7 @@ B functionality is missing. Quoting in B works like B<-q> in B. 
Doing B> B<{}.wc"> using B seems to be impossible. + =head1 BUGS Filenames beginning with '-' can cause some commands to give @@ -322,7 +330,7 @@ use Getopt::Std; use strict; my ($processes,$command); -getopts("0cdefgj:qsuvx") || die_usage(); +getopts("0cdefgj:qsuvxX") || die_usage(); # Defaults: $Global::debug = 0; @@ -339,6 +347,10 @@ if(defined $::opt_x) { $Global::xargs = 1; $Global::command_line_max_len = max_length_of_command_line(); } +if(defined $::opt_X) { + $Global::Xargs = 1; + $Global::command_line_max_len = max_length_of_command_line(); +} if(defined $::opt_v) { $Global::verbose = 1; } if(defined $::opt_s) { $Global::verbose = 0; } if(defined $::opt_g) { $Global::grouped = 1; } @@ -378,16 +390,40 @@ sub generate_command_line { my $command = shift; my ($job_line,$last_good); my ($next_arg,@quoted_args,$arg_length); + my ($number_of_substitution) = 1; + my ($length_of_context) = 0; + my ($length_of_command_no_args); + if($Global::xargs or $Global::Xargs) { + # Count number of {}'s on the command line + $number_of_substitution = ($command =~ s/{}/{}/g); + $number_of_substitution ||= 1; + } + if($Global::xargs) { + my $c = $command; + # remove all {}s + $c =~ s/{}//g; + $length_of_command_no_args = length($c); + } + if($Global::Xargs) { + my $c = $command; + while($c =~ s/(\S*{}\S*)//) { + # Length of context minus the {} + $length_of_context += length($1) - 2; + } + $length_of_command_no_args = length($c); + } + while ($next_arg = get_next_arg()) { push (@quoted_args, $next_arg); - if(not $Global::xargs) { + if(not $Global::xargs and not $Global::Xargs) { last; } else { - # Emulate xargs if there is a command and -x is set - $arg_length += length $next_arg + 1; - debug("arglen $arg_length\n"); - my $job_line_length = length($command) + 1 + $arg_length; - debug("linelen $job_line_length\n"); + # Emulate xargs if there is a command and -x or -X is set + $arg_length += $number_of_substitution * (1 + length ($next_arg)) + + $length_of_context; + # 
debug("arglen $arg_length = $number_of_substitution * (1 + length ($next_arg)) + $length_of_context\n"); + my $job_line_length = $length_of_command_no_args + 1 + $arg_length; + # debug("linelen $job_line_length = $length_of_command_no_args + 1 + $arg_length\n"); if($job_line_length >= $Global::command_line_max_len) { unget_arg(pop @quoted_args); if($quoted_args[0]) { @@ -399,12 +435,31 @@ sub generate_command_line { } } if(@quoted_args) { - my $arg=join(" ",@quoted_args); $job_line = $command; - if(defined $job_line and $job_line =~ s/{}/$arg/g) { - # substituted {} with args + if(defined $job_line and $job_line =~/{}/) { + # substitute {} with args + if($Global::Xargs) { + # Context sensitive replace + while($job_line =~/{}/) { + $job_line =~ /(\S*{}\S*)/ or die ("This should never happen"); + my $wordarg = $1; + my @all_word_arg; + for my $arg (@quoted_args) { + my $substituted = $wordarg; + $substituted=~s/{}/$arg/g; + push @all_word_arg, $substituted; + } + my $all_word_arg = join(" ",@all_word_arg); + $job_line =~ s/$wordarg/$all_word_arg/; + } + } else { + # Normal replace + my $arg=join(" ",@quoted_args); + $job_line =~ s/{}/$arg/g; + } } else { # append args + my $arg=join(" ",@quoted_args); $job_line .= " $arg"; } debug("Return jobline: $job_line\n"); @@ -905,5 +960,5 @@ sub my_dump { # Keep perl -w happy $main::opt_u = $main::opt_c = $main::opt_f = $main::opt_q = $main::opt_0 = $main::opt_s = $main::opt_v = $main::opt_g = -$main::opt_j = $main::opt_d = $main::opt_x =1; +$main::opt_j = $main::opt_d = $main::opt_x = $main::opt_X =1; $Global::xargs = 1; diff --git a/parallel.1 b/parallel.1 index d68c723f..2744ffb8 100644 --- a/parallel.1 +++ b/parallel.1 @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "PARALLEL 1" -.TH PARALLEL 1 "2009-03-12" "perl v5.10.0" "User Contributed Perl Documentation" +.TH PARALLEL 1 "2009-03-16" "perl v5.10.0" "User Contributed Perl Documentation" .\" For nroff, turn 
off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -150,7 +150,7 @@ command also invokes \fB\-f\fR. .Sp If \fBcommand\fR is given, \fBparallel\fR will behave similar to \fBxargs\fR. If \&\fBcommand\fR is not given \fBparallel\fR will behave similar to \fBcat | sh\fR. -.IP "\fI\-0\fR" 9 +.IP "\fB\-0\fR" 9 .IX Item "-0" Use \s-1NUL\s0 as delimiter. Normally input lines will end in \en (newline). If they end in \e0 (\s-1NUL\s0), then use this option. It is useful @@ -173,12 +173,12 @@ printed when the command is finished. \s-1STDERR\s0 first followed by \s-1STDOUT .IP "\fB\-j\fR \fIN\fR" 9 .IX Item "-j N" Run N jobs in parallel. 0 means as many as possible. Default is 10. -.IP "\fB\-j\fR +\fIN\fR" 9 +.IP "\fB\-j\fR \fI+N\fR" 9 .IX Item "-j +N" Add N to the number of CPUs. Run this many jobs in parallel. For compute intensive jobs \fI\-j +0\fR is useful as it will run number-of-cpus jobs in parallel. -.IP "\fB\-j\fR \-\fIN\fR" 9 +.IP "\fB\-j\fR \fI\-N\fR" 9 .IX Item "-j -N" Subtract N from the number of CPUs. Run this many jobs in parallel. If the evaluated number is less than 1 then 1 will be used. @@ -205,7 +205,13 @@ output from different commands to be mixed. Can be reversed with \fB\-g\fR. Verbose. Print the job to be run. Can be reversed with \fB\-s\fR. .IP "\fB\-x\fR" 9 .IX Item "-x" -xargs. Insert as many arguments as the command line length permits. +xargs. Insert as many arguments as the command line length permits. If +{} is not used the arguments will be appended to the line. If {} is +used multiple times each {} will be replaced with all the arguments. +.IP "\fB\-X\fR" 9 +.IX Item "-X" +xargs with context replace. This works like \fB\-x\fR except if {} is part +of a word (like \fIpic{}.jpg\fR) then the whole word will be repeated. .SH "EXAMPLE 1: Working as cat | sh. Ressource inexpensive jobs and evaluation" .IX Header "EXAMPLE 1: Working as cat | sh. 
Ressource inexpensive jobs and evaluation" \&\fBparallel\fR can work similar to \fBcat | sh\fR. diff --git a/unittest/actual-results/test10 b/unittest/actual-results/test10 index c953589c..da43de87 100644 --- a/unittest/actual-results/test10 +++ b/unittest/actual-results/test10 @@ -1,4 +1,10 @@ 33bf8b2986551515cdaff5e860618098 - 960bedf6398273937e314fb49c7b3ffa - -8f7a50abe8d1024e90a2bc9c99ee0b05 - - 3 80000 468894 +b35d8e49be8d94899b719c40d3f1f4bb - + 3 60000 348894 +1foo bar2foo bar3 Afoo barBfoo barC +1foo2foo3 1bar2bar3 AfooBfooC AbarBbarC +1c0c49286e5b5b18437e51b438ea5475 - + 6 119994 697800 +1c0c49286e5b5b18437e51b438ea5475 - +Chars per line: 116300 diff --git a/unittest/tests-to-run/test10.sh b/unittest/tests-to-run/test10.sh index 3c1d67a7..cf714067 100644 --- a/unittest/tests-to-run/test10.sh +++ b/unittest/tests-to-run/test10.sh @@ -5,4 +5,13 @@ seq 1 40 | parallel -j 0 seq 1 10 | sort |md5sum seq 1 40 | parallel -j 0 seq 1 10 '| parallel -j 3 echo' | sort |md5sum # Test of xargs -seq 1 80000 | parallel -x echo | mop -d 4 "|sort |md5sum" "| wc" +seq 1 60000 | parallel -x echo | mop -d 4 "|sort |md5sum" "| wc" +(echo foo;echo bar) | parallel -x echo 1{}2{}3 A{}B{}C +(echo foo;echo bar) | parallel -X echo 1{}2{}3 A{}B{}C +seq 1 60000 | parallel -x echo 1{}2{}3 | mop -d 4 "|sort |md5sum" "| wc" +seq 1 60000 | parallel -x echo 1{}2{}3 | \ + mop -q "|sort |md5sum" :parallel +echo -n "Chars per line: " +CHAR=$(cat ~/.mop/:parallel | wc -c) +LINES=$(cat ~/.mop/:parallel | wc -l) +echo "$CHAR/$LINES" | bc diff --git a/unittest/wanted-results/test10 b/unittest/wanted-results/test10 index c953589c..da43de87 100644 --- a/unittest/wanted-results/test10 +++ b/unittest/wanted-results/test10 @@ -1,4 +1,10 @@ 33bf8b2986551515cdaff5e860618098 - 960bedf6398273937e314fb49c7b3ffa - -8f7a50abe8d1024e90a2bc9c99ee0b05 - - 3 80000 468894 +b35d8e49be8d94899b719c40d3f1f4bb - + 3 60000 348894 +1foo bar2foo bar3 Afoo barBfoo barC +1foo2foo3 1bar2bar3 AfooBfooC AbarBbarC 
+1c0c49286e5b5b18437e51b438ea5475 - + 6 119994 697800 +1c0c49286e5b5b18437e51b438ea5475 - +Chars per line: 116300