parallel: Implemented {//}. Converted use into conditional use (hopefully with speedup).

This commit is contained in:
Ole Tange 2011-04-27 17:12:35 +02:00
parent c5af096f76
commit a63e258bb1
5 changed files with 95 additions and 46 deletions

View file

@ -158,38 +158,18 @@ cc:Peter Simons <simons@cryp.to>, Sandro Cazzaniga <kharec@mandriva.org>,
Christian Faulhammer <fauli@gentoo.org>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
Jesse Alama <jesse.alama@gmail.com>
Subject: GNU Parallel 2011XXXX ('Libya') released
Subject: GNU Parallel 2011XXXX ('?') released
GNU Parallel 2011XXXX ('Libya') has been released. It is
GNU Parallel 2011XXXX ('?') has been released. It is
available for download at: http://ftp.gnu.org/gnu/parallel/
New in this release:
* {#} now works as an alias for $PARALLEL_PID.
* Using GNU Parallel with EC2. Thanks to Kevin Wu.
http://blog.kevinformatics.com/post/4970574713/interested-in-your-experience-using-gnu-parallel-in
* --eta now estimates way more accurately.
* CPU detection code for AIX thanks to Christian Netrwal.
* --joblog contains exitcode.
* Thanks to Ævar Arnfjörð Bjarmason for reading my code.
* GNU Parallel was presented at:
- LUGA, Augsburg, 2011-03-26, http://www.luga.de/Aktionen/LIT-2011/Programm
- OSAA.dk, Aarhus, 2011-04-12
* Blog entry in Japanese. Thanks to Riywo.
http://blog.riywo.com/2011/04/19/022802
* Example of how to use GNU Parallel on PBS clusters. Thanks to Andrew
J Dolgert: http://web0.tc.cornell.edu/wiki/index.php?title=Gnu_Parallel
* First example of using GNU Parallel in Hebrew. Thanks to omry.
http://nd.gd/jk
* Intro video got past 10000 hits.
http://www.youtube.com/watch?v=OpaiGYxkSuQ
* Review with idea for {..} and {...} in Japanese. Thanks to ichii386.
http://d.hatena.ne.jp/ichii386/20110426
* Bug fixes and man page updates.

View file

@ -1,14 +1,17 @@
#!/usr/bin/perl -w
# open3 used in Job::start
use IPC::Open3;
use Fcntl;
use Symbol qw(gensym);
use IO::File;
# &WNOHANG used in reaper
use POSIX qw(:sys_wait_h setsid ceil :errno_h);
# gensym used in Job::start
use Symbol qw(gensym);
# tempfile used in Job::start
use File::Temp qw(tempfile tempdir);
# GetOptions used in get_options_from_array
use Getopt::Long;
# Used to ensure code quality
use strict;
use Carp;
$::oodebug=0;
$SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X
@ -309,6 +312,7 @@ sub get_options_from_array {
"I=s" => \$::opt_I,
"extensionreplace|U=s" => \$::opt_U,
"basenamereplace|bnr=s" => \$::opt_basenamereplace,
"dirnamereplace|dnr=s" => \$::opt_dirnamereplace,
"basenameextensionreplace=s" => \$::opt_basenameextensionreplace,
"seqreplace=s" => \$::opt_seqreplace,
"jobs|j=s" => \$::opt_P,
@ -400,6 +404,7 @@ sub parse_options {
$Global::replace{'{}'} = '{}';
$Global::replace{'{.}'} = '{.}';
$Global::replace{'{/}'} = '{/}';
$Global::replace{'{//}'} = '{//}';
$Global::replace{'{/.}'} = '{/.}';
$Global::replace{'{#}'} = '{#}';
$/="\n";
@ -436,6 +441,7 @@ sub parse_options {
$Global::replace{'{}'} = $::opt_i eq "" ? "{}" : $::opt_i;
}
if(defined $::opt_basenamereplace) { $Global::replace{'{/}'} = $::opt_basenamereplace; }
if(defined $::opt_dirnamereplace) { $Global::replace{'{//}'} = $::opt_dirnamereplace; }
if(defined $::opt_basenameextensionreplace) {
$Global::replace{'{/.}'} = $::opt_basenameextensionreplace;
}
@ -2594,8 +2600,7 @@ sub complete_write {
my $self = shift;
my $in = $self->{'stdin'};
my $len = syswrite($in,$self->{'remaining'});
if (!defined($len) && $! == ::EAGAIN) {
if (!defined($len) && $! == &::EAGAIN) {
# write would block;
} else {
# Remove the part that was written
@ -3358,7 +3363,7 @@ sub len {
$self->{'replacecount'}{$replstring};
}
if($Global::replace{$replstring}) {
# This is a multi replacestring ({} {/} {.} {/.})
# This is a multi replacestring ({} {/} {.} {/.} {//})
# Add each space between two arguments
my $number_of_args = ($#{$self->{'arg_list'}[0]}+1)*$self->number_of_args();
$len += ($number_of_args-1) * $self->{'replacecount'}{$replstring};
@ -3381,6 +3386,7 @@ sub multi_regexp {
($Global::replace{"{}"},
$Global::replace{"{.}"},
$Global::replace{"{/}"},
$Global::replace{"{//}"},
$Global::replace{"{/.}"})
).")";
}
@ -3431,7 +3437,7 @@ sub number_of_replacements {
}
for my $k (keys %count) {
if(defined $Global::replace{$k}) {
# {} {/} {.} {/.} {#}
# {} {/} {//} {.} {/.} {#}
$context -= (length $Global::replace{$k}) * $count{$k};
} else {
# {n}
@ -3489,9 +3495,9 @@ sub replace_placeholders {
# we have a matching argument for {n}
$replace_single{$used} = $args[$positional-1]->replace($replacementfunction);
}
} elsif($used =~ /^(\{\}|\{\/\}|\{\.\}|\{\/\.\})$/) {
} elsif($used =~ /^(\{\}|\{\/\}|\{\/\/\}|\{\.\}|\{\/\.\})$/) {
# Add to the multireplacement
my $replacementfunction = $used; # {} {/} {.} or {/.}
my $replacementfunction = $used; # {} {/} {//} {.} or {/.}
CORE::push @used_multi, $replacementfunction;
if($self->{'context_replace'}) {
for my $n (0 .. $#args) {
@ -3518,7 +3524,7 @@ sub replace_placeholders {
my $orig_target = $target;
if(@used_multi) {
my $multi_regexp = join('|', map {
$a=$Global::replace{"$_"};
$a=$Global::replace{$_};
$a=~s/(\W)/\\$1/g; $a
} @used_multi);
my %wordargs;
@ -3994,6 +4000,10 @@ sub replace {
$s =~ s:\.[^/\.]*$::; # Remove .ext from argument
} elsif($replacement_string eq "{/}") {
$s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final /
} elsif($replacement_string eq "{//}") {
# Only load File::Basename if actually needed
$Global::use{"File::Basename"} ||= eval "use File::Basename;";
$s = dirname($s); # Keep dir from argument.
} elsif($replacement_string eq "{/.}") {
$s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final /
$s =~ s:\.[^/\.]*$::; # Remove .ext from argument
@ -4049,8 +4059,6 @@ package Semaphore;
# process holding the entry. If the process dies, the entry can be
# taken by another process.
use Fcntl qw(:DEFAULT :flock);
sub new {
my $class = shift;
my $id = shift;
@ -4159,6 +4167,7 @@ sub lock {
open $self->{'lockfh'}, ">", $self->{'lockfile'}
or ::die_bug("Can't open semaphore file $self->{'lockfile'}: $!");
chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
$Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock);";
while(not flock $self->{'lockfh'}, LOCK_EX()|LOCK_NB()) {
::debug("Cannot lock $self->{'lockfile'}");
# TODO if timeout: last
@ -4179,5 +4188,5 @@ sub unlock {
# Keep perl -w happy
$::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
$::opt_skip_first_line = $::opt_shebang = 0 ;
$::opt_skip_first_line = $::opt_shebang = $Global::use = 0 ;

View file

@ -48,9 +48,10 @@ after the list of options. That will give you an idea of what GNU
B<parallel> is capable of.
You can also watch the intro video for a quick introduction:
http://www.youtube.com/watch?v=OpaiGYxkSuQ or at
http://tinyogg.com/watch/TORaR/ http://tinyogg.com/watch/hfxKj/ and
http://tinyogg.com/watch/YQuXd/
http://tinyogg.com/watch/YQuXd/ or
http://www.youtube.com/watch?v=OpaiGYxkSuQ and
http://www.youtube.com/watch?v=1ntxT-47VPA
=head1 OPTIONS
@ -100,6 +101,15 @@ B<{/}> can be used the same places as B<{}>. The replacement string
B<{/}> can be changed with B<--basenamereplace>.
=item B<{//}>
Dirname of input line. This is a specialized replacement string
containing the dir of the input. See B<dirname>(1).
B<{//}> can be used the same places as B<{}>. The replacement string
B<{//}> can be changed with B<--dirnamereplace>.
=item B<{/.}>
Basename of input line without extension. This is a specialized
@ -326,6 +336,14 @@ as \n, or an octal or hexadecimal escape code. Octal and
hexadecimal escape codes are understood as for the printf command.
Multibyte characters are not supported.
=item B<--dirnamereplace> I<replace-str> (alpha testing)
=item B<--dnr> I<replace-str> (alpha testing)
Use the replacement string I<replace-str> instead of B<{//}> for
dirname of input line.
=item B<-E> I<eof-str>
Set the end of file string to eof-str. If the end of file string
@ -1263,7 +1281,7 @@ solution is to quote the whole command:
B<parallel "zcat {} >>B<{.}" ::: *.gz>
Other special shell charaters (such as * ; $ > < | >> <<) also needs
Other special shell characters (such as * ; $ > < | >> <<) also need
to be put in quotes, as they may otherwise be interpreted by the shell
and not given to GNU B<parallel>.
@ -2878,8 +2896,8 @@ it also uses rsync with ssh.
=head1 SEE ALSO
B<ssh>(1), B<rsync>(1), B<find>(1), B<xargs>(1), B<make>(1),
B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1), B<dxargs>(1),
B<mdm>(1),
B<ssh>(1), B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>(1),
B<make>(1), B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1),
B<dxargs>(1), B<mdm>(1),
=cut

View file

@ -0,0 +1,15 @@
#!/bin/bash
# Test of the {//} replacement string (dirname of the input) and of the
# --dnr / --dirnamereplace options that rename it.
# Every command echoes the dirname followed by the untouched input ({}),
# one job per argument after ':::'.
# NOTE(review): -k is presumably there to keep the output in input order so
# the result can be compared line-by-line with a stored expected output.
echo '### Test {//}'
# Relative paths; a bare name without any '/' is expected to give '.'
parallel -k echo {//} {} ::: a a/b a/b/c
# Absolute paths; an entry directly under the root is expected to give '/'
parallel -k echo {//} {} ::: /a /a/b /a/b/c
# Paths with an explicit './' prefix
parallel -k echo {//} {} ::: ./a ./a/b ./a/b/c
# Same three groups again with a file extension, which must not influence
# the dirname
parallel -k echo {//} {} ::: a.jpg a/b.jpg a/b/c.jpg
parallel -k echo {//} {} ::: /a.jpg /a/b.jpg /a/b/c.jpg
parallel -k echo {//} {} ::: ./a.jpg ./a/b.jpg ./a/b/c.jpg
echo '### Test --dnr'
# Short option: 'II' takes the place of {//} as the dirname replacement string
parallel --dnr II -k echo II {} ::: a a/b a/b/c
echo '### Test --dirnamereplace'
# Long option: must behave exactly like --dnr above
parallel --dirnamereplace II -k echo II {} ::: a a/b a/b/c

View file

@ -0,0 +1,27 @@
### Test {//}
. a
a a/b
a/b a/b/c
/ /a
/a /a/b
/a/b /a/b/c
. ./a
./a ./a/b
./a/b ./a/b/c
. a.jpg
a a/b.jpg
a/b a/b/c.jpg
/ /a.jpg
/a /a/b.jpg
/a/b /a/b/c.jpg
. ./a.jpg
./a ./a/b.jpg
./a/b ./a/b/c.jpg
### Test --dnr
. a
a a/b
a/b a/b/c
### Test --dirnamereplace
. a
a a/b
a/b a/b/c