Fixed: parallel -a not_existing --pipepart wc

This commit is contained in:
Ole Tange 2014-08-04 13:10:38 +02:00
parent 0a7b38c257
commit df5e2231d0
2 changed files with 76 additions and 51 deletions

View file

@ -184,8 +184,7 @@ sub pipe_part_files {
# @commands to run to pipe the blocks of the file to the command given # @commands to run to pipe the blocks of the file to the command given
my ($file) = @_; my ($file) = @_;
my $buf = ""; my $buf = "";
open(my $fh, "<", $file) || die; my $header = find_header(\$buf,open_or_exit($file));
my $header = find_header(\$buf,$fh);
# find positions # find positions
my @pos = find_split_positions($file,$opt::blocksize,length $header); my @pos = find_split_positions($file,$opt::blocksize,length $header);
# unshift job with cat_partial # unshift job with cat_partial
@ -693,6 +692,10 @@ sub get_options_from_array {
# false if parsing failed # false if parsing failed
# @array is changed # @array is changed
my ($array_ref, @keep_only) = @_; my ($array_ref, @keep_only) = @_;
if(not @$array_ref) {
# Empty array: No need to look more at that
return 1;
}
# A bit of shuffling of @ARGV needed as GetOptionsFromArray is not # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
# supported everywhere # supported everywhere
my @save_argv; my @save_argv;
@ -3742,7 +3745,8 @@ sub user_requested_processes {
my $j = $1; my $j = $1;
$processes = $processes =
$self->ncpus() - $j; $self->ncpus() - $j;
} elsif ($opt_P =~ /^(\d+)\%$/) { } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
# E.g. -P 10.5%
my $j = $1; my $j = $1;
$processes = $processes =
$self->ncpus() * $j / 100; $self->ncpus() * $j / 100;
@ -4917,10 +4921,10 @@ sub kill {
{ {
my %pid_parentpid_cmd; my %pid_parentpid_cmd;
sub family_pids { sub pid_table {
# Find the pids with this->pid as (grand)*parent # return two tables:
my $self = shift; # pid -> children of pid
my $pid = $self->pid(); # pid -> commandname
%pid_parentpid_cmd or %pid_parentpid_cmd = %pid_parentpid_cmd or %pid_parentpid_cmd =
( (
@ -4932,7 +4936,7 @@ sub kill {
'freebsd' => q( ps -o pid,ppid -ax ), 'freebsd' => q( ps -o pid,ppid -ax ),
'gnu' => q( ps -ef | awk '{print $2" "$3}' ), 'gnu' => q( ps -ef | awk '{print $2" "$3}' ),
'hpux' => q( ps -ef | awk '{print $2" "$3}' ), 'hpux' => q( ps -ef | awk '{print $2" "$3}' ),
'linux' => q( ps -ef | awk '{print $2" "$3}' ), 'linux' => q( ps -ef | grep tange | awk '{print $2" "$3" "$8}' ),
'mirbsd' => q( ps -o pid,ppid -ax ), 'mirbsd' => q( ps -o pid,ppid -ax ),
'netbsd' => q( ps -o pid,ppid -ax ), 'netbsd' => q( ps -o pid,ppid -ax ),
'nto' => q( ps -ef | awk '{print $2" "$3}' ), 'nto' => q( ps -ef | awk '{print $2" "$3}' ),
@ -4942,31 +4946,42 @@ sub kill {
); );
$pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
my (@pidtable,%children_of,@pids); my (@pidtable,%children_of,%name_of);
# Table with pid parentpid # Table with pid -> children of pid
@pidtable = `$pid_parentpid_cmd{$^O}`; @pidtable = `$pid_parentpid_cmd{$^O}`;
for (@pidtable) { for (@pidtable) {
/(\S+)\s+(\S+)/ or ::die_bug("pidtable format"); /(\S+)\s+(\S+)\s+(\S+)/ or ::die_bug("pidtable format: $_");
push @{$children_of{$2}}, $1; push @{$children_of{$2}}, $1;
$name_of{$2} = $3;
} }
my @more = ($pid); return(\%children_of, \%name_of);
# While more (grand)*children
while(@more) {
my @m;
push @pids, @more;
for my $parent (@more) {
if($children_of{$parent}) {
# add the children of this parent
push @m, @{$children_of{$parent}};
}
}
@more = @m;
}
return (@pids);
} }
} }
sub family_pids {
    # Find the pids with this->pid as (grand)*parent
    # Uses:
    #   pid_table() for the table: pid -> children of pid
    # Returns:
    #   @pids = this job's pid followed by its (grand)*children,
    #           one generation at a time
    my $self = shift;
    my $pid = $self->pid();
    # Only the children table is needed here; the second table
    # returned by pid_table() is ignored
    my ($children_of_ref) = pid_table();
    my (@pids, %seen);
    my @more = ($pid);
    # While more (grand)*children
    while(@more) {
        # Skip pids already collected: a pid listed as its own
        # (grand)*parent would otherwise make this loop run forever
        @more = grep { not $seen{$_}++ } @more;
        push @pids, @more;
        # Next generation: the children of every pid in this generation
        # (pids without an entry in the table have no children)
        @more = map { @{ $children_of_ref->{$_} || [] } } @more;
    }
    return (@pids);
}
sub failed { sub failed {
# return number of times failed for this $sshlogin # return number of times failed for this $sshlogin
my $self = shift; my $self = shift;

View file

@ -3198,6 +3198,7 @@ There are a lot of programs with some of the functionality of GNU
B<parallel>. GNU B<parallel> strives to include the best of the B<parallel>. GNU B<parallel> strives to include the best of the
functionality without sacrificing ease of use. functionality without sacrificing ease of use.
=head2 SUMMARY TABLE =head2 SUMMARY TABLE
The following features are in some of the comparable tools: The following features are in some of the comparable tools:
@ -3325,6 +3326,9 @@ supports (See REPORTING BUGS).
paexec: TODO - Please file a bug-report if you know what features paexec paexec: TODO - Please file a bug-report if you know what features paexec
supports (See REPORTING BUGS). supports (See REPORTING BUGS).
ladon: TODO - Please file a bug-report if you know what features ladon
supports (See REPORTING BUGS).
ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH
supports (See REPORTING BUGS). supports (See REPORTING BUGS).
@ -3504,6 +3508,8 @@ B<9> killall -SIGUSR2 parallel
B<pexec> is also a tool for running jobs in parallel. B<pexec> is also a tool for running jobs in parallel.
=head3 EXAMPLES FROM pexec MANUAL
Here are the examples from B<pexec>'s info page with the equivalent Here are the examples from B<pexec>'s info page with the equivalent
using GNU B<parallel>: using GNU B<parallel>:
@ -3557,6 +3563,7 @@ faster as only one process will be either reading or writing:
B<8> ls *jpg | parallel -j8 'sem --id diskio cat {} | jpegtopnm |' \ B<8> ls *jpg | parallel -j8 'sem --id diskio cat {} | jpegtopnm |' \
'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}' 'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}'
=head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel =head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel
B<xjobs> is also a tool for running jobs in parallel. It only supports B<xjobs> is also a tool for running jobs in parallel. It only supports
@ -3644,6 +3651,7 @@ B<cat files | parallel cmd>
B<find dir -execdir sem cmd {} \;> B<find dir -execdir sem cmd {} \;>
=head2 DIFFERENCES BETWEEN xapply AND GNU Parallel =head2 DIFFERENCES BETWEEN xapply AND GNU Parallel
B<xapply> can run jobs in parallel on the local computer. B<xapply> can run jobs in parallel on the local computer.
@ -3745,6 +3753,7 @@ using GNU B<parallel>:
=back =back
=head2 DIFFERENCES BETWEEN map AND GNU Parallel =head2 DIFFERENCES BETWEEN map AND GNU Parallel
B<map> sees it as a feature to have fewer features and in doing so it B<map> sees it as a feature to have fewer features and in doing so it
@ -3825,7 +3834,7 @@ processing, and dynamically timeouts.
=head2 DIFFERENCES BETWEEN ladon AND GNU Parallel =head2 DIFFERENCES BETWEEN ladon AND GNU Parallel
B<ladon> https://www.npmjs.org/package/ladon can run multiple jobs on files in parallel. B<ladon> can run multiple jobs on files in parallel.
B<ladon> only works on files and the only way to specify files is B<ladon> only works on files and the only way to specify files is
using a quoted glob string (such as \*.jpg). It is not possible to using a quoted glob string (such as \*.jpg). It is not possible to
@ -3836,10 +3845,10 @@ As replacement strings it uses FULLPATH DIRNAME BASENAME EXT RELDIR RELPATH
These can be simulated using GNU B<parallel> by putting this in B<~/.parallel/config>: These can be simulated using GNU B<parallel> by putting this in B<~/.parallel/config>:
--rpl 'FULLPATH $_=::shell_quote($_);chomp($_=qx{readlink -f $_});' --rpl 'FULLPATH $_=::shell_quote($_);chomp($_=qx{readlink -f $_});'
--rpl 'DIRNAME $_=::dirname($_); $_=::shell_quote($_);chomp($_=qx{readlink -f $_});' --rpl 'DIRNAME $_=::shell_quote(::dirname($_));chomp($_=qx{readlink -f $_});'
--rpl 'BASENAME s:.*/::; s:\.[^/.]+$::;' --rpl 'BASENAME s:.*/::;s:\.[^/.]+$::;'
--rpl 'EXT s:.*\.::' --rpl 'EXT s:.*\.::'
--rpl 'RELDIR $_=::shell_quote($_);chomp(($_,$c)=qx{readlink -f $_;pwd});s:\Q$c/\E::;$_ = ::dirname($_);' --rpl 'RELDIR $_=::shell_quote($_);chomp(($_,$c)=qx{readlink -f $_;pwd});s:\Q$c/\E::;$_=::dirname($_);'
--rpl 'RELPATH $_=::shell_quote($_);chomp(($_,$c)=qx{readlink -f $_;pwd});s:\Q$c/\E::;' --rpl 'RELPATH $_=::shell_quote($_);chomp(($_,$c)=qx{readlink -f $_;pwd});s:\Q$c/\E::;'
B<ladon> deals badly with filenames containing " and newline, and it fails for output larger than 200k: B<ladon> deals badly with filenames containing " and newline, and it fails for output larger than 200k:
@ -3867,6 +3876,7 @@ B<4> ladon "~/Music/*.wav" -- lame -V 2 FULLPATH DIRNAME/BASENAME.mp3
B<4> parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav B<4> parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav
=head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel =head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel
ClusterSSH solves a different problem than GNU B<parallel>. ClusterSSH solves a different problem than GNU B<parallel>.