parallel -x supported

This commit is contained in:
Ole Tange 2009-02-18 06:45:59 +01:00
parent aa43823d12
commit 409f6acd77
2 changed files with 18 additions and 139 deletions

151
parallel
View file

@ -269,9 +269,6 @@ parallel "wc {} >> B<{}.wc"> using B<xargs> seems to be impossible.
Filenames beginning with '-' can cause some commands to give Filenames beginning with '-' can cause some commands to give
unexpected results, as it will often be interpreted as an option. unexpected results, as it will often be interpreted as an option.
If you have a lot of filehandles, then computing the max no
takes a long time.
=head1 REPORTING BUGS =head1 REPORTING BUGS
@ -281,12 +278,13 @@ Report bugs to <bug-parallel@tange.dk>.
=head1 AUTHOR =head1 AUTHOR
Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk
Copyright (C) 2008-2009 Ole Tange, http://ole.tange.dk Copyright (C) 2008-2009 Ole Tange, http://ole.tange.dk
=head1 LICENSE =head1 LICENSE
Copyright (C) 2007 Free Software Foundation, Inc. Copyright (C) 2007-2009 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -336,7 +334,6 @@ $Global::input_is_filename = (@ARGV);
$/="\n"; $/="\n";
$Global::debug = (defined $::opt_d); $Global::debug = (defined $::opt_d);
if(defined $::opt_j) { $processes = compute_number_of_processes($::opt_j); }
if(defined $::opt_x) { if(defined $::opt_x) {
$Global::xargs = 1; $Global::xargs = 1;
$Global::command_line_max_len = max_length_of_command_line(); $Global::command_line_max_len = max_length_of_command_line();
@ -356,6 +353,9 @@ if(@ARGV) {
$Global::command = join(" ", @ARGV); $Global::command = join(" ", @ARGV);
} }
} }
# Needs to be done after setting $Global::command and $Global::command_line_max_len
# as '-x' influences the number of commands that need to be run
if(defined $::opt_j) { $processes = compute_number_of_processes($::opt_j); }
init_run_jobs(); init_run_jobs();
DoNotReap(); DoNotReap();
@ -432,13 +432,15 @@ sub shell_quote {
# Number of processes, filehandles, max length of command line # Number of processes, filehandles, max length of command line
# #
# Maximal command line length (for -x)
sub max_length_of_command_line { sub max_length_of_command_line {
# Find the max_length of a command line # Find the max_length of a command line
# First find an upper bound # First find an upper bound
my $len = 2; my $len = 2;
do { do {
$len += $len+1; $len += $len+1;
} while (acceptable_command_line_length($len)); } while (is_acceptable_command_line_length($len));
# Then search for the actual max length between 0 and upper bound # Then search for the actual max length between 0 and upper bound
return binary_find_max_length(0,$len); return binary_find_max_length(0,$len);
} }
@ -449,14 +451,14 @@ sub binary_find_max_length {
if($lower == $upper or $lower == $upper-1) { return $lower; } if($lower == $upper or $lower == $upper-1) { return $lower; }
my $middle = int (($upper-$lower)/2 + $lower); my $middle = int (($upper-$lower)/2 + $lower);
$debug && print "$lower,$upper,$middle\n"; $debug && print "$lower,$upper,$middle\n";
if (acceptable_command_line_length($middle)) { if (is_acceptable_command_line_length($middle)) {
return binary_find_max_length($middle,$upper); return binary_find_max_length($middle,$upper);
} else { } else {
return binary_find_max_length($lower,$middle); return binary_find_max_length($lower,$middle);
} }
} }
sub acceptable_command_line_length { sub is_acceptable_command_line_length {
# Test if a command line of this length can run # Test if a command line of this length can run
# This is done using external perl script to avoid warning # This is done using external perl script to avoid warning
# (Can this be done prettier?) # (Can this be done prettier?)
@ -467,6 +469,8 @@ sub acceptable_command_line_length {
return not $?; return not $?;
} }
# Number of parallel processes to run
sub compute_number_of_processes { sub compute_number_of_processes {
# Number of processes wanted and limited by system resources # Number of processes wanted and limited by system resources
my $opt_j = shift; my $opt_j = shift;
@ -479,12 +483,10 @@ sub processes_available_by_system_limit {
# If the wanted number of processes is bigger than the system limits: # If the wanted number of processes is bigger than the system limits:
# Limit them to the system limits # Limit them to the system limits
# Limits are: File handles, number of input lines, processes, # Limits are: File handles, number of input lines, processes,
# and taking > 1 second to spawn 10 extra procs # and taking > 1 second to spawn 10 extra processes
my $wanted_processes = shift; my $wanted_processes = shift;
my $system_limit=0; my $system_limit=0;
#GONE my @args=();
my @command_lines=(); my @command_lines=();
#GONE my $next_arg;
my $next_command_line; my $next_command_line;
my $more_filehandles; my $more_filehandles;
my $max_system_proc_reached=0; my $max_system_proc_reached=0;
@ -509,13 +511,6 @@ sub processes_available_by_system_limit {
push(@command_lines, $next_command_line); push(@command_lines, $next_command_line);
} }
#GONNE # If there are no more arguments, then we have a process per argument
#GONNE # so no need to go further
#GONNE $next_arg = get_next_arg();
#GONNE if(defined $next_arg) {
#GONNE push(@args, $next_arg);
#GONNE }
# Every simultaneous process uses 2 filehandles when grouping # Every simultaneous process uses 2 filehandles when grouping
$more_filehandles = open($fh{$system_limit*2},"</dev/null") $more_filehandles = open($fh{$system_limit*2},"</dev/null")
&& open($fh{$system_limit*2+1},"</dev/null"); && open($fh{$system_limit*2+1},"</dev/null");
@ -546,19 +541,16 @@ sub processes_available_by_system_limit {
and $more_filehandles and $more_filehandles
and not $max_system_proc_reached and not $max_system_proc_reached
and not $spawning_too_slow); and not $spawning_too_slow);
if($system_limit <= $wanted_processes if($system_limit <= $wanted_processes and not $more_filehandles) {
and not $more_filehandles) {
print STDERR ("Warning: Only enough filehandles to run ", print STDERR ("Warning: Only enough filehandles to run ",
$system_limit, " jobs in parallel. ", $system_limit, " jobs in parallel. ",
"Raising ulimit -n may help\n"); "Raising ulimit -n may help\n");
} }
if($system_limit <= $wanted_processes if($system_limit <= $wanted_processes and $max_system_proc_reached) {
and $max_system_proc_reached) {
print STDERR ("Warning: Only enough available processes to run ", print STDERR ("Warning: Only enough available processes to run ",
$system_limit, " jobs in parallel.\n"); $system_limit, " jobs in parallel.\n");
} }
if($system_limit <= $wanted_processes if($system_limit <= $wanted_processes and $spawning_too_slow) {
and $spawning_too_slow) {
print STDERR ("Warning: Starting 10 extra processes takes > 1 sec.\n", print STDERR ("Warning: Starting 10 extra processes takes > 1 sec.\n",
"Limiting to ", $system_limit, " jobs in parallel.\n"); "Limiting to ", $system_limit, " jobs in parallel.\n");
} }
@ -570,8 +562,6 @@ sub processes_available_by_system_limit {
waitpid($pid,0); waitpid($pid,0);
} }
wait(); wait();
# # Cleanup: Unget the args
# unget_arg(@args);
# Cleanup: Unget the command_lines # Cleanup: Unget the command_lines
unget_command_line(@command_lines); unget_command_line(@command_lines);
return $system_limit; return $system_limit;
@ -608,114 +598,6 @@ sub user_requested_processes {
return $processes; return $processes;
} }
sub GONE_compute_number_of_processes {
    # Translate the value given to -j into a number of simultaneous
    # processes, then cap it by the number of arguments and by the
    # number of free filehandles.
    #   $opt_j - "+N", "-N", "N%", "N" or undef (option not given)
    # Returns the number of processes truncated to an integer.
    my ($opt_j) = @_;
    my $processes = 0;

    if (defined $opt_j) {
        if ($opt_j =~ /^\+(\d+)$/) {
            # E.g. -j +2: add to the number of CPUs
            my $extra = $1;
            $processes = $extra + no_of_cpus();
        }
        elsif ($opt_j =~ /^-(\d+)$/) {
            # E.g. -j -2: subtract from the number of CPUs
            my $fewer = $1;
            $processes = no_of_cpus() - $fewer;
        }
        elsif ($opt_j =~ /^(\d+)\%$/) {
            # E.g. -j 50%: percentage of the number of CPUs
            my $percent = $1;
            $processes = no_of_cpus() * $percent / 100;
        }
        elsif ($opt_j =~ /^(\d+)$/) {
            # Plain number; -j 0 = infinity (or at least close)
            $processes = ($1 == 0) ? 2**31 : $1;
        }
        else {
            die_usage();
        }

        # Whatever was asked for, run at least one process
        $processes = 1 if $processes < 1;
    }

    # Have we asked for more processes than arguments?
    $processes = min_of_args_and_processes($processes);

    # Filehandle budget:
    #   2 per simultaneous process when grouping
    #   7 set aside for perl itself (the original author was unsure why)
    #   1 for memory_usage
    my $file_handles_needed = $processes*2+7+1;
    my $free_handles = compute_no_of_free_filehandles($file_handles_needed);
    if ($free_handles < $file_handles_needed) {
        # Not enough handles: run only as many jobs as the handles allow
        $processes = int (($free_handles -7 -1) / 2);
        print STDERR ("Warning: Only enough filehandles to run ",
                      $processes, " jobs in parallel. ",
                      "Raising ulimit -n may help\n");
    }

    my $whole_processes = int $processes;
    debug("Computed processes: ".$whole_processes."\n");
    return $whole_processes;
}
# Probe how many processes can be started: fork one sleeping child per
# loop round, reading one argument per round, until the requested number
# is exceeded, the arguments run out, fork() fails, or forking gets slow.
#   $processes - the requested number of simultaneous processes
# Returns the counter reached when the loop stopped (lowered by roughly
# 10% if forking was found to be too slow).
# NOTE(review): $child, @children and $pid are not declared in this sub;
# presumably they are package globals - confirm.
sub GONE_min_of_args_and_processes {
my $processes = shift;
my $min_of_args_and_processes=0;
# Arguments read while probing; they are handed back with unget_arg() at the end
my @args=();
my $next_arg;
# Set when fork() fails or when forking has become too slow
my $max_system_proc_reached=0;
my $time = time;
# NOTE(review): DoNotReap() is defined elsewhere; presumably it keeps the
# children from being reaped automatically so waitpid() below works - confirm.
DoNotReap();
do {
$min_of_args_and_processes++;
$next_arg = get_next_arg();
if(defined $next_arg) {
push(@args, $next_arg);
}
# Restart the timer whenever the counter is a multiple of 10
$min_of_args_and_processes % 10 or $time=time;
if($child = fork()) {
# Parent: remember the child's pid so it can be killed afterwards
push (@children,$child);
} elsif(defined $child) {
# The child needs to take one process.
# It will be killed later
sleep 100000;
exit;
} else {
# fork() returned undef: it failed, so stop probing
$max_system_proc_reached = 1;
}
debug("Time to fork ten procs ", time-$time, " process ", $min_of_args_and_processes);
if(time-$time > 1) {
# It took more than 1 second to fork ten processes. We should stop forking.
# Let us give the system a little slack
debug("\nLimiting processes to: $min_of_args_and_processes-10%=".
(int ($min_of_args_and_processes * 0.9)+1)."\n");
$min_of_args_and_processes = int ($min_of_args_and_processes * 0.9)+1;
$max_system_proc_reached = 1;
}
} while($min_of_args_and_processes <= $processes
and defined $next_arg
and not $max_system_proc_reached);
# Cleanup: terminate (signal 15) and reap every probe child
for $pid (@children) {
kill 15, $pid;
waitpid($pid,0);
}
wait();
# Cleanup: give the consumed arguments back
unget_arg(@args);
return $min_of_args_and_processes;
}
sub NullReaper {
    # Collect every child that has already exited, without blocking.
    # Stops as soon as waitpid() reports nothing more to reap.
    until (waitpid(-1, &WNOHANG) <= 0) {
        # Nothing to do with the reaped child
    }
}
sub GONE_compute_no_of_free_filehandles {
    # Count how many filehandles can be opened, probing no further than
    # needed.
    #   $needed - number of filehandles the caller would like to have
    # Returns 1 + the number of handles that could be opened; probing
    # stops once that value exceeds $needed.
    my $needed = shift;
    my $i = 1;
    # Only handles that were opened successfully are kept, so that the
    # cleanup below never calls close() on a handle that was never opened
    my @opened;
    while (1) {
        # 3-argument open: the mode cannot be affected by the filename
        open(my $fh, '<', '/dev/null') or last;
        push @opened, $fh;
        $i++;
        last if $i > $needed;
    }
    # Release the probe handles again
    for my $fh (@opened) {
        close $fh;
    }
    debug("Number of free handles: ".$i."\n");
    return $i;
}
sub no_of_cpus { sub no_of_cpus {
my $no_of_cpus = my $no_of_cpus =
(no_of_cpus_gnu_linux() || (no_of_cpus_gnu_linux() ||
@ -785,7 +667,6 @@ sub unget_command_line {
push @Global::unget_next_command_line, @_; push @Global::unget_next_command_line, @_;
} }
sub get_next_arg { sub get_next_arg {
my $arg; my $arg;
if(@Global::unget_arg) { if(@Global::unget_arg) {

View file

@ -372,19 +372,17 @@ parallel "wc {} \fR> \fB{}.wc"\fR using \fBxargs\fR seems to be impossible.
.IX Header "BUGS" .IX Header "BUGS"
Filenames beginning with '\-' can cause some commands to give Filenames beginning with '\-' can cause some commands to give
unexpected results, as it will often be interpreted as an option. unexpected results, as it will often be interpreted as an option.
.PP
If you have a lot of filehandles, then computing the max no
takes a long time.
.SH "REPORTING BUGS" .SH "REPORTING BUGS"
.IX Header "REPORTING BUGS" .IX Header "REPORTING BUGS"
Report bugs to <bug\-parallel@tange.dk>. Report bugs to <bug\-parallel@tange.dk>.
.SH "AUTHOR" .SH "AUTHOR"
.IX Header "AUTHOR" .IX Header "AUTHOR"
Copyright (C) 2007\-10\-18 Ole Tange, http://ole.tange.dk Copyright (C) 2007\-10\-18 Ole Tange, http://ole.tange.dk
.PP
Copyright (C) 2008\-2009 Ole Tange, http://ole.tange.dk Copyright (C) 2008\-2009 Ole Tange, http://ole.tange.dk
.SH "LICENSE" .SH "LICENSE"
.IX Header "LICENSE" .IX Header "LICENSE"
Copyright (C) 2007 Free Software Foundation, Inc. Copyright (C) 2007\-2009 Free Software Foundation, Inc.
.PP .PP
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the \s-1GNU\s0 General Public License as published by it under the terms of the \s-1GNU\s0 General Public License as published by