From 409f6acd77e1fb81771a5af81578e46575925527 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Wed, 18 Feb 2009 06:45:59 +0100 Subject: [PATCH] parallel -x supported --- parallel | 151 ++++++----------------------------------------------- parallel.1 | 6 +-- 2 files changed, 18 insertions(+), 139 deletions(-) diff --git a/parallel b/parallel index 6d015d02..c8821dde 100755 --- a/parallel +++ b/parallel @@ -269,9 +269,6 @@ parallel "wc {} >> B<{}.wc"> using B seems to be impossible. Filenames beginning with '-' can cause some commands to give unexpected results, as it will often be interpreted as an option. -If you have a lot of filehandles, then computing the max no -takes a long time. - =head1 REPORTING BUGS @@ -281,12 +278,13 @@ Report bugs to . =head1 AUTHOR Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + Copyright (C) 2008-2009 Ole Tange, http://ole.tange.dk =head1 LICENSE -Copyright (C) 2007 Free Software Foundation, Inc. +Copyright (C) 2007-2009 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -336,7 +334,6 @@ $Global::input_is_filename = (@ARGV); $/="\n"; $Global::debug = (defined $::opt_d); -if(defined $::opt_j) { $processes = compute_number_of_processes($::opt_j); } if(defined $::opt_x) { $Global::xargs = 1; $Global::command_line_max_len = max_length_of_command_line(); @@ -356,6 +353,9 @@ if(@ARGV) { $Global::command = join(" ", @ARGV); } } +# Needs to be done after setting $Global::command and $Global::command_line_max_len +# as '-x' influences the number of commands that need to be run +if(defined $::opt_j) { $processes = compute_number_of_processes($::opt_j); } init_run_jobs(); DoNotReap(); @@ -432,13 +432,15 @@ sub shell_quote { # Number of processes, filehandles, max length of command line # +# Maximal command line length (for -x) + sub max_length_of_command_line { # Find the max_length of a command line # First find an upper bound my $len = 2; do { $len += $len+1; - } while (acceptable_command_line_length($len)); + } while (is_acceptable_command_line_length($len)); # Then search for the actual max length between 0 and upper bound return binary_find_max_length(0,$len); } @@ -449,14 +451,14 @@ sub binary_find_max_length { if($lower == $upper or $lower == $upper-1) { return $lower; } my $middle = int (($upper-$lower)/2 + $lower); $debug && print "$lower,$upper,$middle\n"; - if (acceptable_command_line_length($middle)) { + if (is_acceptable_command_line_length($middle)) { return binary_find_max_length($middle,$upper); } else { return binary_find_max_length($lower,$middle); } } -sub acceptable_command_line_length { +sub is_acceptable_command_line_length { # Test if a command line of this length can run # This is done using external perl script to avoid warning # (Can this be done prettier?) 
@@ -467,6 +469,8 @@ sub acceptable_command_line_length { return not $?; } +# Number of parallel processes to run + sub compute_number_of_processes { # Number of processes wanted and limited by system ressources my $opt_j = shift; @@ -479,12 +483,10 @@ sub processes_available_by_system_limit { # If the wanted number of processes is bigger than the system limits: # Limit them to the system limits # Limits are: File handles, number of input lines, processes, - # and taking > 1 second to spawn 10 extra procs + # and taking > 1 second to spawn 10 extra processes my $wanted_processes = shift; my $system_limit=0; -#GONE my @args=(); my @command_lines=(); -#GONE my $next_arg; my $next_command_line; my $more_filehandles; my $max_system_proc_reached=0; @@ -509,13 +511,6 @@ sub processes_available_by_system_limit { push(@command_lines, $next_command_line); } -#GONNE # If there are no more arguments, then we have a process per argument -#GONNE # so no need to go further -#GONNE $next_arg = get_next_arg(); -#GONNE if(defined $next_arg) { -#GONNE push(@args, $next_arg); -#GONNE } - # Every simultaneous process uses 2 filehandles when grouping $more_filehandles = open($fh{$system_limit*2}," 1 sec.\n", "Limiting to ", $system_limit, " jobs in parallel.\n"); } @@ -570,8 +562,6 @@ sub processes_available_by_system_limit { waitpid($pid,0); } wait(); -# # Cleanup: Unget the args -# unget_arg(@args); # Cleanup: Unget the command_lines unget_command_line(@command_lines); return $system_limit; @@ -608,114 +598,6 @@ sub user_requested_processes { return $processes; } - -sub GONE_compute_number_of_processes { - my $opt_j = shift; - my $processes = 0; - if(defined $opt_j) { - if($opt_j =~ /^\+(\d+)$/) { - # E.g. -j +2 - my $j = $1; - $processes = $j + no_of_cpus(); - } elsif ($opt_j =~ /^-(\d+)$/) { - # E.g. 
-j -2 - my $j = $1; - $processes = no_of_cpus() - $j; - } elsif ($opt_j =~ /^(\d+)\%$/) { - my $j = $1; - $processes = no_of_cpus() * $j / 100; - } elsif ($opt_j =~ /^(\d+)$/) { - $processes = $1; - if($processes == 0) { - # -j 0 = infinity (or at least close) - $processes = 2**31; - } - } else { - die_usage(); - } - if($processes < 1) { - $processes = 1; - } - } - # Have we asked for more processes than arguments? - $processes = min_of_args_and_processes($processes); - - # Every simultaneous process uses 2 filehandles when grouping - # perl uses 7 for something? - # parallel uses 1 for memory_usage - my $file_handles_needed = $processes*2+7+1; - my $free_handles = compute_no_of_free_filehandles($file_handles_needed); - if($file_handles_needed > $free_handles) { - $processes = int (($free_handles -7 -1) / 2); - print STDERR ("Warning: Only enough filehandles to run ", - $processes, " jobs in parallel. ", - "Raising ulimit -n may help\n"); - } - - debug("Computed processes: ".(int $processes)."\n"); - return int $processes; -} - -sub GONE_min_of_args_and_processes { - my $processes = shift; - my $min_of_args_and_processes=0; - my @args=(); - my $next_arg; - my $max_system_proc_reached=0; - my $time = time; - DoNotReap(); - do { - $min_of_args_and_processes++; - $next_arg = get_next_arg(); - if(defined $next_arg) { - push(@args, $next_arg); - } - $min_of_args_and_processes % 10 or $time=time; - if($child = fork()) { - push (@children,$child); - } elsif(defined $child) { - # The child needs to take one process. - # It will be killed later - sleep 100000; - exit; - } else { - $max_system_proc_reached = 1; - } - debug("Time to fork ten procs ", time-$time, " process ", $min_of_args_and_processes); - if(time-$time > 1) { - # It took more than 1 second to fork ten processes. We should stop forking. - # Let us give the system a little slack - debug("\nLimiting processes to: $min_of_args_and_processes-10%=". 
- (int ($min_of_args_and_processes * 0.9)+1)."\n"); - $min_of_args_and_processes = int ($min_of_args_and_processes * 0.9)+1; - $max_system_proc_reached = 1; - } - } while($min_of_args_and_processes <= $processes - and defined $next_arg - and not $max_system_proc_reached); - for $pid (@children) { - kill 15, $pid; - waitpid($pid,0); - } - wait(); - unget_arg(@args); - return $min_of_args_and_processes; -} - -sub NullReaper { - while (waitpid(-1, &WNOHANG) > 0) { } -} - -sub GONE_compute_no_of_free_filehandles { - my $needed = shift; - my $i=1; - my %fh; - while(open($fh{$i}," $needed and last } - for (keys %fh) { close $fh{$_} } - debug("Number of free handles: ".$i."\n"); - return $i; -} - sub no_of_cpus { my $no_of_cpus = (no_of_cpus_gnu_linux() || @@ -785,7 +667,6 @@ sub unget_command_line { push @Global::unget_next_command_line, @_; } - sub get_next_arg { my $arg; if(@Global::unget_arg) { diff --git a/parallel.1 b/parallel.1 index f42d86c5..f905b54a 100644 --- a/parallel.1 +++ b/parallel.1 @@ -372,19 +372,17 @@ parallel "wc {} \fR> \fB{}.wc"\fR using \fBxargs\fR seems to be impossible. .IX Header "BUGS" Filenames beginning with '\-' can cause some commands to give unexpected results, as it will often be interpreted as an option. -.PP -If you have a lot of filehandles, then computing the max no -takes a long time. .SH "REPORTING BUGS" .IX Header "REPORTING BUGS" Report bugs to . .SH "AUTHOR" .IX Header "AUTHOR" Copyright (C) 2007\-10\-18 Ole Tange, http://ole.tange.dk +.PP Copyright (C) 2008\-2009 Ole Tange, http://ole.tange.dk .SH "LICENSE" .IX Header "LICENSE" -Copyright (C) 2007 Free Software Foundation, Inc. +Copyright (C) 2007\-2009 Free Software Foundation, Inc. .PP This program is free software; you can redistribute it and/or modify it under the terms of the \s-1GNU\s0 General Public License as published by