diff --git a/configure b/configure
index fe2e6a51..fc8b46d9 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.67 for parallel 20101222.
+# Generated by GNU Autoconf 2.67 for parallel 20110101.
 #
 # Report bugs to <bug-parallel@gnu.org>.
 #
@@ -551,8 +551,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='parallel'
 PACKAGE_TARNAME='parallel'
-PACKAGE_VERSION='20101222'
-PACKAGE_STRING='parallel 20101222'
+PACKAGE_VERSION='20110101'
+PACKAGE_STRING='parallel 20110101'
 PACKAGE_BUGREPORT='bug-parallel@gnu.org'
 PACKAGE_URL=''
 
@@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures parallel 20101222 to adapt to many kinds of systems.
+\`configure' configures parallel 20110101 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1234,7 +1234,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of parallel 20101222:";;
+     short | recursive ) echo "Configuration of parallel 20110101:";;
   esac
   cat <<\_ACEOF
 
@@ -1301,7 +1301,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-parallel configure 20101222
+parallel configure 20110101
 generated by GNU Autoconf 2.67
 
 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by parallel $as_me 20101222, which was
+It was created by parallel $as_me 20110101, which was
 generated by GNU Autoconf 2.67. Invocation command line was
 
   $ $0 $@
@@ -2133,7 +2133,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='parallel'
- VERSION='20101222'
+ VERSION='20110101'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by parallel $as_me 20101222, which was
+This file was extended by parallel $as_me 20110101, which was
 generated by GNU Autoconf 2.67. Invocation command line was
 
   CONFIG_FILES = $CONFIG_FILES
@@ -2746,7 +2746,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-parallel config.status 20101222
+parallel config.status 20110101
 configured by $0, generated by GNU Autoconf 2.67,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 560309bc..299e6931 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([parallel], [20101222], [bug-parallel@gnu.org])
+AC_INIT([parallel], [20110101], [bug-parallel@gnu.org])
 AM_INIT_AUTOMAKE([-Wall -Werror foreign])
 AC_CONFIG_HEADERS([config.h])
 AC_CONFIG_FILES([
diff --git a/doc/FUTURE_IDEAS b/doc/FUTURE_IDEAS
index c25e1316..d8ddc47b 100644
--- a/doc/FUTURE_IDEAS
+++ b/doc/FUTURE_IDEAS
@@ -1,7 +1,8 @@
-Testsuite: sem without ~/.parallel
+parallel: spread arguments between all jobslots when reaching EOF of input
 
-cleanup of transferred files in workdir fixed.
--T implemented as ssh/rsync sometimes hang due to getting a tty.
+codecoverage
+
+Testsuite: sem without ~/.parallel
 
 Til QUOTING:
 
@@ -44,10 +45,6 @@ http://code.google.com/p/push/
 
 time find . -type f | parallel -j+0 --eta -S..,: --progress --trc {}.gz gzip {}
 
-== Workdir ==
-
-parallel -N2 --workdir ... --cleanup --transfer --return {2}.2 -S .. -v echo {} ">{2}.2" ::: ./tmp/foo ./tmp/bar
-
 == SQL ==
 
 Example with %0a as newline
@@ -94,11 +91,6 @@ colsep = [sepchars]{no_of_sepchars}
 # TODO max_line_length on remote
 # TODO compute how many can be transferred within max_line_length
 # TODO Unittest with filename that is long and requires a lot of quoting. Will there be to many
-# TODO --max-number-of-jobs print the system limited number of jobs
-
-# TODO Debian package
-
-# TODO to kill from a run script parallel should set PARALLEL_PID that can be sig termed
 
 =head1 YouTube video2
 
@@ -314,8 +306,7 @@ distribute the arguments evenly if running -X.
 
 =head1 options
 
-One char options not used: A F G K O Q R T Z c f
--T terminal
+One char options not used: A F G K O Q R Z c f
 
 =head1 sem
diff --git a/doc/release_new_version b/doc/release_new_version
index 5a7f0335..98aff01a 100644
--- a/doc/release_new_version
+++ b/doc/release_new_version
@@ -60,6 +60,7 @@ gpg -b parallel-$YYYYMMDD.tar.bz2
 
 YYYYMMDD=`yyyymmdd`
 echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org/incoming/ftp/
+#echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org/incoming/alpha/
 
 == Download and test ==
 
@@ -135,40 +136,17 @@ cc:Peter Simons , Sandro Cazzaniga ,
 ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
 Christian Faulhammer
-Subject: GNU Parallel 20101222 released
+Subject: GNU Parallel 2010XXXX released
 
-GNU Parallel 20101222 has been released. It is available for
+GNU Parallel 2010XXXX has been released. It is available for
 download at: http://ftp.gnu.org/gnu/parallel/
 
 New in this release:
 
-* GNU niceload is now part of GNU Parallel. GNU niceload slows down a
-  program if the load average is above a certain limit.
+* Review in French. Thanks to Denis Dordoigne.
+  http://linuxfr.org/2010/12/29/27715.html
 
-* Implemented --tmpdir to buffer standard output and standard error in
-  a different place.
-
-* Implemented --load to wait until the load is below a limit before
-  starting another job on that computer.
-
-* Implemented --nice set the niceness of jobs running both locally and
-  remotely.
-
-* Implemented --dry-run to print the job without running it.
-
-* Implemented --tty as the old default of assigning a tty to the first
-  job causes problems.
-
-* Review with focus on clusters. Thanks to Taylor Gillespie
-  http://www.unixpronews.com/unixpronews-49-20101019GNUParallelSpeedUpProcessingWithMulticoresClusters.html
-
-* Review with focus on protein similarity.
-  http://kevinformatics.tumblr.com/post/2142473893/cluster-like-computing-using-gnu-parallel
-
-* Review in Spanish.
-  http://gr3p.com/2010/12/gnu-parallel-acelera-tus-scripts-en-linux
-
-* Quite a few bug fixes and man page updates.
+* Bug fixes and man page updates.
 
 = About GNU Parallel =
diff --git a/src/niceload b/src/niceload
index e21fd5fd..a9e28052 100755
--- a/src/niceload
+++ b/src/niceload
@@ -236,7 +236,7 @@ B(1), B(1)
 use strict;
 use Getopt::Long;
 $Global::progname="niceload";
-$Global::version = 20101222;
+$Global::version = 20110101;
 Getopt::Long::Configure("bundling","require_order");
 get_options_from_array(\@ARGV) || die_usage();
 if($::opt_version) {
diff --git a/src/parallel b/src/parallel
index 2335a652..967fba53 100755
--- a/src/parallel
+++ b/src/parallel
@@ -3,7 +3,7 @@
 use IPC::Open3;
 use Symbol qw(gensym);
 use IO::File;
-use POSIX qw(:sys_wait_h setsid);
+use POSIX qw(:sys_wait_h setsid ceil);
 use File::Temp qw(tempfile tempdir);
 use Getopt::Long;
 use strict;
@@ -47,6 +47,9 @@ if(@ARGV) {
 $Global::JobQueue = JobQueue->new(
     $command,\@fhlist,$Global::Xargs,$number_of_args,\@Global::ret_files);
 
+for my $sshlogin (values %Global::host) {
+    $sshlogin->max_jobs_running();
+}
 init_run_jobs();
 my $sem;
@@ -193,8 +196,9 @@ sub get_options_from_array {
 sub parse_options {
     # Returns: N/A
     # Defaults:
-    $Global::version = 20101222;
+    $Global::version = 20110101;
     $Global::progname = 'parallel';
+    $Global::infinity = 2**31;
     $Global::debug = 0;
     $Global::verbose = 0;
     $Global::grouped = 1;
@@ -214,6 +218,7 @@ sub parse_options {
     $Global::arg_sep = ":::";
     $Global::arg_file_sep = "::::";
     $Global::trim = 'n';
+    $Global::max_jobs_running = 0;
 
     @ARGV=read_options();
 
@@ -330,9 +335,10 @@ sub parse_options {
     }
 
     if(not defined $::opt_P) {
-        for my $sshlogin (values %Global::host) {
-            $sshlogin->set_max_jobs_running($Global::default_simultaneous_sshlogins);
-        }
+        $::opt_P = "+0";
+        #for my $sshlogin (values %Global::host) {
+        #    $sshlogin->set_max_jobs_running($Global::default_simultaneous_sshlogins);
+        #}
     }
 }
 
@@ -495,7 +501,7 @@ sub cleanup {
 
 sub shell_quote {
-    my (@strings) = (@_);
+    my @strings = (@_);
     for my $a (@strings) {
        $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g;
        $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
@@ -517,7 +523,7 @@ sub shell_unquote {
    # Unquote strings from shell_quote
    # Returns:
    #   string with shell quoting removed
-    my (@strings) = (@_);
+    my @strings = (@_);
    my $arg;
    for $arg (@strings) {
        if(not defined $arg) {
@@ -709,7 +715,7 @@ sub progress {
    # header that will fit on the screen
    # status message that will fit on the screen
    my $termcols = terminal_columns();
-    my ($status, $header)=("x"x($termcols+1),"");
+    my ($status, $header) = ("x"x($termcols+1),"");
    my @workers = sort keys %Global::host;
    my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
    my $workerno = 1;
@@ -918,7 +924,7 @@ sub start_another_job {
        # No more commands to run
        return 0;
    } else {
-        my ($job) = get_job_with_sshlogin($sshlogin);
+        my $job = get_job_with_sshlogin($sshlogin);
        if(not defined $job) {
            # No command available for that sshlogin
            return 0;
@@ -951,7 +957,7 @@ sub get_job_with_sshlogin {
        Carp::confess("get_job_with_sshlogin should never be called if empty");
    }
 
-    my ($job) = $Global::JobQueue->get();
+    my $job = $Global::JobQueue->get();
    if(not defined $job) {
        # No more jobs
        return undef;
@@ -960,8 +966,8 @@ sub get_job_with_sshlogin {
    if($::oodebug and not defined $job->{'commandline'}) {
        Carp::confess("get_job_with_sshlogin job->commandline should never be empty");
    }
-    my ($next_command_line) = $job->replaced();
-    my ($clean_command) = $next_command_line;
+    my $next_command_line = $job->replaced();
+    my $clean_command = $next_command_line;
    if($clean_command =~ /^\s*$/) {
        # Do not run empty lines
        if(not $Global::JobQueue->empty()) {
@@ -1015,7 +1021,7 @@ sub read_sshloginfile {
 
 sub parse_sshlogin {
    # Returns: N/A
-    my (@login);
+    my @login;
    if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
    for my $sshlogin (@Global::sshlogin) {
        # Split up -S sshlogin,sshlogin
@@ -1427,7 +1433,11 @@ sub inc_jobs_completed {
 
 sub set_max_jobs_running {
    my $self = shift;
+    if(defined $self->{'max_jobs_running'}) {
+        $Global::max_jobs_running -= $self->{'max_jobs_running'};
+    }
    $self->{'max_jobs_running'} = shift;
+    $Global::max_jobs_running += $self->{'max_jobs_running'};
 }
 
 sub loadavg_too_high {
@@ -1551,8 +1561,7 @@ sub compute_max_loadavg {
 sub max_jobs_running {
    my $self = shift;
    if(not defined $self->{'max_jobs_running'}) {
-        $self->{'max_jobs_running'} =
-            $self->compute_number_of_processes($::opt_P);
+        $self->set_max_jobs_running($self->compute_number_of_processes($::opt_P));
    }
    return $self->{'max_jobs_running'};
 }
@@ -1584,12 +1593,14 @@ sub processes_available_by_system_limit {
    my $self = shift;
    my $wanted_processes = shift;
-    my $system_limit=0;
-    my @jobs=();
-    my ($job, $args_ref);
-    my $more_filehandles=1;
-    my $max_system_proc_reached=0;
-    my $slow_spawining_warning_printed=0;
+    my $system_limit = 0;
+    my @jobs = ();
+    my $job;
+    my @args = ();
+    my $arg;
+    my $more_filehandles = 1;
+    my $max_system_proc_reached = 0;
+    my $slow_spawining_warning_printed = 0;
    my $time = time;
    my %fh;
    my @children;
@@ -1612,11 +1623,20 @@ sub processes_available_by_system_limit {
            # so include the already read jobs for this sshlogin
            $count_jobs_already_read--;
        } else {
-            # If there are no more command lines, then we have a process
-            # per command line, so no need to go further
-            $Global::JobQueue->empty() and last;
-            ($job) = $Global::JobQueue->get();
-            push(@jobs, $job);
+            if($::opt_X or $::opt_m) {
+                # The arguments may have to be re-spread over several jobslots
+                # So pessimistically only read one arg per jobslot
+                # instead of a full commandline
+                $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty() and last;
+                ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
+                push(@args, $arg);
+            } else {
+                # If there are no more command lines, then we have a process
+                # per command line, so no need to go further
+                $Global::JobQueue->empty() and last;
+                ($job) = $Global::JobQueue->get();
+                push(@jobs, $job);
+            }
        }
        $system_limit++;
@@ -1667,9 +1687,9 @@ sub processes_available_by_system_limit {
        kill 9, $pid;
        waitpid($pid,0);
    }
-    #wait();
-    # Cleanup: Unget the command_lines (and args_refs)
+    # Cleanup: Unget the command_lines or the @args
+    $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
    $Global::JobQueue->unget(@jobs);
    if($self->string() ne ":" and
       $system_limit > $Global::default_simultaneous_sshlogins) {
@@ -1756,7 +1776,7 @@ sub user_requested_processes {
        $processes = $1;
        if($processes == 0) {
            # -P 0 = infinity (or at least close)
-            $processes = 2**31;
+            $processes = $Global::infinity;
        }
    } elsif (-f $opt_P) {
        $Global::max_procs_file = $opt_P;
@@ -2257,7 +2277,7 @@ sub sshlogin_wrap {
    }
    my $sshcmd = $sshlogin->sshcommand();
    my $serverlogin = $sshlogin->serverlogin();
-    my ($next_command_line) = $self->replaced();
+    my $next_command_line = $self->replaced();
    my ($pre,$post,$cleanup)=("","","");
    if($serverlogin eq ":") {
        $self->{'sshlogin_wrap'} = $next_command_line;
@@ -2380,7 +2400,7 @@ sub sshcleanup {
    # Returns:
    #   ssh command needed to remove files from sshlogin
    my $self = shift;
-    my ($sshlogin) = $self->sshlogin();
+    my $sshlogin = $self->sshlogin();
    my $sshcmd = $sshlogin->sshcommand();
    my $serverlogin = $sshlogin->serverlogin();
    my $workdir = $self->workdir();
@@ -2573,7 +2593,7 @@ sub print {
    $Global::grouped or return;
    my $out = $self->stdout();
    my $err = $self->stderr();
-    my ($command) = $self->sshlogin_wrap();
+    my $command = $self->sshlogin_wrap();
 
    if(($::opt_dryrun or $Global::verbose) and $Global::grouped) {
        if($Global::verbose <= 1) {
@@ -2677,6 +2697,7 @@ sub populate {
    # Add arguments from arg_queue until the number of arguments or
    # max line length is reached
    my $self = shift;
+#    my $first_time_empty = 1;
    my $next_arg;
    while (not $self->{'arg_queue'}->empty()) {
        $next_arg = $self->{'arg_queue'}->get();
@@ -2711,10 +2732,27 @@ sub populate {
            }
        }
    }
+    if($self->{'arg_queue'}->empty() and not $CommandLine::already_spread) {
+        # EOF => Spread the arguments over all jobslots (unless they
+        # are already spread)
+        $CommandLine::already_spread++;
+        if($self->number_of_args() > 1) {
+            $self->{'max_number_of_args'} =
+                ::ceil($self->number_of_args()/$Global::max_jobs_running);
+            $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
+                $self->{'max_number_of_args'};
+            $self->{'arg_queue'}->unget($self->pop_all());
+            while($self->number_of_args() < $self->{'max_number_of_args'}) {
+                $self->push($self->{'arg_queue'}->get());
+            }
+        }
+    }
+
    if($self->number_of_args() > 0) {
        # Fill up if we have a half completed line
        if(defined $self->{'max_number_of_args'}) {
            # If you want a number of args and do not have it then fill out the rest with empties
+            # so magic strings like '{2}' will be replaced with empty.
            while($self->number_of_args() < $self->{'max_number_of_args'}) {
                $self->push([Arg->new("")]);
            }
@@ -2762,6 +2800,17 @@ sub pop {
    return $record;
 }
 
+sub pop_all {
+    # Remove all arguments
+    my $self = shift;
+    my @popped = @{$self->{'arg_list'}};
+    for my $replacement_string qw(keys %{$self->{'replacecount'}}) {
+        $self->{'len'}{$replacement_string} = 0;
+    }
+    $self->{'arg_list'} = [];
+    return @popped;
+}
+
 sub number_of_args {
    my $self = shift;
    # This is really number of records
@@ -3047,7 +3096,7 @@ sub get {
        my $cmd_line = shift @{$self->{'unget'}};
        return ($cmd_line);
    } else {
-        my ($cmd_line);
+        my $cmd_line;
        $cmd_line = CommandLine->new($self->{'command'},
                                     $self->{'arg_queue'},
                                     $self->{'context_replace'},
@@ -3427,7 +3476,7 @@ sub trim_of {
    # lr|rl = both
    # Returns:
    #   string with white space removed as needed
-    my (@strings) = map { defined $_ ? $_ : "" } (@_);
+    my @strings = map { defined $_ ? $_ : "" } (@_);
    my $arg;
    if($Global::trim eq "n") {
        # skip
@@ -3508,7 +3557,7 @@ sub acquire {
 }
 
 sub release {
-    my ($self) = shift;
+    my $self = shift;
    unlink $self->{'pidfile'};
    if($self->nlinks() == 1) {
        # This is the last link, so atomic cleanup
@@ -3525,8 +3574,8 @@ sub release {
 
 sub atomic_link_if_count_less_than {
    # Link $file1 to $file2 if nlinks to $file1 < $count
-    my ($self) = shift;
-    my ($retval) = 0;
+    my $self = shift;
+    my $retval = 0;
    $self->lock();
    ::debug($self->nlinks()."<".$self->{'count'});
    if($self->nlinks() < $self->{'count'}) {
@@ -3553,7 +3602,7 @@ sub nlinks {
 }
 
 sub lock {
-    my ($self) = shift;
+    my $self = shift;
    open $self->{'lockfh'}, ">", $self->{'lockfile'}
        or die "Can't open semaphore file $self->{'lockfile'}: $!";
    chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
diff --git a/src/parallel.pod b/src/parallel.pod
index f344b792..b5eb2d5e 100644
--- a/src/parallel.pod
+++ b/src/parallel.pod
@@ -398,8 +398,8 @@ use B<-I> instead.
 
 =item B<-P> I<N>
 
-Run up to N jobs in parallel. 0 means as many as possible. Default is
-9.
+Number of jobslots. Run up to N jobs in parallel. 0 means as many as
+possible. Default is 9.
 
 If B<--semaphore> is set default is 1 thus making a mutex.
@@ -1534,10 +1534,10 @@ editor will be started again with the remaining files.
 
 =head1 EXAMPLE: GNU Parallel as queue system/batch manager
 
-GNU Parallel can work as a simple job queue system or batch manager.
-The idea is to put the jobs into a file and have GNU Parallel read
-from that continuously. As GNU Parallel will stop at end of file we
-use tail to continue reading:
+GNU B<parallel> can work as a simple job queue system or batch manager.
+The idea is to put the jobs into a file and have GNU B<parallel> read
+from that continuously. As GNU B<parallel> will stop at end of file we
+use B<tail> to continue reading:
 
 B>B; B
@@ -1550,6 +1550,26 @@ computers:
 
 B>B; B
 
+There are a two small issues when using GNU B<parallel> as queue
+system/batch manager:
+
+=over 2
+
+=item *
+
+You will get a warning if you do not submit JobSlots jobs within the
+first second. E.g. if you have 8 cores and use B<-j+2> you have to submit
+10 jobs. These can be dummy jobs (e.g. B). You can also simply
+ignore the warning.
+
+=item *
+
+Jobs will be run immediately, but output from jobs will only be
+printed when JobSlots more jobs has been started. E.g. if you have 10
+jobslots then the output from the first completed job will only be
+printed when job 11 is started.
+
+=back
 
 =head1 EXAMPLE: GNU Parallel as dir processor
 
diff --git a/src/sql b/src/sql
index 19c269d6..18997abe 100755
--- a/src/sql
+++ b/src/sql
@@ -528,7 +528,7 @@ $Global::Initfile && unlink $Global::Initfile;
 exit ($err);
 
 sub parse_options {
-    $Global::version = 20101222;
+    $Global::version = 20110101;
    $Global::progname = 'sql';
 
    # This must be done first as this may exec myself
diff --git a/testsuite/tests-to-run/test48.sh b/testsuite/tests-to-run/test48.sh
new file mode 100644
index 00000000..87e3ffbd
--- /dev/null
+++ b/testsuite/tests-to-run/test48.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+echo '### Test distribute arguments at EOF to 2 jobslots'
+seq 1 92 | parallel -j+0 -kX -s 100 echo
+
+echo '### Test distribute arguments at EOF to 5 jobslots'
+seq 1 92 | parallel -j+3 -kX -s 100 echo
+
+echo '### Test distribute arguments at EOF to infinity jobslots'
+seq 1 92 | parallel -j0 -kX -s 100 echo
+
+
+
diff --git a/testsuite/wanted-results/test48 b/testsuite/wanted-results/test48
new file mode 100644
index 00000000..7d12cdd9
--- /dev/null
+++ b/testsuite/wanted-results/test48
@@ -0,0 +1,43 @@
+### Test distribute arguments at EOF to 2 jobslots
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
+35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
+66 67 68 69 70 71 72 73 74 75 76 77 78 79
+80 81 82 83 84 85 86 87 88 89 90 91 92
+### Test distribute arguments at EOF to 5 jobslots
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
+35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
+66 67 68 69 70 71
+72 73 74 75 76 77
+78 79 80 81 82 83
+84 85 86 87 88 89
+90 91 92
+### Test distribute arguments at EOF to infinity jobslots
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
+35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92