parallel: alpharelease. Distribute args between jobslots for -X. -j+0 now default.

This commit is contained in:
Ole Tange 2011-01-02 01:01:21 +01:00
parent 39526ff1fe
commit 57cf4df2bc
10 changed files with 193 additions and 99 deletions

20
configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.67 for parallel 20101222.
# Generated by GNU Autoconf 2.67 for parallel 20110101.
#
# Report bugs to <bug-parallel@gnu.org>.
#
@ -551,8 +551,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='parallel'
PACKAGE_TARNAME='parallel'
PACKAGE_VERSION='20101222'
PACKAGE_STRING='parallel 20101222'
PACKAGE_VERSION='20110101'
PACKAGE_STRING='parallel 20110101'
PACKAGE_BUGREPORT='bug-parallel@gnu.org'
PACKAGE_URL=''
@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures parallel 20101222 to adapt to many kinds of systems.
\`configure' configures parallel 20110101 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1234,7 +1234,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of parallel 20101222:";;
short | recursive ) echo "Configuration of parallel 20110101:";;
esac
cat <<\_ACEOF
@ -1301,7 +1301,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
parallel configure 20101222
parallel configure 20110101
generated by GNU Autoconf 2.67
Copyright (C) 2010 Free Software Foundation, Inc.
@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by parallel $as_me 20101222, which was
It was created by parallel $as_me 20110101, which was
generated by GNU Autoconf 2.67. Invocation command line was
$ $0 $@
@ -2133,7 +2133,7 @@ fi
# Define the identity of the package.
PACKAGE='parallel'
VERSION='20101222'
VERSION='20110101'
cat >>confdefs.h <<_ACEOF
@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by parallel $as_me 20101222, which was
This file was extended by parallel $as_me 20110101, which was
generated by GNU Autoconf 2.67. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -2746,7 +2746,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
parallel config.status 20101222
parallel config.status 20110101
configured by $0, generated by GNU Autoconf 2.67,
with options \\"\$ac_cs_config\\"

View file

@ -1,4 +1,4 @@
AC_INIT([parallel], [20101222], [bug-parallel@gnu.org])
AC_INIT([parallel], [20110101], [bug-parallel@gnu.org])
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([

View file

@ -1,7 +1,8 @@
Testsuite: sem without ~/.parallel
parallel: spread arguments between all jobslots when reaching EOF of input
cleanup of transferred files in workdir fixed.
-T implemented as ssh/rsync sometimes hang due to getting a tty.
codecoverage
Testsuite: sem without ~/.parallel
Til QUOTING:
@ -44,10 +45,6 @@ http://code.google.com/p/push/
time find . -type f | parallel -j+0 --eta -S..,: --progress --trc {}.gz gzip {}
== Workdir ==
parallel -N2 --workdir ... --cleanup --transfer --return {2}.2 -S .. -v echo {} ">{2}.2" ::: ./tmp/foo ./tmp/bar
== SQL ==
Example with %0a as newline
@ -94,11 +91,6 @@ colsep = [sepchars]{no_of_sepchars}
# TODO max_line_length on remote
# TODO compute how many can be transferred within max_line_length
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be to many
# TODO --max-number-of-jobs print the system limited number of jobs
# TODO Debian package
# TODO to kill from a run script parallel should set PARALLEL_PID that can be sig termed
=head1 YouTube video2
@ -314,8 +306,7 @@ distribute the arguments evenly if running -X.
=head1 options
One char options not used: A F G K O Q R T Z c f
-T terminal
One char options not used: A F G K O Q R Z c f
=head1 sem

View file

@ -60,6 +60,7 @@ gpg -b parallel-$YYYYMMDD.tar.bz2
YYYYMMDD=`yyyymmdd`
echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org/incoming/ftp/
#echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org/incoming/alpha/
== Download and test ==
@ -135,40 +136,17 @@ cc:Peter Simons <simons@cryp.to>, Sandro Cazzaniga <kharec@mandriva.org>,
ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
Christian Faulhammer <fauli@gentoo.org>
Subject: GNU Parallel 20101222 released
Subject: GNU Parallel 2010XXXX released
GNU Parallel 20101222 has been released. It is available for
GNU Parallel 2010XXXX has been released. It is available for
download at: http://ftp.gnu.org/gnu/parallel/
New in this release:
* GNU niceload is now part of GNU Parallel. GNU niceload slows down a
program if the load average is above a certain limit.
* Review in French. Thanks to Denis Dordoigne.
http://linuxfr.org/2010/12/29/27715.html
* Implemented --tmpdir to buffer standard output and standard error in
a different place.
* Implemented --load to wait until the load is below a limit before
starting another job on that computer.
* Implemented --nice set the niceness of jobs running both locally and
remotely.
* Implemented --dry-run to print the job without running it.
* Implemented --tty as the old default of assigning a tty to the first
job causes problems.
* Review with focus on clusters. Thanks to Taylor Gillespie
http://www.unixpronews.com/unixpronews-49-20101019GNUParallelSpeedUpProcessingWithMulticoresClusters.html
* Review with focus on protein similarity.
http://kevinformatics.tumblr.com/post/2142473893/cluster-like-computing-using-gnu-parallel
* Review in Spanish.
http://gr3p.com/2010/12/gnu-parallel-acelera-tus-scripts-en-linux
* Quite a few bug fixes and man page updates.
* Bug fixes and man page updates.
= About GNU Parallel =

View file

@ -236,7 +236,7 @@ B<parallel>(1), B<nice>(1)
use strict;
use Getopt::Long;
$Global::progname="niceload";
$Global::version = 20101222;
$Global::version = 20110101;
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV) || die_usage();
if($::opt_version) {

View file

@ -3,7 +3,7 @@
use IPC::Open3;
use Symbol qw(gensym);
use IO::File;
use POSIX qw(:sys_wait_h setsid);
use POSIX qw(:sys_wait_h setsid ceil);
use File::Temp qw(tempfile tempdir);
use Getopt::Long;
use strict;
@ -47,6 +47,9 @@ if(@ARGV) {
$Global::JobQueue = JobQueue->new(
$command,\@fhlist,$Global::Xargs,$number_of_args,\@Global::ret_files);
for my $sshlogin (values %Global::host) {
$sshlogin->max_jobs_running();
}
init_run_jobs();
my $sem;
@ -193,8 +196,9 @@ sub get_options_from_array {
sub parse_options {
# Returns: N/A
# Defaults:
$Global::version = 20101222;
$Global::version = 20110101;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;
$Global::verbose = 0;
$Global::grouped = 1;
@ -214,6 +218,7 @@ sub parse_options {
$Global::arg_sep = ":::";
$Global::arg_file_sep = "::::";
$Global::trim = 'n';
$Global::max_jobs_running = 0;
@ARGV=read_options();
@ -330,9 +335,10 @@ sub parse_options {
}
if(not defined $::opt_P) {
for my $sshlogin (values %Global::host) {
$sshlogin->set_max_jobs_running($Global::default_simultaneous_sshlogins);
}
$::opt_P = "+0";
#for my $sshlogin (values %Global::host) {
# $sshlogin->set_max_jobs_running($Global::default_simultaneous_sshlogins);
#}
}
}
@ -495,7 +501,7 @@ sub cleanup {
sub shell_quote {
my (@strings) = (@_);
my @strings = (@_);
for my $a (@strings) {
$a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g;
$a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
@ -517,7 +523,7 @@ sub shell_unquote {
# Unquote strings from shell_quote
# Returns:
# string with shell quoting removed
my (@strings) = (@_);
my @strings = (@_);
my $arg;
for $arg (@strings) {
if(not defined $arg) {
@ -709,7 +715,7 @@ sub progress {
# header that will fit on the screen
# status message that will fit on the screen
my $termcols = terminal_columns();
my ($status, $header)=("x"x($termcols+1),"");
my ($status, $header) = ("x"x($termcols+1),"");
my @workers = sort keys %Global::host;
my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
my $workerno = 1;
@ -918,7 +924,7 @@ sub start_another_job {
# No more commands to run
return 0;
} else {
my ($job) = get_job_with_sshlogin($sshlogin);
my $job = get_job_with_sshlogin($sshlogin);
if(not defined $job) {
# No command available for that sshlogin
return 0;
@ -951,7 +957,7 @@ sub get_job_with_sshlogin {
Carp::confess("get_job_with_sshlogin should never be called if empty");
}
my ($job) = $Global::JobQueue->get();
my $job = $Global::JobQueue->get();
if(not defined $job) {
# No more jobs
return undef;
@ -960,8 +966,8 @@ sub get_job_with_sshlogin {
if($::oodebug and not defined $job->{'commandline'}) {
Carp::confess("get_job_with_sshlogin job->commandline should never be empty");
}
my ($next_command_line) = $job->replaced();
my ($clean_command) = $next_command_line;
my $next_command_line = $job->replaced();
my $clean_command = $next_command_line;
if($clean_command =~ /^\s*$/) {
# Do not run empty lines
if(not $Global::JobQueue->empty()) {
@ -1015,7 +1021,7 @@ sub read_sshloginfile {
sub parse_sshlogin {
# Returns: N/A
my (@login);
my @login;
if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
for my $sshlogin (@Global::sshlogin) {
# Split up -S sshlogin,sshlogin
@ -1427,7 +1433,11 @@ sub inc_jobs_completed {
sub set_max_jobs_running {
my $self = shift;
if(defined $self->{'max_jobs_running'}) {
$Global::max_jobs_running -= $self->{'max_jobs_running'};
}
$self->{'max_jobs_running'} = shift;
$Global::max_jobs_running += $self->{'max_jobs_running'};
}
sub loadavg_too_high {
@ -1551,8 +1561,7 @@ sub compute_max_loadavg {
sub max_jobs_running {
my $self = shift;
if(not defined $self->{'max_jobs_running'}) {
$self->{'max_jobs_running'} =
$self->compute_number_of_processes($::opt_P);
$self->set_max_jobs_running($self->compute_number_of_processes($::opt_P));
}
return $self->{'max_jobs_running'};
}
@ -1584,12 +1593,14 @@ sub processes_available_by_system_limit {
my $self = shift;
my $wanted_processes = shift;
my $system_limit=0;
my @jobs=();
my ($job, $args_ref);
my $more_filehandles=1;
my $max_system_proc_reached=0;
my $slow_spawining_warning_printed=0;
my $system_limit = 0;
my @jobs = ();
my $job;
my @args = ();
my $arg;
my $more_filehandles = 1;
my $max_system_proc_reached = 0;
my $slow_spawining_warning_printed = 0;
my $time = time;
my %fh;
my @children;
@ -1611,6 +1622,14 @@ sub processes_available_by_system_limit {
# For retries we may need to run all jobs on this sshlogin
# so include the already read jobs for this sshlogin
$count_jobs_already_read--;
} else {
if($::opt_X or $::opt_m) {
# The arguments may have to be re-spread over several jobslots
# So pessimistically only read one arg per jobslot
# instead of a full commandline
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty() and last;
($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
push(@args, $arg);
} else {
# If there are no more command lines, then we have a process
# per command line, so no need to go further
@ -1618,6 +1637,7 @@ sub processes_available_by_system_limit {
($job) = $Global::JobQueue->get();
push(@jobs, $job);
}
}
$system_limit++;
# Every simultaneous process uses 2 filehandles when grouping
@ -1667,9 +1687,9 @@ sub processes_available_by_system_limit {
kill 9, $pid;
waitpid($pid,0);
}
#wait();
# Cleanup: Unget the command_lines (and args_refs)
# Cleanup: Unget the command_lines or the @args
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
$Global::JobQueue->unget(@jobs);
if($self->string() ne ":" and
$system_limit > $Global::default_simultaneous_sshlogins) {
@ -1756,7 +1776,7 @@ sub user_requested_processes {
$processes = $1;
if($processes == 0) {
# -P 0 = infinity (or at least close)
$processes = 2**31;
$processes = $Global::infinity;
}
} elsif (-f $opt_P) {
$Global::max_procs_file = $opt_P;
@ -2257,7 +2277,7 @@ sub sshlogin_wrap {
}
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my ($next_command_line) = $self->replaced();
my $next_command_line = $self->replaced();
my ($pre,$post,$cleanup)=("","","");
if($serverlogin eq ":") {
$self->{'sshlogin_wrap'} = $next_command_line;
@ -2380,7 +2400,7 @@ sub sshcleanup {
# Returns:
# ssh command needed to remove files from sshlogin
my $self = shift;
my ($sshlogin) = $self->sshlogin();
my $sshlogin = $self->sshlogin();
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my $workdir = $self->workdir();
@ -2573,7 +2593,7 @@ sub print {
$Global::grouped or return;
my $out = $self->stdout();
my $err = $self->stderr();
my ($command) = $self->sshlogin_wrap();
my $command = $self->sshlogin_wrap();
if(($::opt_dryrun or $Global::verbose) and $Global::grouped) {
if($Global::verbose <= 1) {
@ -2677,6 +2697,7 @@ sub populate {
# Add arguments from arg_queue until the number of arguments or
# max line length is reached
my $self = shift;
# my $first_time_empty = 1;
my $next_arg;
while (not $self->{'arg_queue'}->empty()) {
$next_arg = $self->{'arg_queue'}->get();
@ -2711,10 +2732,27 @@ sub populate {
}
}
}
if($self->{'arg_queue'}->empty() and not $CommandLine::already_spread) {
# EOF => Spread the arguments over all jobslots (unless they
# are already spread)
$CommandLine::already_spread++;
if($self->number_of_args() > 1) {
$self->{'max_number_of_args'} =
::ceil($self->number_of_args()/$Global::max_jobs_running);
$Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
$self->{'max_number_of_args'};
$self->{'arg_queue'}->unget($self->pop_all());
while($self->number_of_args() < $self->{'max_number_of_args'}) {
$self->push($self->{'arg_queue'}->get());
}
}
}
if($self->number_of_args() > 0) {
# Fill up if we have a half completed line
if(defined $self->{'max_number_of_args'}) {
# If you want a number of args and do not have it then fill out the rest with empties
# so magic strings like '{2}' will be replaced with empty.
while($self->number_of_args() < $self->{'max_number_of_args'}) {
$self->push([Arg->new("")]);
}
@ -2762,6 +2800,17 @@ sub pop {
return $record;
}
sub pop_all {
# Remove all arguments
my $self = shift;
my @popped = @{$self->{'arg_list'}};
for my $replacement_string qw(keys %{$self->{'replacecount'}}) {
$self->{'len'}{$replacement_string} = 0;
}
$self->{'arg_list'} = [];
return @popped;
}
sub number_of_args {
my $self = shift;
# This is really number of records
@ -3047,7 +3096,7 @@ sub get {
my $cmd_line = shift @{$self->{'unget'}};
return ($cmd_line);
} else {
my ($cmd_line);
my $cmd_line;
$cmd_line = CommandLine->new($self->{'command'},
$self->{'arg_queue'},
$self->{'context_replace'},
@ -3427,7 +3476,7 @@ sub trim_of {
# lr|rl = both
# Returns:
# string with white space removed as needed
my (@strings) = map { defined $_ ? $_ : "" } (@_);
my @strings = map { defined $_ ? $_ : "" } (@_);
my $arg;
if($Global::trim eq "n") {
# skip
@ -3508,7 +3557,7 @@ sub acquire {
}
sub release {
my ($self) = shift;
my $self = shift;
unlink $self->{'pidfile'};
if($self->nlinks() == 1) {
# This is the last link, so atomic cleanup
@ -3525,8 +3574,8 @@ sub release {
sub atomic_link_if_count_less_than {
# Link $file1 to $file2 if nlinks to $file1 < $count
my ($self) = shift;
my ($retval) = 0;
my $self = shift;
my $retval = 0;
$self->lock();
::debug($self->nlinks()."<".$self->{'count'});
if($self->nlinks() < $self->{'count'}) {
@ -3553,7 +3602,7 @@ sub nlinks {
}
sub lock {
my ($self) = shift;
my $self = shift;
open $self->{'lockfh'}, ">", $self->{'lockfile'}
or die "Can't open semaphore file $self->{'lockfile'}: $!";
chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw

View file

@ -398,8 +398,8 @@ use B<-I> instead.
=item B<-P> I<N>
Run up to N jobs in parallel. 0 means as many as possible. Default is
9.
Number of jobslots. Run up to N jobs in parallel. 0 means as many as
possible. Default is 9.
If B<--semaphore> is set default is 1 thus making a mutex.
@ -1534,10 +1534,10 @@ editor will be started again with the remaining files.
=head1 EXAMPLE: GNU Parallel as queue system/batch manager
GNU Parallel can work as a simple job queue system or batch manager.
The idea is to put the jobs into a file and have GNU Parallel read
from that continuously. As GNU Parallel will stop at end of file we
use tail to continue reading:
GNU B<parallel> can work as a simple job queue system or batch manager.
The idea is to put the jobs into a file and have GNU B<parallel> read
from that continuously. As GNU B<parallel> will stop at end of file we
use B<tail> to continue reading:
B<echo >>B<jobqueue>; B<tail -f jobqueue | parallel>
@ -1550,6 +1550,26 @@ computers:
B<echo >>B<jobqueue>; B<tail -f jobqueue | parallel -S ..>
There are a two small issues when using GNU B<parallel> as queue
system/batch manager:
=over 2
=item *
You will get a warning if you do not submit JobSlots jobs within the
first second. E.g. if you have 8 cores and use B<-j+2> you have to submit
10 jobs. These can be dummy jobs (e.g. B<echo foo>). You can also simply
ignore the warning.
=item *
Jobs will be run immediately, but output from jobs will only be
printed when JobSlots more jobs has been started. E.g. if you have 10
jobslots then the output from the first completed job will only be
printed when job 11 is started.
=back
=head1 EXAMPLE: GNU Parallel as dir processor

View file

@ -528,7 +528,7 @@ $Global::Initfile && unlink $Global::Initfile;
exit ($err);
sub parse_options {
$Global::version = 20101222;
$Global::version = 20110101;
$Global::progname = 'sql';
# This must be done first as this may exec myself

View file

@ -0,0 +1,13 @@
#!/bin/bash
echo '### Test distribute arguments at EOF to 2 jobslots'
seq 1 92 | parallel -j+0 -kX -s 100 echo
echo '### Test distribute arguments at EOF to 5 jobslots'
seq 1 92 | parallel -j+3 -kX -s 100 echo
echo '### Test distribute arguments at EOF to infinity jobslots'
seq 1 92 | parallel -j0 -kX -s 100 echo

View file

@ -0,0 +1,43 @@
### Test distribute arguments at EOF to 2 jobslots
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
66 67 68 69 70 71 72 73 74 75 76 77 78 79
80 81 82 83 84 85 86 87 88 89 90 91 92
### Test distribute arguments at EOF to 5 jobslots
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
66 67 68 69 70 71
72 73 74 75 76 77
78 79 80 81 82 83
84 85 86 87 88 89
90 91 92
### Test distribute arguments at EOF to infinity jobslots
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92