parallel: Code reorg.

This commit is contained in:
Ole Tange 2013-12-02 17:38:19 +01:00
parent f21641733a
commit 3f372fe259

View file

@ -109,210 +109,16 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
filter_hosts();
}
sub filter_hosts {
my(@cores, @cpus, @maxline, @echo);
while (my ($host, $sshlogin) = each %Global::host) {
# The 'true' is used to get the $host out later
my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n");
push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n");
push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n");
# 'echo' is used to get the best possible value for an ssh login time
push(@echo, $host."\t".$sshcmd." echo\n");
}
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
print $fh @cores, @cpus, @maxline, @echo;
close $fh;
# --timeout 5: Setting up an SSH connection and running a simple
# command should never take > 5 sec.
# --delay 0.1: If multiple sshlogins use the same proxy the delay
# will make it less likely to overload the ssh daemon.
# --retries 3: If the ssh daemon it overloaded, try 3 times
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
::debug($cmd."\n");
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
while(<$host_fh>) {
chomp;
my @col = split /\t/, $_;
if(defined $col[6]) {
# This is a line from --joblog
# seq host time spent sent received exit signal command
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
if($col[0] eq "Seq" and $col[1] eq "Host" and
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
# Header => skip
next;
}
# Get server from: eval true server\;
$col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
my $host = $1;
$host =~ s/\\//g;
$Global::host{$host} or next;
if($col[6] eq "255" or $col[7] eq "15") {
# exit == 255 or signal == 15: ssh failed
# Remove sshlogin
::debug("--filtered $host\n");
push(@down_hosts, $host);
@down_hosts = uniq(@down_hosts);
} elsif($col[6] eq "127") {
# signal == 127: parallel not installed remote
# Set ncpus and ncores = 1
::warning("Could not figure out ",
"number of cpus on $host. Using 1.\n");
$ncores{$host} = 1;
$ncpus{$host} = 1;
$maxlen{$host} = Limits::Command::max_length();
} elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
# Remember how log it took to log in
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
$time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
} else {
::die_bug("host check unmatched long jobline: $_");
}
} elsif($Global::host{$col[0]}) {
# This output from --number-of-cores, --number-of-cpus,
# --max-line-length-allowed
# ncores: server 8
# ncpus: server 2
# maxlen: server 131071
if(not $ncores{$col[0]}) {
$ncores{$col[0]} = $col[1];
} elsif(not $ncpus{$col[0]}) {
$ncpus{$col[0]} = $col[1];
} elsif(not $maxlen{$col[0]}) {
$maxlen{$col[0]} = $col[1];
} elsif(not $echo{$col[0]}) {
$echo{$col[0]} = $col[1];
} elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
# Skip these:
# perl: warning: Setting locale failed.
# perl: warning: Please check that your locale settings:
# LANGUAGE = (unset),
# LC_ALL = (unset),
# LANG = "en_US.UTF-8"
# are supported and installed on your system.
# perl: warning: Falling back to the standard locale ("C").
} else {
::die_bug("host check too many col0: $_");
}
} else {
::die_bug("host check unmatched short jobline ($col[0]): $_");
}
}
close $host_fh;
unlink $tmpfile;
delete @Global::host{@down_hosts};
@down_hosts and ::warning("Removed @down_hosts\n");
while (my ($sshlogin, $obj) = each %Global::host) {
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
$time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
$maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
if($opt::use_cpus_instead_of_cores) {
$obj->set_ncpus($ncpus{$sshlogin});
} else {
$obj->set_ncpus($ncores{$sshlogin});
}
$obj->set_time_to_login($time_to_login{$sshlogin});
$obj->set_maxlength($maxlen{$sshlogin});
::debug("Timing from -S:$sshlogin ncpus:$ncpus{$sshlogin} ncores:$ncores{$sshlogin} ",
"time_to_login:$time_to_login{$sshlogin} maxlen:$maxlen{$sshlogin}\n");
}
}
if($opt::nonall or $opt::onall) {
onall();
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
sub onall {
# TODO --transfer foo/./bar --cleanup
# multiple --transfer and --basefile with different /./
sub tmp_joblog {
my $joblog = shift;
if(not defined $joblog) {
return undef;
}
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".log");
close $fh;
return $tmpfile;
}
# Copy all @fhlist into tempfiles
my @argfiles = ();
for my $fh (@fhlist) {
my ($outfh, $name) = ::tempfile(SUFFIX => ".all", UNLINK => 1);
print $outfh (<$fh>);
close $outfh;
push @argfiles, $name;
}
if(@opt::basefile) { setup_basefile(); }
# for each sshlogin do:
# parallel -S $sshlogin $command :::: @argfiles
#
# Pass some of the options to the sub-parallels, not all of them as
# -P should only go to the first, and -S should not be copied at all.
my $options =
join(" ",
((defined $opt::P) ? "-P $opt::P" : ""),
((defined $opt::u) ? "-u" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::D) ? "-D" : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
);
my $suboptions =
join(" ",
((defined $opt::u) ? "-u" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::files) ? "--files" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
((@opt::v) ? "-vv" : ""),
((defined $opt::D) ? "-D" : ""),
((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::retries) ? "--retries ".$opt::retries : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
);
::debug("| $0 $options\n");
open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
::die_bug("This does not run GNU Parallel: $0 $options");
my @joblogs;
for my $host (sort keys %Global::host) {
my $sshlogin = $Global::host{$host};
my $joblog = tmp_joblog($opt::joblog);
if($joblog) {
push @joblogs, $joblog;
$joblog = "--joblog $joblog";
}
print $parallel_fh "$0 $suboptions -j1 $joblog ".
((defined $opt::tag) ?
"--tagstring ".shell_quote_scalar($sshlogin->string()) : "").
" -S ". shell_quote_scalar($sshlogin->string())." ".
shell_quote_scalar($command)." :::: @argfiles\n";
}
close $parallel_fh;
$Global::exitstatus = $? >> 8;
debug("--onall exitvalue ",$?);
if(@opt::basefile) { cleanup_basefile(); }
unlink(@argfiles);
my %seen;
for my $joblog (@joblogs) {
# Append to $joblog
open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
# Skip first line (header);
<$fh>;
print $Global::joblog (<$fh>);
close $fh;
unlink($joblog);
}
}
$Global::JobQueue = JobQueue->new(
$command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
if($opt::pipe and @opt::a) {
@ -2162,6 +1968,201 @@ sub cleanup_basefile {
print `$cmd`;
}
sub filter_hosts {
my(@cores, @cpus, @maxline, @echo);
while (my ($host, $sshlogin) = each %Global::host) {
# The 'true' is used to get the $host out later
my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n");
push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n");
push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n");
# 'echo' is used to get the best possible value for an ssh login time
push(@echo, $host."\t".$sshcmd." echo\n");
}
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
print $fh @cores, @cpus, @maxline, @echo;
close $fh;
# --timeout 5: Setting up an SSH connection and running a simple
# command should never take > 5 sec.
# --delay 0.1: If multiple sshlogins use the same proxy the delay
# will make it less likely to overload the ssh daemon.
# --retries 3: If the ssh daemon it overloaded, try 3 times
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null";
::debug($cmd."\n");
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
while(<$host_fh>) {
chomp;
my @col = split /\t/, $_;
if(defined $col[6]) {
# This is a line from --joblog
# seq host time spent sent received exit signal command
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
if($col[0] eq "Seq" and $col[1] eq "Host" and
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
# Header => skip
next;
}
# Get server from: eval true server\;
$col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
my $host = $1;
$host =~ s/\\//g;
$Global::host{$host} or next;
if($col[6] eq "255" or $col[7] eq "15") {
# exit == 255 or signal == 15: ssh failed
# Remove sshlogin
::debug("--filtered $host\n");
push(@down_hosts, $host);
@down_hosts = uniq(@down_hosts);
} elsif($col[6] eq "127") {
# signal == 127: parallel not installed remote
# Set ncpus and ncores = 1
::warning("Could not figure out ",
"number of cpus on $host. Using 1.\n");
$ncores{$host} = 1;
$ncpus{$host} = 1;
$maxlen{$host} = Limits::Command::max_length();
} elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
# Remember how log it took to log in
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
$time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
} else {
::die_bug("host check unmatched long jobline: $_");
}
} elsif($Global::host{$col[0]}) {
# This output from --number-of-cores, --number-of-cpus,
# --max-line-length-allowed
# ncores: server 8
# ncpus: server 2
# maxlen: server 131071
if(not $ncores{$col[0]}) {
$ncores{$col[0]} = $col[1];
} elsif(not $ncpus{$col[0]}) {
$ncpus{$col[0]} = $col[1];
} elsif(not $maxlen{$col[0]}) {
$maxlen{$col[0]} = $col[1];
} elsif(not $echo{$col[0]}) {
$echo{$col[0]} = $col[1];
} elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
# Skip these:
# perl: warning: Setting locale failed.
# perl: warning: Please check that your locale settings:
# LANGUAGE = (unset),
# LC_ALL = (unset),
# LANG = "en_US.UTF-8"
# are supported and installed on your system.
# perl: warning: Falling back to the standard locale ("C").
} else {
::die_bug("host check too many col0: $_");
}
} else {
::die_bug("host check unmatched short jobline ($col[0]): $_");
}
}
close $host_fh;
unlink $tmpfile;
delete @Global::host{@down_hosts};
@down_hosts and ::warning("Removed @down_hosts\n");
while (my ($sshlogin, $obj) = each %Global::host) {
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
$time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
$maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
if($opt::use_cpus_instead_of_cores) {
$obj->set_ncpus($ncpus{$sshlogin});
} else {
$obj->set_ncpus($ncores{$sshlogin});
}
$obj->set_time_to_login($time_to_login{$sshlogin});
$obj->set_maxlength($maxlen{$sshlogin});
::debug("Timing from -S:$sshlogin ncpus:$ncpus{$sshlogin} ncores:$ncores{$sshlogin} ",
"time_to_login:$time_to_login{$sshlogin} maxlen:$maxlen{$sshlogin}\n");
}
}
sub onall {
sub tmp_joblog {
my $joblog = shift;
if(not defined $joblog) {
return undef;
}
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".log");
close $fh;
return $tmpfile;
}
# Copy all @fhlist into tempfiles
my @argfiles = ();
for my $fh (@fhlist) {
my ($outfh, $name) = ::tempfile(SUFFIX => ".all", UNLINK => 1);
print $outfh (<$fh>);
close $outfh;
push @argfiles, $name;
}
if(@opt::basefile) { setup_basefile(); }
# for each sshlogin do:
# parallel -S $sshlogin $command :::: @argfiles
#
# Pass some of the options to the sub-parallels, not all of them as
# -P should only go to the first, and -S should not be copied at all.
my $options =
join(" ",
((defined $opt::P) ? "-P $opt::P" : ""),
((defined $opt::u) ? "-u" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::D) ? "-D" : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
);
my $suboptions =
join(" ",
((defined $opt::u) ? "-u" : ""),
((defined $opt::group) ? "-g" : ""),
((defined $opt::files) ? "--files" : ""),
((defined $opt::keeporder) ? "--keeporder" : ""),
((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
((@opt::v) ? "-vv" : ""),
((defined $opt::D) ? "-D" : ""),
((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::retries) ? "--retries ".$opt::retries : ""),
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
(@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
);
::debug("| $0 $options\n");
open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
::die_bug("This does not run GNU Parallel: $0 $options");
my @joblogs;
for my $host (sort keys %Global::host) {
my $sshlogin = $Global::host{$host};
my $joblog = tmp_joblog($opt::joblog);
if($joblog) {
push @joblogs, $joblog;
$joblog = "--joblog $joblog";
}
print $parallel_fh "$0 $suboptions -j1 $joblog ".
((defined $opt::tag) ?
"--tagstring ".shell_quote_scalar($sshlogin->string()) : "").
" -S ". shell_quote_scalar($sshlogin->string())." ".
shell_quote_scalar($command)." :::: @argfiles\n";
}
close $parallel_fh;
$Global::exitstatus = $? >> 8;
debug("--onall exitvalue ",$?);
if(@opt::basefile) { cleanup_basefile(); }
unlink(@argfiles);
my %seen;
for my $joblog (@joblogs) {
# Append to $joblog
open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
# Skip first line (header);
<$fh>;
print $Global::joblog (<$fh>);
close $fh;
unlink($joblog);
}
}
sub __SIGNAL_HANDLING__ {}
sub save_original_signal_handler {
@ -2275,7 +2276,6 @@ sub reaper {
return $children_reaped;
}
sub __USAGE__ {}
sub wait_and_exit {