From 3f372fe25944b6e59f74597747f49e3237a9adaf Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 2 Dec 2013 17:38:19 +0100 Subject: [PATCH] parallel: Code reorg. --- src/parallel | 392 +++++++++++++++++++++++++-------------------------- 1 file changed, 196 insertions(+), 196 deletions(-) diff --git a/src/parallel b/src/parallel index aab42650..fbda66e8 100755 --- a/src/parallel +++ b/src/parallel @@ -109,210 +109,16 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { filter_hosts(); } -sub filter_hosts { - my(@cores, @cpus, @maxline, @echo); - while (my ($host, $sshlogin) = each %Global::host) { - # The 'true' is used to get the $host out later - my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); - push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n"); - push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n"); - push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n"); - # 'echo' is used to get the best possible value for an ssh login time - push(@echo, $host."\t".$sshcmd." echo\n"); - } - my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh"); - print $fh @cores, @cpus, @maxline, @echo; - close $fh; - # --timeout 5: Setting up an SSH connection and running a simple - # command should never take > 5 sec. - # --delay 0.1: If multiple sshlogins use the same proxy the delay - # will make it less likely to overload the ssh daemon. - # --retries 3: If the ssh daemon it overloaded, try 3 times - my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null"; - ::debug($cmd."\n"); - open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); - my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); - while(<$host_fh>) { - chomp; - my @col = split /\t/, $_; - if(defined $col[6]) { - # This is a line from --joblog - # seq host time spent sent received exit signal command - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores - if($col[0] eq "Seq" and $col[1] eq "Host" and - $col[2] eq "Starttime" and $col[3] eq "Runtime") { - # Header => skip - next; - } - # Get server from: eval true server\; - $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); - my $host = $1; - $host =~ s/\\//g; - $Global::host{$host} or next; - if($col[6] eq "255" or $col[7] eq "15") { - # exit == 255 or signal == 15: ssh failed - # Remove sshlogin - ::debug("--filtered $host\n"); - push(@down_hosts, $host); - @down_hosts = uniq(@down_hosts); - } elsif($col[6] eq "127") { - # signal == 127: parallel not installed remote - # Set ncpus and ncores = 1 - ::warning("Could not figure out ", - "number of cpus on $host. Using 1.\n"); - $ncores{$host} = 1; - $ncpus{$host} = 1; - $maxlen{$host} = Limits::Command::max_length(); - } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { - # Remember how log it took to log in - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo - $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); - } else { - ::die_bug("host check unmatched long jobline: $_"); - } - } elsif($Global::host{$col[0]}) { - # This output from --number-of-cores, --number-of-cpus, - # --max-line-length-allowed - # ncores: server 8 - # ncpus: server 2 - # maxlen: server 131071 - if(not $ncores{$col[0]}) { - $ncores{$col[0]} = $col[1]; - } elsif(not $ncpus{$col[0]}) { - $ncpus{$col[0]} = $col[1]; - } elsif(not $maxlen{$col[0]}) { - $maxlen{$col[0]} = $col[1]; - } elsif(not $echo{$col[0]}) { - $echo{$col[0]} = $col[1]; - } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { - # Skip these: - # perl: warning: Setting locale failed. - # perl: warning: Please check that your locale settings: - # LANGUAGE = (unset), - # LC_ALL = (unset), - # LANG = "en_US.UTF-8" - # are supported and installed on your system. - # perl: warning: Falling back to the standard locale ("C"). - } else { - ::die_bug("host check too many col0: $_"); - } - } else { - ::die_bug("host check unmatched short jobline ($col[0]): $_"); - } - } - close $host_fh; - unlink $tmpfile; - delete @Global::host{@down_hosts}; - @down_hosts and ::warning("Removed @down_hosts\n"); - while (my ($sshlogin, $obj) = each %Global::host) { - $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); - $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); - $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); - $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); - if($opt::use_cpus_instead_of_cores) { - $obj->set_ncpus($ncpus{$sshlogin}); - } else { - $obj->set_ncpus($ncores{$sshlogin}); - } - $obj->set_time_to_login($time_to_login{$sshlogin}); - $obj->set_maxlength($maxlen{$sshlogin}); - ::debug("Timing from -S:$sshlogin ncpus:$ncpus{$sshlogin} ncores:$ncores{$sshlogin} ", - "time_to_login:$time_to_login{$sshlogin} maxlen:$maxlen{$sshlogin}\n"); - } -} + if($opt::nonall or $opt::onall) { onall(); wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } -sub onall { # TODO --transfer foo/./bar --cleanup # multiple --transfer and --basefile with different /./ - sub tmp_joblog { - my $joblog = shift; - if(not defined $joblog) { - return undef; - } - my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".log"); - close $fh; - return $tmpfile; - } - # Copy all @fhlist into tempfiles - my @argfiles = (); - for my $fh (@fhlist) { - my ($outfh, $name) = ::tempfile(SUFFIX => ".all", UNLINK => 1); - print $outfh (<$fh>); - close $outfh; - push @argfiles, $name; - } - if(@opt::basefile) { setup_basefile(); } - # for each sshlogin do: - # parallel -S $sshlogin $command :::: @argfiles - # - # Pass some of the options to the sub-parallels, not all of them as - # -P should only go to the first, and -S should not be copied at all. - my $options = - join(" ", - ((defined $opt::P) ? "-P $opt::P" : ""), - ((defined $opt::u) ? "-u" : ""), - ((defined $opt::group) ? "-g" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::D) ? "-D" : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - ); - my $suboptions = - join(" ", - ((defined $opt::u) ? "-u" : ""), - ((defined $opt::group) ? "-g" : ""), - ((defined $opt::files) ? "--files" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), - ((@opt::v) ? "-vv" : ""), - ((defined $opt::D) ? "-D" : ""), - ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::retries) ? "--retries ".$opt::retries : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), - ); - ::debug("| $0 $options\n"); - open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || - ::die_bug("This does not run GNU Parallel: $0 $options"); - my @joblogs; - for my $host (sort keys %Global::host) { - my $sshlogin = $Global::host{$host}; - my $joblog = tmp_joblog($opt::joblog); - if($joblog) { - push @joblogs, $joblog; - $joblog = "--joblog $joblog"; - } - print $parallel_fh "$0 $suboptions -j1 $joblog ". - ((defined $opt::tag) ? - "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""). - " -S ". shell_quote_scalar($sshlogin->string())." ". - shell_quote_scalar($command)." :::: @argfiles\n"; - } - close $parallel_fh; - $Global::exitstatus = $? >> 8; - debug("--onall exitvalue ",$?); - if(@opt::basefile) { cleanup_basefile(); } - unlink(@argfiles); - my %seen; - for my $joblog (@joblogs) { - # Append to $joblog - open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); - # Skip first line (header); - <$fh>; - print $Global::joblog (<$fh>); - close $fh; - unlink($joblog); - } -} - - $Global::JobQueue = JobQueue->new( $command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); if($opt::pipe and @opt::a) { @@ -2162,6 +1968,201 @@ sub cleanup_basefile { print `$cmd`; } +sub filter_hosts { + my(@cores, @cpus, @maxline, @echo); + while (my ($host, $sshlogin) = each %Global::host) { + # The 'true' is used to get the $host out later + my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); + push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n"); + push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n"); + push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n"); + # 'echo' is used to get the best possible value for an ssh login time + push(@echo, $host."\t".$sshcmd." echo\n"); + } + my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh"); + print $fh @cores, @cpus, @maxline, @echo; + close $fh; + # --timeout 5: Setting up an SSH connection and running a simple + # command should never take > 5 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon it overloaded, try 3 times + my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null"; + ::debug($cmd."\n"); + open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); + my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); + while(<$host_fh>) { + chomp; + my @col = split /\t/, $_; + if(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime" and $col[3] eq "Runtime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); + my $host = $1; + $host =~ s/\\//g; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[7] eq "15") { + # exit == 255 or signal == 15: ssh failed + # Remove sshlogin + ::debug("--filtered $host\n"); + push(@down_hosts, $host); + @down_hosts = uniq(@down_hosts); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set ncpus and ncores = 1 + ::warning("Could not figure out ", + "number of cpus on $host. Using 1.\n"); + $ncores{$host} = 1; + $ncpus{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(not $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not $ncpus{$col[0]}) { + $ncpus{$col[0]} = $col[1]; + } elsif(not $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { + # Skip these: + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + close $host_fh; + unlink $tmpfile; + delete @Global::host{@down_hosts}; + @down_hosts and ::warning("Removed @down_hosts\n"); + while (my ($sshlogin, $obj) = each %Global::host) { + $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); + $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); + $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); + $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); + if($opt::use_cpus_instead_of_cores) { + $obj->set_ncpus($ncpus{$sshlogin}); + } else { + $obj->set_ncpus($ncores{$sshlogin}); + } + $obj->set_time_to_login($time_to_login{$sshlogin}); + $obj->set_maxlength($maxlen{$sshlogin}); + ::debug("Timing from -S:$sshlogin ncpus:$ncpus{$sshlogin} ncores:$ncores{$sshlogin} ", + "time_to_login:$time_to_login{$sshlogin} maxlen:$maxlen{$sshlogin}\n"); + } +} + +sub onall { + sub tmp_joblog { + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + # Copy all @fhlist into tempfiles + my @argfiles = (); + for my $fh (@fhlist) { + my ($outfh, $name) = ::tempfile(SUFFIX => ".all", UNLINK => 1); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::P) ? "-P $opt::P" : ""), + ((defined $opt::u) ? "-u" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::D) ? "-D" : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::u) ? "-u" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((@opt::v) ? "-vv" : ""), + ((defined $opt::D) ? "-D" : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + ); + ::debug("| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + print $parallel_fh "$0 $suboptions -j1 $joblog ". + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""). + " -S ". shell_quote_scalar($sshlogin->string())." ". + shell_quote_scalar($command)." :::: @argfiles\n"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("--onall exitvalue ",$?); + if(@opt::basefile) { cleanup_basefile(); } + unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + sub __SIGNAL_HANDLING__ {} sub save_original_signal_handler { @@ -2275,7 +2276,6 @@ sub reaper { return $children_reaped; } - sub __USAGE__ {} sub wait_and_exit {