mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
src/parallel: --filter-hosts is now much faster.
This commit is contained in:
parent
14b15f3d43
commit
822f0d4d73
|
@ -163,7 +163,7 @@ http://freshmeat.net/projects/parallel/releases/new
|
|||
== Update Diaspora Twitter ==
|
||||
|
||||
New release of #GNU Parallel pi.dk/0 New in this release pi.dk/2 See the intro videos pi.dk/1
|
||||
10 seconds installation: wget -O - pi.dk/3|sh
|
||||
10 secs installation: wget -O - pi.dk/3|sh
|
||||
|
||||
[x] Twitter
|
||||
Aspect: Public
|
||||
|
@ -171,7 +171,7 @@ Aspect: Public
|
|||
== Send announce ==
|
||||
|
||||
http://groups.google.com/group/comp.unix.shell/post
|
||||
Newsgroups: comp.unix.shell,comp.unix.admin
|
||||
http://groups.google.com/group/comp.unix.admin/post
|
||||
|
||||
https://lists.gnu.org/mailman/admindb/bug-parallel
|
||||
https://lists.gnu.org/mailman/admindb/parallel
|
||||
|
@ -195,32 +195,18 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
|
|||
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
|
||||
Jesse Alama <jesse.alama@gmail.com>
|
||||
|
||||
Subject: GNU Parallel 20130622 ('Snowden') released
|
||||
Subject: GNU Parallel 20130722 ('') released
|
||||
|
||||
GNU Parallel 20130622 ('Snowden') has been released. It is
|
||||
GNU Parallel 20130722 ('') has been released. It is
|
||||
available for download at: http://ftp.gnu.org/gnu/parallel/
|
||||
|
||||
Very few changes so this can be considered a stable release.
|
||||
|
||||
New in this release:
|
||||
|
||||
* --xapply now recycles arguments if an input source has more
|
||||
arguments than others.
|
||||
* http://www.brunokim.com.br/blog/?p=18
|
||||
|
||||
* The sleep time between jobs is now both increased and decreased
|
||||
exponentially.
|
||||
|
||||
* 10 seconds installation checks the signature using GnuPG if GnuPG is
|
||||
installed.
|
||||
|
||||
* Developer job asking for GNU Parallel expertise.
|
||||
http://careers.stackoverflow.com/jobs/35562/developer-big-data-geo-and-web-climate-central
|
||||
|
||||
* A small utility program to run youtube-dl in parallel.
|
||||
https://github.com/dlh/youtube-dl-parallel
|
||||
|
||||
* Parallelizing Freesurfer:
|
||||
http://blog.cogneurostats.com/?p=148
|
||||
* http://www.open-open.com/news/view/371301
|
||||
|
||||
* Bug fixes and man page updates.
|
||||
|
||||
|
|
43
src/parallel
43
src/parallel
|
@ -99,21 +99,36 @@ if($opt::header and not $opt::pipe) {
|
|||
# Parallel check that all hosts are up
|
||||
#if(not $opt::plain and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||
if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||
my @S = map { "-S " . ::shell_quote_scalar($_) } @opt::sshlogin;
|
||||
my @slf = map { "--slf " . ::shell_quote_scalar($_) } @opt::sshloginfile;
|
||||
my $cmd = "$0 --plain --tag --joblog - -k --onall @S @slf " .
|
||||
"::: ".
|
||||
"'parallel --number-of-cores ' ".
|
||||
"'parallel --number-of-cpus' ".
|
||||
"'parallel --max-line-length-allowed' ".
|
||||
"'true' ";
|
||||
my(@cores, @cpus, @maxline, @echo);
|
||||
while (my ($host, $sshlogin) = each %Global::host) {
|
||||
# The 'true' is used to get the $host out later
|
||||
my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
|
||||
push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n");
|
||||
push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n");
|
||||
push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n");
|
||||
# 'echo' is used to get the best possible value for an ssh login time
|
||||
push(@echo, $host."\t".$sshcmd." echo\n");
|
||||
}
|
||||
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
|
||||
print $fh @cores, @cpus, @maxline, @echo;
|
||||
close $fh;
|
||||
my $cmd = "cat $tmpfile | $0 -j0 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2}";
|
||||
::debug($cmd."\n");
|
||||
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
|
||||
my (%ncores, %ncpus, %time_to_login, %maxlen);
|
||||
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo);
|
||||
while(<$host_fh>) {
|
||||
my @col = split /\t/, $_;
|
||||
if(defined $col[6]) {
|
||||
# This is a line from --joblog
|
||||
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
|
||||
if($col[0] eq "Seq" and $col[1] eq "Host" and
|
||||
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
|
||||
# Header => skip
|
||||
next;
|
||||
}
|
||||
# Get server from: eval true server\;
|
||||
$col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
|
||||
my $host = $1;
|
||||
if($col[6] eq "255") {
|
||||
# exit value == 255: ssh failed
|
||||
# Remove sshlogin
|
||||
|
@ -126,14 +141,11 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
|||
$ncores{$col[1]} = 1;
|
||||
$ncpus{$col[1]} = 1;
|
||||
$maxlen{$col[1]} = Limits::Command::max_length();
|
||||
} elsif($col[0] =~ /^\d+$/ and $Global::host{$col[1]}) {
|
||||
} elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
|
||||
# 1 server 1338156112.05 0.303 0 0 0 0
|
||||
# parallel --number-of-cores ; parallel --number-of-cpus
|
||||
# Remember how long it took to log in
|
||||
$time_to_login{$col[1]} = ::min($time_to_login{$col[1]},$col[3]);
|
||||
} elsif($col[0] eq "Seq" and $col[1] eq "Host" and
|
||||
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
|
||||
# skip
|
||||
$time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
|
||||
} else {
|
||||
::die_bug("host check unmatched long jobline: $_");
|
||||
}
|
||||
|
@ -149,6 +161,8 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
|||
$ncpus{$col[0]} = $col[1];
|
||||
} elsif(not $maxlen{$col[0]}) {
|
||||
$maxlen{$col[0]} = $col[1];
|
||||
} elsif(not $echo{$col[0]}) {
|
||||
$echo{$col[0]} = $col[1];
|
||||
} else {
|
||||
::die_bug("host check too many col0: $_");
|
||||
}
|
||||
|
@ -157,6 +171,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
|||
}
|
||||
}
|
||||
close $host_fh;
|
||||
unlink $tmpfile;
|
||||
while (my ($sshlogin, $obj) = each %Global::host) {
|
||||
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
|
||||
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
|
||||
|
|
|
@ -68,8 +68,8 @@ B<xargs>. If I<command> is not given GNU B<parallel> will behave
|
|||
similar to B<cat | sh>.
|
||||
|
||||
The I<command> must be an executable, a script, a composed command, or
|
||||
a function. If it is a function you need to B<export -f> the function
|
||||
first. An alias will, however, not work (see why
|
||||
a function. If it is a Bash function you need to B<export -f> the
|
||||
function first. An alias will, however, not work (see why
|
||||
http://www.perlmonks.org/index.pl?node_id=484296).
|
||||
|
||||
|
||||
|
@ -1745,6 +1745,31 @@ Find the files in a list that do not exist
|
|||
|
||||
B<cat file_list | parallel 'if [ ! -e {} ] ; then echo {}; fi'>
|
||||
|
||||
|
||||
=head1 EXAMPLE: Calling Bash functions
|
||||
|
||||
If the composed command is longer than a line, it becomes hard to
|
||||
read. In Bash you can use functions. Just remember to B<export -f> the
|
||||
function.
|
||||
|
||||
doit() {
|
||||
echo Doing it for $1
|
||||
sleep 2
|
||||
echo Done with $1
|
||||
}
|
||||
export -f doit
|
||||
parallel doit ::: 1 2 3
|
||||
|
||||
doubleit() {
|
||||
echo Doing it for $1 $2
|
||||
sleep 2
|
||||
echo Done with $1 $2
|
||||
}
|
||||
export -f doubleit
|
||||
parallel doubleit ::: 1 2 3 ::: a b
|
||||
|
||||
|
||||
|
||||
=head1 EXAMPLE: Removing file extension when processing files
|
||||
|
||||
When processing files removing the file extension using B<{.}> is
|
||||
|
@ -1782,7 +1807,7 @@ B<ls *.tar.gz| parallel --er {tar} 'echo {tar}|parallel "mkdir -p {.} ; tar -C {
|
|||
|
||||
Let us assume a website stores images like:
|
||||
|
||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||
|
||||
where YYYYMMDD is the date and ## is the number 01-10. This will
|
||||
download images for the past 30 days:
|
||||
|
|
|
@ -1481,7 +1481,7 @@ seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms
|
|||
later, followed by SIGKILL 200 ms later.
|
||||
|
||||
If @emph{val} is followed by a % then the timeout will dynamically be
|
||||
computed as a percentage of the smoothed average runtime. Only values
|
||||
computed as a percentage of the median average runtime. Only values
|
||||
> 100% will make sense.
|
||||
|
||||
@item @strong{--tollef} (obsolete - will be retired 20140222)
|
||||
|
@ -1866,6 +1866,31 @@ Find the files in a list that do not exist
|
|||
|
||||
@strong{cat file_list | parallel 'if [ ! -e @{@} ] ; then echo @{@}; fi'}
|
||||
|
||||
@chapter EXAMPLE: Calling Bash functions
|
||||
@anchor{EXAMPLE: Calling Bash functions}
|
||||
|
||||
If the composed command is longer than a line, it becomes hard to
|
||||
read. In Bash you can use functions. Just remember to @strong{export -f} the
|
||||
function.
|
||||
|
||||
@verbatim
|
||||
doit() {
|
||||
echo Doing it for $1
|
||||
sleep 2
|
||||
echo Done with $1
|
||||
}
|
||||
export -f doit
|
||||
parallel doit ::: 1 2 3
|
||||
|
||||
doubleit() {
|
||||
echo Doing it for $1 $2
|
||||
sleep 2
|
||||
echo Done with $1 $2
|
||||
}
|
||||
export -f doubleit
|
||||
parallel doubleit ::: 1 2 3 ::: a b
|
||||
@end verbatim
|
||||
|
||||
@chapter EXAMPLE: Removing file extension when processing files
|
||||
@anchor{EXAMPLE: Removing file extension when processing files}
|
||||
|
||||
|
@ -1904,7 +1929,7 @@ foo) you can do:
|
|||
Let us assume a website stores images like:
|
||||
|
||||
@verbatim
|
||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||
@end verbatim
|
||||
|
||||
where YYYYMMDD is the date and ## is the number 01-10. This will
|
||||
|
|
Loading…
Reference in a new issue