mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 14:07:55 +00:00
src/parallel: --filter-hosts is now much faster.
This commit is contained in:
parent
14b15f3d43
commit
822f0d4d73
|
@ -163,7 +163,7 @@ http://freshmeat.net/projects/parallel/releases/new
|
||||||
== Update Diaspora Twitter ==
|
== Update Diaspora Twitter ==
|
||||||
|
|
||||||
New release of #GNU Parallel pi.dk/0 New in this release pi.dk/2 See the intro videos pi.dk/1
|
New release of #GNU Parallel pi.dk/0 New in this release pi.dk/2 See the intro videos pi.dk/1
|
||||||
10 seconds installation: wget -O - pi.dk/3|sh
|
10 secs installation: wget -O - pi.dk/3|sh
|
||||||
|
|
||||||
[x] Twitter
|
[x] Twitter
|
||||||
Aspect: Public
|
Aspect: Public
|
||||||
|
@ -171,7 +171,7 @@ Aspect: Public
|
||||||
== Send announce ==
|
== Send announce ==
|
||||||
|
|
||||||
http://groups.google.com/group/comp.unix.shell/post
|
http://groups.google.com/group/comp.unix.shell/post
|
||||||
Newsgroups: comp.unix.shell,comp.unix.admin
|
http://groups.google.com/group/comp.unix.admin/post
|
||||||
|
|
||||||
https://lists.gnu.org/mailman/admindb/bug-parallel
|
https://lists.gnu.org/mailman/admindb/bug-parallel
|
||||||
https://lists.gnu.org/mailman/admindb/parallel
|
https://lists.gnu.org/mailman/admindb/parallel
|
||||||
|
@ -195,32 +195,18 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
|
||||||
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
|
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
|
||||||
Jesse Alama <jesse.alama@gmail.com>
|
Jesse Alama <jesse.alama@gmail.com>
|
||||||
|
|
||||||
Subject: GNU Parallel 20130622 ('Snowden') released
|
Subject: GNU Parallel 20130722 ('') released
|
||||||
|
|
||||||
GNU Parallel 20130622 ('Snowden') has been released. It is
|
GNU Parallel 20130722 ('') has been released. It is
|
||||||
available for download at: http://ftp.gnu.org/gnu/parallel/
|
available for download at: http://ftp.gnu.org/gnu/parallel/
|
||||||
|
|
||||||
Very few changes so this can be considered a stable release.
|
Very few changes so this can be considered a stable release.
|
||||||
|
|
||||||
New in this release:
|
New in this release:
|
||||||
|
|
||||||
* --xapply now recycles arguments if an input source has more
|
* http://www.brunokim.com.br/blog/?p=18
|
||||||
arguments than others.
|
|
||||||
|
|
||||||
* The sleep time between jobs is now both increased and decreased
|
* http://www.open-open.com/news/view/371301
|
||||||
exponentially.
|
|
||||||
|
|
||||||
* 10 seconds installation check the signature using GnuPG if GnuPG is
|
|
||||||
installed.
|
|
||||||
|
|
||||||
* Developer job asking for GNU Parallel expertise.
|
|
||||||
http://careers.stackoverflow.com/jobs/35562/developer-big-data-geo-and-web-climate-central
|
|
||||||
|
|
||||||
* A small utility program to run youtube-dl in parallel.
|
|
||||||
https://github.com/dlh/youtube-dl-parallel
|
|
||||||
|
|
||||||
* Parallelizing Freesurfer:
|
|
||||||
http://blog.cogneurostats.com/?p=148
|
|
||||||
|
|
||||||
* Bug fixes and man page updates.
|
* Bug fixes and man page updates.
|
||||||
|
|
||||||
|
|
43
src/parallel
43
src/parallel
|
@ -99,21 +99,36 @@ if($opt::header and not $opt::pipe) {
|
||||||
# Check in parallel that all hosts are up
|
# Check in parallel that all hosts are up
|
||||||
#if(not $opt::plain and (@opt::sshlogin or @opt::sshloginfile)) {
|
#if(not $opt::plain and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||||
if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||||
my @S = map { "-S " . ::shell_quote_scalar($_) } @opt::sshlogin;
|
my(@cores, @cpus, @maxline, @echo);
|
||||||
my @slf = map { "--slf " . ::shell_quote_scalar($_) } @opt::sshloginfile;
|
while (my ($host, $sshlogin) = each %Global::host) {
|
||||||
my $cmd = "$0 --plain --tag --joblog - -k --onall @S @slf " .
|
# The 'true' is used to get the $host out later
|
||||||
"::: ".
|
my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
|
||||||
"'parallel --number-of-cores ' ".
|
push(@cores, $host."\t".$sshcmd." parallel --number-of-cores\n");
|
||||||
"'parallel --number-of-cpus' ".
|
push(@cpus, $host."\t".$sshcmd." parallel --number-of-cpus\n");
|
||||||
"'parallel --max-line-length-allowed' ".
|
push(@maxline, $host."\t".$sshcmd." parallel --max-line-length-allowed\n");
|
||||||
"'true' ";
|
# 'echo' is used to get the best possible value for an ssh login time
|
||||||
|
push(@echo, $host."\t".$sshcmd." echo\n");
|
||||||
|
}
|
||||||
|
my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh");
|
||||||
|
print $fh @cores, @cpus, @maxline, @echo;
|
||||||
|
close $fh;
|
||||||
|
my $cmd = "cat $tmpfile | $0 -j0 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2}";
|
||||||
::debug($cmd."\n");
|
::debug($cmd."\n");
|
||||||
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
|
open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
|
||||||
my (%ncores, %ncpus, %time_to_login, %maxlen);
|
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo);
|
||||||
while(<$host_fh>) {
|
while(<$host_fh>) {
|
||||||
my @col = split /\t/, $_;
|
my @col = split /\t/, $_;
|
||||||
if(defined $col[6]) {
|
if(defined $col[6]) {
|
||||||
# This is a line from --joblog
|
# This is a line from --joblog
|
||||||
|
# 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
|
||||||
|
if($col[0] eq "Seq" and $col[1] eq "Host" and
|
||||||
|
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
|
||||||
|
# Header => skip
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
# Get server from: eval true server\;
|
||||||
|
$col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
|
||||||
|
my $host = $1;
|
||||||
if($col[6] eq "255") {
|
if($col[6] eq "255") {
|
||||||
# exit value == 255: ssh failed
|
# exit value == 255: ssh failed
|
||||||
# Remove sshlogin
|
# Remove sshlogin
|
||||||
|
@ -126,14 +141,11 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||||
$ncores{$col[1]} = 1;
|
$ncores{$col[1]} = 1;
|
||||||
$ncpus{$col[1]} = 1;
|
$ncpus{$col[1]} = 1;
|
||||||
$maxlen{$col[1]} = Limits::Command::max_length();
|
$maxlen{$col[1]} = Limits::Command::max_length();
|
||||||
} elsif($col[0] =~ /^\d+$/ and $Global::host{$col[1]}) {
|
} elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
|
||||||
# 1 server 1338156112.05 0.303 0 0 0 0
|
# 1 server 1338156112.05 0.303 0 0 0 0
|
||||||
# parallel --number-of-cores ; parallel --number-of-cpus
|
# parallel --number-of-cores ; parallel --number-of-cpus
|
||||||
# Remember how log it took to log in
|
# Remember how log it took to log in
|
||||||
$time_to_login{$col[1]} = ::min($time_to_login{$col[1]},$col[3]);
|
$time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
|
||||||
} elsif($col[0] eq "Seq" and $col[1] eq "Host" and
|
|
||||||
$col[2] eq "Starttime" and $col[3] eq "Runtime") {
|
|
||||||
# skip
|
|
||||||
} else {
|
} else {
|
||||||
::die_bug("host check unmatched long jobline: $_");
|
::die_bug("host check unmatched long jobline: $_");
|
||||||
}
|
}
|
||||||
|
@ -149,6 +161,8 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||||
$ncpus{$col[0]} = $col[1];
|
$ncpus{$col[0]} = $col[1];
|
||||||
} elsif(not $maxlen{$col[0]}) {
|
} elsif(not $maxlen{$col[0]}) {
|
||||||
$maxlen{$col[0]} = $col[1];
|
$maxlen{$col[0]} = $col[1];
|
||||||
|
} elsif(not $echo{$col[0]}) {
|
||||||
|
$echo{$col[0]} = $col[1];
|
||||||
} else {
|
} else {
|
||||||
::die_bug("host check too many col0: $_");
|
::die_bug("host check too many col0: $_");
|
||||||
}
|
}
|
||||||
|
@ -157,6 +171,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close $host_fh;
|
close $host_fh;
|
||||||
|
unlink $tmpfile;
|
||||||
while (my ($sshlogin, $obj) = each %Global::host) {
|
while (my ($sshlogin, $obj) = each %Global::host) {
|
||||||
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
|
$ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
|
||||||
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
|
$ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
|
||||||
|
|
|
@ -68,8 +68,8 @@ B<xargs>. If I<command> is not given GNU B<parallel> will behave
|
||||||
similar to B<cat | sh>.
|
similar to B<cat | sh>.
|
||||||
|
|
||||||
The I<command> must be an executable, a script, a composed command, or
|
The I<command> must be an executable, a script, a composed command, or
|
||||||
a function. If it is a function you need to B<export -f> the function
|
a function. If it is a Bash function you need to B<export -f> the
|
||||||
first. An alias will, however, not work (see why
|
function first. An alias will, however, not work (see why
|
||||||
http://www.perlmonks.org/index.pl?node_id=484296).
|
http://www.perlmonks.org/index.pl?node_id=484296).
|
||||||
|
|
||||||
|
|
||||||
|
@ -1745,6 +1745,31 @@ Find the files in a list that do not exist
|
||||||
|
|
||||||
B<cat file_list | parallel 'if [ ! -e {} ] ; then echo {}; fi'>
|
B<cat file_list | parallel 'if [ ! -e {} ] ; then echo {}; fi'>
|
||||||
|
|
||||||
|
|
||||||
|
=head1 EXAMPLE: Calling Bash functions
|
||||||
|
|
||||||
|
If the composed command is longer than a line, it becomes hard to
|
||||||
|
read. In Bash you can use functions. Just remember to B<export -f> the
|
||||||
|
function.
|
||||||
|
|
||||||
|
doit() {
|
||||||
|
echo Doing it for $1
|
||||||
|
sleep 2
|
||||||
|
echo Done with $1
|
||||||
|
}
|
||||||
|
export -f doit
|
||||||
|
parallel doit ::: 1 2 3
|
||||||
|
|
||||||
|
doubleit() {
|
||||||
|
echo Doing it for $1 $2
|
||||||
|
sleep 2
|
||||||
|
echo Done with $1 $2
|
||||||
|
}
|
||||||
|
export -f doubleit
|
||||||
|
parallel doubleit ::: 1 2 3 ::: a b
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=head1 EXAMPLE: Removing file extension when processing files
|
=head1 EXAMPLE: Removing file extension when processing files
|
||||||
|
|
||||||
When processing files removing the file extension using B<{.}> is
|
When processing files removing the file extension using B<{.}> is
|
||||||
|
@ -1782,7 +1807,7 @@ B<ls *.tar.gz| parallel --er {tar} 'echo {tar}|parallel "mkdir -p {.} ; tar -C {
|
||||||
|
|
||||||
Let us assume a website stores images like:
|
Let us assume a website stores images like:
|
||||||
|
|
||||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||||
|
|
||||||
where YYYYMMDD is the date and ## is the number 01-10. This will
|
where YYYYMMDD is the date and ## is the number 01-10. This will
|
||||||
download images for the past 30 days:
|
download images for the past 30 days:
|
||||||
|
|
|
@ -1481,7 +1481,7 @@ seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms
|
||||||
later, followed by SIGKILL 200 ms later.
|
later, followed by SIGKILL 200 ms later.
|
||||||
|
|
||||||
If @emph{val} is followed by a % then the timeout will dynamically be
|
If @emph{val} is followed by a % then the timeout will dynamically be
|
||||||
computed as a percentage of the smoothed average runtime. Only values
|
computed as a percentage of the median runtime. Only values
|
||||||
> 100% will make sense.
|
> 100% will make sense.
|
||||||
|
|
||||||
@item @strong{--tollef} (obsolete - will be retired 20140222)
|
@item @strong{--tollef} (obsolete - will be retired 20140222)
|
||||||
|
@ -1866,6 +1866,31 @@ Find the files in a list that do not exist
|
||||||
|
|
||||||
@strong{cat file_list | parallel 'if [ ! -e @{@} ] ; then echo @{@}; fi'}
|
@strong{cat file_list | parallel 'if [ ! -e @{@} ] ; then echo @{@}; fi'}
|
||||||
|
|
||||||
|
@chapter EXAMPLE: Calling Bash functions
|
||||||
|
@anchor{EXAMPLE: Calling Bash functions}
|
||||||
|
|
||||||
|
If the composed command is longer than a line, it becomes hard to
|
||||||
|
read. In Bash you can use functions. Just remember to @strong{export -f} the
|
||||||
|
function.
|
||||||
|
|
||||||
|
@verbatim
|
||||||
|
doit() {
|
||||||
|
echo Doing it for $1
|
||||||
|
sleep 2
|
||||||
|
echo Done with $1
|
||||||
|
}
|
||||||
|
export -f doit
|
||||||
|
parallel doit ::: 1 2 3
|
||||||
|
|
||||||
|
doubleit() {
|
||||||
|
echo Doing it for $1 $2
|
||||||
|
sleep 2
|
||||||
|
echo Done with $1 $2
|
||||||
|
}
|
||||||
|
export -f doubleit
|
||||||
|
parallel doubleit ::: 1 2 3 ::: a b
|
||||||
|
@end verbatim
|
||||||
|
|
||||||
@chapter EXAMPLE: Removing file extension when processing files
|
@chapter EXAMPLE: Removing file extension when processing files
|
||||||
@anchor{EXAMPLE: Removing file extension when processing files}
|
@anchor{EXAMPLE: Removing file extension when processing files}
|
||||||
|
|
||||||
|
@ -1904,7 +1929,7 @@ foo) you can do:
|
||||||
Let us assume a website stores images like:
|
Let us assume a website stores images like:
|
||||||
|
|
||||||
@verbatim
|
@verbatim
|
||||||
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
http://www.example.com/path/to/YYYYMMDD_##.jpg
|
||||||
@end verbatim
|
@end verbatim
|
||||||
|
|
||||||
where YYYYMMDD is the date and ## is the number 01-10. This will
|
where YYYYMMDD is the date and ## is the number 01-10. This will
|
||||||
|
|
Loading…
Reference in a new issue