405 lines
8.5 KiB
Perl
Executable file
405 lines
8.5 KiB
Perl
Executable file
#!/usr/bin/perl
|
|
|
|
=head1 NAME
|
|
|
|
bsearch - binary search through sorted text files
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
B<bsearch> [-nrfB] file string [string...]
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
B<bsearch> searches a sorted file for a string. It outputs the
|
|
following line or the byte position of this line, which is where the
|
|
string would have been if it had been in the sorted file.
|
|
|
|
=over 9
|
|
|
|
=item B<--ignore-leading-blanks> (not implemented)
|
|
|
|
=item B<-b>
|
|
|
|
ignore leading blanks
|
|
|
|
=item B<--byte-offset>
|
|
|
|
=item B<-B>
|
|
|
|
print byte position where string would have been
|
|
|
|
=item B<--dictionary-order> (not implemented)
|
|
|
|
=item B<-d>
|
|
|
|
consider only blanks and alphanumeric characters
|
|
|
|
=item B<--debug> (not implemented)
|
|
|
|
=item B<-D>
|
|
|
|
annotate the part of the line used to sort, and warn about
|
|
questionable usage to stderr
|
|
|
|
=item B<--ignore-case>
|
|
|
|
=item B<-f>
|
|
|
|
fold lower case to upper case characters
|
|
|
|
=item B<--general-numeric-sort> (not implemented)
|
|
|
|
=item B<-g>
|
|
|
|
compare according to general numerical value
|
|
|
|
=item B<--ignore-nonprinting> (not implemented)
|
|
|
|
=item B<-i>
|
|
|
|
consider only printable characters
|
|
|
|
=item B<--month-sort> (not implemented)
|
|
|
|
=item B<-M>
|
|
|
|
compare (unknown) < 'JAN' < ... < 'DEC'
|
|
|
|
=item B<--human-numeric-sort> (not implemented)
|
|
|
|
=item B<-h>
|
|
|
|
compare human readable numbers (e.g., 2K 1G)
|
|
|
|
=item B<--key=KEYDEF> (not implemented)
|
|
|
|
=item B<-k>
|
|
|
|
sort via a key; KEYDEF gives location and type
|
|
|
|
=item B<--numeric-sort>
|
|
|
|
=item B<-n>
|
|
|
|
compare according to string numerical value
|
|
|
|
=item B<--random-sort>
|
|
|
|
=item B<-R>
|
|
|
|
sort by random hash of keys
|
|
|
|
=item B<--reverse>
|
|
|
|
=item B<-r>
|
|
|
|
reverse the result of comparisons
|
|
|
|
=item B<--sort=WORD> (not implemented)
|
|
|
|
sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
|
|
B<-M>, numeric B<-n>, random B<-R>, version B<-V>
|
|
|
|
=item B<-t> (not implemented)
|
|
|
|
=item B<--field-separator=SEP>
|
|
|
|
use SEP instead of non-blank to blank transition
|
|
|
|
=item B<-z>
|
|
|
|
=item B<--zero-terminated>
|
|
|
|
end lines with 0 byte, not newline
|
|
|
|
=back
|
|
|
|
=head1 EXAMPLES
|
|
|
|
=head2 Missing
|
|
|
|
Missing
|
|
|
|
|
|
=head1 REPORTING BUGS
|
|
|
|
B<bsearch> is part of tangetools. Report bugs to <tools@tange.dk>.
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
Copyright (C) 2016 Ole Tange http://ole.tange.dk
|
|
|
|
|
|
=head1 LICENSE
|
|
|
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
at your option any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
=head2 Documentation license I
|
|
|
|
Permission is granted to copy, distribute and/or modify this documentation
|
|
under the terms of the GNU Free Documentation License, Version 1.3 or
|
|
any later version published by the Free Software Foundation; with no
|
|
Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
|
|
Texts. A copy of the license is included in the file fdl.txt.
|
|
|
|
=head2 Documentation license II
|
|
|
|
You are free:
|
|
|
|
=over 9
|
|
|
|
=item B<to Share>
|
|
|
|
to copy, distribute and transmit the work
|
|
|
|
=item B<to Remix>
|
|
|
|
to adapt the work
|
|
|
|
=back
|
|
|
|
Under the following conditions:
|
|
|
|
=over 9
|
|
|
|
=item B<Attribution>
|
|
|
|
You must attribute the work in the manner specified by the author or
|
|
licensor (but not in any way that suggests that they endorse you or
|
|
your use of the work).
|
|
|
|
=item B<Share Alike>
|
|
|
|
If you alter, transform, or build upon this work, you may distribute
|
|
the resulting work only under the same, similar or a compatible
|
|
license.
|
|
|
|
=back
|
|
|
|
With the understanding that:
|
|
|
|
=over 9
|
|
|
|
=item B<Waiver>
|
|
|
|
Any of the above conditions can be waived if you get permission from
|
|
the copyright holder.
|
|
|
|
=item B<Public Domain>
|
|
|
|
Where the work or any of its elements is in the public domain under
|
|
applicable law, that status is in no way affected by the license.
|
|
|
|
=item B<Other Rights>
|
|
|
|
In no way are any of the following rights affected by the license:
|
|
|
|
=over 9
|
|
|
|
=item *
|
|
|
|
Your fair dealing or fair use rights, or other applicable
|
|
copyright exceptions and limitations;
|
|
|
|
=item *
|
|
|
|
The author's moral rights;
|
|
|
|
=item *
|
|
|
|
Rights other persons may have either in the work itself or in
|
|
how the work is used, such as publicity or privacy rights.
|
|
|
|
=back
|
|
|
|
=item B<Notice>
|
|
|
|
For any reuse or distribution, you must make clear to others the
|
|
license terms of this work.
|
|
|
|
=back
|
|
|
|
A copy of the full license is included in the file as cc-by-sa.txt.
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
B<bsearch> uses Perl.
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
B<grep>(1), B<sort>(1).
|
|
|
|
=cut
|
|
|
|
use Getopt::Long;
|
|
|
|
Getopt::Long::Configure("bundling","require_order");
|
|
|
|
GetOptions(
|
|
"debug|D=s" => \$opt::D,
|
|
"version" => \$opt::version,
|
|
"verbose|v" => \$opt::verbose,
|
|
"B|byte-offset" => \$opt::byte_offset,
|
|
"b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
|
|
"d|dictionary-order" => \$opt::dictionary_order,
|
|
"f|ignore-case" => \$opt::ignore_case,
|
|
"g|general-numeric-sort" => \$opt::general_numeric_sort,
|
|
"i|ignore-nonprinting" => \$opt::ignore_nonprinting,
|
|
"M|month-sort" => \$opt::month_sort,
|
|
"h|human-numeric-sort" => \$opt::human_numeric_sort,
|
|
"n|numeric-sort" => \$opt::numeric_sort,
|
|
"r|reverse" => \$opt::reverse,
|
|
"R|random-sort" => \$opt::random_sort,
|
|
"sort=s" => \$opt::sort,
|
|
"V|version-sort" => \$opt::version_sort,
|
|
"k|key=s" => \@opt::key,
|
|
"t|field-separator=s" => \$opt::field_separator,
|
|
"z|zero-terminated" => \$opt::zero_terminated,
|
|
);
|
|
$Global::progname = "bsearch";
|
|
$Global::version = 20160712;
|
|
if($opt::version) {
|
|
version();
|
|
exit 0;
|
|
}
|
|
if($opt::zero_terminated) { $/ = "\0"; }
|
|
|
|
my $file = shift;
|
|
|
|
for my $key (@ARGV) {
|
|
print bsearch($file,$key);
|
|
}
|
|
|
|
sub bsearch {
|
|
my $file = shift;
|
|
my $key = shift;
|
|
my $min = 0;
|
|
my $max = -s $file;
|
|
|
|
if(not open ($fh, "<", $file)) {
|
|
error("Cannot open '$file'");
|
|
exit 1;
|
|
}
|
|
my $line;
|
|
while($max - $min > 1) {
|
|
$middle = int(($max + $min)/2);
|
|
seek($fh,$middle,0) or die;
|
|
my $half = <$fh>;
|
|
if(eof($fh)
|
|
or
|
|
compare(($line = <$fh>),$key) >= 0) {
|
|
$max = $middle;
|
|
} else {
|
|
$min = $middle;
|
|
}
|
|
}
|
|
seek($fh,$max,0) or die;
|
|
$line = <$fh>;
|
|
if(compare($line,$key) >= 0) {
|
|
if($opt::byte_offset) {
|
|
return "0\n";
|
|
} else {
|
|
# The very first line
|
|
return "";
|
|
}
|
|
} else {
|
|
if($opt::byte_offset) {
|
|
return tell($fh)."\n";
|
|
} else {
|
|
return $line;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub compare {
|
|
my ($a,$b) = @_;
|
|
if($opt::random_sort) {
|
|
return rand() <=> rand();
|
|
}
|
|
if($opt::reverse) {
|
|
($a,$b) = ($b,$a);
|
|
}
|
|
if($opt::ignore_case) {
|
|
$a = uc($a);
|
|
$b = uc($b);
|
|
}
|
|
if($opt::numeric_sort) {
|
|
return $a <=> $b;
|
|
} elsif($opt::numascii) {
|
|
return $a <=> $b or $a cmp $b;
|
|
} else {
|
|
return $a cmp $b;
|
|
}
|
|
}
|
|
|
|
sub status {
|
|
my @w = @_;
|
|
my $fh = $Global::status_fd || *STDERR;
|
|
print $fh map { ($_, "\n") } @w;
|
|
flush $fh;
|
|
}
|
|
|
|
sub status_no_nl {
|
|
my @w = @_;
|
|
my $fh = $Global::status_fd || *STDERR;
|
|
print $fh @w;
|
|
flush $fh;
|
|
}
|
|
|
|
sub warning {
|
|
my @w = @_;
|
|
my $prog = $Global::progname || "parallel";
|
|
status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
|
|
}
|
|
|
|
sub error {
|
|
my @w = @_;
|
|
my $prog = $Global::progname || "parallel";
|
|
status(map { ($prog.": Error: ". $_); } @w);
|
|
}
|
|
|
|
sub die_bug {
|
|
my $bugid = shift;
|
|
print STDERR
|
|
("$Global::progname: This should not happen. You have found a bug.\n",
|
|
"Please contact <parallel\@gnu.org> and include:\n",
|
|
"* The version number: $Global::version\n",
|
|
"* The bugid: $bugid\n",
|
|
"* The command line being run\n",
|
|
"* The files being read (put the files on a webserver if they are big)\n",
|
|
"\n",
|
|
"If you get the error on smaller/fewer files, please include those instead.\n");
|
|
::wait_and_exit(255);
|
|
}
|
|
|
|
sub version {
|
|
# Returns: N/A
|
|
print join("\n",
|
|
"GNU $Global::progname $Global::version",
|
|
"Copyright (C) 2016",
|
|
"Ole Tange and Free Software Foundation, Inc.",
|
|
"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
|
|
"This is free software: you are free to change and redistribute it.",
|
|
"GNU $Global::progname comes with no warranty.",
|
|
"",
|
|
"Web site: http://www.gnu.org/software/${Global::progname}\n",
|
|
"When using programs that use GNU Parallel to process data for publication",
|
|
"please cite as described in 'parallel --citation'.\n",
|
|
);
|
|
}
|