bsearch: binary search in sorted text files. Initial version.
This commit is contained in:
parent
774a5e5d67
commit
5020d3fbe7
2
README
2
README
|
@ -2,6 +2,8 @@ Tools developed by Ole Tange <ole@tange.dk>.
|
||||||
|
|
||||||
Probably not useful for you, but then again you never know.
|
Probably not useful for you, but then again you never know.
|
||||||
|
|
||||||
|
bsearch - binary search through sorted text files.
|
||||||
|
|
||||||
em - Force emacs to run in terminal. Use xemacs if installed.
|
em - Force emacs to run in terminal. Use xemacs if installed.
|
||||||
|
|
||||||
field - Split on space. Give the given field number. Support syntax 1-3,6-
|
field - Split on space. Give the given field number. Support syntax 1-3,6-
|
||||||
|
|
146
bsearch/bsearch
Executable file
146
bsearch/bsearch
Executable file
|
@ -0,0 +1,146 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
|
||||||
|
# Parse the command line. The option names appear to be modelled on
# sort(1); of these, compare() reads only --numeric-sort, --reverse and
# --ignore-case.
# Getopt::Long has already printed a diagnostic when GetOptions returns
# false, so stop instead of searching with half-parsed options.
GetOptions(
    "debug|D=s" => \$opt::D,
    "version" => \$opt::version,
    "verbose|v" => \$opt::verbose,
    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
    "d|dictionary-order" => \$opt::dictionary_order,
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
    "n|numeric-sort" => \$opt::numeric_sort,
    "r|reverse" => \$opt::reverse,
    "sort=s" => \$opt::sort,
    "V|version-sort" => \$opt::version_sort,
    "k|key=s" => \@opt::key,
    "t|field-separator=s" => \$opt::field_separator,
    "z|zero-terminated" => \$opt::zero_terminated,
    ) or exit 1;
$Global::progname = "bsearch";
$Global::version = 20160712;
if($opt::version) {
    version();
    exit 0;
}

# The first remaining argument is the file to search; all following
# arguments are keys.
my $file = shift;
if(not defined $file) {
    # Without this check a bare invocation silently does nothing.
    error("Usage: $Global::progname [options] file key [key ...]");
    exit 1;
}

# One output line per key: whatever bsearch() returns for it.
for my $key (@ARGV) {
    print bsearch($file,$key),"\n";
}
|
||||||
|
|
||||||
|
sub bsearch {
    # Binary search for a key in a sorted text file, working on byte
    # offsets.
    # Input:
    #   $file = name of the file to search; its lines must be sorted in
    #           the order implemented by compare()
    #   $key = value to look for
    # Returns:
    #   0 if the text read from the final upper-bound offset to the end
    #     of that line compares >= $key,
    #   otherwise the byte offset just after that text
    # Exits with status 1 (after printing an error) if $file cannot be
    # opened. Dies if a seek fails.
    my $file = shift;
    my $key = shift;
    my $min = 0;
    # -s yields "" for an empty file; normalise to 0 for the arithmetic
    my $max = (-s $file) || 0;

    # Lexical handle instead of the package global $fh
    my $fh;
    if(not open($fh, "<", $file)) {
        error("Cannot open '$file'");
        exit 1;
    }
    my $line;
    while($max - $min > 1) {
        my $middle = int(($max+$min)/2);
        seek($fh,$middle,0)
            or die "$Global::progname: seek to $middle in '$file' failed: $!\n";
        # $middle may fall inside a line: read and drop the rest of it so
        # the next read starts at a line boundary
        my $partial = <$fh>;
        if(eof($fh)
           or
           compare(($line = <$fh>),$key) >= 0) {
            $max = $middle;
        } else {
            $min = $middle;
        }
    }
    seek($fh,$max,0)
        or die "$Global::progname: seek to $max in '$file' failed: $!\n";
    $line = <$fh>;
    my $result = (compare($line,$key) >= 0) ? 0 : tell($fh);
    # Release the handle; this sub is called once per key
    close $fh;
    return $result;
}
|
||||||
|
|
||||||
|
# Options read by compare():
#   -n, --numeric-sort
#   -r, --reverse
#   -f, --ignore-case

sub compare {
    # Compare two values in the order selected on the command line.
    # Input:
    #   $left, $right = the two values to compare
    # Uses:
    #   $opt::reverse      = swap the operands before comparing
    #   $opt::ignore_case  = upper-case both operands before comparing
    #   $opt::numeric_sort = compare numerically
    #   $opt::numascii     = compare numerically, break ties as strings
    # Returns:
    #   a negative, zero or positive value like <=> and cmp
    #
    # The arguments are deliberately not named $a/$b: those are the
    # package variables used by sort().
    my ($left,$right) = @_;
    if($opt::reverse) {
        ($left,$right) = ($right,$left);
    }
    if($opt::ignore_case) {
        $left = uc($left);
        $right = uc($right);
    }
    if($opt::numeric_sort) {
        return $left <=> $right;
    } elsif($opt::numascii) {
        # '||' and not 'or': 'return A or B' parses as '(return A) or B',
        # which would drop the string tie-break.
        return(($left <=> $right) || ($left cmp $right));
    } else {
        return $left cmp $right;
    }
}
|
||||||
|
|
||||||
|
sub status {
    # Print every argument followed by a newline on the status
    # filehandle and flush it.
    # Uses:
    #   $Global::status_fd = filehandle to print on (STDERR if unset)
    # Returns: N/A
    my @messages = @_;
    my $out = $Global::status_fd || *STDERR;
    my @chunks;
    for my $message (@messages) {
        push @chunks, $message, "\n";
    }
    print {$out} @chunks;
    $out->flush();
}
|
||||||
|
|
||||||
|
sub status_no_nl {
    # Print the arguments unchanged (no newline is added) on the status
    # filehandle and flush it.
    # Uses:
    #   $Global::status_fd = filehandle to print on (STDERR if unset)
    # Returns: N/A
    my @pieces = @_;
    my $out = $Global::status_fd || *STDERR;
    print {$out} @pieces;
    $out->flush();
}
|
||||||
|
|
||||||
|
sub warning {
    # Print each argument as a "<program>: Warning: <text>" line via
    # status_no_nl().
    # Uses:
    #   $Global::progname = program name for the prefix
    #                       ("parallel" if unset)
    # Returns: N/A
    my @messages = @_;
    my $prog = $Global::progname || "parallel";
    my @pieces;
    for my $message (@messages) {
        push @pieces, $prog, ": Warning: ", $message, "\n";
    }
    status_no_nl(@pieces);
}
|
||||||
|
|
||||||
|
sub error {
    # Print each argument as a "<program>: Error: <text>" line via
    # status(), which appends the newline.
    # Uses:
    #   $Global::progname = program name for the prefix
    #                       ("parallel" if unset)
    # Returns: N/A
    my @messages = @_;
    my $prog = $Global::progname || "parallel";
    my @lines;
    for my $message (@messages) {
        push @lines, $prog . ": Error: " . $message;
    }
    status(@lines);
}
|
||||||
|
|
||||||
|
sub die_bug {
    # Report an internal error on STDERR and terminate with status 255.
    # Input:
    #   $bugid = identifier of the failed check; included in the report
    # Uses:
    #   $Global::progname, $Global::version
    # Returns: N/A (does not return)
    my $bugid = shift;
    print STDERR
        ("$Global::progname: This should not happen. You have found a bug.\n",
         "Please contact <parallel\@gnu.org> and include:\n",
         "* The version number: $Global::version\n",
         "* The bugid: $bugid\n",
         "* The command line being run\n",
         "* The files being read (put the files on a webserver if they are big)\n",
         "\n",
         "If you get the error on smaller/fewer files, please include those instead.\n");
    # wait_and_exit() is not defined in this script (it looks inherited
    # from GNU Parallel). Use it only if some other code provides it, so
    # the bug report is not followed by an "Undefined subroutine" death.
    if(defined &::wait_and_exit) {
        ::wait_and_exit(255);
    }
    exit(255);
}
|
||||||
|
|
||||||
|
sub version {
    # Print the version and licence banner on the currently selected
    # output handle.
    # Uses:
    #   $Global::progname, $Global::version
    # Returns: N/A
    my @banner =
        (
         "GNU $Global::progname $Global::version",
         "Copyright (C) 2016",
         "Ole Tange and Free Software Foundation, Inc.",
         "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
         "This is free software: you are free to change and redistribute it.",
         "GNU $Global::progname comes with no warranty.",
         "",
         # The embedded newlines below produce the blank line after the
         # web site and the final line terminator.
         "Web site: http://www.gnu.org/software/${Global::progname}\n",
         "When using programs that use GNU Parallel to process data for publication",
         "please cite as described in 'parallel --citation'.\n",
        );
    my $text = join("\n", @banner);
    print $text;
}
|
43
bsearch/regressiontest
Executable file
43
bsearch/regressiontest
Executable file
|
@ -0,0 +1,43 @@
|
||||||
|
#!/bin/bash

# Regression test for bsearch.
# Every shell function whose name contains "test_" is exported and run
# through GNU parallel; stderr is merged into stdout, output is kept in
# input order (-k) and each line is prefixed with the function name (--tag).

# mktemp replaces tempfile(1), which is Debian-specific and deprecated.
test_tmp=$(mktemp)
export test_tmp
# Remove the base temp file and the per-test files derived from it.
trap 'rm -f "$test_tmp" "$test_tmp"_*' EXIT

test_n() {
    # Run 'bsearch -n' with the keys 0 2 2.1 100000 against small numeric
    # files: empty, a single blank line, and 1-3 numbers where one of them
    # is written with trailing zeros. 'xargs < file' prints the file
    # content on one line before each search.
    local tmp="${test_tmp}_n"

    true > "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    echo > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1.000000000 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1.000000000 2 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1 2.000000000 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1.000000000 2 3 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1 2.000000000 3 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000

    printf '%s\n' 1 2 3.000000000 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
}


export -f $(compgen -A function | grep test_)
compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1'
|
Loading…
Reference in a new issue