2search: bsearch/bgrep renamed to 2search/2grep (bgrep is used by others).

2020-03-28 15:37:52 +01:00 · 2020-03-28 15:37:52 +01:00 · e8f520f642
parent 9efd18d0fc
commit e8f520f642
7 changed files with 2041 additions and 460 deletions
--- a/2search/2grep
+++ b/2search/2grep
@ -0,0 +1,777 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+2search - binary search through sorted text files
+
+2grep - binary search+grep through sorted text files
+
+=head1 SYNOPSIS
+
+B<2search> [-nrfB] file string [string...]
+
+B<2search> --grep [-nrf] file string [string...]
+
+B<2grep> [-nrf] file string [string...]
+
+... | B<2search> [-nrfB] file
+
+... | B<2search> --grep [-nrf] file
+
+... | B<2grep> [-nrf] file
+
+=head1 DESCRIPTION
+
+B<2search> searches a sorted file for a string. It outputs the
+following line or the byte position of this line, which is where the
+string would have been if it had been in the sorted file.
+
+B<2grep> outputs all lines starting with a given string. The file must
+be sorted.
+
+=over 9
+
+=item B<--ignore-leading-blanks>
+
+=item B<-b>
+
+ignore leading blanks
+
+
+=item B<--byte-offset>
+
+=item B<-B>
+
+print byte position where string would have been
+
+
+=item B<--dictionary-order> (not implemented)
+
+=item B<-d>
+
+consider only blanks and alphanumeric characters
+
+
+=item B<--debug> (not implemented)
+
+=item B<-D>
+
+annotate the part of the line used to sort, and warn about
+questionable usage to stderr
+
+
+=item B<--ignore-case>
+
+=item B<-f>
+
+fold lower case to upper case characters
+
+
+=item B<--file> I<file>
+
+=item B<-F> I<file>
+
+search for all lines in I<file>
+
+
+=item B<--general-numeric-sort> (not implemented)
+
+=item B<-g>
+
+compare according to general numerical value
+
+
+=item B<--ignore-nonprinting> (not implemented)
+
+=item B<-i>
+
+consider only printable characters
+
+
+=item B<--month-sort>
+
+=item B<-M>
+
+compare (unknown) < 'JAN' < ... < 'DEC'
+
+
+=item B<--human-numeric-sort>
+
+=item B<-h>
+
+compare human readable numbers (e.g., 2K 1G)
+
+
+=item B<--key=KEYDEF> (not implemented)
+
+=item B<-k>
+
+sort via a key; KEYDEF gives location and type
+
+
+=item B<--numeric-sort>
+
+=item B<-n>
+
+compare according to string numerical value. If numerical values are
+the same: split the string into blocks of numbers and non-numbers, and
+compare numbers as numbers and strings as strings.
+
+This will sort like this: chr3 chr11 3chr 11chr
+
+
+=item B<--numascii>
+
+=item B<-N>
+
+compare according to string numerical value. If numerical values are
+the same: compare as strings
+
+
+=item B<--random-sort>
+
+=item B<-R>
+
+sort by random hash of keys
+
+
+=item B<--reverse>
+
+=item B<-r>
+
+reverse the result of comparisons
+
+
+=item B<--sort=WORD> (not implemented)
+
+sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
+B<-M>, numeric B<-n>, random B<-R>, version B<-V>
+
+
+=item B<-t>
+
+=item B<--field-separator=SEP>
+
+use SEP instead of non-blank to blank transition
+
+
+=item B<-z>
+
+=item B<--zero-terminated>
+
+end lines with 0 byte, not newline
+
+=back
+
+=head1 EXAMPLES
+
+=head2 Single key
+
+Input is sorted by Chromosome,Position:
+
+  SampleID Position Chromosome 
+  foo      10000123 chr3       
+  foo      10000125 chr3       
+  foo      9999998  chr11      
+  foo      10000124 chr11      
+  foo      10000126 chr11      
+
+To find all chr3:
+
+  2grep -n -k3 inputfile chr3
+
+-n will split 'chr3' into 'chr' which is compared asciibetically and
+'3' which is compared numerically.
+
+=head2 Not implemented
+
+To find all lines with chr3,10000125:
+
+  2grep -k3n,2n inputfile chr3 10000125
+
+
+
+=head1 REPORTING BUGS
+
+B<2search> is part of tangetools. Report bugs to <tools@tange.dk>.
+
+
+=head1 AUTHOR
+
+Copyright (C) 2016-2020 Ole Tange http://ole.tange.dk
+
+
+=head1 LICENSE
+
+Copyright (C) 2013 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+=head2 Documentation license I
+
+Permission is granted to copy, distribute and/or modify this documentation
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with no
+Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
+Texts.  A copy of the license is included in the file fdl.txt.
+
+=head2 Documentation license II
+
+You are free:
+
+=over 9
+
+=item B<to Share>
+
+to copy, distribute and transmit the work
+
+=item B<to Remix>
+
+to adapt the work
+
+=back
+
+Under the following conditions:
+
+=over 9
+
+=item B<Attribution>
+
+You must attribute the work in the manner specified by the author or
+licensor (but not in any way that suggests that they endorse you or
+your use of the work).
+
+=item B<Share Alike>
+
+If you alter, transform, or build upon this work, you may distribute
+the resulting work only under the same, similar or a compatible
+license.
+
+=back
+
+With the understanding that:
+
+=over 9
+
+=item B<Waiver>
+
+Any of the above conditions can be waived if you get permission from
+the copyright holder.
+
+=item B<Public Domain>
+
+Where the work or any of its elements is in the public domain under
+applicable law, that status is in no way affected by the license.
+
+=item B<Other Rights>
+
+In no way are any of the following rights affected by the license:
+
+=over 9
+
+=item *
+
+Your fair dealing or fair use rights, or other applicable
+copyright exceptions and limitations;
+
+=item *
+
+The author's moral rights;
+
+=item *
+
+Rights other persons may have either in the work itself or in
+how the work is used, such as publicity or privacy rights.
+
+=back
+
+=item B<Notice>
+
+For any reuse or distribution, you must make clear to others the
+license terms of this work.
+
+=back
+
+A copy of the full license is included in the file as cc-by-sa.txt.
+
+=head1 DEPENDENCIES
+
+B<2search>/B<2grep> uses Perl.
+
+
+=head1 SEE ALSO
+
+B<grep>(1), B<sort>(1).
+
+=cut
+
+use strict;
+use Getopt::Long;
+
+Getopt::Long::Configure("bundling","require_order");
+
+# Set the program name first: error() uses it in its messages
+$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
+$Global::version = 20200328;
+# GetOptions() prints a diagnostic and returns false if an option is
+# unknown or malformed: stop instead of searching with a half-parsed
+# command line
+if(not GetOptions(
+	   "debug|D" => \$opt::D,
+	   "version" => \$opt::version,
+	   "verbose|v" => \$opt::verbose,
+	   "B|byte-offset" => \$opt::byte_offset,
+	   "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
+	   "d|dictionary-order" => \$opt::dictionary_order,
+	   "f|ignore-case" => \$opt::ignore_case,
+	   "g|general-numeric-sort" => \$opt::general_numeric_sort,
+	   "G|grep" => \$opt::grep,
+	   "F|file=s" => \$opt::file,
+	   "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
+	   "M|month-sort" => \$opt::month_sort,
+	   "h|human-numeric-sort" => \$opt::human_numeric_sort,
+	   "n|numeric-sort" => \$opt::numeric_sort,
+	   "N|numascii" => \$opt::numascii,
+	   "r|reverse" => \$opt::reverse,
+	   "R|random-sort" => \$opt::random_sort,
+	   "sort=s" => \$opt::sort,
+	   "V|version-sort" => \$opt::version_sort,
+	   "k|key=s" => \@opt::key,
+	   "t|field-separator=s" => \$opt::field_separator,
+	   "z|zero-terminated" => \$opt::zero_terminated,
+       )) {
+    error("Unknown or invalid option");
+    exit(255);
+}
+if($opt::version) { version(); exit 0; }
+# -z: records (in the searched file and in the search strings) end in NUL
+if($opt::zero_terminated) { $/ = "\0"; }
+if(@opt::key) {
+    # Default separator if --key = whitespace
+    $Global::sep = '\s+';
+    # NOTE: the separator is used as a regexp by split in compare()
+    if(defined $opt::field_separator) { $Global::sep = $opt::field_separator; }
+}
+# When called as 2grep: behave as 2search --grep
+if($Global::progname eq "2grep") { $opt::grep = 1; }
+$Global::debug = $opt::D;
+
+parse_keydef();
+
+if($Global::debug) {
+    # Only dump when debugging: my_dump() loads Data::Dump(er) with
+    # string evals every time it is called
+    debug(my_dump(\@Global::keydefs),"\n");
+}
+
+# First non-option argument is the sorted file to search in
+my $file = shift;
+if(not defined $file) {
+    # Without this check the failure only shows up later as
+    # "Cannot open ''" after the search strings have been read
+    error("No file to search in was given");
+    exit(255);
+}
+# The strings to search for come from (in order of priority):
+# the remaining command line arguments, --file, or stdin
+if(@ARGV) {
+    $opt::argv = 1;
+} elsif(defined $opt::file) {
+    # skip: get() reads from --file
+} else {
+    $opt::stdin = 1;
+}
+
+ ROUND:
+    while(1) {
+	# One round = one search: fetch one search value per key definition
+	my @search_vals;
+	for my $keydef (@Global::keydefs) {
+	    my $val = get();
+	    # No more search values: we are done
+	    defined $val or last ROUND;
+	    push @search_vals, $val;
+	}
+	if(not $opt::grep) {
+	    # Print the line (or byte position) found by the binary search
+	    print bsearch($file,@search_vals);
+	} else {
+	    # Print all lines starting with the search values
+	    bgrep($file,@search_vals);
+	}
+}
+
+{
+    # Filehandle for --file. It is kept between calls to get() so each
+    # call reads the next line.
+    my $fh;
+
+    sub get {
+	# Get the next string to search for
+	# The source is the command line arguments ($opt::argv),
+	# the lines of --file ($opt::file), or the lines on stdin ($opt::stdin)
+	# Returns:
+	#   $val = next search string with the record separator removed
+	#   undef if there are no more search strings
+	if($opt::argv) {
+	    # Search for strings on the command line
+	    return shift @ARGV;
+	}
+	if(defined $opt::file) {
+	    # Search for strings given with --file
+	    if(not $fh) {
+		# Open into the shared $fh: a 'my $fh' declared here would
+		# go out of scope at once and leave nothing to read from
+		if(not open($fh, "<", $opt::file)) {
+		    error("Cannot open $opt::file");
+		    exit(255);
+		}
+	    }
+	    my $val = <$fh>;
+	    defined $val and chomp $val;
+	    return $val;
+	}
+	if($opt::stdin) {
+	    # Search for strings on stdin
+	    my $val = <>;
+	    defined $val and chomp $val;
+	    return $val;
+	}
+	die("get(): no source of search strings selected");
+    }
+}
+
+sub bgrep {
+    # Print all lines in a sorted file that start with the search values
+    # Input:
+    #   $file = name of the sorted file to search in
+    #   @search_vals = one search value per key definition
+    # Returns: N/A
+    my $file = shift;
+    my @search_vals = @_;
+    # bsearch() must return a byte position here, not a line.
+    # 'local' restores the previous setting when bgrep() returns.
+    local $opt::byte_offset = 1;
+    # bsearch() returns the position followed by "\n": keep the number
+    my $startpos = int(bsearch($file,@search_vals));
+    my $fh;
+    if(not open ($fh, "<", $file)) {
+	error("Cannot open '$file'");
+	exit 1;
+    }
+    seek($fh,$startpos,0) or die("Cannot seek to $startpos");
+    # Allow for partial matches in grep (4 matches 40, A matches Aaa)
+    for my $keydef (@Global::keydefs) {
+	$keydef->{'partial_match'} = 1;
+    }
+    my $line;
+    # Test with defined(): a final line "0" without a newline is a false
+    # value and would otherwise end the loop before it is compared
+    while(defined($line = <$fh>)
+	  and
+	  not compare($line,@search_vals)) {
+	print $line;
+    }
+    close $fh;
+    # Back to exact comparison for the next binary search
+    for my $keydef (@Global::keydefs) {
+	$keydef->{'partial_match'} = 0;
+    }
+}
+
+sub bsearch {
+    # Binary search a sorted file for the search values
+    # Input:
+    #   $file = name of the sorted file to search in
+    #   @search_vals = one search value per key definition
+    # Returns:
+    #   if $opt::byte_offset is set: a byte position followed by "\n"
+    #   otherwise: a line read from the file
+    #     (undef if the read was past the end of the file)
+    my $file = shift;
+    my @search_vals = @_;
+    # $min/$max are the byte positions bounding the search
+    my $min = 0;
+    my $max = -s $file;
+    my $fh;
+    if(not open ($fh, "<", $file)) {
+	error("Cannot open '$file'");
+	exit 1;
+    }
+    my($line,$middle);
+    # $minnl/$maxnl are the positions found by tell() after skipping
+    # to the next line from $min/$max
+    my $minnl = $min;
+    my $maxnl = $max;
+    while($max - $min > 1) {
+	$middle = int(($max + $min)/2);
+	seek($fh,$middle,0) or die("Cannot seek to $middle");
+	if($middle > 0) {
+	    # We probably landed in the middle of a line:
+	    # read (and ignore) the rest of that line
+	    <$fh>;
+	}
+	my $newline_pos = tell($fh);
+	debug("$min <= $middle <= $newline_pos <= $max\n");
+	debug("$minnl <= $newline_pos <= $maxnl\n");
+	if($newline_pos == $maxnl
+	   or
+	   eof($fh)
+	   or
+	   compare(($line = <$fh>),@search_vals) >= 0) {
+	    # We have seen this newline position before
+	    # or we are at the end of the file
+	    # or the line compares >= the search values:
+	    # continue in the lower half
+	    $max = $middle;
+	    $maxnl = $newline_pos;
+	} else {
+	    # The line compares < the search values:
+	    # continue in the upper half
+	    $min = $middle;
+	    $minnl = $newline_pos;
+	}
+    }
+    # Re-read the line at the lower bound to decide whether the result
+    # is that line or the one following it
+    seek($fh,$minnl,0) or die("Cannot seek to $minnl");
+    $line = <$fh>;
+    if(compare($line,@search_vals) >= 0) {
+	if($opt::byte_offset) {
+	    return $minnl."\n";
+	} else {
+	    return $line;
+	}
+    } else {
+	if($opt::byte_offset) {
+	    # Position just after the line read above
+	    return tell($fh)."\n";
+	} else {
+	    $line=<$fh>;
+	    return $line;
+	}
+    }
+}
+
+sub parse_keydef {
+    # Parse the --key definitions F[.C][OPTS][,F[.C][OPTS]] into
+    # @Global::keydefs (one hash ref per key). If no --key is given a
+    # single key on field 1, char 1 with the global order options is used.
+    # Returns: N/A
+
+    # Short option letter => name of the order option in a keydef
+    my %ordertbl = (
+	"b" => 'ignore_leading_blanks',
+	"d" => 'dictionary_order',
+	"f" => 'ignore_case',
+	"g" => 'general_numeric_sort',
+	"i" => 'ignore_nonprinting',
+	"M" => 'month_sort',
+	"h" => 'human_numeric_sort',
+	"n" => 'numeric_sort',
+	"N" => 'numascii',
+	"r" => 'reverse',
+	"R" => 'random_sort',
+	"V" => 'version_sort',
+	);
+    # Short option letter => value given as a global option
+    my %defaultorder = (
+	"b" => $opt::ignore_leading_blanks,
+	"d" => $opt::dictionary_order,
+	"f" => $opt::ignore_case,
+	"g" => $opt::general_numeric_sort,
+	"i" => $opt::ignore_nonprinting,
+	"M" => $opt::month_sort,
+	"h" => $opt::human_numeric_sort,
+	"n" => $opt::numeric_sort,
+	"N" => $opt::numascii,
+	"r" => $opt::reverse,
+	"R" => $opt::random_sort,
+	"V" => $opt::version_sort,
+	);
+    # Make a keydef for (field, char) preloaded with the global options
+    my $new_keydef = sub {
+	my ($field, $char) = @_;
+	my $opt = { 'field' => $field, 'char' => $char };
+	for my $letter (keys %defaultorder) {
+	    $opt->{$ordertbl{$letter}} = $defaultorder{$letter};
+	}
+	return $opt;
+    };
+
+    if(not @opt::key) {
+	# Convert -n -r to -k1rn
+	# with sep = undef
+	$Global::sep = undef;
+	push(@Global::keydefs, $new_keydef->(1, 1));
+    }
+
+    for my $keydefs (@opt::key) {
+	for my $keydef (split /,/, $keydefs) {
+	    my $opt;
+	    if($keydef =~ /^(\d+)(\.(\d+))?([bdfgiMhnNRrV]+)?$/) {
+		# F = field, C = char (default 1), OPTS = order letters
+		my ($field, $char, $letters) = ($1, $3 || 1, $4);
+		$opt = $new_keydef->($field, $char);
+		# Letters given on the key switch the option on for this key
+		for my $o (split //, $letters) {
+		    $opt->{$ordertbl{$o}} = 1;
+		}
+	    } else {
+		error("Keydef $keydef does not match F[.C][OPTS]");
+		exit(255);
+	    }
+	    push(@Global::keydefs,$opt);
+	}
+    }
+}
+
+sub compare {
+    # Compare a line from the file with the search values,
+    # one search value per key definition
+    # Input:
+    #   $line = line from the file
+    #   @search_vals = search values in the order of @Global::keydefs
+    # Returns:
+    #   result of compare_single() for the first key that differs
+    #   0 if no key differs
+    my($line,@search_vals) = @_;
+    chomp($line);
+    debug("Compare: $line <=> @search_vals ");
+    # Without a separator the whole line is the only field
+    my @field = $Global::sep ? split(/$Global::sep/o, $line) : ($line);
+    my $idx = 0;
+    for my $keydef (@Global::keydefs) {
+	# keydef = F[.C][OPTS][,F[.C][OPTS]]
+	# The key starts at char C (1-based) of field F (1-based)
+	my $key = substr($field[$keydef->{'field'}-1], $keydef->{'char'}-1);
+	my $cmp = compare_single($key, $search_vals[$idx++], $keydef);
+	debug("== $cmp\n");
+	# They differ on this key
+	return $cmp if $cmp;
+    }
+    # No difference on any keydefs
+    return 0;
+}
+
+sub compare_single {
+    # Compare a key from a line with a search value based on order options
+    # Input:
+    #   $key = key taken from the line in the file
+    #   $val = value searched for
+    #   $opt = keydef hash ref with the order options
+    # Returns:
+    #   negative, 0, or positive (like cmp and <=>)
+    my ($key,$val,$opt) = @_;
+    if($Global::debug) {
+	# Only dump when debugging: my_dump() runs string evals that are
+	# too expensive to do for every comparison
+	debug("$key <=> $val");
+	debug(my_dump($opt),"\n");
+    }
+    if($opt->{'random_sort'}) {
+	return rand() <=> rand();
+    }
+    if($opt->{'ignore_leading_blanks'}) {
+	$key =~ s/^\s+//;
+	$val =~ s/^\s+//;
+    }
+    if($opt->{'ignore_case'}) {
+	$key = uc($key);
+	$val = uc($val);
+    }
+    if($opt->{'partial_match'}) {
+	# String 'foo' matches 'foobar'
+	$key = substr($key,0,length $val);
+    }
+    if($opt->{'reverse'}) {
+	($key,$val) = ($val,$key);
+    }
+    if($opt->{'human_numeric_sort'}) {
+	return multiply_binary_prefix($key) <=> multiply_binary_prefix($val);
+    }
+    if($opt->{'month_sort'}) {
+	# (unknown) = 0 < JAN = 1 < ... < DEC = 12
+	my %m;
+	my @mon = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
+	# Must be a list (1..12): {1..12} is a single anonymous hash ref
+	@m{@mon} = (1..12);
+	return ($m{$key} || 0) <=> ($m{$val} || 0);
+    }
+    if($opt->{'numeric_sort'}) {
+	return $key <=> $val;
+    } elsif($opt->{'numascii'}) {
+	# Must be '||': 'or' binds looser than 'return', so the string
+	# comparison would never be reached
+	return ($key <=> $val || $key cmp $val);
+    } else {
+	return $key cmp $val;
+    }
+}
+
+sub multiply_binary_prefix(@) {
+    # Evaluate numbers with binary or decimal prefix
+    # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80, Xi=2^90
+    #   (the two-letter prefixes are matched case-insensitively)
+    # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^60, Z =2^70, Y =2^80, X =2^90
+    # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24, x=10^27
+    # 13G = 13*1024*1024*1024 = 13958643712
+    # Input:
+    #   @v = strings with prefixes
+    # Returns:
+    #   list context: all values with prefixes multiplied
+    #   scalar context: the first value with prefixes multiplied
+    my @v = @_;
+    # 1E3=1000, 1E-3=0.001
+    s/e([+-]?\d+)/*10**$1/gi for @v;
+    # The prefix at index n multiplies by the base n+1 times.
+    # The order matters: two-letter before upper case before lower case.
+    my @two_letter = qw(ki mi gi ti pi ei zi yi xi);
+    my @upper = qw(K M G T P E Z Y X);
+    my @lower = qw(k m g t p e z y x);
+    for(@v) {
+	defined $_ or next;
+	for my $n (0 .. $#two_letter) {
+	    my ($prefix, $factor) = ($two_letter[$n], "*1024" x ($n+1));
+	    s/$prefix/$factor/gi;
+	}
+	for my $n (0 .. $#upper) {
+	    my ($prefix, $factor) = ($upper[$n], "*1024" x ($n+1));
+	    s/$prefix/$factor/g;
+	}
+	for my $n (0 .. $#lower) {
+	    my ($prefix, $factor) = ($lower[$n], "*1000" x ($n+1));
+	    s/$prefix/$factor/g;
+	}
+	# NOTE(security): this evals text that comes from the searched
+	# file or from the search strings. A crafted value can run
+	# arbitrary Perl code. Consider validating the expression first.
+	$_ = eval $_;
+    }
+    return wantarray ? @v : $v[0];
+}
+
+sub status {
+    # Print each argument followed by a newline on the status
+    # filehandle ($Global::status_fd if set, otherwise STDERR)
+    # and flush the filehandle
+    my @lines = @_;
+    my $out = $Global::status_fd || *STDERR;
+    my $text = join("", map { $_ . "\n" } @lines);
+    print {$out} $text;
+    $out->flush();
+}
+
+sub status_no_nl {
+    # Print the arguments as they are (no newline added) on the status
+    # filehandle ($Global::status_fd if set, otherwise STDERR)
+    # and flush the filehandle
+    my @parts = @_;
+    my $out = $Global::status_fd || *STDERR;
+    print {$out} @parts;
+    $out->flush();
+}
+
+sub warning {
+    # Print each argument as "PROG: Warning: MESSAGE" on its own line
+    # using status_no_nl()
+    my $prog = $Global::progname || "parallel";
+    my @out;
+    for my $msg (@_) {
+	push @out, $prog, ": Warning: ", $msg, "\n";
+    }
+    status_no_nl(@out);
+}
+
+sub error {
+    # Print each argument as "PROG: Error: MESSAGE" on its own line
+    # using status()
+    my $prefix = ($Global::progname || "parallel").": Error: ";
+    my @out;
+    for my $msg (@_) {
+	push @out, $prefix.$msg;
+    }
+    status(@out);
+}
+
+sub die_bug {
+    # Tell the user on STDERR how to report a bug in this program
+    # and exit with status 255
+    # Input:
+    #   $bugid = identifier of the place where the bug was detected
+    my ($bugid) = @_;
+    my $report = join("",
+	"$Global::progname: This should not happen. You have found a bug.\n",
+	"Please submit a bug at https://gitlab.com/ole.tange/tangetools/-/issues\n",
+	"and include:\n",
+	"* The version number: $Global::version\n",
+	"* The bugid: $bugid\n",
+	"* The command line being run\n",
+	"* The files being read (put the files on a webserver if they are big)\n",
+	"\n",
+	"If you get the error on smaller/fewer files, please include those instead.\n");
+    print STDERR $report;
+    exit(255);
+}
+
+sub version {
+    # Print program name, version, copyright, and license on the
+    # currently selected output filehandle
+    # Returns: N/A
+    my @lines = (
+	"$Global::progname $Global::version",
+	"Copyright (C) 2016-2020",
+	"Ole Tange and Free Software Foundation, Inc.",
+	"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
+	"This is free software: you are free to change and redistribute it.",
+	"$Global::progname comes with no warranty.",
+	"",
+	"Web site: https://gitlab.com/ole.tange/tangetools/\n",
+	);
+    print join("\n", @lines);
+}
+
+sub my_dump(@) {
+    # Returns:
+    #   ascii expression of object if Data::Dump(er) is installed
+    #   error code otherwise
+    my @dump_this = (@_);
+    # First choice: Data::Dump
+    eval "use Data::Dump qw(dump);";
+    if (not $@) {
+	# Create a dummy Data::Dump:dump as Hans Schou sometimes has
+	# it undefined
+	eval "sub Data::Dump:dump {}";
+	eval "use Data::Dump qw(dump);";
+	return (Data::Dump::dump(@dump_this));
+    }
+    # Data::Dump not installed: second choice is Data::Dumper
+    eval "use Data::Dumper;";
+    if (not $@) {
+	return Dumper(@dump_this);
+    }
+    my $err =  "Neither Data::Dump nor Data::Dumper is installed\n".
+	"Not dumping output\n";
+    ::status($err);
+    return $err;
+}
+
+sub debug(@) {
+    # Print the arguments on the currently selected output filehandle
+    # if debugging is on ($Global::debug)
+    # Returns: N/A
+    if(not $Global::debug) {
+	return;
+    }
+    print @_;
+}
--- a/2search/2search
+++ b/2search/2search
@ -0,0 +1,777 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+2search - binary search through sorted text files
+
+2grep - binary search+grep through sorted text files
+
+=head1 SYNOPSIS
+
+B<2search> [-nrfB] file string [string...]
+
+B<2search> --grep [-nrf] file string [string...]
+
+B<2grep> [-nrf] file string [string...]
+
+... | B<2search> [-nrfB] file
+
+... | B<2search> --grep [-nrf] file
+
+... | B<2grep> [-nrf] file
+
+=head1 DESCRIPTION
+
+B<2search> searches a sorted file for a string. It outputs the
+following line or the byte position of this line, which is where the
+string would have been if it had been in the sorted file.
+
+B<2grep> outputs all lines starting with a given string. The file must
+be sorted.
+
+=over 9
+
+=item B<--ignore-leading-blanks>
+
+=item B<-b>
+
+ignore leading blanks
+
+
+=item B<--byte-offset>
+
+=item B<-B>
+
+print byte position where string would have been
+
+
+=item B<--dictionary-order> (not implemented)
+
+=item B<-d>
+
+consider only blanks and alphanumeric characters
+
+
+=item B<--debug> (not implemented)
+
+=item B<-D>
+
+annotate the part of the line used to sort, and warn about
+questionable usage to stderr
+
+
+=item B<--ignore-case>
+
+=item B<-f>
+
+fold lower case to upper case characters
+
+
+=item B<--file> I<file>
+
+=item B<-F> I<file>
+
+search for all lines in I<file>
+
+
+=item B<--general-numeric-sort> (not implemented)
+
+=item B<-g>
+
+compare according to general numerical value
+
+
+=item B<--ignore-nonprinting> (not implemented)
+
+=item B<-i>
+
+consider only printable characters
+
+
+=item B<--month-sort>
+
+=item B<-M>
+
+compare (unknown) < 'JAN' < ... < 'DEC'
+
+
+=item B<--human-numeric-sort>
+
+=item B<-h>
+
+compare human readable numbers (e.g., 2K 1G)
+
+
+=item B<--key=KEYDEF> (not implemented)
+
+=item B<-k>
+
+sort via a key; KEYDEF gives location and type
+
+
+=item B<--numeric-sort>
+
+=item B<-n>
+
+compare according to string numerical value. If numerical values are
+the same: split the string into blocks of numbers and non-numbers, and
+compare numbers as numbers and strings as strings.
+
+This will sort like this: chr3 chr11 3chr 11chr
+
+
+=item B<--numascii>
+
+=item B<-N>
+
+compare according to string numerical value. If numerical values are
+the same: compare as strings
+
+
+=item B<--random-sort>
+
+=item B<-R>
+
+sort by random hash of keys
+
+
+=item B<--reverse>
+
+=item B<-r>
+
+reverse the result of comparisons
+
+
+=item B<--sort=WORD> (not implemented)
+
+sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
+B<-M>, numeric B<-n>, random B<-R>, version B<-V>
+
+
+=item B<-t>
+
+=item B<--field-separator=SEP>
+
+use SEP instead of non-blank to blank transition
+
+
+=item B<-z>
+
+=item B<--zero-terminated>
+
+end lines with 0 byte, not newline
+
+=back
+
+=head1 EXAMPLES
+
+=head2 Single key
+
+Input is sorted by Chromosome,Position:
+
+  SampleID Position Chromosome 
+  foo      10000123 chr3       
+  foo      10000125 chr3       
+  foo      9999998  chr11      
+  foo      10000124 chr11      
+  foo      10000126 chr11      
+
+To find all chr3:
+
+  2grep -n -k3 inputfile chr3
+
+-n will split 'chr3' into 'chr' which is compared asciibetically and
+'3' which is compared numerically.
+
+=head2 Not implemented
+
+To find all lines with chr3,10000125:
+
+  2grep -k3n,2n inputfile chr3 10000125
+
+
+
+=head1 REPORTING BUGS
+
+B<2search> is part of tangetools. Report bugs to <tools@tange.dk>.
+
+
+=head1 AUTHOR
+
+Copyright (C) 2016-2020 Ole Tange http://ole.tange.dk
+
+
+=head1 LICENSE
+
+Copyright (C) 2013 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+=head2 Documentation license I
+
+Permission is granted to copy, distribute and/or modify this documentation
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with no
+Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
+Texts.  A copy of the license is included in the file fdl.txt.
+
+=head2 Documentation license II
+
+You are free:
+
+=over 9
+
+=item B<to Share>
+
+to copy, distribute and transmit the work
+
+=item B<to Remix>
+
+to adapt the work
+
+=back
+
+Under the following conditions:
+
+=over 9
+
+=item B<Attribution>
+
+You must attribute the work in the manner specified by the author or
+licensor (but not in any way that suggests that they endorse you or
+your use of the work).
+
+=item B<Share Alike>
+
+If you alter, transform, or build upon this work, you may distribute
+the resulting work only under the same, similar or a compatible
+license.
+
+=back
+
+With the understanding that:
+
+=over 9
+
+=item B<Waiver>
+
+Any of the above conditions can be waived if you get permission from
+the copyright holder.
+
+=item B<Public Domain>
+
+Where the work or any of its elements is in the public domain under
+applicable law, that status is in no way affected by the license.
+
+=item B<Other Rights>
+
+In no way are any of the following rights affected by the license:
+
+=over 9
+
+=item *
+
+Your fair dealing or fair use rights, or other applicable
+copyright exceptions and limitations;
+
+=item *
+
+The author's moral rights;
+
+=item *
+
+Rights other persons may have either in the work itself or in
+how the work is used, such as publicity or privacy rights.
+
+=back
+
+=item B<Notice>
+
+For any reuse or distribution, you must make clear to others the
+license terms of this work.
+
+=back
+
+A copy of the full license is included in the file as cc-by-sa.txt.
+
+=head1 DEPENDENCIES
+
+B<2search>/B<2grep> uses Perl.
+
+
+=head1 SEE ALSO
+
+B<grep>(1), B<sort>(1).
+
+=cut
+
+use strict;
+use Getopt::Long;
+
+Getopt::Long::Configure("bundling","require_order");
+
+# Set the program name first: error() uses it in its messages
+$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
+$Global::version = 20200328;
+# GetOptions() prints a diagnostic and returns false if an option is
+# unknown or malformed: stop instead of searching with a half-parsed
+# command line
+if(not GetOptions(
+	   "debug|D" => \$opt::D,
+	   "version" => \$opt::version,
+	   "verbose|v" => \$opt::verbose,
+	   "B|byte-offset" => \$opt::byte_offset,
+	   "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
+	   "d|dictionary-order" => \$opt::dictionary_order,
+	   "f|ignore-case" => \$opt::ignore_case,
+	   "g|general-numeric-sort" => \$opt::general_numeric_sort,
+	   "G|grep" => \$opt::grep,
+	   "F|file=s" => \$opt::file,
+	   "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
+	   "M|month-sort" => \$opt::month_sort,
+	   "h|human-numeric-sort" => \$opt::human_numeric_sort,
+	   "n|numeric-sort" => \$opt::numeric_sort,
+	   "N|numascii" => \$opt::numascii,
+	   "r|reverse" => \$opt::reverse,
+	   "R|random-sort" => \$opt::random_sort,
+	   "sort=s" => \$opt::sort,
+	   "V|version-sort" => \$opt::version_sort,
+	   "k|key=s" => \@opt::key,
+	   "t|field-separator=s" => \$opt::field_separator,
+	   "z|zero-terminated" => \$opt::zero_terminated,
+       )) {
+    error("Unknown or invalid option");
+    exit(255);
+}
+if($opt::version) { version(); exit 0; }
+# -z: records (in the searched file and in the search strings) end in NUL
+if($opt::zero_terminated) { $/ = "\0"; }
+if(@opt::key) {
+    # Default separator if --key = whitespace
+    $Global::sep = '\s+';
+    # NOTE: the separator is used as a regexp by split in compare()
+    if(defined $opt::field_separator) { $Global::sep = $opt::field_separator; }
+}
+# When called as 2grep: behave as 2search --grep
+if($Global::progname eq "2grep") { $opt::grep = 1; }
+$Global::debug = $opt::D;
+
+parse_keydef();
+
+if($Global::debug) {
+    # Only dump when debugging: my_dump() loads Data::Dump(er) with
+    # string evals every time it is called
+    debug(my_dump(\@Global::keydefs),"\n");
+}
+
+# First non-option argument is the sorted file to search in
+my $file = shift;
+if(not defined $file) {
+    # Without this check the failure only shows up later as
+    # "Cannot open ''" after the search strings have been read
+    error("No file to search in was given");
+    exit(255);
+}
+# The strings to search for come from (in order of priority):
+# the remaining command line arguments, --file, or stdin
+if(@ARGV) {
+    $opt::argv = 1;
+} elsif(defined $opt::file) {
+    # skip: get() reads from --file
+} else {
+    $opt::stdin = 1;
+}
+
+ ROUND:
+    while(1) {
+	# One round = one search: fetch one search value per key definition
+	my @search_vals;
+	for my $keydef (@Global::keydefs) {
+	    my $val = get();
+	    # No more search values: we are done
+	    defined $val or last ROUND;
+	    push @search_vals, $val;
+	}
+	if(not $opt::grep) {
+	    # Print the line (or byte position) found by the binary search
+	    print bsearch($file,@search_vals);
+	} else {
+	    # Print all lines starting with the search values
+	    bgrep($file,@search_vals);
+	}
+}
+
+{
+    # Filehandle for --file. It is kept between calls to get() so each
+    # call reads the next line.
+    my $fh;
+
+    sub get {
+	# Get the next string to search for
+	# The source is the command line arguments ($opt::argv),
+	# the lines of --file ($opt::file), or the lines on stdin ($opt::stdin)
+	# Returns:
+	#   $val = next search string with the record separator removed
+	#   undef if there are no more search strings
+	if($opt::argv) {
+	    # Search for strings on the command line
+	    return shift @ARGV;
+	}
+	if(defined $opt::file) {
+	    # Search for strings given with --file
+	    if(not $fh) {
+		# Open into the shared $fh: a 'my $fh' declared here would
+		# go out of scope at once and leave nothing to read from
+		if(not open($fh, "<", $opt::file)) {
+		    error("Cannot open $opt::file");
+		    exit(255);
+		}
+	    }
+	    my $val = <$fh>;
+	    defined $val and chomp $val;
+	    return $val;
+	}
+	if($opt::stdin) {
+	    # Search for strings on stdin
+	    my $val = <>;
+	    defined $val and chomp $val;
+	    return $val;
+	}
+	die("get(): no source of search strings selected");
+    }
+}
+
+sub bgrep {
+    # Print all lines in a sorted file that start with the search values
+    # Input:
+    #   $file = name of the sorted file to search in
+    #   @search_vals = one search value per key definition
+    # Returns: N/A
+    my $file = shift;
+    my @search_vals = @_;
+    # bsearch() must return a byte position here, not a line.
+    # 'local' restores the previous setting when bgrep() returns.
+    local $opt::byte_offset = 1;
+    # bsearch() returns the position followed by "\n": keep the number
+    my $startpos = int(bsearch($file,@search_vals));
+    my $fh;
+    if(not open ($fh, "<", $file)) {
+	error("Cannot open '$file'");
+	exit 1;
+    }
+    seek($fh,$startpos,0) or die("Cannot seek to $startpos");
+    # Allow for partial matches in grep (4 matches 40, A matches Aaa)
+    for my $keydef (@Global::keydefs) {
+	$keydef->{'partial_match'} = 1;
+    }
+    my $line;
+    # Test with defined(): a final line "0" without a newline is a false
+    # value and would otherwise end the loop before it is compared
+    while(defined($line = <$fh>)
+	  and
+	  not compare($line,@search_vals)) {
+	print $line;
+    }
+    close $fh;
+    # Back to exact comparison for the next binary search
+    for my $keydef (@Global::keydefs) {
+	$keydef->{'partial_match'} = 0;
+    }
+}
+
+sub bsearch {
+    # Binary search a sorted file for the search values
+    # Input:
+    #   $file = name of the sorted file to search in
+    #   @search_vals = one search value per key definition
+    # Returns:
+    #   if $opt::byte_offset is set: a byte position followed by "\n"
+    #   otherwise: a line read from the file
+    #     (undef if the read was past the end of the file)
+    my $file = shift;
+    my @search_vals = @_;
+    # $min/$max are the byte positions bounding the search
+    my $min = 0;
+    my $max = -s $file;
+    my $fh;
+    if(not open ($fh, "<", $file)) {
+	error("Cannot open '$file'");
+	exit 1;
+    }
+    my($line,$middle);
+    # $minnl/$maxnl are the positions found by tell() after skipping
+    # to the next line from $min/$max
+    my $minnl = $min;
+    my $maxnl = $max;
+    while($max - $min > 1) {
+	$middle = int(($max + $min)/2);
+	seek($fh,$middle,0) or die("Cannot seek to $middle");
+	if($middle > 0) {
+	    # We probably landed in the middle of a line:
+	    # read (and ignore) the rest of that line
+	    <$fh>;
+	}
+	my $newline_pos = tell($fh);
+	debug("$min <= $middle <= $newline_pos <= $max\n");
+	debug("$minnl <= $newline_pos <= $maxnl\n");
+	if($newline_pos == $maxnl
+	   or
+	   eof($fh)
+	   or
+	   compare(($line = <$fh>),@search_vals) >= 0) {
+	    # We have seen this newline position before
+	    # or we are at the end of the file
+	    # or the line compares >= the search values:
+	    # continue in the lower half
+	    $max = $middle;
+	    $maxnl = $newline_pos;
+	} else {
+	    # The line compares < the search values:
+	    # continue in the upper half
+	    $min = $middle;
+	    $minnl = $newline_pos;
+	}
+    }
+    # Re-read the line at the lower bound to decide whether the result
+    # is that line or the one following it
+    seek($fh,$minnl,0) or die("Cannot seek to $minnl");
+    $line = <$fh>;
+    if(compare($line,@search_vals) >= 0) {
+	if($opt::byte_offset) {
+	    return $minnl."\n";
+	} else {
+	    return $line;
+	}
+    } else {
+	if($opt::byte_offset) {
+	    # Position just after the line read above
+	    return tell($fh)."\n";
+	} else {
+	    $line=<$fh>;
+	    return $line;
+	}
+    }
+}
+
+sub parse_keydef {
+    # Parse the --key definitions F[.C][OPTS][,F[.C][OPTS]] into
+    # @Global::keydefs (one hash ref per key). If no --key is given a
+    # single key on field 1, char 1 with the global order options is used.
+    # Returns: N/A
+
+    # Short option letter => name of the order option in a keydef
+    my %ordertbl = (
+	"b" => 'ignore_leading_blanks',
+	"d" => 'dictionary_order',
+	"f" => 'ignore_case',
+	"g" => 'general_numeric_sort',
+	"i" => 'ignore_nonprinting',
+	"M" => 'month_sort',
+	"h" => 'human_numeric_sort',
+	"n" => 'numeric_sort',
+	"N" => 'numascii',
+	"r" => 'reverse',
+	"R" => 'random_sort',
+	"V" => 'version_sort',
+	);
+    # Short option letter => value given as a global option
+    my %defaultorder = (
+	"b" => $opt::ignore_leading_blanks,
+	"d" => $opt::dictionary_order,
+	"f" => $opt::ignore_case,
+	"g" => $opt::general_numeric_sort,
+	"i" => $opt::ignore_nonprinting,
+	"M" => $opt::month_sort,
+	"h" => $opt::human_numeric_sort,
+	"n" => $opt::numeric_sort,
+	"N" => $opt::numascii,
+	"r" => $opt::reverse,
+	"R" => $opt::random_sort,
+	"V" => $opt::version_sort,
+	);
+    # Make a keydef for (field, char) preloaded with the global options
+    my $new_keydef = sub {
+	my ($field, $char) = @_;
+	my $opt = { 'field' => $field, 'char' => $char };
+	for my $letter (keys %defaultorder) {
+	    $opt->{$ordertbl{$letter}} = $defaultorder{$letter};
+	}
+	return $opt;
+    };
+
+    if(not @opt::key) {
+	# Convert -n -r to -k1rn
+	# with sep = undef
+	$Global::sep = undef;
+	push(@Global::keydefs, $new_keydef->(1, 1));
+    }
+
+    for my $keydefs (@opt::key) {
+	for my $keydef (split /,/, $keydefs) {
+	    my $opt;
+	    if($keydef =~ /^(\d+)(\.(\d+))?([bdfgiMhnNRrV]+)?$/) {
+		# F = field, C = char (default 1), OPTS = order letters
+		my ($field, $char, $letters) = ($1, $3 || 1, $4);
+		$opt = $new_keydef->($field, $char);
+		# Letters given on the key switch the option on for this key
+		for my $o (split //, $letters) {
+		    $opt->{$ordertbl{$o}} = 1;
+		}
+	    } else {
+		error("Keydef $keydef does not match F[.C][OPTS]");
+		exit(255);
+	    }
+	    push(@Global::keydefs,$opt);
+	}
+    }
+}
+
+sub compare {
+    # Compare a line from the file with the search values,
+    # one search value per key definition
+    # Input:
+    #   $line = line from the file
+    #   @search_vals = search values in the order of @Global::keydefs
+    # Returns:
+    #   result of compare_single() for the first key that differs
+    #   0 if no key differs
+    my($line,@search_vals) = @_;
+    chomp($line);
+    debug("Compare: $line <=> @search_vals ");
+    # Without a separator the whole line is the only field
+    my @field = $Global::sep ? split(/$Global::sep/o, $line) : ($line);
+    my $idx = 0;
+    for my $keydef (@Global::keydefs) {
+	# keydef = F[.C][OPTS][,F[.C][OPTS]]
+	# The key starts at char C (1-based) of field F (1-based)
+	my $key = substr($field[$keydef->{'field'}-1], $keydef->{'char'}-1);
+	my $cmp = compare_single($key, $search_vals[$idx++], $keydef);
+	debug("== $cmp\n");
+	# They differ on this key
+	return $cmp if $cmp;
+    }
+    # No difference on any keydefs
+    return 0;
+}
+
+sub compare_single {
+    # Compare a key from a line with a search value based on order options
+    # Input:
+    #   $key = key taken from the line in the file
+    #   $val = value searched for
+    #   $opt = keydef hash ref with the order options
+    # Returns:
+    #   negative, 0, or positive (like cmp and <=>)
+    my ($key,$val,$opt) = @_;
+    if($Global::debug) {
+	# Only dump when debugging: my_dump() runs string evals that are
+	# too expensive to do for every comparison
+	debug("$key <=> $val");
+	debug(my_dump($opt),"\n");
+    }
+    if($opt->{'random_sort'}) {
+	return rand() <=> rand();
+    }
+    if($opt->{'ignore_leading_blanks'}) {
+	$key =~ s/^\s+//;
+	$val =~ s/^\s+//;
+    }
+    if($opt->{'ignore_case'}) {
+	$key = uc($key);
+	$val = uc($val);
+    }
+    if($opt->{'partial_match'}) {
+	# String 'foo' matches 'foobar'
+	$key = substr($key,0,length $val);
+    }
+    if($opt->{'reverse'}) {
+	($key,$val) = ($val,$key);
+    }
+    if($opt->{'human_numeric_sort'}) {
+	return multiply_binary_prefix($key) <=> multiply_binary_prefix($val);
+    }
+    if($opt->{'month_sort'}) {
+	# (unknown) = 0 < JAN = 1 < ... < DEC = 12
+	my %m;
+	my @mon = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
+	# Must be a list (1..12): {1..12} is a single anonymous hash ref
+	@m{@mon} = (1..12);
+	return ($m{$key} || 0) <=> ($m{$val} || 0);
+    }
+    if($opt->{'numeric_sort'}) {
+	return $key <=> $val;
+    } elsif($opt->{'numascii'}) {
+	# Must be '||': 'or' binds looser than 'return', so the string
+	# comparison would never be reached
+	return ($key <=> $val || $key cmp $val);
+    } else {
+	return $key cmp $val;
+    }
+}
+
+sub multiply_binary_prefix(@) {
+    # Evaluate numbers with binary prefix
+    # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80
+    # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^60, zi=2^70, yi=2^80
+    # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^60, Z =2^70, Y =2^80
+    # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
+    # 13G = 13*1024*1024*1024 = 13958643712
+    # Input:
+    #   @v = strings with prefixes (Xi/xi and X = 2^90, x = 10^27 are also handled)
+    # Returns:
+    #   @v with prefixes multiplied (only the first value in scalar context)
+    my @v = @_;
+    for(@v) {
+	# 1E3=1000, 1E-3=0.001
+	s/e([+-]?\d+)/*10**$1/gi;
+    }
+    for(@v) {
+	defined $_ or next;
+	s/ki/*1024/gi; # two-letter prefixes are replaced before the one-letter ones
+	s/mi/*1024*1024/gi;
+	s/gi/*1024*1024*1024/gi;
+	s/ti/*1024*1024*1024*1024/gi;
+	s/pi/*1024*1024*1024*1024*1024/gi;
+	s/ei/*1024*1024*1024*1024*1024*1024/gi;
+	s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
+	s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+	s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+
+	s/K/*1024/g; # upper case single letter = power of 1024
+	s/M/*1024*1024/g;
+	s/G/*1024*1024*1024/g;
+	s/T/*1024*1024*1024*1024/g;
+	s/P/*1024*1024*1024*1024*1024/g;
+	s/E/*1024*1024*1024*1024*1024*1024/g;
+	s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
+	s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
+	s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
+
+	s/k/*1000/g; # lower case single letter = power of 1000
+	s/m/*1000*1000/g;
+	s/g/*1000*1000*1000/g;
+	s/t/*1000*1000*1000*1000/g;
+	s/p/*1000*1000*1000*1000*1000/g;
+	s/e/*1000*1000*1000*1000*1000*1000/g;
+	s/z/*1000*1000*1000*1000*1000*1000*1000/g;
+	s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
+	s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
+
+	$_ = eval $_; # NOTE(review): string eval of the rewritten input; unsafe if the values are not trusted
+    }
+    return wantarray ? @v : $v[0];
+}
+
+sub status {
+    # Print each argument followed by a newline to the status file
+    # handle ($Global::status_fd if set, otherwise STDERR) and flush it
+    # Returns: N/A
+    my $fh = $Global::status_fd || *STDERR;
+    print $fh map { ($_, "\n") } @_;
+    $fh->flush();
+}
+
+sub status_no_nl {
+    # Print the arguments as they are (no newline added) to the
+    # status file handle ($Global::status_fd if set, otherwise
+    # STDERR) and flush it
+    # Returns: N/A
+    my $fh = $Global::status_fd || *STDERR;
+    print $fh @_;
+    $fh->flush();
+}
+
+sub warning {
+    # Print each argument as "PROGNAME: Warning: ARGUMENT" on its
+    # own line through status_no_nl
+    # Returns: N/A
+    my $prog = $Global::progname || "parallel";
+    my @out;
+    for my $msg (@_) {
+	push @out, $prog, ": Warning: ", $msg, "\n";
+    }
+    status_no_nl(@out);
+}
+
+sub error {
+    # Print each argument as "PROGNAME: Error: ARGUMENT" on its own
+    # line through status
+    # Returns: N/A
+    my $prog = $Global::progname || "parallel";
+    my @out;
+    for my $msg (@_) {
+	push @out, $prog.": Error: ".$msg;
+    }
+    status(@out);
+}
+
+sub die_bug {
+    # Tell the user on STDERR how to report an internal bug and exit
+    # Input:
+    #   $bugid = id of the place where the bug was detected
+    # Returns: N/A (exits with 255)
+    my ($bugid) = @_;
+    my @report = (
+	"$Global::progname: This should not happen. You have found a bug.\n",
+	"Please submit a bug at https://gitlab.com/ole.tange/tangetools/-/issues\n",
+	"and include:\n",
+	"* The version number: $Global::version\n",
+	"* The bugid: $bugid\n",
+	"* The command line being run\n",
+	"* The files being read (put the files on a webserver if they are big)\n",
+	"\n",
+	"If you get the error on smaller/fewer files, please include those instead.\n",
+	);
+    print STDERR @report;
+    exit(255);
+}
+
+sub version {
+    # Print program name, version, copyright, license and web site
+    # Returns: N/A
+    my @lines = (
+	"$Global::progname $Global::version",
+	"Copyright (C) 2016-2020",
+	"Ole Tange and Free Software Foundation, Inc.",
+	"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
+	"This is free software: you are free to change and redistribute it.",
+	"$Global::progname comes with no warranty.",
+	"",
+	"Web site: https://gitlab.com/ole.tange/tangetools/\n",
+	);
+    print join("\n", @lines);
+}
+
+sub my_dump(@) {
+    # Returns:
+    #   text dump of the arguments if Data::Dump or Data::Dumper can be loaded
+    #   otherwise the error message (which is also printed with ::status)
+    my @dump_this = (@_);
+    eval "use Data::Dump qw(dump);";
+    if ($@) {
+        # Data::Dump could not be loaded: fall back to Data::Dumper
+        eval "use Data::Dumper;";
+        if ($@) {
+            my $err =  "Neither Data::Dump nor Data::Dumper is installed\n".
+                "Not dumping output\n";
+            ::status($err);
+            return $err;
+        } else {
+            return Dumper(@dump_this);
+        }
+    } else {
+	# Create a dummy Data::Dump:dump as Hans Schou sometimes has
+	# it undefined
+	eval "sub Data::Dump:dump {}"; # NOTE(review): single ':' before dump - confirm this defines what the comment above says
+        eval "use Data::Dump qw(dump);";
+        return (Data::Dump::dump(@dump_this));
+    }
+}
+
+sub debug(@) {
+    # Print the arguments if $Global::debug is true
+    # Returns: N/A
+    return unless $Global::debug;
+    print @_;
+}
--- a/2search/regressiontest
+++ b/2search/regressiontest
@ -0,0 +1,194 @@
+#!/bin/bash
+
+test_tmp=`tempfile` # base name for temporary files; test_n uses ${test_tmp}_n
+export test_tmp
+
+opt_tester() { # run the 2search checks on eight small inputs, each passed through "sort $opt"
+    opt="$@" # options given to both sort and 2search
+    tmp=$(tempfile)
+    test_2search() { # show the content of $tmp, then search it for 0 2 2.1 100000
+	xargs echo Search in < $tmp
+	2search $opt $tmp 0 2 2.1 100000
+	2search $opt -B $tmp 0 2 2.1 100000 # same searches, with -B (byte offset)
+    }
+    (true) |
+	sort $opt > $tmp
+    echo Search in null file
+    test_2search
+
+    (echo) |
+	sort $opt > $tmp
+    echo Search in newline
+    test_2search
+    
+    (echo 1.000000000) |
+	sort $opt > $tmp
+    test_2search
+
+    (echo 1.000000000;
+     echo 2) |
+	sort $opt > $tmp
+    test_2search
+
+    (echo 1;
+     echo 2.000000000) |
+	sort $opt > $tmp
+    test_2search
+
+    (echo 1.000000000;
+     echo 2;
+     echo 3) |
+	sort $opt > $tmp
+    test_2search
+
+    (echo 1;
+     echo 2.000000000;
+     echo 3) |
+	sort $opt > $tmp
+    test_2search
+    
+    (echo 1;
+     echo 2;
+     echo 3.000000000) |
+	sort $opt > $tmp
+    test_2search
+
+    rm $tmp
+}
+export -f opt_tester
+
+test_n() { # 2search -n and -nB on hand-written files (not passed through sort)
+    tmp=${test_tmp}_n
+    true > $tmp # empty file
+    echo Search in null file
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo > $tmp # file with a single empty line
+    xargs echo Search in newline # NOTE(review): no "< $tmp" here, so xargs reads stdin, unlike the headers below
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1.000000000 > $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1.000000000 > $tmp
+    echo 2 >> $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1 > $tmp
+    echo 2.000000000 >> $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1.000000000 > $tmp
+    echo 2 >> $tmp
+    echo 3 >> $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1 > $tmp
+    echo 2.000000000 >> $tmp
+    echo 3 >> $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    echo 1 > $tmp
+    echo 2 >> $tmp
+    echo 3.000000000 >> $tmp
+    xargs echo Search in < $tmp
+    2search -n $tmp 0 2 2.1 100000
+    2search -nB $tmp 0 2 2.1 100000
+    rm $tmp
+}
+
+test_n_opt() {
+    opt_tester -n # sort and 2search both get -n
+}
+
+test_rn_opt() {
+    opt_tester -rn # sort and 2search both get -rn
+}
+
+test_r_opt() {
+    opt_tester -rn # NOTE(review): same options as test_rn_opt; -r alone may be intended - confirm (regressiontest.out would need regenerating)
+}
+
+test_k32_2n_1n() {
+    # Search a tab separated file on three keys: column 3 and 2 as
+    # N (numascii), column 1 as n (numeric), and print what 2grep finds
+    tmp=$(tempfile)
+    cat >$tmp <<EOF
+1	chr1	Sample 1
+11	chr1	Sample 1
+111	chr1	Sample 1
+1111	chr1	Sample 1
+11111	chr1	Sample 1
+111111	chr1	Sample 1
+1	chr2	Sample 1
+22	chr2	Sample 1
+111	chr2	Sample 1
+2222	chr2	Sample 1
+11111	chr2	Sample 1
+111111	chr2	Sample 1
+1	chr10	Sample 1
+11	chr10	Sample 1
+111	chr10	Sample 1
+1111	chr10	Sample 1
+11111	chr10	Sample 1
+111111	chr10	Sample 1
+1	chr1	Sample 2
+11	chr1	Sample 2
+111	chr1	Sample 2
+1111	chr1	Sample 2
+11111	chr1	Sample 2
+111111	chr1	Sample 2
+1	chr2	Sample 2
+22	chr2	Sample 2
+111	chr2	Sample 2
+2222	chr2	Sample 2
+11111	chr2	Sample 2
+111111	chr2	Sample 2
+1	chr10	Sample 2
+11	chr10	Sample 2
+111	chr10	Sample 2
+1111	chr10	Sample 2
+11111	chr10	Sample 2
+111111	chr10	Sample 2
+1	chr1	Sample 10
+11	chr1	Sample 10
+111	chr1	Sample 10
+1111	chr1	Sample 10
+11111	chr1	Sample 10
+111111	chr1	Sample 10
+1	chr2	Sample 10
+22	chr2	Sample 10
+111	chr2	Sample 10
+2222	chr2	Sample 10
+11111	chr2	Sample 10
+111111	chr2	Sample 10
+1	chr10	Sample 10
+11	chr10	Sample 10
+111	chr10	Sample 10
+1111	chr10	Sample 10
+11111	chr10	Sample 10
+111111	chr10	Sample 10
+EOF
+    2grep -k3N,2N,1n $tmp 'Sample 10' chr10 111
+    # Remove the temporary file like the other tests do. Printing its
+    # name instead would leave the file behind and add a line that
+    # differs from run to run, so the output could never match
+    # regressiontest.out
+    rm $tmp
+}
+
+test_partial_line() { # the three ways of invoking the grep mode on the same input
+    tmp=$(tempfile)
+    seq 100 | LC_ALL=C sort > $tmp # 1..100 sorted as strings
+    echo '### 2search --grep'
+    2search --grep $tmp 3
+    echo '### 2grep'
+    2grep $tmp 3
+    echo '### ... | 2grep'
+    echo 3 | 2grep $tmp # search string given on stdin instead of as argument
+    rm $tmp
+}
+
+
+export -f $(compgen -A function | grep test_) # export every function whose name contains test_ so parallel can run it
+compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1' > regressiontest.new # run the tests through parallel and collect stdout+stderr in regressiontest.new
+diff regressiontest.new regressiontest.out # any output here is a difference from the recorded result
--- a/2search/regressiontest.out
+++ b/2search/regressiontest.out
@ -0,0 +1,280 @@
+test_k32_2n_1n	111	chr10	Sample 10
+test_k32_2n_1n	1111	chr10	Sample 10
+test_k32_2n_1n	11111	chr10	Sample 10
+test_k32_2n_1n	111111	chr10	Sample 10
+test_n	Search in null file
+test_n	0
+test_n	0
+test_n	0
+test_n	0
+test_n	Search in newline
+test_n	
+test_n	0
+test_n	1
+test_n	1
+test_n	1
+test_n	Search in 1.000000000
+test_n	1.000000000
+test_n	0
+test_n	12
+test_n	12
+test_n	12
+test_n	Search in 1.000000000 2
+test_n	1.000000000
+test_n	2
+test_n	0
+test_n	12
+test_n	14
+test_n	14
+test_n	Search in 1 2.000000000
+test_n	1
+test_n	2.000000000
+test_n	0
+test_n	2
+test_n	14
+test_n	14
+test_n	Search in 1.000000000 2 3
+test_n	1.000000000
+test_n	2
+test_n	3
+test_n	0
+test_n	12
+test_n	14
+test_n	16
+test_n	Search in 1 2.000000000 3
+test_n	1
+test_n	2.000000000
+test_n	3
+test_n	0
+test_n	2
+test_n	14
+test_n	16
+test_n	Search in 1 2 3.000000000
+test_n	1
+test_n	2
+test_n	3.000000000
+test_n	0
+test_n	2
+test_n	4
+test_n	16
+test_n_opt	Search in null file
+test_n_opt	Search in
+test_n_opt	0
+test_n_opt	0
+test_n_opt	0
+test_n_opt	0
+test_n_opt	Search in newline
+test_n_opt	Search in
+test_n_opt	
+test_n_opt	0
+test_n_opt	1
+test_n_opt	1
+test_n_opt	1
+test_n_opt	Search in 1.000000000
+test_n_opt	1.000000000
+test_n_opt	0
+test_n_opt	12
+test_n_opt	12
+test_n_opt	12
+test_n_opt	Search in 1.000000000 2
+test_n_opt	1.000000000
+test_n_opt	2
+test_n_opt	0
+test_n_opt	12
+test_n_opt	14
+test_n_opt	14
+test_n_opt	Search in 1 2.000000000
+test_n_opt	1
+test_n_opt	2.000000000
+test_n_opt	0
+test_n_opt	2
+test_n_opt	14
+test_n_opt	14
+test_n_opt	Search in 1.000000000 2 3
+test_n_opt	1.000000000
+test_n_opt	2
+test_n_opt	3
+test_n_opt	0
+test_n_opt	12
+test_n_opt	14
+test_n_opt	16
+test_n_opt	Search in 1 2.000000000 3
+test_n_opt	1
+test_n_opt	2.000000000
+test_n_opt	3
+test_n_opt	0
+test_n_opt	2
+test_n_opt	14
+test_n_opt	16
+test_n_opt	Search in 1 2 3.000000000
+test_n_opt	1
+test_n_opt	2
+test_n_opt	3.000000000
+test_n_opt	0
+test_n_opt	2
+test_n_opt	4
+test_n_opt	16
+test_partial_line	### 2search --grep
+test_partial_line	3
+test_partial_line	30
+test_partial_line	31
+test_partial_line	32
+test_partial_line	33
+test_partial_line	34
+test_partial_line	35
+test_partial_line	36
+test_partial_line	37
+test_partial_line	38
+test_partial_line	39
+test_partial_line	### 2grep
+test_partial_line	3
+test_partial_line	30
+test_partial_line	31
+test_partial_line	32
+test_partial_line	33
+test_partial_line	34
+test_partial_line	35
+test_partial_line	36
+test_partial_line	37
+test_partial_line	38
+test_partial_line	39
+test_partial_line	### ... | 2grep
+test_partial_line	3
+test_partial_line	30
+test_partial_line	31
+test_partial_line	32
+test_partial_line	33
+test_partial_line	34
+test_partial_line	35
+test_partial_line	36
+test_partial_line	37
+test_partial_line	38
+test_partial_line	39
+test_rn_opt	Search in null file
+test_rn_opt	Search in
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	Search in newline
+test_rn_opt	Search in
+test_rn_opt	
+test_rn_opt	
+test_rn_opt	
+test_rn_opt	
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	Search in 1.000000000
+test_rn_opt	1.000000000
+test_rn_opt	1.000000000
+test_rn_opt	1.000000000
+test_rn_opt	12
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	Search in 2 1.000000000
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	14
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	Search in 2.000000000 1
+test_rn_opt	2.000000000
+test_rn_opt	2.000000000
+test_rn_opt	2.000000000
+test_rn_opt	14
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	0
+test_rn_opt	Search in 3 2 1.000000000
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	3
+test_rn_opt	16
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	0
+test_rn_opt	Search in 3 2.000000000 1
+test_rn_opt	2.000000000
+test_rn_opt	2.000000000
+test_rn_opt	3
+test_rn_opt	16
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	0
+test_rn_opt	Search in 3.000000000 2 1
+test_rn_opt	2
+test_rn_opt	2
+test_rn_opt	3.000000000
+test_rn_opt	16
+test_rn_opt	12
+test_rn_opt	12
+test_rn_opt	0
+test_r_opt	Search in null file
+test_r_opt	Search in
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	Search in newline
+test_r_opt	Search in
+test_r_opt	
+test_r_opt	
+test_r_opt	
+test_r_opt	
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	Search in 1.000000000
+test_r_opt	1.000000000
+test_r_opt	1.000000000
+test_r_opt	1.000000000
+test_r_opt	12
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	Search in 2 1.000000000
+test_r_opt	2
+test_r_opt	2
+test_r_opt	2
+test_r_opt	14
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	Search in 2.000000000 1
+test_r_opt	2.000000000
+test_r_opt	2.000000000
+test_r_opt	2.000000000
+test_r_opt	14
+test_r_opt	0
+test_r_opt	0
+test_r_opt	0
+test_r_opt	Search in 3 2 1.000000000
+test_r_opt	2
+test_r_opt	2
+test_r_opt	3
+test_r_opt	16
+test_r_opt	2
+test_r_opt	2
+test_r_opt	0
+test_r_opt	Search in 3 2.000000000 1
+test_r_opt	2.000000000
+test_r_opt	2.000000000
+test_r_opt	3
+test_r_opt	16
+test_r_opt	2
+test_r_opt	2
+test_r_opt	0
+test_r_opt	Search in 3.000000000 2 1
+test_r_opt	2
+test_r_opt	2
+test_r_opt	3.000000000
+test_r_opt	16
+test_r_opt	12
+test_r_opt	12
+test_r_opt	0
--- a/25
+++ b/25
@ -1,21 +1,22 @@
-CMD = blink bsearch burncpu duplicate-packets em encdir field forever	\
+CMD = blink 2grep 2search burncpu duplicate-packets em encdir field forever	\
 	fxkill G gitnext gitundo goodpasswd histogram mtrr mirrorpdf	\
 	neno off pdfman pidcmd plotpipe puniq ramusage rand rclean	\
 	rina rn rrm seekmaniac shython sound-reload splitvideo stdout	\
 	swapout T timestamp tracefile transpose upsidedown vid		\
 	w4it-for-port-open whitehash wifi-reload wssh ytv yyyymmdd

-all: blink/blink.1 bsearch/bsearch.1 burncpu/burncpu.1			\
-	encdir/encdir.1 G/G.1 gitnext/gitnext.1 gitundo/gitundo.1	\
-	goodpasswd/goodpasswd.1 histogram/histogram.1			\
-	mirrorpdf/mirrorpdf.1 neno/neno.1 off/off.1 pdfman/pdfman.1	\
-	pidcmd/pidcmd.1 plotpipe/plotpipe.1 puniq/puniq.1 rand/rand.1	\
-	rina/rina.1 rn/rn.1 rrm/rrm.1 seekmaniac/seekmaniac.1		\
-	shython/shython.1 sound-reload/sound-reload.1			\
-	splitvideo/splitvideo.1 stdout/stdout.1 timestamp/timestamp.1	\
-	tracefile/tracefile.1 transpose/transpose.1 T/T.1		\
-	upsidedown/upsidedown.1 vid/vid.1 wifi-reload/wifi-reload.1	\
-	wssh/wssh.1 ytv/ytv.1 yyyymmdd/yyyymmdd.1
+all: blink/blink.1 2search/2grep.1 2search/2search.1			\
+	burncpu/burncpu.1 encdir/encdir.1 G/G.1 gitnext/gitnext.1	\
+	gitundo/gitundo.1 goodpasswd/goodpasswd.1			\
+	histogram/histogram.1 mirrorpdf/mirrorpdf.1 neno/neno.1		\
+	off/off.1 pdfman/pdfman.1 pidcmd/pidcmd.1 plotpipe/plotpipe.1	\
+	puniq/puniq.1 rand/rand.1 rina/rina.1 rn/rn.1 rrm/rrm.1		\
+	seekmaniac/seekmaniac.1 shython/shython.1			\
+	sound-reload/sound-reload.1 splitvideo/splitvideo.1		\
+	stdout/stdout.1 timestamp/timestamp.1 tracefile/tracefile.1	\
+	transpose/transpose.1 T/T.1 upsidedown/upsidedown.1 vid/vid.1	\
+	wifi-reload/wifi-reload.1 wssh/wssh.1 ytv/ytv.1			\
+	yyyymmdd/yyyymmdd.1

 %.1: %
 	pod2man $< > $@
--- a/bsearch/bsearch
+++ b/bsearch/bsearch
@ -1,404 +0,0 @@
-#!/usr/bin/perl
-
-=head1 NAME
-
-bsearch - binary search through sorted text files
-
-=head1 SYNOPSIS
-
-B<bsearch> [-nrfB] file string [string...]
-
-=head1 DESCRIPTION
-
-B<bsearch> searches a sorted file for a string. It outputs the
-following line or the byte position of this line, which is where the
-string would have been if it had been in the sorted file.
-
-=over 9
-
-=item B<--ignore-leading-blanks> (not implemented)
-
-=item B<-b>
-
-ignore leading blanks
-
-=item B<--byte-offset>
-
-=item B<-B>
-
-print byte position where string would have been
-
-=item B<--dictionary-order> (not implemented)
-
-=item B<-d>
-
-consider only blanks and alphanumeric characters
-
-=item B<--debug> (not implemented)
-
-=item B<-D>
-
-annotate the part of the line used to sort, and warn about
-questionable usage to stderr
-
-=item B<--ignore-case>
-
-=item B<-f>
-
-fold lower case to upper case characters
-
-=item B<--general-numeric-sort> (not implemented)
-
-=item B<-g>
-
-compare according to general numerical value
-
-=item B<--ignore-nonprinting> (not implemented)
-
-=item B<-i>
-
-consider only printable characters
-
-=item B<--month-sort> (not implemented)
-
-=item B<-M>
-
-compare (unknown) < 'JAN' < ... < 'DEC'
-
-=item B<--human-numeric-sort> (not implemented)
-
-=item B<-h>
-
-compare human readable numbers (e.g., 2K 1G)
-
-=item B<--key=KEYDEF> (not implemented)
-
-=item B<-k>
-
-sort via a key; KEYDEF gives location and type
-
-=item B<--numeric-sort>
-
-=item B<-n>
-
-compare according to string numerical value
-
-=item B<--random-sort>
-
-=item B<-R>
-
-sort by random hash of keys
-
-=item B<--reverse>
-
-=item B<-r>
-
-reverse the result of comparisons
-
-=item B<--sort=WORD> (not implemented)
-
-sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
-B<-M>, numeric B<-n>, random B<-R>, version B<-V>
-
-=item B<-t> (not implemented)
-
-=item B<--field-separator=SEP>
-
-use SEP instead of non-blank to blank transition
-
-=item B<-z>
-
-=item B<--zero-terminated>
-
-end lines with 0 byte, not newline
-
-=back
-
-=head1 EXAMPLES
-
-=head2 Missing
-
-Missing
-
-
-=head1 REPORTING BUGS
-
-B<bsearch> is part of tangetools. Report bugs to <tools@tange.dk>.
-
-
-=head1 AUTHOR
-
-Copyright (C) 2016 Ole Tange http://ole.tange.dk
-
-
-=head1 LICENSE
-
-Copyright (C) 2013 Free Software Foundation, Inc.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3 of the License, or
-at your option any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-=head2 Documentation license I
-
-Permission is granted to copy, distribute and/or modify this documentation
-under the terms of the GNU Free Documentation License, Version 1.3 or
-any later version published by the Free Software Foundation; with no
-Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
-Texts.  A copy of the license is included in the file fdl.txt.
-
-=head2 Documentation license II
-
-You are free:
-
-=over 9
-
-=item B<to Share>
-
-to copy, distribute and transmit the work
-
-=item B<to Remix>
-
-to adapt the work
-
-=back
-
-Under the following conditions:
-
-=over 9
-
-=item B<Attribution>
-
-You must attribute the work in the manner specified by the author or
-licensor (but not in any way that suggests that they endorse you or
-your use of the work).
-
-=item B<Share Alike>
-
-If you alter, transform, or build upon this work, you may distribute
-the resulting work only under the same, similar or a compatible
-license.
-
-=back
-
-With the understanding that:
-
-=over 9
-
-=item B<Waiver>
-
-Any of the above conditions can be waived if you get permission from
-the copyright holder.
-
-=item B<Public Domain>
-
-Where the work or any of its elements is in the public domain under
-applicable law, that status is in no way affected by the license.
-
-=item B<Other Rights>
-
-In no way are any of the following rights affected by the license:
-
-=over 9
-
-=item *
-
-Your fair dealing or fair use rights, or other applicable
-copyright exceptions and limitations;
-
-=item *
-
-The author's moral rights;
-
-=item *
-
-Rights other persons may have either in the work itself or in
-how the work is used, such as publicity or privacy rights.
-
-=back
-
-=item B<Notice>
-
-For any reuse or distribution, you must make clear to others the
-license terms of this work.
-
-=back
-
-A copy of the full license is included in the file as cc-by-sa.txt.
-
-=head1 DEPENDENCIES
-
-B<bsearch> uses Perl.
-
-
-=head1 SEE ALSO
-
-B<grep>(1), B<sort>(1).
-
-=cut
-
-use Getopt::Long;
-
-Getopt::Long::Configure("bundling","require_order");
-
-GetOptions(
-    "debug|D=s" => \$opt::D,
-    "version" => \$opt::version,
-    "verbose|v" => \$opt::verbose,
-    "B|byte-offset" => \$opt::byte_offset,
-    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
-    "d|dictionary-order" => \$opt::dictionary_order,
-    "f|ignore-case" => \$opt::ignore_case,
-    "g|general-numeric-sort" => \$opt::general_numeric_sort,
-    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
-    "M|month-sort" => \$opt::month_sort,
-    "h|human-numeric-sort" => \$opt::human_numeric_sort,
-    "n|numeric-sort" => \$opt::numeric_sort,
-    "r|reverse" => \$opt::reverse,
-    "R|random-sort" => \$opt::random_sort,
-    "sort=s" => \$opt::sort,
-    "V|version-sort" => \$opt::version_sort,
-    "k|key=s" => \@opt::key,
-    "t|field-separator=s" => \$opt::field_separator,
-    "z|zero-terminated" => \$opt::zero_terminated,
-    );
-$Global::progname = "bsearch";
-$Global::version = 20160712;
-if($opt::version) {
-    version();
-    exit 0;
-}
-if($opt::zero_terminated) { $/ = "\0"; }
-
-my $file = shift;
-
-for my $key (@ARGV) {
-    print bsearch($file,$key);
-}
-
-sub bsearch {
-    my $file = shift;
-    my $key = shift;
-    my $min = 0;
-    my $max = -s $file;
-
-    if(not open ($fh, "<", $file)) {
-	error("Cannot open '$file'");
-	exit 1;
-    }
-    my $line;
-    while($max - $min > 1) {
-	$middle = int(($max + $min)/2);
-	seek($fh,$middle,0) or die;
-	my $half = <$fh>;
-	if(eof($fh)
-	   or 
-	   compare(($line = <$fh>),$key) >= 0) {
-	    $max = $middle;
-	} else {
-	    $min = $middle;
-	}
-    }
-    seek($fh,$max,0) or die;
-    $line = <$fh>;
-    if(compare($line,$key) >= 0) {
-	if($opt::byte_offset) {
-	    return "0\n";
-	} else {
-	    # The very first line
-	    return "";
-	}
-    } else {
-	if($opt::byte_offset) {
-	    return tell($fh)."\n";
-	} else {
-	    return $line;
-	}
-    }
-}
-
-sub compare {
-    my ($a,$b) = @_;
-    if($opt::random_sort) {
-	return rand() <=> rand();
-    }
-    if($opt::reverse) {
-	($a,$b) = ($b,$a);
-    }
-    if($opt::ignore_case) {
-	$a = uc($a);
-	$b = uc($b);
-    }
-    if($opt::numeric_sort) {
-	return $a <=> $b;
-    } elsif($opt::numascii) {
-	return $a <=> $b or $a cmp $b;
-    } else {
-	return $a cmp $b;
-    }
-}
-
-sub status {
-    my @w = @_;
-    my $fh = $Global::status_fd || *STDERR;
-    print $fh map { ($_, "\n") } @w;
-    flush $fh;
-}
-
-sub status_no_nl {
-    my @w = @_;
-    my $fh = $Global::status_fd || *STDERR;
-    print $fh @w;
-    flush $fh;
-}
-
-sub warning {
-    my @w = @_;
-    my $prog = $Global::progname || "parallel";
-    status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
-}
-
-sub error {
-    my @w = @_;
-    my $prog = $Global::progname || "parallel";
-    status(map { ($prog.": Error: ". $_); } @w);
-}
-
-sub die_bug {
-    my $bugid = shift;
-    print STDERR
-	("$Global::progname: This should not happen. You have found a bug.\n",
-	 "Please contact <parallel\@gnu.org> and include:\n",
-	 "* The version number: $Global::version\n",
-	 "* The bugid: $bugid\n",
-	 "* The command line being run\n",
-	 "* The files being read (put the files on a webserver if they are big)\n",
-	 "\n",
-	 "If you get the error on smaller/fewer files, please include those instead.\n");
-    ::wait_and_exit(255);
-}
-
-sub version {
-    # Returns: N/A
-    print join("\n",
-               "GNU $Global::progname $Global::version",
-               "Copyright (C) 2016",
-	       "Ole Tange and Free Software Foundation, Inc.",
-               "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
-               "This is free software: you are free to change and redistribute it.",
-               "GNU $Global::progname comes with no warranty.",
-               "",
-               "Web site: http://www.gnu.org/software/${Global::progname}\n",
-	       "When using programs that use GNU Parallel to process data for publication",
-	       "please cite as described in 'parallel --citation'.\n",
-        );
-}
--- a/bsearch/regressiontest
+++ b/bsearch/regressiontest
@ -1,44 +0,0 @@
-#!/bin/bash
-
-test_tmp=`tempfile`
-export test_tmp
-
-test_n() {
-    tmp=${test_tmp}_n
-    true > $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo > $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1.000000000 > $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1.000000000 > $tmp
-    echo 2 >> $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1 > $tmp
-    echo 2.000000000 >> $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1.000000000 > $tmp
-    echo 2 >> $tmp
-    echo 3 >> $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1 > $tmp
-    echo 2.000000000 >> $tmp
-    echo 3 >> $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    echo 1 > $tmp
-    echo 2 >> $tmp
-    echo 3.000000000 >> $tmp
-    xargs < $tmp
-    bsearch -n $tmp 0 2 2.1 100000
-    rm $tmp
-}
-
-
-export -f $(compgen -A function | grep test_)
-compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1'