2search: bsearch/bgrep renamed to 2search/2grep (bgrep is used by others).

2020-03-28 15:37:52 +01:00 · 2020-03-28 15:37:52 +01:00 · e8f520f642
parent 9efd18d0fc
commit e8f520f642
7 changed files with 2041 additions and 460 deletions
--- a/2search/2grep
+++ b/2search/2grep
@ -0,0 +1,777 @@
 #!/usr/bin/perl
 =head1 NAME
 2search - binary search through sorted text files
 2grep - binary search+grep through sorted text files
 =head1 SYNOPSIS
 B<2search> [-nrfB] file string [string...]
 B<2search> --grep [-nrf] file string [string...]
 B<2grep> [-nrf] file string [string...]
 ... | B<2search> [-nrfB] file
 ... | B<2search> --grep [-nrf] file
 ... | B<2grep> [-nrf] file
 =head1 DESCRIPTION
 B<2search> searches a sorted file for a string. It outputs the
 following line or the byte position of this line, which is where the
 string would have been if it had been in the sorted file.
 B<2grep> outputs all lines starting with a given string. The file must
 be sorted.
 =over 9
 =item B<--ignore-leading-blanks>
 =item B<-b>
 ignore leading blanks
 =item B<--byte-offset>
 =item B<-B>
 print byte position where string would have been
 =item B<--dictionary-order> (not implemented)
 =item B<-d>
 consider only blanks and alphanumeric characters
 =item B<--debug> (not implemented)
 =item B<-D>
 annotate the part of the line used to sort, and warn about
 questionable usage to stderr
 =item B<--ignore-case>
 =item B<-f>
 fold lower case to upper case characters
 =item B<--file> I<file>
 =item B<-F> I<file>
 search for all lines in I<file>
 =item B<--general-numeric-sort> (not implemented)
 =item B<-g>
 compare according to general numerical value
 =item B<--ignore-nonprinting> (not implemented)
 =item B<-i>
 consider only printable characters
 =item B<--month-sort>
 =item B<-M>
 compare (unknown) < 'JAN' < ... < 'DEC'
 =item B<--human-numeric-sort>
 =item B<-h>
 compare human readable numbers (e.g., 2K 1G)
 =item B<--key=KEYDEF> (not implemented)
 =item B<-k>
 sort via a key; KEYDEF gives location and type
 =item B<--numeric-sort>
 =item B<-n>
 compare according to string numerical value. If numerical values are
 the same: split the string into blocks of numbers and non-numbers, and
 compare numbers as numbers and strings as strings.
 This will sort like this: chr3 chr11 3chr 11chr
 =item B<--numascii>
 =item B<-N>
 compare according to string numerical value. If numerical values are
 the same: compare as strings
 =item B<--random-sort>
 =item B<-R>
 sort by random hash of keys
 =item B<--reverse>
 =item B<-r>
 reverse the result of comparisons
 =item B<--sort=WORD> (not implemented)
 sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
 B<-M>, numeric B<-n>, random B<-R>, version B<-V>
 =item B<-t>
 =item B<--field-separator=SEP>
 use SEP instead of non-blank to blank transition
 =item B<-z>
 =item B<--zero-terminated>
 end lines with 0 byte, not newline
 =back
 =head1 EXAMPLES
 =head2 Single key
 Input is sorted by Chromosome,Position:
  SampleID Position Chromosome 
  foo      10000123 chr3       
  foo      10000125 chr3       
  foo      9999998  chr11      
  foo      10000124 chr11      
  foo      10000126 chr11      
 To find all chr3:
  2grep -n -k3 inputfile chr3
 -n will split 'chr3' into 'chr' which is compared asciibetically and
 '3' which is compared numerically.
 =head2 Not implemented
 To find all lines with chr3,10000125:
  2grep -k3n,2n inputfile chr3 10000125
 =head1 REPORTING BUGS
 B<2search> is part of tangetools. Report bugs to <tools@tange.dk>.
 =head1 AUTHOR
 Copyright (C) 2016-2020 Ole Tange http://ole.tange.dk
 =head1 LICENSE
 Copyright (C) 2013 Free Software Foundation, Inc.
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 at your option any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 =head2 Documentation license I
 Permission is granted to copy, distribute and/or modify this documentation
 under the terms of the GNU Free Documentation License, Version 1.3 or
 any later version published by the Free Software Foundation; with no
 Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
 Texts.  A copy of the license is included in the file fdl.txt.
 =head2 Documentation license II
 You are free:
 =over 9
 =item B<to Share>
 to copy, distribute and transmit the work
 =item B<to Remix>
 to adapt the work
 =back
 Under the following conditions:
 =over 9
 =item B<Attribution>
 You must attribute the work in the manner specified by the author or
 licensor (but not in any way that suggests that they endorse you or
 your use of the work).
 =item B<Share Alike>
 If you alter, transform, or build upon this work, you may distribute
 the resulting work only under the same, similar or a compatible
 license.
 =back
 With the understanding that:
 =over 9
 =item B<Waiver>
 Any of the above conditions can be waived if you get permission from
 the copyright holder.
 =item B<Public Domain>
 Where the work or any of its elements is in the public domain under
 applicable law, that status is in no way affected by the license.
 =item B<Other Rights>
 In no way are any of the following rights affected by the license:
 =over 9
 =item *
 Your fair dealing or fair use rights, or other applicable
 copyright exceptions and limitations;
 =item *
 The author's moral rights;
 =item *
 Rights other persons may have either in the work itself or in
 how the work is used, such as publicity or privacy rights.
 =back
 =item B<Notice>
 For any reuse or distribution, you must make clear to others the
 license terms of this work.
 =back
 A copy of the full license is included in the file as cc-by-sa.txt.
 =head1 DEPENDENCIES
 B<2search>/B<2grep> uses Perl.
 =head1 SEE ALSO
 B<grep>(1), B<sort>(1).
 =cut
 use strict;
 use Getopt::Long;
 Getopt::Long::Configure("bundling","require_order");
 # Parse the command line into the $opt:: variables. Getopt::Long prints
 # its own message for unknown or malformed options and returns false;
 # stop instead of searching with half-parsed options.
 if(not GetOptions(
        "debug|D" => \$opt::D,
        "version" => \$opt::version,
        "verbose|v" => \$opt::verbose,
        "B|byte-offset" => \$opt::byte_offset,
        "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
        "d|dictionary-order" => \$opt::dictionary_order,
        "f|ignore-case" => \$opt::ignore_case,
        "g|general-numeric-sort" => \$opt::general_numeric_sort,
        "G|grep" => \$opt::grep,
        "F|file=s" => \$opt::file,
        "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
        "M|month-sort" => \$opt::month_sort,
        "h|human-numeric-sort" => \$opt::human_numeric_sort,
        "n|numeric-sort" => \$opt::numeric_sort,
        "N|numascii" => \$opt::numascii,
        "r|reverse" => \$opt::reverse,
        "R|random-sort" => \$opt::random_sort,
        "sort=s" => \$opt::sort,
        "V|version-sort" => \$opt::version_sort,
        "k|key=s" => \@opt::key,
        "t|field-separator=s" => \$opt::field_separator,
        "z|zero-terminated" => \$opt::zero_terminated,
    )) {
     exit(255);
 }
 # Basename the program was started as; 2grep implies --grep (below)
 $Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
 $Global::version = 20200328;
 if($opt::version) { version(); exit 0; }
 # -z: every readline in the program splits on NUL instead of newline
 if($opt::zero_terminated) { $/ = "\0"; }
 if(@opt::key) {
     # Default separator if --key = whitespace
     $Global::sep = '\s+';
     if(defined $opt::field_separator) { $Global::sep = $opt::field_separator; }
 }
 if($Global::progname eq "2grep") { $opt::grep = 1; }
 $Global::debug = $opt::D;
 parse_keydef();
 debug(my_dump(\@Global::keydefs),"\n");
 # First non-option argument is the sorted file to search
 my $file = shift;
 if(not defined $file) {
     error("No file to search in given");
     exit(255);
 }
 # Where get() takes the search strings from:
 # the command line, the file given with --file, or stdin
 if(@ARGV) {
     $opt::argv = 1;
 } elsif(defined $opt::file) {
     # skip
 } else {
     $opt::stdin = 1;
 }
 round:
 while(1) {
     # One search value is needed per key definition
     my @search_vals;
     for(@Global::keydefs) {
         my $val = get();
         if(not defined $val) {
             # No more search strings
             last round;
         }
         push @search_vals, $val;
     }
     if($opt::grep) {
         bgrep($file,@search_vals);
     } else {
         print bsearch($file,@search_vals);
     }
 }
 {
     # File handle for --file. Opened by the first call of get() and kept
     # between calls, so every call returns the next line.
     my $fh;
     sub get {
         # Get the next search string.
         # Uses the source selected by the main program:
         #   $opt::argv = command line, $opt::file = --file, $opt::stdin = stdin
         # Returns:
         #   $val = next search string (record separator removed)
         #   undef if there are no more search strings
         if($opt::argv) {
             # Search for strings on the command line
             return shift @ARGV;
         }
         if(defined $opt::file) {
             # Search for strings given with --file
             if(not $fh) {
                 # Open into the enclosing $fh: a 'my $fh' here would be a
                 # new variable that is gone before the read below
                 if(not open($fh, "<", $opt::file)) {
                     error("Cannot open $opt::file");
                     exit(255);
                 }
             }
             my $val = <$fh>;
             if(defined $val) { chomp $val; }
             return $val;
         }
         if($opt::stdin) {
             # Search for strings on stdin
             my $val = <>;
             if(defined $val) { chomp $val; }
             return $val;
         }
         # None of the sources is selected: should not happen
         die;
     }
 }
 sub bgrep {
     # Print all lines in the sorted file that start with the search values
     # Input:
     #   $file = name of the sorted file
     #   @search_vals = one search value per key definition
     # Returns: N/A
     my $file = shift;
     my @search_vals = @_;
     # bsearch() must return a byte position here, whatever the user asked
     # for. 'local' puts the user's setting back when bgrep returns.
     local $opt::byte_offset = 1;
     my $startpos = bsearch($file,@search_vals);
     my $fh;
     if(not open ($fh, "<", $file)) {
         error("Cannot open '$file'");
         exit 1;
     }
     seek($fh,$startpos,0) or die("Cannot seek in '$file'");
     # Allow for partial matches in grep (4 matches 40, A matches Aaa)
     for my $keydef (@Global::keydefs) {
         $keydef->{'partial_match'} = 1;
     }
     # defined(): a last line "0" without newline is false but is still a line
     while(defined(my $line = <$fh>)) {
         # compare() is non-zero for the first line that does not match
         if(compare($line,@search_vals)) { last; }
         print $line;
     }
     close $fh;
     for my $keydef (@Global::keydefs) {
         $keydef->{'partial_match'} = 0;
     }
 }
 sub bsearch {
    # Binary search on byte positions in a sorted file
    # Input:
    #   $file = name of the sorted file
    #   @search_vals = search values; given to compare() with each line read
    # Returns:
    #   if $opt::byte_offset: byte position of the line found, followed by "\n"
    #   otherwise: the line found (undef if the read hits end of file)
    my $file = shift;
    my @search_vals = @_;
    # $min/$max are byte positions narrowing in on the result
    my $min = 0;
    my $max = -s $file;
    my $fh;
    if(not open ($fh, "<", $file)) {
 	error("Cannot open '$file'");
 	exit 1;
    }
    my($line,$middle);
    # $minnl/$maxnl = position after the (partial) line read at $min/$max
    my $minnl = $min;
    my $maxnl = $max;
    while($max - $min > 1) {
 	$middle = int(($max + $min)/2);
 	seek($fh,$middle,0) or die("Cannot seek to $middle");
 	if($middle > 0) {
 	    # Read last half of a line
 	    <$fh>;
 	}
 	# Start of the first whole line after $middle
 	my $newline_pos = tell($fh);
 	debug("$min <= $middle <= $newline_pos <= $max\n");
 	debug("$minnl <= $newline_pos <= $maxnl\n");
 	if($newline_pos == $maxnl
 	   or
 	   eof($fh)
 	   or
 	   compare(($line = <$fh>),@search_vals) >= 0) {
 	    # We have seen this newline position before
 	    # or we are at the end of the file
 	    # or the line is >= the search values:
 	    # we should search the lower half
 	    $max = $middle;
 	    $maxnl = $newline_pos;
 	} else {
 	    # The line is < the search values:
 	    # we should search the upper half
 	    $min = $middle;
 	    $minnl = $newline_pos;
 	}
    }
    # Read the line starting at $minnl and decide between that line and
    # the line after it
    seek($fh,$minnl,0) or die("Cannot seek to $minnl");
    $line = <$fh>;
    if(compare($line,@search_vals) >= 0) {
 	# The line at $minnl is the result
 	if($opt::byte_offset) {
 	    return $minnl."\n";
 	} else {
 	    return $line;
 	}
    } else {
 	# The line at $minnl is smaller: the result is the line after it
 	if($opt::byte_offset) {
 	    return tell($fh)."\n";
 	} else {
 	    $line=<$fh>;
 	    return $line;
 	}
    }
 }
 sub parse_keydef {
     # Build @Global::keydefs from --key (or from the global order options
     # if no --key is given)
     # keydef syntax: F[.C][OPTS][,F[.C][OPTS]]
     # Each entry in @Global::keydefs is a hash with 'field', 'char' and
     # one entry per order option.
     # Returns: N/A
     # Order letter => name of the order option
     my %name_of = (
         "b" => 'ignore_leading_blanks',
         "d" => 'dictionary_order',
         "f" => 'ignore_case',
         "g" => 'general_numeric_sort',
         "i" => 'ignore_nonprinting',
         "M" => 'month_sort',
         "h" => 'human_numeric_sort',
         "n" => 'numeric_sort',
         "N" => 'numascii',
         "r" => 'reverse',
         "R" => 'random_sort',
         "V" => 'version_sort',
         );
     # Name of the order option => value given globally on the command line
     my %default_for = (
         'ignore_leading_blanks' => $opt::ignore_leading_blanks,
         'dictionary_order' => $opt::dictionary_order,
         'ignore_case' => $opt::ignore_case,
         'general_numeric_sort' => $opt::general_numeric_sort,
         'ignore_nonprinting' => $opt::ignore_nonprinting,
         'month_sort' => $opt::month_sort,
         'human_numeric_sort' => $opt::human_numeric_sort,
         'numeric_sort' => $opt::numeric_sort,
         'numascii' => $opt::numascii,
         'reverse' => $opt::reverse,
         'random_sort' => $opt::random_sort,
         'version_sort' => $opt::version_sort,
         );
     if(not @opt::key) {
         # Convert -n -r to -k1rn
         # with sep = undef
         $Global::sep = undef;
         push(@Global::keydefs,
              { 'field' => 1, 'char' => 1, %default_for });
         return;
     }
     # --key can be given several times and each can hold several keydefs
     for my $keydef (map { split /,/, $_ } @opt::key) {
         my ($field, undef, $char, $flags) =
             $keydef =~ /^(\d+)(\.(\d+))?([bdfgiMhnNRrV]+)?$/;
         if(not defined $field) {
             error("Keydef $keydef does not match F[.C][OPTS]");
             exit(255);
         }
         # Start from the global order options ...
         my $opt = { 'field' => $field, 'char' => $char || 1, %default_for };
         # ... and switch on the order letters given for this key
         for my $letter (split //, defined $flags ? $flags : "") {
             $opt->{$name_of{$letter}} = 1;
         }
         push(@Global::keydefs,$opt);
     }
 }
 sub compare {
     # Compare a line from the file to the search values, one key
     # definition at a time
     # Input:
     #   $line = line from the sorted file
     #   @search_vals = one search value per key definition
     # Returns:
     #   result of compare_single() for the first key that differs
     #   0 if no key differs
     my($line,@search_vals) = @_;
     chomp($line);
     debug("Compare: $line <=> @search_vals ");
     # Without a separator the whole line is the only field
     my @field = $Global::sep ? split(/$Global::sep/o, $line) : ($line);
     my $val_idx = 0;
     for my $keydef (@Global::keydefs) {
         # keydef = F[.C][OPTS][,F[.C][OPTS]]
         # Field F from char C on is compared to the next search value
         my $cmp = compare_single(substr($field[$keydef->{'field'}-1],
                                         $keydef->{'char'}-1),
                                  $search_vals[$val_idx++],
                                  $keydef);
         debug("== $cmp\n");
         # They differ on this key
         return $cmp if $cmp;
     }
     # No difference on any keydefs
     return 0;
 }
 sub compare_single {
     # Compare two values based on order options
     # Input:
     #   $left = key taken from the line in the file
     #   $right = search value
     #   $opt = hash ref with the order options of the key definition
     # Returns:
     #   negative, 0 or positive, like <=> and cmp
     my ($left,$right,$opt) = @_;
     debug("$left <=> $right");
     debug(my_dump($opt),"\n");
     if($opt->{'random_sort'}) {
         return rand() <=> rand();
     }
     if($opt->{'ignore_leading_blanks'}) {
         $left =~ s/^\s+//;
         $right =~ s/^\s+//;
     }
     if($opt->{'ignore_case'}) {
         $left = uc($left);
         $right = uc($right);
     }
     if($opt->{'partial_match'}) {
         # String 'foo' matches 'foobar'
         $left = substr($left,0,length $right);
     }
     if($opt->{'reverse'}) {
         ($left,$right) = ($right,$left);
     }
     if($opt->{'human_numeric_sort'}) {
         return multiply_binary_prefix($left) <=> multiply_binary_prefix($right);
     }
     if($opt->{'month_sort'}) {
         my %m;
         my @mon = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
         # Hash slice needs a list: JAN => 1 ... DEC => 12
         # ({1..12} would store a single hash ref under JAN)
         @m{@mon} = (1..12);
         # Anything that is not a month counts as 0 = before JAN
         return ($m{$left} || 0) <=> ($m{$right} || 0);
     }
     if($opt->{'numeric_sort'}) {
         return $left <=> $right;
     } elsif($opt->{'numascii'}) {
         # || and not 'or': 'or' binds weaker than return, so the string
         # comparison would never be reached
         return ($left <=> $right || $left cmp $right);
     } else {
         return $left cmp $right;
     }
 }
 sub multiply_binary_prefix(@) {
     # Evaluate numbers with unit prefixes
     # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80, Xi=2^90
     # (two letter prefixes are matched in any case: ki, KI, kI ...)
     # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^60, Z =2^70, Y =2^80, X =2^90
     # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24, x=10^27
     # 13G = 13*1024*1024*1024 = 13958643712
     # Input:
     #   @_ = strings with prefixes
     # Returns:
     #   list context: values with prefixes multiplied
     #   scalar context: the first value
     # NOTE(review): the rewritten string is run through string eval, so
     # input that is not a plain number is executed as Perl
     my @numbers = @_;
     # Letter number N (counted from 1) multiplies by 1024**N or 1000**N
     my @letters = qw(k m g t p e z y x);
     my $count = scalar(@letters);
     for my $number (@numbers) {
         # 1E3=1000, 1E-3=0.001
         $number =~ s/e([+-]?\d+)/*10**$1/gi;
         defined $number or next;
         # Two letter prefixes first, so 'Ki' is not seen as 'K'
         for my $n (1 .. $count) {
             my $letter = $letters[$n-1];
             my $factor = "*1024" x $n;
             $number =~ s/${letter}i/$factor/gi;
         }
         # Upper case single letter: powers of 1024
         for my $n (1 .. $count) {
             my $letter = uc($letters[$n-1]);
             my $factor = "*1024" x $n;
             $number =~ s/$letter/$factor/g;
         }
         # Lower case single letter: powers of 1000
         for my $n (1 .. $count) {
             my $letter = $letters[$n-1];
             my $factor = "*1000" x $n;
             $number =~ s/$letter/$factor/g;
         }
         # Compute the product
         $number = eval $number;
     }
     return wantarray ? @numbers : $numbers[0];
 }
 sub status {
     # Print each message followed by a newline to the status file handle
     # ($Global::status_fd if set, otherwise STDERR) and flush it
     # Returns: N/A
     my @w = @_;
     my $fh = $Global::status_fd || *STDERR;
     for my $msg (@w) {
         print {$fh} $msg, "\n";
     }
     $fh->flush();
 }
 sub status_no_nl {
     # Print the messages as they are (no newline added) to the status
     # file handle ($Global::status_fd if set, otherwise STDERR) and flush it
     # Returns: N/A
     my @w = @_;
     my $fh = $Global::status_fd || *STDERR;
     print {$fh} @w;
     $fh->flush();
 }
 sub warning {
     # Print each message as "PROGNAME: Warning: MESSAGE" + newline on the
     # status file handle
     # Returns: N/A
     my @w = @_;
     my $prog = $Global::progname || "parallel";
     my @parts;
     for my $msg (@w) {
         push @parts, $prog, ": Warning: ", $msg, "\n";
     }
     status_no_nl(@parts);
 }
 sub error {
     # Print each message as "PROGNAME: Error: MESSAGE" on the status file
     # handle; status() adds the newline
     # Returns: N/A
     my @w = @_;
     my $prog = $Global::progname || "parallel";
     my @lines;
     for my $msg (@w) {
         push @lines, $prog.": Error: ".$msg;
     }
     status(@lines);
 }
 sub die_bug {
     # Tell the user on STDERR how to report an internal error and exit
     # with status 255
     # Input:
     #   $bugid = id telling where in the code the error was found
     # Returns: N/A (exits)
     my $bugid = shift;
     my @report =
         ("$Global::progname: This should not happen. You have found a bug.\n",
          "Please submit a bug at https://gitlab.com/ole.tange/tangetools/-/issues\n",
          "and include:\n",
          "* The version number: $Global::version\n",
          "* The bugid: $bugid\n",
          "* The command line being run\n",
          "* The files being read (put the files on a webserver if they are big)\n",
          "\n",
          "If you get the error on smaller/fewer files, please include those instead.\n");
     print STDERR @report;
     exit(255);
 }
 sub version {
     # Print program name, version, copyright and license on STDOUT
     # Returns: N/A
     my @lines = (
         "$Global::progname $Global::version",
         "Copyright (C) 2016-2020",
         "Ole Tange and Free Software Foundation, Inc.",
         "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
         "This is free software: you are free to change and redistribute it.",
         "$Global::progname comes with no warranty.",
         "",
         "Web site: https://gitlab.com/ole.tange/tangetools/\n",
         );
     print join("\n", @lines);
 }
 sub my_dump(@) {
     # Returns:
     #   ascii expression of object if Data::Dump(er) is installed
     #   error code otherwise
     my @dump_this = (@_);
     # First choice: Data::Dump
     eval "use Data::Dump qw(dump);";
     if (not $@) {
         # Create a dummy Data::Dump:dump as Hans Schou sometimes has
         # it undefined
         # NOTE(review): the name has a single ':' - looks like a typo for
         # Data::Dump::dump. Kept as it is.
         eval "sub Data::Dump:dump {}";
         eval "use Data::Dump qw(dump);";
         return (Data::Dump::dump(@dump_this));
     }
     # Data::Dump not installed: second choice is Data::Dumper
     eval "use Data::Dumper;";
     if (not $@) {
         return Dumper(@dump_this);
     }
     # Neither can be loaded: tell the user and return the message
     my $err =  "Neither Data::Dump nor Data::Dumper is installed\n".
         "Not dumping output\n";
     ::status($err);
     return $err;
 }
 sub debug(@) {
     # Print the arguments on STDOUT if debugging is on ($Global::debug)
     # Returns: N/A
     return unless $Global::debug;
     print @_;
 }
--- a/2search/2search
+++ b/2search/2search
@ -0,0 +1,777 @@
 #!/usr/bin/perl
 =head1 NAME
 2search - binary search through sorted text files
 2grep - binary search+grep through sorted text files
 =head1 SYNOPSIS
 B<2search> [-nrfB] file string [string...]
 B<2search> --grep [-nrf] file string [string...]
 B<2grep> [-nrf] file string [string...]
 ... | B<2search> [-nrfB] file
 ... | B<2search> --grep [-nrf] file
 ... | B<2grep> [-nrf] file
 =head1 DESCRIPTION
 B<2search> searches a sorted file for a string. It outputs the
 following line or the byte position of this line, which is where the
 string would have been if it had been in the sorted file.
 B<2grep> outputs all lines starting with a given string. The file must
 be sorted.
 =over 9
 =item B<--ignore-leading-blanks>
 =item B<-b>
 ignore leading blanks
 =item B<--byte-offset>
 =item B<-B>
 print byte position where string would have been
 =item B<--dictionary-order> (not implemented)
 =item B<-d>
 consider only blanks and alphanumeric characters
 =item B<--debug> (not implemented)
 =item B<-D>
 annotate the part of the line used to sort, and warn about
 questionable usage to stderr
 =item B<--ignore-case>
 =item B<-f>
 fold lower case to upper case characters
 =item B<--file> I<file>
 =item B<-F> I<file>
 search for all lines in I<file>
 =item B<--general-numeric-sort> (not implemented)
 =item B<-g>
 compare according to general numerical value
 =item B<--ignore-nonprinting> (not implemented)
 =item B<-i>
 consider only printable characters
 =item B<--month-sort>
 =item B<-M>
 compare (unknown) < 'JAN' < ... < 'DEC'
 =item B<--human-numeric-sort>
 =item B<-h>
 compare human readable numbers (e.g., 2K 1G)
 =item B<--key=KEYDEF> (not implemented)
 =item B<-k>
 sort via a key; KEYDEF gives location and type
 =item B<--numeric-sort>
 =item B<-n>
 compare according to string numerical value. If numerical values are
 the same: split the string into blocks of numbers and non-numbers, and
 compare numbers as numbers and strings as strings.
 This will sort like this: chr3 chr11 3chr 11chr
 =item B<--numascii>
 =item B<-N>
 compare according to string numerical value. If numerical values are
 the same: compare as strings
 =item B<--random-sort>
 =item B<-R>
 sort by random hash of keys
 =item B<--reverse>
 =item B<-r>
 reverse the result of comparisons
 =item B<--sort=WORD> (not implemented)
 sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
 B<-M>, numeric B<-n>, random B<-R>, version B<-V>
 =item B<-t>
 =item B<--field-separator=SEP>
 use SEP instead of non-blank to blank transition
 =item B<-z>
 =item B<--zero-terminated>
 end lines with 0 byte, not newline
 =back
 =head1 EXAMPLES
 =head2 Single key
 Input is sorted by Chromosome,Position:
  SampleID Position Chromosome 
  foo      10000123 chr3       
  foo      10000125 chr3       
  foo      9999998  chr11      
  foo      10000124 chr11      
  foo      10000126 chr11      
 To find all chr3:
  2grep -n -k3 inputfile chr3
 -n will split 'chr3' into 'chr' which is compared asciibetically and
 '3' which is compared numerically.
 =head2 Not implemented
 To find all lines with chr3,10000125:
  2grep -k3n,2n inputfile chr3 10000125
 =head1 REPORTING BUGS
 B<2search> is part of tangetools. Report bugs to <tools@tange.dk>.
 =head1 AUTHOR
 Copyright (C) 2016-2020 Ole Tange http://ole.tange.dk
 =head1 LICENSE
 Copyright (C) 2013 Free Software Foundation, Inc.
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 at your option any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 =head2 Documentation license I
 Permission is granted to copy, distribute and/or modify this documentation
 under the terms of the GNU Free Documentation License, Version 1.3 or
 any later version published by the Free Software Foundation; with no
 Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
 Texts.  A copy of the license is included in the file fdl.txt.
 =head2 Documentation license II
 You are free:
 =over 9
 =item B<to Share>
 to copy, distribute and transmit the work
 =item B<to Remix>
 to adapt the work
 =back
 Under the following conditions:
 =over 9
 =item B<Attribution>
 You must attribute the work in the manner specified by the author or
 licensor (but not in any way that suggests that they endorse you or
 your use of the work).
 =item B<Share Alike>
 If you alter, transform, or build upon this work, you may distribute
 the resulting work only under the same, similar or a compatible
 license.
 =back
 With the understanding that:
 =over 9
 =item B<Waiver>
 Any of the above conditions can be waived if you get permission from
 the copyright holder.
 =item B<Public Domain>
 Where the work or any of its elements is in the public domain under
 applicable law, that status is in no way affected by the license.
 =item B<Other Rights>
 In no way are any of the following rights affected by the license:
 =over 9
 =item *
 Your fair dealing or fair use rights, or other applicable
 copyright exceptions and limitations;
 =item *
 The author's moral rights;
 =item *
 Rights other persons may have either in the work itself or in
 how the work is used, such as publicity or privacy rights.
 =back
 =item B<Notice>
 For any reuse or distribution, you must make clear to others the
 license terms of this work.
 =back
 A copy of the full license is included in the file as cc-by-sa.txt.
 =head1 DEPENDENCIES
 B<2search>/B<2grep> uses Perl.
 =head1 SEE ALSO
 B<grep>(1), B<sort>(1).
 =cut
 use strict;
 use Getopt::Long;
 Getopt::Long::Configure("bundling","require_order");
 # Parse the command line into the $opt:: variables. Getopt::Long prints
 # its own message for unknown or malformed options and returns false;
 # stop instead of searching with half-parsed options.
 if(not GetOptions(
        "debug|D" => \$opt::D,
        "version" => \$opt::version,
        "verbose|v" => \$opt::verbose,
        "B|byte-offset" => \$opt::byte_offset,
        "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
        "d|dictionary-order" => \$opt::dictionary_order,
        "f|ignore-case" => \$opt::ignore_case,
        "g|general-numeric-sort" => \$opt::general_numeric_sort,
        "G|grep" => \$opt::grep,
        "F|file=s" => \$opt::file,
        "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
        "M|month-sort" => \$opt::month_sort,
        "h|human-numeric-sort" => \$opt::human_numeric_sort,
        "n|numeric-sort" => \$opt::numeric_sort,
        "N|numascii" => \$opt::numascii,
        "r|reverse" => \$opt::reverse,
        "R|random-sort" => \$opt::random_sort,
        "sort=s" => \$opt::sort,
        "V|version-sort" => \$opt::version_sort,
        "k|key=s" => \@opt::key,
        "t|field-separator=s" => \$opt::field_separator,
        "z|zero-terminated" => \$opt::zero_terminated,
    )) {
     exit(255);
 }
 # Basename the program was started as; 2grep implies --grep (below)
 $Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
 $Global::version = 20200328;
 if($opt::version) { version(); exit 0; }
 # -z: every readline in the program splits on NUL instead of newline
 if($opt::zero_terminated) { $/ = "\0"; }
 if(@opt::key) {
     # Default separator if --key = whitespace
     $Global::sep = '\s+';
     if(defined $opt::field_separator) { $Global::sep = $opt::field_separator; }
 }
 if($Global::progname eq "2grep") { $opt::grep = 1; }
 $Global::debug = $opt::D;
 parse_keydef();
 debug(my_dump(\@Global::keydefs),"\n");
 # First non-option argument is the sorted file to search
 my $file = shift;
 if(not defined $file) {
     error("No file to search in given");
     exit(255);
 }
 # Where get() takes the search strings from:
 # the command line, the file given with --file, or stdin
 if(@ARGV) {
     $opt::argv = 1;
 } elsif(defined $opt::file) {
     # skip
 } else {
     $opt::stdin = 1;
 }
 round:
 while(1) {
     # One search value is needed per key definition
     my @search_vals;
     for(@Global::keydefs) {
         my $val = get();
         if(not defined $val) {
             # No more search strings
             last round;
         }
         push @search_vals, $val;
     }
     if($opt::grep) {
         bgrep($file,@search_vals);
     } else {
         print bsearch($file,@search_vals);
     }
 }
 {
     # File handle for --file. Opened by the first call of get() and kept
     # between calls, so every call returns the next line.
     my $fh;
     sub get {
         # Get the next search string.
         # Uses the source selected by the main program:
         #   $opt::argv = command line, $opt::file = --file, $opt::stdin = stdin
         # Returns:
         #   $val = next search string (record separator removed)
         #   undef if there are no more search strings
         if($opt::argv) {
             # Search for strings on the command line
             return shift @ARGV;
         }
         if(defined $opt::file) {
             # Search for strings given with --file
             if(not $fh) {
                 # Open into the enclosing $fh: a 'my $fh' here would be a
                 # new variable that is gone before the read below
                 if(not open($fh, "<", $opt::file)) {
                     error("Cannot open $opt::file");
                     exit(255);
                 }
             }
             my $val = <$fh>;
             if(defined $val) { chomp $val; }
             return $val;
         }
         if($opt::stdin) {
             # Search for strings on stdin
             my $val = <>;
             if(defined $val) { chomp $val; }
             return $val;
         }
         # None of the sources is selected: should not happen
         die;
     }
 }
 sub bgrep {
     # Print all lines in the sorted file that start with the search values
     # Input:
     #   $file = name of the sorted file
     #   @search_vals = one search value per key definition
     # Returns: N/A
     my $file = shift;
     my @search_vals = @_;
     # bsearch() must return a byte position here, whatever the user asked
     # for. 'local' puts the user's setting back when bgrep returns.
     local $opt::byte_offset = 1;
     my $startpos = bsearch($file,@search_vals);
     my $fh;
     if(not open ($fh, "<", $file)) {
         error("Cannot open '$file'");
         exit 1;
     }
     seek($fh,$startpos,0) or die("Cannot seek in '$file'");
     # Allow for partial matches in grep (4 matches 40, A matches Aaa)
     for my $keydef (@Global::keydefs) {
         $keydef->{'partial_match'} = 1;
     }
     # defined(): a last line "0" without newline is false but is still a line
     while(defined(my $line = <$fh>)) {
         # compare() is non-zero for the first line that does not match
         if(compare($line,@search_vals)) { last; }
         print $line;
     }
     close $fh;
     for my $keydef (@Global::keydefs) {
         $keydef->{'partial_match'} = 0;
     }
 }
 sub bsearch {
    # Binary search on byte positions in a sorted file
    # Input:
    #   $file = name of the sorted file
    #   @search_vals = search values; given to compare() with each line read
    # Returns:
    #   if $opt::byte_offset: byte position of the line found, followed by "\n"
    #   otherwise: the line found (undef if the read hits end of file)
    my $file = shift;
    my @search_vals = @_;
    # $min/$max are byte positions narrowing in on the result
    my $min = 0;
    my $max = -s $file;
    my $fh;
    if(not open ($fh, "<", $file)) {
 	error("Cannot open '$file'");
 	exit 1;
    }
    my($line,$middle);
    # $minnl/$maxnl = position after the (partial) line read at $min/$max
    my $minnl = $min;
    my $maxnl = $max;
    while($max - $min > 1) {
 	$middle = int(($max + $min)/2);
 	seek($fh,$middle,0) or die("Cannot seek to $middle");
 	if($middle > 0) {
 	    # Read last half of a line
 	    <$fh>;
 	}
 	# Start of the first whole line after $middle
 	my $newline_pos = tell($fh);
 	debug("$min <= $middle <= $newline_pos <= $max\n");
 	debug("$minnl <= $newline_pos <= $maxnl\n");
 	if($newline_pos == $maxnl
 	   or
 	   eof($fh)
 	   or
 	   compare(($line = <$fh>),@search_vals) >= 0) {
 	    # We have seen this newline position before
 	    # or we are at the end of the file
 	    # or the line is >= the search values:
 	    # we should search the lower half
 	    $max = $middle;
 	    $maxnl = $newline_pos;
 	} else {
 	    # The line is < the search values:
 	    # we should search the upper half
 	    $min = $middle;
 	    $minnl = $newline_pos;
 	}
    }
    # Read the line starting at $minnl and decide between that line and
    # the line after it
    seek($fh,$minnl,0) or die("Cannot seek to $minnl");
    $line = <$fh>;
    if(compare($line,@search_vals) >= 0) {
 	# The line at $minnl is the result
 	if($opt::byte_offset) {
 	    return $minnl."\n";
 	} else {
 	    return $line;
 	}
    } else {
 	# The line at $minnl is smaller: the result is the line after it
 	if($opt::byte_offset) {
 	    return tell($fh)."\n";
 	} else {
 	    $line=<$fh>;
 	    return $line;
 	}
    }
 }
 sub parse_keydef {
     # Build @Global::keydefs from --key (or from the global order options
     # if no --key is given)
     # keydef syntax: F[.C][OPTS][,F[.C][OPTS]]
     # Each entry in @Global::keydefs is a hash with 'field', 'char' and
     # one entry per order option.
     # Returns: N/A
     # Order letter => name of the order option
     my %name_of = (
         "b" => 'ignore_leading_blanks',
         "d" => 'dictionary_order',
         "f" => 'ignore_case',
         "g" => 'general_numeric_sort',
         "i" => 'ignore_nonprinting',
         "M" => 'month_sort',
         "h" => 'human_numeric_sort',
         "n" => 'numeric_sort',
         "N" => 'numascii',
         "r" => 'reverse',
         "R" => 'random_sort',
         "V" => 'version_sort',
         );
     # Name of the order option => value given globally on the command line
     my %default_for = (
         'ignore_leading_blanks' => $opt::ignore_leading_blanks,
         'dictionary_order' => $opt::dictionary_order,
         'ignore_case' => $opt::ignore_case,
         'general_numeric_sort' => $opt::general_numeric_sort,
         'ignore_nonprinting' => $opt::ignore_nonprinting,
         'month_sort' => $opt::month_sort,
         'human_numeric_sort' => $opt::human_numeric_sort,
         'numeric_sort' => $opt::numeric_sort,
         'numascii' => $opt::numascii,
         'reverse' => $opt::reverse,
         'random_sort' => $opt::random_sort,
         'version_sort' => $opt::version_sort,
         );
     if(not @opt::key) {
         # Convert -n -r to -k1rn
         # with sep = undef
         $Global::sep = undef;
         push(@Global::keydefs,
              { 'field' => 1, 'char' => 1, %default_for });
         return;
     }
     # --key can be given several times and each can hold several keydefs
     for my $keydef (map { split /,/, $_ } @opt::key) {
         my ($field, undef, $char, $flags) =
             $keydef =~ /^(\d+)(\.(\d+))?([bdfgiMhnNRrV]+)?$/;
         if(not defined $field) {
             error("Keydef $keydef does not match F[.C][OPTS]");
             exit(255);
         }
         # Start from the global order options ...
         my $opt = { 'field' => $field, 'char' => $char || 1, %default_for };
         # ... and switch on the order letters given for this key
         for my $letter (split //, defined $flags ? $flags : "") {
             $opt->{$name_of{$letter}} = 1;
         }
         push(@Global::keydefs,$opt);
     }
 }
 sub compare {
     # Compare a line from the file to the search values, one key
     # definition at a time
     # Input:
     #   $line = line from the sorted file
     #   @search_vals = one search value per key definition
     # Returns:
     #   result of compare_single() for the first key that differs
     #   0 if no key differs
     my($line,@search_vals) = @_;
     chomp($line);
     debug("Compare: $line <=> @search_vals ");
     # Without a separator the whole line is the only field
     my @field = $Global::sep ? split(/$Global::sep/o, $line) : ($line);
     my $val_idx = 0;
     for my $keydef (@Global::keydefs) {
         # keydef = F[.C][OPTS][,F[.C][OPTS]]
         # Field F from char C on is compared to the next search value
         my $cmp = compare_single(substr($field[$keydef->{'field'}-1],
                                         $keydef->{'char'}-1),
                                  $search_vals[$val_idx++],
                                  $keydef);
         debug("== $cmp\n");
         # They differ on this key
         return $cmp if $cmp;
     }
     # No difference on any keydefs
     return 0;
 }
 sub compare_single {
     # Compare two values based on order options
     # Input:
     #   $left = key taken from the line in the file
     #   $right = search value
     #   $opt = hash ref with the order options of the key definition
     # Returns:
     #   negative, 0 or positive, like <=> and cmp
     my ($left,$right,$opt) = @_;
     debug("$left <=> $right");
     debug(my_dump($opt),"\n");
     if($opt->{'random_sort'}) {
         return rand() <=> rand();
     }
     if($opt->{'ignore_leading_blanks'}) {
         $left =~ s/^\s+//;
         $right =~ s/^\s+//;
     }
     if($opt->{'ignore_case'}) {
         $left = uc($left);
         $right = uc($right);
     }
     if($opt->{'partial_match'}) {
         # String 'foo' matches 'foobar'
         $left = substr($left,0,length $right);
     }
     if($opt->{'reverse'}) {
         ($left,$right) = ($right,$left);
     }
     if($opt->{'human_numeric_sort'}) {
         return multiply_binary_prefix($left) <=> multiply_binary_prefix($right);
     }
     if($opt->{'month_sort'}) {
         my %m;
         my @mon = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
         # Hash slice needs a list: JAN => 1 ... DEC => 12
         # ({1..12} would store a single hash ref under JAN)
         @m{@mon} = (1..12);
         # Anything that is not a month counts as 0 = before JAN
         return ($m{$left} || 0) <=> ($m{$right} || 0);
     }
     if($opt->{'numeric_sort'}) {
         return $left <=> $right;
     } elsif($opt->{'numascii'}) {
         # || and not 'or': 'or' binds weaker than return, so the string
         # comparison would never be reached
         return ($left <=> $right || $left cmp $right);
     } else {
         return $left cmp $right;
     }
 }
 sub multiply_binary_prefix(@) {
     # Evaluate numbers with unit prefixes
     # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80, Xi=2^90
     # (two letter prefixes are matched in any case: ki, KI, kI ...)
     # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^60, Z =2^70, Y =2^80, X =2^90
     # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24, x=10^27
     # 13G = 13*1024*1024*1024 = 13958643712
     # Input:
     #   @_ = strings with prefixes
     # Returns:
     #   list context: values with prefixes multiplied
     #   scalar context: the first value
     # NOTE(review): the rewritten string is run through string eval, so
     # input that is not a plain number is executed as Perl
     my @numbers = @_;
     # Letter number N (counted from 1) multiplies by 1024**N or 1000**N
     my @letters = qw(k m g t p e z y x);
     my $count = scalar(@letters);
     for my $number (@numbers) {
         # 1E3=1000, 1E-3=0.001
         $number =~ s/e([+-]?\d+)/*10**$1/gi;
         defined $number or next;
         # Two letter prefixes first, so 'Ki' is not seen as 'K'
         for my $n (1 .. $count) {
             my $letter = $letters[$n-1];
             my $factor = "*1024" x $n;
             $number =~ s/${letter}i/$factor/gi;
         }
         # Upper case single letter: powers of 1024
         for my $n (1 .. $count) {
             my $letter = uc($letters[$n-1]);
             my $factor = "*1024" x $n;
             $number =~ s/$letter/$factor/g;
         }
         # Lower case single letter: powers of 1000
         for my $n (1 .. $count) {
             my $letter = $letters[$n-1];
             my $factor = "*1000" x $n;
             $number =~ s/$letter/$factor/g;
         }
         # Compute the product
         $number = eval $number;
     }
     return wantarray ? @numbers : $numbers[0];
 }
 sub status {
     # Print each message followed by a newline to the status file handle
     # ($Global::status_fd if set, otherwise STDERR) and flush it
     # Returns: N/A
     my @w = @_;
     my $fh = $Global::status_fd || *STDERR;
     for my $msg (@w) {
         print {$fh} $msg, "\n";
     }
     $fh->flush();
 }
 sub status_no_nl {
     # Print the messages as they are (no newline added) to the status
     # file handle ($Global::status_fd if set, otherwise STDERR) and flush it
     # Returns: N/A
     my @w = @_;
     my $fh = $Global::status_fd || *STDERR;
     print {$fh} @w;
     $fh->flush();
 }
 sub warning {
     # Print each message as "PROGNAME: Warning: MESSAGE" + newline on the
     # status file handle
     # Returns: N/A
     my @w = @_;
     my $prog = $Global::progname || "parallel";
     my @parts;
     for my $msg (@w) {
         push @parts, $prog, ": Warning: ", $msg, "\n";
     }
     status_no_nl(@parts);
 }
 sub error {
     # Print each message as "PROGNAME: Error: MESSAGE" on the status file
     # handle; status() adds the newline
     # Returns: N/A
     my @w = @_;
     my $prog = $Global::progname || "parallel";
     my @lines;
     for my $msg (@w) {
         push @lines, $prog.": Error: ".$msg;
     }
     status(@lines);
 }
 sub die_bug {
     # Tell the user on STDERR how to report an internal error and exit
     # with status 255
     # Input:
     #   $bugid = id telling where in the code the error was found
     # Returns: N/A (exits)
     my $bugid = shift;
     my @report =
         ("$Global::progname: This should not happen. You have found a bug.\n",
          "Please submit a bug at https://gitlab.com/ole.tange/tangetools/-/issues\n",
          "and include:\n",
          "* The version number: $Global::version\n",
          "* The bugid: $bugid\n",
          "* The command line being run\n",
          "* The files being read (put the files on a webserver if they are big)\n",
          "\n",
          "If you get the error on smaller/fewer files, please include those instead.\n");
     print STDERR @report;
     exit(255);
 }
 sub version {
     # Print program name, version, copyright and license on STDOUT
     # Returns: N/A
     my @lines = (
         "$Global::progname $Global::version",
         "Copyright (C) 2016-2020",
         "Ole Tange and Free Software Foundation, Inc.",
         "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
         "This is free software: you are free to change and redistribute it.",
         "$Global::progname comes with no warranty.",
         "",
         "Web site: https://gitlab.com/ole.tange/tangetools/\n",
         );
     print join("\n", @lines);
 }
 sub my_dump(@) {
     # Returns:
     #   ascii expression of object if Data::Dump(er) is installed
     #   error code otherwise
     my @dump_this = (@_);
     # First choice: Data::Dump
     eval "use Data::Dump qw(dump);";
     if (not $@) {
         # Create a dummy Data::Dump:dump as Hans Schou sometimes has
         # it undefined
         # NOTE(review): the name has a single ':' - looks like a typo for
         # Data::Dump::dump. Kept as it is.
         eval "sub Data::Dump:dump {}";
         eval "use Data::Dump qw(dump);";
         return (Data::Dump::dump(@dump_this));
     }
     # Data::Dump not installed: second choice is Data::Dumper
     eval "use Data::Dumper;";
     if (not $@) {
         return Dumper(@dump_this);
     }
     # Neither can be loaded: tell the user and return the message
     my $err =  "Neither Data::Dump nor Data::Dumper is installed\n".
         "Not dumping output\n";
     ::status($err);
     return $err;
 }
 sub debug(@) {
     # Print the arguments on STDOUT if debugging is on ($Global::debug)
     # Returns: N/A
     return unless $Global::debug;
     print @_;
 }
--- a/2search/regressiontest
+++ b/2search/regressiontest
@ -0,0 +1,194 @@
 #!/bin/bash
 # Regression test driver for 2search/2grep.
 #
 # test_tmp is a unique path that exported test functions use as a
 # prefix for their own scratch files (e.g. ${test_tmp}_n).
 # mktemp is used instead of the deprecated Debian-specific tempfile
 # helper, which is not available on all systems.
 test_tmp=$(mktemp)
 export test_tmp
 # Remove the placeholder file itself when this script exits
 trap 'rm -f "$test_tmp"' EXIT
 opt_tester() {
    # Usage: opt_tester [options...]
    #
    # Sort a series of tiny inputs with 'sort <options>' and look up
    # the keys 0, 2, 2.1 and 100000 in each result with
    # '2search <options>', first printing the matching line and then
    # (with -B) the byte offset.
    #
    # The options are deliberately kept in one string and expanded
    # unquoted so that several options can be passed to sort/2search.
    opt="$*"
    # mktemp replaces the deprecated Debian-specific tempfile helper
    tmp=$(mktemp)
    test_2search() {
 	# Show the file content on one line, then search it
 	xargs echo Search in < "$tmp"
 	2search $opt "$tmp" 0 2 2.1 100000
 	2search $opt -B "$tmp" 0 2 2.1 100000
    }
    # Empty file
    true |
 	sort $opt > "$tmp"
    echo Search in null file
    test_2search
    # A single empty line
    echo |
 	sort $opt > "$tmp"
    echo Search in newline
    test_2search
    # One to three numbers; the long decimal form moves between
    # positions to vary the line lengths (and so the byte offsets)
    printf '%s\n' 1.000000000 |
 	sort $opt > "$tmp"
    test_2search
    printf '%s\n' 1.000000000 2 |
 	sort $opt > "$tmp"
    test_2search
    printf '%s\n' 1 2.000000000 |
 	sort $opt > "$tmp"
    test_2search
    printf '%s\n' 1.000000000 2 3 |
 	sort $opt > "$tmp"
    test_2search
    printf '%s\n' 1 2.000000000 3 |
 	sort $opt > "$tmp"
    test_2search
    printf '%s\n' 1 2 3.000000000 |
 	sort $opt > "$tmp"
    test_2search
    rm "$tmp"
 }
 export -f opt_tester
 test_n() {
    # Look up the keys 0, 2, 2.1 and 100000 with '2search -n' (line
    # output) and '2search -nB' (byte offset) in a series of tiny,
    # numerically sorted files.
    tmp=${test_tmp}_n
    search_n() {
 	2search -n "$tmp" 0 2 2.1 100000
 	2search -nB "$tmp" 0 2 2.1 100000
    }
    # Empty file
    true > "$tmp"
    echo Search in null file
    search_n
    # A single empty line. Plain echo: the previous
    # 'xargs echo Search in newline' had no input redirection and so
    # read (and could block on) the caller's stdin.
    echo > "$tmp"
    echo Search in newline
    search_n
    # One to three numbers; the long decimal form moves between
    # positions to vary the line lengths (and so the byte offsets)
    printf '%s\n' 1.000000000 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    printf '%s\n' 1.000000000 2 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    printf '%s\n' 1 2.000000000 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    printf '%s\n' 1.000000000 2 3 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    printf '%s\n' 1 2.000000000 3 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    printf '%s\n' 1 2 3.000000000 > "$tmp"
    xargs echo Search in < "$tmp"
    search_n
    rm "$tmp"
 }
 test_n_opt() {
    # Run the shared opt_tester scenarios with the option -n
    opt_tester -n
 }
 test_rn_opt() {
    # Run the shared opt_tester scenarios with the options -rn
    opt_tester -rn
 }
 test_r_opt() {
    # Run the shared opt_tester scenarios.
    # NOTE(review): this passes -rn, exactly like test_rn_opt; the
    # function name suggests plain -r was intended. Confirm, and
    # regenerate regressiontest.out if the option is changed.
    opt_tester -rn
 }
 test_k32_2n_1n() {
    # Search a tab separated table with 2grep using the key
    # definition -k3N,2N,1n for the triple 'Sample 10', chr10, 111.
    #
    # The scratch file is removed at the end. Its randomly generated
    # name must not be printed: all output of this function is
    # compared against the recorded regressiontest.out.
    tmp=$(mktemp)
    cat >"$tmp" <<EOF
 1	chr1	Sample 1
 11	chr1	Sample 1
 111	chr1	Sample 1
 1111	chr1	Sample 1
 11111	chr1	Sample 1
 111111	chr1	Sample 1
 1	chr2	Sample 1
 22	chr2	Sample 1
 111	chr2	Sample 1
 2222	chr2	Sample 1
 11111	chr2	Sample 1
 111111	chr2	Sample 1
 1	chr10	Sample 1
 11	chr10	Sample 1
 111	chr10	Sample 1
 1111	chr10	Sample 1
 11111	chr10	Sample 1
 111111	chr10	Sample 1
 1	chr1	Sample 2
 11	chr1	Sample 2
 111	chr1	Sample 2
 1111	chr1	Sample 2
 11111	chr1	Sample 2
 111111	chr1	Sample 2
 1	chr2	Sample 2
 22	chr2	Sample 2
 111	chr2	Sample 2
 2222	chr2	Sample 2
 11111	chr2	Sample 2
 111111	chr2	Sample 2
 1	chr10	Sample 2
 11	chr10	Sample 2
 111	chr10	Sample 2
 1111	chr10	Sample 2
 11111	chr10	Sample 2
 111111	chr10	Sample 2
 1	chr1	Sample 10
 11	chr1	Sample 10
 111	chr1	Sample 10
 1111	chr1	Sample 10
 11111	chr1	Sample 10
 111111	chr1	Sample 10
 1	chr2	Sample 10
 22	chr2	Sample 10
 111	chr2	Sample 10
 2222	chr2	Sample 10
 11111	chr2	Sample 10
 111111	chr2	Sample 10
 1	chr10	Sample 10
 11	chr10	Sample 10
 111	chr10	Sample 10
 1111	chr10	Sample 10
 11111	chr10	Sample 10
 111111	chr10	Sample 10
 EOF
    2grep -k3N,2N,1n "$tmp" 'Sample 10' chr10 111
    rm "$tmp"
 }
 test_partial_line() {
    # Search for the string 3 in the numbers 1..100 sorted in C-locale
    # byte order, once for each of the three equivalent invocations:
    # '2search --grep', '2grep', and 2grep reading the search string
    # from stdin.
    # mktemp replaces the deprecated Debian-specific tempfile helper
    tmp=$(mktemp)
    seq 100 | LC_ALL=C sort > "$tmp"
    echo '### 2search --grep'
    2search --grep "$tmp" 3
    echo '### 2grep'
    2grep "$tmp" 3
    echo '### ... | 2grep'
    echo 3 | 2grep "$tmp"
    rm "$tmp"
 }
 # Export every shell function whose name contains 'test_' so that the
 # shells started by parallel can call them
 export -f $(compgen -A function | grep test_)
 # Run the functions in sorted name order through GNU parallel with
 # stderr merged into stdout, collecting everything in
 # regressiontest.new. The recorded output shows each line prefixed
 # with the function name followed by a tab.
 compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1' > regressiontest.new
 # Compare with the recorded expected output; any diff output is a
 # regression (or an intended change that must be recorded)
 diff regressiontest.new regressiontest.out
--- a/2search/regressiontest.out
+++ b/2search/regressiontest.out
@ -0,0 +1,280 @@
 test_k32_2n_1n	111	chr10	Sample 10
 test_k32_2n_1n	1111	chr10	Sample 10
 test_k32_2n_1n	11111	chr10	Sample 10
 test_k32_2n_1n	111111	chr10	Sample 10
 test_n	Search in null file
 test_n	0
 test_n	0
 test_n	0
 test_n	0
 test_n	Search in newline
 test_n	
 test_n	0
 test_n	1
 test_n	1
 test_n	1
 test_n	Search in 1.000000000
 test_n	1.000000000
 test_n	0
 test_n	12
 test_n	12
 test_n	12
 test_n	Search in 1.000000000 2
 test_n	1.000000000
 test_n	2
 test_n	0
 test_n	12
 test_n	14
 test_n	14
 test_n	Search in 1 2.000000000
 test_n	1
 test_n	2.000000000
 test_n	0
 test_n	2
 test_n	14
 test_n	14
 test_n	Search in 1.000000000 2 3
 test_n	1.000000000
 test_n	2
 test_n	3
 test_n	0
 test_n	12
 test_n	14
 test_n	16
 test_n	Search in 1 2.000000000 3
 test_n	1
 test_n	2.000000000
 test_n	3
 test_n	0
 test_n	2
 test_n	14
 test_n	16
 test_n	Search in 1 2 3.000000000
 test_n	1
 test_n	2
 test_n	3.000000000
 test_n	0
 test_n	2
 test_n	4
 test_n	16
 test_n_opt	Search in null file
 test_n_opt	Search in
 test_n_opt	0
 test_n_opt	0
 test_n_opt	0
 test_n_opt	0
 test_n_opt	Search in newline
 test_n_opt	Search in
 test_n_opt	
 test_n_opt	0
 test_n_opt	1
 test_n_opt	1
 test_n_opt	1
 test_n_opt	Search in 1.000000000
 test_n_opt	1.000000000
 test_n_opt	0
 test_n_opt	12
 test_n_opt	12
 test_n_opt	12
 test_n_opt	Search in 1.000000000 2
 test_n_opt	1.000000000
 test_n_opt	2
 test_n_opt	0
 test_n_opt	12
 test_n_opt	14
 test_n_opt	14
 test_n_opt	Search in 1 2.000000000
 test_n_opt	1
 test_n_opt	2.000000000
 test_n_opt	0
 test_n_opt	2
 test_n_opt	14
 test_n_opt	14
 test_n_opt	Search in 1.000000000 2 3
 test_n_opt	1.000000000
 test_n_opt	2
 test_n_opt	3
 test_n_opt	0
 test_n_opt	12
 test_n_opt	14
 test_n_opt	16
 test_n_opt	Search in 1 2.000000000 3
 test_n_opt	1
 test_n_opt	2.000000000
 test_n_opt	3
 test_n_opt	0
 test_n_opt	2
 test_n_opt	14
 test_n_opt	16
 test_n_opt	Search in 1 2 3.000000000
 test_n_opt	1
 test_n_opt	2
 test_n_opt	3.000000000
 test_n_opt	0
 test_n_opt	2
 test_n_opt	4
 test_n_opt	16
 test_partial_line	### 2search --grep
 test_partial_line	3
 test_partial_line	30
 test_partial_line	31
 test_partial_line	32
 test_partial_line	33
 test_partial_line	34
 test_partial_line	35
 test_partial_line	36
 test_partial_line	37
 test_partial_line	38
 test_partial_line	39
 test_partial_line	### 2grep
 test_partial_line	3
 test_partial_line	30
 test_partial_line	31
 test_partial_line	32
 test_partial_line	33
 test_partial_line	34
 test_partial_line	35
 test_partial_line	36
 test_partial_line	37
 test_partial_line	38
 test_partial_line	39
 test_partial_line	### ... | 2grep
 test_partial_line	3
 test_partial_line	30
 test_partial_line	31
 test_partial_line	32
 test_partial_line	33
 test_partial_line	34
 test_partial_line	35
 test_partial_line	36
 test_partial_line	37
 test_partial_line	38
 test_partial_line	39
 test_rn_opt	Search in null file
 test_rn_opt	Search in
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	Search in newline
 test_rn_opt	Search in
 test_rn_opt	
 test_rn_opt	
 test_rn_opt	
 test_rn_opt	
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	Search in 1.000000000
 test_rn_opt	1.000000000
 test_rn_opt	1.000000000
 test_rn_opt	1.000000000
 test_rn_opt	12
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	Search in 2 1.000000000
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	14
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	Search in 2.000000000 1
 test_rn_opt	2.000000000
 test_rn_opt	2.000000000
 test_rn_opt	2.000000000
 test_rn_opt	14
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	0
 test_rn_opt	Search in 3 2 1.000000000
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	3
 test_rn_opt	16
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	0
 test_rn_opt	Search in 3 2.000000000 1
 test_rn_opt	2.000000000
 test_rn_opt	2.000000000
 test_rn_opt	3
 test_rn_opt	16
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	0
 test_rn_opt	Search in 3.000000000 2 1
 test_rn_opt	2
 test_rn_opt	2
 test_rn_opt	3.000000000
 test_rn_opt	16
 test_rn_opt	12
 test_rn_opt	12
 test_rn_opt	0
 test_r_opt	Search in null file
 test_r_opt	Search in
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	Search in newline
 test_r_opt	Search in
 test_r_opt	
 test_r_opt	
 test_r_opt	
 test_r_opt	
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	Search in 1.000000000
 test_r_opt	1.000000000
 test_r_opt	1.000000000
 test_r_opt	1.000000000
 test_r_opt	12
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	Search in 2 1.000000000
 test_r_opt	2
 test_r_opt	2
 test_r_opt	2
 test_r_opt	14
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	Search in 2.000000000 1
 test_r_opt	2.000000000
 test_r_opt	2.000000000
 test_r_opt	2.000000000
 test_r_opt	14
 test_r_opt	0
 test_r_opt	0
 test_r_opt	0
 test_r_opt	Search in 3 2 1.000000000
 test_r_opt	2
 test_r_opt	2
 test_r_opt	3
 test_r_opt	16
 test_r_opt	2
 test_r_opt	2
 test_r_opt	0
 test_r_opt	Search in 3 2.000000000 1
 test_r_opt	2.000000000
 test_r_opt	2.000000000
 test_r_opt	3
 test_r_opt	16
 test_r_opt	2
 test_r_opt	2
 test_r_opt	0
 test_r_opt	Search in 3.000000000 2 1
 test_r_opt	2
 test_r_opt	2
 test_r_opt	3.000000000
 test_r_opt	16
 test_r_opt	12
 test_r_opt	12
 test_r_opt	0
--- a/25
+++ b/25
@ -1,21 +1,22 @@
-CMD = blink bsearch burncpu duplicate-packets em encdir field forever	\
+CMD = blink 2grep 2search burncpu duplicate-packets em encdir field forever	\
 	fxkill G gitnext gitundo goodpasswd histogram mtrr mirrorpdf	\
 	neno off pdfman pidcmd plotpipe puniq ramusage rand rclean	\
 	rina rn rrm seekmaniac shython sound-reload splitvideo stdout	\
 	swapout T timestamp tracefile transpose upsidedown vid		\
 	w4it-for-port-open whitehash wifi-reload wssh ytv yyyymmdd
-all: blink/blink.1 bsearch/bsearch.1 burncpu/burncpu.1			\
+all: blink/blink.1 2search/2grep.1 2search/2search.1			\
-	encdir/encdir.1 G/G.1 gitnext/gitnext.1 gitundo/gitundo.1	\
+	burncpu/burncpu.1 encdir/encdir.1 G/G.1 gitnext/gitnext.1	\
-	goodpasswd/goodpasswd.1 histogram/histogram.1			\
+	gitundo/gitundo.1 goodpasswd/goodpasswd.1			\
-	mirrorpdf/mirrorpdf.1 neno/neno.1 off/off.1 pdfman/pdfman.1	\
+	histogram/histogram.1 mirrorpdf/mirrorpdf.1 neno/neno.1		\
-	pidcmd/pidcmd.1 plotpipe/plotpipe.1 puniq/puniq.1 rand/rand.1	\
+	off/off.1 pdfman/pdfman.1 pidcmd/pidcmd.1 plotpipe/plotpipe.1	\
-	rina/rina.1 rn/rn.1 rrm/rrm.1 seekmaniac/seekmaniac.1		\
+	puniq/puniq.1 rand/rand.1 rina/rina.1 rn/rn.1 rrm/rrm.1		\
-	shython/shython.1 sound-reload/sound-reload.1			\
+	seekmaniac/seekmaniac.1 shython/shython.1			\
-	splitvideo/splitvideo.1 stdout/stdout.1 timestamp/timestamp.1	\
+	sound-reload/sound-reload.1 splitvideo/splitvideo.1		\
-	tracefile/tracefile.1 transpose/transpose.1 T/T.1		\
+	stdout/stdout.1 timestamp/timestamp.1 tracefile/tracefile.1	\
-	upsidedown/upsidedown.1 vid/vid.1 wifi-reload/wifi-reload.1	\
+	transpose/transpose.1 T/T.1 upsidedown/upsidedown.1 vid/vid.1	\
-	wssh/wssh.1 ytv/ytv.1 yyyymmdd/yyyymmdd.1
+	wifi-reload/wifi-reload.1 wssh/wssh.1 ytv/ytv.1			\
 	yyyymmdd/yyyymmdd.1
 %.1: %
 	pod2man $< > $@
--- a/bsearch/bsearch
+++ b/bsearch/bsearch
@ -1,404 +0,0 @@
 #!/usr/bin/perl
 =head1 NAME
 bsearch - binary search through sorted text files
 =head1 SYNOPSIS
 B<bsearch> [-nrfB] file string [string...]
 =head1 DESCRIPTION
 B<bsearch> searches a sorted file for a string. It outputs the
 following line or the byte position of this line, which is where the
 string would have been if it had been in the sorted file.
 =over 9
 =item B<--ignore-leading-blanks> (not implemented)
 =item B<-b>
 ignore leading blanks
 =item B<--byte-offset>
 =item B<-B>
 print byte position where string would have been
 =item B<--dictionary-order> (not implemented)
 =item B<-d>
 consider only blanks and alphanumeric characters
 =item B<--debug> (not implemented)
 =item B<-D>
 annotate the part of the line used to sort, and warn about
 questionable usage to stderr
 =item B<--ignore-case>
 =item B<-f>
 fold lower case to upper case characters
 =item B<--general-numeric-sort> (not implemented)
 =item B<-g>
 compare according to general numerical value
 =item B<--ignore-nonprinting> (not implemented)
 =item B<-i>
 consider only printable characters
 =item B<--month-sort> (not implemented)
 =item B<-M>
 compare (unknown) < 'JAN' < ... < 'DEC'
 =item B<--human-numeric-sort> (not implemented)
 =item B<-h>
 compare human readable numbers (e.g., 2K 1G)
 =item B<--key=KEYDEF> (not implemented)
 =item B<-k>
 sort via a key; KEYDEF gives location and type
 =item B<--numeric-sort>
 =item B<-n>
 compare according to string numerical value
 =item B<--random-sort>
 =item B<-R>
 sort by random hash of keys
 =item B<--reverse>
 =item B<-r>
 reverse the result of comparisons
 =item B<--sort=WORD> (not implemented)
 sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
 B<-M>, numeric B<-n>, random B<-R>, version B<-V>
 =item B<-t> (not implemented)
 =item B<--field-separator=SEP>
 use SEP instead of non-blank to blank transition
 =item B<-z>
 =item B<--zero-terminated>
 end lines with 0 byte, not newline
 =back
 =head1 EXAMPLES
 =head2 Missing
 Missing
 =head1 REPORTING BUGS
 B<bsearch> is part of tangetools. Report bugs to <tools@tange.dk>.
 =head1 AUTHOR
 Copyright (C) 2016 Ole Tange http://ole.tange.dk
 =head1 LICENSE
 Copyright (C) 2013 Free Software Foundation, Inc.
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 =head2 Documentation license I
 Permission is granted to copy, distribute and/or modify this documentation
 under the terms of the GNU Free Documentation License, Version 1.3 or
 any later version published by the Free Software Foundation; with no
 Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
 Texts.  A copy of the license is included in the file fdl.txt.
 =head2 Documentation license II
 You are free:
 =over 9
 =item B<to Share>
 to copy, distribute and transmit the work
 =item B<to Remix>
 to adapt the work
 =back
 Under the following conditions:
 =over 9
 =item B<Attribution>
 You must attribute the work in the manner specified by the author or
 licensor (but not in any way that suggests that they endorse you or
 your use of the work).
 =item B<Share Alike>
 If you alter, transform, or build upon this work, you may distribute
 the resulting work only under the same, similar or a compatible
 license.
 =back
 With the understanding that:
 =over 9
 =item B<Waiver>
 Any of the above conditions can be waived if you get permission from
 the copyright holder.
 =item B<Public Domain>
 Where the work or any of its elements is in the public domain under
 applicable law, that status is in no way affected by the license.
 =item B<Other Rights>
 In no way are any of the following rights affected by the license:
 =over 9
 =item *
 Your fair dealing or fair use rights, or other applicable
 copyright exceptions and limitations;
 =item *
 The author's moral rights;
 =item *
 Rights other persons may have either in the work itself or in
 how the work is used, such as publicity or privacy rights.
 =back
 =item B<Notice>
 For any reuse or distribution, you must make clear to others the
 license terms of this work.
 =back
 A copy of the full license is included in the file as cc-by-sa.txt.
 =head1 DEPENDENCIES
 B<bsearch> uses Perl.
 =head1 SEE ALSO
 B<grep>(1), B<sort>(1).
 =cut
 use Getopt::Long;
 # "bundling" lets single-letter options be combined (e.g. -nrfB);
 # "require_order" stops option parsing at the first non-option, so
 # everything after the file name is left in @ARGV as search strings.
 Getopt::Long::Configure("bundling","require_order");
 # The option names mirror those of sort(1). Every option is parsed
 # into $opt::*, but in this script only version, byte_offset,
 # zero_terminated, random_sort, reverse, ignore_case and numeric_sort
 # are read afterwards. Note that --debug/-D takes a string argument.
 GetOptions(
    "debug|D=s" => \$opt::D,
    "version" => \$opt::version,
    "verbose|v" => \$opt::verbose,
    "B|byte-offset" => \$opt::byte_offset,
    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
    "d|dictionary-order" => \$opt::dictionary_order,
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
    "n|numeric-sort" => \$opt::numeric_sort,
    "r|reverse" => \$opt::reverse,
    "R|random-sort" => \$opt::random_sort,
    "sort=s" => \$opt::sort,
    "V|version-sort" => \$opt::version_sort,
    "k|key=s" => \@opt::key,
    "t|field-separator=s" => \$opt::field_separator,
    "z|zero-terminated" => \$opt::zero_terminated,
    );
 # Name and version used in messages and by version()
 $Global::progname = "bsearch";
 $Global::version = 20160712;
 if($opt::version) {
    version();
    exit 0;
 }
 # -z: records end with a NUL byte instead of newline
 if($opt::zero_terminated) { $/ = "\0"; }
 # First remaining argument is the sorted file; every following
 # argument is a key that is searched for independently
 my $file = shift;
 for my $key (@ARGV) {
    print bsearch($file,$key);
 }
 sub bsearch {
    # Binary search for a key in a sorted file.
    # Input:
    #   $file = name of the sorted file
    #   $key = string to search for
    # Uses:
    #   compare() for the ordering, $opt::byte_offset for the output form
    # Returns:
    #   with $opt::byte_offset: a byte position followed by "\n"
    #   otherwise: a line read from the file, or "" when the line at
    #   the final position already compares >= $key
    # Exits with status 1 if the file cannot be opened.
    my $file = shift;
    my $key = shift;
    my $min = 0;
    my $max = -s $file;
    # Lexical handle: does not leak into a package global and can be
    # closed before returning
    my $fh;
    if(not open($fh, "<", $file)) {
 	error("Cannot open '$file'");
 	exit 1;
    }
    my $line;
    while($max - $min > 1) {
 	my $middle = int(($max + $min)/2);
 	seek($fh,$middle,0) or die "Cannot seek to $middle in '$file': $!\n";
 	# $middle most likely points into the middle of a line: read
 	# and discard up to the next record separator
 	my $partial = <$fh>;
 	if(eof($fh)
 	   or
 	   compare(($line = <$fh>),$key) >= 0) {
 	    $max = $middle;
 	} else {
 	    $min = $middle;
 	}
    }
    seek($fh,$max,0) or die "Cannot seek to $max in '$file': $!\n";
    $line = <$fh>;
    my $result;
    if(compare($line,$key) >= 0) {
 	# The very first line: report offset 0 or nothing
 	$result = $opt::byte_offset ? "0\n" : "";
    } else {
 	# tell() must be evaluated while the handle is still open
 	$result = $opt::byte_offset ? tell($fh)."\n" : $line;
    }
    close($fh);
    return $result;
 }
 sub compare {
    # Compare two strings according to the active sort options.
    # Input:
    #   two strings (e.g. a line from the file and a search key)
    # Uses:
    #   $opt::random_sort  = return a random ordering
    #   $opt::reverse      = swap the operands before comparing
    #   $opt::ignore_case  = upper-case both operands before comparing
    #   $opt::numeric_sort = compare numerically
    #   $opt::numascii     = compare numerically, ties broken by string
    #                        comparison
    # Returns:
    #   negative, 0 or positive like <=> and cmp
    #
    # The operands are not called $a/$b to avoid shadowing the
    # variables that sort() uses.
    my ($left,$right) = @_;
    if($opt::random_sort) {
 	return rand() <=> rand();
    }
    if($opt::reverse) {
 	($left,$right) = ($right,$left);
    }
    if($opt::ignore_case) {
 	$left = uc($left);
 	$right = uc($right);
    }
    if($opt::numeric_sort) {
 	return $left <=> $right;
    } elsif($opt::numascii) {
 	# Parenthesised and using ||: 'return X or Y' parses as
 	# '(return X) or Y', so the string tie-break would never run
 	return ($left <=> $right || $left cmp $right);
    } else {
 	return $left cmp $right;
    }
 }
 sub status {
    # Write each argument followed by a newline to the status handle
    # ($Global::status_fd if set, otherwise STDERR) and flush it.
    # Input:
    #   @_ = the messages
    my $out = $Global::status_fd || *STDERR;
    my @chunks = map { ($_, "\n") } @_;
    print {$out} @chunks;
    $out->flush;
 }
 sub status_no_nl {
    # Write the arguments unchanged (no newline added) to the status
    # handle ($Global::status_fd if set, otherwise STDERR) and flush it.
    # Input:
    #   @_ = the text fragments
    my $out = $Global::status_fd || *STDERR;
    print {$out} @_;
    $out->flush;
 }
 sub warning {
    # Emit each argument as "<prog>: Warning: <message>\n" through
    # status_no_nl(). <prog> is $Global::progname, or "parallel" when
    # that is unset.
    # Input:
    #   @_ = the warning messages
    my $prog = $Global::progname || "parallel";
    my @pieces;
    for my $msg (@_) {
 	push @pieces, $prog, ": Warning: ", $msg, "\n";
    }
    status_no_nl(@pieces);
 }
 sub error {
    # Emit each argument as "<prog>: Error: <message>" through
    # status(), which adds the newline. <prog> is $Global::progname,
    # or "parallel" when that is unset.
    # Input:
    #   @_ = the error messages
    my $prog = $Global::progname || "parallel";
    my @lines;
    for my $msg (@_) {
 	push @lines, $prog.": Error: ".$msg;
    }
    status(@lines);
 }
 sub die_bug {
    # Print a "you have found a bug" report to STDERR and terminate.
    # Input:
    #   $bugid = identifier that is echoed in the report
    # Returns: never returns; the process exits with status 255
    my $bugid = shift;
    print STDERR
 	("$Global::progname: This should not happen. You have found a bug.\n",
 	 "Please contact <parallel\@gnu.org> and include:\n",
 	 "* The version number: $Global::version\n",
 	 "* The bugid: $bugid\n",
 	 "* The command line being run\n",
 	 "* The files being read (put the files on a webserver if they are big)\n",
 	 "\n",
 	 "If you get the error on smaller/fewer files, please include those instead.\n");
    # Plain exit: no wait_and_exit() is defined in this script, so
    # calling ::wait_and_exit would fail with "Undefined subroutine"
    exit(255);
 }
 sub version {
    # Print the program name, version, copyright, license and
    # citation banner to the currently selected output handle.
    # Uses: $Global::progname, $Global::version
    # Returns: N/A
    my @banner = (
 	"GNU $Global::progname $Global::version",
 	"Copyright (C) 2016",
 	"Ole Tange and Free Software Foundation, Inc.",
 	"License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
 	"This is free software: you are free to change and redistribute it.",
 	"GNU $Global::progname comes with no warranty.",
 	"",
 	# The embedded newline gives an empty line before the citation
 	"Web site: http://www.gnu.org/software/${Global::progname}\n",
 	"When using programs that use GNU Parallel to process data for publication",
 	"please cite as described in 'parallel --citation'.\n",
    );
    print join("\n", @banner);
 }
--- a/bsearch/regressiontest
+++ b/bsearch/regressiontest
@ -1,44 +0,0 @@
 #!/bin/bash
 # Regression test driver for bsearch.
 #
 # test_tmp is a unique path that exported test functions use as a
 # prefix for their own scratch files (e.g. ${test_tmp}_n).
 # mktemp is used instead of the deprecated Debian-specific tempfile
 # helper, which is not available on all systems.
 test_tmp=$(mktemp)
 export test_tmp
 # Remove the placeholder file itself when this script exits
 trap 'rm -f "$test_tmp"' EXIT
 test_n() {
    # Look up the keys 0, 2, 2.1 and 100000 with 'bsearch -n' in a
    # series of tiny, numerically sorted files. Before each search
    # (except for the empty file) the file content is echoed on one
    # line through xargs.
    tmp=${test_tmp}_n
    # Empty file
    : > $tmp
    bsearch -n $tmp 0 2 2.1 100000
    # A single empty line
    printf '\n' > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    # One to three numbers; the long decimal form moves between
    # positions to vary the line lengths
    printf '%s\n' 1.000000000 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    printf '%s\n' 1.000000000 2 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    printf '%s\n' 1 2.000000000 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    printf '%s\n' 1.000000000 2 3 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    printf '%s\n' 1 2.000000000 3 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    printf '%s\n' 1 2 3.000000000 > $tmp
    xargs < $tmp
    bsearch -n $tmp 0 2 2.1 100000
    rm $tmp
 }
 # Export every shell function whose name contains 'test_' so that the
 # shells started by parallel can call them
 export -f $(compgen -A function | grep test_)
 # Run the functions in sorted name order through GNU parallel with
 # stderr merged into stdout; the output goes to this script's stdout
 compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1'