2grep: -F implemented. Passes test.

2020-10-11 15:52:05 +02:00 · 2020-10-11 15:52:05 +02:00 · dbf36dfd17
parent 157e498d6b
commit dbf36dfd17
2 changed files with 114 additions and 72 deletions
--- a/2search/2grep
+++ b/2search/2grep
@ -8,119 +8,136 @@
 =head1 SYNOPSIS
-B<2search> [-nrfHB] file string [string...]
+B<2search> [-nrfHB] inputfile string [string...]
-B<2search> --grep [-nrfH] file string [string...]
+B<2search> --grep [-nrfH] inputfile string [string...]
-B<2grep> [-nrfH] file string [string...]
+B<2grep> [-nrfH] inputfile string [string...]
-... | B<2search> [-nrfHB] file
+... | B<2search> [-nrfHB] inputfile
-... | B<2search> --grep [-nrfH] file
+... | B<2search> --grep [-nrfH] inputfile
-... | B<2grep> [-nrfH] file
+... | B<2grep> [-nrfH] inputfile
 =head1 DESCRIPTION
-B<2search> searches a sorted file for a string. It outputs the
+B<2search> searches a sorted file for lines starting with a string. It
-following line or the byte position of this line, which is where the
+outputs the following line or the byte position of this line, which is
-string would have been if it had been in the sorted file.
+where the string would have been if it had been in the sorted file.
 B<2grep> outputs all lines starting with a given string. The file must
 be sorted.
 By using B<-k> the same way as in B<sort> you can instead search for
 strings in columns, if the file is sorted using the B<-k> syntax.
 =over 9
 =item B<--ignore-leading-blanks>
 =item B<-b>
-ignore leading blanks
+Ignore leading blanks. Used if I<inputfile> is sorted with B<sort
 --ignore-leading-blanks>.
 =item B<--byte-offset>
 =item B<-B>
-print byte position where string would have been
+Print byte position where string would have been.
 =item B<--dictionary-order> (not implemented)
 =item B<-d>
-consider only blanks and alphanumeric characters
+Consider only blanks and alphanumeric characters. Used if I<inputfile>
 is sorted with B<sort --dictionary-order>.
 =item B<--debug>
 =item B<-D>
-annotate the part of the line used to sort to stderr
+Show debugging information.
 =item B<--ignore-case>
 =item B<-f>
-fold lower case to upper case characters
+Fold lower case to upper case characters. Used if I<inputfile> is
 sorted with B<sort --ignore-case>.
 =item B<-F>
 Fixed string. The search string 'foo' will not match 'foobar'.
 =item B<--file> I<file>
-=item B<-F> I<file>
+Search for all lines in I<file>.
 search for all lines in I<file>
 =item B<--general-numeric-sort> (not implemented)
 =item B<-g>
-compare according to general numerical value
+Compare according to general numerical value. Used if I<inputfile> is
 sorted with B<sort --general-numeric-sort>.
 =item B<--header>
 =item B<-H>
-treat the first line in I<file> as a header
+Treat the first line in I<inputfile> as a header. Ignore it when searching
 and print it once in the output.
 =item B<--ignore-nonprinting> (not implemented)
 =item B<-i>
-consider only printable characters
+Consider only printable characters. Used if I<inputfile> is
 sorted with B<sort --ignore-nonprinting>.
 =item B<--month-sort>
 =item B<-M>
-compare (unknown) < 'JAN' < ... < 'DEC'
+Compare (unknown) < 'JAN' < ... < 'DEC'. Used if I<inputfile> is
 sorted with B<sort --month-sort>.
 =item B<--human-numeric-sort>
 =item B<-h>
-compare human readable numbers (e.g., 2K 1G)
+Compare human readable numbers (e.g., 2K 1G). Used if I<inputfile> is
 sorted with B<sort --human-numeric-sort>.
-=item B<--key=KEYDEF> (not implemented)
+=item B<--key=KEYDEF>
 =item B<-k>
-sort via a key; KEYDEF gives location and type
+Sort via a key; KEYDEF gives location and type. Used if I<inputfile>
 is sorted with B<sort --key=KEYDEF>.
 =item B<--numeric-sort>
 =item B<-n>
-compare according to string numerical value. If numerical values are
+Compare according to string numerical value. If numerical values are
-the same: compare as strings.
+the same: compare as strings. Used if I<inputfile> is sorted with
 B<sort --numeric-sort>.
 =item B<--numascii>
@ -140,19 +157,20 @@ This is similar to B<--version-sort>, but without the exceptions.
 =item B<-R>
-sort by random hash of keys
+Sort by random hash of keys.
 =item B<--reverse>
 =item B<-r>
-reverse the result of comparisons
+Reverse the result of comparisons. Used if I<inputfile> is sorted with
 B<sort --reverse>.
 =item B<--sort=WORD> (not implemented)
-sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
+Sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
 B<-M>, numeric B<-n>, random B<-R>, version B<-V>
@ -160,14 +178,14 @@ B<-M>, numeric B<-n>, random B<-R>, version B<-V>
 =item B<--field-separator=SEP>
-use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
+Use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
 =item B<-z>
 =item B<--zero-terminated>
-end lines with 0 byte, not newline
+End lines with 0 (NUL) byte, not newline.
 =back
@ -262,7 +280,7 @@ To solve this sort the input with B<LC_ALL=C sort ...>.
 =head1 REPORTING BUGS
-B<2search> is part of tangetools. Report bugs on
+B<2search> and B<2grep> are part of tangetools. Report bugs on
 https://gitlab.com/ole.tange/tangetools/-/issues
@ -401,7 +419,8 @@ GetOptions(
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "G|grep" => \$opt::grep,
-    "F|file=s" => \$opt::file,
+    "F|fixed-strings" => \$opt::fixed_strings,
    "file=s" => \$opt::file,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
@ -523,10 +542,12 @@ sub bgrep {
 	exit 1;
    }
    seek($fh,$startpos,0) or die;
    if(not $opt::fixed_strings) {
 	# Allow for partial matches in grep (4 matches 40, A matches Aaa)
 	for my $keydef (@Global::keydefs) {
 	    $keydef->{'partial_match'} = 1;
 	}
    }
    my $line;
    while($line = <$fh>
 	  and
--- a/2search/2search
+++ b/2search/2search
@ -8,119 +8,136 @@
 =head1 SYNOPSIS
-B<2search> [-nrfHB] file string [string...]
+B<2search> [-nrfHB] inputfile string [string...]
-B<2search> --grep [-nrfH] file string [string...]
+B<2search> --grep [-nrfH] inputfile string [string...]
-B<2grep> [-nrfH] file string [string...]
+B<2grep> [-nrfH] inputfile string [string...]
-... | B<2search> [-nrfHB] file
+... | B<2search> [-nrfHB] inputfile
-... | B<2search> --grep [-nrfH] file
+... | B<2search> --grep [-nrfH] inputfile
-... | B<2grep> [-nrfH] file
+... | B<2grep> [-nrfH] inputfile
 =head1 DESCRIPTION
-B<2search> searches a sorted file for a string. It outputs the
+B<2search> searches a sorted file for lines starting with a string. It
-following line or the byte position of this line, which is where the
+outputs the following line or the byte position of this line, which is
-string would have been if it had been in the sorted file.
+where the string would have been if it had been in the sorted file.
 B<2grep> outputs all lines starting with a given string. The file must
 be sorted.
 By using B<-k> the same way as in B<sort> you can instead search for
 strings in columns, if the file is sorted using the B<-k> syntax.
 =over 9
 =item B<--ignore-leading-blanks>
 =item B<-b>
-ignore leading blanks
+Ignore leading blanks. Used if I<inputfile> is sorted with B<sort
 --ignore-leading-blanks>.
 =item B<--byte-offset>
 =item B<-B>
-print byte position where string would have been
+Print byte position where string would have been.
 =item B<--dictionary-order> (not implemented)
 =item B<-d>
-consider only blanks and alphanumeric characters
+Consider only blanks and alphanumeric characters. Used if I<inputfile>
 is sorted with B<sort --dictionary-order>.
 =item B<--debug>
 =item B<-D>
-annotate the part of the line used to sort to stderr
+Show debugging information.
 =item B<--ignore-case>
 =item B<-f>
-fold lower case to upper case characters
+Fold lower case to upper case characters. Used if I<inputfile> is
 sorted with B<sort --ignore-case>.
 =item B<-F>
 Fixed string. The search string 'foo' will not match 'foobar'.
 =item B<--file> I<file>
-=item B<-F> I<file>
+Search for all lines in I<file>.
 search for all lines in I<file>
 =item B<--general-numeric-sort> (not implemented)
 =item B<-g>
-compare according to general numerical value
+Compare according to general numerical value. Used if I<inputfile> is
 sorted with B<sort --general-numeric-sort>.
 =item B<--header>
 =item B<-H>
-treat the first line in I<file> as a header
+Treat the first line in I<inputfile> as a header. Ignore it when searching
 and print it once in the output.
 =item B<--ignore-nonprinting> (not implemented)
 =item B<-i>
-consider only printable characters
+Consider only printable characters. Used if I<inputfile> is
 sorted with B<sort --ignore-nonprinting>.
 =item B<--month-sort>
 =item B<-M>
-compare (unknown) < 'JAN' < ... < 'DEC'
+Compare (unknown) < 'JAN' < ... < 'DEC'. Used if I<inputfile> is
 sorted with B<sort --month-sort>.
 =item B<--human-numeric-sort>
 =item B<-h>
-compare human readable numbers (e.g., 2K 1G)
+Compare human readable numbers (e.g., 2K 1G). Used if I<inputfile> is
 sorted with B<sort --human-numeric-sort>.
-=item B<--key=KEYDEF> (not implemented)
+=item B<--key=KEYDEF>
 =item B<-k>
-sort via a key; KEYDEF gives location and type
+Sort via a key; KEYDEF gives location and type. Used if I<inputfile>
 is sorted with B<sort --key=KEYDEF>.
 =item B<--numeric-sort>
 =item B<-n>
-compare according to string numerical value. If numerical values are
+Compare according to string numerical value. If numerical values are
-the same: compare as strings.
+the same: compare as strings. Used if I<inputfile> is sorted with
 B<sort --numeric-sort>.
 =item B<--numascii>
@ -140,19 +157,20 @@ This is similar to B<--version-sort>, but without the exceptions.
 =item B<-R>
-sort by random hash of keys
+Sort by random hash of keys.
 =item B<--reverse>
 =item B<-r>
-reverse the result of comparisons
+Reverse the result of comparisons. Used if I<inputfile> is sorted with
 B<sort --reverse>.
 =item B<--sort=WORD> (not implemented)
-sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
+Sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
 B<-M>, numeric B<-n>, random B<-R>, version B<-V>
@ -160,14 +178,14 @@ B<-M>, numeric B<-n>, random B<-R>, version B<-V>
 =item B<--field-separator=SEP>
-use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
+Use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
 =item B<-z>
 =item B<--zero-terminated>
-end lines with 0 byte, not newline
+End lines with 0 (NUL) byte, not newline.
 =back
@ -262,7 +280,7 @@ To solve this sort the input with B<LC_ALL=C sort ...>.
 =head1 REPORTING BUGS
-B<2search> is part of tangetools. Report bugs on
+B<2search> and B<2grep> are part of tangetools. Report bugs on
 https://gitlab.com/ole.tange/tangetools/-/issues
@ -401,7 +419,8 @@ GetOptions(
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "G|grep" => \$opt::grep,
-    "F|file=s" => \$opt::file,
+    "F|fixed-strings" => \$opt::fixed_strings,
    "file=s" => \$opt::file,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
@ -523,10 +542,12 @@ sub bgrep {
 	exit 1;
    }
    seek($fh,$startpos,0) or die;
    if(not $opt::fixed_strings) {
 	# Allow for partial matches in grep (4 matches 40, A matches Aaa)
 	for my $keydef (@Global::keydefs) {
 	    $keydef->{'partial_match'} = 1;
 	}
    }
    my $line;
    while($line = <$fh>
 	  and