2grep: -F implemented. Passes test.

This commit is contained in:
Ole Tange 2020-10-11 15:52:05 +02:00
parent 157e498d6b
commit dbf36dfd17
2 changed files with 114 additions and 72 deletions

View file

@ -8,119 +8,136 @@
=head1 SYNOPSIS
B<2search> [-nrfHB] file string [string...]
B<2search> [-nrfHB] inputfile string [string...]
B<2search> --grep [-nrfH] file string [string...]
B<2search> --grep [-nrfH] inputfile string [string...]
B<2grep> [-nrfH] file string [string...]
B<2grep> [-nrfH] inputfile string [string...]
... | B<2search> [-nrfHB] file
... | B<2search> [-nrfHB] inputfile
... | B<2search> --grep [-nrfH] file
... | B<2search> --grep [-nrfH] inputfile
... | B<2grep> [-nrfH] file
... | B<2grep> [-nrfH] inputfile
=head1 DESCRIPTION
B<2search> searches a sorted file for a string. It outputs the
following line or the byte position of this line, which is where the
string would have been if it had been in the sorted file.
B<2search> searches a sorted file for lines starting with a string. It
outputs the following line or the byte position of this line, which is
where the string would have been if it had been in the sorted file.
B<2grep> outputs all lines starting with a given string. The file must
be sorted.
By using B<-k> the same way as in B<sort> you can instead search for
strings in columns, if the file is sorted using the B<-k> syntax.
=over 9
=item B<--ignore-leading-blanks>
=item B<-b>
ignore leading blanks
Ignore leading blanks. Used if I<inputfile> is sorted with B<sort
--ignore-leading-blanks>.
=item B<--byte-offset>
=item B<-B>
print byte position where string would have been
Print byte position where string would have been.
=item B<--dictionary-order> (not implemented)
=item B<-d>
consider only blanks and alphanumeric characters
Consider only blanks and alphanumeric characters. Used if I<inputfile>
is sorted with B<sort --dictionary-order>.
=item B<--debug>
=item B<-D>
annotate the part of the line used to sort to stderr
Show debugging information.
=item B<--ignore-case>
=item B<-f>
fold lower case to upper case characters
Fold lower case to upper case characters. Used if I<inputfile> is
sorted with B<sort --ignore-case>.
=item B<-F>
Fixed string. The search string 'foo' will not match 'foobar'.
=item B<--file> I<file>
=item B<-F> I<file>
search for all lines in I<file>
Search for all lines in I<file>.
=item B<--general-numeric-sort> (not implemented)
=item B<-g>
compare according to general numerical value
Compare according to general numerical value. Used if I<inputfile> is
sorted with B<sort --general-numeric-sort>.
=item B<--header>
=item B<-H>
treat the first line in I<file> as a header
Treat the first line in I<file> as a header. Ignore it when searching
and print it once in the output.
=item B<--ignore-nonprinting> (not implemented)
=item B<-i>
consider only printable characters
Consider only printable characters. Used if I<inputfile> is
sorted with B<sort --ignore-nonprinting>.
=item B<--month-sort>
=item B<-M>
compare (unknown) < 'JAN' < ... < 'DEC'
Compare (unknown) < 'JAN' < ... < 'DEC'. Used if I<inputfile> is
sorted with B<sort --month-sort>.
=item B<--human-numeric-sort>
=item B<-h>
compare human readable numbers (e.g., 2K 1G)
Compare human readable numbers (e.g., 2K 1G). Used if I<inputfile> is
sorted with B<sort --human-numeric-sort>.
=item B<--key=KEYDEF> (not implemented)
=item B<--key=KEYDEF>
=item B<-k>
sort via a key; KEYDEF gives location and type
Sort via a key; KEYDEF gives location and type. Used if I<inputfile>
is sorted with B<sort --key=KEYDEF>.
=item B<--numeric-sort>
=item B<-n>
compare according to string numerical value. If numerical values are
the same: compare as strings.
Compare according to string numerical value. If numerical values are
the same: compare as strings. Used if I<inputfile> is sorted with
B<sort --numeric-sort>.
=item B<--numascii>
@ -140,19 +157,20 @@ This is similar to B<--version-sort>, but without the exceptions.
=item B<-R>
sort by random hash of keys
Sort by random hash of keys.
=item B<--reverse>
=item B<-r>
reverse the result of comparisons
Reverse the result of comparisons. Used if I<inputfile> is sorted with
B<sort --reverse>.
=item B<--sort=WORD> (not implemented)
sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
Sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
B<-M>, numeric B<-n>, random B<-R>, version B<-V>
@ -160,14 +178,14 @@ B<-M>, numeric B<-n>, random B<-R>, version B<-V>
=item B<--field-separator=SEP>
use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
Use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
=item B<-z>
=item B<--zero-terminated>
end lines with 0 byte, not newline
End lines with 0 (NUL) byte, not newline.
=back
@ -262,7 +280,7 @@ To solve this sort the input with B<LC_ALL=C sort ...>.
=head1 REPORTING BUGS
B<2search> is part of tangetools. Report bugs on
B<2search> and B<2grep> are part of tangetools. Report bugs on
https://gitlab.com/ole.tange/tangetools/-/issues
@ -401,7 +419,8 @@ GetOptions(
"f|ignore-case" => \$opt::ignore_case,
"g|general-numeric-sort" => \$opt::general_numeric_sort,
"G|grep" => \$opt::grep,
"F|file=s" => \$opt::file,
"F|fixed-strings" => \$opt::fixed_strings,
"file=s" => \$opt::file,
"i|ignore-nonprinting" => \$opt::ignore_nonprinting,
"M|month-sort" => \$opt::month_sort,
"h|human-numeric-sort" => \$opt::human_numeric_sort,
@ -523,9 +542,11 @@ sub bgrep {
exit 1;
}
seek($fh,$startpos,0) or die;
# Allow for partial matches in grep (4 matches 40, A matches Aaa)
for my $keydef (@Global::keydefs) {
$keydef->{'partial_match'} = 1;
if(not $opt::fixed_strings) {
# Allow for partial matches in grep (4 matches 40, A matches Aaa)
for my $keydef (@Global::keydefs) {
$keydef->{'partial_match'} = 1;
}
}
my $line;
while($line = <$fh>

View file

@ -8,119 +8,136 @@
=head1 SYNOPSIS
B<2search> [-nrfHB] file string [string...]
B<2search> [-nrfHB] inputfile string [string...]
B<2search> --grep [-nrfH] file string [string...]
B<2search> --grep [-nrfH] inputfile string [string...]
B<2grep> [-nrfH] file string [string...]
B<2grep> [-nrfH] inputfile string [string...]
... | B<2search> [-nrfHB] file
... | B<2search> [-nrfHB] inputfile
... | B<2search> --grep [-nrfH] file
... | B<2search> --grep [-nrfH] inputfile
... | B<2grep> [-nrfH] file
... | B<2grep> [-nrfH] inputfile
=head1 DESCRIPTION
B<2search> searches a sorted file for a string. It outputs the
following line or the byte position of this line, which is where the
string would have been if it had been in the sorted file.
B<2search> searches a sorted file for lines starting with a string. It
outputs the following line or the byte position of this line, which is
where the string would have been if it had been in the sorted file.
B<2grep> outputs all lines starting with a given string. The file must
be sorted.
By using B<-k> the same way as in B<sort> you can instead search for
strings in columns, if the file is sorted using the B<-k> syntax.
=over 9
=item B<--ignore-leading-blanks>
=item B<-b>
ignore leading blanks
Ignore leading blanks. Used if I<inputfile> is sorted with B<sort
--ignore-leading-blanks>.
=item B<--byte-offset>
=item B<-B>
print byte position where string would have been
Print byte position where string would have been.
=item B<--dictionary-order> (not implemented)
=item B<-d>
consider only blanks and alphanumeric characters
Consider only blanks and alphanumeric characters. Used if I<inputfile>
is sorted with B<sort --dictionary-order>.
=item B<--debug>
=item B<-D>
annotate the part of the line used to sort to stderr
Show debugging information.
=item B<--ignore-case>
=item B<-f>
fold lower case to upper case characters
Fold lower case to upper case characters. Used if I<inputfile> is
sorted with B<sort --ignore-case>.
=item B<-F>
Fixed string. The search string 'foo' will not match 'foobar'.
=item B<--file> I<file>
=item B<-F> I<file>
search for all lines in I<file>
Search for all lines in I<file>.
=item B<--general-numeric-sort> (not implemented)
=item B<-g>
compare according to general numerical value
Compare according to general numerical value. Used if I<inputfile> is
sorted with B<sort --general-numeric-sort>.
=item B<--header>
=item B<-H>
treat the first line in I<file> as a header
Treat the first line in I<file> as a header. Ignore it when searching
and print it once in the output.
=item B<--ignore-nonprinting> (not implemented)
=item B<-i>
consider only printable characters
Consider only printable characters. Used if I<inputfile> is
sorted with B<sort --ignore-nonprinting>.
=item B<--month-sort>
=item B<-M>
compare (unknown) < 'JAN' < ... < 'DEC'
Compare (unknown) < 'JAN' < ... < 'DEC'. Used if I<inputfile> is
sorted with B<sort --month-sort>.
=item B<--human-numeric-sort>
=item B<-h>
compare human readable numbers (e.g., 2K 1G)
Compare human readable numbers (e.g., 2K 1G). Used if I<inputfile> is
sorted with B<sort --human-numeric-sort>.
=item B<--key=KEYDEF> (not implemented)
=item B<--key=KEYDEF>
=item B<-k>
sort via a key; KEYDEF gives location and type
Sort via a key; KEYDEF gives location and type. Used if I<inputfile>
is sorted with B<sort --key=KEYDEF>.
=item B<--numeric-sort>
=item B<-n>
compare according to string numerical value. If numerical values are
the same: compare as strings.
Compare according to string numerical value. If numerical values are
the same: compare as strings. Used if I<inputfile> is sorted with
B<sort --numeric-sort>.
=item B<--numascii>
@ -140,19 +157,20 @@ This is similar to B<--version-sort>, but without the exceptions.
=item B<-R>
sort by random hash of keys
Sort by random hash of keys.
=item B<--reverse>
=item B<-r>
reverse the result of comparisons
Reverse the result of comparisons. Used if I<inputfile> is sorted with
B<sort --reverse>.
=item B<--sort=WORD> (not implemented)
sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
Sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
B<-M>, numeric B<-n>, random B<-R>, version B<-V>
@ -160,14 +178,14 @@ B<-M>, numeric B<-n>, random B<-R>, version B<-V>
=item B<--field-separator=SEP>
use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
Use I<SEP> instead of blanks (\s+). I<SEP> is a regexp.
=item B<-z>
=item B<--zero-terminated>
end lines with 0 byte, not newline
End lines with 0 (NUL) byte, not newline.
=back
@ -262,7 +280,7 @@ To solve this sort the input with B<LC_ALL=C sort ...>.
=head1 REPORTING BUGS
B<2search> is part of tangetools. Report bugs on
B<2search> and B<2grep> are part of tangetools. Report bugs on
https://gitlab.com/ole.tange/tangetools/-/issues
@ -401,7 +419,8 @@ GetOptions(
"f|ignore-case" => \$opt::ignore_case,
"g|general-numeric-sort" => \$opt::general_numeric_sort,
"G|grep" => \$opt::grep,
"F|file=s" => \$opt::file,
"F|fixed-strings" => \$opt::fixed_strings,
"file=s" => \$opt::file,
"i|ignore-nonprinting" => \$opt::ignore_nonprinting,
"M|month-sort" => \$opt::month_sort,
"h|human-numeric-sort" => \$opt::human_numeric_sort,
@ -523,9 +542,11 @@ sub bgrep {
exit 1;
}
seek($fh,$startpos,0) or die;
# Allow for partial matches in grep (4 matches 40, A matches Aaa)
for my $keydef (@Global::keydefs) {
$keydef->{'partial_match'} = 1;
if(not $opt::fixed_strings) {
# Allow for partial matches in grep (4 matches 40, A matches Aaa)
for my $keydef (@Global::keydefs) {
$keydef->{'partial_match'} = 1;
}
}
my $line;
while($line = <$fh>