tangetools/find-first-fail/find-first-fail
2021-01-09 16:50:05 +01:00

460 lines
10 KiB
Bash
Executable file

#!/bin/bash
: <<'=cut'
=encoding utf8
=head1 NAME
find-first-fail - fine function for finding first failing file
fragment (or numeric argument)
=head1 SYNOPSIS
B<find-first-fail> [-2] [-q] [-s I<start>] [-e I<end>] [-v] [-V] I<command>
B<find-first-fail> -f I<inputfile> [-s I<start>] [-q] [-v] [-V] I<command>
=head1 DESCRIPTION
B<find-first-fail> runs I<command> with a single number as argument. It
returns the highest value for which I<command> succeeds.
It finds the value by first testing the I<start> value (which defaults
to 1). As long as the value succeeds, the value is doubled. When the
value fails, B<find-first-fail> does a binary search between this
value and the previous value.
If the I<start> value fails, B<find-first-fail> instead searches for
the highest value that I<command> fails for.
If given a file with B<-f>, B<find-first-fail> will find the minimal
section of the file for which I<command> fails.
=head1 OPTIONS
=over 4
=item B<-f> I<inputfile>
Search for the bad line in I<inputfile>. Use B<-s> to tell how many
lines should be treated as header (e.g. 1 if a normal CSV file with a
header).
=item B<-2>
Instead of passing the command a single argument, give the command 2
arguments: I<from> I<to>.
=item B<-q>
Quiet. Ignore output from I<command>.
=item B<-s I<start>>
Start searching from the value I<start>. Normally searching will start
from the value 1.
=item B<-e I<end>>
End searching at the value I<end>. Normally this value will be
determined automatically, but you can limit the search to be below
the value I<end>.
=item B<-v>
Verbose. Show the commands being run.
=item B<-V>
Show version information and exit.
=back
=head1 EXAMPLES
=head2 Find the last file
This is a silly way to find the last non-existing file (namely 244):
touch {245..800}
find-first-fail ls
This is a silly way to find the last file (namely 800):
touch {1..800}
find-first-fail ls
=head2 Test a bash function
Test how long an argument /bin/echo can take
. $(which find-first-fail)
singleecho() {
/bin/echo $(perl -e 'print "x"x'$1) >/dev/null
}
find-first-fail singleecho
=head2 Test a bash function that takes from and to as arguments
Use a function that takes two arguments. It finds the line number
after HOME=.
. $(which find-first-fail)
greplines() {
env | perl -ne "$1..$2 and print" | grep HOME=
}
find-first-fail -2 -q greplines
=head2 Test complex command and show what is run
Complex commands can also be run:
find-first-fail -v perl -e 'exit(shift > 129)'
=head2 Find the second limit of a program
Assume you have a program that is OK in the range 123..12345. How do
you find those limits?
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
else { exit ($a <= 12345) }' "$@"; }
export -f myprog
# Finds 123
find-first-fail myprog
# Finds 12345
find-first-fail -s 200 myprog
=head2 Find minimal failing CSV file
Assume: example.csv
MyHeader
4
3
5
100
3
myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; }
To identify the minimal CSV file that causes myparser to fail:
find-first-fail -f example.csv -s1 myparser
=head1 REPORTING BUGS
Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues
=head1 AUTHOR
Copyright (C) 2020 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
=head1 LICENSE
Copyright (C) 2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
at your option any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 SEE ALSO
B<eval>(1)
=cut
find-first-fail() {
_find-first-fail() {
    # Bisect between a lower and an upper bound and print the lower bound
    # once the two are less than two steps apart.
    #   $1   lower bound
    #   $2   upper bound
    #   $3.. command, passed on unchanged to _run
    # Each round asks _run about the midpoint: if _run succeeds the
    # midpoint becomes the new lower bound, otherwise the new upper bound.
    local lo=$1
    local hi=$2
    shift
    shift
    local mid
    while true ; do
        if [ "$lo" -gt "$(( hi - 2 ))" ]; then
            # No untested value left between the bounds
            echo "$lo"
            return
        fi
        mid=$(( (lo + hi) / 2 ))
        if _run "$lo" "$mid" "$hi" "$@" ; then
            lo=$mid
        else
            hi=$mid
        fi
    done
}
_run() {
    # Run the command once for a candidate value.
    #   $1   lower value  (first argument to the command when $opt2 is true)
    #   $2   value under test (always passed to the command)
    #   $3   upper value  (only shown in the verbose trace)
    #   $4.. the command and its fixed arguments
    # Runs:
    #   cmd $1 $2     if $opt2
    #   cmd $2        otherwise
    # Output is ignored if $quiet
    # Exit value is negated if $not
    _inner_run() {
        # A separate function is needed so that 'eval' below only has to
        # deal with one word even when cmd is complex like:
        #   perl -e 'exit( (shift) + (shift) > 10)'
        if $opt2 ; then
            "${cmd[@]}" "$a" "$b"
        else
            "${cmd[@]}" "$b"
        fi
    }
    local a="$1" b="$2" c="$3"
    # Everything after the three values is the command
    local cmd=("${@:4}")
    if $verbose ; then
        if $opt2 ; then
            echo "$a<x<$b: ${cmd[@]}" "$a" "$b" >&2
        else
            echo "$a<x<$c: ${cmd[@]}" "$b" >&2
        fi
    fi
    # $not is either empty or '!', $quiet is either empty or redirections
    eval "$not" _inner_run "$quiet"
}
_find_in_arg() {
    # Search numeric arguments for the point where the exit value of the
    # command changes and print the last value before the change.
    #   $@   the command to test
    # Reads $start (first value tested) and $end (optional upper limit).
    # Returns non-zero if the exit value never changes.
    #
    # If function($start) == false: run 'not function()' instead.
    # $not is local here but is seen by _run, which negates the exit value.
    local not
    if _run "$start" "$start" "?" "$@" ; then
        not=''
    else
        not='!'
    fi
    local low=$start
    local high
    if [ -z "$end" ] ; then
        # No end value given with -e:
        # exponential search for the first value that is false
        #   low = previous value (function($low) == true)
        #   high = low * 2 (function($high) == false)
        high=$(( low * 2 ))
        # Doubling 0 (or a negative value) never moves upwards and would
        # loop forever: step by one instead until doubling works.
        [ "$high" -gt "$low" ] || high=$(( low + 1 ))
        while _run "$low" "$high" "?" "$@" ; do
            low=$high
            high=$(( high * 2 ))
            [ "$high" -gt "$low" ] || high=$(( low + 1 ))
            # Stop before the next doubling overflows a 64-bit integer
            if [ "$high" -gt 4611686018427387900 ] ; then
                echo "find-first-fail: Error: exit value does not change of '$*'" >&2
                return 1
            fi
        done
    else
        high=$end
    fi
    # low = tested good
    # high = tested fail
    # Search low..high
    _find-first-fail "$low" "$high" "$@"
    unset low high start quiet
}
#### find-first-fail-file
_run_file() {
    # Build a temporary file consisting of the header lines (1..$start)
    # followed by lines $1..$2 of $inputfile and run:
    #   cmd $tmpfile
    #   $1   first line to include
    #   $2   last line to include
    #   $3.. the command and its fixed arguments
    # Output is ignored if $quiet
    # Exit value is negated if $not
    # Returns the (possibly negated) exit value of the command, or 1 if
    # the temporary file cannot be created.
    _inner_run() {
        # _inner_run is needed if cmd is complex like:
        #   perl -e 'exit( (shift) + (shift) > 10)'
        "${cmd[@]}" "$tmp"
    }
    local a="$1"
    local b="$2"
    # TODO if defined memory{$a,$b}: return value
    shift
    shift
    local cmd=("$@")
    local tmp
    # mktemp replaces tempfile(1), which only exists on Debian-derived
    # systems and is deprecated there. Declaring and assigning separately
    # keeps the exit value of mktemp visible.
    if ! tmp=$(mktemp "${TMPDIR:-/tmp}/fffXXXXXX") ; then
        echo "find-first-fail: Error: Cannot create temporary file" >&2
        return 1
    fi
    if $verbose ; then
        echo "$a<x<$b: ${cmd[@]}" "$tmp" >&2
    fi
    # Build file of header + line a..b
    perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp";
    perl -ne "$a..$b and print" "$inputfile" >> "$tmp";
    eval "$not" _inner_run "$quiet"
    # Postpone exit code until the temporary file is removed
    local _exit="$?"
    # TODO memory{$a,$b}="$_exit"
    rm -f -- "$tmp"
    return "$_exit"
}
_find-low-file() {
    # Bisect for the first line of the section and store it in $global_low.
    # Positions are in tenths of a line (line number * 10); dividing by 10
    # gives the line number handed to _run_file.
    #   $1   lowest candidate position
    #   $2   highest candidate position
    #   $3.. command, passed on unchanged to _run_file
    # Reads $global_high as the fixed end of the section.
    local lo=$1
    local hi=$2
    shift
    shift
    # The end of the section does not move while the start is searched
    local last_line=$(( $global_high / 10 ))
    local mid
    while true ; do
        if [ "$lo" -gt "$(( hi - 2 ))" ]; then
            # if $lo > $hi - 0.2: $global_low = $lo
            global_low=$lo
            return
        fi
        mid=$(( (lo + hi) / 2 ))
        if _run_file "$(( mid / 10 ))" "$last_line" "$@" ; then
            lo=$mid
        else
            hi=$mid
        fi
    done
}
_find-high-file() {
    # Bisect for the last line of the section and store it in $global_high.
    # Positions are in tenths of a line (line number * 10); dividing by 10
    # gives the line number handed to _run_file.
    #   $1   lowest candidate position
    #   $2   highest candidate position
    #   $3.. command, passed on unchanged to _run_file
    # Reads $global_low as the fixed start of the section.
    local lo=$1
    local hi=$2
    shift
    shift
    # The start of the section does not move while the end is searched
    local first_line=$(( $global_low / 10 ))
    local mid
    while true ; do
        if [ "$lo" -gt "$(( hi - 5 ))" ]; then
            # if $lo > $hi - 0.5: $global_high = $hi + 0.5
            global_high=$(( hi + 5 ))
            return
        fi
        mid=$(( (lo + hi) / 2 ))
        if _run_file "$first_line" "$(( mid / 10 ))" "$@" ; then
            hi=$mid
        else
            lo=$mid
        fi
    done
}
_find_in_file() {
    # Find the minimal section of $inputfile for which the exit value of
    # the command differs, and print it preceded by the $start header lines.
    #   $@   the command; it is run with a temporary file as last argument
    # Reads $inputfile and $start.
    # Returns 1 if $inputfile does not exist.
    if [ ! -e "$inputfile" ] ; then
        echo "find-first-fail: Error: File not found '$inputfile'" >&2
        return 1
    fi
    # $not is local here but is seen by _run_file, which negates the exit
    # value: if the command fails for the whole file, every later test is
    # negated.
    local not
    # Positions are kept as line number * 10 so the bisection can work
    # with fractions of a line; /10 converts back to a line number.
    local global_low=$(( ($start + 1) * 10 ))
    local global_high
    # Quoting $inputfile is required for file names containing whitespace;
    # the arithmetic also strips any padding wc puts around the count.
    global_high=$(( $(wc -l < "$inputfile") * 10 ))
    local global_lowdeci=$(( global_low / 10 ))
    local global_highdeci=$(( global_high / 10 ))
    if _run_file "$global_lowdeci" "$global_highdeci" "$@" ; then
        not=''
    else
        not='!'
    fi
    # Binary search for minimal $global_high that fails
    _find-high-file "$global_low" "$global_high" "$@"
    # Binary search for minimal $global_low that fails
    _find-low-file "$global_low" "$global_high" "$@"
    # Both searches update the global_* positions: convert them again
    global_lowdeci=$(( global_low / 10 ))
    global_highdeci=$(( global_high / 10 ))
    # Print the resulting minimal file
    perl -ne "($start and 1..$start) and print" "$inputfile"
    perl -ne "$global_lowdeci..$global_highdeci and print" "$inputfile"
    unset low high start quiet
}
version() {
    # Print the version, copyright and licence notice on stdout,
    # one line per printf argument.
    printf '%s\n' \
        'find-first-fail 20210109' \
        'Copyright (C) 2020-2021 Ole Tange, http://ole.tange.dk' \
        'License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>' \
        'This is free software: you are free to change and redistribute it.' \
        'find-first-fail comes with no warranty.' \
        'Web site: https://gitlab.com/ole.tange/tangetools/-/tree/master/find-first-fail'
}
# Option state. Everything is local so that nothing leaks into, or is
# inherited from, the calling shell when the function has been sourced.
local opt2=false
local optf=false
local inputfile
local quiet=""
local start
# Without 'local' a value given with -e would stay set in the shell and
# silently limit later calls that do not use -e.
local end
local options
local verbose=false
unset OPTIND
# Parse and remove options
while getopts "2e:f:qs:vV" options; do
    case "${options}" in
        (2) opt2=true;;
        (f) optf=true; inputfile="$OPTARG";;
        (q) quiet=">/dev/null 2>/dev/null";;
        (s) start="$OPTARG";;
        (e) end="$OPTARG";;
        (v) verbose=true;;
        # return, not exit: exit would close the user's shell when the
        # function is sourced; the script itself still ends with status 0
        (V) version; return 0;;
        (-) break;;
    esac
done
shift $(( OPTIND - 1))
unset OPTIND
# With no command left the candidate value itself would be executed
if [ $# -eq 0 ] ; then
    echo "find-first-fail: Error: No command given" >&2
    return 1
fi
if $optf; then
    # -f: search for the minimal failing section of a file;
    # $start is the number of header lines (default: none)
    if [ -z "$start" ] ; then
        start=0
    fi
    _find_in_file "$@";
else
    # Search numeric arguments, starting from 1 unless -s is given
    if [ -z "$start" ] ; then
        start=1
    fi
    _find_in_arg "$@";
fi
# TODO find-optimal:
# instead of looking at exit value, look at last line
# start=--start || 1
# end=--end
# vstart = test(start)
# if end:
# vmiddle = test( (start+end)/2)
# if vstart < vmiddle:
# Search hill(start,end)
# else
# Search valley(start,t)
# else
# min = vstart
# max = vstart
# t = start*2
# while t < big:
# v = test(t)
# if v < max and vstart < max: Search hill(start,t)
# if min < v and min < vstart: Search valley(start,t)
# t = t*2
# fi
}
# Without arguments the file is being sourced to define the bash function:
#   . $(which find-first-fail)
# and there is nothing to run. With arguments it is used as a command:
#   find-first-fail command
if [ -n "$*" ] ; then
    find-first-fail "$@"
fi