460 lines
10 KiB
Bash
Executable file
460 lines
10 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
: <<'=cut'
|
|
=encoding utf8
|
|
|
|
=head1 NAME
|
|
|
|
find-first-fail - fine function for finding first failing file
|
|
fragment (or numeric argument)
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
B<find-first-fail> [-2] [-q] [-s I<start>] [-e I<end>] [-v] [-V] I<command>
|
|
|
|
B<find-first-fail> -f I<inputfile> [-s I<start>] [-q] [-v] [-V] I<command>
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
B<find-first-fail> runs I<command> with a single number. It returns the highest
|
|
value that I<command> succeeds for.
|
|
|
|
It finds the value by first testing the I<start> value (which defaults
|
|
to 1). As long as the value succeeds, the value is doubled. When the
|
|
value fails, B<find-first-fail> does a binary search between this
|
|
value and the previous value.
|
|
|
|
If the I<start> value fails, B<find-first-fail> instead searches for
|
|
the highest value that I<command> fails for.
|
|
|
|
If given a file with B<-f>, B<find-first-fail> will find the minimal
|
|
section in the file the command fails for.
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over 4
|
|
|
|
=item B<-f> I<inputfile>
|
|
|
|
Search for the bad line in I<inputfile>. Use B<-s> to tell how many
|
|
lines should be treated as header (e.g. 1 if a normal CSV file with a
|
|
header).
|
|
|
|
|
|
=item B<-2>
|
|
|
|
Instead of passing the command a single argument, give the command 2
|
|
arguments: I<from> I<to>.
|
|
|
|
|
|
=item B<-q>
|
|
|
|
Quiet. Ignore output from I<command>.
|
|
|
|
|
|
=item B<-s I<start>>
|
|
|
|
Start searching from the value I<start>. Normally searching will start
|
|
from the value 1.
|
|
|
|
|
|
=item B<-e I<end>>
|
|
|
|
End searching at the value I<end>. Normally this value will be
|
|
determined automatically, but you can limit the search to be below
|
|
the value I<end>.
|
|
|
|
|
|
=item B<-v>
|
|
|
|
Verbose. Show the commands being run.
|
|
|
|
|
|
=back
|
|
|
|
|
|
=head1 EXAMPLES
|
|
|
|
=head2 Find the last file
|
|
|
|
This is a silly way to find the last non-existing file (namely 244):
|
|
|
|
touch {245..800}
|
|
find-first-fail ls
|
|
|
|
This is a silly way to find the last file (namely 800):
|
|
|
|
touch {1..800}
|
|
find-first-fail ls
|
|
|
|
|
|
=head2 Test a bash function
|
|
|
|
Test how long an argument /bin/echo can take
|
|
|
|
. $(which find-first-fail)
|
|
singleecho() {
|
|
/bin/echo $(perl -e 'print "x"x'$1) >/dev/null
|
|
}
|
|
find-first-fail singleecho
|
|
|
|
|
|
=head2 Test a bash function that takes from and to as arguments
|
|
|
|
Use a function that takes two arguments. It finds the line number
|
|
after HOME=.
|
|
|
|
. $(which find-first-fail)
|
|
greplines() {
|
|
env | perl -ne "$1..$2 and print" | grep HOME=
|
|
}
|
|
find-first-fail -2 -q greplines
|
|
|
|
|
|
=head2 Test complex command and show what is run
|
|
|
|
Complex commands can also be run:
|
|
|
|
find-first-fail -v perl -e 'exit(shift > 129)'
|
|
|
|
|
|
=head2 Find the second limit of a program
|
|
|
|
Assume you have a program that is OK in the range 123..12345. How do
|
|
you find those limits?
|
|
|
|
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
|
|
else { exit ($a <= 12345) }' "$@"; }
|
|
export -f myprog
|
|
# Finds 123
|
|
find-first-fail myprog
|
|
# Finds 12345
|
|
find-first-fail -s 200 myprog
|
|
|
|
|
|
=head2 Find minimal failing CSV file
|
|
|
|
Assume: example.csv
|
|
|
|
MyHeader
|
|
4
|
|
3
|
|
5
|
|
100
|
|
3
|
|
|
|
myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; }
|
|
|
|
To identify the minimal CSV file that causes myparser to fail:
|
|
|
|
find-first-fail -f example.csv -s1 myparser
|
|
|
|
|
|
=head1 REPORTING BUGS
|
|
|
|
Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
Copyright (C) 2020 Ole Tange,
|
|
http://ole.tange.dk and Free Software Foundation, Inc.
|
|
|
|
|
|
=head1 LICENSE
|
|
|
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
B<eval>(1)
|
|
|
|
=cut
|
|
|
|
#######################################
# Find the highest number a command succeeds for, or the minimal section
# of a file a command fails for.
#
# Usage:
#   find-first-fail [-2] [-q] [-s start] [-e end] [-v] [-V] command...
#   find-first-fail -f inputfile [-s start] [-q] [-v] [-V] command...
#
# Argument mode: the command is first run with <start> (default 1). While
# it keeps giving the same result the value is doubled; when the result
# flips, a binary search between the last two values is done. If the
# command fails for <start>, the search is for the highest failing value
# instead.
#
# File mode (-f): <start> (default 0) is the number of header lines. The
# command is run on temporary files holding the header plus a line range
# of <inputfile>; the range is narrowed from both ends by binary search.
# Needs perl and mktemp.
#
# Options:
#   -2        pass two arguments (from, to) to the command instead of one
#   -q        discard stdout and stderr of the command
#   -s start  first value to test / number of header lines
#   -e end    use <end> as the upper bound instead of searching for it
#   -f file   search in <file> instead of over numbers
#   -v        print every command that is run to stderr
#   -V        print version information
# Outputs:
#   The value found (argument mode) or the reduced file (file mode) on
#   stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 on runtime errors, 2 on usage errors.
# Note:
#   The helper functions below are (re)defined in the calling shell every
#   time find-first-fail runs. They communicate through the local
#   variables of their callers (bash dynamic scoping): opt2, verbose,
#   quiet, not, start, end, inputfile, tmp, global_low, global_high.
#######################################
find-first-fail() {
  # Run "$@".
  # Output is discarded if $quiet is true.
  # Exit value is negated if $not is non-empty.
  _fff_invoke() {
    if [ -n "$not" ]; then
      if $quiet; then
        ! "$@" >/dev/null 2>/dev/null
      else
        ! "$@"
      fi
    else
      if $quiet; then
        "$@" >/dev/null 2>/dev/null
      else
        "$@"
      fi
    fi
  }

  # _run low value high command...
  # run:
  #   cmd $low $value   (with -2)
  # or:
  #   cmd $value
  # $high is only used for the verbose message.
  _run() {
    local a=$1
    local b=$2
    local c=$3
    shift 3
    if $opt2; then
      if $verbose; then
        printf '%s\n' "$a<x<$b: $* $a $b" >&2
      fi
      _fff_invoke "$@" "$a" "$b"
    else
      if $verbose; then
        printf '%s\n' "$a<x<$c: $* $b" >&2
      fi
      _fff_invoke "$@" "$b"
    fi
  }

  # _find-first-fail low high command...
  # Binary search between low (tested good) and high (assumed bad).
  # Prints the highest good value once the two are at most 1 apart.
  _find-first-fail() {
    local low=$1
    local high=$2
    local middle
    shift 2
    while [ "$low" -le "$(( high - 2 ))" ]; do
      middle=$(( (low + high) / 2 ))
      if _run "$low" "$middle" "$high" "$@"; then
        low=$middle
      else
        high=$middle
      fi
    done
    printf '%s\n' "$low"
  }

  # _find_in_arg command...
  # Argument mode driver: find the upper bound, then binary search.
  _find_in_arg() {
    # If function($start) == false: run 'not function()' instead
    local not=''
    if ! _run "$start" "$start" "?" "$@"; then
      not='!'
    fi

    local low=$start
    local high
    if [ -z "$end" ]; then
      # No end value given with -e:
      #   exponential search for the first value that is false
      #   low = previous value (function($low) == true)
      #   high = low * 2       (function($high) == false)
      high=$(( start * 2 ))
      if [ "$low" -eq 0 ]; then
        # 0*2 == 0 would test the same value forever
        high=1
      fi
      while _run "$low" "$high" "?" "$@"; do
        low=$high
        high=$(( high * 2 ))
        # Stop before the next doubling overflows a signed 64-bit integer
        if [ "$high" -gt 4611686018427387900 ]; then
          echo "find-first-fail: Error: exit value does not change of '$*'" >&2
          return 1
        fi
      done
    else
      high=$end
    fi

    # low = tested good
    # high = tested fail (or the limit given with -e)
    # Search low..high
    _find-first-fail "$low" "$high" "$@"
  }

  #### find-first-fail-file

  # _run_file a b command...
  # build $tmp as the header lines followed by line a..b of $inputfile
  # run:
  #   cmd $tmp
  # TODO if defined memory{$a,$b}: return value
  # TODO memory{$a,$b}="$_exit"
  _run_file() {
    local a=$1
    local b=$2
    shift 2
    if $verbose; then
      printf '%s\n' "$a<x<$b: $* $tmp" >&2
    fi
    # Header: line 1..$start (nothing if $start is 0)
    perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp"
    # Body: line a..b
    perl -ne "$a..$b and print" "$inputfile" >> "$tmp"
    _fff_invoke "$@" "$tmp"
  }

  # _find-low-file low high command...
  # Binary search for the highest first line that keeps the result of the
  # full file when the last line is fixed at $global_high. Values are line
  # numbers times 10. The result is stored in $global_low.
  _find-low-file() {
    local low=$1
    local high=$2
    local middle
    shift 2
    local lastline=$(( global_high / 10 ))
    while [ "$low" -le "$(( high - 2 ))" ]; do
      middle=$(( (low + high) / 2 ))
      if _run_file "$(( middle / 10 ))" "$lastline" "$@"; then
        low=$middle
      else
        high=$middle
      fi
    done
    # if $low > $high - 0.2: $global_low = $low
    global_low=$low
  }

  # _find-high-file low high command...
  # Binary search for the lowest last line that keeps the result of the
  # full file when the first line is fixed at $global_low. Values are line
  # numbers times 10. The result is stored in $global_high.
  _find-high-file() {
    local low=$1
    local high=$2
    local middle
    shift 2
    local firstline=$(( global_low / 10 ))
    while [ "$low" -le "$(( high - 5 ))" ]; do
      middle=$(( (low + high) / 2 ))
      if _run_file "$firstline" "$(( middle / 10 ))" "$@"; then
        high=$middle
      else
        low=$middle
      fi
    done
    # if $low > $high - 0.5: $global_high = $high + 0.5
    global_high=$(( high + 5 ))
  }

  # _find_in_file command...
  # File mode driver: shrink $inputfile from the end, then from the start,
  # and print the header plus the remaining line range.
  _find_in_file() {
    if [ ! -e "$inputfile" ]; then
      echo "find-first-fail: Error: File not found '$inputfile'" >&2
      return 1
    fi
    # One temporary file is reused for every run of the command
    local tmp
    tmp=$(mktemp "${TMPDIR:-/tmp}/fffXXXXXX") || {
      echo "find-first-fail: Error: Cannot create temporary file" >&2
      return 1
    }
    # Count lines with perl so a last line without newline is included
    local lines
    lines=$(perl -ne 'END { print 0 + $. }' "$inputfile")
    # Line numbers are kept multiplied by 10 during the search
    local global_low=$(( (start + 1) * 10 ))
    local global_high=$(( lines * 10 ))

    # If function(whole file) == false: run 'not function()' instead
    local not=''
    if ! _run_file "$(( global_low / 10 ))" "$(( global_high / 10 ))" "$@"; then
      not='!'
    fi
    # Binary search for minimal $global_high that fails
    _find-high-file "$global_low" "$global_high" "$@"
    # Binary search for minimal $global_low that fails
    _find-low-file "$global_low" "$global_high" "$@"

    # Print the resulting minimal file
    perl -ne "($start and 1..$start) and print" "$inputfile"
    perl -ne "$(( global_low / 10 ))..$(( global_high / 10 )) and print" "$inputfile"
    rm -f -- "$tmp"
  }

  # Print version and license information.
  version() {
    cat <<EOF
find-first-fail 20210109
Copyright (C) 2020-2021 Ole Tange, http://ole.tange.dk
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
find-first-fail comes with no warranty.

Web site: https://gitlab.com/ole.tange/tangetools/-/tree/master/find-first-fail
EOF
  }

  local opt2=false
  local optf=false
  local quiet=false
  local verbose=false
  local inputfile=''
  local start=''
  # Must be local: otherwise -e from an earlier call is reused
  local end=''
  # Local getopts state so the caller's OPTIND/OPTARG are left alone
  local options OPTARG OPTIND=1

  # Parse and remove options
  while getopts "2e:f:qs:vV" options; do
    case "${options}" in
      (2) opt2=true ;;
      (f) optf=true; inputfile="$OPTARG" ;;
      (q) quiet=true ;;
      (s) start="$OPTARG" ;;
      (e) end="$OPTARG" ;;
      (v) verbose=true ;;
      # return, not exit: the function may run in an interactive shell
      (V) version; return 0 ;;
      (*)
        echo "find-first-fail: Usage: find-first-fail [-2] [-q] [-s start] [-e end] [-f inputfile] [-v] [-V] command" >&2
        return 2
        ;;
    esac
  done
  shift $(( OPTIND - 1 ))

  if [ "$#" -eq 0 ]; then
    echo "find-first-fail: Error: No command given" >&2
    return 2
  fi
  local integer='^-?[0-9]+$'
  if [[ -n "$start" && ! "$start" =~ $integer ]]; then
    echo "find-first-fail: Error: -s needs an integer, got '$start'" >&2
    return 2
  fi
  if [[ -n "$end" && ! "$end" =~ $integer ]]; then
    echo "find-first-fail: Error: -e needs an integer, got '$end'" >&2
    return 2
  fi

  if $optf; then
    if [ -z "$start" ]; then
      start=0
    fi
    _find_in_file "$@"
  else
    if [ -z "$start" ]; then
      start=1
    fi
    _find_in_arg "$@"
  fi
  # TODO find-optimal:
  #   instead of looking at exit value, look at last line
  #   start=--start || 1
  #   end=--end
  #   vstart = test(start)
  #   if end:
  #     vmiddle = test( (start+end)/2)
  #     if vstart < vmiddle:
  #       Search hill(start,end)
  #     else
  #       Search valley(start,t)
  #   else
  #     min = vstart
  #     max = vstart
  #     t = start*2
  #     while t < big:
  #       v = test(t)
  #       if v < max and vstart < max: Search hill(start,t)
  #       if min < v and min < vstart: Search valley(start,t)
  #       t = t*2
  #   fi
}
|
|
|
|
# Without arguments the file is being sourced to get the bash function:
#   . $(which find-first-fail)
# so nothing is run. With arguments it is used as a command:
#   find-first-fail command
if [ -n "$*" ]; then
  find-first-fail "$@"
fi
|
|
|