find-first-fail: find failing file fragment.

This commit is contained in:
Ole Tange 2020-11-12 18:09:42 +01:00
parent 4f131005cb
commit c445e9352f
2 changed files with 233 additions and 55 deletions

View file

@ -5,12 +5,14 @@
=head1 NAME
find-first-fail - find the lowest argument that makes a command fail
find-first-fail - fine function for finding first failing file fragment
=head1 SYNOPSIS
B<find-first-fail> [-2] [-q] [-s I<start>] I<command>
B<find-first-fail> [-2] [-q] [-s I<start>] [-v] I<command>
B<find-first-fail> -f I<inputfile> [-s I<start>] [-q] [-v] I<command>
=head1 DESCRIPTION
@ -26,24 +28,37 @@ value and the previous value.
If the I<start> value fails, B<find-first-fail> instead searches for
the highest value that I<command> fails for.
If given a file with B<-f>, B<find-first-fail> will find the minimal
section of the file for which the command fails.
=head1 OPTIONS
=over 4
=item B<-f> I<inputfile>
Search for the failing section in I<inputfile>. Use B<-s> to give the
number of leading lines to treat as a header (e.g. 1 for a normal CSV
file with a header line).
=item B<-2>
Instead of passing the command a single argument, give the command 2
arguments: I<from> I<to>.
=item B<-q>
Quiet. Ignore output from I<command>.
=item B<-s I<start>>
Start searching from the value I<start>. Normally searching will start from the value 1.
=item B<-v>
Verbose. Show the commands being run.
@ -66,6 +81,7 @@ This is a silly way to find the last file (namely 800):
touch {1..800}
find-first-fail ls
=head2 Test a bash function
Test how long an argument /bin/echo can take
@ -76,6 +92,7 @@ Test how long an argument /bin/echo can take
}
find-first-fail singleecho
=head2 Test a bash function that takes from and to as arguments
Use a function that takes two arguments. It finds the line number
@ -87,6 +104,7 @@ after HOME=.
}
find-first-fail -2 -q greplines
=head2 Test complex command and show what is run
Complex commands can also be run:
@ -108,6 +126,24 @@ you find the limits?
find-first-fail -s 200 myprog
=head2 Find minimal failing CSV file
Assume: example.csv
MyHeader
4
3
5
100
3
myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; }
To identify the minimal CSV file that causes myparser to fail:
find-first-fail -f example.csv -s1 myparser
=head1 AUTHOR
Copyright (C) 2020 Ole Tange,
@ -142,7 +178,6 @@ find-first-fail() {
_find-first-fail() {
local low=$1
local high=$2
# echo $low-$high
if [ $low -gt $(($high - 2)) ]; then
echo $low
return
@ -169,10 +204,8 @@ find-first-fail() {
# _inner_run is needed if cmd is complex like:
# perl -e 'exit( (shift) + (shift) > 10)'
if $opt2 ; then
$verbose && echo "${cmd[@]}" "$a" "$b"
"${cmd[@]}" "$a" "$b"
else
$verbose && echo "${cmd[@]}" "$b"
"${cmd[@]}" "$b"
fi
}
@ -180,28 +213,16 @@ find-first-fail() {
local b="$2"
shift
shift
# echo "a=$a b=$b $@"
local cmd=( "$@" )
local cmd=("$@")
if $opt2 ; then
$verbose && echo "$a<x<$b: ${cmd[@]}" "$a" "$b" >&2
else
$verbose && echo "$a<x<$b: ${cmd[@]}" "$b" >&2
fi
eval "$not" _inner_run "$quiet"
}
local opt2=false
local quiet=""
local start=1
local verbose=false
# Parse and remove options
while getopts "2qs:v" options; do
case "${options}" in
(2) opt2=true;;
(q) quiet=">/dev/null 2>/dev/null";;
(s) start="$OPTARG";;
(v) verbose=true;;
(-) break;;
esac
done
shift $(( OPTIND - 1))
_find_in_arg() {
# If function(1) = false: run 'not function()' instead
local not
if _run "$start" "$start" "$@" ; then
@ -213,13 +234,13 @@ find-first-fail() {
# exponential search for the first value that is false
# low = previous value (function($low) == true)
# high = low * 2 (function($high) == false)
local high=$start
local low
local high=$(( $start*2 ))
local low=$start
while _run $start $high "$@" ; do
low=$high
high=$(( $high*2 ))
if [ $high -gt 4611686018427387900 ] ; then
echo "$0: Error: exit value does not change of '$@'" >&2
echo "find-first-fail: Error: exit value does not change of '$@'" >&2
return
fi
done
@ -228,8 +249,132 @@ find-first-fail() {
# high = tested fail
# Search low..high
# echo "low: $low high: $high not: $not"
_find-first-fail $low $high "$@" 2>/dev/null
unset low high start
_find-first-fail $low $high "$@"
unset low high start quiet
}
#### find-first-fail-file
_run_file() {
  # Run a command on a temporary file built from lines of $inputfile.
  #
  # Arguments:
  #   $1   - first line number (a) of the section to test
  #   $2   - last line number (b) of the section to test
  #   $3.. - command to run; the temporary file name is appended as
  #          its last argument
  # Read from the caller's scope:
  #   $inputfile - file the lines are taken from
  #   $start     - number of leading header lines that are always copied
  #   $not       - '!' to negate the command's exit value, '' otherwise
  #   $quiet     - redirection string appended to the command (may be '')
  #   $verbose   - true/false; if true the command is shown on stderr
  # Returns:
  #   The (possibly negated) exit value of the command, or 1 if the
  #   temporary file cannot be created.
  _inner_run() {
    # _inner_run is needed if cmd is complex like:
    #   perl -e 'exit( (shift) + (shift) > 10)'
    # It also gives eval a single word that '!' and the redirection
    # in $quiet can be applied to.
    "${cmd[@]}" "$tmp"
  }
  local a="$1"
  local b="$2"
  shift
  shift
  local cmd=("$@")
  # tempfile(1) ships only with Debian's debianutils and is deprecated
  # in favour of mktemp(1). Declaring and assigning separately keeps
  # mktemp's exit value visible.
  local tmp
  tmp=$(mktemp "${TMPDIR:-/tmp}/fffXXXXXX") || return 1
  $verbose && echo "$a<x<$b: ${cmd[@]}" "$tmp" >&2
  # Build file: header lines 1..$start followed by lines a..b
  perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp"
  perl -ne "$a..$b and print" "$inputfile" >> "$tmp"
  eval "$not" _inner_run "$quiet"
  # Postpone exit code until the temporary file has been removed
  local _exit="$?"
  rm -f -- "$tmp"
  return "$_exit"
}
_find-low-file() {
  # Binary search for the first line of the section.
  #
  # Arguments:
  #   $1   - lower candidate line number
  #   $2   - upper candidate line number
  #   $3.. - command, handed on to _run_file
  # Reads $global_high from the caller's scope and stores the result
  # in $global_low.
  local lo=$1
  local hi=$2
  local mid
  # Candidates left? (same condition as: not lo > hi - 2)
  while [ "$lo" -le $(( hi - 2 )) ]; do
    # Drop the two bounds once; afterwards "$@" is just the command
    if [ -z "$mid" ]; then
      shift
      shift
    fi
    mid=$(( (lo + hi) / 2 ))
    # Does the section mid..global_high still pass the (possibly
    # negated) test? Then the section can start at mid or later.
    if _run_file $mid $global_high "$@" ; then
      lo=$mid
    else
      hi=$mid
    fi
  done
  global_low=$lo
}
_find-high-file() {
  # Binary search for the last line of the section.
  #
  # Arguments:
  #   $1   - lower candidate line number
  #   $2   - upper candidate line number
  #   $3.. - command, handed on to _run_file
  # Reads $global_low from the caller's scope and stores the result
  # in $global_high.
  local lo=$1
  local hi=$2
  local mid
  # Candidates left? (same condition as: not lo > hi - 2)
  while [ "$lo" -le $(( hi - 2 )) ]; do
    # Drop the two bounds once; afterwards "$@" is just the command
    if [ -z "$mid" ]; then
      shift
      shift
    fi
    mid=$(( (lo + hi) / 2 ))
    # Does the section global_low..mid already pass the (possibly
    # negated) test? Then the section can end at mid or earlier.
    if _run_file $global_low $mid "$@" ; then
      hi=$mid
    else
      lo=$mid
    fi
  done
  global_high=$hi
}
_find_in_file() {
  # Find the minimal section of $inputfile for which the command
  # changes its exit value, and print the header lines followed by
  # that section on stdout.
  #
  # Arguments:
  #   $@ - command to run; _run_file appends a temporary file name
  # Read from the caller's scope:
  #   $inputfile - file to search
  #   $start     - number of leading header lines
  # Returns:
  #   1 if $inputfile does not exist.
  local not
  local global_low=$(($start+1))
  if [ ! -e "$inputfile" ] ; then
    echo "find-first-fail: Error: File not found '$inputfile'" >&2
    return 1
  fi
  # Quote the file name so names containing whitespace work; the
  # arithmetic expansion strips padding some wc implementations emit.
  local global_high
  global_high=$(wc -l < "$inputfile") || return 1
  global_high=$(( global_high ))
  # If the command fails on the whole file: run 'not command' instead,
  # so that _run_file is true for every section that still contains
  # the interesting lines.
  if _run_file "$global_low" "$global_high" "$@" ; then
    not=''
  else
    not='!'
  fi
  # Binary search for $global_low, then for $global_high.
  # Both searches stop when their two bounds are adjacent and never
  # probe the bound that was handed in as "the other side". Passing
  # the line just outside the file section (an empty section) as that
  # bound lets the searches reach the last/first line itself, so a
  # one-line section is reported as one line instead of two.
  _find-low-file "$global_low" "$(( global_high + 1 ))" "$@"
  _find-high-file "$(( global_low - 1 ))" "$global_high" "$@"
  # Print the header followed by the section found
  perl -ne "($start and 1..$start) and print" "$inputfile"
  perl -ne "$global_low..$global_high and print" "$inputfile"
  # Presumably so the values do not leak into a later run when the
  # file has been sourced into an interactive shell.
  unset low high start quiet
}
# Option state shared (through bash's dynamic scoping) with the nested
# helper functions.
# opt2: true if the command takes two arguments (-2)
local opt2=false
# optf: true if a file is to be searched (-f)
local optf=false
# inputfile: the file given with -f
local inputfile
# quiet: redirection string; _run_file appends it to the command through eval
local quiet=""
# start: value of -s; left empty here so the default can depend on -f
local start
# verbose: true if commands should be shown (-v)
local verbose=false
# getopts continues from $OPTIND, so clear it before parsing
unset OPTIND
# Parse and remove options
while getopts "2f:qs:v" options; do
case "${options}" in
(2) opt2=true;;
(f) optf=true; inputfile="$OPTARG";;
(q) quiet=">/dev/null 2>/dev/null";;
(s) start="$OPTARG";;
(v) verbose=true;;
# NOTE(review): '-' is not in the option string above, so this branch
# looks unreachable - confirm
(-) break;;
esac
done
# Drop the parsed options; "$@" is now the command to run
shift $(( OPTIND - 1))
unset OPTIND
if $optf; then
# File mode: by default no lines are treated as header
if [ -z "$start" ] ; then
start=0
fi
_find_in_file "$@";
else
# Argument mode: by default searching starts from the value 1
if [ -z "$start" ] ; then
start=1
fi
_find_in_arg "$@";
fi
}
if [ -z "$*" ] ; then
@ -240,3 +385,4 @@ else
# find-first-fail command
find-first-fail "$@"
fi

View file

@ -4,6 +4,7 @@ test_unexported_function() {
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
# myprog is a function, so source find-first-fail first
. `which find-first-fail`
echo Find 12345678 in unexported function
find-first-fail myprog
}
@ -11,6 +12,7 @@ test_exported_function() {
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
# myprog is an exported function
export -f myprog
echo Find 12345678
find-first-fail myprog
}
@ -19,9 +21,9 @@ test_startvalue() {
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
else { exit ($a <= 12345678) }' "$@"; }
export -f myprog
# Finds 123
echo Find 123
find-first-fail myprog
# Finds 12345678
echo Find 12345678
find-first-fail -s 200 myprog
}
@ -29,10 +31,40 @@ test_s_v_12() {
# Multiple options
myprog() { perl -e 'exit (shift > 12)' "$@"; }
export -f myprog
echo Find 12 with progress
find-first-fail -v -s 10 myprog
echo Find 12 with progress quiet
find-first-fail -v -q -s 10 myprog
}
test_file() {
  # Search a file of "Header" + 1..100 for the section that makes
  # 10_to_15 succeed (lines 10..15), and then for the negated command.
  # mktemp replaces tempfile(1), which is Debian-specific and
  # deprecated; tmp is local so it does not leak into the caller.
  local tmp
  tmp=$(mktemp) || return 1
  echo Header > "$tmp"
  seq 100 >> "$tmp"
  # True only if the file contains both a line "10" and a line "15"
  10_to_15() { grep '^10$' "$1" && grep '^15$' "$1"; }
  export -f 10_to_15
  echo 10..15
  find-first-fail -s1 -qf "$tmp" 10_to_15
  echo not 10..15
  find-first-fail -s1 -qf "$tmp" not 10_to_15
  rm -- "$tmp"
}
test_header() {
  # Search a file with one header line and a single line (1000) that
  # makes myparser fail; the expected output is echoed first.
  # mktemp replaces tempfile(1), which is Debian-specific and
  # deprecated; tmp is local so it does not leak into the caller.
  local tmp
  tmp=$(mktemp) || return 1
  echo Header > "$tmp"
  seq 10 >> "$tmp"
  echo 1000 >> "$tmp"
  seq 10 >> "$tmp"
  # Fails (exit 1) on the first line whose value is above 100
  myparser() { perl -ne 'if($_ > 100) { exit 1 }' "$@"; }
  export -f myparser
  echo Should give:
  echo Header
  echo 1000
  find-first-fail -s1 -f "$tmp" myparser
  # Remove the temporary file (as test_file does) but still return
  # the exit value of find-first-fail
  local rc=$?
  rm -f -- "$tmp"
  return "$rc"
}
# Export every function whose name contains test_ so that the shells
# started by parallel can call them.
export -f $(compgen -A function | grep test_)
# Feed the test function names, byte-wise sorted, to parallel; each
# one is run with stderr merged into stdout ('{} 2>&1').
compgen -A function |
  grep test_ |
  LC_ALL=C sort |
  parallel --timeout 1000% --tag -k --joblog /tmp/jl-$(basename $0) '{} 2>&1'