find-first-fail: find failing file fragment.
This commit is contained in:
parent
4f131005cb
commit
c445e9352f
|
@ -5,12 +5,14 @@
|
|||
|
||||
=head1 NAME
|
||||
|
||||
find-first-fail - find the lowest argument that makes a command fail
|
||||
find-first-fail - fine function for finding first failing file fragment
|
||||
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<find-first-fail> [-2] [-q] [-s I<start>] I<command>
|
||||
B<find-first-fail> [-2] [-q] [-s I<start>] [-v] I<command>
|
||||
|
||||
B<find-first-fail> -f I<inputfile> [-s I<start>] [-q] [-v] I<command>
|
||||
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
@ -26,24 +28,37 @@ value and the previous value.
|
|||
If the I<start> value fails, B<find-first-fail> instead searches for
|
||||
the highest value that I<command> fails for.
|
||||
|
||||
If given a file with B<-f>, B<find-first-fail> will find the minimal
|
||||
section of the file that the command fails for.
|
||||
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<-f> I<inputfile>
|
||||
|
||||
Search for the bad line in I<inputfile>. Use B<-s> to tell how many
|
||||
lines should be treated as header (e.g. 1 if a normal CSV file with a
|
||||
header).
|
||||
|
||||
|
||||
=item B<-2>
|
||||
|
||||
Instead of passing the command a single argument, give the command 2
|
||||
arguments: I<from> I<to>.
|
||||
|
||||
|
||||
=item B<-q>
|
||||
|
||||
Quiet. Ignore output from I<command>.
|
||||
|
||||
|
||||
=item B<-s I<start>>
|
||||
|
||||
Start searching from the value I<start>. Normally searching will start from the value 1.
|
||||
|
||||
|
||||
=item B<-v>
|
||||
|
||||
Verbose. Show the commands being run.
|
||||
|
@ -66,6 +81,7 @@ This is a silly way to find the last file (namely 800):
|
|||
touch {1..800}
|
||||
find-first-fail ls
|
||||
|
||||
|
||||
=head2 Test a bash function
|
||||
|
||||
Test how long an argument /bin/echo can take
|
||||
|
@ -76,6 +92,7 @@ Test how long an argument /bin/echo can take
|
|||
}
|
||||
find-first-fail singleecho
|
||||
|
||||
|
||||
=head2 Test a bash function that takes from and to as arguments
|
||||
|
||||
Use a function that takes two arguments. It finds the line number
|
||||
|
@ -87,6 +104,7 @@ after HOME=.
|
|||
}
|
||||
find-first-fail -2 -q greplines
|
||||
|
||||
|
||||
=head2 Test complex command and show what is run
|
||||
|
||||
Complex commands can also be run:
|
||||
|
@ -108,6 +126,24 @@ you find the limits?
|
|||
find-first-fail -s 200 myprog
|
||||
|
||||
|
||||
=head2 Find minimal failing CSV file
|
||||
|
||||
Assume: example.csv
|
||||
|
||||
MyHeader
|
||||
4
|
||||
3
|
||||
5
|
||||
100
|
||||
3
|
||||
|
||||
myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; }
|
||||
|
||||
To identify the minimal CSV file that causes myparser to fail:
|
||||
|
||||
find-first-fail -f example.csv -s1 myparser
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2020 Ole Tange,
|
||||
|
@ -142,7 +178,6 @@ find-first-fail() {
|
|||
_find-first-fail() {
|
||||
local low=$1
|
||||
local high=$2
|
||||
# echo $low-$high
|
||||
if [ $low -gt $(($high - 2)) ]; then
|
||||
echo $low
|
||||
return
|
||||
|
@ -169,10 +204,8 @@ find-first-fail() {
|
|||
# _inner_run is needed if cmd is complex like:
|
||||
# perl -e 'exit( (shift) + (shift) > 10)'
|
||||
if $opt2 ; then
|
||||
$verbose && echo "${cmd[@]}" "$a" "$b"
|
||||
"${cmd[@]}" "$a" "$b"
|
||||
else
|
||||
$verbose && echo "${cmd[@]}" "$b"
|
||||
"${cmd[@]}" "$b"
|
||||
fi
|
||||
}
|
||||
|
@ -180,28 +213,16 @@ find-first-fail() {
|
|||
local b="$2"
|
||||
shift
|
||||
shift
|
||||
# echo "a=$a b=$b $@"
|
||||
local cmd=("$@")
|
||||
if $opt2 ; then
|
||||
$verbose && echo "$a<x<$b: ${cmd[@]}" "$a" "$b" >&2
|
||||
else
|
||||
$verbose && echo "$a<x<$b: ${cmd[@]}" "$b" >&2
|
||||
fi
|
||||
eval "$not" _inner_run "$quiet"
|
||||
}
|
||||
|
||||
local opt2=false
|
||||
local quiet=""
|
||||
local start=1
|
||||
local verbose=false
|
||||
|
||||
# Parse and remove options
|
||||
while getopts "2qs:v" options; do
|
||||
case "${options}" in
|
||||
(2) opt2=true;;
|
||||
(q) quiet=">/dev/null 2>/dev/null";;
|
||||
(s) start="$OPTARG";;
|
||||
(v) verbose=true;;
|
||||
(-) break;;
|
||||
esac
|
||||
done
|
||||
shift $(( OPTIND - 1))
|
||||
|
||||
_find_in_arg() {
|
||||
# If function(1) = false: run 'not function()' instead
|
||||
local not
|
||||
if _run "$start" "$start" "$@" ; then
|
||||
|
@ -213,13 +234,13 @@ find-first-fail() {
|
|||
# exponential search for the first value that is false
|
||||
# low = previous value (function($low) == true)
|
||||
# high = low * 2 (function($high) == false)
|
||||
local high=$start
|
||||
local low
|
||||
local high=$(( $start*2 ))
|
||||
local low=$start
|
||||
while _run $start $high "$@" ; do
|
||||
low=$high
|
||||
high=$(( $high*2 ))
|
||||
if [ $high -gt 4611686018427387900 ] ; then
|
||||
echo "$0: Error: exit value does not change of '$@'" >&2
|
||||
echo "find-first-fail: Error: exit value does not change of '$@'" >&2
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
@ -228,8 +249,132 @@ find-first-fail() {
|
|||
# high = tested fail
|
||||
# Search low..high
|
||||
# echo "low: $low high: $high not: $not"
|
||||
_find-first-fail $low $high "$@" 2>/dev/null
|
||||
unset low high start
|
||||
_find-first-fail $low $high "$@"
|
||||
unset low high start quiet
|
||||
}
|
||||
|
||||
#### find-first-fail-file
|
||||
|
||||
_run_file() {
    # Run the command on a temporary file built from $inputfile.
    #
    # The temporary file holds the header lines 1..$start (nothing if
    # $start is 0) followed by lines a..b of $inputfile.
    #
    # Arguments:
    #   $1    a: first line of the section to test
    #   $2    b: last line of the section to test
    #   $3..  the command; the temporary file is appended as last argument
    # Variables read from the calling scope:
    #   $start $inputfile $verbose $quiet $not
    # Output from the command is ignored if $quiet is set.
    # Exit value of the command is negated if $not is '!'.
    # Returns 1 without running the command if no temporary file
    # can be created.
    _inner_run() {
        # _inner_run is needed if cmd is complex like:
        #   perl -e 'exit( (shift) + (shift) > 10)'
        # because the eval below only has to parse the word
        # '_inner_run' and not the words of the command itself.
        "${cmd[@]}" "$tmp"
    }
    local a="$1"
    local b="$2"
    shift
    shift
    local cmd=("$@")
    # mktemp replaces tempfile(1), which only exists on Debian-derived
    # systems and is deprecated there. The template keeps the 'fff'
    # prefix and honours $TMPDIR as 'tempfile -p fff' did.
    local tmp
    if ! tmp=$(mktemp "${TMPDIR:-/tmp}/fffXXXXXX") ; then
        echo "find-first-fail: Error: Cannot create temporary file" >&2
        return 1
    fi
    $verbose && echo "$a<x<$b: ${cmd[@]}" "$tmp" >&2
    # Build file of header lines + line a..b
    perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp";
    perl -ne "$a..$b and print" "$inputfile" >> "$tmp";
    eval "$not" _inner_run "$quiet"
    # Postpone exit code: the temporary file must be removed first
    local _exit="$?"
    rm "$tmp"
    return "$_exit"
}
|
||||
|
||||
_find-low-file() {
    # Binary search for the first line of the section.
    #
    # Arguments:
    #   $1    low:  lower bound of the search
    #   $2    high: upper bound of the search
    #   $3..  the command (passed on to _run_file)
    # Each step tests lines middle..$global_high with _run_file:
    # on success the lower bound moves up to middle, otherwise the
    # upper bound moves down to middle.
    # The result is stored in $global_low of the calling scope.
    local low=$1
    local high=$2
    shift
    shift
    local middle
    # Stop when low and high are at most 1 apart
    until [ $low -gt $(($high - 2)) ]; do
        middle=$(( ( $low + $high ) / 2 ))
        if _run_file $middle $global_high "$@" ; then
            low=$middle
        else
            high=$middle
        fi
    done
    global_low=$low
}
|
||||
|
||||
_find-high-file() {
    # Binary search for the last line of the section.
    #
    # Arguments:
    #   $1    low:  lower bound of the search
    #   $2    high: upper bound of the search
    #   $3..  the command (passed on to _run_file)
    # Each step tests lines $global_low..middle with _run_file:
    # on success the upper bound moves down to middle, otherwise the
    # lower bound moves up to middle.
    # The result is stored in $global_high of the calling scope.
    local low=$1
    local high=$2
    shift
    shift
    local middle
    # Stop when low and high are at most 1 apart
    until [ $low -gt $(($high - 2)) ]; do
        middle=$(( ( $low + $high ) / 2 ))
        if _run_file $global_low $middle "$@" ; then
            high=$middle
        else
            low=$middle
        fi
    done
    global_high=$high
}
|
||||
|
||||
|
||||
_find_in_file() {
    # Search $inputfile for a section of lines and print it.
    #
    # Arguments:
    #   $@  the command; it is run by _run_file with a temporary
    #       file as its last argument
    # Variables read from the calling scope:
    #   $inputfile  the file to search
    #   $start      number of header lines; they are skipped by the
    #               search and printed before the section
    # Prints the header lines followed by lines
    # $global_low..$global_high of $inputfile.
    # Returns 1 if $inputfile does not exist.
    local not
    local global_low=$(($start+1))
    if [ ! -e "$inputfile" ] ; then
        echo "find-first-fail: Error: File not found '$inputfile'" >&2
        return 1
    fi
    # The redirect target is quoted: unquoted, a file name containing
    # whitespace makes bash fail with 'ambiguous redirect'.
    local global_high=$(wc -l < "$inputfile")
    # If the command fails on all the lines: run 'not command' in the
    # searches below instead
    if _run_file "$global_low" "$global_high" "$@" ; then
        not=''
    else
        not='!'
    fi
    # Binary search for $global_low
    _find-low-file $global_low $global_high "$@"
    # Binary search for $global_high (uses the $global_low just found)
    _find-high-file $global_low $global_high "$@"
    # Print the header followed by the section found
    perl -ne "($start and 1..$start) and print" "$inputfile"
    perl -ne "$global_low..$global_high and print" "$inputfile"
    unset low high start quiet
}
|
||||
|
||||
# Option state: flags hold the command 'true' or 'false' so they
# can be used directly as conditions
local opt2=false optf=false verbose=false
local quiet=""
local inputfile start
unset OPTIND

# Parse and remove options
while getopts "2f:qs:v" options; do
    case "${options}" in
        2)
            opt2=true
            ;;
        f)
            optf=true
            inputfile="$OPTARG"
            ;;
        q)
            # Redirections appended to the command when it is eval'ed
            quiet=">/dev/null 2>/dev/null"
            ;;
        s)
            start="$OPTARG"
            ;;
        v)
            verbose=true
            ;;
        -)
            break
            ;;
    esac
done
shift $(( OPTIND - 1))
unset OPTIND

# Without -s: a file search has 0 header lines, an argument search
# starts from 1
if $optf; then
    [ -n "$start" ] || start=0
    _find_in_file "$@"
else
    [ -n "$start" ] || start=1
    _find_in_arg "$@"
fi
|
||||
}
|
||||
|
||||
if [ -z "$*" ] ; then
|
||||
|
@ -240,3 +385,4 @@ else
|
|||
# find-first-fail command
|
||||
find-first-fail "$@"
|
||||
fi
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ test_unexported_function() {
|
|||
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
||||
# myprog is a function, so source find-first-fail first
|
||||
. `which find-first-fail`
|
||||
echo Find 12345678 in unexported function
|
||||
find-first-fail myprog
|
||||
}
|
||||
|
||||
|
@ -11,6 +12,7 @@ test_exported_function() {
|
|||
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
||||
# myprog is an exported function
|
||||
export -f myprog
|
||||
echo Find 12345678
|
||||
find-first-fail myprog
|
||||
}
|
||||
|
||||
|
@ -19,9 +21,9 @@ test_startvalue() {
|
|||
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
|
||||
else { exit ($a <= 12345678) }' "$@"; }
|
||||
export -f myprog
|
||||
# Finds 123
|
||||
echo Find 123
|
||||
find-first-fail myprog
|
||||
# Finds 12345678
|
||||
echo Find 12345678
|
||||
find-first-fail -s 200 myprog
|
||||
}
|
||||
|
||||
|
@ -29,10 +31,40 @@ test_s_v_12() {
|
|||
# Multiple options
|
||||
myprog() { perl -e 'exit (shift > 12)' "$@"; }
|
||||
export -f myprog
|
||||
echo Find 12 with progress
|
||||
find-first-fail -v -s 10 myprog
|
||||
echo Find 12 with progress quiet
|
||||
find-first-fail -v -q -s 10 myprog
|
||||
}
|
||||
|
||||
test_file() {
    # Run a file search (-f) with 1 header line (-s1) on a file
    # containing 'Header' followed by the numbers 1..100.
    tmp=$(tempfile)
    { echo Header; seq 100; } > $tmp
    # Succeeds only if file $1 has both a line '10' and a line '15'
    10_to_15() {
        grep ^10$ $1 && grep ^15$ $1
    }
    export -f 10_to_15
    echo 10..15
    find-first-fail -s1 -qf $tmp 10_to_15
    # Same search with the command prefixed by 'not'
    echo not 10..15
    find-first-fail -s1 -qf $tmp not 10_to_15
    rm $tmp
}
|
||||
|
||||
test_header() {
    # Run a file search (-f) with 1 header line (-s1) on a file
    # containing: 'Header', 1..10, 1000, 1..10.
    tmp=`tempfile`
    echo Header > $tmp
    seq 10 >> $tmp
    echo 1000 >> $tmp
    seq 10 >> $tmp

    # Exits 1 at the first input line that is > 100
    myparser() { perl -ne 'if($_ > 100) { exit 1 }' "$@"; }
    export -f myparser
    echo Should give:
    echo Header
    echo 1000
    find-first-fail -s1 -f $tmp myparser
    # Postpone exit code: remove the temporary file first (as
    # test_file does) so the test does not leave files behind
    local _exit="$?"
    rm $tmp
    return "$_exit"
}
|
||||
|
||||
# Export all functions with 'test_' in their name, so they can be
# called from the commands that parallel starts.
# Then pass the same function names in sorted order to parallel,
# one name per job ({}), with stderr merged into stdout and a job log
# written to /tmp/jl-<basename of this script>.
export -f $(compgen -A function | grep test_)
|
||||
compgen -A function | grep test_ | LC_ALL=C sort |
|
||||
parallel --timeout 1000% --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
||||
|
|
Loading…
Reference in a new issue