find-first-fail: find failing file fragment.
This commit is contained in:
parent
4f131005cb
commit
c445e9352f
|
@ -5,12 +5,14 @@
|
||||||
|
|
||||||
=head1 NAME
|
=head1 NAME
|
||||||
|
|
||||||
find-first-fail - find the lowest argument that makes a command fail
|
find-first-fail - fine function for finding first failing file fragment
|
||||||
|
|
||||||
|
|
||||||
=head1 SYNOPSIS
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
B<find-first-fail> [-2] [-q] [-s I<start>] I<command>
|
B<find-first-fail> [-2] [-q] [-s I<start>] [-v] I<command>
|
||||||
|
|
||||||
|
B<find-first-fail> -f I<inputfile> [-s I<start>] [-q] [-v] I<command>
|
||||||
|
|
||||||
|
|
||||||
=head1 DESCRIPTION
|
=head1 DESCRIPTION
|
||||||
|
@ -26,24 +28,37 @@ value and the previous value.
|
||||||
If the I<start> value fails, B<find-first-fail> instead searches for
|
If the I<start> value fails, B<find-first-fail> instead searches for
|
||||||
the highest value that I<command> fails for.
|
the highest value that I<command> fails for.
|
||||||
|
|
||||||
|
If given a file with B<-f>, B<find-first-fail> will find the minimal
|
||||||
|
section in the file the command fails for.
|
||||||
|
|
||||||
|
|
||||||
=head1 OPTIONS
|
=head1 OPTIONS
|
||||||
|
|
||||||
=over 4
|
=over 4
|
||||||
|
|
||||||
|
=item B<-f> I<inputfile>
|
||||||
|
|
||||||
|
Search for the bad line in I<inputfile>. Use B<-s> to specify how many
|
||||||
|
lines should be treated as header (e.g. 1 for a normal CSV file with a
|
||||||
|
header).
|
||||||
|
|
||||||
|
|
||||||
=item B<-2>
|
=item B<-2>
|
||||||
|
|
||||||
Instead of passing the command a single argument, give the command 2
|
Instead of passing the command a single argument, give the command 2
|
||||||
arguments: I<from> I<to>.
|
arguments: I<from> I<to>.
|
||||||
|
|
||||||
|
|
||||||
=item B<-q>
|
=item B<-q>
|
||||||
|
|
||||||
Quiet. Ignore output from I<command>.
|
Quiet. Ignore output from I<command>.
|
||||||
|
|
||||||
|
|
||||||
=item B<-s I<start>>
|
=item B<-s I<start>>
|
||||||
|
|
||||||
Start searching from the value I<start>. Normally searching will start from the value 1.
|
Start searching from the value I<start>. Normally searching will start from the value 1.
|
||||||
|
|
||||||
|
|
||||||
=item B<-v>
|
=item B<-v>
|
||||||
|
|
||||||
Verbose. Show the commands being run.
|
Verbose. Show the commands being run.
|
||||||
|
@ -66,6 +81,7 @@ This is a silly way to find the last file (namely 800):
|
||||||
touch {1..800}
|
touch {1..800}
|
||||||
find-first-fail ls
|
find-first-fail ls
|
||||||
|
|
||||||
|
|
||||||
=head2 Test a bash function
|
=head2 Test a bash function
|
||||||
|
|
||||||
Test how long an argument /bin/echo can take
|
Test how long an argument /bin/echo can take
|
||||||
|
@ -76,6 +92,7 @@ Test how long an argument /bin/echo can take
|
||||||
}
|
}
|
||||||
find-first-fail singleecho
|
find-first-fail singleecho
|
||||||
|
|
||||||
|
|
||||||
=head2 Test a bash function that takes from and to as arguments
|
=head2 Test a bash function that takes from and to as arguments
|
||||||
|
|
||||||
Use a function that takes two arguments. It finds the line number
|
Use a function that takes two arguments. It finds the line number
|
||||||
|
@ -87,6 +104,7 @@ after HOME=.
|
||||||
}
|
}
|
||||||
find-first-fail -2 -q greplines
|
find-first-fail -2 -q greplines
|
||||||
|
|
||||||
|
|
||||||
=head2 Test complex command and show what is run
|
=head2 Test complex command and show what is run
|
||||||
|
|
||||||
Complex commands can also be run:
|
Complex commands can also be run:
|
||||||
|
@ -108,6 +126,24 @@ you find the limits?
|
||||||
find-first-fail -s 200 myprog
|
find-first-fail -s 200 myprog
|
||||||
|
|
||||||
|
|
||||||
|
=head2 Find minimal failing CSV file
|
||||||
|
|
||||||
|
Assume example.csv contains:
|
||||||
|
|
||||||
|
MyHeader
|
||||||
|
4
|
||||||
|
3
|
||||||
|
5
|
||||||
|
100
|
||||||
|
3
|
||||||
|
|
||||||
|
myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; }
|
||||||
|
|
||||||
|
To identify the minimal CSV file that causes myparser to fail:
|
||||||
|
|
||||||
|
find-first-fail -f example.csv -s1 myparser
|
||||||
|
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
||||||
Copyright (C) 2020 Ole Tange,
|
Copyright (C) 2020 Ole Tange,
|
||||||
|
@ -142,7 +178,6 @@ find-first-fail() {
|
||||||
_find-first-fail() {
|
_find-first-fail() {
|
||||||
local low=$1
|
local low=$1
|
||||||
local high=$2
|
local high=$2
|
||||||
# echo $low-$high
|
|
||||||
if [ $low -gt $(($high - 2)) ]; then
|
if [ $low -gt $(($high - 2)) ]; then
|
||||||
echo $low
|
echo $low
|
||||||
return
|
return
|
||||||
|
@ -169,10 +204,8 @@ find-first-fail() {
|
||||||
# _inner_run is needed if cmd is complex like:
|
# _inner_run is needed if cmd is complex like:
|
||||||
# perl -e 'exit( (shift) + (shift) > 10)'
|
# perl -e 'exit( (shift) + (shift) > 10)'
|
||||||
if $opt2 ; then
|
if $opt2 ; then
|
||||||
$verbose && echo "${cmd[@]}" "$a" "$b"
|
|
||||||
"${cmd[@]}" "$a" "$b"
|
"${cmd[@]}" "$a" "$b"
|
||||||
else
|
else
|
||||||
$verbose && echo "${cmd[@]}" "$b"
|
|
||||||
"${cmd[@]}" "$b"
|
"${cmd[@]}" "$b"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
@ -180,28 +213,16 @@ find-first-fail() {
|
||||||
local b="$2"
|
local b="$2"
|
||||||
shift
|
shift
|
||||||
shift
|
shift
|
||||||
# echo "a=$a b=$b $@"
|
|
||||||
local cmd=("$@")
|
local cmd=("$@")
|
||||||
|
if $opt2 ; then
|
||||||
|
$verbose && echo "$a<x<$b: ${cmd[@]}" "$a" "$b" >&2
|
||||||
|
else
|
||||||
|
$verbose && echo "$a<x<$b: ${cmd[@]}" "$b" >&2
|
||||||
|
fi
|
||||||
eval "$not" _inner_run "$quiet"
|
eval "$not" _inner_run "$quiet"
|
||||||
}
|
}
|
||||||
|
|
||||||
local opt2=false
|
_find_in_arg() {
|
||||||
local quiet=""
|
|
||||||
local start=1
|
|
||||||
local verbose=false
|
|
||||||
|
|
||||||
# Parse and remove options
|
|
||||||
while getopts "2qs:v" options; do
|
|
||||||
case "${options}" in
|
|
||||||
(2) opt2=true;;
|
|
||||||
(q) quiet=">/dev/null 2>/dev/null";;
|
|
||||||
(s) start="$OPTARG";;
|
|
||||||
(v) verbose=true;;
|
|
||||||
(-) break;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
shift $(( OPTIND - 1))
|
|
||||||
|
|
||||||
# If function(1) = false: run 'not function()' instead
|
# If function(1) = false: run 'not function()' instead
|
||||||
local not
|
local not
|
||||||
if _run "$start" "$start" "$@" ; then
|
if _run "$start" "$start" "$@" ; then
|
||||||
|
@ -213,13 +234,13 @@ find-first-fail() {
|
||||||
# exponential search for the first value that is false
|
# exponential search for the first value that is false
|
||||||
# low = previous value (function($low) == true)
|
# low = previous value (function($low) == true)
|
||||||
# high = low * 2 (function($high) == false)
|
# high = low * 2 (function($high) == false)
|
||||||
local high=$start
|
local high=$(( $start*2 ))
|
||||||
local low
|
local low=$start
|
||||||
while _run $start $high "$@" ; do
|
while _run $start $high "$@" ; do
|
||||||
low=$high
|
low=$high
|
||||||
high=$(( $high*2 ))
|
high=$(( $high*2 ))
|
||||||
if [ $high -gt 4611686018427387900 ] ; then
|
if [ $high -gt 4611686018427387900 ] ; then
|
||||||
echo "$0: Error: exit value does not change of '$@'" >&2
|
echo "find-first-fail: Error: exit value does not change of '$@'" >&2
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
@ -228,8 +249,132 @@ find-first-fail() {
|
||||||
# high = tested fail
|
# high = tested fail
|
||||||
# Search low..high
|
# Search low..high
|
||||||
# echo "low: $low high: $high not: $not"
|
# echo "low: $low high: $high not: $not"
|
||||||
_find-first-fail $low $high "$@" 2>/dev/null
|
_find-first-fail $low $high "$@"
|
||||||
unset low high start
|
unset low high start quiet
|
||||||
|
}
|
||||||
|
|
||||||
|
#### find-first-fail-file
|
||||||
|
|
||||||
|
_run_file() {
|
||||||
|
# build $tmpfile as line a..b
|
||||||
|
# run:
|
||||||
|
# cmd $tmpfile
|
||||||
|
# Output is ignored if $quiet
|
||||||
|
# Exit value is negated if $not
|
||||||
|
_inner_run() {
|
||||||
|
# _inner_run is needed if cmd is complex like:
|
||||||
|
# perl -e 'exit( (shift) + (shift) > 10)'
|
||||||
|
"${cmd[@]}" "$tmp"
|
||||||
|
}
|
||||||
|
local a="$1"
|
||||||
|
local b="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
local cmd=("$@")
|
||||||
|
local tmp=`tempfile -p fff`
|
||||||
|
$verbose && echo "$a<x<$b: ${cmd[@]}" "$tmp" >&2
|
||||||
|
# Build file of line a..b
|
||||||
|
perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp";
|
||||||
|
perl -ne "$a..$b and print" "$inputfile" >> "$tmp";
|
||||||
|
eval "$not" _inner_run "$quiet"
|
||||||
|
# Postpone exit code
|
||||||
|
local _exit="$?"
|
||||||
|
rm "$tmp"
|
||||||
|
return "$_exit"
|
||||||
|
}
|
||||||
|
|
||||||
|
_find-low-file() {
|
||||||
|
local low=$1
|
||||||
|
local high=$2
|
||||||
|
if [ $low -gt $(($high - 2)) ]; then
|
||||||
|
global_low=$low
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
local middle=$(( ( $low + $high ) / 2 ))
|
||||||
|
if _run_file $middle $global_high "$@" ; then
|
||||||
|
low=$middle
|
||||||
|
else
|
||||||
|
high=$middle
|
||||||
|
fi
|
||||||
|
_find-low-file $low $high "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
_find-high-file() {
|
||||||
|
local low=$1
|
||||||
|
local high=$2
|
||||||
|
if [ $low -gt $(($high - 2)) ]; then
|
||||||
|
global_high=$high
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
local middle=$(( ( $low + $high ) / 2 ))
|
||||||
|
if _run_file $global_low $middle "$@" ; then
|
||||||
|
high=$middle
|
||||||
|
else
|
||||||
|
low=$middle
|
||||||
|
fi
|
||||||
|
_find-high-file $low $high "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_find_in_file() {
|
||||||
|
# If function(1) = false: run 'not function()' instead
|
||||||
|
local not
|
||||||
|
local global_low=$(($start+1))
|
||||||
|
if [ ! -e "$inputfile" ] ; then
|
||||||
|
echo "find-first-fail: Error: File not found '$inputfile'" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
local global_high=$(wc -l < $inputfile)
|
||||||
|
if _run_file "$global_low" "$global_high" "$@" ; then
|
||||||
|
not=''
|
||||||
|
else
|
||||||
|
not='!'
|
||||||
|
fi
|
||||||
|
# Binary search for $global_low
|
||||||
|
_find-low-file $global_low $global_high "$@"
|
||||||
|
_find-high-file $global_low $global_high "$@"
|
||||||
|
perl -ne "($start and 1..$start) and print" "$inputfile"
|
||||||
|
perl -ne "$global_low..$global_high and print" "$inputfile"
|
||||||
|
unset low high start quiet
|
||||||
|
}
|
||||||
|
|
||||||
|
local opt2=false
|
||||||
|
local optf=false
|
||||||
|
local inputfile
|
||||||
|
local quiet=""
|
||||||
|
local start
|
||||||
|
local verbose=false
|
||||||
|
unset OPTIND
|
||||||
|
|
||||||
|
# Parse and remove options
|
||||||
|
while getopts "2f:qs:v" options; do
|
||||||
|
case "${options}" in
|
||||||
|
(2) opt2=true;;
|
||||||
|
(f) optf=true; inputfile="$OPTARG";;
|
||||||
|
(q) quiet=">/dev/null 2>/dev/null";;
|
||||||
|
(s) start="$OPTARG";;
|
||||||
|
(v) verbose=true;;
|
||||||
|
(-) break;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
shift $(( OPTIND - 1))
|
||||||
|
unset OPTIND
|
||||||
|
|
||||||
|
if $optf; then
|
||||||
|
if [ -z "$start" ] ; then
|
||||||
|
start=0
|
||||||
|
fi
|
||||||
|
_find_in_file "$@";
|
||||||
|
else
|
||||||
|
if [ -z "$start" ] ; then
|
||||||
|
start=1
|
||||||
|
fi
|
||||||
|
_find_in_arg "$@";
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ -z "$*" ] ; then
|
if [ -z "$*" ] ; then
|
||||||
|
@ -240,3 +385,4 @@ else
|
||||||
# find-first-fail command
|
# find-first-fail command
|
||||||
find-first-fail "$@"
|
find-first-fail "$@"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ test_unexported_function() {
|
||||||
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
||||||
# myprog is a function, so source find-first-fail first
|
# myprog is a function, so source find-first-fail first
|
||||||
. `which find-first-fail`
|
. `which find-first-fail`
|
||||||
|
echo Find 12345678 in unexported function
|
||||||
find-first-fail myprog
|
find-first-fail myprog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,6 +12,7 @@ test_exported_function() {
|
||||||
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
myprog() { perl -e 'exit (shift > 12345678)' "$@"; }
|
||||||
# myprog is an exported function
|
# myprog is an exported function
|
||||||
export -f myprog
|
export -f myprog
|
||||||
|
echo Find 12345678
|
||||||
find-first-fail myprog
|
find-first-fail myprog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,9 +21,9 @@ test_startvalue() {
|
||||||
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
|
myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; }
|
||||||
else { exit ($a <= 12345678) }' "$@"; }
|
else { exit ($a <= 12345678) }' "$@"; }
|
||||||
export -f myprog
|
export -f myprog
|
||||||
# Finds 123
|
echo Find 123
|
||||||
find-first-fail myprog
|
find-first-fail myprog
|
||||||
# Finds 12345678
|
echo Find 12345678
|
||||||
find-first-fail -s 200 myprog
|
find-first-fail -s 200 myprog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,10 +31,40 @@ test_s_v_12() {
|
||||||
# Multiple options
|
# Multiple options
|
||||||
myprog() { perl -e 'exit (shift > 12)' "$@"; }
|
myprog() { perl -e 'exit (shift > 12)' "$@"; }
|
||||||
export -f myprog
|
export -f myprog
|
||||||
|
echo Find 12 with progress
|
||||||
find-first-fail -v -s 10 myprog
|
find-first-fail -v -s 10 myprog
|
||||||
|
echo Find 12 with progress quiet
|
||||||
find-first-fail -v -q -s 10 myprog
|
find-first-fail -v -q -s 10 myprog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_file() {
|
||||||
|
tmp=`tempfile`
|
||||||
|
echo Header > $tmp
|
||||||
|
seq 100 >> $tmp
|
||||||
|
10_to_15() { grep ^10$ $1 && grep ^15$ $1; }
|
||||||
|
export -f 10_to_15
|
||||||
|
echo 10..15
|
||||||
|
find-first-fail -s1 -qf $tmp 10_to_15
|
||||||
|
echo not 10..15
|
||||||
|
find-first-fail -s1 -qf $tmp not 10_to_15
|
||||||
|
rm $tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
test_header() {
|
||||||
|
tmp=`tempfile`
|
||||||
|
echo Header > $tmp
|
||||||
|
seq 10 >> $tmp
|
||||||
|
echo 1000 >> $tmp
|
||||||
|
seq 10 >> $tmp
|
||||||
|
|
||||||
|
myparser() { perl -ne 'if($_ > 100) { exit 1 }' "$@"; }
|
||||||
|
export -f myparser
|
||||||
|
echo Should give:
|
||||||
|
echo Header
|
||||||
|
echo 1000
|
||||||
|
find-first-fail -s1 -f $tmp myparser
|
||||||
|
}
|
||||||
|
|
||||||
export -f $(compgen -A function | grep test_)
|
export -f $(compgen -A function | grep test_)
|
||||||
compgen -A function | grep test_ | LC_ALL=C sort |
|
compgen -A function | grep test_ | LC_ALL=C sort |
|
||||||
parallel --timeout 1000% --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
parallel --timeout 1000% --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
||||||
|
|
Loading…
Reference in a new issue