diff --git a/find-first-fail/find-first-fail b/find-first-fail/find-first-fail index 5bbf4b7..af90362 100755 --- a/find-first-fail/find-first-fail +++ b/find-first-fail/find-first-fail @@ -5,12 +5,14 @@ =head1 NAME -find-first-fail - find the lowest argument that makes a command fail +find-first-fail - fine function for finding first failing file fragment =head1 SYNOPSIS -B [-2] [-q] [-s I] I +B [-2] [-q] [-s I] [-v] I + +B -f I [-s I] [-q] [-v] I =head1 DESCRIPTION @@ -26,24 +28,37 @@ value and the previous value. If the I value fails, B instead searches for the highest value that I fails for. +If given a file with B<-f>, B will find the minimal +section of the file that the command fails for. + =head1 OPTIONS =over 4 +=item B<-f> I + +Search for the bad line in I. Use B<-s> to tell how many +lines should be treated as header (e.g. 1 for a normal CSV file with a +header). + + =item B<-2> Instead of passing the command a single argument, give the command 2 arguments: I I. + =item B<-q> Quiet. Ignore output from I. + =item B<-s I> Start searching from the value I. Normally searching will start from the value 1. + =item B<-v> Verbose. Show the commands being run. @@ -58,40 +73,43 @@ Verbose. Show the commands being run. This is a silly way to find the last non-existing file (namely 244): - touch {245..800} - find-first-fail ls + touch {245..800} + find-first-fail ls This is a silly way to find the last file (namely 800): - touch {1..800} - find-first-fail ls + touch {1..800} + find-first-fail ls + =head2 Test a bash function Test how long an argument /bin/echo can take - . $(which find-first-fail) - singleecho() { - /bin/echo $(perl -e 'print "x"x'$1) >/dev/null - } - find-first-fail singleecho + . $(which find-first-fail) + singleecho() { + /bin/echo $(perl -e 'print "x"x'$1) >/dev/null + } + find-first-fail singleecho + =head2 Test a bash function that takes from and to as arguments Use a function that takes two arguments. It finds the line number after HOME=. - . 
$(which find-first-fail) - greplines() { + . $(which find-first-fail) + greplines() { env | perl -ne "$1..$2 and print" | grep HOME= - } - find-first-fail -2 -q greplines + } + find-first-fail -2 -q greplines + =head2 Test complex command and show what is run Complex commands can also be run: - find-first-fail -v perl -e 'exit(shift > 129)' + find-first-fail -v perl -e 'exit(shift > 129)' =head2 Find the second limit of a program @@ -108,6 +126,24 @@ you find the limits? find-first-fail -s 200 myprog +=head2 Find minimal failing CSV file + +Assume: example.csv + + MyHeader + 4 + 3 + 5 + 100 + 3 + + myparser() { perl -ne 'if($_ > 10) { exit 1 }' "$@"; } + +To identify the minimal CSV file that causes myparser to fail: + + find-first-fail -f example.csv -s1 myparser + + =head1 AUTHOR Copyright (C) 2020 Ole Tange, @@ -142,7 +178,6 @@ find-first-fail() { _find-first-fail() { local low=$1 local high=$2 - # echo $low-$high if [ $low -gt $(($high - 2)) ]; then echo $low return @@ -169,10 +204,8 @@ find-first-fail() { # _inner_run is needed if cmd is complex like: # perl -e 'exit( (shift) + (shift) > 10)' if $opt2 ; then - $verbose && echo "${cmd[@]}" "$a" "$b" "${cmd[@]}" "$a" "$b" else - $verbose && echo "${cmd[@]}" "$b" "${cmd[@]}" "$b" fi } @@ -180,20 +213,148 @@ find-first-fail() { local b="$2" shift shift - # echo "a=$a b=$b $@" - local cmd=( "$@" ) + local cmd=("$@") + if $opt2 ; then + $verbose && echo "$a&2 + else + $verbose && echo "$a&2 + fi eval "$not" _inner_run "$quiet" } - local opt2=false - local quiet="" - local start=1 - local verbose=false + _find_in_arg() { + # If function(1) = false: run 'not function()' instead + local not + if _run "$start" "$start" "$@" ; then + not='' + else + not='!' 
+ fi + + # exponential search for the first value that is false + # low = previous value (function($low) == true) + # high = low * 2 (function($high) == false) + local high=$(( $start*2 )) + local low=$start + while _run $start $high "$@" ; do + low=$high + high=$(( $high*2 )) + if [ $high -gt 4611686018427387900 ] ; then + echo "find-first-fail: Error: exit value does not change of '$@'" >&2 + return + fi + done + + # low = tested good + # high = tested fail + # Search low..high + # echo "low: $low high: $high not: $not" + _find-first-fail $low $high "$@" + unset low high start quiet + } + + #### find-first-fail-file + + _run_file() { + # build $tmpfile as line a..b + # run: + # cmd $tmpfile + # Output is ignored if $quiet + # Exit value is negated if $not + _inner_run() { + # _inner_run is needed if cmd is complex like: + # perl -e 'exit( (shift) + (shift) > 10)' + "${cmd[@]}" "$tmp" + } + local a="$1" + local b="$2" + shift + shift + local cmd=("$@") + local tmp=`tempfile -p fff` + $verbose && echo "$a&2 + # Build file of line a..b + perl -ne "($start and 1..$start) and print" "$inputfile" > "$tmp"; + perl -ne "$a..$b and print" "$inputfile" >> "$tmp"; + eval "$not" _inner_run "$quiet" + # Postpone exit code + local _exit="$?" 
+ rm "$tmp" + return "$_exit" + } + + _find-low-file() { + local low=$1 + local high=$2 + if [ $low -gt $(($high - 2)) ]; then + global_low=$low + return + fi + shift + shift + local middle=$(( ( $low + $high ) / 2 )) + if _run_file $middle $global_high "$@" ; then + low=$middle + else + high=$middle + fi + _find-low-file $low $high "$@" + } + + _find-high-file() { + local low=$1 + local high=$2 + if [ $low -gt $(($high - 2)) ]; then + global_high=$high + return + fi + shift + shift + local middle=$(( ( $low + $high ) / 2 )) + if _run_file $global_low $middle "$@" ; then + high=$middle + else + low=$middle + fi + _find-high-file $low $high "$@" + } + + _find_in_file() { + # If function(1) = false: run 'not function()' instead + local not + local global_low=$(($start+1)) + if [ ! -e "$inputfile" ] ; then + echo "find-first-fail: Error: File not found '$inputfile'" >&2 + return 1 + fi + local global_high=$(wc -l < $inputfile) + if _run_file "$global_low" "$global_high" "$@" ; then + not='' + else + not='!' + fi + # Binary search for $global_low + _find-low-file $global_low $global_high "$@" + _find-high-file $global_low $global_high "$@" + perl -ne "($start and 1..$start) and print" "$inputfile" + perl -ne "$global_low..$global_high and print" "$inputfile" + unset low high start quiet + } + + local opt2=false + local optf=false + local inputfile + local quiet="" + local start + local verbose=false + unset OPTIND + # Parse and remove options - while getopts "2qs:v" options; do + while getopts "2f:qs:v" options; do case "${options}" in (2) opt2=true;; + (f) optf=true; inputfile="$OPTARG";; (q) quiet=">/dev/null 2>/dev/null";; (s) start="$OPTARG";; (v) verbose=true;; @@ -201,35 +362,19 @@ find-first-fail() { esac done shift $(( OPTIND - 1)) - - # If function(1) = false: run 'not function()' instead - local not - if _run "$start" "$start" "$@" ; then - not='' - else - not='!' 
- fi + unset OPTIND - # exponential search for the first value that is false - # low = previous value (function($low) == true) - # high = low * 2 (function($high) == false) - local high=$start - local low - while _run $start $high "$@" ; do - low=$high - high=$(( $high*2 )) - if [ $high -gt 4611686018427387900 ] ; then - echo "$0: Error: exit value does not change of '$@'" >&2 - return + if $optf; then + if [ -z "$start" ] ; then + start=0 fi - done - - # low = tested good - # high = tested fail - # Search low..high - # echo "low: $low high: $high not: $not" - _find-first-fail $low $high "$@" 2>/dev/null - unset low high start + _find_in_file "$@"; + else + if [ -z "$start" ] ; then + start=1 + fi + _find_in_arg "$@"; + fi } if [ -z "$*" ] ; then @@ -240,3 +385,4 @@ else # find-first-fail command find-first-fail "$@" fi + diff --git a/find-first-fail/testsuite b/find-first-fail/testsuite index 58042c5..8b78f8e 100644 --- a/find-first-fail/testsuite +++ b/find-first-fail/testsuite @@ -4,6 +4,7 @@ test_unexported_function() { myprog() { perl -e 'exit (shift > 12345678)' "$@"; } # myprog is a function, so source find-first-fail first . 
`which find-first-fail` + echo Find 12345678 in unexported function find-first-fail myprog } @@ -11,6 +12,7 @@ test_exported_function() { myprog() { perl -e 'exit (shift > 12345678)' "$@"; } # myprog is an exported function export -f myprog + echo Find 12345678 find-first-fail myprog } @@ -19,9 +21,9 @@ test_startvalue() { myprog() { perl -e '$a=shift;if($a <= 123) { exit 0; } else { exit ($a <= 12345678) }' "$@"; } export -f myprog - # Finds 123 + echo Find 123 find-first-fail myprog - # Finds 12345678 + echo Find 12345678 find-first-fail -s 200 myprog } @@ -29,10 +31,40 @@ test_s_v_12() { # Multiple options myprog() { perl -e 'exit (shift > 12)' "$@"; } export -f myprog + echo Find 12 with progress find-first-fail -v -s 10 myprog + echo Find 12 with progress quiet find-first-fail -v -q -s 10 myprog } +test_file() { + tmp=`tempfile` + echo Header > $tmp + seq 100 >> $tmp + 10_to_15() { grep ^10$ $1 && grep ^15$ $1; } + export -f 10_to_15 + echo 10..15 + find-first-fail -s1 -qf $tmp 10_to_15 + echo not 10..15 + find-first-fail -s1 -qf $tmp not 10_to_15 + rm $tmp +} + +test_header() { + tmp=`tempfile` + echo Header > $tmp + seq 10 >> $tmp + echo 1000 >> $tmp + seq 10 >> $tmp + + myparser() { perl -ne 'if($_ > 100) { exit 1 }' "$@"; } + export -f myparser + echo Should give: + echo Header + echo 1000 + find-first-fail -s1 -f $tmp myparser +} + export -f $(compgen -A function | grep test_) compgen -A function | grep test_ | LC_ALL=C sort | parallel --timeout 1000% --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'