#!/bin/bash

# The manual below is POD embedded in a quoted here-document that is fed
# to the no-op ':' builtin, so bash skips it while pod2man can read it.
#
# This file is meant to be sourced as well as executed; for that reason
# it deliberately does not enable 'set -e', 'set -u' or 'pipefail',
# which would change the behaviour of the calling shell.

: <<'=cut'
=encoding utf8

=head1 NAME

find-first-fail - fine function for finding first failing file fragment (or numeric argument)

=head1 SYNOPSIS

B<find-first-fail> [-2] [-q] [-s I<start>] [-e I<end>] [-v] [-V] I<command>

B<find-first-fail> -f I<file> [-s I<headerlines>] [-q] [-v] [-V] I<command>

=head1 DESCRIPTION

B<find-first-fail> runs I<command> with a single number. It returns
the highest value that I<command> succeeds for.

It finds the value by first testing the I<start> value (which defaults
to 1). As long as the value succeeds, the value is doubled. When the
value fails, B<find-first-fail> does a binary search between this
value and the previous value.

If the I<start> value fails, B<find-first-fail> instead searches for
the highest value that I<command> fails for.

If given a file with B<-f> B<find-first-fail> will find the minimal
section in the file the command fails for.

=head1 OPTIONS

=over 4

=item B<-f> I<file>

Search for the bad line in I<file>. Use B<-s> to tell how many lines
should be treated as header (e.g. 1 if a normal CSV file with a
header).

=item B<-2>

Instead of passing the command a single argument, give the command 2
arguments: I<low> I<high>.

=item B<-q>

Quiet. Ignore output from I<command>.

=item B<-s I<start>>

Start searching from the value I<start>. Normally searching will start
from the value 1.

=item B<-e I<end>>

End searching at the value I<end>. Normally this value will be
determined automatically, but you can limit the search to be below the
value I<end>.

=item B<-v>

Verbose. Show the commands being run.

=item B<-V>

Print version information and stop.

=back

=head1 EXAMPLES

=head2 Find the last file

This is a silly way to find the last non-existing file (namely 244):

  touch {245..800}
  find-first-fail ls

This is a silly way to find the last file (namely 800):

  touch {1..800}
  find-first-fail ls

=head2 Test a bash function

Test how long an argument /bin/echo can take:

  . $(which find-first-fail)
  singleecho() {
    /bin/echo $(perl -e 'print "x"x'$1) >/dev/null
  }
  find-first-fail singleecho

=head2 Test a bash function that takes from and to as arguments

Use a function that takes two arguments. It finds the line number
after HOME=.

  . $(which find-first-fail)
  greplines() {
    env | perl -ne "$1..$2 and print" | grep HOME=
  }
  find-first-fail -2 -q greplines

=head2 Test complex command and show what is run

Complex commands can also be run:

  find-first-fail -v perl -e 'exit(shift > 129)'

=head2 Find the second limit of a program

Assume you have a program that is OK in the range 123..12345. How do
you find those limits?

  myprog() {
    perl -e '$a=shift;if($a <= 123) { exit 0; } else { exit ($a <= 12345) }' "$@";
  }
  export -f myprog
  # Finds 123
  find-first-fail myprog
  # Finds 12345
  find-first-fail -s 200 myprog

=head2 Find minimal failing CSV file

Assume:

  example.csv
  MyHeader
  4
  3
  5
  100
  3

  myparser() {
    perl -ne 'if($_ > 10) { exit 1 }' "$@";
  }

To identify the minimal CSV file that causes myparser to fail:

  find-first-fail -f example.csv -s1 myparser

=head1 REPORTING BUGS

Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues

=head1 AUTHOR

Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free
Software Foundation, Inc.

=head1 LICENSE

Copyright (C) 2012 Free Software Foundation, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
at your option any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

=head1 SEE ALSO

=for comment NOTE(review): the manual page name inside B<...> on the next line was lost; restore it.

B(1)

=cut

#######################################
# Find the boundary value (or the minimal file section) at which a
# command's exit status flips.
# Usage:
#   find-first-fail [-2] [-q] [-s start] [-e end] [-v] [-V] command...
#   find-first-fail -f file [-s headerlines] [-q] [-v] [-V] command...
# Outputs:
#   The boundary value, or the minimal file section, on stdout.
#   Diagnostics and -v traces on stderr.
# Returns:
#   0 on success, 1 on a search or file error, 2 on a usage error.
#######################################
find-first-fail() {
  # The helpers below are (re)defined on every call, as in the original
  # design, so that sourcing this file only has to provide one function.
  # They read the option variables (opt2, quiet, verbose, start, end,
  # inputfile) and the 'not' flag of their callers through bash's
  # dynamic scoping.

  # Succeed if $1 is an optionally negative decimal integer.
  _is_int() {
    case "${1#-}" in
      '' | *[!0-9]*) return 1 ;;
    esac
    return 0
  }

  # Run "$@" once.
  # Output is discarded if $quiet.
  # The exit status is negated if $not is non-empty.
  # Running "$@" directly (instead of building a string for eval) keeps
  # complex commands such as
  #   perl -e 'exit( (shift) + (shift) > 10)'
  # intact without any re-quoting.
  _attempt() {
    local rc=0
    if $quiet; then
      "$@" >/dev/null 2>/dev/null || rc=$?
    else
      "$@" || rc=$?
    fi
    if [ -n "$not" ]; then
      [ "$rc" -ne 0 ]
      return
    fi
    return "$rc"
  }

  # Print the first $1 lines (the header) followed by lines $2..$3 of
  # the file $4.
  # The numbers are passed with -v and the file through a redirect, so
  # neither is ever parsed as awk code or as an awk assignment.
  _print_lines() {
    awk -v hdr="$1" -v first="$2" -v last="$3" \
      'NR <= hdr || (NR >= first && NR <= last)' < "$4"
  }

  # Binary search between $1 (tested good) and $2 (tested fail) using
  # the command in the remaining arguments; prints the last good value.
  _find-first-fail() {
    local low=$1
    local high=$2
    local middle
    shift 2
    # Stop when low and high are adjacent (or have met).
    while [ "$low" -le $((high - 2)) ]; do
      # low + (high-low)/2 cannot overflow and is strictly between the
      # two bounds, also for negative values.
      middle=$((low + (high - low) / 2))
      if _run "$low" "$middle" "$high" "$@"; then
        low=$middle
      else
        high=$middle
      fi
    done
    echo "$low"
  }

  # run:
  #   cmd $a $b     (with -2)
  # or:
  #   cmd $b
  # $1 = a (lower bound), $2 = b (value under test),
  # $3 = c (upper bound, or "?" when unknown; only used for -v),
  # remaining arguments = the command.
  _run() {
    local a=$1
    local b=$2
    local c=$3
    shift 3
    local cmd=("$@")
    # NOTE(review): the exact wording of the original -v trace was lost;
    # this prints the search window followed by the command being run.
    if $opt2; then
      if $verbose; then
        printf '%s\n' "$a<$b<$c: ${cmd[*]} $a $b" >&2
      fi
      _attempt "${cmd[@]}" "$a" "$b"
    else
      if $verbose; then
        printf '%s\n' "$a<$b<$c: ${cmd[*]} $b" >&2
      fi
      _attempt "${cmd[@]}" "$b"
    fi
  }

  # Numeric mode: print the highest value for which the command behaves
  # like it does for $start.
  _find_in_arg() {
    # If function($start) == false: run 'not function()' instead
    local not=''
    if ! _run "$start" "$start" "?" "$@"; then
      not='!'
    fi
    local low=$start
    local high
    if [ -z "$end" ]; then
      # No end value given with -e:
      # exponential search for the first value that is false
      #   low  = previous value (function($low) == true)
      #   high = low * 2        (function($high) == false)
      high=$((low * 2))
      # Doubling does not advance for values <= 0; step by one instead
      # so that e.g. '-s 0' terminates.
      [ "$high" -gt "$low" ] || high=$((low + 1))
      while _run "$low" "$high" "?" "$@"; do
        low=$high
        high=$((high * 2))
        [ "$high" -gt "$low" ] || high=$((low + 1))
        # Give up before the next doubling would overflow 64 bits.
        if [ "$high" -gt 4611686018427387900 ]; then
          echo "find-first-fail: Error: exit value does not change of '$*'" >&2
          return 1
        fi
      done
    else
      high=$end
    fi
    # low = tested good
    # high = tested fail (or the -e limit)
    # Search low..high
    _find-first-fail "$low" "$high" "$@"
  }

  #### find-first-fail-file

  # Build a temporary file from the header plus lines $1..$2 of
  # $inputfile and run:
  #   cmd $tmpfile
  # Returns the (possibly negated) exit status of the command.
  _run_file() {
    local a=$1
    local b=$2
    # TODO if defined memory{$a,$b}: return value
    shift 2
    local cmd=("$@")
    local tmp
    if ! tmp=$(mktemp "${TMPDIR:-/tmp}/fffXXXXXX"); then
      echo "find-first-fail: Error: cannot create temporary file" >&2
      return 1
    fi
    if $verbose; then
      printf '%s\n' "lines $a..$b: ${cmd[*]} $tmp" >&2
    fi
    _print_lines "$start" "$a" "$b" "$inputfile" > "$tmp"
    # Postpone the exit code until the temporary file is removed.
    local _exit=0
    _attempt "${cmd[@]}" "$tmp" || _exit=$?
    # TODO memory{$a,$b}="$_exit"
    rm -f -- "$tmp"
    return "$_exit"
  }

  # Binary search for the first line of the minimal section.
  # Positions are in tenths of a line (line number * 10), which lets the
  # search express "half a line" offsets with integer arithmetic.
  # Sets $global_low (owned by _find_in_file).
  _find-low-file() {
    local low=$1
    local high=$2
    local middle
    shift 2
    local last_line=$((global_high / 10))
    while [ "$low" -le $((high - 2)) ]; do
      middle=$(((low + high) / 2))
      if _run_file $((middle / 10)) "$last_line" "$@"; then
        low=$middle
      else
        high=$middle
      fi
    done
    # if $low > $high - 0.2: $global_low = $low
    global_low=$low
  }

  # Binary search for the last line of the minimal section.
  # Same tenths-of-a-line positions as _find-low-file.
  # Sets $global_high (owned by _find_in_file).
  _find-high-file() {
    local low=$1
    local high=$2
    local middle
    shift 2
    local first_line=$((global_low / 10))
    while [ "$low" -le $((high - 5)) ]; do
      middle=$(((low + high) / 2))
      if _run_file "$first_line" $((middle / 10)) "$@"; then
        high=$middle
      else
        low=$middle
      fi
    done
    # if $low > $high - 0.5: $global_high = $high + 0.5
    global_high=$((high + 5))
  }

  # File mode: print the header lines followed by the minimal section of
  # $inputfile for which the command still behaves like it does for the
  # whole file.
  _find_in_file() {
    if [ ! -e "$inputfile" ]; then
      echo "find-first-fail: Error: File not found '$inputfile'" >&2
      return 1
    fi
    local not=''
    local lines
    # Count with awk rather than 'wc -l' so that a last line without a
    # trailing newline is included.
    lines=$(awk 'END { print NR }' < "$inputfile") || return 1
    local global_low=$(((start + 1) * 10))
    local global_high=$((lines * 10))
    # If function(whole file) == false: run 'not function()' instead
    if ! _run_file $((global_low / 10)) $((global_high / 10)) "$@"; then
      not='!'
    fi
    # Binary search for minimal $global_high that fails
    _find-high-file "$global_low" "$global_high" "$@"
    # Binary search for minimal $global_low that fails
    _find-low-file "$global_low" "$global_high" "$@"
    # Print the resulting minimal file
    _print_lines "$start" $((global_low / 10)) $((global_high / 10)) \
      "$inputfile"
  }

  # Print version and license information.
  # NOTE(review): the first lines of the original text (including any
  # version number) were lost; they are rebuilt from the AUTHOR and
  # LICENSE sections of the manual above.
  version() {
    cat <<'EOF'
find-first-fail
Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
find-first-fail comes with no warranty.

Web site: https://gitlab.com/ole.tange/tangetools/-/tree/master/find-first-fail
EOF
  }

  local opt2=false
  local optf=false
  local quiet=false
  local verbose=false
  local inputfile
  local start
  # 'end' and 'options' must be local: otherwise a -e value from one
  # call would silently limit the next call in the same shell.
  local end
  local options
  # A local OPTIND makes getopts start from the first argument on every
  # call without disturbing the caller's own option parsing.
  local OPTIND=1
  local OPTARG

  # Parse and remove options
  while getopts "2e:f:qs:vV" options; do
    case "$options" in
      2) opt2=true ;;
      f)
        optf=true
        inputfile=$OPTARG
        ;;
      q) quiet=true ;;
      s) start=$OPTARG ;;
      e) end=$OPTARG ;;
      v) verbose=true ;;
      V)
        # return, not exit: exit would terminate an interactive shell
        # that sourced this function.
        version
        return 0
        ;;
      *)
        # getopts has already printed the reason.
        return 2
        ;;
    esac
  done
  shift $((OPTIND - 1))

  if [ -n "$start" ] && ! _is_int "$start"; then
    echo "find-first-fail: Error: -s needs an integer, got '$start'" >&2
    return 2
  fi
  if [ -n "$end" ] && ! _is_int "$end"; then
    echo "find-first-fail: Error: -e needs an integer, got '$end'" >&2
    return 2
  fi
  if [ $# -eq 0 ]; then
    echo "find-first-fail: Error: no command given" >&2
    return 2
  fi

  if $optf; then
    if [ -z "$start" ]; then
      start=0
    fi
    _find_in_file "$@"
  else
    if [ -z "$start" ]; then
      start=1
    fi
    _find_in_arg "$@"
  fi

  # TODO find-optimal:
  # instead of looking at exit value, look at last line
  # start=--start || 1
  # end=--end
  # vstart = test(start)
  # if end:
  #   vmiddle = test( (start+end)/2)
  #   if vstart < vmiddle:
  #     Search hill(start,end)
  #   else
  #     Search valley(start,t)
  # else
  #   min = vstart
  #   max = vstart
  #   t = start*2
  #   while t < big:
  #     v = test(t)
  #     if v < max and vstart < max: Search hill(start,t)
  #     if min < v and min < vstart: Search valley(start,t)
  #     t = t*2
  # fi
}

if [[ "${BASH_SOURCE[0]}" != "$0" ]] || [ $# -eq 0 ]; then
  # The file is being sourced (or was run without arguments):
  # only define the bash function.
  #   . $(which find-first-fail)
  :
else
  # find-first-fail command
  find-first-fail "$@"
fi