diff --git a/histogram/histogram b/histogram/histogram index c0cbe0b..156841a 100755 --- a/histogram/histogram +++ b/histogram/histogram @@ -6,8 +6,9 @@ histogram - make and display a histogram on the command line =head1 SYNOPSIS -B [--delimiter |-d ] [--pre|--post] -[--log|-l] [--values-as-headers|-t] [--values-before-headers|-b] +B [--delimiter |-d ] +[--log|-l] [--input |-i ] +[--format |-f ] [] B | B [options] @@ -27,15 +28,15 @@ Line with CSV: B -d , 1,1.01,3.1 =item * -Line with white space separated values: B 1 1.01 3.1 +Line with white space separated values: B -i v 1 1.01 3.1 =item * -Line with white space separated headers+values: B a 1 b 1.01 c 3.1 +Line with white space separated headers+values: B 'a 1' 'b 1.01' 'c 3.1' =item * -One value per line: (echo 1; echo 1.01; echo 3.1) | B +One value per line: (echo 1; echo 1.01; echo 3.1) | B -i v =item * @@ -50,8 +51,6 @@ One comma separated header+value per line: (echo a,1; echo b,1.01; echo c,3.1) | =head1 OPTIONS -=over 9 - =item B<--delimiter> I =item B<-d> I @@ -66,89 +65,152 @@ Use I as delimiter between elements. Take the logarithm of all values. -=item B<--pre> +=item B<--input> I -Put the header before the bar. +Give format of input. B will try to guess the input format +based on different heuristics. If it guesses wrong, you can override +it with B<--input>. I is a string consisting of: -See also: B<--post> +=over 13 +=item Z<>B -=item B<--post> +This column is a header. -Put the header after the bar. +=item Z<>B -See also: B<--pre> +The rest of the line is a header. +=item Z<>B -=item B<--values-as-headers> +This column is a value. -=item B<-t> +=item Z<>B -Use the numbers as headers. +This column is ignored. +=item Z<>B -=item B<--values-before-headers> +This char is a delimiter. -=item B<-b> +=back -Normally headers are given before the -value. B<--values-before-headers> looks the header after the value. +If no delimiter is given, it will be guessed. 
+ +Examples: + +=over 13 + +=item B<--input h,v> + +Column 1 is header, column 2 is value, separated by B<,>. + +=item B<--input hv> + +Column 1 is header, column 2 is value, guess separator. + +=item B<--input ihv> + +Column 1 is ignored, column 2 is header, column 3 is value, guess separator. + +=back + +=item B<--format> I + +Give format of output. I is a string consisting of: + +=over 13 + +=item Z<>B + +The bar. + +=item Z<>B + +Percent of total outside bar. + +=item Z<>B + +Percent of total inside bar. + +=item Z<>B + +Header outside bar. + +=item Z<>B + +Header inside bar. + +=item Z<>B

+ +Percent of max outside bar. + +=item Z<>B

+ +Percent of max inside bar. + +=item Z<>B + +Value outside bar. + +=item Z<>B + +Value inside bar. + +=back + +A format must contain 'b'. Default is VbHP. =back -(echo 150 hundredfifty;echo 30 thirty;echo 3 three;echo 6 six)|./histogram --format Hbcp -(echo 0 zero; echo 50 fifty; echo 150 hundredfifty;echo 130 hundredthirty;echo 3 three;echo 6 six)|./histogram --format HbHCP -ls -l|tail -n +2| ./histogram --input iiiiviiih +=head1 EXAMPLE: file sizes in current dir -=head1 EXAMPLE: git: number of commits in the last year, by author - -git shortlog -s --after="1 years" | histogram -b - - -=head1 EXAMPLE: git: number of commits per day - -git log --format=%ai | cut -d\ -f1 | uniq -c | histogram -b --post - - -=head1 EXAMPLE: git: commits by hour of the day - -git log --format=%ai | perl -pe 's/.* (\d\d):.*/$1/' | sort -n | uniq -c | histogram -b - - -=head1 EXAMPLE: git: commits by day of the week - -git log --format=%ad |cut -d\ -f1 | sort -n | uniq -c | histogram -b +ls -l|tail -n +2| histogram --input iiiiviiih =head1 EXAMPLE: run time of processes -ps -e | tail -n +2 | perl -pe 's/.*(\d\d):(\d\d):(\d\d) (.*)/($1*3600+$2*60+$3)." $4"/e' | histogram -b -l +ps -e | tail -n +2 | perl -pe 's/.*(\d\d):(\d\d):(\d\d) (.*)/($1*3600+$2*60+$3)." 
$4"/e' | sort -n | histogram -=head1 EXAMPLE: Letter frequencies in a text file +=head1 EXAMPLE: git statistics -cat file | perl -ne 'print map {uc($_),"\n"} split//,$_' | sort | uniq -c | histogram -b +Number of commits and percentage in the last year, by author: +git shortlog -s --after="1 years" | histogram --input vH --format VbHC -=head1 EXAMPLE: Number of HTTP requests per day +Number of commits per day: -cat apache.log | cut -d\ -f4 | cut -d/ -f 1,2 | uniq -c | histogram -b +git log --format=%ai | cut -d\ -f1 | uniq -c | histogram +Number of commits by hour of the day: -=head1 EXAMPLE: Beijing Air Quality Index +git log --format=%ai | perl -pe 's/.* (\d\d):.*/$1/' | sort -n | uniq -c | histogram -curl -s https://twitter.com/statuses/user_timeline/15527964.rss | grep /description | perl -nle 'print "$1 $2" if /(\S+ \S+); PM2.5;[^;]+; (\d+)/' | histogram +Number of commits by day of the week: + +git log --format=%ad |cut -d\ -f1 | sort -n | uniq -c | histogram =head1 EXAMPLE: Visualize ping times -ping -i .2 -c 10 google.com | grep -oP 'time=\K\S*' | histogram -t --post +ping -ni .2 -c 10 google.com | grep -oP 'time=\K\S*' | histogram -=head1 EXAMPLE: Visualize filesize inside a directory +=head1 EXAMPLE: Visualize disk usage -du -s * | histogram -b +du -s * | histogram --format VbHC + + +=head1 EXAMPLE: Number of HTTP requests per day + +cat access.log | cut -d\ -f4 | cut -d: -f 1 | uniq -c | histogram + + +=head1 EXAMPLE: Letter frequencies in a text file + +cat file | perl -ne 'print map {uc($_),"\n"} split//,$_' | sort | uniq -c | histogram =head1 BUGS @@ -163,7 +225,7 @@ Report bugs to . =head1 AUTHOR -Copyright (C) 2012 Ole Tange, http://ole.tange.dk and Free +Copyright (C) 2012,2013,2014 Ole Tange, http://ole.tange.dk and Free Software Foundation, Inc. 
@@ -286,23 +348,16 @@ B(1) =cut -# histogram -d , a1,b2,c3 d4,5,e76 -# histogram 1 2 3 -# histogram a:1 b:2 c:3 -# histogram "a a":1 b:2 c:3 -# histogram "a a" 1 b 2 c 3 -# (echo a a 1; echo b 2; echo c 3) | histogram -# histogram --post aaaaaaaaaaa1 b10 -# seq 10 | histogram -t --pre - use strict; use Getopt::Long; +Getopt::Long::Configure("bundling","require_order"); GetOptions ("delimiter|d=s" => \$opt::delimiter, "log" => \$opt::log, "input|i=s" => \$opt::input, "format|f=s" => \$opt::format, + "debug|D" => \$opt::debug, ) || die_usage(); my @raw; @@ -313,21 +368,25 @@ if($#ARGV != -1) { chomp @raw; } + my ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref); if(not defined $opt::input) { - # Guess opt::input my $delimiter = guess_delimiter(@raw); if($opt::delimiter) { # override guessed delimiter if given $delimiter = $opt::delimiter; - } elsif(defined $delimiter) { + } else { + # Guess opt::input + $delimiter = guess_delimiter(@raw); + } + if(defined $delimiter) { # guess format: (v delimiter h) or (h delimiter v) ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) = - parse_raw_given_opt_input('\s*v'.$delimiter."h",@raw); + parse_raw_given_opt_input("v".$delimiter."h",@raw); } else { # guess format: v ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) = - parse_raw_given_opt_input('\s*v',@raw); + parse_raw_given_opt_input("v",@raw); $header_ref = $value_ref; $max_value_length = $max_header_length; $max_value_header_length = $max_value_length + $max_header_length; @@ -342,8 +401,8 @@ if(not defined $opt::input) { ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) = parse_raw_given_opt_input($opt::input, @raw); } -my $max_value = max(@$value_ref); -my $total_value = sum(@$value_ref); +my $max_value = undef_as_zero(max(@$value_ref)); +my $total_value = undef_as_zero(sum(@$value_ref)); sub 
parse_raw_given_opt_input { my ($input,@raw) = @_; @@ -355,24 +414,27 @@ sub parse_raw_given_opt_input { # \S+\,(\S+)\;(\S+) $input =~ /v.*v/ and die("Only one v is allow in --input"); $input =~ /h.*h/ and die("Only one h is allow in --input"); - if($input =~ /^[ivh]+$/) { + if($input =~ /^[Hivh]+$/) { # No delimiters => '\s+' (whitespace) $input = join('\s+', split//, $input); } + # strip prepending white space in input to avoid splitting on that + for(@raw) { s/^\s+//; } my %part_map = ( "h" => '(\S*)', + "H" => '(\S*(?:.*\S)*)', "i" => '\S*', "v" => '(\S*)', ); my (@regexp_part, $first_meta_var,$header,$value,@header,@value); for(split //, $input) { # Header, Value - if(/[hv]/) { + if(/[Hhv]/) { # Is this h...v or v...h $first_meta_var ||= $_; } # Header, Value, Ignore - if(/[hiv]/) { + if(/[Hhiv]/) { push @regexp_part, $part_map{$_}; next; } @@ -380,6 +442,7 @@ sub parse_raw_given_opt_input { push @regexp_part, $_; } my $regexp = join("",@regexp_part); + debug("Input: $input The regexp: $regexp\n"); for my $rawline (@raw) { $rawline =~ /$regexp/ || die("$regexp not matching $rawline"); if(defined $2) { @@ -405,6 +468,7 @@ sub parse_raw_given_opt_input { # Add the values to the table push(@header,$header); push(@value,$value); + debug("Header: $header Value: $value\n"); } return ($max_value_length, $max_header_length, $max_value_header_length, \@header, \@value); @@ -412,7 +476,10 @@ sub parse_raw_given_opt_input { my $term_width = terminal_width(); -my $format = ($opt::format || "Vbhp"); +my $format = ($opt::format || "VbHP"); +if($format !~ /b/) { + die_usage(); +} my ($front, $end) = split /b/, $format; my ($front_inside, $front_outside) = ($front,$front); $front_inside =~ s/[a-z]//g; # Remove outsides @@ -424,7 +491,7 @@ $end_outside =~ s/[A-Z]//g; # Remove insides for(my $i = 0; $i <= $#$value_ref; $i++) { # $front_outside, ( $front_inside, BAR, $end_inside ), $end_outside, my $header = $header_ref->[$i]; - my $value = $value_ref->[$i]; + my $value = 
undef_as_zero($value_ref->[$i]); my %end_repl = ( 'V' => sprintf(" %".$max_value_length."s",$value), 'H' => sprintf(" %".$max_header_length."s",$header), @@ -466,12 +533,15 @@ sub max { # Returns: # Maximum value of array my $max; + no warnings 'numeric'; for (@_) { # Skip undefs defined $_ or next; - $_ eq "" and next; - defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef - $max = ($max > $_) ? $max : $_; + # Skip empty + $_ eq "" and next; + # Set $max to the first non-undef (convert "10a" => 0+10) + defined $max or do { $max = 0+$_; next; }; + $max = ($max > $_) ? 0+$max : 0+$_; } return $max; } @@ -482,6 +552,7 @@ sub sum { # Sum of values of array my @args = @_; my $sum = 0; + no warnings 'numeric'; for (@args) { # Skip undefs $_ and do { $sum += $_; } @@ -549,26 +620,68 @@ sub bar_string { sub guess_delimiter { my @raw = @_; - my (%charcount,$guess); + my %charcount; for(split//,join("",@raw)) { # [a-zA-Z0-9] should never be auto chosen for delimiter /[a-zA-Z0-9]/ and next; $charcount{$_}++ } - # The guess must be present in all lines - for my $g (sort { $charcount{$b} <=> $charcount{$a} } keys %charcount) { - defined $g or next; - if(grep { not /\Q$g\E/ } @raw) { - next; - } else { - $guess = $g; - last; - } - } + my $guess = (sort { $charcount{$b} <=> $charcount{$a} } keys %charcount)[0]; if(defined $guess and $guess =~ /\s/) { # If the guess is a white space, then use 1+ whitespaces $guess = '\s+'; } + debug("Guessed delimiter: ".undef_as_empty($guess)."\n"); return $guess; } + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty { + my $a = shift; + return $a ? 
$a : ""; +} + +sub debug { + # Returns: N/A + $opt::debug or return; + @_ = map { defined $_ ? $_ : "" } @_; + print @_; +} + +sub warning { + my @w = @_; + my $fh = $Global::original_stderr || *STDERR; + my $prog = $Global::progname || "histogram"; + print $fh $prog, ": Warning: ", @w; +} + + +sub error { + my @w = @_; + my $fh = $Global::original_stderr || *STDERR; + my $prog = $Global::progname || "histogram"; + print $fh $prog, ": Error: ", @w; +} + +sub die_usage { + # Returns: N/A + usage(); + exit(1); +} + +sub usage { + # Returns: N/A + print join + ("\n", + "Usage:", + "histogram [--delimiter |-d ] [--log|-l]", + " [--input |-i ] [--format |-f ]", + " []", + "cat | histogram [options]", + ""); +} diff --git a/histogram/tests b/histogram/tests new file mode 100755 index 0000000..51b5583 --- /dev/null +++ b/histogram/tests @@ -0,0 +1,29 @@ +#!/bin/bash + +echo "## pre space, decimal" +(echo ' 8.999 otte'; echo '16.999 seksten') | histogram +(echo ' 8.999,otte'; echo '16.999,seksten') | histogram --input v,h + +echo "## two decimal, only values" +seq 1 .09 2 | histogram --input v + +echo "## 1 2 3 command line" +histogram 1 2 3 + +echo "## a:1 b:2 c:3 command line" +histogram a:1 b:2 c:3 +echo "## a 1 b 2 c 3" +(echo a 1; echo b 2; echo c 3) | histogram +echo "## a 1 b 2 c 3" +(echo "a 1"; echo b 2; echo c 3) | histogram + +echo "## 1,a 2,b command line" +histogram 1,a 2,b +echo "## 1 2 3 4 command line" +histogram 1 2 3 4 +echo "## a 1 b 2 c 3 command line" +histogram a 1 b 2 c 3 +echo "## "a a":1 b:2 c:3 command line" +histogram "a a":1 b:2 c:3 + +(echo 150 hundredfifty;echo 30 thirty;echo 3 three;echo 6 six) | histogram --format vbH