575 lines
14 KiB
Perl
Executable file
575 lines
14 KiB
Perl
Executable file
#!/usr/bin/perl -w
|
||
|
||
=head1 NAME
|
||
|
||
histogram - make and display a histogram on the command line
|
||
|
||
=head1 SYNOPSIS
|
||
|
||
B<histogram> [--delimiter <delim>|-d <delim>] [--pre|--post]
|
||
[--log|-l] [--values-as-headers|-t] [--values-before-headers|-b] <list of numbers>
|
||
|
||
B<cat> <file with numbers> | B<histogram> [options]
|
||
|
||
=head1 DESCRIPTION
|
||
|
||
B<histogram> creates a bar chart in your terminal of data. The width
|
||
of the histogram is scaled to fit your terminal and uses a resolution
|
||
of 1/8th of a character.
|
||
|
||
The list of numbers can be formatted as:
|
||
|
||
=over 2
|
||
|
||
=item *
|
||
|
||
Line with CSV: B<histogram> -d , 1,1.01,3.1
|
||
|
||
=item *
|
||
|
||
Line with white space separated values: B<histogram> 1 1.01 3.1
|
||
|
||
=item *
|
||
|
||
Line with white space separated headers+values: B<histogram> a 1 b 1.01 c 3.1
|
||
|
||
=item *
|
||
|
||
One value per line: (echo 1; echo 1.01; echo 3.1) | B<histogram>
|
||
|
||
=item *
|
||
|
||
One white space separated header+value per line: (echo a 1; echo b 1.01; echo c 3.1) | B<histogram>
|
||
|
||
=item *
|
||
|
||
One comma separated header+value per line: (echo a,1; echo b,1.01; echo c,3.1) | B<histogram> -d ,
|
||
|
||
=back
|
||
|
||
|
||
=head1 OPTIONS
|
||
|
||
=over 9
|
||
|
||
=item B<--delimiter> I<delim>
|
||
|
||
=item B<-d> I<delim>
|
||
|
||
Use I<delim> as delimiter between elements.
|
||
|
||
|
||
=item B<--log>
|
||
|
||
=item B<-l>
|
||
|
||
Take the logarithm of all values.
|
||
|
||
|
||
=item B<--pre>
|
||
|
||
Put the header before the bar.
|
||
|
||
See also: B<--post>
|
||
|
||
|
||
=item B<--post>
|
||
|
||
Put the header after the bar.
|
||
|
||
See also: B<--pre>
|
||
|
||
|
||
=item B<--values-as-headers>
|
||
|
||
=item B<-t>
|
||
|
||
Use the numbers as headers.
|
||
|
||
|
||
=item B<--values-before-headers>
|
||
|
||
=item B<-b>
|
||
|
||
Normally headers are given before the
|
||
value. B<--values-before-headers> looks the header after the value.
|
||
|
||
|
||
=back
|
||
|
||
(echo 150 hundredfifty;echo 30 thirty;echo 3 three;echo 6 six)|./histogram --format Hbcp
|
||
(echo 0 zero; echo 50 fifty; echo 150 hundredfifty;echo 130 hundredthirty;echo 3 three;echo 6 six)|./histogram --format HbHCP
|
||
ls -l|tail -n +2| ./histogram --input iiiiviiih
|
||
|
||
=head1 EXAMPLE: git: number of commits in the last year, by author
|
||
|
||
git shortlog -s --after="1 years" | histogram -b
|
||
|
||
|
||
=head1 EXAMPLE: git: number of commits per day
|
||
|
||
git log --format=%ai | cut -d\ -f1 | uniq -c | histogram -b --post
|
||
|
||
|
||
=head1 EXAMPLE: git: commits by hour of the day
|
||
|
||
git log --format=%ai | perl -pe 's/.* (\d\d):.*/$1/' | sort -n | uniq -c | histogram -b
|
||
|
||
|
||
=head1 EXAMPLE: git: commits by day of the week
|
||
|
||
git log --format=%ad |cut -d\ -f1 | sort -n | uniq -c | histogram -b
|
||
|
||
|
||
=head1 EXAMPLE: run time of processes
|
||
|
||
ps -e | tail -n +2 | perl -pe 's/.*(\d\d):(\d\d):(\d\d) (.*)/($1*3600+$2*60+$3)." $4"/e' | histogram -b -l
|
||
|
||
|
||
=head1 EXAMPLE: Letter frequencies in a text file
|
||
|
||
cat file | perl -ne 'print map {uc($_),"\n"} split//,$_' | sort | uniq -c | histogram -b
|
||
|
||
|
||
=head1 EXAMPLE: Number of HTTP requests per day
|
||
|
||
cat apache.log | cut -d\ -f4 | cut -d/ -f 1,2 | uniq -c | histogram -b
|
||
|
||
|
||
=head1 EXAMPLE: Beijing Air Quality Index
|
||
|
||
curl -s https://twitter.com/statuses/user_timeline/15527964.rss | grep /description | perl -nle 'print "$1 $2" if /(\S+ \S+); PM2.5;[^;]+; (\d+)/' | histogram
|
||
|
||
|
||
=head1 EXAMPLE: Visualize ping times
|
||
|
||
ping -i .2 -c 10 google.com | grep -oP 'time=\K\S*' | histogram -t --post
|
||
|
||
|
||
=head1 EXAMPLE: Visualize filesize inside a directory
|
||
|
||
du -s * | histogram -b
|
||
|
||
|
||
=head1 BUGS
|
||
|
||
None known.
|
||
|
||
|
||
=head1 REPORTING BUGS
|
||
|
||
Report bugs to <bug-parallel@gnu.org>.
|
||
|
||
|
||
=head1 AUTHOR
|
||
|
||
Copyright (C) 2012 Ole Tange, http://ole.tange.dk and Free
|
||
Software Foundation, Inc.
|
||
|
||
|
||
=head1 LICENSE
|
||
|
||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||
|
||
This program is free software; you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation; either version 3 of the License, or
|
||
at your option any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
=head2 Documentation license I
|
||
|
||
Permission is granted to copy, distribute and/or modify this documentation
|
||
under the terms of the GNU Free Documentation License, Version 1.3 or
|
||
any later version published by the Free Software Foundation; with no
|
||
Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
|
||
Texts. A copy of the license is included in the file fdl.txt.
|
||
|
||
=head2 Documentation license II
|
||
|
||
You are free:
|
||
|
||
=over 9
|
||
|
||
=item B<to Share>
|
||
|
||
to copy, distribute and transmit the work
|
||
|
||
=item B<to Remix>
|
||
|
||
to adapt the work
|
||
|
||
=back
|
||
|
||
Under the following conditions:
|
||
|
||
=over 9
|
||
|
||
=item B<Attribution>
|
||
|
||
You must attribute the work in the manner specified by the author or
|
||
licensor (but not in any way that suggests that they endorse you or
|
||
your use of the work).
|
||
|
||
=item B<Share Alike>
|
||
|
||
If you alter, transform, or build upon this work, you may distribute
|
||
the resulting work only under the same, similar or a compatible
|
||
license.
|
||
|
||
=back
|
||
|
||
With the understanding that:
|
||
|
||
=over 9
|
||
|
||
=item B<Waiver>
|
||
|
||
Any of the above conditions can be waived if you get permission from
|
||
the copyright holder.
|
||
|
||
=item B<Public Domain>
|
||
|
||
Where the work or any of its elements is in the public domain under
|
||
applicable law, that status is in no way affected by the license.
|
||
|
||
=item B<Other Rights>
|
||
|
||
In no way are any of the following rights affected by the license:
|
||
|
||
=over 2
|
||
|
||
=item *
|
||
|
||
Your fair dealing or fair use rights, or other applicable
|
||
copyright exceptions and limitations;
|
||
|
||
=item *
|
||
|
||
The author's moral rights;
|
||
|
||
=item *
|
||
|
||
Rights other persons may have either in the work itself or in
|
||
how the work is used, such as publicity or privacy rights.
|
||
|
||
=back
|
||
|
||
=back
|
||
|
||
=over 9
|
||
|
||
=item B<Notice>
|
||
|
||
For any reuse or distribution, you must make clear to others the
|
||
license terms of this work.
|
||
|
||
=back
|
||
|
||
A copy of the full license is included in the file as cc-by-sa.txt.
|
||
|
||
=head1 DEPENDENCIES
|
||
|
||
B<histogram> uses Perl, and the Perl module Getopt::Long.
|
||
|
||
|
||
=head1 SEE ALSO
|
||
|
||
B<cut>(1)
|
||
|
||
=cut
|
||
|
||
# histogram -d , a1,b2,c3 d4,5,e76
|
||
# histogram 1 2 3
|
||
# histogram a:1 b:2 c:3
|
||
# histogram "a a":1 b:2 c:3
|
||
# histogram "a a" 1 b 2 c 3
|
||
# (echo a a 1; echo b 2; echo c 3) | histogram
|
||
# histogram --post aaaaaaaaaaa1 b10
|
||
# seq 10 | histogram -t --pre
|
||
|
||
use strict;
|
||
use Getopt::Long;
|
||
|
||
GetOptions
|
||
("delimiter|d=s" => \$opt::delimiter,
|
||
"log" => \$opt::log,
|
||
"input|i=s" => \$opt::input,
|
||
"format|f=s" => \$opt::format,
|
||
) || die_usage();
|
||
|
||
my @raw;
|
||
if($#ARGV != -1) {
|
||
@raw = @ARGV;
|
||
} else {
|
||
@raw = (<>);
|
||
chomp @raw;
|
||
}
|
||
|
||
my ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref);
|
||
if(not defined $opt::input) {
|
||
# Guess opt::input
|
||
my $delimiter = guess_delimiter(@raw);
|
||
if($opt::delimiter) {
|
||
# override guessed delimiter if given
|
||
$delimiter = $opt::delimiter;
|
||
} elsif(defined $delimiter) {
|
||
# guess format: (v delimiter h) or (h delimiter v)
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input('\s*v'.$delimiter."h",@raw);
|
||
} else {
|
||
# guess format: v
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input('\s*v',@raw);
|
||
$header_ref = $value_ref;
|
||
$max_value_length = $max_header_length;
|
||
$max_value_header_length = $max_value_length + $max_header_length;
|
||
}
|
||
if(scalar(grep /^\d*(\.\d*)?$/, @$header_ref)
|
||
>
|
||
scalar(grep /^\d*(\.\d*)?$/, @$value_ref)) {
|
||
# More headers have numbers than values: swap them
|
||
($header_ref, $value_ref) = ($value_ref, $header_ref);
|
||
}
|
||
} else {
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input($opt::input, @raw);
|
||
}
|
||
my $max_value = max(@$value_ref);
|
||
my $total_value = sum(@$value_ref);
|
||
|
||
sub parse_raw_given_opt_input {
|
||
my ($input,@raw) = @_;
|
||
# --input overrides --delimiter
|
||
# --input i v h delimiter
|
||
# --input ivh => delimiter = \s+ (whitespace)
|
||
# \S+\s+(\S+)\s+(\S+)
|
||
# --input i,v;h => ignore , value ; header
|
||
# \S+\,(\S+)\;(\S+)
|
||
$input =~ /v.*v/ and die("Only one v is allow in --input");
|
||
$input =~ /h.*h/ and die("Only one h is allow in --input");
|
||
if($input =~ /^[ivh]+$/) {
|
||
# No delimiters => '\s+' (whitespace)
|
||
$input = join('\s+', split//, $input);
|
||
}
|
||
my %part_map = (
|
||
"h" => '(\S*)',
|
||
"i" => '\S*',
|
||
"v" => '(\S*)',
|
||
);
|
||
my (@regexp_part, $first_meta_var,$header,$value,@header,@value);
|
||
for(split //, $input) {
|
||
# Header, Value
|
||
if(/[hv]/) {
|
||
# Is this h...v or v...h
|
||
$first_meta_var ||= $_;
|
||
}
|
||
# Header, Value, Ignore
|
||
if(/[hiv]/) {
|
||
push @regexp_part, $part_map{$_};
|
||
next;
|
||
}
|
||
# Delimiters
|
||
push @regexp_part, $_;
|
||
}
|
||
my $regexp = join("",@regexp_part);
|
||
for my $rawline (@raw) {
|
||
$rawline =~ /$regexp/ || die("$regexp not matching $rawline");
|
||
if(defined $2) {
|
||
if($first_meta_var eq "v") {
|
||
# value,header
|
||
$value = $1; $header = $2;
|
||
} else {
|
||
# header,value
|
||
$header = $1; $value = $2;
|
||
}
|
||
} else {
|
||
$header = $1; $value = $1;
|
||
}
|
||
# Remove white space
|
||
$header =~ s/^\s+//;
|
||
$header =~ s/\s+$//;
|
||
$value =~ s/^\s+//;
|
||
$value =~ s/\s+$//;
|
||
# Find the max string length
|
||
$max_value_length = max(length($value),$max_value_length);
|
||
$max_header_length = max(length($header),$max_header_length);
|
||
$max_value_header_length = max(length($value)+length($header),$max_value_header_length);
|
||
# Add the values to the table
|
||
push(@header,$header);
|
||
push(@value,$value);
|
||
}
|
||
|
||
return ($max_value_length, $max_header_length, $max_value_header_length, \@header, \@value);
|
||
}
|
||
|
||
my $term_width = terminal_width();
|
||
|
||
my $format = ($opt::format || "Vbhp");
|
||
my ($front, $end) = split /b/, $format;
|
||
my ($front_inside, $front_outside) = ($front,$front);
|
||
$front_inside =~ s/[a-z]//g; # Remove outsides
|
||
$front_outside =~ s/[A-Z]//g; # Remove insides
|
||
my ($end_inside, $end_outside) = ($end,$end);
|
||
$end_inside =~ s/[a-z]//g; # Remove outsides
|
||
$end_outside =~ s/[A-Z]//g; # Remove insides
|
||
|
||
for(my $i = 0; $i <= $#$value_ref; $i++) {
|
||
# $front_outside, ( $front_inside, BAR, $end_inside ), $end_outside,
|
||
my $header = $header_ref->[$i];
|
||
my $value = $value_ref->[$i];
|
||
my %end_repl = (
|
||
'V' => sprintf(" %".$max_value_length."s",$value),
|
||
'H' => sprintf(" %".$max_header_length."s",$header),
|
||
'P' => sprintf(" %3d%%",int($value/$max_value*100+0.5)),
|
||
'C' => sprintf(" %3d%%",int($value/$total_value*100+0.5)),
|
||
);
|
||
my %front_repl = (
|
||
'V' => sprintf("%".$max_value_length."s ",$value),
|
||
'H' => sprintf("%-".$max_header_length."s ",$header),
|
||
'P' => sprintf("%3d%% ",int($value/$max_value*100+0.5)),
|
||
'C' => sprintf(" %3d%%",int($value/$total_value*100+0.5)),
|
||
);
|
||
my $front_outside_string = $front_outside;
|
||
$front_outside_string =~ s/(.)/$front_repl{uc($1)}/g;
|
||
my $end_outside_string = $end_outside;
|
||
$end_outside_string =~ s/(.)/$end_repl{uc($1)}/g;
|
||
my $front_inside_string = $front_inside;
|
||
$front_inside_string =~ s/(.)/$front_repl{uc($1)}/g;
|
||
my $end_inside_string = $end_inside;
|
||
$end_inside_string =~ s/(.)/$end_repl{uc($1)}/g;
|
||
|
||
my $bar_length = $term_width - length($front_outside_string) - length($end_outside_string);
|
||
my $factor;
|
||
if($opt::log) {
|
||
if($value <= 0 or $max_value <= 0) {
|
||
$factor = 0;
|
||
} else {
|
||
$factor = log($value)/log($max_value);
|
||
}
|
||
} else {
|
||
$factor = $value/$max_value;
|
||
}
|
||
my $bar = bar_string($bar_length-1, $factor, $front_inside_string, $end_inside_string);
|
||
print $front_outside_string, $bar, $end_outside_string, "\n";
|
||
}
|
||
|
||
|
||
sub max {
|
||
# Returns:
|
||
# Maximum value of array
|
||
my $max;
|
||
for (@_) {
|
||
# Skip undefs
|
||
defined $_ or next;
|
||
$_ eq "" and next;
|
||
defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
|
||
$max = ($max > $_) ? $max : $_;
|
||
}
|
||
return $max;
|
||
}
|
||
|
||
|
||
sub sum {
|
||
# Returns:
|
||
# Sum of values of array
|
||
my @args = @_;
|
||
my $sum = 0;
|
||
for (@args) {
|
||
# Skip undefs
|
||
$_ and do { $sum += $_; }
|
||
}
|
||
return $sum;
|
||
}
|
||
|
||
{
|
||
my $columns;
|
||
|
||
sub terminal_width {
|
||
if(not $columns) {
|
||
$columns = $ENV{'COLUMNS'};
|
||
if(not $columns) {
|
||
my $resize = qx{ resize 2>/dev/null };
|
||
$resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
|
||
}
|
||
$columns ||= 80;
|
||
}
|
||
return $columns;
|
||
}
|
||
}
|
||
|
||
|
||
sub bar_string {
|
||
my ($width,$factor,$front,$end) = @_;
|
||
my @eight = (qw(█ ▉ ▊ ▋ ▌ ▍ ▎ ▏));
|
||
my $l = $width * $factor;
|
||
my $partial;
|
||
my $black = int($l);
|
||
my $white = $width-int($l);
|
||
my $rev = '[7m';
|
||
my $reset = '[0m';
|
||
$front =~ s/ *$//;
|
||
$end =~ s/^ *//;
|
||
if(length $front < $black) {
|
||
# Paint $front reverse
|
||
$black -= length $front;
|
||
$width -= length $front;
|
||
$front = $rev . $front . $reset;
|
||
} else {
|
||
# label overlaps white
|
||
$white = $width - length $front;
|
||
$front = $rev . substr($front,0,$black). $reset.substr($front,$black);
|
||
$black = 0;
|
||
$partial = " ";
|
||
}
|
||
if(length $end < $white) {
|
||
# Just append $end
|
||
$white -= length $end;
|
||
$width -= length $end;
|
||
} else {
|
||
# label overlaps black
|
||
$black = $width - length($end);
|
||
$end = $rev . substr($end,0,length($end)-$white). $reset.substr($end,length($end)-$white);
|
||
$white = 0;
|
||
$partial = $eight[0];
|
||
}
|
||
$partial ||= ($eight[7-(int($l*8))%8]);
|
||
my $middle = ($eight[0] x $black). $partial . (" "x$white);
|
||
|
||
return $front . $middle . $end;
|
||
}
|
||
|
||
|
||
sub guess_delimiter {
|
||
my @raw = @_;
|
||
my (%charcount,$guess);
|
||
|
||
for(split//,join("",@raw)) {
|
||
# [a-zA-Z0-9] should never be auto chosen for delimiter
|
||
/[a-zA-Z0-9]/ and next;
|
||
$charcount{$_}++
|
||
}
|
||
# The guess must be present in all lines
|
||
for my $g (sort { $charcount{$b} <=> $charcount{$a} } keys %charcount) {
|
||
defined $g or next;
|
||
if(grep { not /\Q$g\E/ } @raw) {
|
||
next;
|
||
} else {
|
||
$guess = $g;
|
||
last;
|
||
}
|
||
}
|
||
if(defined $guess and $guess =~ /\s/) {
|
||
# If the guess is a white space, then use 1+ whitespaces
|
||
$guess = '\s+';
|
||
}
|
||
return $guess;
|
||
}
|