708 lines
16 KiB
Perl
Executable file
708 lines
16 KiB
Perl
Executable file
#!/usr/bin/perl -w
|
||
|
||
=head1 NAME
|
||
|
||
histogram - make and display a histogram on the command line
|
||
|
||
=head1 SYNOPSIS
|
||
|
||
B<histogram> [--delimiter <delim>|-d <delim>]
|
||
[--log|-l] [--input <format>|-i <format>]
|
||
[--format <format>|-f <format>] [<list of numbers>]
|
||
|
||
B<cat> <file with numbers> | B<histogram> [options]
|
||
|
||
=head1 DESCRIPTION
|
||
|
||
B<histogram> creates a bar chart in your terminal of data. The width
|
||
of the histogram is scaled to fit your terminal and uses a resolution
|
||
of 1/8th of a character.
|
||
|
||
The list of numbers can be formatted as:
|
||
|
||
=over 2
|
||
|
||
=item *
|
||
|
||
Line with CSV: B<histogram> -d , 1,1.01,3.1
|
||
|
||
=item *
|
||
|
||
Line with white space separated values: B<histogram> -i v 1 1.01 3.1
|
||
|
||
=item *
|
||
|
||
Line with white space separated headers+values: B<histogram> 'a 1' 'b 1.01' 'c 3.1'
|
||
|
||
=item *
|
||
|
||
One value per line: (echo 1; echo 1.01; echo 3.1) | B<histogram> -i v
|
||
|
||
=item *
|
||
|
||
One white space separated header+value per line: (echo a 1; echo b 1.01; echo c 3.1) | B<histogram>
|
||
|
||
=item *
|
||
|
||
One comma separated header+value per line: (echo a,1; echo b,1.01; echo c,3.1) | B<histogram> -d ,
|
||
|
||
=back
|
||
|
||
|
||
=head1 OPTIONS
|
||
|
||
=over 9
|
||
|
||
=item B<--delimiter> I<delim>
|
||
|
||
=item B<-d> I<delim>
|
||
|
||
Use I<delim> as delimiter between elements.
|
||
|
||
|
||
=item B<--log>
|
||
|
||
=item B<-l>
|
||
|
||
Take the logarithm of all values.
|
||
|
||
|
||
=item B<--input> I<input_format>
|
||
|
||
=item B<-i> I<input_format>
|
||
|
||
Give format of input. B<histogram> will try to guess the input format
|
||
based on different heuristics. If it guesses wrong, you can override
|
||
it with B<--input>. I<input_format> is a string consisting of:
|
||
|
||
=over 13
|
||
|
||
=item Z<>B<h>
|
||
|
||
This column is a header.
|
||
|
||
=item Z<>B<H>
|
||
|
||
The rest of the line is a header.
|
||
|
||
=item Z<>B<v>
|
||
|
||
This column is a value.
|
||
|
||
=item Z<>B<i>
|
||
|
||
This column is ignored.
|
||
|
||
=item Z<>B<other char>
|
||
|
||
This char is a delimiter
|
||
|
||
=back
|
||
|
||
If there are no delimiter is given, it will be guessed.
|
||
|
||
Examples:
|
||
|
||
=over 13
|
||
|
||
=item B<--input h,v>
|
||
|
||
Column 1 is header, column 2 is value, separated by B<,>.
|
||
|
||
=item B<--input hv>
|
||
|
||
Column 1 is header, column 2 is value, guess separator.
|
||
|
||
=item B<--input ihv>
|
||
|
||
Column 1 is ignored, column 2 is header, column 3 is value, guess separator.
|
||
|
||
=back
|
||
|
||
=item B<--format> I<output_format>
|
||
|
||
=item B<-f> I<output_format>
|
||
|
||
Give format of output. I<output_format> is a string consisting of:
|
||
|
||
=over 5
|
||
|
||
=item Z<>B<b>
|
||
|
||
The bar.
|
||
|
||
=item Z<>B<c>
|
||
|
||
Percent of total outside bar.
|
||
|
||
=item Z<>B<C>
|
||
|
||
Percent of total inside bar.
|
||
|
||
=item Z<>B<h>
|
||
|
||
Header outside bar.
|
||
|
||
=item Z<>B<H>
|
||
|
||
Header inside bar.
|
||
|
||
=item Z<>B<p>
|
||
|
||
Percent of max outside bar.
|
||
|
||
=item Z<>B<p>
|
||
|
||
Percent of max inside bar.
|
||
|
||
=item Z<>B<v>
|
||
|
||
Value outside bar.
|
||
|
||
=item Z<>B<V>
|
||
|
||
Value inside bar.
|
||
|
||
=back
|
||
|
||
A format must contain 'b'. Default is VbHP.
|
||
|
||
|
||
=back
|
||
|
||
=head1 EXAMPLE: file sizes in current dir
|
||
|
||
ls -l|tail -n +2| histogram --input iiiiviiih
|
||
|
||
|
||
=head1 EXAMPLE: run time of processes
|
||
|
||
ps -e | tail -n +2 | perl -pe 's/.*(\d\d):(\d\d):(\d\d) (.*)/($1*3600+$2*60+$3)." $4"/e' | sort -n | histogram
|
||
|
||
|
||
=head1 EXAMPLE: git statistics
|
||
|
||
Number of commits and percentage in the last year, by author:
|
||
|
||
git shortlog -s --after="1 years" | histogram --input vH --format VbHC
|
||
|
||
Number of commits per day:
|
||
|
||
git log --format=%ai | cut -d\ -f1 | uniq -c | histogram
|
||
|
||
Number of commits by hour of the day:
|
||
|
||
git log --format=%ai | perl -pe 's/.* (\d\d):.*/$1/' | sort -n | uniq -c | histogram
|
||
|
||
Number of commits by day of the week:
|
||
|
||
git log --format=%ad |cut -d\ -f1 | sort -n | uniq -c | histogram
|
||
|
||
|
||
=head1 EXAMPLE: Visualize ping times
|
||
|
||
ping -ni .2 -c 10 google.com | grep -oP 'time=\K\S*' | histogram
|
||
|
||
|
||
=head1 EXAMPLE: Visualize disk usage
|
||
|
||
du -s * | histogram --format VbHC
|
||
|
||
|
||
=head1 EXAMPLE: Number of HTTP requests per day
|
||
|
||
cat access.log | cut -d\ -f4 | cut -d: -f 1 | uniq -c | histogram
|
||
|
||
|
||
=head1 EXAMPLE: Letter frequencies in a text file
|
||
|
||
cat file | perl -ne 'print map {uc($_),"\n"} split//,$_' | sort | uniq -c | histogram
|
||
|
||
|
||
=head1 BUGS
|
||
|
||
None known.
|
||
|
||
|
||
=head1 REPORTING BUGS
|
||
|
||
Report bugs to <bug-parallel@gnu.org>.
|
||
|
||
|
||
=head1 AUTHOR
|
||
|
||
Copyright (C) 2012,2013,2014 Ole Tange, http://ole.tange.dk and Free
|
||
Software Foundation, Inc.
|
||
|
||
|
||
=head1 LICENSE
|
||
|
||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||
|
||
This program is free software; you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation; either version 3 of the License, or
|
||
at your option any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
=head2 Documentation license I
|
||
|
||
Permission is granted to copy, distribute and/or modify this documentation
|
||
under the terms of the GNU Free Documentation License, Version 1.3 or
|
||
any later version published by the Free Software Foundation; with no
|
||
Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
|
||
Texts. A copy of the license is included in the file fdl.txt.
|
||
|
||
=head2 Documentation license II
|
||
|
||
You are free:
|
||
|
||
=over 9
|
||
|
||
=item B<to Share>
|
||
|
||
to copy, distribute and transmit the work
|
||
|
||
=item B<to Remix>
|
||
|
||
to adapt the work
|
||
|
||
=back
|
||
|
||
Under the following conditions:
|
||
|
||
=over 9
|
||
|
||
=item B<Attribution>
|
||
|
||
You must attribute the work in the manner specified by the author or
|
||
licensor (but not in any way that suggests that they endorse you or
|
||
your use of the work).
|
||
|
||
=item B<Share Alike>
|
||
|
||
If you alter, transform, or build upon this work, you may distribute
|
||
the resulting work only under the same, similar or a compatible
|
||
license.
|
||
|
||
=back
|
||
|
||
With the understanding that:
|
||
|
||
=over 9
|
||
|
||
=item B<Waiver>
|
||
|
||
Any of the above conditions can be waived if you get permission from
|
||
the copyright holder.
|
||
|
||
=item B<Public Domain>
|
||
|
||
Where the work or any of its elements is in the public domain under
|
||
applicable law, that status is in no way affected by the license.
|
||
|
||
=item B<Other Rights>
|
||
|
||
In no way are any of the following rights affected by the license:
|
||
|
||
=over 2
|
||
|
||
=item *
|
||
|
||
Your fair dealing or fair use rights, or other applicable
|
||
copyright exceptions and limitations;
|
||
|
||
=item *
|
||
|
||
The author's moral rights;
|
||
|
||
=item *
|
||
|
||
Rights other persons may have either in the work itself or in
|
||
how the work is used, such as publicity or privacy rights.
|
||
|
||
=back
|
||
|
||
=back
|
||
|
||
=over 9
|
||
|
||
=item B<Notice>
|
||
|
||
For any reuse or distribution, you must make clear to others the
|
||
license terms of this work.
|
||
|
||
=back
|
||
|
||
A copy of the full license is included in the file as cc-by-sa.txt.
|
||
|
||
=head1 DEPENDENCIES
|
||
|
||
B<histogram> uses Perl, and the Perl module Getopt::Long.
|
||
|
||
|
||
=head1 SEE ALSO
|
||
|
||
B<cut>(1)
|
||
|
||
=cut
|
||
|
||
use strict;
|
||
use Getopt::Long;
|
||
|
||
Getopt::Long::Configure("bundling","require_order");
|
||
GetOptions
|
||
("delimiter|d=s" => \$opt::delimiter,
|
||
"log" => \$opt::log,
|
||
"input|i=s" => \$opt::input,
|
||
"format|f=s" => \$opt::format,
|
||
"debug|D" => \$opt::debug,
|
||
) || die_usage();
|
||
|
||
my @raw;
|
||
if($#ARGV != -1) {
|
||
@raw = @ARGV;
|
||
} else {
|
||
@raw = (<>);
|
||
chomp @raw;
|
||
if(not @raw) {
|
||
# Empty input
|
||
exit;
|
||
}
|
||
}
|
||
|
||
|
||
my ($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref);
|
||
if(not defined $opt::input) {
|
||
my $delimiter;
|
||
if($opt::delimiter) {
|
||
# override guessed delimiter if given
|
||
$delimiter = $opt::delimiter;
|
||
} else {
|
||
# Guess opt::delimiter
|
||
$delimiter = guess_delimiter(@raw);
|
||
}
|
||
if(defined $delimiter) {
|
||
# guess format: (v delimiter h) or (h delimiter v)
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input("v".$delimiter."h",@raw);
|
||
} else {
|
||
# guess format: v
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input("v",@raw);
|
||
$header_ref = $value_ref;
|
||
$max_value_length = $max_header_length;
|
||
$max_value_header_length = $max_value_length + $max_header_length;
|
||
}
|
||
if(scalar(grep /^\d*(\.\d*)?$/, @$header_ref)
|
||
>
|
||
scalar(grep /^\d*(\.\d*)?$/, @$value_ref)) {
|
||
# More headers have numbers than values: swap them
|
||
($header_ref, $value_ref) = ($value_ref, $header_ref);
|
||
}
|
||
} else {
|
||
($max_value_length, $max_header_length, $max_value_header_length, $header_ref, $value_ref) =
|
||
parse_raw_given_opt_input($opt::input, @raw);
|
||
}
|
||
my $max_value = undef_as_zero(max(@$value_ref));
|
||
my $total_value = undef_as_zero(sum(@$value_ref));
|
||
$max_value ||= 1;
|
||
$total_value ||= 1;
|
||
|
||
sub parse_raw_given_opt_input {
|
||
my ($input,@raw) = @_;
|
||
# --input overrides --delimiter
|
||
# --input i v h delimiter
|
||
# --input ivh => delimiter = \s+ (whitespace)
|
||
# \S+\s+(\S+)\s+(\S+)
|
||
# --input i,v;h => ignore , value ; header
|
||
# \S+\,(\S+)\;(\S+)
|
||
$input =~ /v.*v/ and die("Only one v is allow in --input");
|
||
$input =~ /h.*h/ and die("Only one h is allow in --input");
|
||
if($input =~ /^[Hivh]+$/) {
|
||
# No delimiters => '\s+' (whitespace)
|
||
$input = join('\s+', split//, $input);
|
||
}
|
||
# strip prepending white space in input to avoid splitting on that
|
||
for(@raw) { s/^\s+//; }
|
||
my %part_map = (
|
||
"h" => '(\S*)',
|
||
"H" => '(\S*(?:.*\S)*)',
|
||
"i" => '\S*',
|
||
"v" => '(\S*)',
|
||
);
|
||
my (@regexp_part, $first_meta_var,$header,$value,@header,@value);
|
||
for(split //, $input) {
|
||
# Header, Value
|
||
if(/[Hhv]/) {
|
||
# Is this h...v or v...h
|
||
$first_meta_var ||= $_;
|
||
}
|
||
# Header, Value, Ignore
|
||
if(/[Hhiv]/) {
|
||
push @regexp_part, $part_map{$_};
|
||
next;
|
||
}
|
||
# Delimiters
|
||
push @regexp_part, $_;
|
||
}
|
||
my $regexp = join("",@regexp_part);
|
||
debug("Input: $input The regexp: $regexp\n");
|
||
for my $rawline (@raw) {
|
||
$rawline =~ /$regexp/ || die("$regexp not matching $rawline");
|
||
if(defined $2) {
|
||
if($first_meta_var eq "v") {
|
||
# value,header
|
||
$value = $1; $header = $2;
|
||
} else {
|
||
# header,value
|
||
$header = $1; $value = $2;
|
||
}
|
||
} else {
|
||
$header = $1; $value = $1;
|
||
}
|
||
# Remove white space
|
||
$header =~ s/^\s+//;
|
||
$header =~ s/\s+$//;
|
||
$value =~ s/^\s+//;
|
||
$value =~ s/\s+$//;
|
||
# Find the max string length
|
||
$max_value_length = max(length($value),$max_value_length);
|
||
$max_header_length = max(length($header),$max_header_length);
|
||
$max_value_header_length = max(length($value)+length($header),$max_value_header_length);
|
||
# Add the values to the table
|
||
push(@header,$header);
|
||
push(@value,$value);
|
||
debug("Header: $header Value: $value\n");
|
||
}
|
||
|
||
return ($max_value_length, $max_header_length, $max_value_header_length, \@header, \@value);
|
||
}
|
||
|
||
my $term_width = terminal_width();
|
||
|
||
my $format = ($opt::format || "VbHP");
|
||
if($format !~ /b/) {
|
||
die_usage();
|
||
}
|
||
my ($front, $end) = split /b/, $format;
|
||
my ($front_inside, $front_outside) = ($front,$front);
|
||
$front_inside =~ s/[a-z]//g; # Remove outsides
|
||
$front_outside =~ s/[A-Z]//g; # Remove insides
|
||
my ($end_inside, $end_outside) = ($end,$end);
|
||
$end_inside =~ s/[a-z]//g; # Remove outsides
|
||
$end_outside =~ s/[A-Z]//g; # Remove insides
|
||
|
||
for(my $i = 0; $i <= $#$value_ref; $i++) {
|
||
# $front_outside, ( $front_inside, BAR, $end_inside ), $end_outside,
|
||
my $header = $header_ref->[$i];
|
||
my $value = undef_as_zero($value_ref->[$i]);
|
||
my %end_repl = (
|
||
'V' => sprintf(" %".$max_value_length."s",$value),
|
||
'H' => sprintf(" %".$max_header_length."s",$header),
|
||
'P' => sprintf(" %3d%%",int($value/$max_value*100+0.5)),
|
||
'C' => sprintf(" %3d%%",int($value/$total_value*100+0.5)),
|
||
);
|
||
my %front_repl = (
|
||
'V' => sprintf("%".$max_value_length."s ",$value),
|
||
'H' => sprintf("%-".$max_header_length."s ",$header),
|
||
'P' => sprintf("%3d%% ",int($value/$max_value*100+0.5)),
|
||
'C' => sprintf(" %3d%%",int($value/$total_value*100+0.5)),
|
||
);
|
||
my $front_outside_string = $front_outside;
|
||
$front_outside_string =~ s/(.)/$front_repl{uc($1)}/g;
|
||
my $end_outside_string = $end_outside;
|
||
$end_outside_string =~ s/(.)/$end_repl{uc($1)}/g;
|
||
my $front_inside_string = $front_inside;
|
||
$front_inside_string =~ s/(.)/$front_repl{uc($1)}/g;
|
||
my $end_inside_string = $end_inside;
|
||
$end_inside_string =~ s/(.)/$end_repl{uc($1)}/g;
|
||
|
||
my $bar_length = $term_width - length($front_outside_string) - length($end_outside_string);
|
||
my $factor;
|
||
if($opt::log) {
|
||
$factor = log($value)/log($max_value);
|
||
} else {
|
||
$factor = $value/$max_value;
|
||
}
|
||
my $bar = bar_string($bar_length-1, $factor, $front_inside_string, $end_inside_string);
|
||
print $front_outside_string, $bar, $end_outside_string, "\n";
|
||
}
|
||
|
||
|
||
sub max {
|
||
# Returns:
|
||
# Maximum value of array
|
||
my $max;
|
||
no warnings 'numeric';
|
||
for (@_) {
|
||
# Skip undefs
|
||
defined $_ or next;
|
||
# Skip empty
|
||
$_ eq "" and next;
|
||
# Set $_ to the first non-undef (convert "10a" => 0+10)
|
||
defined $max or do { $max = 0+$_; next; };
|
||
$max = ($max > $_) ? 0+$max : $_;
|
||
}
|
||
return $max;
|
||
}
|
||
|
||
|
||
sub sum {
|
||
# Returns:
|
||
# Sum of values of array
|
||
my @args = @_;
|
||
my $sum = 0;
|
||
no warnings 'numeric';
|
||
for (@args) {
|
||
# Skip undefs
|
||
$_ and do { $sum += $_; }
|
||
}
|
||
return $sum;
|
||
}
|
||
|
||
{
|
||
my $columns;
|
||
|
||
sub terminal_width {
|
||
if(not $columns) {
|
||
$columns = $ENV{'COLUMNS'};
|
||
if(not $columns) {
|
||
my $resize = qx{ resize 2>/dev/null };
|
||
$resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
|
||
}
|
||
$columns ||= 80;
|
||
}
|
||
return $columns;
|
||
}
|
||
}
|
||
|
||
|
||
sub bar_string {
|
||
my ($width,$factor,$front,$end) = @_;
|
||
my @eight = (qw(█ ▉ ▊ ▋ ▌ ▍ ▎ ▏));
|
||
my $l = $width * $factor;
|
||
my $partial;
|
||
my $black = int($l);
|
||
my $white = $width-int($l);
|
||
my $rev = '[7m';
|
||
my $reset = '[0m';
|
||
$front =~ s/ *$//;
|
||
$end =~ s/^ *//;
|
||
if(length $front < $black) {
|
||
# Paint $front reverse
|
||
$black -= length $front;
|
||
$width -= length $front;
|
||
$front = $rev . $front . $reset;
|
||
} else {
|
||
# label overlaps white
|
||
$white = $width - length $front;
|
||
$front = $rev . substr($front,0,$black). $reset.substr($front,$black);
|
||
$black = 0;
|
||
$partial = " ";
|
||
}
|
||
if(length $end < $white) {
|
||
# Just append $end
|
||
$white -= length $end;
|
||
$width -= length $end;
|
||
} else {
|
||
# label overlaps black
|
||
$black = $width - length($end);
|
||
$end = $rev . substr($end,0,length($end)-$white). $reset.substr($end,length($end)-$white);
|
||
$white = 0;
|
||
$partial = $eight[0];
|
||
}
|
||
$partial ||= ($eight[7-(int($l*8))%8]);
|
||
my $middle = ($eight[0] x $black). $partial . (" "x$white);
|
||
|
||
return $front . $middle . $end;
|
||
}
|
||
|
||
|
||
sub guess_delimiter {
|
||
my @raw = @_;
|
||
my %charcount;
|
||
|
||
for(split//,join("",@raw)) {
|
||
# [a-zA-Z0-9] should never be auto chosen for delimiter
|
||
/[a-zA-Z0-9]/ and next;
|
||
$charcount{$_}++
|
||
}
|
||
my $guess;
|
||
for my $g (
|
||
# Force trying these first:
|
||
"\0", "\t", ";", ",", " ",
|
||
sort { $charcount{$b} <=> $charcount{$a} } keys %charcount) {
|
||
# The delimiter must be found in every single line
|
||
if(grep !/\Q$g\E/, @raw) {
|
||
next;
|
||
}
|
||
$guess = $g;
|
||
last;
|
||
}
|
||
|
||
if(defined $guess and $guess =~ /\s/) {
|
||
# If the guess is a white space, then use 1+ whitespaces
|
||
$guess = '\s+';
|
||
}
|
||
debug("Guessed delimiter: ".undef_as_empty($guess)."\n");
|
||
return $guess;
|
||
}
|
||
|
||
sub undef_as_zero {
|
||
my $a = shift;
|
||
return $a ? $a : 0;
|
||
}
|
||
|
||
sub undef_as_empty {
|
||
my $a = shift;
|
||
return $a ? $a : "";
|
||
}
|
||
|
||
sub debug {
|
||
# Returns: N/A
|
||
$opt::debug or return;
|
||
@_ = grep { defined $_ ? $_ : "" } @_;
|
||
print @_;
|
||
}
|
||
|
||
sub warning {
|
||
my @w = @_;
|
||
my $fh = $Global::original_stderr || *STDERR;
|
||
my $prog = $Global::progname || "parallel";
|
||
print $fh $prog, ": Warning: ", @w;
|
||
}
|
||
|
||
|
||
sub error {
|
||
my @w = @_;
|
||
my $fh = $Global::original_stderr || *STDERR;
|
||
my $prog = $Global::progname || "parallel";
|
||
print $fh $prog, ": Error: ", @w;
|
||
}
|
||
|
||
sub die_usage {
|
||
# Returns: N/A
|
||
usage();
|
||
exit(1);
|
||
}
|
||
|
||
sub usage {
|
||
# Returns: N/A
|
||
print join
|
||
("\n",
|
||
"Usage:",
|
||
"histogram [--delimiter <delim>|-d <delim>] [--log|-l]",
|
||
" [--input <format>|-i <format>] [--format <format>|-f <format>]",
|
||
" [<list of numbers>]",
|
||
"cat <file with numbers> | histogram [options]",
|
||
"");
|
||
}
|