plotpipe: Added README.
This commit is contained in:
parent
6d141ac74e
commit
8be6d39649
4
README
4
README
|
@ -30,10 +30,14 @@ mirrorpdf - mirror PDF-file horizontally.
|
||||||
|
|
||||||
neno - no error no output. Only print STDERR and STDOUT if the command fails.
|
neno - no error no output. Only print STDERR and STDOUT if the command fails.
|
||||||
|
|
||||||
|
not - flip exit value of command.
|
||||||
|
|
||||||
off - turn off monitor.
|
off - turn off monitor.
|
||||||
|
|
||||||
pdfman - convert man page to pdf and display it using evince.
|
pdfman - convert man page to pdf and display it using evince.
|
||||||
|
|
||||||
|
plotpipe - plot CSV data from a pipe.
|
||||||
|
|
||||||
puniq - print unique lines the first time they are seen.
|
puniq - print unique lines the first time they are seen.
|
||||||
|
|
||||||
ramusage - display the ram usage of a program using `time -v`.
|
ramusage - display the ram usage of a program using `time -v`.
|
||||||
|
|
|
@ -48,3 +48,5 @@ your initramfs, you need to add them by adding to
|
||||||
When all is done, update the initramfs:
|
When all is done, update the initramfs:
|
||||||
|
|
||||||
update-initramfs -u
|
update-initramfs -u
|
||||||
|
|
||||||
|
(C) 2014 Ole Tange, GPLv2 or later
|
||||||
|
|
214
parsort/parsort
214
parsort/parsort
|
@ -1,214 +0,0 @@
|
||||||
#!/usr/bin/perl
|
|
||||||
|
|
||||||
=pod
|
|
||||||
|
|
||||||
=head1 NAME
|
|
||||||
|
|
||||||
parsort - Sort in parallel
|
|
||||||
|
|
||||||
|
|
||||||
=head1 SYNOPSIS
|
|
||||||
|
|
||||||
B<parsort> I<options for sort>
|
|
||||||
|
|
||||||
|
|
||||||
=head1 DESCRIPTION
|
|
||||||
|
|
||||||
B<parsort> uses B<sort> to sort in parallel. It works just like
|
|
||||||
B<sort> but faster, if you have a multicore machine.
|
|
||||||
|
|
||||||
Hopefully these ideas will make it into GNU Sort in the future.
|
|
||||||
|
|
||||||
|
|
||||||
=head1 EXAMPLE
|
|
||||||
|
|
||||||
Sort files:
|
|
||||||
|
|
||||||
parsort *.txt > sorted.txt
|
|
||||||
|
|
||||||
Sort stdin (standard input) numerically:
|
|
||||||
|
|
||||||
cat numbers | parsort -n > sorted.txt
|
|
||||||
|
|
||||||
|
|
||||||
=head1 PERFORMANCE
|
|
||||||
|
|
||||||
B<parsort> is faster when sorting files than when sorting stdin, because files can be read in parallel.
|
|
||||||
|
|
||||||
On a 48 core machine you should see a speedup of 3x over B<sort>.
|
|
||||||
|
|
||||||
|
|
||||||
=head1 AUTHOR
|
|
||||||
|
|
||||||
Copyright (C) 2020 Ole Tange,
|
|
||||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
|
|
||||||
=head1 LICENSE
|
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 3 of the License, or
|
|
||||||
at your option any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
|
|
||||||
=head1 DEPENDENCIES
|
|
||||||
|
|
||||||
B<parsort> uses B<sort>, B<bash>, B<parallel>, and B<mbuffer>.
|
|
||||||
|
|
||||||
|
|
||||||
=head1 SEE ALSO
|
|
||||||
|
|
||||||
B<sort>
|
|
||||||
|
|
||||||
|
|
||||||
=cut
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use Getopt::Long;
|
|
||||||
use POSIX qw(mkfifo);
|
|
||||||
|
|
||||||
# Parse the command line.
# "bundling" allows short flags to be combined (-nr); "require_order"
# stops option parsing at the first non-option, so everything after the
# options is left in @ARGV as file names.
Getopt::Long::Configure("bundling","require_order");

# GetOptions removes the options from @ARGV. Keep a copy of the full
# argument list so the options can be passed on to sort later.
my @ARGV_before = @ARGV;
GetOptions(
    "debug|D" => \$opt::D,
    "version" => \$opt::version,
    "verbose|v" => \$opt::verbose,
    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
    "d|dictionary-order" => \$opt::dictionary_order,
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
    "n|numeric-sort" => \$opt::numeric_sort,
    "N|numascii" => \$opt::numascii,
    "r|reverse" => \$opt::reverse,
    "R|random-sort" => \$opt::random_sort,
    "sort=s" => \$opt::sort,
    "V|version-sort" => \$opt::version_sort,
    "k|key=s" => \@opt::key,
    "t|field-separator=s" => \$opt::field_separator,
    "z|zero-terminated" => \$opt::zero_terminated,
    ) || exit(255);
# Name of this program without the directory part
$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
$Global::version = 20200411;
if($opt::version) {
    # Print the version here: this script has no version() function
    print "$Global::progname $Global::version\n";
    exit 0;
}
# NOTE(review): $/ is global, so this also changes how chomp and <FH>
# split records in the rest of the script - confirm that is intended
if($opt::zero_terminated) { $/ = "\0"; }
# The options are the arguments that GetOptions removed from the front
# of @ARGV: everything in @ARGV_before except the remaining @ARGV
@Global::sortoptions = @ARGV_before[0..($#ARGV_before-$#ARGV-1)];
$ENV{'TMPDIR'} ||= "/tmp";
|
|
||||||
|
|
||||||
sub merge {
    # Combine commands that print sorted data into a single bash command
    # that merges their output with a tree of 'sort -m' processes
    # Input:
    #   @cmd = commands to 'cat' (part of) a file
    #          A trailing newline on a command is removed
    # Uses:
    #   @Global::sortoptions = options passed on to every 'sort -m'
    # Returns:
    #   @cmd = the merged command (a single element)
    #          With fewer than two commands the input is returned
    my @cmd = @_;
    # Remove the newline explicitly: chomp depends on $/, which is "\0"
    # when -z is given
    s/\n\z// for @cmd;
    # The options become part of a command line run by bash, so values
    # such as the separator in -t '|' must be quoted. Plain words are
    # left untouched.
    my $sortoptions = join " ", map {
        my $opt = $_;
        if($opt !~ m{\A[\w=+,.:/\@%-]+\z}) {
            $opt =~ s/'/'\\''/g;
            $opt = "'$opt'";
        }
        $opt;
    } @Global::sortoptions;
    while($#cmd > 0) {
        my @tmp;
        while($#cmd >= 0) {
            # Take the next two commands. With an odd number of commands
            # the last one has no partner.
            # (Not called $a and $b: those names belong to sort)
            my $first = shift @cmd;
            my $second = shift @cmd;
            $first &&= "<($first)";
            $second = $second ? "<($second)" : "";
            # Ignore errors from mbuffer - it gives errors when a pipe is closed
            push @tmp, "sort -m $sortoptions $first $second | mbuffer -v0 -q -m 30M;";
        }
        @cmd = @tmp;
    }
    return @cmd;
}
|
|
||||||
|
|
||||||
sub tmpname {
    # Select a name in $ENV{'TMPDIR'} that does not exist
    # Do not create the file: the caller decides what to create
    # Remember the name in %Global::unlink to avoid hitting the same name twice
    # Input:
    #   $name = prefix of the file name
    # Returns:
    #   $tmpname = $TMPDIR/$name followed by 5 random alphanumeric characters
    # Exits with 255 if $TMPDIR does not exist or is not writable
    my $name = shift;
    my($tmpname);
    if(not -w $ENV{'TMPDIR'}) {
        # Print the error here: this script has no error() or
        # wait_and_exit() function
        my $progname = $Global::progname || "parsort";
        my @msg;
        if(not -e $ENV{'TMPDIR'}) {
            @msg = ("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
        } else {
            @msg = ("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
        }
        print STDERR map { "$progname: Error: $_\n" } @msg;
        exit(255);
    }
    my @chars = (0..9,"a".."z","A".."Z");
    do {
        $tmpname = $ENV{'TMPDIR'}."/".$name.
            join"", map { $chars[rand(@chars)] } (1..5);
        # Retry if the name exists or was handed out earlier
    } while(-e $tmpname or $Global::unlink{$tmpname}++);
    return $tmpname;
}
|
|
||||||
|
|
||||||
sub tmpfifo {
    # Find an unused name and mkfifo on it
    # Returns:
    #   $tmpfifo = name of the newly created fifo (mode 0600)
    # Dies if the fifo cannot be created
    my $tmpfifo = tmpname("psort");
    # Fail here rather than continue with a path that is not a fifo
    mkfifo($tmpfifo,0600) or die "mkfifo $tmpfifo: $!";
    return $tmpfifo;
}
|
|
||||||
|
|
||||||
sub sort_files {
    # Sort the files in @ARGV in parallel and print the merged result
    # on stdout
    # Uses:
    #   @ARGV = files to sort
    #   @Global::sortoptions = options passed on to sort
    my @files = @ARGV;
    # Let GNU Parallel generate the commands to read parts of files
    # The commands split at \n and there will be at least one for each CPU thread
    open(my $par,"-|",qw(parallel --pipepart --block -1 --dryrun -vv sort),
         @Global::sortoptions, '::::', @files) || die "Cannot run parallel: $!";
    # The generated commands are read one per line. -z sets $/ to "\0",
    # so force newline as the record separator while the commands are
    # read and chomped; otherwise all commands arrive as one record and
    # nothing is merged.
    my @cmd = do {
        local $/ = "\n";
        merge(<$par>);
    };
    close $par;
    # The command uses <(...) so it is incompatible with /bin/sh
    open(my $bash,"|-","bash") || die "Cannot run bash: $!";
    print $bash @cmd;
    # The exit value is not checked: the command ends in mbuffer, which
    # gives errors when a pipe is closed
    close $bash;
}
|
|
||||||
|
|
||||||
sub sort_stdin {
    # Sort stdin in parallel and print the merged result on stdout
    # stdin is distributed round robin through fifos to one 'sort' per
    # CPU thread and the sorted outputs are merged
    # Uses:
    #   @Global::sortoptions = options passed on to sort
    my $numthreads = `parallel --number-of-threads`;
    $numthreads = "" if not defined $numthreads;
    # Backticks keep the trailing newline: remove it before the value is
    # used as the argument for -j (not chomp: that depends on $/)
    $numthreads =~ s/\s+\z//;
    if($numthreads !~ /\A[1-9]\d*\z/) {
        die "Cannot get the number of threads from 'parallel --number-of-threads'";
    }
    # tmpfifo creates the fifos
    my @fifos = map { tmpfifo() } 1..$numthreads;
    # This trick removes the fifo as soon as it is connected in the other end
    # (rm fifo; ...) < fifo
    my @cmd = map { "(rm $_; sort @Global::sortoptions) < $_" } @fifos;
    @cmd = merge(@cmd);
    # Fork before bash is started: the child must inherit the original
    # stdin, which it distributes to the fifos
    my $pid = fork;
    if(not defined $pid) {
        # Without this check a failed fork would exec parallel in this
        # process
        my $err = $!;
        unlink @fifos;
        die "Cannot fork: $err";
    }
    if($pid == 0) {
        exec(qw(parallel -j),$numthreads,
             # 1M 30M = 43s
             # 3M 30M = 59s
             # 300k 30M = 40-45s
             # 100k 30M = 47s
             # 500k 30M = 44s
             # 300k 10M = 41-45s
             # 256k 10M = 44s
             # 300k 3M = 42-45s
             # 300k - = 47s
             qw(--block 256k --pipe --roundrobin mbuffer -v0 -q -m 10M > {} :::),@fifos);
        # Only reached if exec failed: do not continue as a second parent
        die "Cannot run parallel: $!";
    }
    # The command uses <(...) so it is incompatible with /bin/sh
    open(my $bash,"|-","bash") || die "Cannot run bash: $!";
    print $bash @cmd;
    close $bash;
}
|
|
||||||
|
|
||||||
# Files on the command line are sorted as files; otherwise stdin is sorted
my $sorter = @ARGV ? \&sort_files : \&sort_stdin;
$sorter->();
|
|
||||||
|
|
||||||
# Test
|
|
||||||
# -z
|
|
||||||
# OK: cat bigfile | parsort
|
|
||||||
# OK: parsort -k4n files*.txt
|
|
||||||
# OK: parsort files*.txt
|
|
||||||
# OK: parsort "file with space"
|
|
||||||
|
|
39
plotpipe/README
Normal file
39
plotpipe/README
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
PLOTPIPE
|
||||||
|
|
||||||
|
- plot data from a pipe -
|
||||||
|
|
||||||
|
|
||||||
|
URL: https://gitlab.com/ole.tange/tangetools/-/tree/master/plotpipe
|
||||||
|
|
||||||
|
We have all been there: You have a bunch of data from a pipe that you
|
||||||
|
would like to get a better understanding of.
|
||||||
|
|
||||||
|
You know you can plot them by saving the data to a file, opening the
|
||||||
|
file in a spreadsheet, and making a graph; but it is just too much
|
||||||
|
bother because you do not need a fancy graph: You just need a quick
|
||||||
|
graph based on the data, and spending 5 minutes on generating that
|
||||||
|
graph is just too much hassle.
|
||||||
|
|
||||||
|
Plotpipe is designed for this situation.
|
||||||
|
|
||||||
|
Plotpipe reads data from a pipe (or a file) and plots it. If the input
|
||||||
|
is a CSV-file it tries to autodetect the separator and whether there
|
||||||
|
is a column header. It assumes the first column is the x-axis and that
|
||||||
|
all other columns are data series. If there is only a single
|
||||||
|
column, the line number is treated as the x-axis.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
seq 1 100 | plotpipe
|
||||||
|
seq 1 100 | shuf | plotpipe
|
||||||
|
paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) | plotpipe
|
||||||
|
(echo "#Title"; echo "#Subtitle";
|
||||||
|
echo "Column1 Column2 Column3";
|
||||||
|
paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) ) | plotpipe
|
||||||
|
|
||||||
|
Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free Software
|
||||||
|
Foundation, Inc.
|
||||||
|
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||||
|
This is free software: you are free to change and redistribute it.
|
||||||
|
GNU plotpipe comes with no warranty.
|
|
@ -59,20 +59,21 @@ usage will be 10 times I<blocksize> per CPU core. Default is 100M.
|
||||||
|
|
||||||
# Generate 100000x100000 matrix
|
# Generate 100000x100000 matrix
|
||||||
100kx100k() {
|
100kx100k() {
|
||||||
100000x() {
|
XbyY() {
|
||||||
while seq 123456 | shuf; do true; done |
|
while seq 123456 | shuf; do true; done |
|
||||||
paste $(perl -e 'print map {"- "} 1..100000') |
|
paste $(perl -e 'print map {"- "} 1..'$1) |
|
||||||
head -n $1
|
head -n $2
|
||||||
}
|
}
|
||||||
export -f 100000x
|
export -f XbyY
|
||||||
seq 1000 | parallel --nice 18 --delay 0.05 --files 100000x 100 |
|
seq 1000 |
|
||||||
|
parallel --nice 18 --delay 0.05 --files XbyY 100000 100 |
|
||||||
parallel -uj1 'cat {}; nice rm {} &'
|
parallel -uj1 'cat {}; nice rm {} &'
|
||||||
}
|
}
|
||||||
100kx100k > 100kx100k
|
100kx100k > 100kx100k
|
||||||
# Transpose it
|
# Transpose it
|
||||||
transpose 100kx100k > 100kx100k.t
|
transpose 100kx100k > 100kx100k.t
|
||||||
|
|
||||||
This takes around 700 MB/core and 20 minutes to run on 64C64T.
|
This takes around 1 GB/core and 18 minutes to run on 64C64T.
|
||||||
|
|
||||||
|
|
||||||
=head1 LIMITATIONS
|
=head1 LIMITATIONS
|
||||||
|
@ -107,7 +108,7 @@ cleaned up, if B<transpose> is stopped abnormally (e.g. killed).
|
||||||
|
|
||||||
=head1 REPORTING BUGS
|
=head1 REPORTING BUGS
|
||||||
|
|
||||||
Report bugs to <tange@gnu.org>.
|
Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues
|
||||||
|
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
@ -502,7 +503,7 @@ main() {
|
||||||
block_size=100M
|
block_size=100M
|
||||||
while getopts ":b:d:V" o; do
|
while getopts ":b:d:V" o; do
|
||||||
case "$o" in
|
case "$o" in
|
||||||
d)
|
(d)
|
||||||
# Convert \t to TAB using printf
|
# Convert \t to TAB using printf
|
||||||
d=$(printf "$OPTARG")
|
d=$(printf "$OPTARG")
|
||||||
if [ "'" = "$d" ] ; then
|
if [ "'" = "$d" ] ; then
|
||||||
|
@ -511,14 +512,14 @@ main() {
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
b)
|
(b)
|
||||||
block_size="$OPTARG"
|
block_size="$OPTARG"
|
||||||
;;
|
;;
|
||||||
V)
|
(V)
|
||||||
version
|
version
|
||||||
exit 0
|
exit 0
|
||||||
;;
|
;;
|
||||||
*)
|
(*)
|
||||||
usage
|
usage
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
@ -19,7 +19,6 @@ searchlen = int(sys.argv[1])
|
||||||
def readparts():
|
def readparts():
|
||||||
# Read file
|
# Read file
|
||||||
part = []
|
part = []
|
||||||
partno = 0
|
|
||||||
# Block of text ending in \n that is followed by a \t in next section
|
# Block of text ending in \n that is followed by a \t in next section
|
||||||
section = ""
|
section = ""
|
||||||
for i in sys.stdin:
|
for i in sys.stdin:
|
||||||
|
@ -61,7 +60,7 @@ def recur(pre,n):
|
||||||
|
|
||||||
bits = searchlen*4
|
bits = searchlen*4
|
||||||
part = readparts();
|
part = readparts();
|
||||||
tabs = math.ceil(bits/3.0)
|
tabs = int(math.ceil(bits/3.0))
|
||||||
if tabs > len(part)-1:
|
if tabs > len(part)-1:
|
||||||
print("Too few tabs: %s hex values is %s bits which needs %d tabs and there are only %s"
|
print("Too few tabs: %s hex values is %s bits which needs %d tabs and there are only %s"
|
||||||
% (searchlen,bits,tabs,len(part)))
|
% (searchlen,bits,tabs,len(part)))
|
||||||
|
|
Loading…
Reference in a new issue