From 8be6d39649bdf6f6aaf7863261337a67106d32a7 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Fri, 4 Dec 2020 18:33:50 +0100 Subject: [PATCH] plotpipe: Added README. --- README | 4 + decrypt-root-with-usb/README | 2 + parsort/parsort | 214 ----------------------------------- plotpipe/README | 39 +++++++ transpose/transpose | 25 ++-- whitehash/whitehash | 3 +- 6 files changed, 59 insertions(+), 228 deletions(-) delete mode 100755 parsort/parsort create mode 100644 plotpipe/README diff --git a/README b/README index 680a94f..5ff4ad3 100644 --- a/README +++ b/README @@ -30,10 +30,14 @@ mirrorpdf - mirror PDF-file horizontally. neno - no error no output. Only print STDERR and STDOUT if the command fails. +not - flip exit value of command. + off - turn off monitor. pdfman - convert man page to pdf and display it using evince. +plotpipe - plot CSV data from a pipe. + puniq - print unique lines the first time they are seen. ramusage - display the ram usage of a program using `time -v`. diff --git a/decrypt-root-with-usb/README b/decrypt-root-with-usb/README index 0ed89ea..82eff28 100644 --- a/decrypt-root-with-usb/README +++ b/decrypt-root-with-usb/README @@ -48,3 +48,5 @@ your initramfs, you need to add them by adding to When all is done, update the initramfs: update-initramfs -u + +(C) 2014 Ole Tange, GPLv2 or later diff --git a/parsort/parsort b/parsort/parsort deleted file mode 100755 index d25a567..0000000 --- a/parsort/parsort +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/perl - -=pod - -=head1 NAME - -parsort - Sort in parallel - - -=head1 SYNOPSIS - -B I - - -=head1 DESCRIPTION - -B uses B to sort in parallel. It works just like -B but faster, if you have a multicore machine. - -Hopefully these ideas will make it into GNU Sort in the future. - - -=head1 EXAMPLE - -Sort files: - - parsort *.txt > sorted.txt - -Sort stdin (standard input) numerically: - - cat numbers | parsort -n > sorted.txt - - -=head1 PERFORMANCE - -B is faster on files, because these can be read in parallel. - -On a 48 core machine you should see a speedup of 3x over B. - - -=head1 AUTHOR - -Copyright (C) 2020 Ole Tange, -http://ole.tange.dk and Free Software Foundation, Inc. - - -=head1 LICENSE - -Copyright (C) 2012 Free Software Foundation, Inc. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3 of the License, or -at your option any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - - -=head1 DEPENDENCIES - -B uses B, B, B, and B. - - -=head1 SEE ALSO - -B - - -=cut - -use strict; -use Getopt::Long; -use POSIX qw(mkfifo); - -Getopt::Long::Configure("bundling","require_order"); - -my @ARGV_before = @ARGV; -GetOptions( - "debug|D" => \$opt::D, - "version" => \$opt::version, - "verbose|v" => \$opt::verbose, - "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks, - "d|dictionary-order" => \$opt::dictionary_order, - "f|ignore-case" => \$opt::ignore_case, - "g|general-numeric-sort" => \$opt::general_numeric_sort, - "i|ignore-nonprinting" => \$opt::ignore_nonprinting, - "M|month-sort" => \$opt::month_sort, - "h|human-numeric-sort" => \$opt::human_numeric_sort, - "n|numeric-sort" => \$opt::numeric_sort, - "N|numascii" => \$opt::numascii, - "r|reverse" => \$opt::reverse, - "R|random-sort" => \$opt::random_sort, - "sort=s" => \$opt::sort, - "V|version-sort" => \$opt::version_sort, - "k|key=s" => \@opt::key, - "t|field-separator=s" => \$opt::field_separator, - "z|zero-terminated" => \$opt::zero_terminated, - ) || exit(255); -$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1]; -$Global::version = 20200411; -if($opt::version) { version(); exit 0; } -if($opt::zero_terminated) { $/ = "\0"; } -@Global::sortoptions = @ARGV_before[0..($#ARGV_before-$#ARGV-1)]; -$ENV{'TMPDIR'} ||= "/tmp"; - -sub merge { - # Input: - # @cmd = commands to 'cat' (part of) a file - my @cmd = @_; - chomp(@cmd); - while($#cmd > 0) { - my @tmp; - while($#cmd >= 0) { - my $a = shift @cmd; - my $b = shift @cmd; - $a &&= "<($a)"; - $b &&= "<($b)"; - # Ignore errors from mbuffer - it gives errors when a pipe is closed - push @tmp, "sort -m @Global::sortoptions $a $b | mbuffer -v0 -q -m 30M;"; - } - @cmd = @tmp; - } - return @cmd; -} - -sub tmpname { - # Select a name that does not exist - # Do not create the file as it may be used for creating a socket (by tmux) - # Remember the name in $Global::unlink to avoid hitting the same name twice - my $name = shift; - my($tmpname); - if(not -w $ENV{'TMPDIR'}) { - if(not -e $ENV{'TMPDIR'}) { - ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'"); - } else { - ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'"); - } - ::wait_and_exit(255); - } - do { - $tmpname = $ENV{'TMPDIR'}."/".$name. - join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); - } while(-e $tmpname or $Global::unlink{$tmpname}++); - return $tmpname; -} - -sub tmpfifo { - # Find an unused name and mkfifo on it - my $tmpfifo = tmpname("psort"); - mkfifo($tmpfifo,0600); - return $tmpfifo; -} - -sub sort_files { - my @files = @ARGV; - # Let GNU Parallel generate the commands to read parts of files - # The commands split at \n and there will be at least one for each CPU thread - open(my $par,"-|",qw(parallel --pipepart --block -1 --dryrun -vv sort), - @Global::sortoptions, '::::', @files) || die; - my @cmd = merge(<$par>); - close $par; - # The command uses <(...) so it is incompatible with /bin/sh - open(my $bash,"|-","bash") || die; - print $bash @cmd; - close $bash; -} - -sub sort_stdin { - my $numthreads = `parallel --number-of-threads`; - my @fifos = map { tmpfifo() } 1..$numthreads; - map { mkfifo($_,0600) } @fifos; - # This trick removes the fifo as soon as it is connected in the other end - # (rm fifo; ...) < fifo - my @cmd = map { "(rm $_; sort @Global::sortoptions) < $_" } @fifos; - @cmd = merge(@cmd); - if(fork) { - } else { - exec(qw(parallel -j),$numthreads, - # 1M 30M = 43s - # 3M 30M = 59s - # 300k 30M = 40-45s - # 100k 30M = 47s - # 500k 30M = 44s - # 300k 10M = 41-45s - # 256k 10M = 44s - # 300k 3M = 42-45s - # 300k - = 47s - qw(--block 256k --pipe --roundrobin mbuffer -v0 -q -m 10M > {} :::),@fifos); - } - # The command uses <(...) so it is incompatible with /bin/sh - open(my $bash,"|-","bash") || die; - print $bash @cmd; - close $bash; -} - -if(@ARGV) { - sort_files(); -} else { - sort_stdin(); -} - -# Test -# -z -# OK: cat bigfile | parsort -# OK: parsort -k4n files*.txt -# OK: parsort files*.txt -# OK: parsort "file with space" - diff --git a/plotpipe/README b/plotpipe/README new file mode 100644 index 0000000..e1c92e3 --- /dev/null +++ b/plotpipe/README @@ -0,0 +1,39 @@ + + PLOTPIPE + + - plot data from a pipe - + + +URL: https://gitlab.com/ole.tange/tangetools/-/tree/master/plotpipe + +We have all been there: You have a bunch of data from a pipe that you +would like to get a better understanding of. + +You know you can plot them by saving the data to a file, opening the +file in a spreadsheet, and making a graph; but it is just too much +bother because you do not need a fancy graph: You just need a quick +graph based on the data, and spending 5 minutes on generating that +graph is just too much hassle. + +Plotpipe is designed for this situation. + +Plotpipe reads data from a pipe (or a file) and plots it. If the input +is a CSV-file it tries to autodetect the separator and whether there +is a column header. It assumes the first column is the x-axis and that +all other columns are data series. If there is only a single +column, the line number is treated as the x-axis. + +Examples: + + seq 1 100 | plotpipe + seq 1 100 | shuf | plotpipe + paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) | plotpipe + (echo "#Title"; echo "#Subtitle"; + echo "Column1 Column2 Column3"; + paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) ) | plotpipe + +Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free Software +Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later +This is free software: you are free to change and redistribute it. +GNU plotpipe comes with no warranty. diff --git a/transpose/transpose b/transpose/transpose index d155ac5..b97c717 100755 --- a/transpose/transpose +++ b/transpose/transpose @@ -59,20 +59,21 @@ usage will be 10 times I per CPU core. Default is 100M. # Generate 100000x100000 matrix 100kx100k() { - 100000x() { + XbyY() { while seq 123456 | shuf; do true; done | - paste $(perl -e 'print map {"- "} 1..100000') | - head -n $1 + paste $(perl -e 'print map {"- "} 1..'$1) | + head -n $2 } - export -f 100000x - seq 1000 | parallel --nice 18 --delay 0.05 --files 100000x 100 | - parallel -uj1 'cat {}; nice rm {} &' + export -f XbyY + seq 1000 | + parallel --nice 18 --delay 0.05 --files XbyY 100000 100 | + parallel -uj1 'cat {}; nice rm {} &' } 100kx100k > 100kx100k # Transpose it transpose 100kx100k > 100kx100k.t -This takes around 700 MB/core and 20 minutes to run on 64C64T. +This takes around 1 GB/core and 18 minutes to run on 64C64T. =head1 LIMITATIONS @@ -107,7 +108,7 @@ cleaned up, if B is stopped abnormally (e.g. killed). =head1 REPORTING BUGS -Report bugs to . +Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues =head1 AUTHOR @@ -502,7 +503,7 @@ main() { block_size=100M while getopts ":b:d:V" o; do case "$o" in - d) + (d) # Convert \t to TAB using printf d=$(printf "$OPTARG") if [ "'" = "$d" ] ; then @@ -511,14 +512,14 @@ main() { exit 0 fi ;; - b) + (b) block_size="$OPTARG" ;; - V) + (V) version exit 0 ;; - *) + (*) usage ;; esac diff --git a/whitehash/whitehash b/whitehash/whitehash index edd3e2e..58f1d57 100755 --- a/whitehash/whitehash +++ b/whitehash/whitehash @@ -19,7 +19,6 @@ searchlen = int(sys.argv[1]) def readparts(): # Read file part = [] - partno = 0 # Block of text ending in \n that is followed by a \t in next section section = "" for i in sys.stdin: @@ -61,7 +60,7 @@ def recur(pre,n): bits = searchlen*4 part = readparts(); -tabs = math.ceil(bits/3.0) +tabs = int(math.ceil(bits/3.0)) if tabs > len(part)-1: print("Too few tabs: %s hex values is %s bits which needs %d tabs and there are only %s" % (searchlen,bits,tabs,len(part)))