plotpipe: Added README.

2020-12-04 18:33:50 +01:00 · 2020-12-04 18:33:50 +01:00 · 8be6d39649
parent 6d141ac74e
commit 8be6d39649
6 changed files with 59 additions and 228 deletions
--- a/4
+++ b/4
@ -30,10 +30,14 @@ mirrorpdf - mirror PDF-file horizontally.

 neno - no error no output. Only print STDERR and STDOUT if the command fails.

+not - flip exit value of command.
+
 off - turn off monitor.

 pdfman - convert man page to pdf and display it using evince.

+plotpipe - plot CSV data from a pipe.
+
 puniq - print unique lines the first time they are seen.

 ramusage - display the ram usage of a program using `time -v`.
--- a/decrypt-root-with-usb/README
+++ b/decrypt-root-with-usb/README
@ -48,3 +48,5 @@ your initramfs, you need to add them by adding to
 When all is done, update the initramfs:

    update-initramfs -u
+
+(C) 2014 Ole Tange, GPLv2 or later
--- a/parsort/parsort
+++ b/parsort/parsort
@ -1,214 +0,0 @@
-#!/usr/bin/perl
-
-=pod
-
-=head1 NAME
-
-parsort - Sort in parallel
-
-
-=head1 SYNOPSIS
-
-B<parsort> I<options for sort>
-
-
-=head1 DESCRIPTION
-
-B<parsort> uses B<sort> to sort in parallel. It works just like
-B<sort> but faster, if you have a multicore machine.
-
-Hopefully these ideas will make it into GNU Sort in the future.
-
-
-=head1 EXAMPLE
-
-Sort files:
-
-  parsort *.txt > sorted.txt
-
-Sort stdin (standard input) numerically:
-
-  cat numbers | parsort -n > sorted.txt
-
-
-=head1 PERFORMANCE
-
-B<parsort> is faster on files, because these can be read in parallel.
-
-On a 48 core machine you should see a speedup of 3x over B<sort>.
-
-
-=head1 AUTHOR
-
-Copyright (C) 2020 Ole Tange,
-http://ole.tange.dk and Free Software Foundation, Inc.
-
-
-=head1 LICENSE
-
-Copyright (C) 2012 Free Software Foundation, Inc.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3 of the License, or
-at your option any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
-=head1 DEPENDENCIES
-
-B<parsort> uses B<sort>, B<bash>, B<parallel>, and B<mbuffer>.
-
-
-=head1 SEE ALSO
-
-B<sort>
-
-
-=cut
-
-use strict;
-use Getopt::Long;
-use POSIX qw(mkfifo);
-
-Getopt::Long::Configure("bundling","require_order");
-
-my @ARGV_before = @ARGV;
-GetOptions(
-    "debug|D" => \$opt::D,
-    "version" => \$opt::version,
-    "verbose|v" => \$opt::verbose,
-    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
-    "d|dictionary-order" => \$opt::dictionary_order,
-    "f|ignore-case" => \$opt::ignore_case,
-    "g|general-numeric-sort" => \$opt::general_numeric_sort,
-    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
-    "M|month-sort" => \$opt::month_sort,
-    "h|human-numeric-sort" => \$opt::human_numeric_sort,
-    "n|numeric-sort" => \$opt::numeric_sort,
-    "N|numascii" => \$opt::numascii,
-    "r|reverse" => \$opt::reverse,
-    "R|random-sort" => \$opt::random_sort,
-    "sort=s" => \$opt::sort,
-    "V|version-sort" => \$opt::version_sort,
-    "k|key=s" => \@opt::key,
-    "t|field-separator=s" => \$opt::field_separator,
-    "z|zero-terminated" => \$opt::zero_terminated,
-    ) || exit(255);
-$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
-$Global::version = 20200411;
-if($opt::version) { version(); exit 0; }
-if($opt::zero_terminated) { $/ = "\0"; }
-@Global::sortoptions = @ARGV_before[0..($#ARGV_before-$#ARGV-1)];
-$ENV{'TMPDIR'} ||= "/tmp";
-
-sub merge {
-    # Input:
-    #   @cmd = commands to 'cat' (part of) a file
-    my @cmd = @_;
-    chomp(@cmd);
-    while($#cmd > 0) {
-	my @tmp;
-	while($#cmd >= 0) {
-	    my $a = shift @cmd;
-	    my $b = shift @cmd;
-	    $a &&= "<($a)";
-	    $b &&= "<($b)";
-	    # Ignore errors from mbuffer - it gives errors when a pipe is closed
-	    push @tmp, "sort -m @Global::sortoptions $a $b | mbuffer -v0 -q -m 30M;";
-	}
-	@cmd = @tmp;
-    }
-    return @cmd;
-}
-
-sub tmpname {
-    # Select a name that does not exist
-    # Do not create the file as it may be used for creating a socket (by tmux)
-    # Remember the name in $Global::unlink to avoid hitting the same name twice
-    my $name = shift;
-    my($tmpname);
-    if(not -w $ENV{'TMPDIR'}) {
-	if(not -e $ENV{'TMPDIR'}) {
-	    ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
-	} else {
-	    ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
-	}
-	::wait_and_exit(255);
-    }
-    do {
-	$tmpname = $ENV{'TMPDIR'}."/".$name.
-	    join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
-    } while(-e $tmpname or $Global::unlink{$tmpname}++);
-    return $tmpname;
-}
-
-sub tmpfifo {
-    # Find an unused name and mkfifo on it
-    my $tmpfifo = tmpname("psort");
-    mkfifo($tmpfifo,0600);
-    return $tmpfifo;
-}
-
-sub sort_files {
-    my @files = @ARGV;
-    # Let GNU Parallel generate the commands to read parts of files
-    # The commands split at \n and there will be at least one for each CPU thread
-    open(my $par,"-|",qw(parallel --pipepart --block -1 --dryrun -vv sort),
-	 @Global::sortoptions, '::::', @files) || die;
-    my @cmd = merge(<$par>);
-    close $par;
-    # The command uses <(...) so it is incompatible with /bin/sh
-    open(my $bash,"|-","bash") || die;
-    print $bash @cmd;
-    close $bash;
-}
-
-sub sort_stdin {
-    my $numthreads = `parallel --number-of-threads`;
-    my @fifos = map { tmpfifo() } 1..$numthreads;
-    map { mkfifo($_,0600) } @fifos;
-    # This trick removes the fifo as soon as it is connected in the other end
-    # (rm fifo; ...) < fifo 
-    my @cmd = map { "(rm $_; sort @Global::sortoptions) < $_" } @fifos;
-    @cmd = merge(@cmd);
-    if(fork) {
-    } else {
-	exec(qw(parallel -j),$numthreads,
-	     # 1M 30M = 43s
-	     # 3M 30M = 59s
-	     # 300k 30M = 40-45s
-	     # 100k 30M = 47s
-	     # 500k 30M = 44s
-	     # 300k 10M = 41-45s
-	     # 256k 10M = 44s
-	     # 300k 3M = 42-45s
-	     # 300k - = 47s
-	     qw(--block 256k --pipe --roundrobin mbuffer -v0 -q -m 10M > {} :::),@fifos);
-    }
-    # The command uses <(...) so it is incompatible with /bin/sh
-    open(my $bash,"|-","bash") || die;
-    print $bash @cmd;
-    close $bash;   
-}
-
-if(@ARGV) {
-    sort_files();
-} else {
-    sort_stdin();
-}
-
-# Test
-# -z
-# OK: cat bigfile | parsort
-# OK: parsort -k4n files*.txt
-# OK: parsort files*.txt
-# OK: parsort "file with space"
-	
--- a/plotpipe/README
+++ b/plotpipe/README
@ -0,0 +1,39 @@
+
+			       PLOTPIPE
+
+		      - plot data from a pipe -
+
+
+URL: https://gitlab.com/ole.tange/tangetools/-/tree/master/plotpipe
+
+We have all been there: You have a bunch of data from a pipe that you
+would like to get a better understanding of.
+
+You know you can plot it by saving the data to a file, opening the
+file in a spreadsheet, and making a graph; but it is just too much
+bother because you do not need a fancy graph: You just need a quick
+graph based on the data, and spending 5 minutes on generating that
+graph is just too much hassle.
+
+Plotpipe is designed for this situation.
+
+Plotpipe reads data from a pipe (or a file) and plots it. If the input
+is a CSV-file it tries to autodetect the separator and whether there
+is a column header. It assumes the first column is the x-axis and that
+all other columns are data series. If there is only a single
+column, the line number is treated as the x-axis.
+
+Examples:
+
+    seq 1 100 | plotpipe
+    seq 1 100 | shuf | plotpipe
+    paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) | plotpipe
+    (echo "#Title"; echo "#Subtitle";
+     echo "Column1 Column2 Column3";
+     paste <(seq 1 100) <(seq 1 100) <(seq 1 100 | shuf) ) | plotpipe
+
+Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free Software
+Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+This is free software: you are free to change and redistribute it.
+GNU plotpipe comes with no warranty.
--- a/transpose/transpose
+++ b/transpose/transpose
@ -59,20 +59,21 @@ usage will be 10 times I<blocksize> per CPU core. Default is 100M.

    # Generate 100000x100000 matrix
    100kx100k() {
-        100000x() {
+        XbyY() {
            while seq 123456 | shuf; do true; done |
-                paste $(perl -e 'print map {"- "} 1..100000') |
-                head -n $1
+                paste $(perl -e 'print map {"- "} 1..'$1) |
+                head -n $2
        }
-        export -f 100000x
-        seq 1000 | parallel --nice 18 --delay 0.05 --files 100000x 100 |
-            parallel -uj1 'cat {}; nice rm {} &'
+        export -f XbyY
+        seq 1000 |
+          parallel --nice 18 --delay 0.05 --files XbyY 100000 100 |
+          parallel -uj1 'cat {}; nice rm {} &'
    }
    100kx100k > 100kx100k
    # Transpose it
    transpose 100kx100k > 100kx100k.t

-This takes around 700 MB/core and 20 minutes to run on 64C64T.
+This takes around 1 GB/core and 18 minutes to run on 64C64T.


 =head1 LIMITATIONS
@ -107,7 +108,7 @@ cleaned up, if B<transpose> is stopped abnormally (e.g. killed).

 =head1 REPORTING BUGS

-Report bugs to <tange@gnu.org>.
+Report bugs: https://gitlab.com/ole.tange/tangetools/-/issues


 =head1 AUTHOR
@ -502,7 +503,7 @@ main() {
    block_size=100M
    while getopts ":b:d:V" o; do
 	case "$o" in
-	    d)
+	    (d)
 		# Convert \t to TAB using printf
 		d=$(printf "$OPTARG")
 		if [ "'" = "$d" ] ; then
@ -511,14 +512,14 @@ main() {
 		    exit 0
 		fi
 		;;
-	    b)
+	    (b)
 		block_size="$OPTARG"
 		;;
-	    V)
+	    (V)
 		version
 		exit 0
 		;;
-	    *)
+	    (*)
 		usage
 		;;
 	esac
--- a/whitehash/whitehash
+++ b/whitehash/whitehash
@ -19,7 +19,6 @@ searchlen = int(sys.argv[1])
 def readparts():
    # Read file
    part = []
-    partno = 0
    # Block of text ending in \n that is followed by a \t in next section
    section = ""
    for i in sys.stdin:
@ -61,7 +60,7 @@ def recur(pre,n):
            
 bits = searchlen*4
 part = readparts();
-tabs = math.ceil(bits/3.0)
+tabs = int(math.ceil(bits/3.0))
 if tabs > len(part)-1:
    print("Too few tabs: %s hex values is %s bits which needs %d tabs and there are only %s"
          % (searchlen,bits,tabs,len(part)))