transpose: parallelized.
wait-for-port-open: -q (quiet) implemented.
This commit is contained in:
parent 0a9b2b3503
commit f6a34e1200

@@ -1,125 +1,160 @@
-#!/usr/bin/perl -w
+#!/bin/bash
 
-use English;
-use FileCache;
-use File::Temp;
-
 # transpose [-d delimiter] [-b blocksize] table.csv > transposed.csv
 # cat table.csv | transpose [-d delimiter] [-b blocksize] > transposed.csv
 
-my $delimiter = shift;
-my $buffer = shift;
-
-$delimiter ||= ",";
-# Use at most 1000M before flushing
-$buffer ||= "1000M";
-$buffer = multiply_binary_prefix($buffer);
-# Perl makes the buffer balloon to 10 times the requested value
-$buffer /= 10;
-# max_col_size will be lowered after first line read.
-$max_col_size = $buffer;
-my $delimiter_regexp = $delimiter;
-$delimiter_regexp =~ s/(\W)/\\$1/g;
-my @current;
-my $col_no_last_line = 0;
-my $lineno = 0;
-my %col;
-while(<>) {
-    chomp;
-    my $col_no = 0;
-    my @to_be_flushed = ();
-    map {
-        push(@{$col{$col_no}},$_);
-        $col_size{$col_no} += length $_;
-        if($col_size{$col_no} > $max_col_size) {
-            push @to_be_flushed, $col_no;
-            $col_size{$col_no} = 0;
-        }
-        $col_no++;
-    } split /$delimiter_regexp/o, $_; # This should do de-csv'ing
-    if(@to_be_flushed) {
-        flush(\%col,@to_be_flushed);
-    }
-    if($col_no != $col_no_last_line) {
-        if(0 == $col_no_last_line) {
-            # This is first time around
-            $col_no_last_line = $col_no;
-            $max_col_size = $buffer/$col_no_last_line;
-        } else {
-            warning("Number of columns in line $NR: $col_no != $col_no_last_line\n");
-        }
-    }
-}
-flush(\%col);
-merge();
-
-sub flush {
-    my $col_ref = shift;
-    my @cols_to_flush = @_;
-    if(not @cols_to_flush) {
-        @cols_to_flush = keys %$col_ref;
-    }
-    for my $c (@cols_to_flush) {
-        $Global::tempfile{$c} ||= tmpnam();
-        my $fh = cacheout $Global::tempfile{$c};
-        # This will print one delimiter too much, which we will deal with later
-        print $fh map { $_,$delimiter } @{$col_ref->{$c}};
-        delete $col_ref->{$c};
-    }
-}
-
-sub merge {
-    for my $c (sort keys %Global::tempfile) {
-        my $fh = cacheout $Global::tempfile{$c};
-        # truncate by length of delimiter to get rid of the last $delimiter
-        seek $fh,-length($delimiter),SEEK_END;
-        truncate $fh, tell $fh;
-        # Make sure the file is closed for writing
-        close $fh;
-        open($fh, "<", $Global::tempfile{$c}) || die;
-        my $buf;
-        while(sysread($fh,$buf,1000_000)) {
-            print $buf;
-        }
-        print "\n";
-        unlink $Global::tempfile{$c};
-    }
-}
-
-sub warning {
-    my @w = @_;
-    print STDERR "transpose: Warning: ", @w;
-}
-
-sub error {
-    my @w = @_;
-    print STDERR "transpose: Error: ", @w;
-}
-
-sub multiply_binary_prefix {
-    # Evaluate numbers with binary prefix
-    # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
-    # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^60, Z=2^70, Y=2^80
-    # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80
-    # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^60, zi=2^70, yi=2^80
-    # 13G = 13*1024*1024*1024 = 13958643712
-    my $s = shift;
-    $s =~ s/k/*1000/g;
-    $s =~ s/M/*1000*1000/g;
-    $s =~ s/G/*1000*1000*1000/g;
-    $s =~ s/T/*1000*1000*1000*1000/g;
-    $s =~ s/P/*1000*1000*1000*1000*1000/g;
-    $s =~ s/E/*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
-
-    $s =~ s/Ki?/*1024/gi;
-    $s =~ s/Mi?/*1024*1024/gi;
-    $s =~ s/Gi?/*1024*1024*1024/gi;
-    $s =~ s/Ti?/*1024*1024*1024*1024/gi;
-    $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s = eval $s;
-    return $s;
-}
+transpose_inner() {
+    # simple in-memory transpose
+    # -d sep
+    # Input:
+    #   data to be transposed
+    # Output:
+    #   transposed data
+    perl <(cat <<'cut-here-UbsAqi0j6GoOuk5W5yWA'
+use Text::CSV;
+use Getopt::Long;
+
+Getopt::Long::Configure("bundling","require_order");
+my $retval = GetOptions("debug|D=s" => \$opt::debug,
+                        "delimiter|d=s" => \$opt::delimiter,
+                        "verbose|v" => \@opt::verbose,
+                        "simple|s" => \$opt::simple,
+    );
+
+if(defined $opt::delimiter) {
+    simple();
+} else {
+    die("-d must be set");
+}
+
+sub simple {
+    my (@table);
+    my $col = 0;
+    my $csv_setting = { binary => 1, sep_char => $opt::delimiter };
+    my $sep = $csv_setting->{sep_char};
+    my $csv = Text::CSV->new($csv_setting)
+        or die "Cannot use CSV: ".Text::CSV->error_diag ();
+
+    while(my $l = <>) {
+        if(not $csv->parse($l)) {
+            die "CSV has unexpected format";
+        }
+        # append to each row
+        my $row = 0;
+        for($csv->fields()) {
+            $table[$row][$col] = defined($_) ? $_ : '';
+            $row++;
+        }
+        $col++;
+    }
+    print map { join($sep,@$_),"\n" } @table;
+}
+cut-here-UbsAqi0j6GoOuk5W5yWA
+    ) "$@"
+}
+export -f transpose_inner
+
+stdin_to_paste_files() {
+    # Run transpose_inner on blocks from stdin
+    # output each block as file name
+    local block_size
+    local sep
+    block_size="$1"
+    sep="$2"
+    PARALLEL="-k --files --block $block_size" \
+        parallel --pipe transpose_inner -d "'$sep'"
+}
+
+file_to_paste_files() {
+    # Run transpose_inner on blocks from $file
+    # output each block as file name
+    local block_size
+    local sep
+    block_size="$1"
+    sep="$2"
+    file="$3"
+    PARALLEL="-k --files --block $block_size" \
+        parallel --pipepart -a "$file" transpose_inner -d "'$sep'"
+}
+
+super_paste() {
+    # Like 'paste' up to 1000000 files
+    # The files are read from stdin
+    local sep
+    local paste_files
+    local fifo
+    sep="$1"
+    paste_files=`tempfile`
+    # basename
+    fifo=`tempfile`
+    rm $fifo
+    cat > $paste_files
+
+    # Define replacement string {0#} to 0-pad job number
+    PARALLEL="--rpl "\''{0#} $f=1+int("".(log(total_jobs())/log(10)));
+                      $_=sprintf("%0${f}d",seq())'\'
+
+    # Make fifos that can be read from
+    cat $paste_files | parallel -n1000 "rm -f $fifo{0#}; mkfifo $fifo{0#}"
+
+    # Start a paste process for every 1000 files
+    cat $paste_files | parallel -n1000 -j0 "paste -d '$sep' {} > $fifo{0#}" &
+
+    # Paste all the fifos
+    eval paste -d "'$sep'" $fifo*
+
+    # Cleanup
+    cat $paste_files | parallel -n1000 "rm -f {} $fifo{0#}"
+    rm $paste_files
+}
+
+stdin_detect_sep() {
+    # Read the first 3 lines and detect the separator
+    # Save the read input to file
+    local file
+    file="$1"
+    # TODO
+    echo "$d"
+}
+
+usage() {
+    echo "Usage: $0 [-d delimiter] [-b blocksize]" 1>&2; exit 1;
+}
+
+block_size=10M
+while getopts ":b:d:" o; do
+    case "${o}" in
+        d)
+            d="$(printf "${OPTARG}")"
+            if [ "'" = "${d}" ] ; then
+                echo "Delimiter cannot be '"
+                usage
+                exit
+            fi
+            ;;
+        b)
+            block_size="${OPTARG}"
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${d}" ] ; then
+    d="$(printf "\t")"
+fi
+
+# Sep cannot be '
+file="$@"
+first_lines=`tempfile`
+if [ -z "$file" ]; then
+    sep="$(stdin_detect_sep $first_lines)"
+    (cat $first_lines; rm $first_lines; cat) |
+        stdin_to_paste_files $block_size "$sep" | super_paste "$sep"
+else
+    sep="$(stdin_detect_sep < "$file" $first_lines)"
+    rm $first_lines
+    file_to_paste_files $block_size "$sep" "$file" | super_paste "$sep"
+fi
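A quick way to sanity-check the parallelized script (a minimal sketch, assuming the script is installed as transpose on $PATH and GNU Parallel is available; table.csv is a throwaway example file):

    printf '1,2,3\n4,5,6\n' > table.csv
    transpose -d , table.csv
    # expected output:
    # 1,4
    # 2,5
    # 3,6
    cat table.csv | transpose -d , -b 10M   # same result via stdin, explicit block size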
treeoflife/treeoflife (new executable file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Download tree-of-life.jpg
+# from http://www.open.edu/openlearn/nature-environment/natural-history/tree-life
+
+base=www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup
+parallel -j30 wget -c $base{3}/6-{1}-{2}.jpg ::: {0..28} ::: {4..42} ::: {1..7}
+parallel eval convert +append 6-{0..28}-{}.jpg line{}.jpg ::: {4..42}
+convert -append line{4..42}.jpg tree-of-life.jpg
+
+echo Cleanup with
+echo rm line*.jpg 6-*-*.jpg
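The three ::: lists make parallel enumerate every (column, row, group) tile index, so the wget jobs fetch names like 6-0-4.jpg from TileGroup1. A reduced dry-run sketch with echo instead of wget (the index values here are picked arbitrarily):

    base=www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup
    parallel -k echo $base{3}/6-{1}-{2}.jpg ::: 0 1 ::: 4 ::: 1
    # www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup1/6-0-4.jpg
    # www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup1/6-1-4.jpg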
@@ -1,5 +1,19 @@
 #!/bin/bash
 
+QUIET=false
+
+while getopts ":q" opt; do
+    case $opt in
+        q)
+            QUIET=true
+            shift
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            ;;
+    esac
+done
+
 HOST=$1
 PORT=$2
 
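Note the shift inside the q) branch: it removes the parsed -q so that $1 and $2 still hold host and port after the loop. A standalone sketch of the pattern (not part of the script; the arguments are arbitrary examples):

    set -- -q example.com 80
    QUIET=false
    while getopts ":q" opt; do
        case $opt in
            q) QUIET=true; shift ;;
        esac
    done
    echo "$QUIET $1 $2"   # prints: true example.com 80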
@@ -13,7 +27,11 @@ usage () {
 }
 
 print_not_reachable () {
+    if $QUIET; then
+        true skip
+    else
         echo -n .
+    fi
 }
 
 is_port_open () {
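With -q the dot-per-retry progress output is suppressed; a typical use in a script (host and port are arbitrary examples, assuming the script's existing exit-on-success behaviour):

    wait-for-port-open -q localhost 22 && echo 'port 22 is open'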
@@ -70,7 +70,7 @@ if tty -s ; then
 # timeout 12 forever dmesg | puniq
 fi
 
-sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head
-sudo iwconfig wls1 essid Turris
-sudo dhclient wls1 &
-sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d &
+#sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head
+#sudo iwconfig wls1 essid Turris
+#sudo dhclient wls1 &
+#sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d &