From f6a34e120096dab27547d537b04a7ab8dca2834f Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Tue, 27 Mar 2018 02:56:54 +0200 Subject: [PATCH] transpose: parallelized. w4it-for-port-open: -q (quiet) implemented. --- transpose/transpose | 251 +++++++++++++++----------- treeoflife/treeoflife | 12 ++ w4it-for-port-open/w4it-for-port-open | 20 +- wifi-reload/wifi-reload | 8 +- 4 files changed, 178 insertions(+), 113 deletions(-) create mode 100755 treeoflife/treeoflife diff --git a/transpose/transpose b/transpose/transpose index 35da6e8..cc9eb24 100755 --- a/transpose/transpose +++ b/transpose/transpose @@ -1,125 +1,160 @@ -#!/usr/bin/perl -w +#!/bin/bash -use English; -use FileCache; -use File::Temp; +# transpose [-d delimiter] [-b blocksize] table.csv > transposed.csv +# cat table.csv | transpose [-d delimiter] [-b blocksize] > transposed.csv -my $delimiter = shift; -my $buffer = shift; +transpose_inner() { + # simple in-memory transpose + # -d sep + # Input: + # data to be transposed + # Output: + # transposed data + perl <(cat <<'cut-here-UbsAqi0j6GoOuk5W5yWA' +use Text::CSV; +use Getopt::Long; -$delimiter ||= ","; -# Use at most 1000M before flushing -$buffer ||= "1000M"; -$buffer = multiply_binary_prefix($buffer); -# Perl makes the buffer baloon to 10 times the requested value -$buffer /= 10; -# max_col_size will be lowered after first line read. -$max_col_size = $buffer; -my $delimiter_regexp = $delimiter; -$delimiter_regexp =~ s/(\W)/\\$1/g; -my @current; -my $col_no_last_line = 0; -my $lineno = 0; -my %col; -while(<>) { - chomp; - my $col_no = 0; - my @to_be_flushed = (); - map { - push(@{$col{$col_no}},$_); - $col_size{$col_no} += length $_; - if($col_size{$col_no} > $max_col_size) { - push @to_be_flushed, $col_no; - $col_size{$col_no} = 0; +Getopt::Long::Configure("bundling","require_order"); +my $retval = GetOptions("debug|D=s" => \$opt::debug, + "delimiter|d=s" => \$opt::delimiter, + "verbose|v" => \@opt::verbose, + "simple|s" => \$opt::simple, + ); + +if(defined $opt::delimiter) { + simple(); +} else { + die("-d must be set"); +} + +sub simple { + my (@table); + my $col = 0; + my $csv_setting = { binary => 1, sep_char => $opt::delimiter }; + my $sep = $csv_setting->{sep_char}; + my $csv = Text::CSV->new($csv_setting) + or die "Cannot use CSV: ".Text::CSV->error_diag (); + + while(my $l = <>) { + if(not $csv->parse($l)) { + die "CSV has unexpected format"; } - $col_no++; - } split /$delimiter_regexp/o, $_; # This should do de-csv'ing - if(@to_be_flushed) { - flush(\%col,@to_be_flushed); - } - if($col_no != $col_no_last_line) { - if(0 == $col_no_last_line) { - # This is first time around - $col_no_last_line = $col_no; - $max_col_size = $buffer/$col_no_last_line; - } else { - warning("Number of columns in line $NR: $col_no != $col_no_last_line\n"); + # append to each row + my $row = 0; + for($csv->fields()) { + $table[$row][$col] = defined($_) ? $_ : ''; + $row++; } + $col++; } + print map { join($sep,@$_),"\n" } @table; } -flush(\%col); -merge(); +cut-here-UbsAqi0j6GoOuk5W5yWA + ) "$@" +} +export -f transpose_inner -sub flush { - my $col_ref = shift; - my @cols_to_flush = @_; - if(not @cols_to_flush) { - @cols_to_flush = keys %$col_ref; - } - for my $c (@cols_to_flush) { - $Global::tempfile{$c} ||= tmpnam(); - my $fh = cacheout $Global::tempfile{$c}; - # This will print one delimiter too much, which we will deal with later - print $fh map { $_,$delimiter } @{$col_ref->{$c}}; - delete $col_ref->{$c}; - } +stdin_to_paste_files() { + # Run transpose_inner on blocks from stdin + # output each block as file name + local block_size + local sep + block_size="$1" + sep="$2" + PARALLEL="-k --files --block $block_size" \ + parallel --pipe transpose_inner -d "'$sep'" } -sub merge { - for my $c (sort keys %Global::tempfile) { - my $fh = cacheout $Global::tempfile{$c}; - # truncate by length of delimiter to get rid of the last $delimiter - seek $fh,-length($delimiter),SEEK_END; - truncate $fh, tell $fh; - # Make sure the file is closed of writing - close $fh; - open($fh, "<", $Global::tempfile{$c}) || die; - my $buf; - while(sysread($fh,$buf,1000_000)) { - print $buf; - } - print "\n"; - unlink $Global::tempfile{$c}; - } +file_to_paste_files() { + # Run transpose_inner on blocks from $file + # output each block as file name + local block_size + local sep + block_size="$1" + sep="$2" + file="$3" + PARALLEL="-k --files --block $block_size" \ + parallel --pipe-part -a "$file" transpose_inner -d "'$sep'" } -sub warning { - my @w = @_; - print STDERR "transpose: Warning: ", @w; +super_paste() { + # Like 'paste' up to 1000000 files + # The files are read from stdin + local sep + local paste_files + local fifo + sep="$1" + paste_files=`tempfile` + # basename + fifo=`tempfile` + rm $fifo + cat > $paste_files + + # Define replacement string {0#} to 0-pad job number + PARALLEL="--rpl "\''{0#} $f=1+int("".(log(total_jobs())/log(10))); + $_=sprintf("%0${f}d",seq())'\' + + # Make fifos that can be read from + cat $paste_files | parallel -n1000 "rm -f $fifo{0#}; mkfifo $fifo{0#}" + + # Start a paste process for every 1000 files + cat $paste_files | parallel -n1000 -j0 "paste -d '$sep' {} > $fifo{0#}" & + + # Paste all the fifos + eval paste -d "'$sep'" $fifo* + + # Cleanup + cat $paste_files | parallel -n1000 "rm -f {} $fifo{0#}" + rm $paste_files } -sub error { - my @w = @_; - print STDERR "transpose: Error: ", @w; +stdin_detect_sep() { + # Read the first 3 lines and detect the separator + # Save the read input to file + local file + file="$1" + # TODO + echo "$d" } -sub multiply_binary_prefix { - # Evalualte numbers with binary prefix - # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 - # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80 - # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 - # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 - # 13G = 13*1024*1024*1024 = 13958643712 - my $s = shift; - $s =~ s/k/*1000/g; - $s =~ s/M/*1000*1000/g; - $s =~ s/G/*1000*1000*1000/g; - $s =~ s/T/*1000*1000*1000*1000/g; - $s =~ s/P/*1000*1000*1000*1000*1000/g; - $s =~ s/E/*1000*1000*1000*1000*1000*1000/g; - $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; - - $s =~ s/Ki?/*1024/gi; - $s =~ s/Mi?/*1024*1024/gi; - $s =~ s/Gi?/*1024*1024*1024/gi; - $s =~ s/Ti?/*1024*1024*1024*1024/gi; - $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi; - $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s = eval $s; - return $s; +usage() { + echo "Usage: $0 [-d delimiter] [-b blocksize]" 1>&2; exit 1; } + +block_size=10M +while getopts ":b:d:" o; do + case "${o}" in + d) + d="$(printf "${OPTARG}")" + if [ "'" = "${d}" ] ; then + echo "Delimiter cannot be '" + usage + exit + fi + ;; + b) + block_size="${OPTARG}" + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${d}" ] ; then + d="$(printf "\t")" +fi + +# Sep cannot be ' +file="$@" +first_lines=`tempfile` +if [ -z "$file" ]; then + sep="$(stdin_detect_sep $first_lines)" + (cat $first_lines; rm $first_lines; cat) | + stdin_to_paste_files $block_size "$sep" | super_paste "$sep" +else + sep="$(stdin_detect_sep < "$file" $first_lines)" + rm $first_lines + file_to_paste_files $block_size "$sep" "$file" | super_paste "$sep" +fi diff --git a/treeoflife/treeoflife b/treeoflife/treeoflife new file mode 100755 index 0000000..07de6b3 --- /dev/null +++ b/treeoflife/treeoflife @@ -0,0 +1,12 @@ +#!/bin/bash + +# Download tree-of-life.jpg +# from http://www.open.edu/openlearn/nature-environment/natural-history/tree-life + +base=www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup +parallel -j30 wget -c $base{3}/6-{1}-{2}.jpg ::: {0..28} ::: {4..42} ::: {1..7} +parallel eval convert +append 6-{0..28}-{}.jpg line{}.jpg ::: {4..42} +convert -append line{4..42}.jpg tree-of-life.jpg + +echo Cleanup with +echo rm line*.jpg 6-*-*.jpg diff --git a/w4it-for-port-open/w4it-for-port-open b/w4it-for-port-open/w4it-for-port-open index 90da417..f6c0463 100755 --- a/w4it-for-port-open/w4it-for-port-open +++ b/w4it-for-port-open/w4it-for-port-open @@ -1,5 +1,19 @@ #!/bin/bash +QUIET=false + +while getopts ":q" opt; do + case $opt in + q) + QUIET=true + shift + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + ;; + esac +done + HOST=$1 PORT=$2 @@ -13,7 +27,11 @@ usage () { } print_not_reachable () { - echo -n . + if $QUIET; then + true skip + else + echo -n . + fi } is_port_open () { diff --git a/wifi-reload/wifi-reload b/wifi-reload/wifi-reload index 409bc52..c39b161 100755 --- a/wifi-reload/wifi-reload +++ b/wifi-reload/wifi-reload @@ -70,7 +70,7 @@ if tty -s ; then # timeout 12 forever dmesg | puniq fi -sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head -sudo iwconfig wls1 essid Turris -sudo dhclient wls1 & -sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d & +#sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head +#sudo iwconfig wls1 essid Turris +#sudo dhclient wls1 & +#sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d &