transpose: parallelized.
w4it-for-port-open: -q (quiet) implemented.
parent 0a9b2b3503
commit f6a34e1200
transpose
@@ -1,125 +1,160 @@
-#!/usr/bin/perl -w
-
-use English;
-use FileCache;
-use File::Temp;
-
-my $delimiter = shift;
-my $buffer = shift;
-
-$delimiter ||= ",";
-# Use at most 1000M before flushing
-$buffer ||= "1000M";
-$buffer = multiply_binary_prefix($buffer);
-# Perl makes the buffer baloon to 10 times the requested value
-$buffer /= 10;
-# max_col_size will be lowered after first line read.
-$max_col_size = $buffer;
-my $delimiter_regexp = $delimiter;
-$delimiter_regexp =~ s/(\W)/\\$1/g;
-my @current;
-my $col_no_last_line = 0;
-my $lineno = 0;
-my %col;
-while(<>) {
-    chomp;
-    my $col_no = 0;
-    my @to_be_flushed = ();
-    map {
-        push(@{$col{$col_no}},$_);
-        $col_size{$col_no} += length $_;
-        if($col_size{$col_no} > $max_col_size) {
-            push @to_be_flushed, $col_no;
-            $col_size{$col_no} = 0;
-        }
-        $col_no++;
-    } split /$delimiter_regexp/o, $_; # This should do de-csv'ing
-    if(@to_be_flushed) {
-        flush(\%col,@to_be_flushed);
-    }
-    if($col_no != $col_no_last_line) {
-        if(0 == $col_no_last_line) {
-            # This is first time around
-            $col_no_last_line = $col_no;
-            $max_col_size = $buffer/$col_no_last_line;
-        } else {
-            warning("Number of columns in line $NR: $col_no != $col_no_last_line\n");
-        }
-    }
-}
-flush(\%col);
-merge();
-
-sub flush {
-    my $col_ref = shift;
-    my @cols_to_flush = @_;
-    if(not @cols_to_flush) {
-        @cols_to_flush = keys %$col_ref;
-    }
-    for my $c (@cols_to_flush) {
-        $Global::tempfile{$c} ||= tmpnam();
-        my $fh = cacheout $Global::tempfile{$c};
-        # This will print one delimiter too much, which we will deal with later
-        print $fh map { $_,$delimiter } @{$col_ref->{$c}};
-        delete $col_ref->{$c};
-    }
-}
-
-sub merge {
-    for my $c (sort keys %Global::tempfile) {
-        my $fh = cacheout $Global::tempfile{$c};
-        # truncate by length of delimiter to get rid of the last $delimiter
-        seek $fh,-length($delimiter),SEEK_END;
-        truncate $fh, tell $fh;
-        # Make sure the file is closed of writing
-        close $fh;
-        open($fh, "<", $Global::tempfile{$c}) || die;
-        my $buf;
-        while(sysread($fh,$buf,1000_000)) {
-            print $buf;
-        }
-        print "\n";
-        unlink $Global::tempfile{$c};
-    }
-}
-
-sub warning {
-    my @w = @_;
-    print STDERR "transpose: Warning: ", @w;
-}
-
-sub error {
-    my @w = @_;
-    print STDERR "transpose: Error: ", @w;
-}
-
-sub multiply_binary_prefix {
-    # Evalualte numbers with binary prefix
-    # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
-    # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80
-    # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
-    # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
-    # 13G = 13*1024*1024*1024 = 13958643712
-    my $s = shift;
-    $s =~ s/k/*1000/g;
-    $s =~ s/M/*1000*1000/g;
-    $s =~ s/G/*1000*1000*1000/g;
-    $s =~ s/T/*1000*1000*1000*1000/g;
-    $s =~ s/P/*1000*1000*1000*1000*1000/g;
-    $s =~ s/E/*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
-    $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
-
-    $s =~ s/Ki?/*1024/gi;
-    $s =~ s/Mi?/*1024*1024/gi;
-    $s =~ s/Gi?/*1024*1024*1024/gi;
-    $s =~ s/Ti?/*1024*1024*1024*1024/gi;
-    $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
-    $s = eval $s;
-    return $s;
-}
+#!/bin/bash
+
+# transpose [-d delimiter] [-b blocksize] table.csv > transposed.csv
+# cat table.csv | transpose [-d delimiter] [-b blocksize] > transposed.csv
+
+transpose_inner() {
+    # simple in-memory transpose
+    # -d sep
+    # Input:
+    # data to be transposed
+    # Output:
+    # transposed data
+    perl <(cat <<'cut-here-UbsAqi0j6GoOuk5W5yWA'
+    use Text::CSV;
+    use Getopt::Long;
+
+    Getopt::Long::Configure("bundling","require_order");
+    my $retval = GetOptions("debug|D=s" => \$opt::debug,
+                            "delimiter|d=s" => \$opt::delimiter,
+                            "verbose|v" => \@opt::verbose,
+                            "simple|s" => \$opt::simple,
+        );
+
+    if(defined $opt::delimiter) {
+        simple();
+    } else {
+        die("-d must be set");
+    }
+
+    sub simple {
+        my (@table);
+        my $col = 0;
+        my $csv_setting = { binary => 1, sep_char => $opt::delimiter };
+        my $sep = $csv_setting->{sep_char};
+        my $csv = Text::CSV->new($csv_setting)
+            or die "Cannot use CSV: ".Text::CSV->error_diag ();
+
+        while(my $l = <>) {
+            if(not $csv->parse($l)) {
+                die "CSV has unexpected format";
+            }
+            # append to each row
+            my $row = 0;
+            for($csv->fields()) {
+                $table[$row][$col] = defined($_) ? $_ : '';
+                $row++;
+            }
+            $col++;
+        }
+        print map { join($sep,@$_),"\n" } @table;
+    }
+cut-here-UbsAqi0j6GoOuk5W5yWA
+    ) "$@"
+}
+export -f transpose_inner
+
+stdin_to_paste_files() {
+    # Run transpose_inner on blocks from stdin
+    # output each block as file name
+    local block_size
+    local sep
+    block_size="$1"
+    sep="$2"
+    PARALLEL="-k --files --block $block_size" \
+        parallel --pipe transpose_inner -d "'$sep'"
+}
+
+file_to_paste_files() {
+    # Run transpose_inner on blocks from $file
+    # output each block as file name
+    local block_size
+    local sep
+    block_size="$1"
+    sep="$2"
+    file="$3"
+    PARALLEL="-k --files --block $block_size" \
+        parallel --pipe-part -a "$file" transpose_inner -d "'$sep'"
+}
+
+super_paste() {
+    # Like 'paste' up to 1000000 files
+    # The files are read from stdin
+    local sep
+    local paste_files
+    local fifo
+    sep="$1"
+    paste_files=`tempfile`
+    # basename
+    fifo=`tempfile`
+    rm $fifo
+    cat > $paste_files
+
+    # Define replacement string {0#} to 0-pad job number
+    PARALLEL="--rpl "\''{0#} $f=1+int("".(log(total_jobs())/log(10)));
+        $_=sprintf("%0${f}d",seq())'\'
+
+    # Make fifos that can be read from
+    cat $paste_files | parallel -n1000 "rm -f $fifo{0#}; mkfifo $fifo{0#}"
+
+    # Start a paste process for every 1000 files
+    cat $paste_files | parallel -n1000 -j0 "paste -d '$sep' {} > $fifo{0#}" &
+
+    # Paste all the fifos
+    eval paste -d "'$sep'" $fifo*
+
+    # Cleanup
+    cat $paste_files | parallel -n1000 "rm -f {} $fifo{0#}"
+    rm $paste_files
+}
+
+stdin_detect_sep() {
+    # Read the first 3 lines and detect the separator
+    # Save the read input to file
+    local file
+    file="$1"
+    # TODO
+    echo "$d"
+}
+
+usage() {
+    echo "Usage: $0 [-d delimiter] [-b blocksize]" 1>&2; exit 1;
+}
+
+block_size=10M
+while getopts ":b:d:" o; do
+    case "${o}" in
+        d)
+            d="$(printf "${OPTARG}")"
+            if [ "'" = "${d}" ] ; then
+                echo "Delimiter cannot be '"
+                usage
+                exit
+            fi
+            ;;
+        b)
+            block_size="${OPTARG}"
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${d}" ] ; then
+    d="$(printf "\t")"
+fi
+
+# Sep cannot be '
+file="$@"
+first_lines=`tempfile`
+if [ -z "$file" ]; then
+    sep="$(stdin_detect_sep $first_lines)"
+    (cat $first_lines; rm $first_lines; cat) |
+        stdin_to_paste_files $block_size "$sep" | super_paste "$sep"
+else
+    sep="$(stdin_detect_sep < "$file" $first_lines)"
+    rm $first_lines
+    file_to_paste_files $block_size "$sep" "$file" | super_paste "$sep"
+fi
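
For reference, a minimal usage sketch of the rewritten wrapper, following the usage lines in its own header. It assumes GNU Parallel and Perl's Text::CSV module are installed; table.csv and the 50M block size are only illustrative values.

    # Transpose a comma-separated file, processed in 50M blocks per job:
    transpose -d , -b 50M table.csv > transposed.csv

    # The same, streaming from stdin:
    cat table.csv | transpose -d , -b 50M > transposed.csv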

treeoflife/treeoflife (executable file, 12 added lines)
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Download tree-of-life.jpg
+# from http://www.open.edu/openlearn/nature-environment/natural-history/tree-life
+
+base=www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup
+parallel -j30 wget -c $base{3}/6-{1}-{2}.jpg ::: {0..28} ::: {4..42} ::: {1..7}
+parallel eval convert +append 6-{0..28}-{}.jpg line{}.jpg ::: {4..42}
+convert -append line{4..42}.jpg tree-of-life.jpg
+
+echo Cleanup with
+echo rm line*.jpg 6-*-*.jpg
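
As a side note, GNU Parallel's --dry-run is a quick way to preview how the three ::: input sources in the wget line combine before starting the full download; the shortened value lists here are illustrative:

    base=www2.open.ac.uk/openlearn/treeoflife/treeOfLifePoster/TileGroup
    parallel --dry-run wget -c $base{3}/6-{1}-{2}.jpg ::: 0 1 ::: 4 5 ::: 1
    # {1}, {2} and {3} each take one value from the first, second and third
    # ::: list, so one wget command is printed per combination.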

w4it-for-port-open
@@ -1,5 +1,19 @@
 #!/bin/bash
 
+QUIET=false
+
+while getopts ":q" opt; do
+    case $opt in
+        q)
+            QUIET=true
+            shift
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            ;;
+    esac
+done
+
 HOST=$1
 PORT=$2
@@ -13,7 +27,11 @@ usage () {
 }
 
 print_not_reachable () {
+    if $QUIET; then
+        true skip
+    else
     echo -n .
+    fi
 }
 
 is_port_open () {
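
For reference, a usage sketch of the new flag; example.com and port 22 are illustrative, and per the diff the host and port are still the first and second positional arguments once the options are consumed:

    # Wait for the port, printing a dot while it is not reachable (existing behaviour):
    w4it-for-port-open example.com 22

    # The same, with the progress dots suppressed:
    w4it-for-port-open -q example.com 22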

@@ -70,7 +70,7 @@ if tty -s ; then
 # timeout 12 forever dmesg | puniq
 fi
 
-sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head
-sudo iwconfig wls1 essid Turris
-sudo dhclient wls1 &
-sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d &
+#sudo bash -c 'cat >> /etc/resolv.conf' < /etc/resolvconf/resolv.conf.d/head
+#sudo iwconfig wls1 essid Turris
+#sudo dhclient wls1 &
+#sudo wpa_supplicant -Dwext -c/etc/wpa_supplicant.conf -iwls1 -d &