f6a34e1200
w4it-for-port-open: -q (quiet) implemented.
161 lines
3.6 KiB
Bash
Executable file
161 lines
3.6 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
# transpose [-d delimiter] [-b blocksize] table.csv > transposed.csv
|
|
# cat table.csv | transpose [-d delimiter] [-b blocksize] > transposed.csv
|
|
|
|
transpose_inner() {
    # Simple in-memory transpose of delimiter-separated data.
    #
    # Usage:
    #   transpose_inner -d sep [file ...]
    # Options:
    #   -d sep   field separator (required); used both as Text::CSV's
    #            sep_char for parsing and as the output field separator
    # Input:
    #   data to be transposed (named files, or stdin when none are given)
    # Output:
    #   transposed data on stdout
    #
    # Input rows that are shorter than others are padded with empty
    # fields, so every output line has exactly one field per input line.
    # This keeps the columns aligned when the outputs of several blocks
    # are later pasted side by side.
    #
    # The Perl program is passed to perl through process substitution so
    # that perl's stdin remains available for the data.
    perl <(cat <<'cut-here-UbsAqi0j6GoOuk5W5yWA'
    use strict;
    use warnings;
    use Text::CSV;
    use Getopt::Long;

    Getopt::Long::Configure("bundling","require_order");

    # --debug, --verbose and --simple are accepted for compatibility
    # but are not used by this program.
    my %opt;
    GetOptions(\%opt,
               "debug|D=s",
               "delimiter|d=s",
               "verbose|v",
               "simple|s",
        ) or die("Invalid option");

    if(defined $opt{delimiter}) {
        simple();
    } else {
        die("-d must be set");
    }

    sub simple {
        # Read all input, then print it with rows and columns swapped.
        my @table;
        # Number of input lines read so far == number of output columns
        my $n_lines = 0;
        my $sep = $opt{delimiter};
        my $csv = Text::CSV->new({ binary => 1, sep_char => $sep })
            or die "Cannot use CSV: ".Text::CSV->error_diag ();

        while(my $line = <>) {
            if(not $csv->parse($line)) {
                die "CSV has unexpected format";
            }
            # Field number $row of this input line becomes
            # column $n_lines of output row $row.
            my $row = 0;
            for my $field ($csv->fields()) {
                $table[$row][$n_lines] = defined($field) ? $field : '';
                $row++;
            }
            $n_lines++;
        }

        for my $cells (@table) {
            # Pad ragged rows: cells that were never set (the input line
            # had too few fields) are printed as empty fields.
            my @out = map { defined($cells->[$_]) ? $cells->[$_] : '' }
                      0 .. $n_lines - 1;
            print join($sep, @out), "\n";
        }
    }
cut-here-UbsAqi0j6GoOuk5W5yWA
    ) "$@"
}
# Exported so that the shells started by GNU parallel can call it
export -f transpose_inner
|
|
|
|
stdin_to_paste_files() {
    # Feed stdin to GNU parallel, which runs transpose_inner on it
    # block by block.
    #   $1  block size, passed to parallel as --block
    #   $2  field separator, passed on to transpose_inner as -d
    # parallel is run with -k --files; the resulting file names on
    # stdout are what super_paste expects as input.
    local blk="$1" delim="$2"

    # The separator is wrapped in single quotes because parallel hands
    # the command line to a shell.
    PARALLEL="-k --files --block $blk" \
        parallel --pipe transpose_inner -d "'$delim'"
}
|
|
|
|
file_to_paste_files() {
    # Have GNU parallel split a file into blocks and run
    # transpose_inner on each block.
    #   $1  block size, passed to parallel as --block
    #   $2  field separator, passed on to transpose_inner as -d
    #   $3  file to read; handed to parallel with --pipe-part -a
    # parallel is run with -k --files; the resulting file names on
    # stdout are what super_paste expects as input.
    local block_size
    local sep
    # Declared local like the other two so the caller's $file is not
    # overwritten.
    local file
    block_size="$1"
    sep="$2"
    file="$3"
    # The separator is wrapped in single quotes because parallel hands
    # the command line to a shell.
    PARALLEL="-k --files --block $block_size" \
        parallel --pipe-part -a "$file" transpose_inner -d "'$sep'"
}
|
|
|
|
super_paste() {
    # Like 'paste' up to 1000000 files
    #   $1     separator placed between the pasted columns
    #   stdin  names of the files to paste, one per line
    # Output: the pasted lines on stdout.
    # The files named on stdin are removed when pasting is done.
    # Empty stdin produces no output.
    local sep
    local paste_files
    local fifo
    # Marked for export: a plain assignment would only reach the
    # parallel child processes if PARALLEL already was in the environment.
    local -x PARALLEL
    sep="$1"
    paste_files="$(mktemp)"
    # Only the name is needed: it is the common prefix of the fifo names
    fifo="$(mktemp)"
    rm -f "$fifo"
    cat > "$paste_files"

    # Nothing to paste: avoid running paste on an unmatched glob
    if [ ! -s "$paste_files" ]; then
        rm -f "$paste_files"
        return 0
    fi

    # Define replacement string {0#} to 0-pad job number
    # (zero padding makes the glob below list the fifos in job order)
    PARALLEL="--rpl "\''{0#} $f=1+int("".(log(total_jobs())/log(10)));
      $_=sprintf("%0${f}d",seq())'\'

    # Make fifos that can be read from
    parallel -n1000 "rm -f $fifo{0#}; mkfifo $fifo{0#}" < "$paste_files"

    # Start a paste process for every 1000 files
    parallel -n1000 -j0 "paste -d '$sep' {} > $fifo{0#}" < "$paste_files" &

    # Paste all the fifos
    paste -d "$sep" "$fifo"*

    # Cleanup
    parallel -n1000 "rm -f {} $fifo{0#}" < "$paste_files"
    rm -f "$paste_files"
}
|
|
|
|
stdin_detect_sep() {
    # Placeholder for automatic separator detection.
    #   $1  file that the lines read from stdin are meant to be saved to
    # Intended behaviour (not implemented yet): read the first 3 lines
    # of stdin, detect the separator and save the read input to $1.
    # Current behaviour: prints the global $d; neither stdin nor $1 is
    # touched.
    local file="$1"
    # TODO
    echo "$d"
}
|
|
|
|
usage() {
    # Print a one-line synopsis on stderr and terminate the script
    # with exit status 1. The file operand is optional: without it the
    # script reads stdin.
    echo "Usage: $0 [-d delimiter] [-b blocksize] [table.csv]" 1>&2
    exit 1
}
|
|
|
|
# Default size of the blocks handed to parallel (--block)
block_size=10M
# Start from an empty delimiter so a $d inherited from the environment
# is not mistaken for a -d argument
d=""
while getopts ":b:d:" o; do
    case "${o}" in
        d)
            # Expand backslash escapes such as '\t' in the delimiter.
            # The argument is passed as data to %b, not as the format
            # string, so a '%' or a leading '-' in it is taken literally.
            d="$(printf '%b' "${OPTARG}")"
            # The delimiter is later embedded in single quotes on
            # command lines built for parallel, so it cannot be a
            # single quote itself.
            if [ "'" = "${d}" ] ; then
                echo "Delimiter cannot be '" 1>&2
                # usage exits the script
                usage
            fi
            ;;
        b)
            block_size="${OPTARG}"
            ;;
        *)
            # Unknown option or missing option argument
            usage
            ;;
    esac
done
shift $((OPTIND-1))

# No delimiter given (or it expanded to nothing): default to TAB
if [ -z "${d}" ] ; then
    d="$(printf "\t")"
fi
|
|
|
|
# Sep cannot be '
|
|
# At most one file operand is supported; several operands used to be
# glued together into a single (non-existing) file name.
if [ $# -gt 1 ]; then
    usage
fi
file="${1-}"

if [ -z "$file" ]; then
    # No file operand: transpose stdin
    first_lines="$(mktemp)"
    sep="$(stdin_detect_sep "$first_lines")"
    # Replay whatever stdin_detect_sep saved to $first_lines, followed
    # by the rest of stdin
    (cat "$first_lines"; rm -f "$first_lines"; cat) |
        stdin_to_paste_files "$block_size" "$sep" | super_paste "$sep"
else
    if [ ! -r "$file" ]; then
        echo "$0: cannot read '$file'" 1>&2
        exit 1
    fi
    first_lines="$(mktemp)"
    sep="$(stdin_detect_sep < "$file" "$first_lines")"
    # The file itself is re-read from the start, so the saved lines
    # are not needed
    rm -f "$first_lines"
    file_to_paste_files "$block_size" "$sep" "$file" | super_paste "$sep"
fi
|