transpose: transpose csv
This commit is contained in:
parent
8123e71cf3
commit
ff313b457f
88
transpose/transpose
Executable file
88
transpose/transpose
Executable file
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use English;
|
||||
use FileCache;
|
||||
use File::Temp;
|
||||
|
||||
my $delimiter = shift;
|
||||
my $buffer = shift;
|
||||
|
||||
# Use at most 1000M before flushing
|
||||
$buffer ||= 1000_000_000;
|
||||
# Perl makes the buffer baloon to 10 times the requested value
|
||||
$buffer /= 10;
|
||||
$max_col_size = $buffer;
|
||||
my $delimiter_regexp = $delimiter;
|
||||
$delimiter_regexp =~ s/(\W)/\\$1/g;
|
||||
my @current;
|
||||
my $last_t = 0;
|
||||
my $lineno = 0;
|
||||
my %col;
|
||||
while(<>) {
|
||||
chomp;
|
||||
# Split current line into columns
|
||||
@current = split /$delimiter_regexp/o, $_;
|
||||
my $t = 0;
|
||||
map {
|
||||
push(@{$col{$t}},$_);
|
||||
$col_size{$t} += length $_;
|
||||
if($col_size{$t} > $max_col_size) {
|
||||
flush(\%col,$t);
|
||||
$col_size{$t} = 0;
|
||||
}
|
||||
$t++;
|
||||
} @current;
|
||||
if($t != $last_t) {
|
||||
if(0 == $last_t) {
|
||||
$last_t = $t;
|
||||
$max_col_size = $buffer/$last_t;
|
||||
} else {
|
||||
warning("Number of columns in line $NR: $t != $last_t\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
flush(\%col);
|
||||
merge();
|
||||
|
||||
sub flush {
|
||||
my $col_ref = shift;
|
||||
my @cols_to_flush = @_;
|
||||
if(not @cols_to_flush) {
|
||||
@cols_to_flush = keys %$col_ref;
|
||||
}
|
||||
for my $c (@cols_to_flush) {
|
||||
$Global::tempfile{$c} ||= tmpnam();
|
||||
my $fh = cacheout $Global::tempfile{$c};
|
||||
# This will print one delimiter too much, which we will deal with later
|
||||
print $fh map { $_,$delimiter } @{$col_ref->{$c}};
|
||||
delete $col_ref->{$c};
|
||||
}
|
||||
}
|
||||
|
||||
sub merge {
|
||||
for my $c (sort keys %Global::tempfile) {
|
||||
my $fh = cacheout $Global::tempfile{$c};
|
||||
# truncate by length of delimiter to get rid of the last $delimiter
|
||||
seek $fh,-length($delimiter),SEEK_END;
|
||||
truncate $fh, tell $fh;
|
||||
# Make sure the file is closed of writing
|
||||
close $fh;
|
||||
open($fh, "<", $Global::tempfile{$c}) || die;
|
||||
my $buf;
|
||||
while(sysread($fh,$buf,1000_000)) {
|
||||
print $buf;
|
||||
}
|
||||
print "\n";
|
||||
unlink $Global::tempfile{$c};
|
||||
}
|
||||
}
|
||||
|
||||
sub warning {
|
||||
my @w = @_;
|
||||
print STDERR "transpose: Warning: ", @w;
|
||||
}
|
||||
|
||||
sub error {
|
||||
my @w = @_;
|
||||
print STDERR "transpose: Error: ", @w;
|
||||
}
|
Loading…
Reference in a new issue