Merge branch 'master' of gitlab.com:ole.tange/tangetools
This commit is contained in:
commit
ee13554589
6
Makefile
6
Makefile
|
@ -1,12 +1,12 @@
|
|||
CMD = blink histogram upsidedown tracefile timestamp rand rrm goodpasswd gitnext
|
||||
CMD = blink bsearch histogram upsidedown tracefile timestamp rand rrm goodpasswd gitnext
|
||||
|
||||
all: blink/blink.1 goodpasswd/goodpasswd.1 histogram/histogram.1 rand/rand.1 rrm/rrm.1 timestamp/timestamp.1 tracefile/tracefile.1 upsidedown/upsidedown.1 wssh/wssh.1
|
||||
all: blink/blink.1 bsearch/bsearch.1 goodpasswd/goodpasswd.1 histogram/histogram.1 rand/rand.1 rrm/rrm.1 timestamp/timestamp.1 tracefile/tracefile.1 upsidedown/upsidedown.1 wssh/wssh.1
|
||||
|
||||
%.1: %
|
||||
pod2man $< > $@
|
||||
|
||||
install:
|
||||
mkdir -p /usr/local/bin
|
||||
parallel eval ln -sf `pwd`/*/{} /usr/local/bin/{} ::: blink reniced em field forever neno rn stdout tracefile w4it-for-port-open upsidedown histogram goodpasswd mtrr not summer timestamp transpose wssh aptsearch rand rrm gitnext
|
||||
parallel eval ln -sf `pwd`/*/{} /usr/local/bin/{} ::: blink bsearch reniced em field forever neno rn stdout tracefile w4it-for-port-open upsidedown histogram goodpasswd mtrr not summer timestamp transpose wssh aptsearch rand rrm gitnext
|
||||
mkdir -p /usr/local/share/man/man1
|
||||
parallel ln -sf `pwd`/{} /usr/local/share/man/man1/{/} ::: */*.1
|
||||
|
|
2
README
2
README
|
@ -2,6 +2,8 @@ Tools developed by Ole Tange <ole@tange.dk>.
|
|||
|
||||
Probably not useful for you, but then again you never know.
|
||||
|
||||
bsearch - binary search through sorted text files.
|
||||
|
||||
em - Force emacs to run in terminal. Use xemacs if installed.
|
||||
|
||||
field - Split on space. Give the given field number. Supports syntax 1-3,6-
|
||||
|
|
400
bsearch/bsearch
Executable file
400
bsearch/bsearch
Executable file
|
@ -0,0 +1,400 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
=head1 NAME
|
||||
|
||||
bsearch - binary search through sorted text files
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<bsearch> [-nrfB] file string [string...]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<bsearch> searches a sorted file for a string. It outputs the
|
||||
following line or the byte position of this line, which is where the
|
||||
string would have been if it had been in the sorted file.
|
||||
|
||||
=over 9
|
||||
|
||||
=item B<--ignore-leading-blanks> (not implemented)
|
||||
|
||||
=item B<-b>
|
||||
|
||||
ignore leading blanks
|
||||
|
||||
=item B<--byte-offset>
|
||||
|
||||
=item B<-B>
|
||||
|
||||
print byte position where string would have been
|
||||
|
||||
=item B<--dictionary-order> (not implemented)
|
||||
|
||||
=item B<-d>
|
||||
|
||||
consider only blanks and alphanumeric characters
|
||||
|
||||
=item B<--debug> (not implemented)
|
||||
|
||||
=item B<-D>
|
||||
|
||||
annotate the part of the line used to sort, and warn about
|
||||
questionable usage to stderr
|
||||
|
||||
=item B<--ignore-case>
|
||||
|
||||
=item B<-f>
|
||||
|
||||
fold lower case to upper case characters
|
||||
|
||||
=item B<--general-numeric-sort> (not implemented)
|
||||
|
||||
=item B<-g>
|
||||
|
||||
compare according to general numerical value
|
||||
|
||||
=item B<--ignore-nonprinting> (not implemented)
|
||||
|
||||
=item B<-i>
|
||||
|
||||
consider only printable characters
|
||||
|
||||
=item B<--month-sort> (not implemented)
|
||||
|
||||
=item B<-M>
|
||||
|
||||
compare (unknown) < 'JAN' < ... < 'DEC'
|
||||
|
||||
=item B<--human-numeric-sort> (not implemented)
|
||||
|
||||
=item B<-h>
|
||||
|
||||
compare human readable numbers (e.g., 2K 1G)
|
||||
|
||||
=item B<--key=KEYDEF> (not implemented)
|
||||
|
||||
=item B<-k>
|
||||
|
||||
sort via a key; KEYDEF gives location and type
|
||||
|
||||
=item B<--numeric-sort>
|
||||
|
||||
=item B<-n>
|
||||
|
||||
compare according to string numerical value
|
||||
|
||||
=item B<--random-sort> (not implemented)
|
||||
|
||||
=item B<-R>
|
||||
|
||||
sort by random hash of keys
|
||||
|
||||
=item B<--reverse>
|
||||
|
||||
=item B<-r>
|
||||
|
||||
reverse the result of comparisons
|
||||
|
||||
=item B<--sort=WORD> (not implemented)
|
||||
|
||||
sort according to WORD: general-numeric B<-g>, human-numeric B<-h>, month
|
||||
B<-M>, numeric B<-n>, random B<-R>, version B<-V>
|
||||
|
||||
=item B<-t> (not implemented)
|
||||
|
||||
=item B<--field-separator=SEP>
|
||||
|
||||
use SEP instead of non-blank to blank transition
|
||||
|
||||
=item B<-z> (not implemented)
|
||||
|
||||
=item B<--zero-terminated>
|
||||
|
||||
end lines with 0 byte, not newline
|
||||
|
||||
=back
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
=head2 Missing
|
||||
|
||||
Missing
|
||||
|
||||
|
||||
=head1 REPORTING BUGS
|
||||
|
||||
B<bsearch> is part of tangetools. Report bugs to <tools@tange.dk>.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2016 Ole Tange http://ole.tange.dk
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
=head2 Documentation license I
|
||||
|
||||
Permission is granted to copy, distribute and/or modify this documentation
|
||||
under the terms of the GNU Free Documentation License, Version 1.3 or
|
||||
any later version published by the Free Software Foundation; with no
|
||||
Invariant Sections, with no Front-Cover Texts, and with no Back-Cover
|
||||
Texts. A copy of the license is included in the file fdl.txt.
|
||||
|
||||
=head2 Documentation license II
|
||||
|
||||
You are free:
|
||||
|
||||
=over 9
|
||||
|
||||
=item B<to Share>
|
||||
|
||||
to copy, distribute and transmit the work
|
||||
|
||||
=item B<to Remix>
|
||||
|
||||
to adapt the work
|
||||
|
||||
=back
|
||||
|
||||
Under the following conditions:
|
||||
|
||||
=over 9
|
||||
|
||||
=item B<Attribution>
|
||||
|
||||
You must attribute the work in the manner specified by the author or
|
||||
licensor (but not in any way that suggests that they endorse you or
|
||||
your use of the work).
|
||||
|
||||
=item B<Share Alike>
|
||||
|
||||
If you alter, transform, or build upon this work, you may distribute
|
||||
the resulting work only under the same, similar or a compatible
|
||||
license.
|
||||
|
||||
=back
|
||||
|
||||
With the understanding that:
|
||||
|
||||
=over 9
|
||||
|
||||
=item B<Waiver>
|
||||
|
||||
Any of the above conditions can be waived if you get permission from
|
||||
the copyright holder.
|
||||
|
||||
=item B<Public Domain>
|
||||
|
||||
Where the work or any of its elements is in the public domain under
|
||||
applicable law, that status is in no way affected by the license.
|
||||
|
||||
=item B<Other Rights>
|
||||
|
||||
In no way are any of the following rights affected by the license:
|
||||
|
||||
=over 9
|
||||
|
||||
=item *
|
||||
|
||||
Your fair dealing or fair use rights, or other applicable
|
||||
copyright exceptions and limitations;
|
||||
|
||||
=item *
|
||||
|
||||
The author's moral rights;
|
||||
|
||||
=item *
|
||||
|
||||
Rights other persons may have either in the work itself or in
|
||||
how the work is used, such as publicity or privacy rights.
|
||||
|
||||
=back
|
||||
|
||||
=item B<Notice>
|
||||
|
||||
For any reuse or distribution, you must make clear to others the
|
||||
license terms of this work.
|
||||
|
||||
=back
|
||||
|
||||
A copy of the full license is included in the file as cc-by-sa.txt.
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
B<bsearch> uses Perl.
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
B<grep>(1), B<sort>(1).
|
||||
|
||||
=cut
|
||||
|
||||
use Getopt::Long;
|
||||
|
||||
Getopt::Long::Configure("bundling","require_order");

# Parse the command line. The option names follow the POD above; only
# some of them are acted upon by the code in this file.
# Stop on unknown/malformed options instead of searching with a
# half-parsed command line (Getopt::Long has already printed a warning).
GetOptions(
    "debug|D=s" => \$opt::D,
    "version" => \$opt::version,
    "verbose|v" => \$opt::verbose,
    "B|byte-offset" => \$opt::byte_offset,
    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
    "d|dictionary-order" => \$opt::dictionary_order,
    "f|ignore-case" => \$opt::ignore_case,
    "g|general-numeric-sort" => \$opt::general_numeric_sort,
    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
    "M|month-sort" => \$opt::month_sort,
    "h|human-numeric-sort" => \$opt::human_numeric_sort,
    "n|numeric-sort" => \$opt::numeric_sort,
    "r|reverse" => \$opt::reverse,
    "sort=s" => \$opt::sort,
    "V|version-sort" => \$opt::version_sort,
    "k|key=s" => \@opt::key,
    "t|field-separator=s" => \$opt::field_separator,
    "z|zero-terminated" => \$opt::zero_terminated,
    ) or exit(255);
$Global::progname = "bsearch";
$Global::version = 20160712;
if($opt::version) {
    version();
    exit 0;
}
# -z: records are terminated by a NUL byte instead of a newline
if($opt::zero_terminated) { $/ = "\0"; }

# First non-option argument is the file; the rest are the search keys
my $file = shift;
if(not defined $file) {
    # Without a file there is nothing to search: say so instead of
    # silently exiting with success.
    error("Usage: $Global::progname [-nrfB] file string [string...]");
    exit 255;
}

for my $key (@ARGV) {
    print bsearch($file,$key);
}
|
||||
|
||||
sub bsearch {
    # Binary search a file for a key using byte offsets.
    #
    # Input:
    #   $file = name of the file to search
    #   $key = string to look for
    # Uses:
    #   compare() for ordering (so the --reverse, --ignore-case and
    #     --numeric-sort options are honoured)
    #   $opt::byte_offset to select the kind of return value
    # Returns:
    #   with $opt::byte_offset: "0\n" or the file position after the
    #     last line read, followed by a newline
    #   without: "" or the last line read (may be undef at end of file)
    # Exits with status 1 if the file cannot be opened.
    my $file = shift;
    my $key = shift;
    my $min = 0;
    my $max = -s $file;

    # Lexical handle: it does not leak into package scope
    my $fh;
    if(not open($fh, "<", $file)) {
        error("Cannot open '$file'");
        exit 1;
    }
    my $line;
    # Narrow [$min,$max] (byte offsets) until they are at most 1 apart
    while($max - $min > 1) {
        my $middle = int(($max + $min)/2);
        seek($fh,$middle,0) or die "Cannot seek to $middle in '$file': $!";
        # $middle most likely points into the middle of a line:
        # read and discard up to the next record separator
        my $half = <$fh>;
        if(eof($fh)
           or
           compare(($line = <$fh>),$key) >= 0) {
            # No full line after $middle, or that line is >= $key
            $max = $middle;
        } else {
            $min = $middle;
        }
    }
    seek($fh,$max,0) or die "Cannot seek to $max in '$file': $!";
    $line = <$fh>;
    my $result;
    if(compare($line,$key) >= 0) {
        # The very first line
        $result = $opt::byte_offset ? "0\n" : "";
    } else {
        $result = $opt::byte_offset ? tell($fh)."\n" : $line;
    }
    close $fh;
    return $result;
}
|
||||
|
||||
sub compare {
    # Compare two strings according to the selected options.
    #
    # Input:
    #   $left, $right = the two values to compare
    # Uses:
    #   $opt::reverse = swap the operands before comparing
    #   $opt::ignore_case = upper-case both operands before comparing
    #   $opt::numeric_sort = compare numerically (<=>)
    #   $opt::numascii = compare numerically, break ties with cmp
    # Returns:
    #   -1, 0 or 1 (as <=> / cmp do)
    # The operands are not named $a/$b to avoid shadowing sort's variables.
    my ($left,$right) = @_;
    if($opt::reverse) {
        ($left,$right) = ($right,$left);
    }
    if($opt::ignore_case) {
        $left = uc($left);
        $right = uc($right);
    }
    if($opt::numeric_sort) {
        return $left <=> $right;
    } elsif($opt::numascii) {
        # '||' and not 'or': 'return X or Y' parses as '(return X) or Y',
        # so the string tie-breaker would never be evaluated.
        return (($left <=> $right) || ($left cmp $right));
    } else {
        return $left cmp $right;
    }
}
|
||||
|
||||
sub status {
    # Print each argument followed by a newline on the status
    # filehandle ($Global::status_fd if set, otherwise STDERR)
    # and flush the handle.
    my @messages = @_;
    my $out = $Global::status_fd || *STDERR;
    my @with_newlines;
    for my $msg (@messages) {
        push @with_newlines, $msg, "\n";
    }
    print $out @with_newlines;
    $out->flush();
}
|
||||
|
||||
sub status_no_nl {
    # Print the arguments as they are (no newline added) on the status
    # filehandle ($Global::status_fd if set, otherwise STDERR)
    # and flush the handle.
    my $out = $Global::status_fd || *STDERR;
    my @messages = @_;
    print $out @messages;
    $out->flush();
}
|
||||
|
||||
sub warning {
    # Print one "PROG: Warning: MESSAGE" line per argument via
    # status_no_nl(). PROG is $Global::progname, or "parallel" if unset.
    my $prog = $Global::progname || "parallel";
    my @parts;
    for my $msg (@_) {
        push @parts, $prog, ": Warning: ", $msg, "\n";
    }
    status_no_nl(@parts);
}
|
||||
|
||||
sub error {
    # Print one "PROG: Error: MESSAGE" line per argument via status().
    # PROG is $Global::progname, or "parallel" if unset.
    my $prog = $Global::progname || "parallel";
    my @lines;
    for my $msg (@_) {
        push @lines, $prog.": Error: ".$msg;
    }
    status(@lines);
}
|
||||
|
||||
sub die_bug {
    # Report an internal inconsistency on STDERR and exit with status 255.
    #
    # Input:
    #   $bugid = identifier of the place in the code that detected the bug
    # Does not return.
    my $bugid = shift;
    print STDERR
        ("$Global::progname: This should not happen. You have found a bug.\n",
         # Same address as the REPORTING BUGS section of the POD
         "Please contact <tools\@tange.dk> and include:\n",
         "* The version number: $Global::version\n",
         "* The bugid: $bugid\n",
         "* The command line being run\n",
         "* The files being read (put the files on a webserver if they are big)\n",
         "\n",
         "If you get the error on smaller/fewer files, please include those instead.\n");
    # wait_and_exit() is not defined in this program: exit directly
    exit(255);
}
|
||||
|
||||
sub version {
    # Print the version banner (program name, version number, copyright
    # and licence notice) on STDOUT.
    # Returns: N/A
    print <<"END_OF_BANNER";
GNU $Global::progname $Global::version
Copyright (C) 2016
Ole Tange and Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
GNU $Global::progname comes with no warranty.

Web site: http://www.gnu.org/software/${Global::progname}

When using programs that use GNU Parallel to process data for publication
please cite as described in 'parallel --citation'.
END_OF_BANNER
}
|
44
bsearch/regressiontest
Executable file
44
bsearch/regressiontest
Executable file
|
@ -0,0 +1,44 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Base name for the per-test scratch files. Each test appends its own
# suffix. mktemp is used because tempfile(1) is a deprecated,
# Debian-specific tool that is missing on many systems.
test_tmp=$(mktemp)
export test_tmp
|
||||
|
||||
test_n() {
    # Exercise 'bsearch -n' on small files: empty, a single blank line,
    # and 1-3 numeric lines where one value is written as N.000000000.
    # For every non-empty file the content is shown on one line (xargs)
    # before the search results.
    # $tmp is quoted everywhere so a scratch path containing spaces
    # does not break the redirections.
    local tmp="${test_tmp}_n"
    true > "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1.000000000 > "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1.000000000 > "$tmp"
    echo 2 >> "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1 > "$tmp"
    echo 2.000000000 >> "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1.000000000 > "$tmp"
    echo 2 >> "$tmp"
    echo 3 >> "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1 > "$tmp"
    echo 2.000000000 >> "$tmp"
    echo 3 >> "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    echo 1 > "$tmp"
    echo 2 >> "$tmp"
    echo 3.000000000 >> "$tmp"
    xargs < "$tmp"
    bsearch -n "$tmp" 0 2 2.1 100000
    rm "$tmp"
}
|
||||
|
||||
|
||||
export -f $(compgen -A function | grep test_)
|
||||
compgen -A function | grep test_ | sort | parallel -j6 --tag -k '{} 2>&1'
|
|
@ -7,37 +7,43 @@ use File::Temp;
|
|||
my $delimiter = shift;
|
||||
my $buffer = shift;
|
||||
|
||||
$delimiter ||= ",";
|
||||
# Use at most 1000M before flushing
|
||||
$buffer ||= 1000_000_000;
|
||||
$buffer ||= "1000M";
|
||||
$buffer = multiply_binary_prefix($buffer);
|
||||
# Perl makes the buffer balloon to 10 times the requested value
|
||||
$buffer /= 10;
|
||||
# max_col_size will be lowered after first line read.
|
||||
$max_col_size = $buffer;
|
||||
my $delimiter_regexp = $delimiter;
|
||||
$delimiter_regexp =~ s/(\W)/\\$1/g;
|
||||
my @current;
|
||||
my $last_t = 0;
|
||||
my $col_no_last_line = 0;
|
||||
my $lineno = 0;
|
||||
my %col;
|
||||
while(<>) {
|
||||
chomp;
|
||||
# Split current line into columns
|
||||
@current = split /$delimiter_regexp/o, $_;
|
||||
my $t = 0;
|
||||
my $col_no = 0;
|
||||
my @to_be_flushed = ();
|
||||
map {
|
||||
push(@{$col{$t}},$_);
|
||||
$col_size{$t} += length $_;
|
||||
if($col_size{$t} > $max_col_size) {
|
||||
flush(\%col,$t);
|
||||
$col_size{$t} = 0;
|
||||
push(@{$col{$col_no}},$_);
|
||||
$col_size{$col_no} += length $_;
|
||||
if($col_size{$col_no} > $max_col_size) {
|
||||
push @to_be_flushed, $col_no;
|
||||
$col_size{$col_no} = 0;
|
||||
}
|
||||
$t++;
|
||||
} @current;
|
||||
if($t != $last_t) {
|
||||
if(0 == $last_t) {
|
||||
$last_t = $t;
|
||||
$max_col_size = $buffer/$last_t;
|
||||
$col_no++;
|
||||
} split /$delimiter_regexp/o, $_; # This should do de-csv'ing
|
||||
if(@to_be_flushed) {
|
||||
flush(\%col,@to_be_flushed);
|
||||
}
|
||||
if($col_no != $col_no_last_line) {
|
||||
if(0 == $col_no_last_line) {
|
||||
# This is first time around
|
||||
$col_no_last_line = $col_no;
|
||||
$max_col_size = $buffer/$col_no_last_line;
|
||||
} else {
|
||||
warning("Number of columns in line $NR: $t != $last_t\n");
|
||||
warning("Number of columns in line $NR: $col_no != $col_no_last_line\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -86,3 +92,34 @@ sub error {
|
|||
my @w = @_;
|
||||
print STDERR "transpose: Error: ", @w;
|
||||
}
|
||||
|
||||
sub multiply_binary_prefix {
    # Evaluate a number with an optional unit suffix.
    #   Lower case letter = power of 1000:
    #     k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21,
    #     y=10^24, x=10^27
    #   Upper case letter = power of 1024:
    #     K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^60, Z=2^70,
    #     Y=2^80, X=2^90
    #   Letter followed by 'i' (any case) = power of 1024:
    #     Ki=ki=2^10, Mi=mi=2^20, Gi=gi=2^30, ...
    # 13G = 13*1024*1024*1024 = 13958643712
    #
    # Input:
    #   $s = string such as "1000M", "10k" or "2Gi"
    # Returns:
    #   the numeric value (undef if the result is not a valid expression)
    my $s = shift;
    # Number of factors each letter stands for
    my %exponent;
    @exponent{qw(k m g t p e z y x)} = (1..9);
    # Replace every unit in a single pass, so that the optional 'i' is
    # consumed together with its letter (a separate pass per letter
    # would leave a stray 'i' behind and break the expression).
    $s =~ s{([kmgtpezyx])(i?)}{
        my ($letter, $binary_marker) = ($1, $2);
        my $base =
            ($binary_marker ne "" or $letter eq uc($letter)) ? 1024 : 1000;
        "*$base" x $exponent{lc($letter)};
    }gie;
    # NOTE(review): string eval of caller supplied text. Kept so that
    # arithmetic in the argument keeps working; do not pass untrusted
    # input to this function.
    $s = eval $s;
    return $s;
}
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
#!/usr/local/bin/parallel --shebang-wrap --pipe --block 10m -k --files /usr/bin/perl | xargs paste
|
||||
|
||||
use Text::CSV;
|
||||
use File::Temp qw(tempfile tempdir);
|
||||
|
||||
|
@ -32,6 +30,65 @@ while(my $l = <>) {
|
|||
print map { join("\t",@$_),"\n" } @table;
|
||||
|
||||
sub guess_csv_setting {
    # Guess the CSV settings for the input.
    #
    # The guessing is currently switched off by the unconditional return
    # below: the result is always { binary => 1 }. The heuristic after
    # the return is kept for when it is enabled again.
    #
    # Input (only used by the disabled heuristic):
    #   $line = a line of input
    # Returns:
    #   hash ref of settings (presumably passed to Text::CSV->new -
    #   verify against the caller)
    return { binary => 1 };

    # ---- Disabled: guess the separator from a single line ----
    my $line = shift;
    # Potential field separators
    # Priority:
    # \0 if it occurs in the line
    # \t if it occurs in the line
    # otherwise the candidate that occurs most often
    my @fieldsep = (",", "\t", "\0", ":", ";", "|", "/");
    # Start every candidate at 0 (one zero per separator)
    my %count = map { $_ => 0 } @fieldsep;
    # Count characters
    for my $char (split //, $line) {
        $count{$char}++;
    }
    my @sepsort = sort { $count{$b} <=> $count{$a} } @fieldsep;
    my $guessed_sep;
    if($count{"\0"} > 0) {
        # \0 is in the line => this is definitely the field sep
        $guessed_sep = "\0";
    } elsif($count{"\t"} > 0) {
        # \t is in the line => this is definitely the field sep
        $guessed_sep = "\t";
    } else {
        $guessed_sep = $sepsort[0];
    }
    return { binary => 1, sep_char => $guessed_sep };
}
|
||||
|
||||
# Build a Text::CSV object by trying a fixed list of candidate settings
# against the lines in @lines.
# Uses:
#   @lines = not declared in this sub; presumably a file-level array of
#     input lines - TODO confirm
# Returns:
#   $csv = Text::CSV object created from $succesful_csv_type
# Dies if Text::CSV->new fails.
sub _guess_csv_setting {
|
||||
# Try different csv_settings
|
||||
# Return a $csv object with the best setting
|
||||
# Candidate settings, tried in this order
my @csv_file_types =
|
||||
( { binary => 1, sep_char => "\0" },
|
||||
{ binary => 1, sep_char => "\t" },
|
||||
{ binary => 1, sep_char => "," },
|
||||
{ binary => 1 },
|
||||
);
|
||||
|
||||
my $succesful_csv_type;
|
||||
my $csv;
|
||||
# NOTE(review): this loop has no early exit, so after the loop
# $succesful_csv_type reflects only the last candidate tried.
for my $csv_file_type (@csv_file_types) {
|
||||
$csv = Text::CSV->new ( $csv_file_type )
|
||||
or die "Cannot use CSV: ($csv_file_type) ".Text::CSV->error_diag ();
|
||||
# Assume success until a line fails to parse
$succesful_csv_type = $csv_file_type;
|
||||
my $last_n_fields;
|
||||
for my $line (@lines) {
|
||||
if($csv->parse($line)) {
|
||||
my $n_fields = ($csv->fields());
|
||||
# NOTE(review): $last_fields is not the $last_n_fields declared above
# and is never read in this sub - looks like an unfinished
# field-count consistency check.
$last_fields ||= $n_fields;
|
||||
|
||||
} else{
|
||||
# A line could not be parsed: this candidate failed
$succesful_csv_type = 0;
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if(not $succesful_csv_type) {
|
||||
$csv->error_diag();
|
||||
}
|
||||
|
||||
# NOTE(review): when no candidate succeeded, $succesful_csv_type is 0
# here and is passed to Text::CSV->new as is.
$csv = Text::CSV->new ( $succesful_csv_type ) # should set binary attribute.
|
||||
or die "Cannot use CSV: ".Text::CSV->error_diag ();
|
||||
return($csv);
|
||||
}
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
Can it be done more simply?
|
||||
|
||||
zcat D.gz | perl -ne 's/\s+/\n/g; open(OUT,">","out".(++$out)); print OUT' ; paste out* | pigz >Dt.gz
|
||||
|
||||
Chop CSV into fields
|
||||
|
||||
multi file paste
|
||||
|
||||
paste out1 out2 | paste - out3
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
|
||||
use File::Temp qw(tempfile tempdir);
|
||||
|
||||
#$Global::debug = 1;
|
||||
|
@ -8,7 +7,7 @@ my $block = "30m";
|
|||
debug("parallel --pipe --block $block -k --files -j150% transpose-par.pl\n");
|
||||
my @files = `parallel --pipe --block $block -k --files -j150% transpose-par.pl`;
|
||||
chomp(@files);
|
||||
my $tmp = File::Temp::tempdir(CLEANUP => 0);
|
||||
my $tmp = File::Temp::tempdir(CLEANUP => 1);
|
||||
my $fifo = "$tmp/0000000";
|
||||
my $cmd = "mkfifo $fifo; paste > $fifo ";
|
||||
my (@fifos, @args);
|
||||
|
|
Loading…
Reference in a new issue