parallel: --csv initial version.

Ole Tange committed 2018-04-21 21:50:42 +02:00
parent 8b050b68d4
commit bbcef1032c
4 changed files with 85 additions and 15 deletions

Changed file 1 of 4

@@ -1047,6 +1047,7 @@ sub options_hash {
     "max-args|maxargs|n=i" => \$opt::max_args,
     "max-replace-args|N=i" => \$opt::max_replace_args,
     "colsep|col-sep|C=s" => \$opt::colsep,
+    "csv" => \$opt::csv,
     "help|h" => \$opt::help,
     "L=f" => \$opt::L,
     "max-lines|l:f" => \$opt::max_lines,
@@ -1175,6 +1176,14 @@ sub parse_options {
     }
     if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
     if(defined $opt::colsep) { $Global::trim = 'lr'; }
+    if(defined $opt::csv) {
+        $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
+        $opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
+        my $csv_setting = { binary => 1, sep_char => $opt::colsep };
+        my $sep = $csv_setting->{sep_char};
+        $Global::csv = Text::CSV->new($csv_setting)
+            or die "Cannot use CSV: ".Text::CSV->error_diag();
+    }
     if(defined $opt::header) {
         $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
     }
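
A note on the hunk above: --csv builds one Text::CSV object whose sep_char is whatever --colsep resolves to (defaulting to ","). The following standalone Perl sketch is not part of the commit; it assumes the Text::CSV module is installed and borrows its sample line from the parallel.pod example added later in this commit.

    use strict;
    use warnings;
    use Text::CSV;

    # --colsep (default ",") becomes Text::CSV's sep_char, mirroring $csv_setting above.
    my $colsep = ",";
    my $csv = Text::CSV->new({ binary => 1, sep_char => $colsep })
        or die "Cannot use CSV: " . Text::CSV->error_diag();

    # Sample line from the documentation added in this commit.
    my $line = '"1 big, 2 small","2""x4"" plank",12.34';
    $csv->parse($line) or die "CSV has unexpected format: ^$line^";
    print join("\n", $csv->fields()), "\n";
    # Prints:
    #   1 big, 2 small
    #   2"x4" plank
    #   12.34
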
@@ -1242,11 +1251,11 @@ sub parse_options {
         # Is the output a dir or CSV-file?
         if($opt::results =~ /\.csv$/i) {
             # CSV with , as separator
-            $Global::csv = ",";
+            $Global::csvsep = ",";
             $Global::membuffer ||= 1;
         } elsif($opt::results =~ /\.tsv$/i) {
             # CSV with TAB as separator
-            $Global::csv = "\t";
+            $Global::csvsep = "\t";
             $Global::membuffer ||= 1;
         }
     }
@@ -1254,7 +1263,7 @@ sub parse_options {
     my ($compress, $decompress) = find_compression_program();
     $opt::compress_program ||= $compress;
     $opt::decompress_program ||= $decompress;
-    if(($opt::results and not $Global::csv) or $opt::files) {
+    if(($opt::results and not $Global::csvsep) or $opt::files) {
         # No need for decompressing
         $opt::decompress_program = "cat >/dev/null";
     }
@@ -4098,7 +4107,7 @@ sub usage {
     "If you use programs that use GNU Parallel to process data for an article in a",
     "scientific publication, please cite:",
     "",
-    " O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
+    " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
     " DOI https://doi.org/10.5281/zenodo.1146014",
     "",
     "This helps funding further development; AND IT WON'T COST YOU A CENT.",
@@ -4126,7 +4135,7 @@ sub citation_notice {
     "If you use programs that use GNU Parallel to process data for an article in a",
     "scientific publication, please cite:",
     "",
-    " O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
+    " O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
     " DOI https://doi.org/10.5281/zenodo.1146014",
     "",
     "This helps funding further development; AND IT WON'T COST YOU A CENT.",
@@ -4220,7 +4229,7 @@ sub citation {
     " author = {Tange, Ole},",
     " title = {GNU Parallel 2018},",
     " publisher = {Ole Tange},",
-    " month = Apr,",
+    " month = Mar,",
     " year = 2018,",
     " ISBN = {9781387509881},",
     " doi = {10.5281/zenodo.1146014},",
@@ -7146,7 +7155,7 @@ sub openoutputfiles {
         }
         # Return immediately because we do not need setting filenames
         return;
-    } elsif($opt::results and not $Global::csv) {
+    } elsif($opt::results and not $Global::csvsep) {
         my $out = $self->{'commandline'}->results_out();
         my $seqname;
         if($out eq $opt::results or $out =~ m:/$:) {
@@ -8841,7 +8850,7 @@ sub print {
     if($opt::sqlworker and not $opt::results) {
         $Global::sql->output($self);
     }
-    if($Global::csv) {
+    if($Global::csvsep) {
         # Add output to CSV when finished
         $self->print_csv();
     }
@@ -8905,7 +8914,7 @@ sub print {
 sub combine_ref {
     # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
     my @part = @_;
-    my $sep = $Global::csv;
+    my $sep = $Global::csvsep;
     my $quot = '"';
     my @out = ();
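
The rename from $Global::csv to $Global::csvsep matters here because combine_ref uses that variable as the field separator when assembling --results .csv/.tsv output. The sketch below is a simplified approximation of that kind of combining, not the actual combine_ref body: it doubles embedded quotes and wraps any field containing the separator, a quote, or a newline.

    # Simplified CSV-combining sketch (hypothetical helper, not combine_ref itself).
    sub combine_fields_sketch {
        my ($sep, @fields) = @_;
        my @quoted = map {
            my $f = defined $_ ? $_ : "";
            if ($f =~ /["\n]/ or index($f, $sep) >= 0) {
                $f =~ s/"/""/g;       # double embedded quotes
                $f = '"' . $f . '"';  # wrap the field
            }
            $f;
        } @fields;
        return join($sep, @quoted);
    }

    # combine_fields_sketch(",", 'a"b', "1,2", "plain") => '"a""b","1,2",plain'
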
@@ -9008,7 +9017,7 @@ sub print_linebuffer {
             }
         }
         if(not $self->virgin()) {
-            if($opt::files or ($opt::results and not $Global::csv)) {
+            if($opt::files or ($opt::results and not $Global::csvsep)) {
                 # Print filename
                 if($fdno == 1 and not $self->fh($fdno,"printed")) {
                     print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
@@ -9055,7 +9064,7 @@ sub print_linebuffer {
         $self->add_returnsize($outputlength);
     }
     if(defined $self->{'exitstatus'}) {
-        if($opt::files or ($opt::results and not $Global::csv)) {
+        if($opt::files or ($opt::results and not $Global::csvsep)) {
             $self->add_returnsize(-s $self->fh($fdno,"name"));
         } else {
             # If the job is dead: print the remaining partial line
@@ -10459,7 +10468,7 @@ sub new {
         # Open SQL table
         $arg_sub_queue = SQLRecordQueue->new();
     } elsif(defined $colsep) {
-        # Open one file with colsep
+        # Open one file with colsep or CSV
         $arg_sub_queue = RecordColQueue->new($fhs);
     } else {
         # Open one or more files if multiple -a
@@ -10561,9 +10570,20 @@ sub get {
     my $line = $arg->orig();
     ::debug("run", "line='$line'\n");
     if($line ne "") {
+        if($opt::csv) {
+            # Parse CSV
+            chomp $line;
+            if(not $Global::csv->parse($line)) {
+                die "CSV has unexpected format: ^$line^";
+            }
+            for($Global::csv->fields()) {
+                push @out_record, Arg->new($_);
+            }
+        } else {
         for my $s (split /$opt::colsep/o, $line, -1) {
             push @out_record, Arg->new($s);
         }
+        }
     } else {
         push @out_record, Arg->new("");
     }
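
The change to get() above is the input-side core of --csv: instead of splitting on --colsep, the record is handed to Text::CSV->parse() and the columns come back from fields(). An illustrative comparison (not from the commit) using one of the test-suite lines shows why:

    use strict;
    use warnings;
    use Text::CSV;

    my $line = '"2""x3"" board","Value with ,",Column 3';

    # Naive splitting on the separator breaks quoted fields apart:
    my @naive = split /,/, $line, -1;
    # ('"2""x3"" board"', '"Value with ', '"', 'Column 3')  -- 4 wrong pieces

    # Text::CSV keeps quoted fields intact and collapses doubled quotes:
    my $csv = Text::CSV->new({ binary => 1, sep_char => "," })
        or die "Cannot use CSV: " . Text::CSV->error_diag();
    $csv->parse($line) or die "CSV has unexpected format: ^$line^";
    my @fields = $csv->fields();
    # ('2"x3" board', 'Value with ,', 'Column 3')  -- 3 columns, as intended
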
@@ -10797,15 +10817,30 @@ sub read_arg_from_fh {
     my $fh = shift;
     my $prepend;
     my $arg;
+    my $double_quotes = 0;
     do {{
         # This makes 10% faster
-        if(not ($arg = <$fh>)) {
+        if(not defined ($arg = <$fh>)) {
             if(defined $prepend) {
                 return Arg->new($prepend);
             } else {
                 return undef;
             }
         }
+        if($opt::csv) {
+            # We need to read a full CSV line.
+            $double_quotes += ($arg =~ y/"/"/);
+            if($double_quotes % 2) {
+                # CSV halflines with quoting:
+                #   col1,"col2 2""x3"" board newline  <- this one
+                #   cont",col3
+                $prepend .= $arg;
+                redo;
+            } else {
+                # Now we have a full CSV line
+                $double_quotes = 0;
+            }
+        }
         # Remove delimiter
         chomp $arg;
         if($Global::end_of_file_string and
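
read_arg_from_fh above stitches multi-line CSV records back together with a simple invariant: a record is complete only when the total number of double-quote characters read so far is even; otherwise a quoted field is still open and the next physical line is appended (via $prepend and redo). A minimal standalone sketch of that accumulation, with a hypothetical helper name and simplified record handling:

    # Read one logical CSV record, possibly spanning several physical lines.
    sub read_csv_record_sketch {
        my ($fh) = @_;
        my $record;
        my $double_quotes = 0;
        while (defined(my $line = <$fh>)) {
            $record .= $line;
            $double_quotes += ($line =~ y/"/"/);  # count '"' on this line
            last unless $double_quotes % 2;       # even count => record complete
        }
        return $record;  # undef if called at EOF
    }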

Changed file 2 of 4

@@ -592,6 +592,21 @@ Use I<prg> for (de)compressing temporary files. It is assumed that I<prg
 output) unless B<--decompress-program> is given.
 
+=item B<--csv> (alpha testing)
+
+Treat input as CSV-format. B<--colsep> sets the field delimiter. It
+works very much like B<--colsep> except it deals correctly with
+quoting:
+
+   echo '"1 big, 2 small","2""x4"" plank",12.34' |
+     parallel --csv echo {1} of {2} at {3}
+
+Even quoted newlines are parsed correctly:
+
+   (echo '"Start of field 1 with newline'
+    echo 'Line 2 in field 1";value 2') |
+     parallel --csv --colsep ';' echo Field 1: {1} Field 2: {2}
+
 =item B<--delimiter> I<delim>
 
 =item B<-d> I<delim>
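
For completeness, the quoted-newline example documented above reduces to a single Text::CSV parse once the physical lines have been joined; binary => 1 is what permits the embedded newline. A small hedged sketch, with the ';' separator from the documented command:

    use strict;
    use warnings;
    use Text::CSV;

    my $csv = Text::CSV->new({ binary => 1, sep_char => ";" })
        or die "Cannot use CSV: " . Text::CSV->error_diag();

    my $record = qq{"Start of field 1 with newline\nLine 2 in field 1";value 2};
    $csv->parse($record) or die "CSV has unexpected format: ^$record^";
    my ($field1, $field2) = $csv->fields();
    # $field1 spans two lines; $field2 is "value 2"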

Changed file 3 of 4

@@ -847,6 +847,20 @@ par_dryrun_append_joblog() {
     wc -l < /tmp/jl.$$
 }
 
+par_0_no_newline() {
+    echo 'A single zero without \n should not be ignored'
+    echo -n 0 | parallel echo
+}
+
+par_csv() {
+    (echo '"col1""x3""","new'
+     echo 'line col2","new2'
+     echo 'line col3",col 4') |
+        parallel --csv echo {1}-{2}-{3}-{4}
+    echo '"2""x3"" board","Value with ,",Column 3' |
+        parallel --csv echo {1}-{2}-{3}
+}
+
 export -f $(compgen -A function | grep par_)
 compgen -A function | grep par_ | sort |
     parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'

Changed file 4 of 4

@@ -1329,6 +1329,8 @@ echo '### Test --tty'
 /dev/tty
 ### 1 .par file from --files expected
 0
+par_0_no_newline A single zero without \n should not be ignored
+par_0_no_newline 0
 par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
 par_X_eta_div_zero
 par_X_eta_div_zero Computers / CPU cores / Max jobs to run
@@ -1360,6 +1362,10 @@ par_blocking_redir stdout
 par_colsep_0 bug --colsep 0
 par_colsep_0 OK
 par_colsep_0 OK
+par_csv col1"x3"-new
+par_csv line col2-new2
+par_csv line col3-col 4
+par_csv 2"x3" board-Value with ,-Column 3
 par_dryrun_append_joblog --dry-run should not append to joblog
 par_dryrun_append_joblog 1
 par_dryrun_append_joblog 2