mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parallel: --csv initial version.
This commit is contained in:
parent
8b050b68d4
commit
bbcef1032c
61
src/parallel
61
src/parallel
|
@ -1047,6 +1047,7 @@ sub options_hash {
|
|||
"max-args|maxargs|n=i" => \$opt::max_args,
|
||||
"max-replace-args|N=i" => \$opt::max_replace_args,
|
||||
"colsep|col-sep|C=s" => \$opt::colsep,
|
||||
"csv"=> \$opt::csv,
|
||||
"help|h" => \$opt::help,
|
||||
"L=f" => \$opt::L,
|
||||
"max-lines|l:f" => \$opt::max_lines,
|
||||
|
@ -1175,6 +1176,14 @@ sub parse_options {
|
|||
}
|
||||
if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
|
||||
if(defined $opt::colsep) { $Global::trim = 'lr'; }
|
||||
if(defined $opt::csv) {
|
||||
$Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
|
||||
$opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
|
||||
my $csv_setting = { binary => 1, sep_char => $opt::colsep };
|
||||
my $sep = $csv_setting->{sep_char};
|
||||
$Global::csv = Text::CSV->new($csv_setting)
|
||||
or die "Cannot use CSV: ".Text::CSV->error_diag ();
|
||||
}
|
||||
if(defined $opt::header) {
|
||||
$opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
|
||||
}
|
||||
|
@ -1242,11 +1251,11 @@ sub parse_options {
|
|||
# Is the output a dir or CSV-file?
|
||||
if($opt::results =~ /\.csv$/i) {
|
||||
# CSV with , as separator
|
||||
$Global::csv = ",";
|
||||
$Global::csvsep = ",";
|
||||
$Global::membuffer ||= 1;
|
||||
} elsif($opt::results =~ /\.tsv$/i) {
|
||||
# CSV with TAB as separator
|
||||
$Global::csv = "\t";
|
||||
$Global::csvsep = "\t";
|
||||
$Global::membuffer ||= 1;
|
||||
}
|
||||
}
|
||||
|
@ -1254,7 +1263,7 @@ sub parse_options {
|
|||
my ($compress, $decompress) = find_compression_program();
|
||||
$opt::compress_program ||= $compress;
|
||||
$opt::decompress_program ||= $decompress;
|
||||
if(($opt::results and not $Global::csv) or $opt::files) {
|
||||
if(($opt::results and not $Global::csvsep) or $opt::files) {
|
||||
# No need for decompressing
|
||||
$opt::decompress_program = "cat >/dev/null";
|
||||
}
|
||||
|
@ -4098,7 +4107,7 @@ sub usage {
|
|||
"If you use programs that use GNU Parallel to process data for an article in a",
|
||||
"scientific publication, please cite:",
|
||||
"",
|
||||
" O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
|
||||
" O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
|
||||
" DOI https://doi.org/10.5281/zenodo.1146014",
|
||||
"",
|
||||
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
||||
|
@ -4126,7 +4135,7 @@ sub citation_notice {
|
|||
"If you use programs that use GNU Parallel to process data for an article in a",
|
||||
"scientific publication, please cite:",
|
||||
"",
|
||||
" O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
|
||||
" O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
|
||||
" DOI https://doi.org/10.5281/zenodo.1146014",
|
||||
"",
|
||||
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
||||
|
@ -4220,7 +4229,7 @@ sub citation {
|
|||
" author = {Tange, Ole},",
|
||||
" title = {GNU Parallel 2018},",
|
||||
" publisher = {Ole Tange},",
|
||||
" month = Apr,",
|
||||
" month = Mar,",
|
||||
" year = 2018,",
|
||||
" ISBN = {9781387509881},",
|
||||
" doi = {10.5281/zenodo.1146014},",
|
||||
|
@ -7146,7 +7155,7 @@ sub openoutputfiles {
|
|||
}
|
||||
# Return immediately because we do not need setting filenames
|
||||
return;
|
||||
} elsif($opt::results and not $Global::csv) {
|
||||
} elsif($opt::results and not $Global::csvsep) {
|
||||
my $out = $self->{'commandline'}->results_out();
|
||||
my $seqname;
|
||||
if($out eq $opt::results or $out =~ m:/$:) {
|
||||
|
@ -8841,7 +8850,7 @@ sub print {
|
|||
if($opt::sqlworker and not $opt::results) {
|
||||
$Global::sql->output($self);
|
||||
}
|
||||
if($Global::csv) {
|
||||
if($Global::csvsep) {
|
||||
# Add output to CSV when finished
|
||||
$self->print_csv();
|
||||
}
|
||||
|
@ -8905,7 +8914,7 @@ sub print {
|
|||
sub combine_ref {
|
||||
# Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
|
||||
my @part = @_;
|
||||
my $sep = $Global::csv;
|
||||
my $sep = $Global::csvsep;
|
||||
my $quot = '"';
|
||||
my @out = ();
|
||||
|
||||
|
@ -9008,7 +9017,7 @@ sub print_linebuffer {
|
|||
}
|
||||
}
|
||||
if(not $self->virgin()) {
|
||||
if($opt::files or ($opt::results and not $Global::csv)) {
|
||||
if($opt::files or ($opt::results and not $Global::csvsep)) {
|
||||
# Print filename
|
||||
if($fdno == 1 and not $self->fh($fdno,"printed")) {
|
||||
print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
|
||||
|
@ -9055,7 +9064,7 @@ sub print_linebuffer {
|
|||
$self->add_returnsize($outputlength);
|
||||
}
|
||||
if(defined $self->{'exitstatus'}) {
|
||||
if($opt::files or ($opt::results and not $Global::csv)) {
|
||||
if($opt::files or ($opt::results and not $Global::csvsep)) {
|
||||
$self->add_returnsize(-s $self->fh($fdno,"name"));
|
||||
} else {
|
||||
# If the job is dead: print the remaining partial line
|
||||
|
@ -10459,7 +10468,7 @@ sub new {
|
|||
# Open SQL table
|
||||
$arg_sub_queue = SQLRecordQueue->new();
|
||||
} elsif(defined $colsep) {
|
||||
# Open one file with colsep
|
||||
# Open one file with colsep or CSV
|
||||
$arg_sub_queue = RecordColQueue->new($fhs);
|
||||
} else {
|
||||
# Open one or more files if multiple -a
|
||||
|
@ -10561,9 +10570,20 @@ sub get {
|
|||
my $line = $arg->orig();
|
||||
::debug("run", "line='$line'\n");
|
||||
if($line ne "") {
|
||||
if($opt::csv) {
|
||||
# Parse CSV
|
||||
chomp $line;
|
||||
if(not $Global::csv->parse($line)) {
|
||||
die "CSV has unexpected format: ^$line^";
|
||||
}
|
||||
for($Global::csv->fields()) {
|
||||
push @out_record, Arg->new($_);
|
||||
}
|
||||
} else {
|
||||
for my $s (split /$opt::colsep/o, $line, -1) {
|
||||
push @out_record, Arg->new($s);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
push @out_record, Arg->new("");
|
||||
}
|
||||
|
@ -10797,15 +10817,30 @@ sub read_arg_from_fh {
|
|||
my $fh = shift;
|
||||
my $prepend;
|
||||
my $arg;
|
||||
my $double_quotes = 0;
|
||||
do {{
|
||||
# This makes 10% faster
|
||||
if(not ($arg = <$fh>)) {
|
||||
if(not defined ($arg = <$fh>)) {
|
||||
if(defined $prepend) {
|
||||
return Arg->new($prepend);
|
||||
} else {
|
||||
return undef;
|
||||
}
|
||||
}
|
||||
if($opt::csv) {
|
||||
# We need to read a full CSV line.
|
||||
$double_quotes += ($arg =~ y/"/"/);
|
||||
if($double_quotes % 2) {
|
||||
# CSV halflines with quoting:
|
||||
# col1,"col2 2""x3"" board newline <-this one
|
||||
# cont",col3
|
||||
$prepend .= $arg;
|
||||
redo;
|
||||
} else {
|
||||
# Now we have a full CSV line
|
||||
$double_quotes = 0;
|
||||
}
|
||||
}
|
||||
# Remove delimiter
|
||||
chomp $arg;
|
||||
if($Global::end_of_file_string and
|
||||
|
|
|
@ -592,6 +592,21 @@ Use I<prg> for (de)compressing temporary files. It is assumed that I<prg
|
|||
output) unless B<--decompress-program> is given.
|
||||
|
||||
|
||||
=item B<--csv> (alpha testing)
|
||||
|
||||
Treat input as CSV format. B<--colsep> sets the field delimiter. B<--csv>
|
||||
works very much like B<--colsep> except it deals correctly with
|
||||
quoting:
|
||||
|
||||
echo '"1 big, 2 small","2""x4"" plank",12.34' |
|
||||
parallel --csv echo {1} of {2} at {3}
|
||||
|
||||
Even quoted newlines are parsed correctly:
|
||||
|
||||
(echo '"Start of field 1 with newline'
|
||||
echo 'Line 2 in field 1";value 2') |
|
||||
parallel --csv --colsep ';' echo Field 1: {1} Field 2: {2}
|
||||
|
||||
=item B<--delimiter> I<delim>
|
||||
|
||||
=item B<-d> I<delim>
|
||||
|
|
|
@ -847,6 +847,20 @@ par_dryrun_append_joblog() {
|
|||
wc -l < /tmp/jl.$$
|
||||
}
|
||||
|
||||
par_0_no_newline() {
|
||||
echo 'A single zero without \n should not be ignored'
|
||||
echo -n 0 | parallel echo
|
||||
}
|
||||
|
||||
par_csv() {
|
||||
(echo '"col1""x3""","new'
|
||||
echo 'line col2","new2'
|
||||
echo 'line col3",col 4') |
|
||||
parallel --csv echo {1}-{2}-{3}-{4}
|
||||
echo '"2""x3"" board","Value with ,",Column 3' |
|
||||
parallel --csv echo {1}-{2}-{3}
|
||||
}
|
||||
|
||||
export -f $(compgen -A function | grep par_)
|
||||
compgen -A function | grep par_ | sort |
|
||||
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
|
||||
|
|
|
@ -1329,6 +1329,8 @@ echo '### Test --tty'
|
|||
/dev/tty
|
||||
### 1 .par file from --files expected
|
||||
0
|
||||
par_0_no_newline A single zero without \n should not be ignored
|
||||
par_0_no_newline 0
|
||||
par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
|
||||
par_X_eta_div_zero
|
||||
par_X_eta_div_zero Computers / CPU cores / Max jobs to run
|
||||
|
@ -1360,6 +1362,10 @@ par_blocking_redir stdout
|
|||
par_colsep_0 bug --colsep 0
|
||||
par_colsep_0 OK
|
||||
par_colsep_0 OK
|
||||
par_csv col1"x3"-new
|
||||
par_csv line col2-new2
|
||||
par_csv line col3-col 4
|
||||
par_csv 2"x3" board-Value with ,-Column 3
|
||||
par_dryrun_append_joblog --dry-run should not append to joblog
|
||||
par_dryrun_append_joblog 1
|
||||
par_dryrun_append_joblog 2
|
||||
|
|
Loading…
Reference in a new issue