mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 14:07:55 +00:00
parallel: --csv initial version.
This commit is contained in:
parent
8b050b68d4
commit
bbcef1032c
61
src/parallel
61
src/parallel
|
@ -1047,6 +1047,7 @@ sub options_hash {
|
||||||
"max-args|maxargs|n=i" => \$opt::max_args,
|
"max-args|maxargs|n=i" => \$opt::max_args,
|
||||||
"max-replace-args|N=i" => \$opt::max_replace_args,
|
"max-replace-args|N=i" => \$opt::max_replace_args,
|
||||||
"colsep|col-sep|C=s" => \$opt::colsep,
|
"colsep|col-sep|C=s" => \$opt::colsep,
|
||||||
|
"csv"=> \$opt::csv,
|
||||||
"help|h" => \$opt::help,
|
"help|h" => \$opt::help,
|
||||||
"L=f" => \$opt::L,
|
"L=f" => \$opt::L,
|
||||||
"max-lines|l:f" => \$opt::max_lines,
|
"max-lines|l:f" => \$opt::max_lines,
|
||||||
|
@ -1175,6 +1176,14 @@ sub parse_options {
|
||||||
}
|
}
|
||||||
if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
|
if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; }
|
||||||
if(defined $opt::colsep) { $Global::trim = 'lr'; }
|
if(defined $opt::colsep) { $Global::trim = 'lr'; }
|
||||||
|
if(defined $opt::csv) {
|
||||||
|
$Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;";
|
||||||
|
$opt::colsep = defined $opt::colsep ? $opt::colsep : ",";
|
||||||
|
my $csv_setting = { binary => 1, sep_char => $opt::colsep };
|
||||||
|
my $sep = $csv_setting->{sep_char};
|
||||||
|
$Global::csv = Text::CSV->new($csv_setting)
|
||||||
|
or die "Cannot use CSV: ".Text::CSV->error_diag ();
|
||||||
|
}
|
||||||
if(defined $opt::header) {
|
if(defined $opt::header) {
|
||||||
$opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
|
$opt::colsep = defined $opt::colsep ? $opt::colsep : "\t";
|
||||||
}
|
}
|
||||||
|
@ -1242,11 +1251,11 @@ sub parse_options {
|
||||||
# Is the output a dir or CSV-file?
|
# Is the output a dir or CSV-file?
|
||||||
if($opt::results =~ /\.csv$/i) {
|
if($opt::results =~ /\.csv$/i) {
|
||||||
# CSV with , as separator
|
# CSV with , as separator
|
||||||
$Global::csv = ",";
|
$Global::csvsep = ",";
|
||||||
$Global::membuffer ||= 1;
|
$Global::membuffer ||= 1;
|
||||||
} elsif($opt::results =~ /\.tsv$/i) {
|
} elsif($opt::results =~ /\.tsv$/i) {
|
||||||
# CSV with TAB as separator
|
# CSV with TAB as separator
|
||||||
$Global::csv = "\t";
|
$Global::csvsep = "\t";
|
||||||
$Global::membuffer ||= 1;
|
$Global::membuffer ||= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1254,7 +1263,7 @@ sub parse_options {
|
||||||
my ($compress, $decompress) = find_compression_program();
|
my ($compress, $decompress) = find_compression_program();
|
||||||
$opt::compress_program ||= $compress;
|
$opt::compress_program ||= $compress;
|
||||||
$opt::decompress_program ||= $decompress;
|
$opt::decompress_program ||= $decompress;
|
||||||
if(($opt::results and not $Global::csv) or $opt::files) {
|
if(($opt::results and not $Global::csvsep) or $opt::files) {
|
||||||
# No need for decompressing
|
# No need for decompressing
|
||||||
$opt::decompress_program = "cat >/dev/null";
|
$opt::decompress_program = "cat >/dev/null";
|
||||||
}
|
}
|
||||||
|
@ -4098,7 +4107,7 @@ sub usage {
|
||||||
"If you use programs that use GNU Parallel to process data for an article in a",
|
"If you use programs that use GNU Parallel to process data for an article in a",
|
||||||
"scientific publication, please cite:",
|
"scientific publication, please cite:",
|
||||||
"",
|
"",
|
||||||
" O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
|
" O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
|
||||||
" DOI https://doi.org/10.5281/zenodo.1146014",
|
" DOI https://doi.org/10.5281/zenodo.1146014",
|
||||||
"",
|
"",
|
||||||
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
||||||
|
@ -4126,7 +4135,7 @@ sub citation_notice {
|
||||||
"If you use programs that use GNU Parallel to process data for an article in a",
|
"If you use programs that use GNU Parallel to process data for an article in a",
|
||||||
"scientific publication, please cite:",
|
"scientific publication, please cite:",
|
||||||
"",
|
"",
|
||||||
" O. Tange (2018): GNU Parallel 2018, Apr 2018, ISBN 9781387509881,",
|
" O. Tange (2018): GNU Parallel 2018, Mar 2018, ISBN 9781387509881,",
|
||||||
" DOI https://doi.org/10.5281/zenodo.1146014",
|
" DOI https://doi.org/10.5281/zenodo.1146014",
|
||||||
"",
|
"",
|
||||||
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
"This helps funding further development; AND IT WON'T COST YOU A CENT.",
|
||||||
|
@ -4220,7 +4229,7 @@ sub citation {
|
||||||
" author = {Tange, Ole},",
|
" author = {Tange, Ole},",
|
||||||
" title = {GNU Parallel 2018},",
|
" title = {GNU Parallel 2018},",
|
||||||
" publisher = {Ole Tange},",
|
" publisher = {Ole Tange},",
|
||||||
" month = Apr,",
|
" month = Mar,",
|
||||||
" year = 2018,",
|
" year = 2018,",
|
||||||
" ISBN = {9781387509881},",
|
" ISBN = {9781387509881},",
|
||||||
" doi = {10.5281/zenodo.1146014},",
|
" doi = {10.5281/zenodo.1146014},",
|
||||||
|
@ -7146,7 +7155,7 @@ sub openoutputfiles {
|
||||||
}
|
}
|
||||||
# Return immediately because we do not need to set filenames
|
# Return immediately because we do not need to set filenames
|
||||||
return;
|
return;
|
||||||
} elsif($opt::results and not $Global::csv) {
|
} elsif($opt::results and not $Global::csvsep) {
|
||||||
my $out = $self->{'commandline'}->results_out();
|
my $out = $self->{'commandline'}->results_out();
|
||||||
my $seqname;
|
my $seqname;
|
||||||
if($out eq $opt::results or $out =~ m:/$:) {
|
if($out eq $opt::results or $out =~ m:/$:) {
|
||||||
|
@ -8841,7 +8850,7 @@ sub print {
|
||||||
if($opt::sqlworker and not $opt::results) {
|
if($opt::sqlworker and not $opt::results) {
|
||||||
$Global::sql->output($self);
|
$Global::sql->output($self);
|
||||||
}
|
}
|
||||||
if($Global::csv) {
|
if($Global::csvsep) {
|
||||||
# Add output to CSV when finished
|
# Add output to CSV when finished
|
||||||
$self->print_csv();
|
$self->print_csv();
|
||||||
}
|
}
|
||||||
|
@ -8905,7 +8914,7 @@ sub print {
|
||||||
sub combine_ref {
|
sub combine_ref {
|
||||||
# Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
|
# Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu)
|
||||||
my @part = @_;
|
my @part = @_;
|
||||||
my $sep = $Global::csv;
|
my $sep = $Global::csvsep;
|
||||||
my $quot = '"';
|
my $quot = '"';
|
||||||
my @out = ();
|
my @out = ();
|
||||||
|
|
||||||
|
@ -9008,7 +9017,7 @@ sub print_linebuffer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(not $self->virgin()) {
|
if(not $self->virgin()) {
|
||||||
if($opt::files or ($opt::results and not $Global::csv)) {
|
if($opt::files or ($opt::results and not $Global::csvsep)) {
|
||||||
# Print filename
|
# Print filename
|
||||||
if($fdno == 1 and not $self->fh($fdno,"printed")) {
|
if($fdno == 1 and not $self->fh($fdno,"printed")) {
|
||||||
print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
|
print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
|
||||||
|
@ -9055,7 +9064,7 @@ sub print_linebuffer {
|
||||||
$self->add_returnsize($outputlength);
|
$self->add_returnsize($outputlength);
|
||||||
}
|
}
|
||||||
if(defined $self->{'exitstatus'}) {
|
if(defined $self->{'exitstatus'}) {
|
||||||
if($opt::files or ($opt::results and not $Global::csv)) {
|
if($opt::files or ($opt::results and not $Global::csvsep)) {
|
||||||
$self->add_returnsize(-s $self->fh($fdno,"name"));
|
$self->add_returnsize(-s $self->fh($fdno,"name"));
|
||||||
} else {
|
} else {
|
||||||
# If the job is dead: print the remaining partial line
|
# If the job is dead: print the remaining partial line
|
||||||
|
@ -10459,7 +10468,7 @@ sub new {
|
||||||
# Open SQL table
|
# Open SQL table
|
||||||
$arg_sub_queue = SQLRecordQueue->new();
|
$arg_sub_queue = SQLRecordQueue->new();
|
||||||
} elsif(defined $colsep) {
|
} elsif(defined $colsep) {
|
||||||
# Open one file with colsep
|
# Open one file with colsep or CSV
|
||||||
$arg_sub_queue = RecordColQueue->new($fhs);
|
$arg_sub_queue = RecordColQueue->new($fhs);
|
||||||
} else {
|
} else {
|
||||||
# Open one or more files if multiple -a
|
# Open one or more files if multiple -a
|
||||||
|
@ -10561,9 +10570,20 @@ sub get {
|
||||||
my $line = $arg->orig();
|
my $line = $arg->orig();
|
||||||
::debug("run", "line='$line'\n");
|
::debug("run", "line='$line'\n");
|
||||||
if($line ne "") {
|
if($line ne "") {
|
||||||
|
if($opt::csv) {
|
||||||
|
# Parse CSV
|
||||||
|
chomp $line;
|
||||||
|
if(not $Global::csv->parse($line)) {
|
||||||
|
die "CSV has unexpected format: ^$line^";
|
||||||
|
}
|
||||||
|
for($Global::csv->fields()) {
|
||||||
|
push @out_record, Arg->new($_);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
for my $s (split /$opt::colsep/o, $line, -1) {
|
for my $s (split /$opt::colsep/o, $line, -1) {
|
||||||
push @out_record, Arg->new($s);
|
push @out_record, Arg->new($s);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
push @out_record, Arg->new("");
|
push @out_record, Arg->new("");
|
||||||
}
|
}
|
||||||
|
@ -10797,15 +10817,30 @@ sub read_arg_from_fh {
|
||||||
my $fh = shift;
|
my $fh = shift;
|
||||||
my $prepend;
|
my $prepend;
|
||||||
my $arg;
|
my $arg;
|
||||||
|
my $double_quotes = 0;
|
||||||
do {{
|
do {{
|
||||||
# This makes 10% faster
|
# This makes 10% faster
|
||||||
if(not ($arg = <$fh>)) {
|
if(not defined ($arg = <$fh>)) {
|
||||||
if(defined $prepend) {
|
if(defined $prepend) {
|
||||||
return Arg->new($prepend);
|
return Arg->new($prepend);
|
||||||
} else {
|
} else {
|
||||||
return undef;
|
return undef;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if($opt::csv) {
|
||||||
|
# We need to read a full CSV line.
|
||||||
|
$double_quotes += ($arg =~ y/"/"/);
|
||||||
|
if($double_quotes % 2) {
|
||||||
|
# CSV halflines with quoting:
|
||||||
|
# col1,"col2 2""x3"" board newline <-this one
|
||||||
|
# cont",col3
|
||||||
|
$prepend .= $arg;
|
||||||
|
redo;
|
||||||
|
} else {
|
||||||
|
# Now we have a full CSV line
|
||||||
|
$double_quotes = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
# Remove delimiter
|
# Remove delimiter
|
||||||
chomp $arg;
|
chomp $arg;
|
||||||
if($Global::end_of_file_string and
|
if($Global::end_of_file_string and
|
||||||
|
|
|
@ -592,6 +592,21 @@ Use I<prg> for (de)compressing temporary files. It is assumed that I<prg
|
||||||
output) unless B<--decompress-program> is given.
|
output) unless B<--decompress-program> is given.
|
||||||
|
|
||||||
|
|
||||||
|
=item B<--csv> (alpha testing)
|
||||||
|
|
||||||
|
Treat input as CSV format. B<--colsep> sets the field delimiter; the
|
||||||
|
default is a comma. B<--csv> works very much like B<--colsep> except
|
||||||
|
that it deals correctly with quoting:
|
||||||
|
|
||||||
|
echo '"1 big, 2 small","2""x4"" plank",12.34' |
|
||||||
|
parallel --csv echo {1} of {2} at {3}
|
||||||
|
|
||||||
|
Even quoted newlines are parsed correctly:
|
||||||
|
|
||||||
|
(echo '"Start of field 1 with newline'
|
||||||
|
echo 'Line 2 in field 1";value 2') |
|
||||||
|
parallel --csv --colsep ';' echo Field 1: {1} Field 2: {2}
|
||||||
|
|
||||||
=item B<--delimiter> I<delim>
|
=item B<--delimiter> I<delim>
|
||||||
|
|
||||||
=item B<-d> I<delim>
|
=item B<-d> I<delim>
|
||||||
|
|
|
@ -847,6 +847,20 @@ par_dryrun_append_joblog() {
|
||||||
wc -l < /tmp/jl.$$
|
wc -l < /tmp/jl.$$
|
||||||
}
|
}
|
||||||
|
|
||||||
|
par_0_no_newline() {
|
||||||
|
echo 'A single zero without \n should not be ignored'
|
||||||
|
echo -n 0 | parallel echo
|
||||||
|
}
|
||||||
|
|
||||||
|
par_csv() {
|
||||||
|
(echo '"col1""x3""","new'
|
||||||
|
echo 'line col2","new2'
|
||||||
|
echo 'line col3",col 4') |
|
||||||
|
parallel --csv echo {1}-{2}-{3}-{4}
|
||||||
|
echo '"2""x3"" board","Value with ,",Column 3' |
|
||||||
|
parallel --csv echo {1}-{2}-{3}
|
||||||
|
}
|
||||||
|
|
||||||
export -f $(compgen -A function | grep par_)
|
export -f $(compgen -A function | grep par_)
|
||||||
compgen -A function | grep par_ | sort |
|
compgen -A function | grep par_ | sort |
|
||||||
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
|
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
|
||||||
|
|
|
@ -1329,6 +1329,8 @@ echo '### Test --tty'
|
||||||
/dev/tty
|
/dev/tty
|
||||||
### 1 .par file from --files expected
|
### 1 .par file from --files expected
|
||||||
0
|
0
|
||||||
|
par_0_no_newline A single zero without \n should not be ignored
|
||||||
|
par_0_no_newline 0
|
||||||
par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
|
par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
|
||||||
par_X_eta_div_zero
|
par_X_eta_div_zero
|
||||||
par_X_eta_div_zero Computers / CPU cores / Max jobs to run
|
par_X_eta_div_zero Computers / CPU cores / Max jobs to run
|
||||||
|
@ -1360,6 +1362,10 @@ par_blocking_redir stdout
|
||||||
par_colsep_0 bug --colsep 0
|
par_colsep_0 bug --colsep 0
|
||||||
par_colsep_0 OK
|
par_colsep_0 OK
|
||||||
par_colsep_0 OK
|
par_colsep_0 OK
|
||||||
|
par_csv col1"x3"-new
|
||||||
|
par_csv line col2-new2
|
||||||
|
par_csv line col3-col 4
|
||||||
|
par_csv 2"x3" board-Value with ,-Column 3
|
||||||
par_dryrun_append_joblog --dry-run should not append to joblog
|
par_dryrun_append_joblog --dry-run should not append to joblog
|
||||||
par_dryrun_append_joblog 1
|
par_dryrun_append_joblog 1
|
||||||
par_dryrun_append_joblog 2
|
par_dryrun_append_joblog 2
|
||||||
|
|
Loading…
Reference in a new issue