parallel: --header with {colnames} for multiple :::'s

This commit is contained in:
Ole Tange 2012-01-22 04:42:05 +01:00
parent add040c278
commit 5e8e95ecf0
10 changed files with 87 additions and 48 deletions

View file

@ -1,4 +1,4 @@
AC_INIT([parallel], [20111222], [bug-parallel@gnu.org])
AC_INIT([parallel], [20120122], [bug-parallel@gnu.org])
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([

View file

@ -25,7 +25,7 @@ http://nd.gd/0s http://www.youtube.com/watch?v=OpaiGYxkSuQ
http://nd.gd/0t http://en.wikipedia.org/wiki/Xargs#The_separator_problem
http://nd.gd/3k http://www.gnu.org/software/parallel/man.html#differences_between_xargs_and_gnu_parallel
http://nd.gd/po http://www.gnu.org/software/parallel/man.html#example__distributing_work_to_local_and_remote_computers
http://nd.gd/039 http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1
http://ur1.ca/7h7yx http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1
If you like xargs you may love GNU Parallel: http://nd.gd/0s
@ -64,7 +64,8 @@ You can install GNU Parallel simply by:
chmod 755 parallel
cp parallel sem
Watch the intro videos for GNU Parallel to learn more: http://nd.gd/039
Watch the intro videos for GNU Parallel to learn more:
https://www.youtube.com/playlist?list=PL284C9FF2488BC6D1
GNU Parallel also makes it possible to run small scripts. Try this:

View file

@ -185,27 +185,42 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
Jesse Alama <jesse.alama@gmail.com>
Subject: GNU Parallel 20120122 ('Fhqwhgads') released
Subject: GNU Parallel 20120122 ('Dead SOPA') released
GNU Parallel 20120122 ('Fhqwhgads') has been released. It is
GNU Parallel 20120122 ('Dead SOPA') has been released. It is
available for download at: http://ftp.gnu.org/gnu/parallel/
New in this release:
* niceload now propagates exit status correctly. Passes testsuite.
* --header : uses the first input line as column names and you can
then use {colname} as a replacement string in the command. This also
works with multiple :::'s.
* Show your support for GNU Parallel. For 20 EUR incl world wide
shipping get a GNU Parallel T-shirt+mug+pen+100 postcards. Email
your shirt size and address for details to
parallel-support@tange.dk.
* --header <regexp> matches a header as a regular expression and
repeats the header for each block with --pipe.
* --header uses the first input line as column names and you can then
use {colname} as a replacement string in the command.
* --resume resumes from the last unfinished job. Useful if you stop
GNU Parallel and restart it later with the same arguments.
* --resume resumes from the last unfinished job.
* niceload now propagates exit status correctly.
* Show your support for GNU Parallel. For a limited time you can get a
GNU Parallel T-shirt+mug+pen+100 postcards at 20 EUR. See
https://www.gnu.org/software/parallel/merchandise.html
* Options -g -B -T -U -W -Y are retired as warned 6 months ago.
* GNU Parallel referenced in article on Holographic Grid Cloud. Thanks
to Stefano Gallozzi. http://arxiv.org/pdf/1112.6128
* Article in IEEE Software on GNU Parallel. Thanks to Diomidis
Spinellis. http://www.spinellis.gr/blog/20110911/
* An article on Narwhal which uses GNU Parallel. They forgot to add
the reference. Please remember --bibtex if you use GNU Parallel in
an article.
http://bioinformatics.oxfordjournals.org/content/early/2011/11/08/bioinformatics.btr613.full.pdf
* Blog post on using GNU Parallel to speed up BLAST queries:
http://blog.mckuhn.de/2012/01/embarrassingly-parallel-blast-search.html

View file

@ -24,7 +24,7 @@
use strict;
use Getopt::Long;
$Global::progname="niceload";
$Global::version = 20111222;
$Global::version = 20120122;
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV) || die_usage();
if($::opt_version) {

View file

@ -74,17 +74,19 @@ if($::opt_skip_first_line) {
}
if($::opt_header and not $::opt_pipe) {
my $fh = $fhlist[0];
my $line = <$fh>;
chomp($line);
# split with colsep or \t
# TODO should $header force $colsep = \t if undef?
my $delimiter = $::opt_colsep;
my $id = 1;
::debug("Delimiter: '$delimiter'");
for my $s (split /$delimiter/o, $line) {
::debug("Colname: '$s'");
$command =~ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
$id++;
for my $fh (@fhlist) {
my $line = <$fh>;
chomp($line);
::debug("Delimiter: '$delimiter'");
for my $s (split /$delimiter/o, $line) {
::debug("Colname: '$s'");
$command =~ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
$id++;
}
}
}
@ -541,7 +543,7 @@ sub get_options_from_array {
sub parse_options {
# Returns: N/A
# Defaults:
$Global::version = 20120113;
$Global::version = 20120122;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;

View file

@ -1382,6 +1382,8 @@ Compare these two:
parallel echo {1} {2} ::: 1 2 3 ::: a b c
parallel --xapply echo {1} {2} ::: 1 2 3 ::: a b c
See also B<--header>.
=item B<--shebang>
@ -1688,30 +1690,6 @@ can be written like this:
B<cat list | parallel "do_something {} scale {.}.jpg ; do_step2 <{} {.}" | process_output>
=head1 EXAMPLE: Using shell variables
When using shell variables you need to quote them correctly as they
may otherwise be split on spaces.
Notice the difference between:
V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar)
parallel echo ::: ${V[@]} # This is probably not what you want
and:
V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar)
parallel echo ::: "${V[@]}"
When using variables in the actual command that contains special
characters (e.g. space) you can quote them using B<'"$VAR"'> or using
"'s and B<-q>:
V="Here are two "
parallel echo "'$V'" ::: spaces
parallel -q echo "$V" ::: spaces
=head1 EXAMPLE: Rewriting nested for-loops
Nested for-loops like this:
@ -1739,6 +1717,44 @@ can be written like this:
B<parallel echo {1} {2} ::: M F ::: S M L XL XXL | sort>
=head1 EXAMPLE: for-loops with column names
When doing multiple nested for-loops it can be easier to keep track of
the loop variable if it is named instead of just having a number. Use
B<--header :> to let the first argument be a named alias for the
positional replacement string:
parallel --header : echo {gender} {size} ::: gender M F ::: size S M L XL XXL
This also works if the input file is a file with columns:
cat addressbook.tsv | parallel --colsep '\t' --header : echo {Name} {E-mail address}
=head1 EXAMPLE: Using shell variables
When using shell variables you need to quote them correctly as they
may otherwise be split on spaces.
Notice the difference between:
V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar)
parallel echo ::: ${V[@]} # This is probably not what you want
and:
V=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar)
parallel echo ::: "${V[@]}"
When using variables in the actual command that contain special
characters (e.g. space) you can quote them using B<'"$VAR"'> or using
"'s and B<-q>:
V="Here are two "
parallel echo "'$V'" ::: spaces
parallel -q echo "$V" ::: spaces
=head1 EXAMPLE: Group output lines
When running jobs that output data, you often do not want the output

View file

@ -556,7 +556,7 @@ $Global::Initfile && unlink $Global::Initfile;
exit ($err);
sub parse_options {
$Global::version = 20111222;
$Global::version = 20120122;
$Global::progname = 'sql';
# This must be done first as this may exec myself

View file

@ -18,6 +18,9 @@ echo "### Test --header with -N";
echo "### Test --header with --block 1k";
(echo h1; echo h2; perl -e '$a="x"x110;for(1..22){print $_,$a,"\n"'})| parallel -j1 --pipe -k --block 1k --header '\n.*\n' echo Start\;cat \; echo Stop
echo "### Test --header with multiple :::"
parallel --header : echo {a} {b} {1} {2} ::: b b1 ::: a a2
echo '### Test --shellquote'
cat <<'_EOF' | parallel --shellquote
awk -v FS="\",\"" '{print $1, $3, $4, $5, $9, $14}' | grep -v "#" | sed -e '1d' -e 's/\"//g' -e 's/\/\/\//\t/g' | cut -f1-6,11 | sed -e 's/\/\//\t/g' -e 's/ /\t/g

View file

@ -16,9 +16,9 @@ OK
### Test --timeout
1.1
1.1
5.5
6.6
7.7
8.8
### Test retired
parallel: -g has been retired. Use --group.
parallel: -B has been retired. Use --bf.

View file

@ -58,5 +58,7 @@ Start
h1
h2
Stop
### Test --header with multiple :::
a2 b1 b1 a2
### Test --shellquote
awk\ -v\ FS=\"\\\",\\\"\"\ \'\{print\ \$1,\ \$3,\ \$4,\ \$5,\ \$9,\ \$14\}\'\ \|\ grep\ -v\ \"\#\"\ \|\ sed\ -e\ \'1d\'\ -e\ \'s/\\\"//g\'\ -e\ \'s/\\/\\/\\//\\t/g\'\ \|\ cut\ -f1-6,11\ \|\ sed\ -e\ \'s/\\/\\//\\t/g\'\ -e\ \'s/\ /\\t/g