parallel: --record_env and --env _ implemented. Passes test suite.

This commit is contained in:
Ole Tange 2013-08-14 20:11:00 +02:00
parent df204a69fa
commit ee529c2d2a
13 changed files with 151 additions and 56 deletions

View file

@ -207,11 +207,14 @@ New in this release:
* New signing key. Due to recommendations from NIST
http://www.keylength.com/en/4/ the signing key was changed from
1024D/4000g/ID:FFFFFFF1 to 9888R/ID:88888888.
1024D/ID:FFFFFFF1 to 9888R/ID:88888888.
* Agalma: an automated phylogenomics workflow
http://arxiv.org/pdf/1307.6432
* Transient Beowulf Clustering with GNU Parallel and SSHfs
http://www.reddit.com/r/linux/comments/1ka8mn/transient_beowulf_clustering_with_gnu_parallel/
* Aligning to unique regions
http://davetang.org/muse/2013/07/22/aligning-to-unique-regions/
@ -224,6 +227,12 @@ New in this release:
* Compression of files in parallel using GNU parallel
http://codextechnicanum.blogspot.dk/2013/07/compression-of-files-in-parallel-using.html
* TimeMachineっぽいバックアップスクリプト
http://rio.tc/2013/07/timemachine-1.html
* Some useful comments on GNU Parallel
https://news.ycombinator.com/item?id=6209767
* Bug fixes and man page updates.

View file

@ -432,7 +432,7 @@ sub spreadstdin {
$Global::start_no_new_jobs ||= 1;
if($opt::roundrobin) {
for my $job (values %Global::running) {
my $fh = $job->stdin();
my $fh = $job->fd(0);
close $fh;
}
my %incomplete_jobs = %Global::running;
@ -608,7 +608,7 @@ sub options_hash {
"nice=i" => \$opt::nice,
"timeout=s" => \$opt::timeout,
"tag" => \$opt::tag,
"tagstring=s" => \$opt::tagstring,
"tagstring|tag-string=s" => \$opt::tagstring,
"onall" => \$opt::onall,
"nonall" => \$opt::nonall,
"filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
@ -639,6 +639,7 @@ sub options_hash {
"arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
"trim=s" => \$opt::trim,
"env=s" => \@opt::env,
"recordenv|record-env" => \$opt::record_env,
"plain" => \$opt::plain,
"profile|J=s" => \@opt::profile,
"pipe|spreadstdin" => \$opt::pipe,
@ -727,7 +728,7 @@ sub get_options_from_array {
sub parse_options {
# Returns: N/A
# Defaults:
$Global::version = 20130730;
$Global::version = 20130814;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;
@ -756,16 +757,6 @@ sub parse_options {
@ARGV=read_options();
if(defined $opt::retired) {
::error("-g has been retired. Use --group.\n");
::error("-B has been retired. Use --bf.\n");
::error("-T has been retired. Use --tty.\n");
::error("-U has been retired. Use --er.\n");
::error("-W has been retired. Use --wd.\n");
::error("-Y has been retired. Use --shebang.\n");
::error("-H has been retired. Use --halt.\n");
::wait_and_exit(255);
}
if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
$Global::debug = (defined $opt::D);
if(defined $opt::X) { $Global::ContextReplace = 1; }
@ -811,6 +802,7 @@ sub parse_options {
}
if(defined $opt::version) { version(); wait_and_exit(0); }
if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
if(defined $opt::show_limits) { show_limits(); }
if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
@ -962,6 +954,16 @@ sub parse_options {
if(defined $opt::eta) {
$opt::progress = $opt::eta;
}
if(defined $opt::retired) {
::error("-g has been retired. Use --group.\n");
::error("-B has been retired. Use --bf.\n");
::error("-T has been retired. Use --tty.\n");
::error("-U has been retired. Use --er.\n");
::error("-W has been retired. Use --wd.\n");
::error("-Y has been retired. Use --shebang.\n");
::error("-H has been retired. Use --halt.\n");
::wait_and_exit(255);
}
parse_sshlogin();
parse_env_var();
@ -987,6 +989,17 @@ sub env_quote {
return $v;
}
sub record_env {
    # Record the names of all current environment variables (the keys
    # of %ENV), one name per line, in ~/.parallel/recorded_env.
    # The file is overwritten if it already exists.
    # On any failure to write the file an error is printed and the
    # program exits with value 255.
    # Returns: N/A
    my $recorded_env = $ENV{'HOME'} . "/.parallel/recorded_env";
    my $ok = open(my $vars_fh, ">", $recorded_env);
    if($ok) {
        # Output is buffered: a failed write (e.g. full disk) may only
        # show up when printing or closing, so check both.
        $ok = print {$vars_fh} map { $_,"\n" } keys %ENV;
        $ok = close($vars_fh) && $ok;
    }
    if(not $ok) {
        ::error("Cannot write to $recorded_env\n");
        ::wait_and_exit(255);
    }
}
sub parse_env_var {
# Parse --env and set $Global::envvar
# Returns: N/A
@ -996,6 +1009,21 @@ sub parse_env_var {
# Split up --env VAR1,VAR2
push @vars, split /,/, $varstring;
}
if(grep { /^_$/ } @vars) {
# Include all vars that are not in a clean environment
if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/recorded_env")) {
my @ignore = <$vars_fh>;
chomp @ignore;
my %ignore;
@ignore{@ignore} = @ignore;
close $vars_fh;
push @vars, grep { not defined $ignore{$_} } keys %ENV;
@vars = grep { not /^_$/ } @vars;
} else {
::error("Run '$Global::progname --record-env' in a clean environment first.\n");
::wait_and_exit(255);
}
}
# Keep only defined variables
@vars = grep { defined($ENV{$_}) } @vars;
my @qcsh = map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) } @vars;
@ -3729,13 +3757,10 @@ sub non_block_write {
my $self = shift;
my $something_written = 0;
use POSIX qw(:errno_h);
use Fcntl;
my $flags = '';
# use Fcntl;
# my $flags = '';
for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
my $in = $self->{'stdin'};
my $in = $self->fd(0);
# fcntl($in, F_GETFL, $flags)
# or die "Couldn't get flags for HANDLE : $!\n";
# $flags |= O_NONBLOCK;

View file

@ -335,7 +335,7 @@ Run command in background thus GNU B<parallel> will not wait for
completion of the command before exiting. This is the default if
B<--semaphore> is set.
See also: B<--fg>, B<man sem>
See also: B<--fg>, B<man sem>.
Implies B<--semaphore>.
@ -467,6 +467,11 @@ remote execution.
In Bash I<var> can also be a Bash function - just remember to B<export
-f> the function.
The variable '_' is special. It will copy all environment variables
except for the ones mentioned in ~/.parallel/recorded_env.
See also: B<--record-env>.
=item B<--eta>
@ -481,7 +486,7 @@ Implies B<--progress>.
Run command in foreground thus GNU B<parallel> will wait for
completion of the command before exiting.
See also: B<--bg>, B<man sem>
See also B<--bg>, B<man sem>.
Implies B<--semaphore>.
@ -838,7 +843,7 @@ reading everything at startup.
By sending GNU B<parallel> SIGUSR2 you can toggle turning on/off
B<--progress> on a running GNU B<parallel> process.
See also: B<--eta>
See also B<--eta>.
=item B<--max-args>=I<max-args>
@ -950,6 +955,14 @@ If the stdin (standard input) only contains whitespace, do not run the command.
If used with B<--pipe> this is slow.
=item B<--record-env> (alpha testing)
Record current environment variables in ~/.parallel/recorded_env. This
is useful before using B<--env _>.
See also B<--env>.
=item B<--recstart> I<startstring>
=item B<--recend> I<endstring>
@ -1042,7 +1055,7 @@ there. As GNU B<parallel> only looks at the sequence numbers in
B<--joblog> then the input, the command, and B<--joblog> all have to
remain unchanged; otherwise GNU B<parallel> may run wrong commands.
See also: B<--joblog>, B<--resume-failed>.
See also B<--joblog>, B<--resume-failed>.
=item B<--resume-failed>
@ -1055,7 +1068,7 @@ numbers in B<--joblog> then the input, the command, and B<--joblog>
all have to remain unchanged; otherwise GNU B<parallel> may run wrong
commands.
See also: B<--joblog>, B<--resume>.
See also B<--joblog>, B<--resume>.
=item B<--retries> I<n>
@ -1151,7 +1164,7 @@ Used with B<--fg>, B<--wait>, and B<--semaphorename>.
The command B<sem> is an alias for B<parallel --semaphore>.
See also: B<man sem>
See also B<man sem>.
=item B<--semaphorename> I<name>
@ -1169,7 +1182,7 @@ The semaphore is stored in ~/.parallel/semaphores/
Implies B<--semaphore>.
See also: B<man sem>
See also B<man sem>.
=item B<--semaphoretimeout> I<secs> (not implemented)
@ -1178,7 +1191,7 @@ If the semaphore is not released within secs seconds, take it anyway.
Implies B<--semaphore>.
See also: B<man sem>
See also B<man sem>.
=item B<--seqreplace> I<replace-str>
@ -1414,7 +1427,7 @@ To override use B<--gnu>.
Print the job to be run on stderr (standard error).
See also B<-v> and B<-p>.
See also B<-v>, B<-p>.
=item B<--transfer>
@ -1563,7 +1576,7 @@ Wait for all commands to complete.
Implies B<--semaphore>.
See also: B<man sem>
See also B<man sem>.
=item B<-X>
@ -2286,7 +2299,7 @@ B<rsync -Havessh src-dir/ fooserver:/dest-dir/>
If you are unable to push data, but need to pull them and the files
are called digits.png (e.g. 000000.png) you might be able to do:
<seq -w 0 99 | parallel rsync -Havessh fooserver:src-path/*{}.png destdir/>
B<seq -w 0 99 | parallel rsync -Havessh fooserver:src-path/*{}.png destdir/>
=head1 EXAMPLE: Use multiple inputs in one command
@ -3672,8 +3685,8 @@ it also uses rsync with ssh.
=head1 SEE ALSO
B<ssh>(1), B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>,
B<ssh>(1), B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>(1),
B<make>(1), B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1),
B<dxargs>(1), B<mdm>(1),
B<dxargs>(1), B<mdm>(1)
=cut

View file

@ -355,7 +355,7 @@ Run command in background thus GNU @strong{parallel} will not wait for
completion of the command before exiting. This is the default if
@strong{--semaphore} is set.
See also: @strong{--fg}, @strong{man sem}
See also: @strong{--fg}, @strong{man sem}.
Implies @strong{--semaphore}.
@ -495,6 +495,11 @@ remote execution.
In Bash @emph{var} can also be a Bash function - just remember to @strong{export
-f} the function.
The variable '_' is special. It will copy all environment variables
except for the ones mentioned in ~/.parallel/recorded_env.
See also: @strong{--record-env}.
@item @strong{--eta}
@anchor{@strong{--eta}}
@ -509,7 +514,7 @@ Implies @strong{--progress}.
Run command in foreground thus GNU @strong{parallel} will wait for
completion of the command before exiting.
See also: @strong{--bg}, @strong{man sem}
See also @strong{--bg}, @strong{man sem}.
Implies @strong{--semaphore}.
@ -895,7 +900,7 @@ reading everything at startup.
By sending GNU @strong{parallel} SIGUSR2 you can toggle turning on/off
@strong{--progress} on a running GNU @strong{parallel} process.
See also: @strong{--eta}
See also @strong{--eta}.
@item @strong{--max-args}=@emph{max-args}
@anchor{@strong{--max-args}=@emph{max-args}}
@ -1012,6 +1017,14 @@ If the stdin (standard input) only contains whitespace, do not run the command.
If used with @strong{--pipe} this is slow.
@item @strong{--record-env} (alpha testing)
@anchor{@strong{--record-env} (alpha testing)}
Record current environment variables in ~/.parallel/recorded_env. This
is useful before using @strong{--env _}.
See also @strong{--env}.
@item @strong{--recstart} @emph{startstring}
@anchor{@strong{--recstart} @emph{startstring}}
@ -1117,7 +1130,7 @@ there. As GNU @strong{parallel} only looks at the sequence numbers in
@strong{--joblog} then the input, the command, and @strong{--joblog} all have to
remain unchanged; otherwise GNU @strong{parallel} may run wrong commands.
See also: @strong{--joblog}, @strong{--resume-failed}.
See also @strong{--joblog}, @strong{--resume-failed}.
@item @strong{--resume-failed}
@anchor{@strong{--resume-failed}}
@ -1130,7 +1143,7 @@ numbers in @strong{--joblog} then the input, the command, and @strong{--joblog}
all have to remain unchanged; otherwise GNU @strong{parallel} may run wrong
commands.
See also: @strong{--joblog}, @strong{--resume}.
See also @strong{--joblog}, @strong{--resume}.
@item @strong{--retries} @emph{n}
@anchor{@strong{--retries} @emph{n}}
@ -1234,7 +1247,7 @@ Used with @strong{--fg}, @strong{--wait}, and @strong{--semaphorename}.
The command @strong{sem} is an alias for @strong{parallel --semaphore}.
See also: @strong{man sem}
See also @strong{man sem}.
@item @strong{--semaphorename} @emph{name}
@anchor{@strong{--semaphorename} @emph{name}}
@ -1253,7 +1266,7 @@ The semaphore is stored in ~/.parallel/semaphores/
Implies @strong{--semaphore}.
See also: @strong{man sem}
See also @strong{man sem}.
@item @strong{--semaphoretimeout} @emph{secs} (not implemented)
@anchor{@strong{--semaphoretimeout} @emph{secs} (not implemented)}
@ -1262,7 +1275,7 @@ If the semaphore is not released within secs seconds, take it anyway.
Implies @strong{--semaphore}.
See also: @strong{man sem}
See also @strong{man sem}.
@item @strong{--seqreplace} @emph{replace-str}
@anchor{@strong{--seqreplace} @emph{replace-str}}
@ -1520,7 +1533,7 @@ To override use @strong{--gnu}.
Print the job to be run on stderr (standard error).
See also @strong{-v} and @strong{-p}.
See also @strong{-v}, @strong{-p}.
@item @strong{--transfer}
@anchor{@strong{--transfer}}
@ -1680,7 +1693,7 @@ Wait for all commands to complete.
Implies @strong{--semaphore}.
See also: @strong{man sem}
See also @strong{man sem}.
@item @strong{-X}
@anchor{@strong{-X}}
@ -2474,7 +2487,7 @@ are not being transferred. To fix those run @strong{rsync} a final time:
If you are unable to push data, but need to pull them and the files
are called digits.png (e.g. 000000.png) you might be able to do:
<seq -w 0 99 | parallel rsync -Havessh fooserver:src-path/*@{@}.png destdir/>
@strong{seq -w 0 99 | parallel rsync -Havessh fooserver:src-path/*@{@}.png destdir/}
@chapter EXAMPLE: Use multiple inputs in one command
@anchor{EXAMPLE: Use multiple inputs in one command}
@ -3911,8 +3924,8 @@ it also uses rsync with ssh.
@chapter SEE ALSO
@anchor{SEE ALSO}
@strong{ssh}(1), @strong{rsync}(1), @strong{find}(1), @strong{xargs}(1), @strong{dirname},
@strong{ssh}(1), @strong{rsync}(1), @strong{find}(1), @strong{xargs}(1), @strong{dirname}(1),
@strong{make}(1), @strong{pexec}(1), @strong{ppss}(1), @strong{xjobs}(1), @strong{prll}(1),
@strong{dxargs}(1), @strong{mdm}(1),
@strong{dxargs}(1), @strong{mdm}(1)
@bye

View file

@ -3,4 +3,10 @@
cat <<'EOF' | sed -e s/\$SERVER1/$SERVER1/\;s/\$SERVER2/$SERVER2/ | parallel -j10 -k -L1
echo '### Test bug #34241: --pipe should not spawn unneeded processes'
seq 5 | ssh csh@lo parallel -k --block 5 --pipe -j10 cat\\\;echo Block_end
echo '### --env _'
fUbAr="OK FUBAR" parallel -S csh@lo --env _ echo '$fUbAr $PATH' ::: test
echo '### --env _ with explicit mentioning of normally ignored var $PATH'
fUbAr="OK FUBAR" parallel -S csh@lo --env PATH,_ echo '$fUbAr $PATH' ::: test
EOF

View file

@ -1,7 +1,7 @@
#!/bin/bash
rsync -Ha --delete input-files/testdir/ tmp/
cd tmp
rsync -Ha --delete input-files/testdir/ /tmp/parallel_$$
cd /tmp/parallel_$$
echo echo test of cat pipe sh | parallel -j 50 2>&1
find . -name '*.jpg' | parallel -j +0 convert -geometry 120 {} {//}/thumb_{/}
@ -11,15 +11,18 @@ ls | parallel echo ls | sort
ls | parallel -j 1 echo ls | sort
find -type f | parallel diff {} a/foo ">"{}.diff | sort
ls | parallel -v --group "ls {}|wc;echo {}" | sort
echo '### Check that we can have more input than max procs (-j 0)'
echo '### Check that we can have more input than max procs (-j 0) - touch'
perl -e 'print map {"more_than_5000-$_\n" } (4000..9999)' | parallel -vj 0 touch | sort | tail
echo '### rm'
perl -e 'print map {"more_than_5000-$_\n" } (4000..9900)' | parallel -j 0 rm | sort
cat <<'EOF' | sed -e 's/;$/; /;s/$SERVER1/'$SERVER1'/;s/$SERVER2/'$SERVER2'/' | stdout nice parallel -k -L1
ls | parallel -j500 'sleep 1; find {} -type f | perl -ne "END{print $..\" {}\n\"}"' | sort
ls | parallel --group -j500 'sleep 1; find {} -type f | perl -ne "END{print $..\" {}\n\"}"' | sort
find . -type f | parallel --group "perl -ne '/^\\S+\\s+\\S+$/ and print \$ARGV,\"\\n\"'" | sort
find . -type f | parallel -v --group "perl -ne '/^\\S+\\s+\\S+$/ and print \$ARGV,\"\\n\"'" | sort
find . -type f | parallel -q --group perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' | sort
find . -type f | parallel -qv --group perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' | sort
EOF
cd ..
rm -rf tmp
cd -
rm -rf /tmp/parallel_$$

View file

@ -17,6 +17,12 @@ echo '### bug #39554: Feature request: line buffered output --tag';
parallel --tag -j0 --linebuffer 'echo -n start {};sleep 0.{#};echo middle -n {};sleep 1.{#}5;echo next to last {};sleep 1.{#};echo -n last {}' ::: A B C
echo
echo '### test round-robin';
seq 1000 | parallel --block 1k --pipe --round-robin wc | sort
echo '### --version must have higher priority than retired options'
parallel --version -g -Y -U -W -T | tail
EOF
rm -rf tmp

View file

@ -5,7 +5,7 @@ parallel --joblog /tmp/parallel_joblog_exitval 'sleep {} && echo sleep was not k
parallel --joblog /tmp/parallel_joblog_signal 'sleep {}' ::: 100 2>/dev/null &
sleep 1
killall -6 sleep
sleep 0.1
sleep 1
grep -q 134 /tmp/parallel_joblog_exitval && echo exitval OK
grep -q '[^0-9]6[^0-9]' /tmp/parallel_joblog_signal && echo signal OK

View file

@ -6,3 +6,7 @@ Block_end
4
5
Block_end
### --env _
OK FUBAR /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games test
### --env _ with explicit mentioning of normally ignored var $PATH
OK FUBAR /home/tange/bin:/home/tange/bin:/home/tange/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/sbin:/usr/sbin:/home/tange/terminals/data-import:/usr/lib/oracle/xe/app/oracle/product/10.2.0/server/bin:/home/tange/bin:/sbin:/usr/sbin:/home/tange/terminals/data-import:/usr/lib/oracle/xe/app/oracle/product/10.2.0/server/bin:/home/tange/bin test

View file

@ -50,7 +50,7 @@ ls \ ab|wc;echo \ ab
ls a\ b|wc;echo a\ b
ls a|wc;echo a
ls b|wc;echo b
### Check that we can have more input than max procs (-j 0)
### Check that we can have more input than max procs (-j 0) - touch
touch more_than_5000-9990
touch more_than_5000-9991
touch more_than_5000-9992
@ -61,6 +61,7 @@ touch more_than_5000-9996
touch more_than_5000-9997
touch more_than_5000-9998
touch more_than_5000-9999
### rm
1 1-col.txt
1 1-col.txt.diff
1 2-col.txt
@ -591,3 +592,4 @@ perl -ne /^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\" ./more_than_5000-9996
perl -ne /^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\" ./more_than_5000-9997
perl -ne /^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\" ./more_than_5000-9998
perl -ne /^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\" ./more_than_5000-9999
/home/tange/privat/parallel/testsuite

View file

@ -28,3 +28,17 @@ A next to last A
B next to last B
C next to last C
A last AB last BC last C
### test round-robin
473 473 1893
527 527 2000
### --version must have higher priority than retired options
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
GNU parallel comes with no warranty.
Web site: http://www.gnu.org/software/parallel
When using GNU Parallel for a publication please cite:
O. Tange (2011): GNU Parallel - The Command-Line Power Tool,
;login: The USENIX Magazine, February 2011:42-47.

View file

@ -33,7 +33,7 @@ redhat.polarhome.com Works on redhat.polarhome.com
hpux.polarhome.com Works on hpux.polarhome.com
qnx.polarhome.com Works on qnx.polarhome.com
qnx.polarhome.com parallel: Warning: Cannot figure out number of CPU cores. Using 1.
irix.polarhome.com Unknown open() mode '>&=' at bin/parallel line 1318.
irix.polarhome.com Unknown open() mode '>&=' at bin/parallel line 1346.
openindiana.polarhome.com Works on openindiana.polarhome.com
openindiana.polarhome.com parallel: Warning: Cannot figure out number of CPU cores. Using 1.
suse.polarhome.com Works on suse.polarhome.com

View file

@ -1,5 +1,5 @@
### Test -k
parallel: Warning: Only enough file handles to run 19 jobs in parallel.
parallel: Warning: Only enough file handles to run 18 jobs in parallel.
Raising ulimit -n or /etc/security/limits.conf may help.
begin
1
@ -17,8 +17,8 @@ begin
13
14
15
16
parallel: Warning: No more file handles. Raising ulimit -n or /etc/security/limits.conf may help.
16
17
18
19