Fixed bug #50229: --tee -S fails.

Minor adjustments to dynamic --rpl's in --plus.
Documentation prettification for man.pdf.
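
A rough sketch of the combination the --tee -S fix targets, modeled on the testsuite's par_tee_ssh change further down (`lo` is the testsuite's loopback sshlogin; the grep/wc payload is only illustrative):

  seq 1000 | parallel --pipe --tee -kS lo --tag 'grep {1} | wc {2}' ::: 0 1 ::: -l -c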
Ole Tange 2017-03-21 22:20:38 +01:00
parent d08c8ac417
commit 05afa7ec65
10 changed files with 290 additions and 190 deletions


@ -1,6 +1,7 @@
#!/bin/bash
# Copyright (C) 2013 Ole Tange and Free Software Foundation, Inc.
# Copyright (C) 2013,2014,2015,2017 Ole Tange and Free Software
# Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -13,108 +14,119 @@
# It first tries to install it globally.
# If that fails, it does a personal installation.
# If that fails, it copies to $HOME/bin
#
# Download and run the script directly by:
# (wget -O - pi.dk/3 || curl pi.dk/3/ || fetch -o - http://pi.dk/3) | bash
# tail on openindiana must be /usr/xpg4/bin/tail
TAIL=$(echo | tail -n 1 2>/dev/null && echo tail || (echo | /usr/xpg4/bin/tail -n 1 && echo /usr/xpg4/bin/tail))
# grep on openindiana must be /usr/xpg4/bin/grep
GREP=$(echo | grep -vE . 2>/dev/null && echo grep || (echo | /usr/xpg4/bin/grep -vE . && echo /usr/xpg4/bin/grep))
# FreeBSD prefers 'fetch', MacOS prefers 'curl', Linux prefers 'wget'
GET=$(
(fetch -o /dev/null file:///bin/sh && echo fetch -o -) ||
(curl -h >/dev/null && echo curl -L) ||
(wget -h >/dev/null && echo wget -qO -) ||
echo 'No wget, curl, fetch: Please inform parallel@gnu.org what you use for downloading URLs' >&2
)
if test "$GET" = ""; then
exit 1
fi
run() {
# tail on openindiana must be /usr/xpg4/bin/tail
TAIL=$(echo | tail -n 1 2>/dev/null && echo tail ||
(echo | /usr/xpg4/bin/tail -n 1 && echo /usr/xpg4/bin/tail))
# grep on openindiana must be /usr/xpg4/bin/grep
GREP=$(echo | grep -vE . 2>/dev/null && echo grep ||
(echo | /usr/xpg4/bin/grep -vE . && echo /usr/xpg4/bin/grep))
# FreeBSD prefers 'fetch', MacOS prefers 'curl', Linux prefers 'wget'
GET=$(
(fetch -o /dev/null file:///bin/sh && echo fetch -o -) ||
(curl -h >/dev/null && echo curl -L) ||
(wget -h >/dev/null && echo wget -qO -) ||
echo 'No wget, curl, fetch: Please inform parallel@gnu.org what you use for downloading URLs' >&2
)
if test "$GET" = ""; then
exit 1
fi
if ! perl -e 1; then
echo No perl installed. GNU Parallel depends on perl. Install perl and retry.
exit 1
fi
if ! perl -e 1; then
echo No perl installed. GNU Parallel depends on perl. Install perl and retry.
exit 1
fi
LANG=C
LATEST=$($GET http://ftpmirror.gnu.org/parallel | perl -ne '/.*(parallel-\d{8})/ and print $1."\n"' | sort | $TAIL -n1)
if test \! -e $LATEST.tar.bz2; then
# Source tar does not exist
rm -f $LATEST.tar.bz2 $LATEST.tar.bz2.sig
$GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2 > $LATEST.tar.bz2
$GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2.sig > $LATEST.tar.bz2.sig
fi
LANG=C
LATEST=$($GET http://ftpmirror.gnu.org/parallel |
perl -ne '/.*(parallel-\d{8})/ and print $1."\n"' |
sort | $TAIL -n1)
if test \! -e $LATEST.tar.bz2; then
# Source tar does not exist
rm -f $LATEST.tar.bz2 $LATEST.tar.bz2.sig
$GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2 > $LATEST.tar.bz2
$GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2.sig > $LATEST.tar.bz2.sig
fi
# Check signature - in case ftpmirror.gnu.org is compromised
if gpg -h 2>/dev/null >/dev/null; then
# GnuPG installed
# Setup .gnupg/gpg.conf if not already done
echo | gpg 2>/dev/null >/dev/null
gpg --keyserver keys.gnupg.net --recv-key FFFFFFF1
gpg --keyserver keys.gnupg.net --recv-key 88888888
if gpg --with-fingerprint $LATEST.tar.bz2.sig 2>&1 | $GREP -E '^Primary key fingerprint: BE9C B493 81DE 3166 A3BC 66C1 2C62 29E2 FFFF FFF1|^Primary key fingerprint: CDA0 1A42 08C4 F745 0610 7E7B D1AB 4516 8888 8888' ; then
# Source code signed by Ole Tange <ole@tange.dk> KeyID FFFFFFF1/88888888
true
else
# GnuPG signature failed
echo
echo "The signature on $LATEST.tar.bz2 is wrong. This may indicate that a criminal has changed the code."
echo "THIS IS BAD AND THE CODE WILL NOT BE INSTALLED."
echo
echo "See http://git.savannah.gnu.org/cgit/parallel.git/tree/README for other installation methods."
exit 1
fi
else
# GnuPG not installed
echo "GnuPG (gpg) is not installed so the signature cannot be checked."
echo "This means that if the code has been changed by criminals, you will not discover that!"
echo
echo "Continue anyway? (y/n)"
read YN </dev/tty
if test "$YN" = "n"; then
# Stop
exit 2
else
# Continue
true
fi
fi
# Check signature - in case ftpmirror.gnu.org is compromised
if gpg -h 2>/dev/null >/dev/null; then
# GnuPG installed
# Setup .gnupg/gpg.conf if not already done
echo | gpg 2>/dev/null >/dev/null
gpg --keyserver keys.gnupg.net --recv-key FFFFFFF1
gpg --keyserver keys.gnupg.net --recv-key 88888888
if gpg --with-fingerprint $LATEST.tar.bz2.sig 2>&1 |
$GREP -E '^Primary key fingerprint: BE9C B493 81DE 3166 A3BC 66C1 2C62 29E2 FFFF FFF1|^Primary key fingerprint: CDA0 1A42 08C4 F745 0610 7E7B D1AB 4516 8888 8888' ; then
# Source code signed by Ole Tange <ole@tange.dk>
# KeyID FFFFFFF1/88888888
true
else
# GnuPG signature failed
echo
echo "The signature on $LATEST.tar.bz2 is wrong. This may indicate that a criminal has changed the code."
echo "THIS IS BAD AND THE CODE WILL NOT BE INSTALLED."
echo
echo "See http://git.savannah.gnu.org/cgit/parallel.git/tree/README for other installation methods."
exit 1
fi
else
# GnuPG not installed
echo "GnuPG (gpg) is not installed so the signature cannot be checked."
echo "This means that if the code has been changed by criminals, you will not discover that!"
echo
echo "Continue anyway? (y/n)"
read YN </dev/tty
if test "$YN" = "n"; then
# Stop
exit 2
else
# Continue
true
fi
fi
bzip2 -dc $LATEST.tar.bz2 | tar xf -
cd $LATEST || exit 2
if ./configure && make && make install; then
echo
echo GNU $LATEST installed globally
else
if ./configure --prefix=$HOME && make && make install; then
echo
echo GNU $LATEST installed in $HOME/bin
else
mkdir -p $HOME/bin/;
chmod 755 src/*;
cp src/parallel src/env_parallel* src/sem src/sql src/niceload $HOME/bin;
echo
echo GNU $LATEST copied to $HOME/bin
fi
bzip2 -dc $LATEST.tar.bz2 | tar xf -
cd $LATEST || exit 2
if ./configure && make && make install; then
echo
echo GNU $LATEST installed globally
else
if ./configure --prefix=$HOME && make && make install; then
echo
echo GNU $LATEST installed in $HOME/bin
else
mkdir -p $HOME/bin/;
chmod 755 src/*;
cp src/parallel src/env_parallel* src/sem src/sql src/niceload src/parcat $HOME/bin;
echo
echo GNU $LATEST copied to $HOME/bin
fi
# Is $HOME/bin already in $PATH?
if echo $PATH | grep $HOME/bin >/dev/null; then
# $HOME/bin is already in $PATH
true
else
# Add $HOME/bin to $PATH for both bash and csh
echo 'PATH=$PATH:$HOME/bin' >> $HOME/.bashrc
echo 'setenv PATH ${PATH}:${HOME}/bin' >> $HOME/.cshrc
fi
# Is $HOME/bin already in $PATH?
if echo $PATH | grep $HOME/bin >/dev/null; then
# $HOME/bin is already in $PATH
true
else
# Add $HOME/bin to $PATH for both bash and csh
echo 'PATH=$PATH:$HOME/bin' >> $HOME/.bashrc
echo 'setenv PATH ${PATH}:${HOME}/bin' >> $HOME/.cshrc
fi
# Is $HOME/share/man already in $MANPATH?
if echo $MANPATH | grep $HOME/share/man >/dev/null; then
# $HOME/share/man is already in $MANPATH
true
else
# Add $HOME/share/man to $MANPATH for both bash and csh
echo 'MANPATH=$MANPATH:$HOME/share/man' >> $HOME/.bashrc
echo 'setenv MANPATH ${MANPATH}:${HOME}/share/man' >> $HOME/.cshrc
fi
fi
# Is $HOME/share/man already in $MANPATH?
if echo $MANPATH | grep $HOME/share/man >/dev/null; then
# $HOME/share/man is already in $MANPATH
true
else
# Add $HOME/share/man to $MANPATH for both bash and csh
echo 'MANPATH=$MANPATH:$HOME/share/man' >> $HOME/.bashrc
echo 'setenv MANPATH ${MANPATH}:${HOME}/share/man' >> $HOME/.cshrc
fi
fi
}
# Make sure the whole script is downloaded before starting
run

README

@ -100,3 +100,11 @@ publication please cite:
= New versions =
New versions will be released at: ftp://ftp.gnu.org/gnu/parallel/
= Dependencies =
GNU Parallel should work with a normal full Perl installation. However,
if your system has split up Perl into multiple packages then these are
the important ones:
opkg install perlbase-getopt perlbase-ipc procps-ng-ps perlbase-mime


@ -208,6 +208,8 @@ New in this release:
* --rpl can now take arguments by adding '(regexp)' in the replacement string.
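
For illustration, the form this enables (the same example appears in the parallel.pod changes below):

  parallel --rpl '{%(.*?)} s/$$1//' echo {%.tar.gz} ::: my.tar.gz   # prints: my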
https://blogs.oracle.com/LetTheSunShineIn/entry/gnu_parallel_%E3%81%A8_sql_loader
http://www.beyeler.com.br/2017/03/trabalhando-como-paralelismo-no-bash-usando-o-gnu-parallel/
https://joss.theoj.org/papers/3cde54de7dfbcada7c0fc04f569b36c7
https://link.springer.com/article/10.1134/S0016793217010108
http://biorxiv.org/content/biorxiv/early/2017/02/17/109280.full.pdf


@ -219,7 +219,7 @@ sub pipepart_setup {
# Prepend each command with
# cat file
my $cat_string = "cat ".::shell_quote_scalar($opt::a[0]);
for(1..max_jobs_running()) {
for(1..$Global::JobQueue->total_jobs()) {
push @Global::cat_prepends, $cat_string;
}
} else {
@ -261,10 +261,9 @@ sub pipepart_setup {
sub pipe_tee_setup {
# mkfifo t1..5
my @fifos;
for(1..max_jobs_running()) {
for(1..$Global::JobQueue->total_jobs()) {
push @fifos, tmpfifo();
}
# cat foo | tee t1 t2 t3 t4 t5 > /dev/null
if(not fork()){
# Let tee inherit our stdin
@ -1361,7 +1360,7 @@ sub check_invalid_option_combinations {
sub init_globals {
# Defaults:
$Global::version = 20170223;
$Global::version = 20170311;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;
@ -1407,14 +1406,14 @@ sub init_globals {
'{#([^#].*?)}' => 's/^$$1//;',
# Bash ${a%def}
'{%(.+?)}' => 's/$$1$//;',
# Bash ${a/def/ghi}
'{/(.+?)/(.+?)}' => 's/$$1/$$2/;',
# Bash ${a/def/ghi} ${a/def/}
'{/(.+?)/(.*?)}' => 's/$$1/$$2/;',
# Bash ${a^a}
'{^(.+?)}' => 's/($$1)/uc($1)/e;',
'{^(.+?)}' => 's/^($$1)/uc($1)/e;',
# Bash ${a^^a}
'{^^(.+?)}' => 's/($$1)/uc($1)/eg;',
# Bash ${a,A}
'{,(.+?)}' => 's/($$1)/lc($1)/e;',
'{,(.+?)}' => 's/^($$1)/lc($1)/e;',
# Bash ${a,,A}
'{,,(.+?)}' => 's/($$1)/lc($1)/eg;',
);
@ -3543,8 +3542,9 @@ sub onall {
# $Global::exitstatus
# $Global::debug
# $Global::joblog
# $opt::tag
# $opt::joblog
# $opt::tag
# $opt::tee
# Input:
# @command = command to run on all hosts
# Returns: N/A
@ -3590,6 +3590,7 @@ sub onall {
((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
((defined $opt::plain) ? "--plain" : ""),
((defined $opt::ungroup) ? "-u" : ""),
((defined $opt::tee) ? "--tee" : ""),
);
my $suboptions =
join(" ",
@ -3607,6 +3608,7 @@ sub onall {
((defined $opt::retries) ? "--retries ".$opt::retries : ""),
((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
((defined $opt::ungroup) ? "-u" : ""),
((defined $opt::tee) ? "--tee" : ""),
((defined $opt::workdir) ? "--wd ".$opt::workdir : ""),
(@Global::transfer_files ? map { "--tf ".::shell_quote_scalar($_) }
@Global::transfer_files : ""),
@ -6461,7 +6463,7 @@ sub empty {
sub total_jobs {
my $self = shift;
if(not defined $self->{'total_jobs'}) {
if($opt::pipe) {
if($opt::pipe and not $opt::tee) {
::error("--pipe is incompatible with --eta/--bar/--shuf");
::wait_and_exit(255);
}
@ -8277,7 +8279,7 @@ sub print {
# cleanup tmp files if --files was set
::rm($self->fh(1,"name"));
}
if($opt::pipe and $self->virgin()) {
if($opt::pipe and $self->virgin() and not $opt::tee) {
# Skip --joblog, --dryrun, --verbose
} else {
if($opt::ungroup) {
@ -9950,15 +9952,15 @@ sub get {
and $Global::max_number_of_args == 0) {
::debug("run", "Read 1 but return 0 args\n");
# \0 => nothing (not the empty string)
return [Arg->new("\0")];
$ret = [Arg->new("\0")];
} else {
# Flush cached computed replacements in Arg-objects
# To fix: parallel --bar echo {%} ::: a b c ::: d e f
if($ret) {
map { $_->flush_cache() } @$ret;
}
return $ret;
}
return $ret;
}
sub unget {
@ -10365,7 +10367,7 @@ sub pQ {
}
sub total_jobs {
return $Global::JobQueue->total_jobs()
return $Global::JobQueue->total_jobs();
}
{


@ -18,6 +18,8 @@ B<parallel> --semaphore [options] I<command>
B<#!/usr/bin/parallel> --shebang [options] [I<command> [arguments]]
B<#!/usr/bin/parallel> --shebang-wrap [options] [I<command> [arguments]]
=head1 DESCRIPTION
@ -89,7 +91,9 @@ B<Bash functions>: B<export -f> the function first or use B<env_parallel>.
B<Bash aliases>: Use B<env_parallel>.
B<Ksh functions and aliases>: Use B<env_parallel>.
B<Csh aliases>: Use B<env_parallel>.
B<Tcsh aliases>: Use B<env_parallel>.
B<Zsh functions and aliases>: Use B<env_parallel>.
@ -858,10 +862,10 @@ Example:
parallel --hostgroups \
--sshlogin @grp1/myserver1 -S @grp1+grp2/myserver2 \
--sshlogin @grp3/myserver3 \
echo ::: my_grp1_arg@grp1 arg_for_grp2@grp2 third_arg@grp1+grp3
echo ::: my_grp1_arg@grp1 arg_for_grp2@grp2 third@grp1+grp3
B<my_grp1_arg> may be run on either B<myserver1> or B<myserver2>,
B<third_arg> may be run on either B<myserver1> or B<myserver3>,
B<third> may be run on either B<myserver1> or B<myserver3>,
but B<arg_for_grp2> will only be run on B<myserver2>.
See also: B<--sshlogin>.
@ -1125,6 +1129,9 @@ are free, no more jobs will be started. If less than 50% I<size> bytes
are free, the youngest job will be killed, and put back on the queue
to be run later.
B<--retries> must be set to determine how many times GNU B<parallel>
should retry a given job.
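
A hedged sketch of combining the two options (the job command, the 1 GB threshold, and the *.cfg inputs are placeholders, not taken from this commit):

  # retry a job up to 5 times if it was killed because free memory dropped below 1 GB
  parallel --memfree 1G --retries 5 ./mysim {} ::: *.cfg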
=item B<--minversion> I<version>
@ -1248,6 +1255,20 @@ Activate additional replacement strings: {+/} {+.} {+..} {+...} {..}
B<{##}> is the number of jobs to be run. It is incompatible with
B<-X>/B<-m>/B<--xargs>.
The following dynamic replacement strings are also activated. They are
inspired by bash's parameter expansion:
{:-str} str if the value is empty
{:num} remove the first num characters
{:num1:num2} characters from num1 to num2
{#str} remove prefix str
{%str} remove postfix str
{/str1/str2} replace str1 with str2
{^str} uppercase str if found at the start
{^^str} uppercase str
{,str} lowercase str if found at the start
{,,str} lowercase str
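
A few illustrative invocations of the dynamic strings (file names and outputs are made up for the example; -k only keeps the output order stable):

  parallel -k --plus echo {%.tar.gz} ::: my.tar.gz    # => my
  parallel -k --plus echo {:-missing} ::: '' value    # => missing, value
  parallel -k --plus echo {^a} ::: abc bca            # => Abc, bca (only a match at the start is uppercased)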
=item B<--progress>
@ -1263,7 +1284,7 @@ reading everything at startup.
By sending GNU B<parallel> SIGUSR2 you can toggle turning on/off
B<--progress> on a running GNU B<parallel> process.
See also B<--eta>.
See also B<--eta> and B<--bar>.
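
A minimal sketch of toggling it from another shell (how you find the PID is up to you; pgrep is just one way):

  # flip --progress on/off for an already running GNU parallel
  kill -USR2 "$(pgrep -of parallel)"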
=item B<--max-args>=I<max-args>
@ -1610,11 +1631,12 @@ B<parallel> will figure out the failed jobs and run those again.
B<--retry-failed> ignores the command and arguments on the command
line: It only looks at the joblog.
B<Differences between --resume, --resume-failed, --retry-failed>
B<Differences between --resume, --resume-failed, --retry-failed>
In this example B<exit {= $_%=2 =}> will cause every other job to fail.
timeout -k 1 4 parallel --joblog log -j10 'sleep {}; exit {= $_%=2 =}' ::: {10..1}
timeout -k 1 4 parallel --joblog log -j10 \
'sleep {}; exit {= $_%=2 =}' ::: {10..1}
4 jobs completed. 2 failed:
@ -1628,7 +1650,8 @@ B<--resume> does not care about the Exitval, but only looks at Seq. If
the Seq is run, it will not be run again. So if needed, you can change
the command for the seqs not run yet:
parallel --resume --joblog log -j10 'sleep .{}; exit {= $_%=2 =}' ::: {10..1}
parallel --resume --joblog log -j10 \
'sleep .{}; exit {= $_%=2 =}' ::: {10..1}
Seq [...] Exitval Signal Command
[... as above ...]
@ -1644,7 +1667,8 @@ to figure out which commands to run. Again this means you can change
the command, but not the arguments. It will run the failed seqs and
the seqs not yet run:
parallel --resume-failed --joblog log -j10 'echo {};sleep .{}; exit {= $_%=3 =}' ::: {10..1}
parallel --resume-failed --joblog log -j10 \
'echo {};sleep .{}; exit {= $_%=3 =}' ::: {10..1}
Seq [...] Exitval Signal Command
[... as above ...]
@ -1744,7 +1768,8 @@ it possible to define your own replacement strings. GNU B<parallel>'s
--rpl '{#} 1 $_=$job->seq()'
--rpl '{%} 1 $_=$job->slot()'
--rpl '{/} s:.*/::'
--rpl '{//} $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);'
--rpl '{//} $Global::use{"File::Basename"} ||=
eval "use File::Basename; 1;"; $_ = dirname($_);'
--rpl '{/.} s:.*/::; s:\.[^/.]+$::;'
--rpl '{.} s:\.[^/.]+$::'
@ -1757,17 +1782,17 @@ The B<--plus> replacement strings are implemented as:
--rpl '{..} s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{...} s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{/..} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{/...} s:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::'
--rpl '{/...} s:.*/::;s:\.[^/.]+$::;s:\.[^/.]+$::;s:\.[^/.]+$::'
--rpl '{##} $_=total_jobs()'
--rpl '{:-(.+?)} $_ ||= $$1'
--rpl '{:(\d+?)} substr($_,0,$$1) = ""'
--rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);'
--rpl '{#([^#].*?)} s/^$$1//;'
--rpl '{%(.+?)} s/$$1$//;'
--rpl '{/(.+?)/(.+?)} s/$$1/$$2/;'
--rpl '{^(.+?)} s/($$1)/uc($1)/e;'
--rpl '{/(.+?)/(.*?)} s/$$1/$$2/;'
--rpl '{^(.+?)} s/^($$1)/uc($1)/e;'
--rpl '{^^(.+?)} s/($$1)/uc($1)/eg;'
--rpl '{,(.+?)} s/($$1)/lc($1)/e;'
--rpl '{,(.+?)} s/^($$1)/lc($1)/e;'
--rpl '{,,(.+?)} s/($$1)/lc($1)/eg;'
@ -1782,7 +1807,8 @@ Here are a few examples:
Is the job sequence even or odd?
--rpl '{odd} $_ = seq() % 2 ? "odd" : "even"'
Pad job sequence with leading zeros to get equal width
--rpl '{0#} $f=1+int("".(log(total_jobs())/log(10))); $_=sprintf("%0${f}d",seq())'
--rpl '{0#} $f=1+int("".(log(total_jobs())/log(10)));
$_=sprintf("%0${f}d",seq())'
Job sequence counting from 0
--rpl '{#0} $_ = seq() - 1'
Job slot counting from 2
@ -1792,7 +1818,7 @@ You can have dynamic replacement strings by including parenthesis in
the replacement string and adding a regular expression between the
parenthesis. The matching string will be inserted as $$1:
parallel --rpl '{%(.*?)} s/$$1//' echo {%.tar.gz} ::: file.tar.gz
parallel --rpl '{%(.*?)} s/$$1//' echo {%.tar.gz} ::: my.tar.gz
You can even use multiple matches:
@ -1802,7 +1828,8 @@ You can even use multiple matches:
$$1 = ($$1%100 + 1900); $_="$$1-$$2-$$3"
' echo {@99-12-31} {@12.31.99} {@31/12-1999} ::: a
parallel --rpl '{(.*?)/(.*?)} $_="$$2$_$$1"' echo {swap/these} ::: -middle-
parallel --rpl '{(.*?)/(.*?)} $_="$$2$_$$1"' \
echo {swap/these} ::: -middle-
See also: B<{= perl expression =}> B<--parens>
@ -1923,7 +1950,7 @@ line. If the program can be run like this:
then the script can be changed to:
#!/usr/bin/parallel --shebang-wrap /the/original/parser --with-options
#!/usr/bin/parallel --shebang-wrap /original/parser --options
E.g.
@ -1935,7 +1962,7 @@ If the program can be run like this:
then the script can be changed to:
#!/usr/bin/parallel --shebang-wrap --pipe /the/original/parser --with-options
#!/usr/bin/parallel --shebang-wrap --pipe /orig/parser --opts
E.g.
@ -1992,7 +2019,7 @@ complete.
The format of a DBURL is:
[sql:]vendor://[[user][:password]@][host][:port]/[database]/table
[sql:]vendor://[[user][:pwd]@][host][:port]/[db]/table
E.g.
@ -2204,7 +2231,8 @@ B<:::>.
How many numbers in 1..1000 contain 0..9, and how many bytes do they
fill:
seq 1000 | parallel --pipe --tee --tag 'grep {1} | wc {2}' ::: {0..9} ::: -l -c
seq 1000 | parallel --pipe --tee --tag \
'grep {1} | wc {2}' ::: {0..9} ::: -l -c
How many words contain a..z and how many bytes do they fill?
@ -2222,7 +2250,7 @@ I<sequence> determines how the job is killed. The default is:
which sends a TERM signal, waits 200 ms, sends another TERM signal,
waits 100 ms, sends another TERM signal, waits 50 ms, sends a KILL
signal, waits 25 ms, and exits. GNU B<parallel> discovers if a process
signal, waits 25 ms, and exits. GNU B<parallel> detects if a process
dies before the waiting time is up.
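
As a sketch, a custom sequence that starts with INT and escalates (the sleeping jobs and the 2 second --timeout are only there to make the sequence fire):

  parallel --timeout 2 --termseq INT,200,TERM,100,KILL,25 'sleep 10' ::: a b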
@ -2250,8 +2278,7 @@ jobs.
=item B<--timeout> I<secs>
Time out for command. If the command runs for longer than I<secs>
seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms
later, followed by SIGKILL 200 ms later.
seconds it will get killed as per B<--termseq>.
If I<secs> is followed by a % then the timeout will dynamically be
computed as a percentage of the median average runtime. Only values
@ -2281,22 +2308,22 @@ remote computers. The files will be transferred using B<rsync> and
will be put relative to the default work dir. If the path contains /./
the remaining path will be relative to the work dir. E.g.
echo foo/bar.txt | parallel \
--sshlogin server.example.com --transferfile {} wc
echo foo/bar.txt | parallel --transferfile {} \
--sshlogin server.example.com wc
This will transfer the file I<foo/bar.txt> to the computer
I<server.example.com> to the file I<$HOME/foo/bar.txt> before running
B<wc foo/bar.txt> on I<server.example.com>.
echo /tmp/foo/bar.txt | parallel \
--sshlogin server.example.com --transferfile {} wc
echo /tmp/foo/bar.txt | parallel --transferfile {} \
--sshlogin server.example.com wc
This will transfer the file I</tmp/foo/bar.txt> to the computer
I<server.example.com> to the file I</tmp/foo/bar.txt> before running
B<wc /tmp/foo/bar.txt> on I<server.example.com>.
echo /tmp/./foo/bar.txt | parallel \
--sshlogin server.example.com --transferfile {} wc {= s:.*/./:./: =}
echo /tmp/./foo/bar.txt | parallel --transferfile {} \
--sshlogin server.example.com wc {= s:.*/./:./: =}
This will transfer the file I</tmp/foo/bar.txt> to the computer
I<server.example.com> to the file I<foo/bar.txt> before running
@ -2622,7 +2649,8 @@ Find the files in a list that do not exist
You have a dir with files named as 24 hours in 5 minute intervals:
00:00, 00:05, 00:10 .. 23:55. You want to find the files missing:
parallel [ -f {1}:{2} ] "||" echo {1}:{2} does not exist ::: {00..23} ::: {00..55..5}
parallel [ -f {1}:{2} ] "||" echo {1}:{2} does not exist \
::: {00..23} ::: {00..55..5}
=head1 EXAMPLE: Calling Bash functions
@ -2731,18 +2759,26 @@ Let us assume a website stores images like:
where YYYYMMDD is the date and ## is the number 01-24. This will
download images for the past 30 days:
parallel wget http://www.example.com/path/to/'$(date -d "today -{1} days" +%Y%m%d)_{2}.jpg' ::: $(seq 30) ::: $(seq -w 24)
getit() {
date=$(date -d "today -$1 days" +%Y%m%d)
num=$2
echo wget http://www.example.com/path/to/${date}_${num}.jpg
}
export -f getit
parallel getit ::: $(seq 30) ::: $(seq -w 24)
B<$(date -d "today -{1} days" +%Y%m%d)> will give the dates in
YYYYMMDD with B<{1}> days subtracted.
B<$(date -d "today -$1 days" +%Y%m%d)> will give the dates in
YYYYMMDD with B<$1> days subtracted.
=head1 EXAMPLE: Copy files as last modified date (ISO8601) with added random digits
find . | parallel cp {} \
'../destdir/{= $a=int(10000*rand); $_=`date -r "$_" +%FT%T"$a"`; chomp; =}'
find . | parallel cp {} '../destdir/{= $a=int(10000*rand); $_=pQ($_);
$_=`date -r "$_" +%FT%T"$a"`; chomp; =}'
B<{=> and B<=}> mark a perl expression. B<date +%FT%T> is the date in ISO8601 with time.
B<{=> and B<=}> mark a perl expression. B<pQ> quotes the
string. B<date +%FT%T> is the date in ISO8601 with time.
=head1 EXAMPLE: Digital clock with "blinking" :
@ -2812,7 +2848,8 @@ URLs and the process is started over until no unseen links are found.
cat $URLLIST |
parallel lynx -listonly -image_links -dump {} \; \
wget -qm -l1 -Q1 {} \; echo Spidered: {} \>\&2 |
perl -ne 's/#.*//; s/\s+\d+.\s(\S+)$/$1/ and do { $seen{$1}++ or print }' |
perl -ne 's/#.*//; s/\s+\d+.\s(\S+)$/$1/ and
do { $seen{$1}++ or print }' |
grep -F $BASEURL |
grep -v -x -F -f $SEEN | tee -a $SEEN > $URLLIST2
mv $URLLIST2 $URLLIST
@ -2860,7 +2897,7 @@ If the processing requires more steps the for-loop like this:
(for x in `cat list` ; do
no_extension=${x%.*};
do_something $x scale $no_extension.jpg
do_step1 $x scale $no_extension.jpg
do_step2 <$x $no_extension
done) | process_output
@ -2868,13 +2905,13 @@ and while-loops like this:
cat list | (while read x ; do
no_extension=${x%.*};
do_something $x scale $no_extension.jpg
do_step1 $x scale $no_extension.jpg
do_step2 <$x $no_extension
done) | process_output
can be written like this:
cat list | parallel "do_something {} scale {.}.jpg ; do_step2 <{} {.}" |\
cat list | parallel "do_step1 {} scale {.}.jpg ; do_step2 <{} {.}" |\
process_output
If the body of the loop is bigger, it improves readability to use a function:
@ -2945,7 +2982,8 @@ the loop variable if it is named instead of just having a number. Use
B<--header :> to let the first argument be a named alias for the
positional replacement string:
parallel --header : echo {colour} {size} ::: colour red green blue ::: size S M L XL XXL
parallel --header : echo {colour} {size} \
::: colour red green blue ::: size S M L XL XXL
This also works if the input file is a file with columns:
@ -3021,7 +3059,8 @@ printed as soon as possible you can use B<-u>.
Compare the output of:
parallel traceroute ::: qubes-os.org debian.org freenetproject.org
parallel --line-buffer traceroute ::: qubes-os.org debian.org freenetproject.org
parallel --line-buffer traceroute ::: \
qubes-os.org debian.org freenetproject.org
parallel -u traceroute ::: qubes-os.org debian.org freenetproject.org
@ -3031,7 +3070,8 @@ GNU B<parallel> groups the output lines, but it can be hard to see
where the different jobs begin. B<--tag> prepends the argument to make
that more visible:
parallel --tag traceroute ::: qubes-os.org debian.org freenetproject.org
parallel --tag traceroute ::: \
qubes-os.org debian.org freenetproject.org
B<--tag> works with B<--line-buffer> but not with B<-u>:
@ -3156,7 +3196,7 @@ is much faster.
If it still does not fit in memory you can do this:
parallel --pipepart -a regexps.txt --block 1M grep -F -f - -n bigfile |
parallel --pipepart -a regexps.txt --block 1M grep -Ff - -n bigfile |
sort -un | perl -pe 's/^\d+://'
The 1M should be your free memory divided by the number of cores and
@ -3167,19 +3207,22 @@ GNU/Linux you can do:
END { print sum }' /proc/meminfo)
percpu=$((free / 200 / $(parallel --number-of-cores)))k
parallel --pipepart -a regexps.txt --block $percpu --compress grep -F -f - -n bigfile |
parallel --pipepart -a regexps.txt --block $percpu --compress \
grep -F -f - -n bigfile |
sort -un | perl -pe 's/^\d+://'
If you can live with duplicated lines and wrong order, it is faster to do:
parallel --pipepart -a regexps.txt --block $percpu --compress grep -F -f - bigfile
parallel --pipepart -a regexps.txt --block $percpu --compress \
grep -F -f - bigfile
=head2 Limiting factor: CPU
If the CPU is the limiting factor parallelization should be done on
the regexps:
cat regexp.txt | parallel --pipe -L1000 --round-robin --compress grep -f - -n bigfile |
cat regexp.txt | parallel --pipe -L1000 --round-robin --compress \
grep -f - -n bigfile |
sort -un | perl -pe 's/^\d+://'
The command will start one B<grep> per CPU and read I<bigfile> one
@ -3191,7 +3234,8 @@ Some storage systems perform better when reading multiple chunks in
parallel. This is true for some RAID systems and for some network file
systems. To parallelize the reading of I<bigfile>:
parallel --pipepart --block 100M -a bigfile -k --compress grep -f regexp.txt
parallel --pipepart --block 100M -a bigfile -k --compress \
grep -f regexp.txt
This will split I<bigfile> into 100MB chunks and run B<grep> on each of
these chunks. To parallelize both reading of I<bigfile> and I<regexp.txt>
@ -3442,7 +3486,8 @@ prepend B<torsocks> to B<ssh>:
If not all hosts are accessible through TOR:
parallel -S 'torsocks ssh izjafdceobowklhz.onion,host2,host3' echo ::: a b c
parallel -S 'torsocks ssh izjafdceobowklhz.onion,host2,host3' \
echo ::: a b c
See more B<ssh> tricks on https://en.wikibooks.org/wiki/OpenSSH/Cookbook/Proxies_and_Jump_Hosts
@ -3468,7 +3513,7 @@ are not being transferred. To fix those run B<rsync> a final time:
If you are unable to push data, but need to pull them and the files
are called digits.png (e.g. 000000.png) you might be able to do:
seq -w 0 99 | parallel rsync -Havessh fooserver:src-path/*{}.png destdir/
seq -w 0 99 | parallel rsync -Havessh fooserver:src/*{}.png destdir/
=head1 EXAMPLE: Use multiple inputs in one command
@ -3555,7 +3600,8 @@ Or MySQL:
DBTABLEURL=$DBURL/mytable
parallel --sqlandworker $DBTABLEURL seq ::: {1..10}
sql -p -B $DBURL "SELECT * FROM mytable;" > mytable.tsv
perl -pe 's/"/""/g;s/\t/","/g;s/^/"/;s/$/"/;s/\\\\/\\/g;s/\\t/\t/g;s/\\n/\n/g;' mytable.tsv
perl -pe 's/"/""/g; s/\t/","/g; s/^/"/; s/$/"/; s/\\\\/\\/g;
s/\\t/\t/g; s/\\n/\n/g;' mytable.tsv
=head1 EXAMPLE: Output to CSV-file for R
@ -3685,7 +3731,7 @@ separator before each customer (column 2 = $F[1]). Here we first make
a 50 character random string, which we then use as the separator:
sep=`perl -e 'print map { ("a".."z","A".."Z")[rand(52)] } (1..50);'`
cat my.csv | perl -ape '$F[1] ne $last and print "'$sep'"; $last = $F[1]' |
cat my.csv | perl -ape '$F[1] ne $l and print "'$sep'"; $l = $F[1]' |
parallel --recend $sep --rrs --pipe -N1 wc
If your program can process multiple customers replace B<-N1> with a
@ -3875,7 +3921,7 @@ If you have a dir in which users drop files that need to be processed
you can do this on GNU/Linux (If you know what B<inotifywait> is
called on other platforms file a bug report):
inotifywait -q -m -r -e MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\
inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\
parallel -u echo
This will run the command B<echo> on each file put into B<my_dir> or
@ -3884,7 +3930,7 @@ subdirs of B<my_dir>.
You can of course use B<-S> to distribute the jobs to remote
computers:
inotifywait -q -m -r -e MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\
inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\
parallel -S .. -u echo
If the files to be processed are in a tar file then unpacking one file


@ -18,7 +18,7 @@ To run this tutorial you must have the following:
=item parallel >= version 20160822
Install the newest version using your package manager (recommended for
security reasons) or with this command:
security reasons), the way described in README, or with this command:
(wget -O - pi.dk/3 || curl pi.dk/3/ || fetch -o - http://pi.dk/3) | bash


@ -79,6 +79,14 @@ par_env_parallel_fifo() {
env_parallel --pipe -S lo --cat 'cat {};myfunc'
}
par_tee_ssh() {
seq 1000000 | parallel --pipe --tee -kS lo,csh@lo,tcsh@lo --tag 'echo {};wc' ::: A B ::: {1..4}
seq 1000000 > /tmp/1000000
parallel --pipepart -a /tmp/1000000 --tee -kS lo,csh@lo,tcsh@lo --tag 'echo {};wc' ::: A B ::: {1..4}
echo "Do we get different shells?"
parallel --pipepart -a /tmp/1000000 --tee -kS lo,csh@lo,tcsh@lo 'echo $SHELL' ::: A B ::: {1..4} | sort | uniq -c | field 1 | sort -n
}
export -f $(compgen -A function | grep par_)
#compgen -A function | grep par_ | sort | parallel --delay $D -j$P --tag -k '{} 2>&1'
compgen -A function | grep par_ | sort |


@ -53,15 +53,51 @@ par_pipepart_remote 11 11 33 /tmp/parallel-local-ssh2-tmpdir/parXXXXX
par_pipepart_remote 11 11 33 /tmp/parallel-local-ssh2-tmpdir/parXXXXX
par_pipepart_remote 11 11 33 /tmp/parallel-local-ssh2-tmpdir/parXXXXX
par_pipepart_remote 9 9 28 /tmp/parallel-local-ssh2-tmpdir/parXXXXX
par_tee_ssh A 1 A 1
par_tee_ssh A 1 1000000 1000000 6888896
par_tee_ssh A 2 A 2
par_tee_ssh A 2 1000000 1000000 6888896
par_tee_ssh A 3 A 3
par_tee_ssh A 3 1000000 1000000 6888896
par_tee_ssh A 4 A 4
par_tee_ssh A 4 1000000 1000000 6888896
par_tee_ssh B 1 B 1
par_tee_ssh B 1 1000000 1000000 6888896
par_tee_ssh B 2 B 2
par_tee_ssh B 2 1000000 1000000 6888896
par_tee_ssh B 3 B 3
par_tee_ssh B 3 1000000 1000000 6888896
par_tee_ssh B 4 B 4
par_tee_ssh B 4 1000000 1000000 6888896
par_tee_ssh A 1 A 1
par_tee_ssh A 1 1000000 1000000 6888896
par_tee_ssh A 2 A 2
par_tee_ssh A 2 1000000 1000000 6888896
par_tee_ssh A 3 A 3
par_tee_ssh A 3 1000000 1000000 6888896
par_tee_ssh A 4 A 4
par_tee_ssh A 4 1000000 1000000 6888896
par_tee_ssh B 1 B 1
par_tee_ssh B 1 1000000 1000000 6888896
par_tee_ssh B 2 B 2
par_tee_ssh B 2 1000000 1000000 6888896
par_tee_ssh B 3 B 3
par_tee_ssh B 3 1000000 1000000 6888896
par_tee_ssh B 4 B 4
par_tee_ssh B 4 1000000 1000000 6888896
par_tee_ssh Do we get different shells?
par_tee_ssh 2
par_tee_ssh 3
par_tee_ssh 3
par_wd_no_such_dir ### --wd no-such-dir - csh
par_wd_no_such_dir mkdir: cannot create directory /no-such-dir: Permission denied
par_wd_no_such_dir mkdir: cannot create directory '/no-such-dir': Permission denied
par_wd_no_such_dir parallel: Cannot chdir to /no-such-dir
par_wd_no_such_dir Exit code 1
par_wd_no_such_dir ### --wd no-such-dir - tcsh
par_wd_no_such_dir mkdir: cannot create directory /no-such-dir: Permission denied
par_wd_no_such_dir mkdir: cannot create directory '/no-such-dir': Permission denied
par_wd_no_such_dir parallel: Cannot chdir to /no-such-dir
par_wd_no_such_dir Exit code 1
par_wd_no_such_dir ### --wd no-such-dir - bash
par_wd_no_such_dir mkdir: cannot create directory /no-such-dir: Permission denied
par_wd_no_such_dir mkdir: cannot create directory '/no-such-dir': Permission denied
par_wd_no_such_dir parallel: Cannot chdir to /no-such-dir
par_wd_no_such_dir Exit code 1


@ -264,15 +264,15 @@ par_fish_underscore aliases and arrays in functions work
par_fish_underscore aliases functions work
par_fish_underscore aliases functions work
par_fish_underscore ^
par_fish_underscore in function “myfunc”
par_fish_underscore in function 'myfunc'
par_fish_underscore called on standard input
par_fish_underscore with parameter list “work”
par_fish_underscore with parameter list 'work'
par_fish_underscore
par_fish_underscore OK if ^^^^^^^^^^^^^^^^^ no myecho
par_fish_underscore ^
par_fish_underscore in function “myfunc”
par_fish_underscore in function 'myfunc'
par_fish_underscore called on standard input
par_fish_underscore with parameter list “work”
par_fish_underscore with parameter list 'work'
par_fish_underscore
par_fish_underscore OK if ^^^^^^^^^^^^^^^^^ no myecho
par_fish_underscore ^
@ -364,9 +364,9 @@ par_tcsh_funky
par_tcsh_funky
par_tcsh_funky 3 arg alias_works
par_tcsh_funky 3 arg alias_works_over_ssh
par_tcsh_funky Funky-  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€<EFBFBD>ƒ„…†‡ˆ‰ŠŒ<EFBFBD>Ž<EFBFBD><EFBFBD>“”•˜™šœ<EFBFBD>žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùú-funky alias_var_works
par_tcsh_funky Funky-  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€<EFBFBD>ƒ„…†‡ˆ‰ŠŒ<EFBFBD>Ž<EFBFBD><EFBFBD>“”•˜™šœ<EFBFBD>žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùú-funky alias_var_works_over_ssh
par_tcsh_funky \\\\\\\\ \ \ \ \ \\\\\\\\\\\\\\ \!\"\#\$%\&\'\(\)\*+,-./0123456789:\;\<\=\>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]\^_\`abcdefghijklmnopqrstuvwxyz\{\|\}\~€<EFBFBD>\\ƒ\„\…\†\‡\ˆ\‰\Š\\Œ\<5C>\Ž\<5C>\<5C>\\\“\”\•\\—\˜\™\š\\œ\<5C>\ž\Ÿ\ \¡\¢\£\¤\¥\¦\§\¨\©\ª\«\¬\­\®\¯\°\±\²\³\´\µ\¶\·\¸\¹\º\»\¼\½\¾\¿\À\Á\Â\Ã\Ä\Å\Æ\Ç\È\É\Ê\Ë\Ì\Í\Î\Ï\Ð\Ñ\Ò\Ó\Ô\Õ\Ö\×\Ø\Ù\Ú\Û\Ü\Ý\Þ\ß\à\á\â\ã\ä\å\æ\ç\è\é\ê\ë\ì\í\î\ï\ð\ñ\ò\ó\ô\õ\ö\÷\ø\ù\ú
par_tcsh_funky Funky-  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€亗儎厗噲墛媽崕彁憭摂晼棙櫄洔潪煚、¥ウЖ┆<EFBFBD><EFBFBD><EFBFBD>辈炒刀犯购患骄坷谅媚牌侨墒颂臀闲岩釉罩棕仝圮蒉哙徕沅彐玷殛腱眍镳耱篝貊鼬<EFBFBD><EFBFBD><EFBFBD><EFBFBD>-funky alias_var_works
par_tcsh_funky Funky-  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€亗儎厗噲墛媽崕彁憭摂晼棙櫄洔潪煚、¥ウЖ┆<EFBFBD><EFBFBD><EFBFBD>辈炒刀犯购患骄坷谅媚牌侨墒颂臀闲岩釉罩棕仝圮蒉哙徕沅彐玷殛腱眍镳耱篝貊鼬<EFBFBD><EFBFBD><EFBFBD><EFBFBD>-funky alias_var_works_over_ssh
par_tcsh_funky \\\\\\\\ \ \ \ \ \\\\\\\\\\\\\\ \!\"\#\$%\&\'\(\)\*+,-./0123456789:\;\<\=\>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]\^_\`abcdefghijklmnopqrstuvwxyz\{\|\}\~€乗俓僜刓匼哱嘰圽塡奬媆孿峔嶾廫怽慭抃揬擻昞朶梊榎橽歕沑淺漒瀄焅燶<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>猏玕琝璡甛痋癨盶瞈砛碶礬禱穃竆筡篭籠糪絓綷縗繺羂耚肻腬臷芢荺萛蒤蔦薥蘚蚛蝄蟎衆裓襖覾診誠謀譢豛賊赲踈躙輁轡運郳醆鈂鉢鋅錦鎈鏫鑌閈闬隲靄韁頫颸餦馶騖骪鬨鮘鯸鱘鳿鵟鶿鸤黒齖㘎<EFBFBD>
par_tcsh_funky func_echo: Command not found.
par_tcsh_funky func_echo: Command not found.
par_tcsh_funky myvar works


@ -7,26 +7,12 @@ par_keeporder job0
par_keeporder job1
par_keeporder job2
par_path_remote_bash bug #47695: How to set $PATH on remote? Bash
par_path_remote_bash
par_path_remote_bash * Documentation: https://help.ubuntu.com
par_path_remote_bash * Management: https://landscape.canonical.com
par_path_remote_bash * Support: https://ubuntu.com/advantage
par_path_remote_bash
par_path_remote_bash 0 updates are security updates.
par_path_remote_bash
par_path_remote_bash BASH Path before: /bin:/usr/bin with no parallel
par_path_remote_bash -bash: line 2: parallel: command not found
par_path_remote_bash ^^^^^^^^ Not found is OK
par_path_remote_bash /bin:/usr/bin:/tmp OK
par_path_remote_bash
par_path_remote_csh bug #47695: How to set $PATH on remote? csh
par_path_remote_csh
par_path_remote_csh * Documentation: https://help.ubuntu.com
par_path_remote_csh * Management: https://landscape.canonical.com
par_path_remote_csh * Support: https://ubuntu.com/advantage
par_path_remote_csh
par_path_remote_csh 0 updates are security updates.
par_path_remote_csh
par_path_remote_csh Warning: no access to tty (Bad file descriptor).
par_path_remote_csh Thus no job control in this shell.
par_path_remote_csh CSH Path before: /bin:/usr/bin with no parallel