parallel: Cleanup 20150607alpha.

This commit is contained in:
Ole Tange 2015-06-14 23:43:58 +02:00
parent 0ab644d156
commit 2c1ebf9904
12 changed files with 99 additions and 205 deletions

14
README
View file

@ -40,10 +40,10 @@ document.
Full installation of GNU Parallel is as simple as:
wget http://ftpmirror.gnu.org/parallel/parallel-20150522.tar.bz2
bzip2 -dc parallel-20150522.tar.bz2 | tar xvf -
cd parallel-20150522
./configure && make && make install
wget http://ftpmirror.gnu.org/parallel/parallel-20150607.tar.bz2
bzip2 -dc parallel-20150607.tar.bz2 | tar xvf -
cd parallel-20150607
./configure && make && sudo make install
= Personal installation =
@ -51,9 +51,9 @@ Full installation of GNU Parallel is as simple as:
If you are not root you can add ~/bin to your path and install in
~/bin and ~/share:
wget http://ftpmirror.gnu.org/parallel/parallel-20150522.tar.bz2
bzip2 -dc parallel-20150522.tar.bz2 | tar xvf -
cd parallel-20150522
wget http://ftpmirror.gnu.org/parallel/parallel-20150607.tar.bz2
bzip2 -dc parallel-20150607.tar.bz2 | tar xvf -
cd parallel-20150607
./configure --prefix=$HOME && make && make install
Or if your system lacks 'make' you can simply copy src/parallel

20
configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for parallel 20150522.
# Generated by GNU Autoconf 2.69 for parallel 20150607.
#
# Report bugs to <bug-parallel@gnu.org>.
#
@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='parallel'
PACKAGE_TARNAME='parallel'
PACKAGE_VERSION='20150522'
PACKAGE_STRING='parallel 20150522'
PACKAGE_VERSION='20150607'
PACKAGE_STRING='parallel 20150607'
PACKAGE_BUGREPORT='bug-parallel@gnu.org'
PACKAGE_URL=''
@ -1203,7 +1203,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures parallel 20150522 to adapt to many kinds of systems.
\`configure' configures parallel 20150607 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1269,7 +1269,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of parallel 20150522:";;
short | recursive ) echo "Configuration of parallel 20150607:";;
esac
cat <<\_ACEOF
@ -1345,7 +1345,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
parallel configure 20150522
parallel configure 20150607
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@ -1362,7 +1362,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by parallel $as_me 20150522, which was
It was created by parallel $as_me 20150607, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -2225,7 +2225,7 @@ fi
# Define the identity of the package.
PACKAGE='parallel'
VERSION='20150522'
VERSION='20150607'
cat >>confdefs.h <<_ACEOF
@ -2867,7 +2867,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by parallel $as_me 20150522, which was
This file was extended by parallel $as_me 20150607, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -2929,7 +2929,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
parallel config.status 20150522
parallel config.status 20150607
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View file

@ -1,4 +1,4 @@
AC_INIT([parallel], [20150522], [bug-parallel@gnu.org])
AC_INIT([parallel], [20150607], [bug-parallel@gnu.org])
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([

View file

@ -15,6 +15,7 @@
if ! /tmp/bin/parallel-20140722 --version; then
wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p*
parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig
parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix /tmp/{.}-bin && make && make install' ::: *sig
@ -31,26 +32,29 @@ measure() {
INNER=$2
CORES=$3
VERSION=$4
GHZ=3.0
# Force cpuspeed at 1.7GHz - seems to give tighter results
forever 'sleep 10;parallel sudo cpufreq-set -f 1700MHz -c{} ::: {0..7}' &
# forever 'sleep 10;parallel sudo cpufreq-set -f ${GHZ}GHz -c{} ::: {0..7}' &
# forever 'sleep 10;parallel sudo cpufreq-set -f 1700MHz -c{} ::: {0..7}' &
PATH=/tmp/bin:$PATH
cd /tmp/bin
ls parallel-* | shuf | parallel -j$CORES --joblog /tmp/joblog.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
# ls parallel-* | shuf | parallel -j$CORES --joblog /tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
ls parallel-* | parallel --shuf -j$CORES --joblog /tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
killall forever
Rscript - <<_
jl<-read.csv("/tmp/joblog.csv",sep="\t");
jl<-read.csv("/tmp/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
jl\$Command <- as.factor(substr(jl\$Command, 12, nchar(as.character(jl\$Command))-5))
pdf("/tmp/boxplot.pdf");
par(cex.axis=0.5);
boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F,
ylab="milliseconds/job",main="GNU Parallel performance\n$OUTER trials each running $INNER");
_
cp /tmp/boxplot.pdf /tmp/boxplot-j$CORES-1.7ghz-$OUTER-${INNER}v$VERSION.pdf
cp /tmp/boxplot.pdf /tmp/boxplot-j$CORES-${GHZ}ghz-$OUTER-${INNER}v$VERSION.pdf
evince /tmp/boxplot.pdf
}
measure 3000 1000 8 1
measure 3000 1000 2 1

View file

@ -224,20 +224,34 @@ New in this release:
* An empty argument would previously cause no string to be inserted. This is now changed to '' being inserted, thus prepending a space to the output of: parallel echo {} b ::: ''
* $PARALLEL_ENV can now be set to an environment prepending the command. Used in env_parallel as mentioned in the manpage.
* --retry-failed will retry all failed jobs in a joblog. It will ignore any command given.
* --halt has been rewritten completely. You can now combine percentages with success or fail. See the man page.
* Exit values 102..254 have been removed. 101 means more than 100 jobs failed.
* $PARALLEL_ENV can now be set to an environment prepending the command. Used in env_parallel as mentioned in the manpage.
* --retry-failed will retry all failed jobs in a joblog. It will ignore any command given.
* --ssh and $PARALLEL_SSH can be used to set the command used for ssh. The command is assumed to behave the same as ssh.
* --fifo now works in csh, too.
* Killing through --timeout, --memfree, or --halt is now done as a process group.
* GNU Parallel was cited in: Contrasting regional architectures of schizophrenia and other complex diseases using fast variance components analysis http://biorxiv.org/content/biorxiv/early/2015/06/05/016527.full.pdf
* GNU Parallel was cited in: DockBench: An Integrated Informatic Platform Bridging the Gap between the Robust Validation of Docking Protocols and Virtual Screening Simulations http://www.mdpi.com/1420-3049/20/6/9977
* GNU Parallel was cited in: Swedes Online: You Are More Tracked Than You Think http://www.diva-portal.org/smash/get/diva2:807623/FULLTEXT01.pdf
* GNU Parallel was cited in: Tutorial: Lorenz-Mie theory for 2D scattering and resonance calculations http://arxiv.org/pdf/1505.07691.pdf
* GNU Parallel was cited in: A quantitative assessment of the Hadoop framework for analyzing massively parallel DNA sequencing data http://link.springer.com/article/10.1186/s13742-015-0058-5
* GNU Parallel was cited in: A composite genome approach to identify phylogenetically informative data from next-generation sequencing http://www.biomedcentral.com/content/pdf/s12859-015-0632-y.pdf
Tutorial: Lorenz-Mie theory for 2D scattering and resonance calculations http://arxiv.org/pdf/1505.07691.pdf
* <<Har angiveligt submittet ny version - afventer opdatering>> GNU Parallel was used (unfortunately without citation) in: MUGBAS: a species free gene-based programme suite for post-GWAS analysis http://www.ncbi.nlm.nih.gov/pubmed/25765345
* <<Afventer updateret publisering>> GNU Parallel was used in: Large Scale Author Name Disambiguation in Digital Libraries http://ieeexplore.ieee.org/xpl/abstractReferences.jsp?tp=&arnumber=7004487&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D7004487
@ -250,8 +264,12 @@ New in this release:
* GNU Parallel is used in: https://github.com/d2207197/local-mapreduce
* GNU Parallel is used in: A Fingerprint Identification System https://curve.carleton.ca/system/files/theses/28733.pdf
* Job ad asking for GNU Parallel experience: http://searchjobs.intel.com/gdansk-pol/software-validation-engineer/63A06826DAF24797AB414DC146201C2E/job/
* Using BLAT http://wangzhengyuan.blogspot.dk/2015/06/using-blat.html
* Bug fixes and man page updates.
GNU Parallel - For people who live life in the parallel lane.

View file

@ -24,7 +24,7 @@
use strict;
use Getopt::Long;
$Global::progname="niceload";
$Global::version = 20150522;
$Global::version = 20150607;
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV) || die_usage();
if($opt::version) {

View file

@ -1076,7 +1076,7 @@ sub parse_options {
sub init_globals {
# Defaults:
$Global::version = 20150531;
$Global::version = 20150607;
$Global::progname = 'parallel';
$Global::infinity = 2**31;
$Global::debug = 0;
@ -2813,6 +2813,7 @@ sub parse_host_filtering {
my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
for (@_) {
::debug("init",$_);
chomp;
my @col = split /\t/, $_;
if(defined $col[6]) {
@ -2829,8 +2830,8 @@ sub parse_host_filtering {
my $host = $1;
$host =~ tr/\\//d;
$Global::host{$host} or next;
if($col[6] eq "255" or $col[7] eq "15") {
# exit == 255 or signal == 15: ssh failed
if($col[6] eq "255" or $col[6] eq "-1") {
# exit == 255 or exit == timeout (-1): ssh failed/timedout
# Remove sshlogin
::debug("init", "--filtered $host\n");
push(@down_hosts, $host);
@ -2916,7 +2917,8 @@ sub parallelized_host_filtering {
# --retries 3: If the ssh daemon is overloaded, try 3 times
# -s 16000: Half of the max line on UnixWare
# TODO sh -c wrapper to work in csh
my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
my $unlinkcmd = $Global::debug ? "true" : "rm $tmpfile";
my $cmd = "($unlinkcmd; cat -) < $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
::debug("init", $cmd, "\n");
my @out;
my $prepend = "";
@ -2933,7 +2935,6 @@ sub parallelized_host_filtering {
push @out, $_;
}
close $host_fh;
$Global::debug or unlink $tmpfile;
return @out;
}
@ -3196,31 +3197,8 @@ sub reaper {
sub __USAGE__ {}
sub _killall {
    # Stop new jobs from being started, flag that a kill-all is in
    # progress, and signal our own process group with TERM twice.
    # Each step is delegated to kill_my_pgrp(signal, pause).
    $Global::start_no_new_jobs ||= 1;
    $Global::killall = 1;
    # (signal, pause) pairs, processed in order
    for my $step (["TERM", 300], ["TERM", 300]) {
        my ($signal, $pause) = @{$step};
        kill_my_pgrp($signal, $pause);
    }
}
sub kill_my_pgrp {
    # Send a signal to our own process group while ignoring it
    # ourselves, pause, and then log the pids that family_pids finds
    # for the keys of %Global::running.
    # Input:
    #   $signal = name of the signal to send
    #   $time = pause after sending; handed unchanged to ::usleep
    my ($signal, $time) = @_;
    # Snapshot all handlers so the temporary IGNORE can be undone
    my %saved_handlers = %SIG;
    $SIG{$signal} = 'IGNORE';
    # Negative pid: the signal goes to the whole process group of $$
    kill $signal, -$$;
    %SIG = %saved_handlers;
    ::usleep($time);
    my @family = family_pids(keys %Global::running);
    my $pidlist = join " ", sort @family;
    ::debug("kill", "kill_my_pgrp $signal ", $pidlist, "\n");
}
sub killall {
# Kill all jobs
# Send all (grand*)children TERM,TERM,KILL
# Wait up to 200 ms between each
# Kill all jobs by killing their process groups
$Global::start_no_new_jobs ||= 1;
$Global::killall ||= 1;
@ -3228,8 +3206,10 @@ sub killall {
}
sub kill_sleep_seq {
# Send jobs TERM,TERM,KILL
# processgroups (-$pid)
# Send jobs TERM,TERM,KILL to processgroups
# Input:
# @pids = list of pids that are also processgroups
# Convert pids to process groups ($processgroup = -$pid)
my @pgrps = map { -$_ } @_;
my @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
while(@term_seq) {
@ -3272,34 +3252,6 @@ sub kill_sleep {
return @pids;
}
sub family_pids {
    # Collect the live pids among @parents plus every pid that has one
    # of them as (grand)*parent according to ::pid_table().
    # Input:
    #   @parents = pids of parents
    # Returns:
    #   @family = live parents followed by their (grand)*children,
    #             in breadth-first order
    my @parents = @_;
    my @family;
    # Only the first value returned by ::pid_table() (children-of map) is used
    my ($children_of) = ::pid_table();
    # Seed the queue with the parents that still answer to signal 0
    my @queue = grep { kill( 0, $_) } @parents;
    while(@queue) {
        my $pid = shift @queue;
        push @family, $pid;
        my $kids = $children_of->{$pid};
        # Children are appended so they are visited after the current level
        $kids and push @queue, @{$kids};
    }
    return (@family);
}
sub wait_and_exit {
# If we do not wait, we sometimes get segfault
# Returns: N/A
@ -3755,8 +3707,8 @@ sub which {
# ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
# posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh
my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh
ksh93 lksh mksh pdksh posh rbash rc rush rzsh
sash sh static-sh tcsh yash zsh -sh -csh),
ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh
static-sh tcsh yash zsh -sh -csh),
'-sh (sh)' # sh on FreeBSD
);
# Can be formatted as:
@ -6240,7 +6192,7 @@ sub set_endtime {
$self->{'endtime'} = $endtime;
}
sub timedout {
sub is_timedout {
# Is the job timedout?
# Input:
# $delta_time = time that the job may run
@ -6257,72 +6209,6 @@ sub kill {
::kill_sleep_seq($self->pid());
}
sub _kill {
    # Kill the job.
    # Send the signals to (grand)*children and pid.
    # If no signals: TERM TERM KILL
    # Wait up to 200 ms after each TERM (default sequence only).
    # Input:
    #   @signals = signals to send
    # Returns: N/A
    my $self = shift;
    my @signals = @_;
    # The family is looked up once; the same pid list is reused for
    # every signal in the sequence.
    my @family_pids = $self->family_pids();
    # Record this jobs as failed
    $self->set_exitstatus(-1);
    ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n");
    # Do not write '@signals || (...)' here: || evaluates @signals in
    # scalar context, so a caller-supplied list would collapse to its
    # element count and that number would be sent as the signal.
    my @send_signals = @signals ? @signals : ("TERM", "TERM", "KILL");
    for my $signal (@send_signals) {
        my $alive = 0;
        for my $pid (@family_pids) {
            if(CORE::kill 0, $pid) {
                # The job still running
                CORE::kill $signal, $pid;
                $alive = 1;
                ::debug("run","$pid is alive\n");
            }
        }
        # If a signal was given as input, do not do the sleep below
        @signals and next;
        if($signal eq "TERM" and $alive) {
            # Wait up to 200 ms between TERMs - but only if any pids are alive.
            # Only the first entry of @family_pids is polled for liveness.
            my $sleep = 1;
            for (my $sleepsum = 0;
                 CORE::kill(0, $family_pids[0]) and $sleepsum < 200;
                 $sleepsum += $sleep) {
                $sleep = ::reap_usleep($sleep);
            }
        }
    }
}
sub family_pids {
    # Collect this job's pid plus every pid that has it as
    # (grand)*parent according to ::pid_table().
    # Returns:
    #   @family = the job's pid followed by its (grand)*children,
    #             in breadth-first order
    my $self = shift;
    my $pid = $self->pid();
    my @family;
    # Only the first value returned by ::pid_table() (children-of map) is used
    my ($children_of) = ::pid_table();
    my @queue = ($pid);
    while(@queue) {
        my $current = shift @queue;
        push @family, $current;
        my $kids = $children_of->{$current};
        # Children are appended so they are visited after the current level
        $kids and push @queue, @{$kids};
    }
    return (@family);
}
sub failed {
# return number of times failed for this $sshlogin
# Input:
@ -7104,6 +6990,18 @@ sub start {
1;
};
$job->set_fh(0,"w",$stdin_fh);
} elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
open(my $devtty_fh, "<", "/dev/tty")) {
# Give /dev/tty to the command if no one else is using it
*IN = $devtty_fh;
# The eval is needed to catch exception from open3
eval {
$pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
::die_bug("open3-/dev/tty");
$Global::tty_taken = $pid;
close $devtty_fh;
1;
};
} elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
and $job->sshlogin()->string() eq ":") {
# Give STDIN to the first job if using -a (but only if running
@ -7118,18 +7016,6 @@ sub start {
# Re-open to avoid complaining
open(STDIN, "<&", $Global::original_stdin)
or ::die_bug("dup-\$Global::original_stdin: $!");
} elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
open(my $devtty_fh, "<", "/dev/tty")) {
# Give /dev/tty to the command if no one else is using it
*IN = $devtty_fh;
# The eval is needed to catch exception from open3
eval {
$pid = ::open3("<&IN", ">&OUT", ">&ERR", @setpgrp_wrap, $command) ||
::die_bug("open3-/dev/tty");
$Global::tty_taken = $pid;
close $devtty_fh;
1;
};
} else {
# The eval is needed to catch exception from open3
eval {
@ -9232,7 +9118,7 @@ sub process_timeouts {
# Job already finished. No need to timeout the job
# This could be because of --keep-order
shift @{$self->{'queue'}};
} elsif($job->timedout($self->{'delta_time'})) {
} elsif($job->is_timedout($self->{'delta_time'})) {
# Need to shift off queue before kill
# because kill calls usleep that calls process_timeouts
shift @{$self->{'queue'}};

View file

@ -1928,7 +1928,7 @@ Use B<tmux> for output. Start a B<tmux> session and run each job in a
window in that session. No other output will be produced.
=item B<--timeout> I<val>
=item B<--timeout> I<val> (alpha testing)
Time out for command. If the command runs for longer than I<val>
seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms
@ -4469,12 +4469,13 @@ version.
A complete example that others can run that shows the problem
including all files needed to run the example. This should preferably
be small and simple. A combination of B<yes>, B<seq>, B<cat>, B<echo>,
and B<sleep> can reproduce most errors. If your example requires large
files, see if you can make them by something like B<seq 1000000> >
B<file> or B<yes | head -n 10000000> > B<file>. If your example
requires remote execution, see if you can use B<localhost> - maybe
using another login.
be small and simple, so try to remove as many options as possible. A
combination of B<yes>, B<seq>, B<cat>, B<echo>, and B<sleep> can
reproduce most errors. If your example requires large files, see if
you can make them by something like B<seq 1000000> > B<file> or B<yes
| head -n 10000000> > B<file>. If your example requires remote
execution, see if you can use B<localhost> - maybe using another
login.
=item *

View file

@ -503,6 +503,8 @@ not known to B<bash>.
=back
If GNU B<parallel> guesses wrong in these situations, set the shell using
$PARALLEL_SHELL.
=head2 Quoting

View file

@ -566,7 +566,7 @@ $Global::Initfile && unlink $Global::Initfile;
exit ($err);
sub parse_options {
$Global::version = 20150522;
$Global::version = 20150607;
$Global::progname = 'sql';
# This must be done first as this may exec myself

View file

@ -2,15 +2,17 @@
rm -f ~/.parallel/will-cite
echo '### Test stdin goes to first command only ("-" as argument)'
cat >/tmp/parallel-script-for-script <<EOF
#!/bin/bash
echo via first cat |parallel --tty -kv cat ::: - -
EOF
chmod 755 /tmp/parallel-script-for-script
echo via pseudotty | script -q -f -c /tmp/parallel-script-for-script /dev/null
sleep 2
rm /tmp/parallel-script-for-script
# Disabled 2015-06-01
#
# echo '### Test stdin goes to first command only ("-" as argument)'
# cat >/tmp/parallel-script-for-script <<EOF
# #!/bin/bash
# echo via first cat |parallel --tty -kv cat ::: - -
# EOF
# chmod 755 /tmp/parallel-script-for-script
# echo via pseudotty | script -q -f -c /tmp/parallel-script-for-script /dev/null
# sleep 2
# rm /tmp/parallel-script-for-script
echo '### Test stdin goes to first command only ("cat" as argument)'
cat >/tmp/parallel-script-for-script2 <<EOF

View file

@ -1,22 +1,3 @@
### Test stdin goes to first command only ("-" as argument)
Academic tradition requires you to cite works you base your article on.
When using programs that use GNU Parallel to process data for publication
please cite:
O. Tange (2011): GNU Parallel - The Command-Line Power Tool,
;login: The USENIX Magazine, February 2011:42-47.
This helps funding further development; and it won't cost you a cent.
If you pay 10000 EUR you should feel free to use GNU Parallel without citing.
To silence the citation notice: run 'parallel --bibtex'.
cat -
via first cat
cat -
cat -
via pseudotty
cat -
### Test stdin goes to first command only ("cat" as argument)
Academic tradition requires you to cite works you base your article on.
When using programs that use GNU Parallel to process data for publication