From 2c1ebf9904e869168e0bcdeacb6fab0900bf7e27 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Sun, 14 Jun 2015 23:43:58 +0200 Subject: [PATCH] parallel: Cleanup 20150607alpha. --- README | 14 +- configure | 20 +-- configure.ac | 2 +- doc/boxplot-runtime | 14 +- doc/release_new_version | 26 +++- src/niceload | 2 +- src/parallel | 168 ++++----------------- src/parallel.pod | 15 +- src/parallel_design.pod | 2 + src/sql | 2 +- testsuite/tests-to-run/parallel-local12.sh | 20 +-- testsuite/wanted-results/parallel-local12 | 19 --- 12 files changed, 99 insertions(+), 205 deletions(-) diff --git a/README b/README index 8a68a187..dadf3fcf 100644 --- a/README +++ b/README @@ -40,10 +40,10 @@ document. Full installation of GNU Parallel is as simple as: - wget http://ftpmirror.gnu.org/parallel/parallel-20150522.tar.bz2 - bzip2 -dc parallel-20150522.tar.bz2 | tar xvf - - cd parallel-20150522 - ./configure && make && make install + wget http://ftpmirror.gnu.org/parallel/parallel-20150607.tar.bz2 + bzip2 -dc parallel-20150607.tar.bz2 | tar xvf - + cd parallel-20150607 + ./configure && make && sudo make install = Personal installation = @@ -51,9 +51,9 @@ Full installation of GNU Parallel is as simple as: If you are not root you can add ~/bin to your path and install in ~/bin and ~/share: - wget http://ftpmirror.gnu.org/parallel/parallel-20150522.tar.bz2 - bzip2 -dc parallel-20150522.tar.bz2 | tar xvf - - cd parallel-20150522 + wget http://ftpmirror.gnu.org/parallel/parallel-20150607.tar.bz2 + bzip2 -dc parallel-20150607.tar.bz2 | tar xvf - + cd parallel-20150607 ./configure --prefix=$HOME && make && make install Or if your system lacks 'make' you can simply copy src/parallel diff --git a/configure b/configure index 0c3fe556..61170b43 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for parallel 20150522. +# Generated by GNU Autoconf 2.69 for parallel 20150607. # # Report bugs to . # @@ -579,8 +579,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='parallel' PACKAGE_TARNAME='parallel' -PACKAGE_VERSION='20150522' -PACKAGE_STRING='parallel 20150522' +PACKAGE_VERSION='20150607' +PACKAGE_STRING='parallel 20150607' PACKAGE_BUGREPORT='bug-parallel@gnu.org' PACKAGE_URL='' @@ -1203,7 +1203,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures parallel 20150522 to adapt to many kinds of systems. +\`configure' configures parallel 20150607 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1269,7 +1269,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of parallel 20150522:";; + short | recursive ) echo "Configuration of parallel 20150607:";; esac cat <<\_ACEOF @@ -1345,7 +1345,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -parallel configure 20150522 +parallel configure 20150607 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1362,7 +1362,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by parallel $as_me 20150522, which was +It was created by parallel $as_me 20150607, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2225,7 +2225,7 @@ fi # Define the identity of the package. PACKAGE='parallel' - VERSION='20150522' + VERSION='20150607' cat >>confdefs.h <<_ACEOF @@ -2867,7 +2867,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by parallel $as_me 20150522, which was +This file was extended by parallel $as_me 20150607, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2929,7 +2929,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -parallel config.status 20150522 +parallel config.status 20150607 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 69ec4358..25156a71 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([parallel], [20150522], [bug-parallel@gnu.org]) +AC_INIT([parallel], [20150607], [bug-parallel@gnu.org]) AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([ diff --git a/doc/boxplot-runtime b/doc/boxplot-runtime index 142995ef..41eaf6cd 100644 --- a/doc/boxplot-runtime +++ b/doc/boxplot-runtime @@ -15,6 +15,7 @@ if ! /tmp/bin/parallel-20140722 --version; then + wget -c ftp://ftp.gnu.org/old-gnu/parallel/p* wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p* parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix /tmp/{.}-bin && make && make install' ::: *sig @@ -31,26 +32,29 @@ measure() { INNER=$2 CORES=$3 VERSION=$4 + GHZ=3.0 # Force cpuspeed at 1.7GHz - seems to give tighter results - forever 'sleep 10;parallel sudo cpufreq-set -f 1700MHz -c{} ::: {0..7}' & +# forever 'sleep 10;parallel sudo cpufreq-set -f ${GHZ}GHz -c{} ::: {0..7}' & +# forever 'sleep 10;parallel sudo cpufreq-set -f 1700MHz -c{} ::: {0..7}' & PATH=/tmp/bin:$PATH cd /tmp/bin - ls parallel-* | shuf | parallel -j$CORES --joblog /tmp/joblog.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) - +# ls parallel-* | shuf | parallel -j$CORES --joblog /tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) - + ls parallel-* | parallel --shuf -j$CORES --joblog /tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) - killall forever Rscript - <<_ - jl<-read.csv("/tmp/joblog.csv",sep="\t"); + jl<-read.csv("/tmp/joblog$CORES-$INNER-$OUTER.csv",sep="\t"); jl\$Command <- as.factor(substr(jl\$Command, 12, nchar(as.character(jl\$Command))-5)) pdf("/tmp/boxplot.pdf"); par(cex.axis=0.5); boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F, ylab="milliseconds/job",main="GNU Parallel performance\n$OUTER trials each running $INNER"); _ - cp /tmp/boxplot.pdf /tmp/boxplot-j$CORES-1.7ghz-$OUTER-${INNER}v$VERSION.pdf + cp /tmp/boxplot.pdf /tmp/boxplot-j$CORES-${GHZ}ghz-$OUTER-${INNER}v$VERSION.pdf evince /tmp/boxplot.pdf } -measure 3000 1000 8 1 +measure 3000 1000 2 1 diff --git a/doc/release_new_version b/doc/release_new_version index eabdf77c..fa77e1cf 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -224,20 +224,34 @@ New in this release: * An empty argument would previously cause no string to be inserted. This is now changed to '' being inserted, thus prepending a space to the output of: parallel echo {} b ::: '' -* $PARALLEL_ENV can now be set to an environment prepending the command. Used in env_parallel as mentioned in the manpage. - -* --retry-failed will retry all failed jobs in a joblog. It will ignore any command given. - * --halt has been rewritten completely. You can now combine percentages with success or fail. See the man page. * Exit values 102..254 have been removed. 101 means more than 100 jobs failed. +* $PARALLEL_ENV can now be set to an environment prepending the command. Used in env_parallel as mentioned in the manpage. + +* --retry-failed will retry all failed jobs in a joblog. It will ignore any command given. + * --ssh and $PARALLEL_SSH can be used to set the command used for ssh. The command is assume to behave the same as ssh. * --fifo now works in csh, too. +* Killing through --timeout, --memfree, or --halt is now done as a process group. + +* GNU Parallel was cited in: Contrasting regional architectures of schizophrenia and other complex diseases using fast variance components analysis http://biorxiv.org/content/biorxiv/early/2015/06/05/016527.full.pdf + * GNU Parallel was cited in: DockBench: An Integrated Informatic Platform Bridging the Gap between the Robust Validation of Docking Protocols and Virtual Screening Simulations http://www.mdpi.com/1420-3049/20/6/9977 +* GNU Parallel was cited in: Swedes Online: You Are More Tracked Than You Think http://www.diva-portal.org/smash/get/diva2:807623/FULLTEXT01.pdf + +* GNU Parallel was cited in: Tutorial: Lorenz-Mie theory for 2D scattering and resonance calculations http://arxiv.org/pdf/1505.07691.pdf + +* GNU Parallel was cited in: A quantitative assessment of the Hadoop framework for analyzing massively parallel DNA sequencing data http://link.springer.com/article/10.1186/s13742-015-0058-5 + +* GNU Parallel was cited in: A composite genome approach to identify phylogenetically informative data from next-generation sequencing http://www.biomedcentral.com/content/pdf/s12859-015-0632-y.pdf + +Tutorial: Lorenz-Mie theory for 2D scattering and resonance calculations http://arxiv.org/pdf/1505.07691.pdf + * <> GNU Parallel was used (unfortunately without citation) in: MUGBAS: a species free gene-based programme suite for post-GWAS analysis http://www.ncbi.nlm.nih.gov/pubmed/25765345 * <> GNU Parallel was used in: Large Scale Author Name Disambiguation in Digital Libraries http://ieeexplore.ieee.org/xpl/abstractReferences.jsp?tp=&arnumber=7004487&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D7004487 @@ -250,8 +264,12 @@ New in this release: * GNU Parallel is used in: https://github.com/d2207197/local-mapreduce +* GNU Parallel is used in: A Fingerprint Identification System https://curve.carleton.ca/system/files/theses/28733.pdf + * Job ad asking for GNU Parallel experience: http://searchjobs.intel.com/gdansk-pol/software-validation-engineer/63A06826DAF24797AB414DC146201C2E/job/ +* Using BLAT http://wangzhengyuan.blogspot.dk/2015/06/using-blat.html + * Bug fixes and man page updates. GNU Parallel - For people who live life in the parallel lane. diff --git a/src/niceload b/src/niceload index ba7af537..c98f6e00 100755 --- a/src/niceload +++ b/src/niceload @@ -24,7 +24,7 @@ use strict; use Getopt::Long; $Global::progname="niceload"; -$Global::version = 20150522; +$Global::version = 20150607; Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV) || die_usage(); if($opt::version) { diff --git a/src/parallel b/src/parallel index 787422c7..e5755f4f 100755 --- a/src/parallel +++ b/src/parallel @@ -1076,7 +1076,7 @@ sub parse_options { sub init_globals { # Defaults: - $Global::version = 20150531; + $Global::version = 20150607; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; @@ -2813,6 +2813,7 @@ sub parse_host_filtering { my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); for (@_) { + ::debug("init",$_); chomp; my @col = split /\t/, $_; if(defined $col[6]) { @@ -2829,8 +2830,8 @@ sub parse_host_filtering { my $host = $1; $host =~ tr/\\//d; $Global::host{$host} or next; - if($col[6] eq "255" or $col[7] eq "15") { - # exit == 255 or signal == 15: ssh failed + if($col[6] eq "255" or $col[6] eq "-1") { + # exit == 255 or exit == timeout (-1): ssh failed/timedout # Remove sshlogin ::debug("init", "--filtered $host\n"); push(@down_hosts, $host); @@ -2916,7 +2917,8 @@ sub parallelized_host_filtering { # --retries 3: If the ssh daemon it overloaded, try 3 times # -s 16000: Half of the max line on UnixWare # TODO sh -c wrapper to work in csh - my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; + my $unlinkcmd = $Global::debug ? "true" : "rm $tmpfile"; + my $cmd = "($unlinkcmd; cat -) < $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; ::debug("init", $cmd, "\n"); my @out; my $prepend = ""; @@ -2933,7 +2935,6 @@ sub parallelized_host_filtering { push @out, $_; } close $host_fh; - $Global::debug or unlink $tmpfile; return @out; } @@ -3196,31 +3197,8 @@ sub reaper { sub __USAGE__ {} -sub _killall { - $Global::start_no_new_jobs ||= 1; - $Global::killall = 1; - my @term_seq = ("TERM",300,"TERM",300); - while(@term_seq) { - kill_my_pgrp(shift @term_seq, shift @term_seq); - } -} - -sub kill_my_pgrp { - my $signal = shift; - my $time = shift; - my %S = %SIG; - $SIG{$signal} = 'IGNORE'; - kill $signal, -$$; - %SIG = %S; - ::usleep($time); - my @pids = family_pids(keys %Global::running); - ::debug("kill","kill_my_pgrp $signal ",(join " ",sort @pids),"\n"); -} - sub killall { - # Kill all jobs - # Send all (grand*)children TERM,TERM,KILL - # Wait up to 200 ms between each + # Kill all jobs by killing their process groups $Global::start_no_new_jobs ||= 1; $Global::killall ||= 1; @@ -3228,8 +3206,10 @@ sub killall { } sub kill_sleep_seq { - # Send jobs TERM,TERM,KILL - # processgroups (-$pid) + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) my @pgrps = map { -$_ } @_; my @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); while(@term_seq) { @@ -3272,34 +3252,6 @@ sub kill_sleep { return @pids; } -sub family_pids { - # Find the pids with this->pid as (grand)*parent - # Input: - # @parents = pids of parents - # Returns: - # @pids = pids of (grand)*children - my @parents = @_; - my @pids; - - my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); - - # Start out with the live parents - my @more = grep { kill( 0, $_) } @parents; - # While more (grand)*children - while(@more) { - my @m; - push @pids, @more; - for my $parent (@more) { - if($children_of_ref->{$parent}) { - # add the children of this parent - push @m, @{$children_of_ref->{$parent}}; - } - } - @more = @m; - } - return (@pids); -} - sub wait_and_exit { # If we do not wait, we sometimes get segfault # Returns: N/A @@ -3755,8 +3707,8 @@ sub which { # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh - ksh93 lksh mksh pdksh posh rbash rc rush rzsh - sash sh static-sh tcsh yash zsh -sh -csh), + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh), '-sh (sh)' # sh on FreeBSD ); # Can be formatted as: @@ -6240,7 +6192,7 @@ sub set_endtime { $self->{'endtime'} = $endtime; } -sub timedout { +sub is_timedout { # Is the job timedout? # Input: # $delta_time = time that the job may run @@ -6257,72 +6209,6 @@ sub kill { ::kill_sleep_seq($self->pid()); } -sub _kill { - # Kill the job. - # Send the signals to (grand)*children and pid. - # If no signals: TERM TERM KILL - # Wait 200 ms after each TERM. - # Input: - # @signals = signals to send - my $self = shift; - my @signals = @_; - my @family_pids = $self->family_pids(); - - # Record this jobs as failed - $self->set_exitstatus(-1); - # Send two TERMs to give time to clean up - ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n"); - my @send_signals = @signals || ("TERM", "TERM", "KILL"); - for my $signal (@send_signals) { - my $alive = 0; - for my $pid (@family_pids) { - if(CORE::kill 0, $pid) { - # The job still running - CORE::kill $signal, $pid; - $alive = 1; - ::debug("run","$pid is alive\n"); - } - } - # If a signal was given as input, do not do the sleep below - @signals and next; - - if($signal eq "TERM" and $alive) { - # Wait up to 200 ms between TERMs - but only if any pids are alive - my $sleep = 1; - for (my $sleepsum = 0; CORE::kill 0, $family_pids[0] and $sleepsum < 200; - $sleepsum += $sleep) { - $sleep = ::reap_usleep($sleep); - } - } - } -} - -sub family_pids { - # Find the pids with this->pid as (grand)*parent - # Returns: - # @pids = pids of (grand)*children - my $self = shift; - my $pid = $self->pid(); - my @pids; - - my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); - - my @more = ($pid); - # While more (grand)*children - while(@more) { - my @m; - push @pids, @more; - for my $parent (@more) { - if($children_of_ref->{$parent}) { - # add the children of this parent - push @m, @{$children_of_ref->{$parent}}; - } - } - @more = @m; - } - return (@pids); -} - sub failed { # return number of times failed for this $sshlogin # Input: @@ -7104,6 +6990,18 @@ sub start { 1; }; $job->set_fh(0,"w",$stdin_fh); + } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-/dev/tty"); + $Global::tty_taken = $pid; + close $devtty_fh; + 1; + }; } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 and $job->sshlogin()->string() eq ":") { # Give STDIN to the first job if using -a (but only if running @@ -7118,18 +7016,6 @@ sub start { # Re-open to avoid complaining open(STDIN, "<&", $Global::original_stdin) or ::die_bug("dup-\$Global::original_stdin: $!"); - } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and - open(my $devtty_fh, "<", "/dev/tty")) { - # Give /dev/tty to the command if no one else is using it - *IN = $devtty_fh; - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3("<&IN", ">&OUT", ">&ERR", @setpgrp_wrap, $command) || - ::die_bug("open3-/dev/tty"); - $Global::tty_taken = $pid; - close $devtty_fh; - 1; - }; } else { # The eval is needed to catch exception from open3 eval { @@ -9232,7 +9118,7 @@ sub process_timeouts { # Job already finished. No need to timeout the job # This could be because of --keep-order shift @{$self->{'queue'}}; - } elsif($job->timedout($self->{'delta_time'})) { + } elsif($job->is_timedout($self->{'delta_time'})) { # Need to shift off queue before kill # because kill calls usleep that calls process_timeouts shift @{$self->{'queue'}}; diff --git a/src/parallel.pod b/src/parallel.pod index dbce8f38..784d82f1 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -1928,7 +1928,7 @@ Use B for output. Start a B session and run each job in a window in that session. No other output will be produced. -=item B<--timeout> I +=item B<--timeout> I (alpha testing) Time out for command. If the command runs for longer than I seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms @@ -4469,12 +4469,13 @@ version. A complete example that others can run that shows the problem including all files needed to run the example. This should preferably -be small and simple. A combination of B, B, B, B, -and B can reproduce most errors. If your example requires large -files, see if you can make them by something like B > -B or B > B. If your example -requires remote execution, see if you can use B - maybe -using another login. +be small and simple, so try to remove as many options as possible. A +combination of B, B, B, B, and B can +reproduce most errors. If your example requires large files, see if +you can make them by something like B > B or B > B. If your example requires remote +execution, see if you can use B - maybe using another +login. =item * diff --git a/src/parallel_design.pod b/src/parallel_design.pod index 5699c687..ff938aeb 100644 --- a/src/parallel_design.pod +++ b/src/parallel_design.pod @@ -503,6 +503,8 @@ not known to B. =back +If GNU B guesses wrong in these situation, set the shell using +$PARALLEL_SHELL. =head2 Quoting diff --git a/src/sql b/src/sql index 0b4634da..c3008535 100755 --- a/src/sql +++ b/src/sql @@ -566,7 +566,7 @@ $Global::Initfile && unlink $Global::Initfile; exit ($err); sub parse_options { - $Global::version = 20150522; + $Global::version = 20150607; $Global::progname = 'sql'; # This must be done first as this may exec myself diff --git a/testsuite/tests-to-run/parallel-local12.sh b/testsuite/tests-to-run/parallel-local12.sh index 6cb5c60c..32c2e489 100644 --- a/testsuite/tests-to-run/parallel-local12.sh +++ b/testsuite/tests-to-run/parallel-local12.sh @@ -2,15 +2,17 @@ rm -f ~/.parallel/will-cite -echo '### Test stdin goes to first command only ("-" as argument)' -cat >/tmp/parallel-script-for-script </tmp/parallel-script-for-script </tmp/parallel-script-for-script2 <