diff --git a/NEWS b/NEWS index 24d3737b..5fa07548 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,42 @@ +20120622 + +* '-L n --pipe' will use records of n lines. This is useful when + processing data that have fixed records with a fixed number of + lines (e.g. fastq). + +* --filter-hosts will remove down hosts. For each remote host: check + that login through ssh works. If not: do not use this host. + Currently you can not put --filter-hosts in a profile, $PARALLEL, + /etc/parallel/config or similar. This is because GNU Parallel uses + GNU Parallel to compute this, so you will get an infinite loop. This + will likely be fixed in a later release. + +* --pipe now uses fork() instead of busy wait. The performance should + be better on computers with >10 cores while remaining the same on + computers with few cores. + +* GNU Parallel will be represented at Chiang Mai Bar Camp + http://barcampchiangmai.org/ + +* Indexing Big Data on Amazon AWS: The Screencast + (Check out his T-shirt at 18:40-21:00) + http://www.opensourceconnections.com/2012/06/06/indexing-big-data-on-amazon-aws-screencast/ + +* biotoolbox uses GNU Parallel. + https://code.google.com/p/biotoolbox/wiki/Pod_novo_wrapper + +* Spiceweasel uses GNU Parallel. + https://github.com/mattray/spiceweasel + +* GNU Parallel part of The Administrators Challenge + http://challenge.twistedrack.com/questions/qs3.php + +* Finding Oldest Firefox Code using GNU Parallel + http://gregoryszorc.com/blog/2012/06/18/finding-oldest-firefox-code/ + +* Bug fixes (quite a few for remote job running) and man page updates. + + 20120522 * Timings in --joblog now with milliseconds. diff --git a/configure b/configure index 46077594..93f73630 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.68 for parallel 20120524. +# Generated by GNU Autoconf 2.68 for parallel 20120622. # # Report bugs to . 
# @@ -559,8 +559,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='parallel' PACKAGE_TARNAME='parallel' -PACKAGE_VERSION='20120524' -PACKAGE_STRING='parallel 20120524' +PACKAGE_VERSION='20120622' +PACKAGE_STRING='parallel 20120622' PACKAGE_BUGREPORT='bug-parallel@gnu.org' PACKAGE_URL='' @@ -1176,7 +1176,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures parallel 20120524 to adapt to many kinds of systems. +\`configure' configures parallel 20120622 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1242,7 +1242,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of parallel 20120524:";; + short | recursive ) echo "Configuration of parallel 20120622:";; esac cat <<\_ACEOF @@ -1309,7 +1309,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -parallel configure 20120524 +parallel configure 20120622 generated by GNU Autoconf 2.68 Copyright (C) 2010 Free Software Foundation, Inc. @@ -1326,7 +1326,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by parallel $as_me 20120524, which was +It was created by parallel $as_me 20120622, which was generated by GNU Autoconf 2.68. Invocation command line was $ $0 $@ @@ -2141,7 +2141,7 @@ fi # Define the identity of the package. PACKAGE='parallel' - VERSION='20120524' + VERSION='20120622' cat >>confdefs.h <<_ACEOF @@ -2704,7 +2704,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by parallel $as_me 20120524, which was +This file was extended by parallel $as_me 20120622, which was generated by GNU Autoconf 2.68. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2766,7 +2766,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -parallel config.status 20120524 +parallel config.status 20120622 configured by $0, generated by GNU Autoconf 2.68, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 37ab4d4a..15d9ad30 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([parallel], [20120524], [bug-parallel@gnu.org]) +AC_INIT([parallel], [20120622], [bug-parallel@gnu.org]) AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([ diff --git a/doc/release_new_version b/doc/release_new_version index 6456f58e..3fbe3f2a 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -171,21 +171,48 @@ cc:Sandro Cazzaniga , Ryoichiro Suzuki , Jesse Alama -Subject: GNU Parallel 20120622 ('Thailand') released +Subject: GNU Parallel 20120622 ('Chiang Mai') released -GNU Parallel 20120622 ('Thailand') has been released. It is +GNU Parallel 20120622 ('Chiang Mai') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ New in this release: -* biotoolbox uses GNU Parallel. - https://code.google.com/p/biotoolbox/wiki/Pod_novo_wrapper +* '-L n --pipe' will use records of n lines. This is useful when + processing data that have fixed records with a fixed number of + lines (e.g. fastq). + +* --filter-hosts will remove down hosts. For each remote host: check + that login through ssh works. If not: do not use this host. + Currently you can not put --filter-hosts in a profile, $PARALLEL, + /etc/parallel/config or similar. 
This is because GNU Parallel uses + GNU Parallel to compute this, so you will get an infinite loop. This + will likely be fixed in a later release. + +* --pipe now uses fork() instead of busy wait. The performance should + be better on computers with >10 cores while remaining the same on + computers with few cores. + +* GNU Parallel will be represented at Chiang Mai Bar Camp + http://barcampchiangmai.org/ * Indexing Big Data on Amazon AWS: The Screencast (Check out his T-shirt at 18:40-21:00) http://www.opensourceconnections.com/2012/06/06/indexing-big-data-on-amazon-aws-screencast/ -* Bug fixes and man page updates. +* biotoolbox uses GNU Parallel. + https://code.google.com/p/biotoolbox/wiki/Pod_novo_wrapper + +* Spiceweasel uses GNU Parallel. + https://github.com/mattray/spiceweasel + +* GNU Parallel part of The Administrators Challenge + http://challenge.twistedrack.com/questions/qs3.php + +* Finding Oldest Firefox Code using GNU Parallel + http://gregoryszorc.com/blog/2012/06/18/finding-oldest-firefox-code/ + +* Bug fixes (quite a few for remote job running) and man page updates. = About GNU Parallel = diff --git a/packager/obs/home:tange/parallel/parallel.spec b/packager/obs/home:tange/parallel/parallel.spec index b09a9527..81866d18 100644 --- a/packager/obs/home:tange/parallel/parallel.spec +++ b/packager/obs/home:tange/parallel/parallel.spec @@ -1,6 +1,6 @@ Summary: Shell tool for executing jobs in parallel Name: parallel -Version: 20120522 +Version: 20120622 Release: 1 License: GPL Group: Productivity/File utilities @@ -12,21 +12,27 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-buildroot %description GNU Parallel is a shell tool for executing jobs in parallel using one -or more computers. A job is typically a single command or a small -script that has to be run for each of the lines in the input. The -typical input is a list of files, a list of hosts, a list of users, or -a list of tables. +or more computers. 
A job can be a single command or a small script +that has to be run for each of the lines in the input. The typical +input is a list of files, a list of hosts, a list of users, a list of +URLs, or a list of tables. A job can also be a command that reads from +a pipe. GNU Parallel can then split the input and pipe it into +commands in parallel. -If you use xargs today you will find GNU Parallel very easy to use. If +If you use xargs and tee today you will find GNU Parallel very easy to +use as GNU Parallel is written to have the same options as xargs. If you write loops in shell, you will find GNU Parallel may be able to -replace most of the loops and make them run faster by running jobs in -parallel. If you use ppss or pexec you will find GNU Parallel will -often make the command easier to read. +replace most of the loops and make them run faster by running several +jobs in parallel. -GNU Parallel also makes sure output from the commands is the same -output as you would get had you run the commands sequentially. This -makes it possible to use output from GNU Parallel as input for other -programs. +GNU Parallel makes sure output from the commands is the same output as +you would get had you run the commands sequentially. This makes it +possible to use output from GNU Parallel as input for other programs. + +For each line of input GNU Parallel will execute command with the line +as arguments. If no command is given, the line of input is +executed. Several lines will be run in parallel. GNU Parallel can +often be used as a substitute for xargs or cat | bash. 
%prep if [ "${RPM_BUILD_ROOT}x" == "x" ]; then diff --git a/src/niceload b/src/niceload index 4b0de4b6..864a8b12 100755 --- a/src/niceload +++ b/src/niceload @@ -24,7 +24,7 @@ use strict; use Getopt::Long; $Global::progname="niceload"; -$Global::version = 20120524; +$Global::version = 20120622; Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV) || die_usage(); if($::opt_version) { diff --git a/src/parallel b/src/parallel index e173e1db..5aa301c4 100755 --- a/src/parallel +++ b/src/parallel @@ -301,7 +301,7 @@ sub spreadstdin { } elsif($Global::max_lines) { # Read $Global::max_lines lines eof($in) and last piperead; - for(my $t = 0; !eof($in) and + for(my $t = 0; !eof($in) and substr($buf,length $buf,0) = <$in> and $t < $Global::max_lines; $t++) {} } else { @@ -485,7 +485,7 @@ sub options_hash { "tagstring=s" => \$::opt_tagstring, "onall" => \$::opt_onall, "nonall" => \$::opt_nonall, - "filter-hosts" => \$::opt_filter_hosts, + "filter-hosts|filterhosts|filter-host" => \$::opt_filter_hosts, "sshlogin|S=s" => \@::opt_sshlogin, "sshloginfile|slf=s" => \@::opt_sshloginfile, "controlmaster|M" => \$::opt_controlmaster, @@ -584,7 +584,7 @@ sub get_options_from_array { sub parse_options { # Returns: N/A # Defaults: - $Global::version = 20120614; + $Global::version = 20120622; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; diff --git a/src/parallel.pod b/src/parallel.pod index 154cb13d..8834e3d1 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -462,6 +462,17 @@ See also: B<--bg>, B<man sem> Implies B<--semaphore>. +=item B<--filter-hosts> (alpha testing) + +Remove down hosts. For each remote host: check that login through ssh +works. If not: do not use this host. + +Currently you can not put B<--filter-hosts> in a profile, +$PARALLEL, /etc/parallel/config or similar. This is because GNU +B<parallel> uses GNU B<parallel> to compute this, so you will get an +infinite loop. This will likely be fixed in a later release. 
+ + =item B<--gnu> Behave like GNU B<parallel>. If B<--tollef> and B<--gnu> are both set, @@ -634,7 +645,7 @@ to see the difference: parallel -j4 sleep {}\; echo {} ::: 2 1 4 3 parallel -j4 -k sleep {}\; echo {} ::: 2 1 4 3 -=item B<-L> I<max-lines> +=item B<-L> I<max-lines> (alpha testing) When used with B<--pipe>: Read records of I<max-lines>. @@ -1221,14 +1232,14 @@ different dir for the files. Setting B<--tmpdir> is equivalent to setting $TMPDIR. -=item B<--timeout> I<sec> +=item B<--timeout> I<sec> (alpha testing) Time out for command. If the command runs for longer than I<sec> seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms later, followed by SIGKILL 200 ms later. -=item B<--tollef> +=item B<--tollef> (alpha testing) Make GNU B<parallel> behave more like Tollef's parallel command. It activates B<-u>, B<-q>, and B<--arg-sep -->. It also causes B<-l> to @@ -1434,9 +1445,9 @@ Compare these two: See also B<--header>. -=item B<--shebang> +=item B<--shebang> (alpha testing) -=item B<--hashbang> +=item B<--hashbang> (alpha testing) GNU B<Parallel> can be called as a shebang (#!) command as the first line of a script. Like this: diff --git a/src/parallel.texi b/src/parallel.texi index c83d1fd3..2680ad7c 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -489,6 +489,17 @@ See also: @strong{--bg}, @strong{man sem} Implies @strong{--semaphore}. +@item @strong{--filter-hosts} (alpha testing) +@anchor{@strong{--filter-hosts} (alpha testing)} + +Remove down hosts. For each remote host: check that login through ssh +works. If not: do not use this host. + +Currently you can not put @strong{--filter-hosts} in a profile, +$PARALLEL, /etc/parallel/config or similar. This is because GNU +@strong{parallel} uses GNU @strong{parallel} to compute this, so you will get an +infinite loop. This will likely be fixed in a later release. 
+ @item @strong{--gnu} @anchor{@strong{--gnu}} @@ -685,8 +696,8 @@ to see the difference: parallel -j4 -k sleep {}\; echo {} ::: 2 1 4 3 @end verbatim -@item @strong{-L} @emph{max-lines} -@anchor{@strong{-L} @emph{max-lines}} +@item @strong{-L} @emph{max-lines} (alpha testing) +@anchor{@strong{-L} @emph{max-lines} (alpha testing)} When used with @strong{--pipe}: Read records of @emph{max-lines}. @@ -1300,15 +1311,15 @@ into temporary files in /tmp. By setting @strong{--tmpdir} you can use a different dir for the files. Setting @strong{--tmpdir} is equivalent to setting $TMPDIR. -@item @strong{--timeout} @emph{sec} -@anchor{@strong{--timeout} @emph{sec}} +@item @strong{--timeout} @emph{sec} (alpha testing) +@anchor{@strong{--timeout} @emph{sec} (alpha testing)} Time out for command. If the command runs for longer than @emph{sec} seconds it will get killed with SIGTERM, followed by SIGTERM 200 ms later, followed by SIGKILL 200 ms later. -@item @strong{--tollef} -@anchor{@strong{--tollef}} +@item @strong{--tollef} (alpha testing) +@anchor{@strong{--tollef} (alpha testing)} Make GNU @strong{parallel} behave more like Tollef's parallel command. It activates @strong{-u}, @strong{-q}, and @strong{--arg-sep --}. It also causes @strong{-l} to @@ -1529,11 +1540,11 @@ Compare these two: See also @strong{--header}. -@item @strong{--shebang} -@anchor{@strong{--shebang}} +@item @strong{--shebang} (alpha testing) +@anchor{@strong{--shebang} (alpha testing)} -@item @strong{--hashbang} -@anchor{@strong{--hashbang}} +@item @strong{--hashbang} (alpha testing) +@anchor{@strong{--hashbang} (alpha testing)} GNU @strong{Parallel} can be called as a shebang (#!) command as the first line of a script. 
Like this: diff --git a/src/sql b/src/sql index 33eaf132..5e5e65f0 100755 --- a/src/sql +++ b/src/sql @@ -556,7 +556,7 @@ $Global::Initfile && unlink $Global::Initfile; exit ($err); sub parse_options { - $Global::version = 20120524; + $Global::version = 20120622; $Global::progname = 'sql'; # This must be done first as this may exec myself diff --git a/testsuite/tests-to-run/test30.sh b/testsuite/tests-to-run/test30.sh index c83419d9..89eb05f5 100644 --- a/testsuite/tests-to-run/test30.sh +++ b/testsuite/tests-to-run/test30.sh @@ -18,7 +18,7 @@ echo '### bug #34422: parallel -X --eta crashes with div by zero' seq 2 | stdout parallel -X --eta echo echo '### --timeout on remote machines' -parallel -j0 --timeout 4 --onall -S localhost,parallel@parallel-server1 'sleep {}; echo slept {}' ::: 1 8 9 ; echo jobs failed: $? +parallel -j0 --timeout 6 --onall -S localhost,parallel@parallel-server1 'sleep {}; echo slept {}' ::: 1 8 9 ; echo jobs failed: $? echo '### --pipe without command' seq -w 10 | stdout parallel --pipe