parallel: implemented --regexp. Prepended 'parallel:' to warnings

2024-11-22 05:57:54 +00:00 · 2011-02-02 16:36:29 +01:00 · 2011-02-02 16:36:29 +01:00 · ad61df30f0
parent cb468fb6d3
commit ad61df30f0
8 changed files with 248 additions and 48 deletions
--- a/20
+++ b/20
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.67 for parallel 20110126.
+# Generated by GNU Autoconf 2.67 for parallel 20110130.
 #
 # Report bugs to <bug-parallel@gnu.org>.
 #
@ -551,8 +551,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='parallel'
 PACKAGE_TARNAME='parallel'
-PACKAGE_VERSION='20110126'
+PACKAGE_VERSION='20110130'
-PACKAGE_STRING='parallel 20110126'
+PACKAGE_STRING='parallel 20110130'
 PACKAGE_BUGREPORT='bug-parallel@gnu.org'
 PACKAGE_URL=''
@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures parallel 20110126 to adapt to many kinds of systems.
+\`configure' configures parallel 20110130 to adapt to many kinds of systems.
 Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1234,7 +1234,7 @@ fi
 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of parallel 20110126:";;
+     short | recursive ) echo "Configuration of parallel 20110130:";;
   esac
  cat <<\_ACEOF
@ -1301,7 +1301,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-parallel configure 20110126
+parallel configure 20110130
 generated by GNU Autoconf 2.67
 Copyright (C) 2010 Free Software Foundation, Inc.
@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
-It was created by parallel $as_me 20110126, which was
+It was created by parallel $as_me 20110130, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
  $ $0 $@
@ -2133,7 +2133,7 @@ fi
 # Define the identity of the package.
 PACKAGE='parallel'
- VERSION='20110126'
+ VERSION='20110130'
 cat >>confdefs.h <<_ACEOF
@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by parallel $as_me 20110126, which was
+This file was extended by parallel $as_me 20110130, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
  CONFIG_FILES    = $CONFIG_FILES
@ -2746,7 +2746,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-parallel config.status 20110126
+parallel config.status 20110130
 configured by $0, generated by GNU Autoconf 2.67,
  with options \\"\$ac_cs_config\\"
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([parallel], [20110126], [bug-parallel@gnu.org])
+AC_INIT([parallel], [20110130], [bug-parallel@gnu.org])
 AM_INIT_AUTOMAKE([-Wall -Werror foreign])
 AC_CONFIG_HEADERS([config.h])
 AC_CONFIG_FILES([
--- a/doc/FUTURE_IDEAS
+++ b/doc/FUTURE_IDEAS
@ -85,7 +85,7 @@ Prøv fieldsep: Find eet tegn, som optræder det samme antal gange i alle linjer
 Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
 Fjern whitespace før og efter colonne
-hvis der er n af tegn A og 2n af tegn B, så 
+hvis der er n af tegn A og 2n af tegn B, så
  a | b | c
@ -112,6 +112,160 @@ colsep = [sepchars]{no_of_sepchars}
 # TODO compute how many can be transferred within max_line_length
 # TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
 =head1 YouTube video --pipe
 cp parallel.fasta  parallel.mbox lucene.tar
 # GNU Parallel 20110205 - The FOSDEM Release
 I assume you already know GNU Parallel. If not watch the intro video first.
 GNU Parallel has so far worked similar to xargs. But the FOSDEM
 release of GNU Parallel introduces the new --pipe option. It makes GNU
 Parallel work similar to tee.
 tee pipes a copy of the output to a file and a copy to another
 program.
 seq 1 5 | tee myfile | wc
 Here it pipes a copy to the file myfile and to the command word count (wc).
 cat myfile
 and we can see the content is what we expected.
 The pipe option of GNU Parallel splits data into records and pipes a
 block of records into a program:
 seq 1 5 | parallel --pipe -N1 cat';' echo foo
 Here we pipe each number to the command cat and print foo after
 running cat.
 GNU Parallel does this in parallel starting one process per cpu so the
 order may be different because one command may finish before another.
 # RECORD SEPARATORS
 GNU Parallel splits on record separators.
 seq 1 5 | parallel --pipe --recend '\n' -N1 cat';' echo foo
 This is the example we saw before: the record separator is \n and
 --recend will keep the record separator at the end of the record.
 But what if your records start with a record separator? Here is a
 FASTA file:
 cat parallel.fasta
 Every record starts with a >. To keep that with the record you use
 --recstart:
 cat parallel.fasta | parallel --pipe --recstart '>' -N1 cat';' echo foo
 But what if you have both? An mbox file is an example that has both an
 ending and a starting separator:
 cat parallel.mbox |
 parallel --pipe --recend '\n\n' --recstart 'From ' -N1 cat';' echo foo | less   #
 The two newlines stay with the preceding email and the From_ stays with the next record.
 GNU Parallel cannot guarantee the first record will start with record
 separator and it cannot guarantee the last record will end with record
 separator. You will simply get what is first and last.
 But GNU Parallel _does_ guarantee that it will only split at record
 separators.
 # NUMBER OF RECORDS
 So far we have used -N1. This tells GNU Parallel to pipe one record to
 the program.
 seq 1 5 | parallel --pipe -N1 cat';' echo foo
 But we can choose any amount:
 seq 1 5 | parallel --pipe -N3 cat';' echo foo
 This will pipe blocks of 3 records into cat, and if there are not enough records the last block will only get two.
 # BLOCKSIZE
 However, using -N is inefficient. It is faster to pipe a full block into the program.
 cat /usr/share/dict/words | parallel --pipe --blocksize 500k wc
 Here we tell GNU Parallel to split on \n and pipe blocks of 500 KB to
 wc. 1 MB is the default:
 cat /usr/share/dict/words | parallel --pipe wc
 If you just have a bunch of bytes you often do not care about the
 record separator. To split input into chunks you can disable the
 --recend
 ls -l lucene.tar
 cat lucene.tar | parallel --pipe --recend '' -k gzip > lucene.tar.gz
 GNU Parallel will then split the input into 1 MB blocks; pipe that to
 gzip and -k will make sure the order of the output is kept before
 saving to the tar.gz file.
 The beauty of gzip is that if you concatenate two gzip files it is a
 valid gzip file. To test this:
 tar tvzf lucene.tar.gz #
 # OUTPUT AS FILE
 Sometimes the output of GNU Parallel cannot be mixed in a single stream like this:
 seq 1 10 | shuf | parallel --pipe -N 3 sort -n
 As you can see each block of 3 is sorted but the whole output is not sorted.
 GNU Parallel can put the output in files. GNU Parallel will then list the
 files created:
 seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n
 Each of these files contains a sorted block:
 cat
 Sort has -m to merge sorted files into a sorted stream
 seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n | parallel -mj1 sort -nm
 -m will append all the files behind the sort command and the -j1 will
 make sure we only run one command. The only part missing now is
 cleaning up by removing the temporary files. We can do that by
 appending rm
 seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n |
 parallel -mj1 sort -nm {} ";"rm {}
 # Thank you for watching
 #
 # If you like GNU Parallel:
 # * Post this video on forums/blogs/Twitter/Facebook/Linkedin
 # * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
 # * Request or write a review for your favourite magazine
 # * Request or build a package for your favourite distribution
 # * Invite me for your next conference (Contact http://ole.tange.dk)
 #
 # If GNU Parallel saves you money:
 # * (Have your company) donate to FSF https://my.fsf.org/donate/
 #
 # Find GNU Parallel at http://www.gnu.org/software/parallel/
 =head1 YouTube video2
 Converting of WAV files to MP3 using GNU Parallel
@ -177,7 +331,7 @@ it easy to distribute jobs to these.
 terminal2: ssh parallel@vh2.pi.dk
 ssh parallel@vh2.pi.dk
- and  
+ and
 PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
 gunzip logs/*gz
@ -362,7 +516,7 @@ find . -name '*.gz' | parallel -j+0 "zcat {} | bzip2 >{.}.bz2 && rm {}"
 # Create a directory for each zip-file and unzip it in that dir
 parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
-# Convert all *.mp3 in subdirs to *.ogg running 
+# Convert all *.mp3 in subdirs to *.ogg running
 #   one process per CPU core on local computer and server2
 find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
         'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -66,12 +66,14 @@ echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org
 == Download and test ==
 pushd /tmp
 rm parallel-$YYYYMMDD.tar.bz2
 wget http://ftp.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
 #wget http://alpha.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
 tar xjvf parallel-$YYYYMMDD.tar.bz2
 cd parallel-$YYYYMMDD
 ./configure
 make -j && sudo make -j install
 pushd
 == Update OpenSUSE build system ==
@ -138,17 +140,22 @@ cc:Peter Simons <simons@cryp.to>, Sandro Cazzaniga <kharec@mandriva.org>,
   ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
   Christian Faulhammer <fauli@gentoo.org>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>
-Subject: GNU Parallel 2011XXXX released
+Subject: GNU Parallel 20110205 (FOSDEM release) released
-GNU Parallel 2011XXXX has been released. It is available for
+GNU Parallel 20110205 (the FOSDEM release) has been released. It is
-download at: http://ftp.gnu.org/gnu/parallel/
+available for download at: http://ftp.gnu.org/gnu/parallel/
 This is a major release as the --pipe option introduces a new way to
-work. To learn about --pipe see the example section for uses of
+work. GNU Parallel has so far been similar to xargs, with --pipe it
--pipe.
+becomes somewhat similar to tee. To learn about --pipe see the example
 section for uses of --pipe.
 But rest assured: No old functionality is changed.
 If you want GNU Parallel to be part of your favourite distribution
 contact the people maintaining the distribution (complaining on
 Twitter is not enough).
 New in this release:
 * --pipe splits piped data into blocks. Each block is piped to a
@ -170,12 +177,22 @@ New in this release:
  followed immediately by a start of a record. This is useful if
  either recend or recstart can occur in the middle of a record.
 * --remove-rec-sep removes the string matched by --recstart and
  --recend.
 * --regexp will make GNU Parallel treat --recstart and --recend as
  regular expressions.
 * --output-as-files will put the output of the programs into files and
  instead of giving the output GNU Parallel will output the name of
  these files.
-* -N set the number of records to read. If used with --blocksize
+* -N if used with --pipe sets the number of records to read.
-  the block read will at most be --blocksize.
+
 * GNU Parallel was presented at FOSDEM.
 * Article in USENIX Magazine ;login: (print)
  http://www.usenix.org/publications/login/2011-02/
 * GNU Parallel is now on ohloh.net. Thanks to Wim Muskee.
  https://www.ohloh.net/p/gnu-parallel
--- a/src/niceload
+++ b/src/niceload
@ -236,7 +236,7 @@ B<parallel>(1), B<nice>(1)
 use strict;
 use Getopt::Long;
 $Global::progname="niceload";
-$Global::version = 20110126;
+$Global::version = 20110130;
 Getopt::Long::Configure("bundling","require_order");
 get_options_from_array(\@ARGV) || die_usage();
 if($::opt_version) {
--- a/src/parallel
+++ b/src/parallel
@ -88,17 +88,17 @@ sub spreadstdin {
 	# If both --recstart and --recend is given then both must match
 	$recstart = $::opt_recstart;
 	$recend = $::opt_recend;
-	$recerror = "Warning: --recend and --recstart unmatched. Is --blocksize too small?";
+	$recerror = "parallel: Warning: --recend and --recstart unmatched. Is --blocksize too small?";
    } elsif(defined($::opt_recstart)) {
 	# If --recstart is given it must match start of record
 	$recstart = $::opt_recstart;
 	$recend = "";
-	$recerror = "Warning: --recstart unmatched. Is --blocksize too small?";
+	$recerror = "parallel: Warning: --recstart unmatched. Is --blocksize too small?";
    } elsif(defined($::opt_recend)) {
 	# If --recend is given then it must match end of record
 	$recstart = "";
 	$recend = $::opt_recend;
-	$recerror = "Warning: --recend unmatched. Is --blocksize too small?";
+	$recerror = "parallel: Warning: --recend unmatched. Is --blocksize too small?";
    }
    while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
@ -333,6 +333,7 @@ sub get_options_from_array {
 	 "pipe|spreadstdin" => \$::opt_pipe,
 	 "recstart=s" => \$::opt_recstart,
 	 "recend=s" => \$::opt_recend,
 	 "regexp|regex" => \$::opt_regexp,
 	 "remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep,
 	 "files|output-as-files|outputasfiles" => \$::opt_files,
 	 "block|block-size|blocksize=s" => \$::opt_blocksize,
@ -377,7 +378,7 @@ sub get_options_from_array {
 sub parse_options {
    # Returns: N/A
    # Defaults:
-    $Global::version = 20110126;
+    $Global::version = 20110130;
    $Global::progname = 'parallel';
    $Global::infinity = 2**31;
    $Global::debug = 0;
@ -519,7 +520,7 @@ sub parse_options {
        # As we do not know the max line length on the remote machine
        # long commands generated by xargs may fail
        # If opt_N is set, it is probably safe
-        print STDERR ("Warning: using -X or -m with --sshlogin may fail\n");
+        print STDERR ("parallel: Warning: using -X or -m with --sshlogin may fail\n");
    }
    if(not defined $::opt_P) {
@ -1265,19 +1266,19 @@ sub parse_sshlogin {
            # There are no remote hosts
            if(defined @::opt_trc) {
                print $Global::original_stderr
-                    "Warning: --trc ignored as there are no remote --sshlogin\n";
+                    "parallel: Warning: --trc ignored as there are no remote --sshlogin\n";
            } elsif (defined $::opt_transfer) {
                print $Global::original_stderr
-                    "Warning: --transfer ignored as there are no remote --sshlogin\n";
+                    "parallel: Warning: --transfer ignored as there are no remote --sshlogin\n";
            } elsif (defined @::opt_return) {
                print $Global::original_stderr
-                    "Warning: --return ignored as there are no remote --sshlogin\n";
+                    "parallel: Warning: --return ignored as there are no remote --sshlogin\n";
            } elsif (defined $::opt_cleanup) {
                print $Global::original_stderr
-                    "Warning: --cleanup ignored as there are no remote --sshlogin\n";
+                    "parallel: Warning: --cleanup ignored as there are no remote --sshlogin\n";
            } elsif (defined @::opt_basefile) {
                print $Global::original_stderr
-                    "Warning: --basefile ignored as there are no remote --sshlogin\n";
+                    "parallel: Warning: --basefile ignored as there are no remote --sshlogin\n";
            }
        }
    }
@ -1898,20 +1899,20 @@ sub processes_available_by_system_limit {
            # Give the user a warning. He can press Ctrl-C if this
            # sucks.
            print $Global::original_stderr
-                ("Warning: Starting 10 extra processes takes > 2 sec.\n",
+                ("parallel: Warning: Starting 10 extra processes takes > 2 sec.\n",
                 "Consider adjusting -j. Press CTRL-C to stop.\n");
            $slow_spawining_warning_printed = 1;
        }
    }
    if($system_limit < $wanted_processes and not $more_filehandles) {
        print $Global::original_stderr
-            ("Warning: Only enough filehandles to run ",
+            ("parallel: Warning: Only enough filehandles to run ",
             $system_limit, " jobs in parallel. ",
             "Raising ulimit -n may help\n");
    }
    if($system_limit < $wanted_processes and $max_system_proc_reached) {
        print $Global::original_stderr
-            ("Warning: Only enough available processes to run ",
+            ("parallel: Warning: Only enough available processes to run ",
             $system_limit, " jobs in parallel.\n");
    }
    # Cleanup: Close the files
@ -1948,7 +1949,7 @@ sub simultaneous_sshlogin_limit {
    if($ssh_limit < $wanted_processes) {
        my $serverlogin = $self->serverlogin();
        print $Global::original_stderr
-            ("Warning: ssh to $serverlogin only allows ",
+            ("parallel: Warning: ssh to $serverlogin only allows ",
             "for $ssh_limit simultaneous logins.\n",
             "You may raise this by changing ",
             "/etc/ssh/sshd_config:MaxStartup on $serverlogin\n",
@ -2059,7 +2060,7 @@ sub ncpus {
                $self->{'ncpus'} = $ncpu;
            } else {
                print $Global::original_stderr
-                    ("Warning: Could not figure out ",
+                    ("parallel: Warning: Could not figure out ",
                     "number of cpus on $serverlogin. Using 1\n");
                $self->{'ncpus'} = 1;
            }
@ -2080,7 +2081,7 @@ sub no_of_cpus {
    if($no_of_cpus) {
        return $no_of_cpus;
    } else {
-        warn("Cannot figure out number of cpus. Using 1");
+        warn("parallel: Cannot figure out number of cpus. Using 1");
        return 1;
    }
 }
@ -2097,7 +2098,7 @@ sub no_of_cores {
    if($no_of_cores) {
        return $no_of_cores;
    } else {
-        warn("Cannot figure out number of CPU cores. Using 1");
+        warn("parallel: Cannot figure out number of CPU cores. Using 1");
        return 1;
    }
 }
@ -2699,7 +2700,7 @@ sub sshtransfer {
 	    $pre .= "$mkremote_workdir; rsync $rsync_opt ".::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;";
 	} else {
 	    print $Global::original_stderr
-		"Warning: $file is not readable and will not be transferred\n";
+		"parallel: Warning: $file is not readable and will not be transferred\n";
 	}
    }
    return $pre;
@ -4017,6 +4018,6 @@ sub unlock {
 # Keep perl -w happy
-$::opt_regexp = $::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
+$::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
 $::opt_skip_first_line = $::opt_shebang = 0 ;
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -28,8 +28,7 @@ If you use B<xargs> today you will find GNU B<parallel> very easy to
 use as GNU B<parallel> is written to have the same options as
 B<xargs>. If you write loops in shell, you will find GNU B<parallel>
 may be able to replace most of the loops and make them run faster by
-running several jobs simultaneously. If you use B<ppss> or B<pexec> you
+running several jobs simultaneously.
 will find GNU B<parallel> will often make the command easier to read.
 GNU B<parallel> makes sure output from the commands is the same output
 as you would get had you run the commands sequentially. This makes it
@ -713,8 +712,8 @@ If B<--recstart> is given I<startstring> will be used to split at record start.
 If B<--recend> is given I<endstring> will be used to split at record end.
 If both B<--recstart> and B<--recend> are given the string
-I<startregexp>I<endregexp> will have to match to find a split
+I<startstring>I<endstring> will have to match to find a split
-position. This is useful if either I<startregexp> or I<endregexp>
+position. This is useful if either I<startstring> or I<endstring>
 match in the middle of a record.
 If neither B<--recstart> nor B<--recend> are given then B<--recend>
@ -726,7 +725,7 @@ Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
 expressions. This is slow, however.
-=item B<--regexp> (unimplimented)
+=item B<--regexp> (beta test)
 Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
 expressions. This is slow, however.
@ -743,6 +742,7 @@ it to the command.
 Only used with B<--pipe>.
 =item B<--retries> I<n> (beta testing)
 If a job fails, retry it on another computer. Do this I<n> times. If
@ -1601,6 +1601,34 @@ B<parallel -j 100 < jobs_to_run>
 As there is not a I<command> the jobs will be evaluated by the shell.
 =head1 EXAMPLE: Processing a big file using more cores
 To process a big file or some output you can use B<--pipe> to split up
 the data into blocks and pipe the blocks into the processing program.
 If the program is B<gzip -9> you can do:
 B<cat bigfile | parallel --pipe --recend '' -k gzip -9 >>B<bigfile.gz>
 This will split B<bigfile> into blocks of 1 MB and pass that to B<gzip
 -9> in parallel. One B<gzip> will be run per CPU core. The output of
 B<gzip -9> will be kept in order and saved to B<bigfile.gz>
 B<gzip> works fine if the output is appended, but some processing does
 not work like that - for example sorting. For this GNU B<parallel> can
 put the output of each command into a file. This will sort a big file
 in parallel:
 B<cat bigfile | parallel --pipe --files sort | parallel -Xj1 sort -m {} ';' rm {} >>B<bigfile.sort>
 Here B<bigfile> is split into blocks of around 1MB, each block ending
 in '\n' (which is the default for B<--recend>). Each block is passed
 to B<sort> and the output from B<sort> is saved into files. These
 files are passed to the second B<parallel> that runs B<sort -m> on the
 files before it removes the files. The output is saved to
 B<bigfile.sort>.
 =head1 EXAMPLE: Working as mutex and counting semaphore
 The command B<sem> is an alias for B<parallel --semaphore>.
@ -1921,7 +1949,7 @@ variable $PARALLEL which takes precedence over the file
 =head1 PROFILE FILES
-If B<--profile> set, GNU B<parallel> will read the profile from that file instead of 
+If B<--profile> is set, GNU B<parallel> will read the profile from that file instead of
 ~/.parallel/config.
 Example: Profile for running every command with B<-j+0> and B<nice>
--- a/src/sql
+++ b/src/sql
@ -531,7 +531,7 @@ $Global::Initfile && unlink $Global::Initfile;
 exit ($err);
 sub parse_options {
-    $Global::version = 20110126;
+    $Global::version = 20110130;
    $Global::progname = 'sql';
    # This must be done first as this may exec myself