mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parallel: implemented --regexp. Prepended 'parallel:' to warnings
This commit is contained in:
parent
cb468fb6d3
commit
ad61df30f0
20
configure
vendored
20
configure
vendored
|
@ -1,6 +1,6 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.67 for parallel 20110126.
|
# Generated by GNU Autoconf 2.67 for parallel 20110130.
|
||||||
#
|
#
|
||||||
# Report bugs to <bug-parallel@gnu.org>.
|
# Report bugs to <bug-parallel@gnu.org>.
|
||||||
#
|
#
|
||||||
|
@ -551,8 +551,8 @@ MAKEFLAGS=
|
||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='parallel'
|
PACKAGE_NAME='parallel'
|
||||||
PACKAGE_TARNAME='parallel'
|
PACKAGE_TARNAME='parallel'
|
||||||
PACKAGE_VERSION='20110126'
|
PACKAGE_VERSION='20110130'
|
||||||
PACKAGE_STRING='parallel 20110126'
|
PACKAGE_STRING='parallel 20110130'
|
||||||
PACKAGE_BUGREPORT='bug-parallel@gnu.org'
|
PACKAGE_BUGREPORT='bug-parallel@gnu.org'
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
|
@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then
|
||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures parallel 20110126 to adapt to many kinds of systems.
|
\`configure' configures parallel 20110130 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
|
@ -1234,7 +1234,7 @@ fi
|
||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of parallel 20110126:";;
|
short | recursive ) echo "Configuration of parallel 20110130:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
|
@ -1301,7 +1301,7 @@ fi
|
||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
parallel configure 20110126
|
parallel configure 20110130
|
||||||
generated by GNU Autoconf 2.67
|
generated by GNU Autoconf 2.67
|
||||||
|
|
||||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF
|
||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by parallel $as_me 20110126, which was
|
It was created by parallel $as_me 20110130, which was
|
||||||
generated by GNU Autoconf 2.67. Invocation command line was
|
generated by GNU Autoconf 2.67. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
|
@ -2133,7 +2133,7 @@ fi
|
||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='parallel'
|
PACKAGE='parallel'
|
||||||
VERSION='20110126'
|
VERSION='20110130'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by parallel $as_me 20110126, which was
|
This file was extended by parallel $as_me 20110130, which was
|
||||||
generated by GNU Autoconf 2.67. Invocation command line was
|
generated by GNU Autoconf 2.67. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
|
@ -2746,7 +2746,7 @@ _ACEOF
|
||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
parallel config.status 20110126
|
parallel config.status 20110130
|
||||||
configured by $0, generated by GNU Autoconf 2.67,
|
configured by $0, generated by GNU Autoconf 2.67,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
AC_INIT([parallel], [20110126], [bug-parallel@gnu.org])
|
AC_INIT([parallel], [20110130], [bug-parallel@gnu.org])
|
||||||
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
||||||
AC_CONFIG_HEADERS([config.h])
|
AC_CONFIG_HEADERS([config.h])
|
||||||
AC_CONFIG_FILES([
|
AC_CONFIG_FILES([
|
||||||
|
|
160
doc/FUTURE_IDEAS
160
doc/FUTURE_IDEAS
|
@ -85,7 +85,7 @@ Prøv fieldsep: Find eet tegn, som optræder det samme antal gange i alle linjer
|
||||||
Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
|
Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
|
||||||
Fjern whitespace før og efter colonne
|
Fjern whitespace før og efter colonne
|
||||||
|
|
||||||
hvis der er n af tegn A og 2n af tegn B, så
|
hvis der er n af tegn A og 2n af tegn B, så
|
||||||
|
|
||||||
a | b | c
|
a | b | c
|
||||||
|
|
||||||
|
@ -112,6 +112,160 @@ colsep = [sepchars]{no_of_sepchars}
|
||||||
# TODO compute how many can be transferred within max_line_length
|
# TODO compute how many can be transferred within max_line_length
|
||||||
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
||||||
|
|
||||||
|
=head1 YouTube video --pipe
|
||||||
|
|
||||||
|
cp parallel.fasta parallel.mbox lucene.tar
|
||||||
|
|
||||||
|
# GNU Parallel 20110205 - The FOSDEM Release
|
||||||
|
|
||||||
|
I assume you already know GNU Parallel. If not watch the intro video first.
|
||||||
|
|
||||||
|
GNU Parallel has so far worked similar to xargs. But the FOSDEM
|
||||||
|
release of GNU Parallel introduces the new --pipe option. It makes GNU
|
||||||
|
Parallel work similar to tee.
|
||||||
|
|
||||||
|
tee pipes a copy of the output to a file and a copy to another
|
||||||
|
program.
|
||||||
|
|
||||||
|
seq 1 5 | tee myfile | wc
|
||||||
|
|
||||||
|
Here it pipes a copy to the file myfile and to the command word count (wc).
|
||||||
|
|
||||||
|
cat myfile
|
||||||
|
|
||||||
|
and we can see the content is what we expected.
|
||||||
|
|
||||||
|
The pipe option of GNU Parallel splits data into records and pipes a
|
||||||
|
block of records into a program:
|
||||||
|
|
||||||
|
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
||||||
|
|
||||||
|
Here we pipe each number to the command cat and print foo after
|
||||||
|
running cat.
|
||||||
|
|
||||||
|
GNU Parallel does this in parallel starting one process per cpu so the
|
||||||
|
order may be different because one command may finish before another.
|
||||||
|
|
||||||
|
# RECORD SEPARATORS
|
||||||
|
|
||||||
|
GNU Parallel splits on record separators.
|
||||||
|
|
||||||
|
seq 1 5 | parallel --pipe --recend '\n' -N1 cat';' echo foo
|
||||||
|
|
||||||
|
This is the example we saw before: the record separator is \n and
|
||||||
|
--recend will keep the record separator at the end of the record.
|
||||||
|
|
||||||
|
But what if your records start with a record separator? Here is a
|
||||||
|
fast-a file:
|
||||||
|
|
||||||
|
cat parallel.fasta
|
||||||
|
|
||||||
|
Every record starts with a >. To keep that with the record you use
|
||||||
|
--recstart:
|
||||||
|
|
||||||
|
cat parallel.fasta | parallel --pipe --recstart '>' -N1 cat';' echo foo
|
||||||
|
|
||||||
|
But what if you have both? An mbox file is an example that has both an
|
||||||
|
ending and starting separator:
|
||||||
|
|
||||||
|
cat parallel.mbox |
|
||||||
|
parallel --pipe --recend '\n\n' --recstart 'From ' -N1 cat';' echo foo | less #
|
||||||
|
|
||||||
|
The two newlines stay with the preceding email and the From_ stays with the next record.
|
||||||
|
|
||||||
|
GNU Parallel cannot guarantee the first record will start with record
|
||||||
|
separator and it cannot guarantee the last record will end with record
|
||||||
|
separator. You will simply get what is first and last.
|
||||||
|
|
||||||
|
But GNU Parallel _does_ guarantee that it will only split at record
|
||||||
|
separators.
|
||||||
|
|
||||||
|
# NUMBER OF RECORDS
|
||||||
|
|
||||||
|
So far we have used -N1. This tells GNU Parallel to pipe one record to
|
||||||
|
the program.
|
||||||
|
|
||||||
|
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
||||||
|
|
||||||
|
But we can choose any amount:
|
||||||
|
|
||||||
|
seq 1 5 | parallel --pipe -N3 cat';' echo foo
|
||||||
|
|
||||||
|
This will pipe blocks of 3 records into cat, and if there are not enough records the last block will only get two.
|
||||||
|
|
||||||
|
# BLOCKSIZE
|
||||||
|
|
||||||
|
However, using -N is inefficient. It is faster to pipe a full block into the program.
|
||||||
|
|
||||||
|
cat /usr/share/dict/words | parallel --pipe --blocksize 500k wc
|
||||||
|
|
||||||
|
We here tell GNU Parallel to split on \n and pipe blocks of 500 KB to
|
||||||
|
wc. 1 MB is the default:
|
||||||
|
|
||||||
|
cat /usr/share/dict/words | parallel --pipe wc
|
||||||
|
|
||||||
|
If you just have a bunch of bytes you often do not care about the
|
||||||
|
record separator. To split input into chunks you can disable the
|
||||||
|
--recend
|
||||||
|
|
||||||
|
ls -l lucene.tar
|
||||||
|
cat lucene.tar | parallel --pipe --recend '' -k gzip > lucene.tar.gz
|
||||||
|
|
||||||
|
GNU Parallel will then split the input into 1 MB blocks; pipe that to
|
||||||
|
gzip and -k will make sure the order of the output is kept before
|
||||||
|
saving to the tar.gz file.
|
||||||
|
|
||||||
|
The beauty of gzip is that if you concatenate two gzip files it is a
|
||||||
|
valid gzip file. To test this:
|
||||||
|
|
||||||
|
tar tvzf lucene.tar.gz #
|
||||||
|
|
||||||
|
# OUTPUT AS FILE
|
||||||
|
|
||||||
|
Sometimes the output of GNU Parallel cannot be mixed in a single stream like this:
|
||||||
|
|
||||||
|
seq 1 10 | shuf | parallel --pipe -N 3 sort -n
|
||||||
|
|
||||||
|
As you can see each block of 3 is sorted but the whole output is not sorted.
|
||||||
|
|
||||||
|
GNU Parallel can give the output in files. GNU Parallel will then list the
|
||||||
|
files created:
|
||||||
|
|
||||||
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n
|
||||||
|
|
||||||
|
Each of these files contains a sorted block:
|
||||||
|
|
||||||
|
cat
|
||||||
|
|
||||||
|
Sort has -m to merge sorted files into a sorted stream
|
||||||
|
|
||||||
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n | parallel -mj1 sort -nm
|
||||||
|
|
||||||
|
-m will append all the files behind the sort command and the -j1 will
|
||||||
|
make sure we only run one command. The only part missing now is
|
||||||
|
cleaning up by removing the temporary files. We can do that by
|
||||||
|
appending rm
|
||||||
|
|
||||||
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n |
|
||||||
|
parallel -mj1 sort -nm {} ";"rm {}
|
||||||
|
|
||||||
|
|
||||||
|
# Thank you for watching
|
||||||
|
#
|
||||||
|
# If you like GNU Parallel:
|
||||||
|
# * Post this video on forums/blogs/Twitter/Facebook/Linkedin
|
||||||
|
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
||||||
|
# * Request or write a review for your favourite magazine
|
||||||
|
# * Request or build a package for your favourite distribution
|
||||||
|
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
||||||
|
#
|
||||||
|
# If GNU Parallel saves you money:
|
||||||
|
# * (Have your company) donate to FSF https://my.fsf.org/donate/
|
||||||
|
#
|
||||||
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=head1 YouTube video2
|
=head1 YouTube video2
|
||||||
|
|
||||||
Converting of WAV files to MP3 using GNU Parallel
|
Converting of WAV files to MP3 using GNU Parallel
|
||||||
|
@ -177,7 +331,7 @@ it easy to distribute jobs to these.
|
||||||
|
|
||||||
terminal2: ssh parallel@vh2.pi.dk
|
terminal2: ssh parallel@vh2.pi.dk
|
||||||
ssh parallel@vh2.pi.dk
|
ssh parallel@vh2.pi.dk
|
||||||
and
|
and
|
||||||
|
|
||||||
PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
|
PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
|
||||||
gunzip logs/*gz
|
gunzip logs/*gz
|
||||||
|
@ -362,7 +516,7 @@ find . -name '*.gz' | parallel -j+0 "zcat {} | bzip2 >{.}.bz2 && rm {}"
|
||||||
# Create a directory for each zip-file and unzip it in that dir
|
# Create a directory for each zip-file and unzip it in that dir
|
||||||
parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
|
parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
|
||||||
|
|
||||||
# Convert all *.mp3 in subdirs to *.ogg running
|
# Convert all *.mp3 in subdirs to *.ogg running
|
||||||
# one process per CPU core on local computer and server2
|
# one process per CPU core on local computer and server2
|
||||||
find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
|
find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
|
||||||
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
|
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
|
||||||
|
|
|
@ -66,12 +66,14 @@ echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org
|
||||||
== Download and test ==
|
== Download and test ==
|
||||||
|
|
||||||
pushd /tmp
|
pushd /tmp
|
||||||
|
rm parallel-$YYYYMMDD.tar.bz2
|
||||||
wget http://ftp.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
wget http://ftp.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
||||||
#wget http://alpha.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
#wget http://alpha.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
||||||
tar xjvf parallel-$YYYYMMDD.tar.bz2
|
tar xjvf parallel-$YYYYMMDD.tar.bz2
|
||||||
cd parallel-$YYYYMMDD
|
cd parallel-$YYYYMMDD
|
||||||
./configure
|
./configure
|
||||||
make -j && sudo make -j install
|
make -j && sudo make -j install
|
||||||
|
pushd
|
||||||
|
|
||||||
== Update OpenSUSE build system ==
|
== Update OpenSUSE build system ==
|
||||||
|
|
||||||
|
@ -138,17 +140,22 @@ cc:Peter Simons <simons@cryp.to>, Sandro Cazzaniga <kharec@mandriva.org>,
|
||||||
ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
|
ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
|
||||||
Christian Faulhammer <fauli@gentoo.org>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>
|
Christian Faulhammer <fauli@gentoo.org>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>
|
||||||
|
|
||||||
Subject: GNU Parallel 2011XXXX released
|
Subject: GNU Parallel 20110205 (FOSDEM release) released
|
||||||
|
|
||||||
GNU Parallel 2011XXXX has been released. It is available for
|
GNU Parallel 20110205 (the FOSDEM release) has been released. It is
|
||||||
download at: http://ftp.gnu.org/gnu/parallel/
|
available for download at: http://ftp.gnu.org/gnu/parallel/
|
||||||
|
|
||||||
This is a major release as the --pipe option introduces a new way to
|
This is a major release as the --pipe option introduces a new way to
|
||||||
work. To learn about --pipe see the example section for uses of
|
work. GNU Parallel has so far been similar to xargs, with --pipe it
|
||||||
--pipe.
|
becomes somewhat similar to tee. To learn about --pipe see the example
|
||||||
|
section for uses of --pipe.
|
||||||
|
|
||||||
But rest assured: No old functionality is changed.
|
But rest assured: No old functionality is changed.
|
||||||
|
|
||||||
|
If you want GNU Parallel to be part of your favourite distribution
|
||||||
|
contact the people maintaining the distribution (complaining on
|
||||||
|
Twitter is not enough).
|
||||||
|
|
||||||
New in this release:
|
New in this release:
|
||||||
|
|
||||||
* --pipe splits piped data into blocks. Each block is piped to a
|
* --pipe splits piped data into blocks. Each block is piped to a
|
||||||
|
@ -170,12 +177,22 @@ New in this release:
|
||||||
followed immediately by a start of a record. This is useful if
|
followed immediately by a start of a record. This is useful if
|
||||||
either recend or recstart can occur in the middle of a record.
|
either recend or recstart can occur in the middle of a record.
|
||||||
|
|
||||||
|
* --remove-rec-sep removes the string matched by --recstart and
|
||||||
|
--recend.
|
||||||
|
|
||||||
|
* --regexp will make GNU Parallel treat --recstart and --recend as
|
||||||
|
regular expressions.
|
||||||
|
|
||||||
* --output-as-files will put the output of the programs into files and
|
* --output-as-files will put the output of the programs into files and
|
||||||
instead of giving the output GNU Parallel will output the name of
|
instead of giving the output GNU Parallel will output the name of
|
||||||
these files.
|
these files.
|
||||||
|
|
||||||
* -N set the number of records to read. If used with --blocksize
|
* -N if used with --pipe sets the number of records to read.
|
||||||
the block read will at most be --blocksize.
|
|
||||||
|
* GNU Parallel was presented at FOSDEM.
|
||||||
|
|
||||||
|
* Article in USENIX Magazine ;login: (print)
|
||||||
|
http://www.usenix.org/publications/login/2011-02/
|
||||||
|
|
||||||
* GNU Parallel is now on ohloh.net. Thanks to Wim Muskee.
|
* GNU Parallel is now on ohloh.net. Thanks to Wim Muskee.
|
||||||
https://www.ohloh.net/p/gnu-parallel
|
https://www.ohloh.net/p/gnu-parallel
|
||||||
|
|
|
@ -236,7 +236,7 @@ B<parallel>(1), B<nice>(1)
|
||||||
use strict;
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
$Global::progname="niceload";
|
$Global::progname="niceload";
|
||||||
$Global::version = 20110126;
|
$Global::version = 20110130;
|
||||||
Getopt::Long::Configure("bundling","require_order");
|
Getopt::Long::Configure("bundling","require_order");
|
||||||
get_options_from_array(\@ARGV) || die_usage();
|
get_options_from_array(\@ARGV) || die_usage();
|
||||||
if($::opt_version) {
|
if($::opt_version) {
|
||||||
|
|
39
src/parallel
39
src/parallel
|
@ -88,17 +88,17 @@ sub spreadstdin {
|
||||||
# If both --recstart and --recend is given then both must match
|
# If both --recstart and --recend is given then both must match
|
||||||
$recstart = $::opt_recstart;
|
$recstart = $::opt_recstart;
|
||||||
$recend = $::opt_recend;
|
$recend = $::opt_recend;
|
||||||
$recerror = "Warning: --recend and --recstart unmatched. Is --blocksize too small?";
|
$recerror = "parallel: Warning: --recend and --recstart unmatched. Is --blocksize too small?";
|
||||||
} elsif(defined($::opt_recstart)) {
|
} elsif(defined($::opt_recstart)) {
|
||||||
# If --recstart is given it must match start of record
|
# If --recstart is given it must match start of record
|
||||||
$recstart = $::opt_recstart;
|
$recstart = $::opt_recstart;
|
||||||
$recend = "";
|
$recend = "";
|
||||||
$recerror = "Warning: --recstart unmatched. Is --blocksize too small?";
|
$recerror = "parallel: Warning: --recstart unmatched. Is --blocksize too small?";
|
||||||
} elsif(defined($::opt_recend)) {
|
} elsif(defined($::opt_recend)) {
|
||||||
# If --recend is given then it must match end of record
|
# If --recend is given then it must match end of record
|
||||||
$recstart = "";
|
$recstart = "";
|
||||||
$recend = $::opt_recend;
|
$recend = $::opt_recend;
|
||||||
$recerror = "Warning: --recend unmatched. Is --blocksize too small?";
|
$recerror = "parallel: Warning: --recend unmatched. Is --blocksize too small?";
|
||||||
}
|
}
|
||||||
|
|
||||||
while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
|
while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
|
||||||
|
@ -333,6 +333,7 @@ sub get_options_from_array {
|
||||||
"pipe|spreadstdin" => \$::opt_pipe,
|
"pipe|spreadstdin" => \$::opt_pipe,
|
||||||
"recstart=s" => \$::opt_recstart,
|
"recstart=s" => \$::opt_recstart,
|
||||||
"recend=s" => \$::opt_recend,
|
"recend=s" => \$::opt_recend,
|
||||||
|
"regexp|regex" => \$::opt_regexp,
|
||||||
"remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep,
|
"remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep,
|
||||||
"files|output-as-files|outputasfiles" => \$::opt_files,
|
"files|output-as-files|outputasfiles" => \$::opt_files,
|
||||||
"block|block-size|blocksize=s" => \$::opt_blocksize,
|
"block|block-size|blocksize=s" => \$::opt_blocksize,
|
||||||
|
@ -377,7 +378,7 @@ sub get_options_from_array {
|
||||||
sub parse_options {
|
sub parse_options {
|
||||||
# Returns: N/A
|
# Returns: N/A
|
||||||
# Defaults:
|
# Defaults:
|
||||||
$Global::version = 20110126;
|
$Global::version = 20110130;
|
||||||
$Global::progname = 'parallel';
|
$Global::progname = 'parallel';
|
||||||
$Global::infinity = 2**31;
|
$Global::infinity = 2**31;
|
||||||
$Global::debug = 0;
|
$Global::debug = 0;
|
||||||
|
@ -519,7 +520,7 @@ sub parse_options {
|
||||||
# As we do not know the max line length on the remote machine
|
# As we do not know the max line length on the remote machine
|
||||||
# long commands generated by xargs may fail
|
# long commands generated by xargs may fail
|
||||||
# If opt_N is set, it is probably safe
|
# If opt_N is set, it is probably safe
|
||||||
print STDERR ("Warning: using -X or -m with --sshlogin may fail\n");
|
print STDERR ("parallel: Warning: using -X or -m with --sshlogin may fail\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(not defined $::opt_P) {
|
if(not defined $::opt_P) {
|
||||||
|
@ -1265,19 +1266,19 @@ sub parse_sshlogin {
|
||||||
# There are no remote hosts
|
# There are no remote hosts
|
||||||
if(defined @::opt_trc) {
|
if(defined @::opt_trc) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: --trc ignored as there are no remote --sshlogin\n";
|
"parallel: Warning: --trc ignored as there are no remote --sshlogin\n";
|
||||||
} elsif (defined $::opt_transfer) {
|
} elsif (defined $::opt_transfer) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: --transfer ignored as there are no remote --sshlogin\n";
|
"parallel: Warning: --transfer ignored as there are no remote --sshlogin\n";
|
||||||
} elsif (defined @::opt_return) {
|
} elsif (defined @::opt_return) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: --return ignored as there are no remote --sshlogin\n";
|
"parallel: Warning: --return ignored as there are no remote --sshlogin\n";
|
||||||
} elsif (defined $::opt_cleanup) {
|
} elsif (defined $::opt_cleanup) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: --cleanup ignored as there are no remote --sshlogin\n";
|
"parallel: Warning: --cleanup ignored as there are no remote --sshlogin\n";
|
||||||
} elsif (defined @::opt_basefile) {
|
} elsif (defined @::opt_basefile) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: --basefile ignored as there are no remote --sshlogin\n";
|
"parallel: Warning: --basefile ignored as there are no remote --sshlogin\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1898,20 +1899,20 @@ sub processes_available_by_system_limit {
|
||||||
# Give the user a warning. He can press Ctrl-C if this
|
# Give the user a warning. He can press Ctrl-C if this
|
||||||
# sucks.
|
# sucks.
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
("Warning: Starting 10 extra processes takes > 2 sec.\n",
|
("parallel: Warning: Starting 10 extra processes takes > 2 sec.\n",
|
||||||
"Consider adjusting -j. Press CTRL-C to stop.\n");
|
"Consider adjusting -j. Press CTRL-C to stop.\n");
|
||||||
$slow_spawining_warning_printed = 1;
|
$slow_spawining_warning_printed = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if($system_limit < $wanted_processes and not $more_filehandles) {
|
if($system_limit < $wanted_processes and not $more_filehandles) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
("Warning: Only enough filehandles to run ",
|
("parallel: Warning: Only enough filehandles to run ",
|
||||||
$system_limit, " jobs in parallel. ",
|
$system_limit, " jobs in parallel. ",
|
||||||
"Raising ulimit -n may help\n");
|
"Raising ulimit -n may help\n");
|
||||||
}
|
}
|
||||||
if($system_limit < $wanted_processes and $max_system_proc_reached) {
|
if($system_limit < $wanted_processes and $max_system_proc_reached) {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
("Warning: Only enough available processes to run ",
|
("parallel: Warning: Only enough available processes to run ",
|
||||||
$system_limit, " jobs in parallel.\n");
|
$system_limit, " jobs in parallel.\n");
|
||||||
}
|
}
|
||||||
# Cleanup: Close the files
|
# Cleanup: Close the files
|
||||||
|
@ -1948,7 +1949,7 @@ sub simultaneous_sshlogin_limit {
|
||||||
if($ssh_limit < $wanted_processes) {
|
if($ssh_limit < $wanted_processes) {
|
||||||
my $serverlogin = $self->serverlogin();
|
my $serverlogin = $self->serverlogin();
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
("Warning: ssh to $serverlogin only allows ",
|
("parallel: Warning: ssh to $serverlogin only allows ",
|
||||||
"for $ssh_limit simultaneous logins.\n",
|
"for $ssh_limit simultaneous logins.\n",
|
||||||
"You may raise this by changing ",
|
"You may raise this by changing ",
|
||||||
"/etc/ssh/sshd_config:MaxStartup on $serverlogin\n",
|
"/etc/ssh/sshd_config:MaxStartup on $serverlogin\n",
|
||||||
|
@ -2059,7 +2060,7 @@ sub ncpus {
|
||||||
$self->{'ncpus'} = $ncpu;
|
$self->{'ncpus'} = $ncpu;
|
||||||
} else {
|
} else {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
("Warning: Could not figure out ",
|
("parallel: Warning: Could not figure out ",
|
||||||
"number of cpus on $serverlogin. Using 1\n");
|
"number of cpus on $serverlogin. Using 1\n");
|
||||||
$self->{'ncpus'} = 1;
|
$self->{'ncpus'} = 1;
|
||||||
}
|
}
|
||||||
|
@ -2080,7 +2081,7 @@ sub no_of_cpus {
|
||||||
if($no_of_cpus) {
|
if($no_of_cpus) {
|
||||||
return $no_of_cpus;
|
return $no_of_cpus;
|
||||||
} else {
|
} else {
|
||||||
warn("Cannot figure out number of cpus. Using 1");
|
warn("parallel: Cannot figure out number of cpus. Using 1");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2097,7 +2098,7 @@ sub no_of_cores {
|
||||||
if($no_of_cores) {
|
if($no_of_cores) {
|
||||||
return $no_of_cores;
|
return $no_of_cores;
|
||||||
} else {
|
} else {
|
||||||
warn("Cannot figure out number of CPU cores. Using 1");
|
warn("parallel: Cannot figure out number of CPU cores. Using 1");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2699,7 +2700,7 @@ sub sshtransfer {
|
||||||
$pre .= "$mkremote_workdir; rsync $rsync_opt ".::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;";
|
$pre .= "$mkremote_workdir; rsync $rsync_opt ".::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;";
|
||||||
} else {
|
} else {
|
||||||
print $Global::original_stderr
|
print $Global::original_stderr
|
||||||
"Warning: $file is not readable and will not be transferred\n";
|
"parallel: Warning: $file is not readable and will not be transferred\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return $pre;
|
return $pre;
|
||||||
|
@ -4017,6 +4018,6 @@ sub unlock {
|
||||||
|
|
||||||
# Keep perl -w happy
|
# Keep perl -w happy
|
||||||
|
|
||||||
$::opt_regexp = $::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
|
$::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
|
||||||
$::opt_skip_first_line = $::opt_shebang = 0 ;
|
$::opt_skip_first_line = $::opt_shebang = 0 ;
|
||||||
|
|
||||||
|
|
|
@ -28,8 +28,7 @@ If you use B<xargs> today you will find GNU B<parallel> very easy to
|
||||||
use as GNU B<parallel> is written to have the same options as
|
use as GNU B<parallel> is written to have the same options as
|
||||||
B<xargs>. If you write loops in shell, you will find GNU B<parallel>
|
B<xargs>. If you write loops in shell, you will find GNU B<parallel>
|
||||||
may be able to replace most of the loops and make them run faster by
|
may be able to replace most of the loops and make them run faster by
|
||||||
running several jobs simultaneously. If you use B<ppss> or B<pexec> you
|
running several jobs simultaneously.
|
||||||
will find GNU B<parallel> will often make the command easier to read.
|
|
||||||
|
|
||||||
GNU B<parallel> makes sure output from the commands is the same output
|
GNU B<parallel> makes sure output from the commands is the same output
|
||||||
as you would get had you run the commands sequentially. This makes it
|
as you would get had you run the commands sequentially. This makes it
|
||||||
|
@ -713,8 +712,8 @@ If B<--recstart> is given I<startstring> will be used to split at record start.
|
||||||
If B<--recend> is given I<endstring> will be used to split at record end.
|
If B<--recend> is given I<endstring> will be used to split at record end.
|
||||||
|
|
||||||
If both B<--recstart> and B<--recend> are given the string
|
If both B<--recstart> and B<--recend> are given the string
|
||||||
I<startregexp>I<endregexp> will have to match to find a split
|
I<startstring>I<endstring> will have to match to find a split
|
||||||
position. This is useful if either I<startregexp> or I<endregexp>
|
position. This is useful if either I<startstring> or I<endstring>
|
||||||
match in the middle of a record.
|
match in the middle of a record.
|
||||||
|
|
||||||
If neither B<--recstart> nor B<--recend> are given then B<--recend>
|
If neither B<--recstart> nor B<--recend> are given then B<--recend>
|
||||||
|
@ -726,7 +725,7 @@ Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
|
||||||
expressions. This is slow, however.
|
expressions. This is slow, however.
|
||||||
|
|
||||||
|
|
||||||
=item B<--regexp> (unimplimented)
|
=item B<--regexp> (beta test)
|
||||||
|
|
||||||
Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
|
Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
|
||||||
expressions. This is slow, however.
|
expressions. This is slow, however.
|
||||||
|
@ -743,6 +742,7 @@ it to the command.
|
||||||
|
|
||||||
Only used with B<--pipe>.
|
Only used with B<--pipe>.
|
||||||
|
|
||||||
|
|
||||||
=item B<--retries> I<n> (beta testing)
|
=item B<--retries> I<n> (beta testing)
|
||||||
|
|
||||||
If a job fails, retry it on another computer. Do this I<n> times. If
|
If a job fails, retry it on another computer. Do this I<n> times. If
|
||||||
|
@ -1601,6 +1601,34 @@ B<parallel -j 100 < jobs_to_run>
|
||||||
As there is not a I<command> the jobs will be evaluated by the shell.
|
As there is not a I<command> the jobs will be evaluated by the shell.
|
||||||
|
|
||||||
|
|
||||||
|
=head1 EXAMPLE: Processing a big file using more cores
|
||||||
|
|
||||||
|
To process a big file or some output you can use B<--pipe> to split up
|
||||||
|
the data into blocks and pipe the blocks into the processing program.
|
||||||
|
|
||||||
|
If the program is B<gzip -9> you can do:
|
||||||
|
|
||||||
|
B<cat bigfile | parallel --pipe --recend '' -k gzip -9 >>B<bigfile.gz>
|
||||||
|
|
||||||
|
This will split B<bigfile> into blocks of 1 MB and pass that to B<gzip
|
||||||
|
-9> in parallel. One B<gzip> will be run per CPU core. The output of
|
||||||
|
B<gzip -9> will be kept in order and saved to B<bigfile.gz>.
|
||||||
|
|
||||||
|
B<gzip> works fine if the output is appended, but some processing does
|
||||||
|
not work like that - for example sorting. For this GNU B<parallel> can
|
||||||
|
put the output of each command into a file. This will sort a big file
|
||||||
|
in parallel:
|
||||||
|
|
||||||
|
B<cat bigfile | parallel --pipe --files sort | parallel -Xj1 sort -m {} ';' rm {} >>B<bigfile.sort>
|
||||||
|
|
||||||
|
Here B<bigfile> is split into blocks of around 1MB, each block ending
|
||||||
|
in '\n' (which is the default for B<--recend>). Each block is passed
|
||||||
|
to B<sort> and the output from B<sort> is saved into files. These
|
||||||
|
files are passed to the second B<parallel> that runs B<sort -m> on the
|
||||||
|
files before it removes the files. The output is saved to
|
||||||
|
B<bigfile.sort>.
|
||||||
|
|
||||||
|
|
||||||
=head1 EXAMPLE: Working as mutex and counting semaphore
|
=head1 EXAMPLE: Working as mutex and counting semaphore
|
||||||
|
|
||||||
The command B<sem> is an alias for B<parallel --semaphore>.
|
The command B<sem> is an alias for B<parallel --semaphore>.
|
||||||
|
@ -1921,7 +1949,7 @@ variable $PARALLEL which takes precedence over the file
|
||||||
|
|
||||||
=head1 PROFILE FILES
|
=head1 PROFILE FILES
|
||||||
|
|
||||||
If B<--profile> is set, GNU B<parallel> will read the profile from that file instead of
|
If B<--profile> is set, GNU B<parallel> will read the profile from that file instead of
|
||||||
~/.parallel/config.
|
~/.parallel/config.
|
||||||
|
|
||||||
Example: Profile for running every command with B<-j+0> and B<nice>
|
Example: Profile for running every command with B<-j+0> and B<nice>
|
||||||
|
|
2
src/sql
2
src/sql
|
@ -531,7 +531,7 @@ $Global::Initfile && unlink $Global::Initfile;
|
||||||
exit ($err);
|
exit ($err);
|
||||||
|
|
||||||
sub parse_options {
|
sub parse_options {
|
||||||
$Global::version = 20110126;
|
$Global::version = 20110130;
|
||||||
$Global::progname = 'sql';
|
$Global::progname = 'sql';
|
||||||
|
|
||||||
# This must be done first as this may exec myself
|
# This must be done first as this may exec myself
|
||||||
|
|
Loading…
Reference in a new issue