mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parallel: implemented --regexp. Prepended 'parallel:' to warnings
This commit is contained in:
parent
cb468fb6d3
commit
ad61df30f0
20
configure
vendored
20
configure
vendored
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.67 for parallel 20110126.
|
||||
# Generated by GNU Autoconf 2.67 for parallel 20110130.
|
||||
#
|
||||
# Report bugs to <bug-parallel@gnu.org>.
|
||||
#
|
||||
|
@ -551,8 +551,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='parallel'
|
||||
PACKAGE_TARNAME='parallel'
|
||||
PACKAGE_VERSION='20110126'
|
||||
PACKAGE_STRING='parallel 20110126'
|
||||
PACKAGE_VERSION='20110130'
|
||||
PACKAGE_STRING='parallel 20110130'
|
||||
PACKAGE_BUGREPORT='bug-parallel@gnu.org'
|
||||
PACKAGE_URL=''
|
||||
|
||||
|
@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures parallel 20110126 to adapt to many kinds of systems.
|
||||
\`configure' configures parallel 20110130 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1234,7 +1234,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of parallel 20110126:";;
|
||||
short | recursive ) echo "Configuration of parallel 20110130:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1301,7 +1301,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
parallel configure 20110126
|
||||
parallel configure 20110130
|
||||
generated by GNU Autoconf 2.67
|
||||
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
|
@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by parallel $as_me 20110126, which was
|
||||
It was created by parallel $as_me 20110130, which was
|
||||
generated by GNU Autoconf 2.67. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
|
@ -2133,7 +2133,7 @@ fi
|
|||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='parallel'
|
||||
VERSION='20110126'
|
||||
VERSION='20110130'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
|
@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by parallel $as_me 20110126, which was
|
||||
This file was extended by parallel $as_me 20110130, which was
|
||||
generated by GNU Autoconf 2.67. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -2746,7 +2746,7 @@ _ACEOF
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
parallel config.status 20110126
|
||||
parallel config.status 20110130
|
||||
configured by $0, generated by GNU Autoconf 2.67,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
AC_INIT([parallel], [20110126], [bug-parallel@gnu.org])
|
||||
AC_INIT([parallel], [20110130], [bug-parallel@gnu.org])
|
||||
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
AC_CONFIG_FILES([
|
||||
|
|
154
doc/FUTURE_IDEAS
154
doc/FUTURE_IDEAS
|
@ -112,6 +112,160 @@ colsep = [sepchars]{no_of_sepchars}
|
|||
# TODO compute how many can be transferred within max_line_length
|
||||
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
||||
|
||||
=head1 YouTube video --pipe
|
||||
|
||||
cp parallel.fasta parallel.mbox lucene.tar
|
||||
|
||||
# GNU Parallel 20110205 - The FOSDEM Release
|
||||
|
||||
I assume you already know GNU Parallel. If not, watch the intro video first.
|
||||
|
||||
GNU Parallel has so far worked similar to xargs. But the FOSDEM
|
||||
release of GNU Parallel introduces the new --pipe option. It makes GNU
|
||||
Parallel work similar to tee.
|
||||
|
||||
tee pipes a copy of the output to a file and a copy to another
|
||||
program.
|
||||
|
||||
seq 1 5 | tee myfile | wc
|
||||
|
||||
Here it pipes a copy to the file myfile and to the command word count (wc).
|
||||
|
||||
cat myfile
|
||||
|
||||
and we can see the content is what we expected.
|
||||
|
||||
The pipe option of GNU Parallel splits data into records and pipes a
|
||||
block of records into a program:
|
||||
|
||||
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
||||
|
||||
Here we pipe each number to the command cat and print foo after
|
||||
running cat.
|
||||
|
||||
GNU Parallel does this in parallel starting one process per cpu so the
|
||||
order may be different because one command may finish before another.
|
||||
|
||||
# RECORD SEPARATORS
|
||||
|
||||
GNU Parallel splits on record separators.
|
||||
|
||||
seq 1 5 | parallel --pipe --recend '\n' -N1 cat';' echo foo
|
||||
|
||||
This is the example we saw before: the record separator is \n and
|
||||
--recend will keep the record separator at the end of the record.
|
||||
|
||||
But what if your records start with a record separator? Here is a
|
||||
fast-a file:
|
||||
|
||||
cat parallel.fasta
|
||||
|
||||
Every record starts with a >. To keep that with the record you use
|
||||
--recstart:
|
||||
|
||||
cat parallel.fasta | parallel --pipe --recstart '>' -N1 cat';' echo foo
|
||||
|
||||
But what if you have both? An mbox file is an example that has both an
|
||||
ending and starting separator:
|
||||
|
||||
cat parallel.mbox |
|
||||
parallel --pipe --recend '\n\n' --recstart 'From ' -N1 cat';' echo foo | less #
|
||||
|
||||
The two newlines stay with the preceding email and the From_ stays with the next record.
|
||||
|
||||
GNU Parallel cannot guarantee the first record will start with record
|
||||
separator and it cannot guarantee the last record will end with record
|
||||
separator. You will simply get what is first and last.
|
||||
|
||||
But GNU Parallel _does_ guarantee that it will only split at record
|
||||
separators.
|
||||
|
||||
# NUMBER OF RECORDS
|
||||
|
||||
So far we have used -N1. This tells GNU Parallel to pipe one record to
|
||||
the program.
|
||||
|
||||
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
||||
|
||||
But we can choose any amount:
|
||||
|
||||
seq 1 5 | parallel --pipe -N3 cat';' echo foo
|
||||
|
||||
This will pipe blocks of 3 records into cat; if there are not enough records left, the last block will only get two.
|
||||
|
||||
# BLOCKSIZE
|
||||
|
||||
However, using -N is inefficient. It is faster to pipe a full block into the program.
|
||||
|
||||
cat /usr/share/dict/words | parallel --pipe --blocksize 500k wc
|
||||
|
||||
We here tell GNU Parallel to split on \n and pipe blocks of 500 KB to
|
||||
wc. 1 MB is the default:
|
||||
|
||||
cat /usr/share/dict/words | parallel --pipe wc
|
||||
|
||||
If you just have a bunch of bytes you often do not care about the
|
||||
record separator. To split input into chunks you can disable the
|
||||
--recend
|
||||
|
||||
ls -l lucene.tar
|
||||
cat lucene.tar | parallel --pipe --recend '' -k gzip > lucene.tar.gz
|
||||
|
||||
GNU Parallel will then split the input into 1 MB blocks; pipe that to
|
||||
gzip and -k will make sure the order of the output is kept before
|
||||
saving to the tar.gz file.
|
||||
|
||||
The beauty of gzip is that if you concatenate two gzip files it is a
|
||||
valid gzip file. To test this:
|
||||
|
||||
tar tvzf lucene.tar.gz #
|
||||
|
||||
# OUTPUT AS FILE
|
||||
|
||||
Sometimes the output of GNU Parallel cannot be mixed in a single stream like this:
|
||||
|
||||
seq 1 10 | shuf | parallel --pipe -N 3 sort -n
|
||||
|
||||
As you can see each block of 3 is sorted but the whole output is not sorted.
|
||||
|
||||
GNU Parallel can give the output in files. GNU Parallel will then list the
|
||||
files created:
|
||||
|
||||
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n
|
||||
|
||||
Each of these files contains a sorted block:
|
||||
|
||||
cat
|
||||
|
||||
Sort has -m to merge sorted files into a sorted stream
|
||||
|
||||
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n | parallel -mj1 sort -nm
|
||||
|
||||
-m will append all the files behind the sort command and the -j1 will
|
||||
make sure we only run one command. The only part missing now is
|
||||
cleaning up by removing the temporary files. We can do that by
|
||||
appending rm
|
||||
|
||||
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n |
|
||||
parallel -mj1 sort -nm {} ";"rm {}
|
||||
|
||||
|
||||
# Thank you for watching
|
||||
#
|
||||
# If you like GNU Parallel:
|
||||
# * Post this video on forums/blogs/Twitter/Facebook/Linkedin
|
||||
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
||||
# * Request or write a review for your favourite magazine
|
||||
# * Request or build a package for your favourite distribution
|
||||
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
||||
#
|
||||
# If GNU Parallel saves you money:
|
||||
# * (Have your company) donate to FSF https://my.fsf.org/donate/
|
||||
#
|
||||
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
||||
|
||||
|
||||
|
||||
=head1 YouTube video2
|
||||
|
||||
Converting WAV files to MP3 using GNU Parallel
|
||||
|
|
|
@ -66,12 +66,14 @@ echo put parallel-$YYYYMMDD.tar.bz2{,.sig,*asc} | ncftp ftp://ftp-upload.gnu.org
|
|||
== Download and test ==
|
||||
|
||||
pushd /tmp
|
||||
rm parallel-$YYYYMMDD.tar.bz2
|
||||
wget http://ftp.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
||||
#wget http://alpha.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2
|
||||
tar xjvf parallel-$YYYYMMDD.tar.bz2
|
||||
cd parallel-$YYYYMMDD
|
||||
./configure
|
||||
make -j && sudo make -j install
|
||||
pushd
|
||||
|
||||
== Update OpenSUSE build system ==
|
||||
|
||||
|
@ -138,17 +140,22 @@ cc:Peter Simons <simons@cryp.to>, Sandro Cazzaniga <kharec@mandriva.org>,
|
|||
ryoichiro.suzuki@gmail.com,kerick@shiftedbit.net,
|
||||
Christian Faulhammer <fauli@gentoo.org>, Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>
|
||||
|
||||
Subject: GNU Parallel 2011XXXX released
|
||||
Subject: GNU Parallel 20110205 (FOSDEM release) released
|
||||
|
||||
GNU Parallel 2011XXXX has been released. It is available for
|
||||
download at: http://ftp.gnu.org/gnu/parallel/
|
||||
GNU Parallel 20110205 (the FOSDEM release) has been released. It is
|
||||
available for download at: http://ftp.gnu.org/gnu/parallel/
|
||||
|
||||
This is a major release as the --pipe option introduces a new way to
|
||||
work. To learn about --pipe see the example section for uses of
|
||||
--pipe.
|
||||
work. GNU Parallel has so far been similar to xargs, with --pipe it
|
||||
becomes somewhat similar to tee. To learn about --pipe see the example
|
||||
section for uses of --pipe.
|
||||
|
||||
But rest assured: No old functionality is changed.
|
||||
|
||||
If you want GNU Parallel to be part of your favourite distribution
|
||||
contact the people maintaining the distribution (complaining on
|
||||
Twitter is not enough).
|
||||
|
||||
New in this release:
|
||||
|
||||
* --pipe splits piped data into blocks. Each block is piped to a
|
||||
|
@ -170,12 +177,22 @@ New in this release:
|
|||
followed immediately by a start of a record. This is useful if
|
||||
either recend or recstart can occur in the middle of a record.
|
||||
|
||||
* --remove-rec-sep removes the string matched by --recstart and
|
||||
--recend.
|
||||
|
||||
* --regexp will make GNU Parallel treat --recstart and --recend as
|
||||
regular expressions.
|
||||
|
||||
* --output-as-files will put the output of the programs into files and
|
||||
instead of giving the output GNU Parallel will output the name of
|
||||
these files.
|
||||
|
||||
* -N set the number of records to read. If used with --blocksize
|
||||
the block read will at most be --blocksize.
|
||||
* -N if used with --pipe sets the number of records to read.
|
||||
|
||||
* GNU Parallel was presented at FOSDEM.
|
||||
|
||||
* Article in USENIX Magazine ;login: (print)
|
||||
http://www.usenix.org/publications/login/2011-02/
|
||||
|
||||
* GNU Parallel is now on ohloh.net. Thanks to Wim Muskee.
|
||||
https://www.ohloh.net/p/gnu-parallel
|
||||
|
|
|
@ -236,7 +236,7 @@ B<parallel>(1), B<nice>(1)
|
|||
use strict;
|
||||
use Getopt::Long;
|
||||
$Global::progname="niceload";
|
||||
$Global::version = 20110126;
|
||||
$Global::version = 20110130;
|
||||
Getopt::Long::Configure("bundling","require_order");
|
||||
get_options_from_array(\@ARGV) || die_usage();
|
||||
if($::opt_version) {
|
||||
|
|
39
src/parallel
39
src/parallel
|
@ -88,17 +88,17 @@ sub spreadstdin {
|
|||
# If both --recstart and --recend is given then both must match
|
||||
$recstart = $::opt_recstart;
|
||||
$recend = $::opt_recend;
|
||||
$recerror = "Warning: --recend and --recstart unmatched. Is --blocksize too small?";
|
||||
$recerror = "parallel: Warning: --recend and --recstart unmatched. Is --blocksize too small?";
|
||||
} elsif(defined($::opt_recstart)) {
|
||||
# If --recstart is given it must match start of record
|
||||
$recstart = $::opt_recstart;
|
||||
$recend = "";
|
||||
$recerror = "Warning: --recstart unmatched. Is --blocksize too small?";
|
||||
$recerror = "parallel: Warning: --recstart unmatched. Is --blocksize too small?";
|
||||
} elsif(defined($::opt_recend)) {
|
||||
# If --recend is given then it must match end of record
|
||||
$recstart = "";
|
||||
$recend = $::opt_recend;
|
||||
$recerror = "Warning: --recend unmatched. Is --blocksize too small?";
|
||||
$recerror = "parallel: Warning: --recend unmatched. Is --blocksize too small?";
|
||||
}
|
||||
|
||||
while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
|
||||
|
@ -333,6 +333,7 @@ sub get_options_from_array {
|
|||
"pipe|spreadstdin" => \$::opt_pipe,
|
||||
"recstart=s" => \$::opt_recstart,
|
||||
"recend=s" => \$::opt_recend,
|
||||
"regexp|regex" => \$::opt_regexp,
|
||||
"remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep,
|
||||
"files|output-as-files|outputasfiles" => \$::opt_files,
|
||||
"block|block-size|blocksize=s" => \$::opt_blocksize,
|
||||
|
@ -377,7 +378,7 @@ sub get_options_from_array {
|
|||
sub parse_options {
|
||||
# Returns: N/A
|
||||
# Defaults:
|
||||
$Global::version = 20110126;
|
||||
$Global::version = 20110130;
|
||||
$Global::progname = 'parallel';
|
||||
$Global::infinity = 2**31;
|
||||
$Global::debug = 0;
|
||||
|
@ -519,7 +520,7 @@ sub parse_options {
|
|||
# As we do not know the max line length on the remote machine
|
||||
# long commands generated by xargs may fail
|
||||
# If opt_N is set, it is probably safe
|
||||
print STDERR ("Warning: using -X or -m with --sshlogin may fail\n");
|
||||
print STDERR ("parallel: Warning: using -X or -m with --sshlogin may fail\n");
|
||||
}
|
||||
|
||||
if(not defined $::opt_P) {
|
||||
|
@ -1265,19 +1266,19 @@ sub parse_sshlogin {
|
|||
# There are no remote hosts
|
||||
if(defined @::opt_trc) {
|
||||
print $Global::original_stderr
|
||||
"Warning: --trc ignored as there are no remote --sshlogin\n";
|
||||
"parallel: Warning: --trc ignored as there are no remote --sshlogin\n";
|
||||
} elsif (defined $::opt_transfer) {
|
||||
print $Global::original_stderr
|
||||
"Warning: --transfer ignored as there are no remote --sshlogin\n";
|
||||
"parallel: Warning: --transfer ignored as there are no remote --sshlogin\n";
|
||||
} elsif (defined @::opt_return) {
|
||||
print $Global::original_stderr
|
||||
"Warning: --return ignored as there are no remote --sshlogin\n";
|
||||
"parallel: Warning: --return ignored as there are no remote --sshlogin\n";
|
||||
} elsif (defined $::opt_cleanup) {
|
||||
print $Global::original_stderr
|
||||
"Warning: --cleanup ignored as there are no remote --sshlogin\n";
|
||||
"parallel: Warning: --cleanup ignored as there are no remote --sshlogin\n";
|
||||
} elsif (defined @::opt_basefile) {
|
||||
print $Global::original_stderr
|
||||
"Warning: --basefile ignored as there are no remote --sshlogin\n";
|
||||
"parallel: Warning: --basefile ignored as there are no remote --sshlogin\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1898,20 +1899,20 @@ sub processes_available_by_system_limit {
|
|||
# Give the user a warning. He can press Ctrl-C if this
|
||||
# sucks.
|
||||
print $Global::original_stderr
|
||||
("Warning: Starting 10 extra processes takes > 2 sec.\n",
|
||||
("parallel: Warning: Starting 10 extra processes takes > 2 sec.\n",
|
||||
"Consider adjusting -j. Press CTRL-C to stop.\n");
|
||||
$slow_spawining_warning_printed = 1;
|
||||
}
|
||||
}
|
||||
if($system_limit < $wanted_processes and not $more_filehandles) {
|
||||
print $Global::original_stderr
|
||||
("Warning: Only enough filehandles to run ",
|
||||
("parallel: Warning: Only enough filehandles to run ",
|
||||
$system_limit, " jobs in parallel. ",
|
||||
"Raising ulimit -n may help\n");
|
||||
}
|
||||
if($system_limit < $wanted_processes and $max_system_proc_reached) {
|
||||
print $Global::original_stderr
|
||||
("Warning: Only enough available processes to run ",
|
||||
("parallel: Warning: Only enough available processes to run ",
|
||||
$system_limit, " jobs in parallel.\n");
|
||||
}
|
||||
# Cleanup: Close the files
|
||||
|
@ -1948,7 +1949,7 @@ sub simultaneous_sshlogin_limit {
|
|||
if($ssh_limit < $wanted_processes) {
|
||||
my $serverlogin = $self->serverlogin();
|
||||
print $Global::original_stderr
|
||||
("Warning: ssh to $serverlogin only allows ",
|
||||
("parallel: Warning: ssh to $serverlogin only allows ",
|
||||
"for $ssh_limit simultaneous logins.\n",
|
||||
"You may raise this by changing ",
|
||||
"/etc/ssh/sshd_config:MaxStartup on $serverlogin\n",
|
||||
|
@ -2059,7 +2060,7 @@ sub ncpus {
|
|||
$self->{'ncpus'} = $ncpu;
|
||||
} else {
|
||||
print $Global::original_stderr
|
||||
("Warning: Could not figure out ",
|
||||
("parallel: Warning: Could not figure out ",
|
||||
"number of cpus on $serverlogin. Using 1\n");
|
||||
$self->{'ncpus'} = 1;
|
||||
}
|
||||
|
@ -2080,7 +2081,7 @@ sub no_of_cpus {
|
|||
if($no_of_cpus) {
|
||||
return $no_of_cpus;
|
||||
} else {
|
||||
warn("Cannot figure out number of cpus. Using 1");
|
||||
warn("parallel: Cannot figure out number of cpus. Using 1");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -2097,7 +2098,7 @@ sub no_of_cores {
|
|||
if($no_of_cores) {
|
||||
return $no_of_cores;
|
||||
} else {
|
||||
warn("Cannot figure out number of CPU cores. Using 1");
|
||||
warn("parallel: Cannot figure out number of CPU cores. Using 1");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -2699,7 +2700,7 @@ sub sshtransfer {
|
|||
$pre .= "$mkremote_workdir; rsync $rsync_opt ".::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;";
|
||||
} else {
|
||||
print $Global::original_stderr
|
||||
"Warning: $file is not readable and will not be transferred\n";
|
||||
"parallel: Warning: $file is not readable and will not be transferred\n";
|
||||
}
|
||||
}
|
||||
return $pre;
|
||||
|
@ -4017,6 +4018,6 @@ sub unlock {
|
|||
|
||||
# Keep perl -w happy
|
||||
|
||||
$::opt_regexp = $::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
|
||||
$::opt_x = $::opt_workdir = $Semaphore::timeout = $Semaphore::wait =
|
||||
$::opt_skip_first_line = $::opt_shebang = 0 ;
|
||||
|
||||
|
|
|
@ -28,8 +28,7 @@ If you use B<xargs> today you will find GNU B<parallel> very easy to
|
|||
use as GNU B<parallel> is written to have the same options as
|
||||
B<xargs>. If you write loops in shell, you will find GNU B<parallel>
|
||||
may be able to replace most of the loops and make them run faster by
|
||||
running several jobs simultaneously. If you use B<ppss> or B<pexec> you
|
||||
will find GNU B<parallel> will often make the command easier to read.
|
||||
running several jobs simultaneously.
|
||||
|
||||
GNU B<parallel> makes sure output from the commands is the same output
|
||||
as you would get had you run the commands sequentially. This makes it
|
||||
|
@ -713,8 +712,8 @@ If B<--recstart> is given I<startstring> will be used to split at record start.
|
|||
If B<--recend> is given I<endstring> will be used to split at record end.
|
||||
|
||||
If both B<--recstart> and B<--recend> are given the string
|
||||
I<startregexp>I<endregexp> will have to match to find a split
|
||||
position. This is useful if either I<startregexp> or I<endregexp>
|
||||
I<startstring>I<endstring> will have to match to find a split
|
||||
position. This is useful if either I<startstring> or I<endstring>
|
||||
match in the middle of a record.
|
||||
|
||||
If neither B<--recstart> nor B<--recend> are given then B<--recend>
|
||||
|
@ -726,7 +725,7 @@ Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
|
|||
expressions. This is slow, however.
|
||||
|
||||
|
||||
=item B<--regexp> (unimplimented)
|
||||
=item B<--regexp> (beta test)
|
||||
|
||||
Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular
|
||||
expressions. This is slow, however.
|
||||
|
@ -743,6 +742,7 @@ it to the command.
|
|||
|
||||
Only used with B<--pipe>.
|
||||
|
||||
|
||||
=item B<--retries> I<n> (beta testing)
|
||||
|
||||
If a job fails, retry it on another computer. Do this I<n> times. If
|
||||
|
@ -1601,6 +1601,34 @@ B<parallel -j 100 < jobs_to_run>
|
|||
As there is not a I<command> the jobs will be evaluated by the shell.
|
||||
|
||||
|
||||
=head1 EXAMPLE: Processing a big file using more cores
|
||||
|
||||
To process a big file or some output you can use B<--pipe> to split up
|
||||
the data into blocks and pipe the blocks into the processing program.
|
||||
|
||||
If the program is B<gzip -9> you can do:
|
||||
|
||||
B<cat bigfile | parallel --pipe --recend '' -k gzip -9 >>B<bigfile.gz>
|
||||
|
||||
This will split B<bigfile> into blocks of 1 MB and pass that to B<gzip
|
||||
-9> in parallel. One B<gzip> will be run per CPU core. The output of
|
||||
B<gzip -9> will be kept in order and saved to B<bigfile.gz>.
|
||||
|
||||
B<gzip> works fine if the output is appended, but some processing does
|
||||
not work like that - for example sorting. For this GNU B<parallel> can
|
||||
put the output of each command into a file. This will sort a big file
|
||||
in parallel:
|
||||
|
||||
B<cat bigfile | parallel --pipe --files sort | parallel -Xj1 sort -m {} ';' rm {} >>B<bigfile.sort>
|
||||
|
||||
Here B<bigfile> is split into blocks of around 1MB, each block ending
|
||||
in '\n' (which is the default for B<--recend>). Each block is passed
|
||||
to B<sort> and the output from B<sort> is saved into files. These
|
||||
files are passed to the second B<parallel> that runs B<sort -m> on the
|
||||
files before it removes the files. The output is saved to
|
||||
B<bigfile.sort>.
|
||||
|
||||
|
||||
=head1 EXAMPLE: Working as mutex and counting semaphore
|
||||
|
||||
The command B<sem> is an alias for B<parallel --semaphore>.
|
||||
|
|
Loading…
Reference in a new issue