From 3b3c344097f13dae6d43e51dab7f2c147a203f3a Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Tue, 7 Dec 2010 00:30:08 +0100 Subject: [PATCH] parallel: pod-file now separated (it became too big). niceload: rewritten to GetOpt and first testsuite. --- configure | 20 +- configure.ac | 2 +- doc/release_new_version | 3 + src/Makefile.am | 9 +- src/Makefile.in | 10 +- src/niceload | 159 +- src/parallel | 2520 +------------------------- src/parallel.pod | 2518 +++++++++++++++++++++++++ src/sem.pod | 214 --- src/sql | 2 +- testsuite/tests-to-run/niceload01.sh | 11 + testsuite/tests-to-run/test43.sh | 25 + testsuite/tests-to-run/test45.sh | 3 +- testsuite/wanted-results/test43 | 6 + 14 files changed, 2704 insertions(+), 2798 deletions(-) create mode 100644 src/parallel.pod create mode 100644 testsuite/tests-to-run/niceload01.sh create mode 100644 testsuite/tests-to-run/test43.sh create mode 100644 testsuite/wanted-results/test43 diff --git a/configure b/configure index 15588950..d2d12dbe 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.67 for parallel 20101202. +# Generated by GNU Autoconf 2.67 for parallel 20101206. # # Report bugs to . # @@ -551,8 +551,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='parallel' PACKAGE_TARNAME='parallel' -PACKAGE_VERSION='20101202' -PACKAGE_STRING='parallel 20101202' +PACKAGE_VERSION='20101206' +PACKAGE_STRING='parallel 20101206' PACKAGE_BUGREPORT='bug-parallel@gnu.org' PACKAGE_URL='' @@ -1168,7 +1168,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures parallel 20101202 to adapt to many kinds of systems. +\`configure' configures parallel 20101206 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1234,7 +1234,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of parallel 20101202:";; + short | recursive ) echo "Configuration of parallel 20101206:";; esac cat <<\_ACEOF @@ -1301,7 +1301,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -parallel configure 20101202 +parallel configure 20101206 generated by GNU Autoconf 2.67 Copyright (C) 2010 Free Software Foundation, Inc. @@ -1318,7 +1318,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by parallel $as_me 20101202, which was +It was created by parallel $as_me 20101206, which was generated by GNU Autoconf 2.67. Invocation command line was $ $0 $@ @@ -2133,7 +2133,7 @@ fi # Define the identity of the package. PACKAGE='parallel' - VERSION='20101202' + VERSION='20101206' cat >>confdefs.h <<_ACEOF @@ -2684,7 +2684,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by parallel $as_me 20101202, which was +This file was extended by parallel $as_me 20101206, which was generated by GNU Autoconf 2.67. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2746,7 +2746,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -parallel config.status 20101202 +parallel config.status 20101206 configured by $0, generated by GNU Autoconf 2.67, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index a3666580..ec86589b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([parallel], [20101202], [bug-parallel@gnu.org]) +AC_INIT([parallel], [20101206], [bug-parallel@gnu.org]) AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([ diff --git a/doc/release_new_version b/doc/release_new_version index 4387aab8..3d22ef7f 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -140,6 +140,9 @@ New in this release: * Implemented --load to wait until the load is below a limit before starting another job on that computer. +* Implemented --nice to set the niceness of jobs running both locally and + remotely. + * Review with focus on clusters. 
Thanks to Taylor Gillespie http://www.unixpronews.com/unixpronews-49-20101019GNUParallelSpeedUpProcessingWithMulticoresClusters.html diff --git a/src/Makefile.am b/src/Makefile.am index 703fe1bc..a810920b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ bin_SCRIPTS = parallel sem sql niceload man_MANS = parallel.1 sem.1 sql.1 niceload.1 doc_DATA = parallel.html sem.html sql.html niceload.html -parallel.1: parallel Makefile +parallel.1: parallel.pod Makefile pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ - --section=1 $(srcdir)/parallel > $(srcdir)/parallel.1 + --section=1 $(srcdir)/parallel.pod > $(srcdir)/parallel.1 sem.1: sem.pod Makefile pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ @@ -38,4 +38,7 @@ sem: parallel ln -fs parallel sem DISTCLEANFILES = parallel.1 sem.1 sql.1 niceload.1 parallel.html sem.html sql.html niceload.html -EXTRA_DIST = parallel sem sql niceload parallel.1 sem.1 sql.1 niceload.1 parallel.html sem.html sem.pod sql.html niceload.html +EXTRA_DIST = parallel sem sql niceload \ + parallel.1 sem.1 sql.1 niceload.1 \ + parallel.html sem.html sql.html niceload.html \ + sem.pod parallel.pod diff --git a/src/Makefile.in b/src/Makefile.in index 107b1ac0..97d9a127 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -150,7 +150,11 @@ bin_SCRIPTS = parallel sem sql niceload man_MANS = parallel.1 sem.1 sql.1 niceload.1 doc_DATA = parallel.html sem.html sql.html niceload.html DISTCLEANFILES = parallel.1 sem.1 sql.1 niceload.1 parallel.html sem.html sql.html niceload.html -EXTRA_DIST = parallel sem sql niceload parallel.1 sem.1 sql.1 niceload.1 parallel.html sem.html sem.pod sql.html niceload.html +EXTRA_DIST = parallel sem sql niceload \ + parallel.1 sem.1 sql.1 niceload.1 \ + parallel.html sem.html sql.html niceload.html \ + sem.pod parallel.pod + all: all-am .SUFFIXES: @@ -443,9 +447,9 @@ uninstall-man: uninstall-man1 uninstall-man1 -parallel.1: parallel Makefile +parallel.1: parallel.pod 
Makefile pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ - --section=1 $(srcdir)/parallel > $(srcdir)/parallel.1 + --section=1 $(srcdir)/parallel.pod > $(srcdir)/parallel.1 sem.1: sem.pod Makefile pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ diff --git a/src/niceload b/src/niceload index 15fcca1c..7a2998c2 100755 --- a/src/niceload +++ b/src/niceload @@ -1,4 +1,4 @@ -#!/usr/bin/perl -sw +#!/usr/bin/perl -w =head1 NAME @@ -6,9 +6,9 @@ niceload - run a program when the load is below a certain limit =head1 SYNOPSIS -B [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] command +B [-v] [-n nice] [-l load] [-t time] [-s time|-f factor] command -B [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] -p=PID +B [-v] [-n nice] [-l load] [-t time] [-s time|-f factor] -p=PID =head1 DESCRIPTION @@ -27,30 +27,30 @@ run 1 second, suspend (3.00-1.00) seconds, run 1 second, suspend =over 9 -=item B<-n>=I +=item B<-n> I Sets niceness. See B(1). -=item B<-l>=I +=item B<-l> I Max load. The maximal load average before suspending command. Default is 1.00. -=item B<-t>=I +=item B<-t> I Recheck load time. Sleep SEC seconds before checking load again. Default is 1 second. -=item B<-s>=I +=item B<-s> I Suspend time. Suspend the command this many seconds when the max load average is reached. -=item B<-f>=I +=item B<-f> I Suspend time factor. Dynamically set B<-s> as max load average over limit * factor. Default is 1. -=item B<-p>=I +=item B<-p> I Process ID of process to suspend. @@ -67,7 +67,7 @@ In terminal 1 run: top In terminal 2 run: -B50)'> +B>B<50)'> This will print a '.' every second for 50 seconds and eat a lot of CPU. When the load rises to 1.0 the process is suspended. 
@@ -233,39 +233,46 @@ B(1), B(1) =cut -sub help { - print q{ -Usage: - niceload [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] command - niceload [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] -p=PID -}; - +use strict; +use Getopt::Long; +$Global::progname="niceload"; +$Global::version = 20101206; +Getopt::Long::Configure("bundling","require_order"); +get_options_from_array(\@ARGV) || die_usage(); +if($::opt_version) { + version(); + exit 0; } - -if($f and $s) { - # You cannot have -s and -f +if($::opt_help) { + help(); + exit 0; +} +if($::opt_factor and $::opt_suspend) { + # You cannot have --suspend and --factor help(); exit; } -my $nice = $n || 0; # -n=0 Nice level (Default: 0) -my $max_load = $l || 1; # -l=1 Max acceptable load average (Default: 1) -my $check_time = $t || 1; # -t=1 Seconds between checking load average (Default: 1) +my $nice = $::opt_nice || 0; # -n=0 Nice level (Default: 0) +my $max_load = $::opt_load || 1; # -l=1 Max acceptable load average (Default: 1) +my $check_time = $::opt_recheck || 1; # -t=1 Seconds between checking load average (Default: 1) my $wait_factor; my $wait_time; -if($s) { - $wait_time = $s; # -s=sec Seconds to suspend process when load average is too high +if($::opt_suspend) { + # --suspend=sec Seconds to suspend process when load average is too high + $wait_time = $::opt_suspend; } else { - $wait_factor=$f || 1; # -f=1 compute wait_time dynamically as (load - limit) * factor + # --factor=1 compute wait_time dynamically as (load - limit) * factor + $wait_factor=$::opt_factor || 1; } -my $processid = $p; # Control this PID (Default: control the command) -my $verbose = $v || $debug; - -@program = @ARGV; +my $processid = $::opt_pid; # Control this PID (Default: control the command) +my $verbose = $::opt_verbose || $::opt_debug; +my @program = @ARGV; $SIG{CHLD} = \&REAPER; if($processid) { $Child::fork = $processid; + $::opt_verbose and print STDERR "Control $processid\n"; init_signal_handling_attached_child(); 
my $child_pgrp = getpgrp $Child::fork; suspend_resume($max_load,$check_time,$wait_time,$wait_factor,$child_pgrp); @@ -277,13 +284,13 @@ if($processid) { suspend_resume($max_load,$check_time,$wait_time,$wait_factor,$child_pgrp); } else { setpgrp(0,0); - $debug and debug("Child pid: $$, pgrp: ",getpgrp $$,"\n"); + debug("Child pid: $$, pgrp: ",getpgrp $$,"\n"); if($nice) { unshift(@program,"nice","-n",$nice); } - $debug and debug("@program\n"); + debug("@program\n"); system(@program); - $debug and debug("Child exit\n"); + debug("Child exit\n"); exit; } } else { @@ -291,10 +298,74 @@ if($processid) { exit; } -sub debug { - print STDERR @_; +sub get_options_from_array { + # Run GetOptions on @array + # Returns: + # true if parsing worked + # false if parsing failed + # @array is changed + my $array_ref = shift; + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + my @retval = GetOptions + ("debug|D" => \$::opt_debug, + "load|l=s" => \$::opt_load, + "factor|f=s" => \$::opt_factor, + "suspend|s=s" => \$::opt_suspend, + "recheck|t=s" => \$::opt_recheck, + "nice|n=i" => \$::opt_nice, + "help|h" => \$::opt_help, + "process|pid|p=s" => \$::opt_pid, + "verbose|v" => \$::opt_verbose, + "version|V" => \$::opt_version, + ); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return @retval; } +sub die_usage { + help(); + exit 1; +} + +sub help { + print q{ +Usage: + niceload [-v] [-n=niceness] [-l=loadavg] [-t=recheck_sec] [-s=suspend_sec|-f=factor] command + niceload [-v] [-n=niceness] [-l=loadavg] [-t=recheck_sec] [-s=suspend_sec|-f=factor] -p=PID +}; +} + + + +sub debug { + if($::opt_debug) { + print STDERR @_; + } +} + +sub version { + # Returns: N/A + print join("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2004,2005,2006,2007,2008,2009 
Ole Tange", + "Copyright (C) 2010 Ole Tange and Free Software Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later ", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: http://www.gnu.org/software/parallel\n" + ); +} sub init_signal_handling_attached_child { $SIG{INT}=\&sigint_attached_child; @@ -323,38 +394,38 @@ sub REAPER { } sub kill_child_CONT { - $debug and debug("SIGCONT received. Killing $Child::fork\n"); + debug("SIGCONT received. Killing $Child::fork\n"); kill CONT => -getpgrp($Child::fork); } sub kill_child_TSTP { - $debug and debug("SIGTSTP received. Killing $Child::fork and self\n"); + debug("SIGTSTP received. Killing $Child::fork and self\n"); kill TSTP => -getpgrp($Child::fork); kill STOP => -$$; } sub kill_child_INT { - $debug and debug("SIGINT received. Killing $Child::fork Exit\n"); + debug("SIGINT received. Killing $Child::fork Exit\n"); kill INT => -getpgrp($Child::fork); exit; } sub suspend_resume { my ($max_load,$check_time,$wait_time,$wait_factor,@pids) = @_; - $debug and debug("suspend_resume these @pids\n"); + debug("suspend_resume these @pids\n"); resume_pids(@pids); while (pids_exist(@pids)) { if ( loadavg() > $max_load ) { if($wait_factor) { $wait_time = (loadavg()-$max_load) * $wait_factor; } - $verbose and debug("suspending for $wait_time seconds\n"); + $::opt_verbose and print STDERR "suspending for $wait_time seconds\n"; suspend_pids(@pids); sleep 1; # for some reason this statement is skipped sleep $wait_time; resume_pids(@pids); } - $verbose and debug("running for $check_time second(s)\n"); + $::opt_verbose and print STDERR "running for $check_time second(s)\n"; sleep($check_time); } } @@ -362,7 +433,7 @@ sub suspend_resume { sub pids_exist { my (@pids) = @_; my ($exists) = 0; - for $pid (@pids) { + for my $pid (@pids) { if(-e "/proc/".$pid) { $exists++ } #if(kill 0 => $Child::fork) { $exists++ } } @@ -406,9 +477,7 @@ sub 
signal_pids { my ($signal,@pids) = @_; # local $SIG{$signal} = 'IGNORE'; - for $pid (@pids) { + for my $pid (@pids) { kill $signal => -$pid; # stop PID group } } - -$v=$f=$l=$h=$n=$t=$s=$p=$h=$processid; # Ignore perl -w diff --git a/src/parallel b/src/parallel index 0bbc897e..4087ff66 100755 --- a/src/parallel +++ b/src/parallel @@ -1,2523 +1,5 @@ #!/usr/bin/perl -w -=head1 NAME - -parallel - build and execute shell command lines from standard input in parallel - -=head1 SYNOPSIS - -B [options] [I [arguments]] < list_of_arguments - -B [options] [I [arguments]] B<:::> arguments - -B [options] [I [arguments]] B<::::> argfile(s) - -B --semaphore [options] I - -B<#!/usr/bin/parallel> --shebang [options] [I [arguments]] - -=head1 DESCRIPTION - -GNU B is a shell tool for executing jobs concurrently locally -or using remote computers. A job is typically a single command or a -small script that has to be run for each of the lines in the -input. The typical input is a list of files, a list of hosts, a list -of users, a list of URLs, or a list of tables. - -If you use B today you will find GNU B very easy to -use as GNU B is written to have the same options as -B. If you write loops in shell, you will find GNU B -may be able to replace most of the loops and make them run faster by -running several jobs simultaneously. If you use B or B you -will find GNU B will often make the command easier to read. - -GNU B makes sure output from the commands is the same output -as you would get had you run the commands sequentially. This makes it -possible to use output from GNU B as input for other -programs. - -For each line of input GNU B will execute I with -the line as arguments. If no I is given, the line of input is -executed. Several lines will be run in parallel. GNU B can -often be used as a substitute for B or B. - -Before looking at the options you may want to check out the Bs -after the list of options. That will give you an idea of what GNU -B is capable of. 
- -You can also watch the intro video for a quick introduction: -http://www.youtube.com/watch?v=OpaiGYxkSuQ or at -http://tinyogg.com/watch/TORaR/ and http://tinyogg.com/watch/hfxKj/ - -=head1 OPTIONS - -=over 9 - -=item I - -Command to execute. If I or the following arguments contain -{} every instance will be substituted with the input line. Setting a -command also invokes B<--file>. - -If I is given, GNU B will behave similar to B. If -I is not given GNU B will behave similar to B. - - -=item B<{}> - -Input line. This is the default replacement string and will normally -be used for putting the argument in the command line. It can be -changed with B<-I>. - - -=item B<{.}> - -Input line without extension. This is a specialized replacement string -with the extension removed. If the input line contains B<.> after the -last B the last B<.> till the end of the string will be removed and -B<{.}> will be replaced with the remaining. E.g. I becomes -I, I becomes I, I -becomes I, I remains I. If the -input line does not contain B<.> it will remain unchanged. - -B<{.}> can be used the same places as B<{}>. The replacement string -B<{.}> can be changed with B<-U>. - - -=item B<{/}> (beta testing) - -Basename of input line. This is a specialized replacement string -with the directory part removed. - -B<{/}> can be used the same places as B<{}>. The replacement string -B<{/}> can be changed with B<--basenamereplace>. - - -=item B<{/.}> (beta testing) - -Basename of input line without extension. This is a specialized -replacement string with the directory and extension part removed. It -is a combination of B<{/}> and B<{.}>. - -B<{/.}> can be used the same places as B<{}>. The replacement string -B<{/.}> can be changed with B<--basenameextensionreplace>. - - -=item B<{>IB<}> - -Argument from argument file I or the I'th argument. See B<-a> -and B<-N>. - -B<{>IB<}> can be used the same places as B<{}>. 
- - -=item B<{>I.B<}> - -Argument from argument file I or the I'th argument without -extension. It is a combination of B<{>IB<}> and B<{.}>. - -B<{>I.B<}> can be used the same places as B<{>IB<}>. - - -=item B<{>I/B<}> (beta testing) - -Basename of argument from argument file I or the I'th argument. -It is a combination of B<{>IB<}> and B<{/}>. See B<-a> and B<-N>. - -B<{>I/B<}> can be used the same places as B<{>IB<}>. - - -=item B<{>I/.B<}> (beta testing) - -Basename of argument from argument file I or the I'th argument -without extension. It is a combination of B<{>IB<}>, B<{/}>, and -B<{.}>. See B<-a> and B<-N>. - -B<{>I/.B<}> can be used the same places as B<{>IB<}>. - - - -=item B<:::> I - -Use arguments from the command line as input instead of from stdin -(standard input). Unlike other options for GNU B B<:::> is -placed after the I and before the arguments. - -The following are equivalent: - - (echo file1; echo file2) | parallel gzip - parallel gzip ::: file1 file2 - parallel gzip {} ::: file1 file2 - parallel --arg-sep ,, gzip {} ,, file1 file2 - parallel --arg-sep ,, gzip ,, file1 file2 - parallel ::: "gzip file1" "gzip file2" - -To avoid treating B<:::> as special use B<--arg-sep> to set the -argument separator to something else. See also B<--arg-sep>. - -stdin (standard input) will be passed to the first process run. - -If B<--arg-file> is set arguments from that file will be appended. - - -=item B<::::> I - -Another way to write B<-a> I B<-a> I ... - -See B<-a>. - - -=item B<--null> - -=item B<-0> - -Use NUL as delimiter. Normally input lines will end in \n -(newline). If they end in \0 (NUL), then use this option. It is useful -for processing arguments that may contain \n (newline). - - -=item B<--arg-file> I - -=item B<-a> I - -Read items from the file I instead of stdin (standard input). If -you use this option, stdin is given to the first process run. -Otherwise, stdin is redirected from /dev/null. 
- -If multiple B<-a> are given, one line will be read from each of the -files. The arguments can be accessed in the command as B<{1}> -.. B<{>IB<}>, so B<{1}> will be a line from the first file, and -B<{6}> will refer to the line with the same line number from the 6th -file. - - -=item B<--arg-file-sep> I - -Use I instead of B<::::> as separator string between command -and argument files. Useful if B<::::> is used for something else by the -command. - -See also: B<::::>. - - -=item B<--arg-sep> I - -Use I instead of B<:::> as separator string. Useful if B<:::> -is used for something else by the command. - -Also useful if you command uses B<:::> but you still want to read -arguments from stdin (standard input): Simply change B<--arg-sep> to a -string that is not in the command line. - -See also: B<:::>. - - -=item B<--basefile> I - -=item B<-B> I - -I will be transferred to each sshlogin before a jobs is -started. It will be removed if B<--cleanup> is active. The file may be -a script to run or some common base data needed for the jobs. -Multiple B<-B> can be specified to transfer more basefiles. The -I will be transferred the same way as B<--transfer>. - - -=item B<--basenamereplace> I (beta testing) - -Use the replacement string I instead of B<{/}> for basename of input line. - - -=item B<--basenameextensionreplace> I (beta testing) - -Use the replacement string I instead of B<{/.}> for basename of input line without extension. - - -=item B<--bg> (beta testing) - -Run command in background thus GNU B will not wait for -completion of the command before exiting. This is the default if -B<--semaphore> is set. - -See also: B<--fg> - -Implies B<--semaphore>. - - -=item B<--cleanup> - -Remove transferred files. B<--cleanup> will remove the transferred files -on the remote server after processing is done. 
- - find log -name '*gz' | parallel \ - --sshlogin server.example.com --transfer --return {.}.bz2 \ - --cleanup "zcat {} | bzip -9 >{.}.bz2" - -With B<--transfer> the file transferred to the remote server will be -removed on the remote server. Directories created will not be removed -- even if they are empty. - -With B<--return> the file transferred from the remote server will be -removed on the remote server. Directories created will not be removed -- even if they are empty. - -B<--cleanup> is ignored when not used with B<--transfer> or B<--return>. - - -=item B<--colsep> I - -=item B<-C> I - -Column separator. The input will be treated as a table with I -separating the columns. The n'th column can be access using -B<{>IB<}> or B<{>I.B<}>. E.g. B<{3}> is the 3rd column. - -B<--colsep> implies B<--trim rl>. - -I is a Perl Regular Expression: -http://perldoc.perl.org/perlre.html - - -=item B<--command> - -=item B<-c> (Use B<--command> as B<-c> may be removed in later versions) - -Line is a command. The input line contains more than one argument or -the input line needs to be evaluated by the shell. This is the default -if I is not set. Can be reversed with B<--file>. - -Most people will never need this because GNU B normally -selects the correct B<--file> or B<--command>. - - -=item B<--delimiter> I - -=item B<-d> I - -Input items are terminated by the specified character. Quotes and -backslash are not special; every character in the input is taken -literally. Disables the end-of-file string, which is treated like any -other argument. This can be used when the input consists of simply -newline-separated items, although it is almost always better to design -your program to use --null where this is possible. The specified -delimiter may be a single character, a C-style character escape such -as \n, or an octal or hexadecimal escape code. Octal and -hexadecimal escape codes are understood as for the printf command. -Multibyte characters are not supported. 
- -=item B<-E> I - -Set the end of file string to eof-str. If the end of file string -occurs as a line of input, the rest of the input is ignored. If -neither B<-E> nor B<-e> is used, no end of file string is used. - - -=item B<--eof>[=I] - -=item B<-e>[I] - -This option is a synonym for the B<-E> option. Use B<-E> instead, -because it is POSIX compliant for B while this option is not. -If I is omitted, there is no end of file string. If neither -B<-E> nor B<-e> is used, no end of file string is used. - - -=item B<--eta> - -Show the estimated number of seconds before finishing. This forces GNU -B to read all jobs before starting to find the number of -jobs. GNU B normally only reads the next job to run. -Implies B<--progress>. - - -=item B<--fg> (beta testing) - -Run command in foreground thus GNU B will wait for -completion of the command before exiting. - -See also: B<--bg> - -Implies B<--semaphore>. - - -=item B<--file> - -=item B<-f> (Use B<--file> as B<-f> may be removed in later versions) - -Line is a filename. The input line contains a filename that will be -quoted so it is not evaluated by the shell. This is the default if -I is set. Can be reversed with B<--command>. - -Most people will never need this because GNU B normally -selects the correct B<--file> or B<--command>. - - -=item B<--group> - -=item B<-g> - -Group output. Output from each jobs is grouped together and is only -printed when the command is finished. STDERR first followed by STDOUT. -B<-g> is the default. Can be reversed with B<-u>. - -=item B<--help> - -=item B<-h> - -Print a summary of the options to GNU B and exit. - - -=item B<--halt-on-error> <0|1|2> - -=item B<-H> <0|1|2> - -=over 3 - -=item 0 - -Do not halt if a job fails. Exit status will be the number of jobs -failed. This is the default. - -=item 1 - -Do not start new jobs if a job fails, but complete the running jobs -including cleanup. The exit status will be the exit status from the -last failing job. 
- -=item 2 - -Kill off all jobs immediately and exit without cleanup. The exit -status will be the exit status from the failing job. - -=back - - -=item B<-I> I - -Use the replacement string I instead of {}. - - -=item B<--replace>[=I] - -=item B<-i>[I] - -This option is a synonym for B<-I>I if I is -specified, and for B<-I>{} otherwise. This option is deprecated; -use B<-I> instead. - - -=item B<--jobs> I - -=item B<-j> I - -=item B<--max-procs> I - -=item B<-P> I - -Run up to N jobs in parallel. 0 means as many as possible. Default is -9. - -If B<--semaphore> is set default is 1 thus making a mutex. - - -=item B<--jobs> I<+N> - -=item B<-j> I<+N> - -=item B<--max-procs> I<+N> - -=item B<-P> I<+N> - -Add N to the number of CPU cores. Run this many jobs in parallel. For -compute intensive jobs B<-j> +0 is useful as it will run -number-of-cpu-cores jobs simultaneously. See also -B<--use-cpus-instead-of-cores>. - - -=item B<--jobs> I<-N> - -=item B<-j> I<-N> - -=item B<--max-procs> I<-N> - -=item B<-P> I<-N> - -Subtract N from the number of CPU cores. Run this many jobs in parallel. -If the evaluated number is less than 1 then 1 will be used. See also -B<--use-cpus-instead-of-cores>. - - -=item B<--jobs> I% - -=item B<-j> I% - -=item B<--max-procs> I% - -=item B<-P> I% - -Multiply N% with the number of CPU cores. Run this many jobs in parallel. -If the evaluated number is less than 1 then 1 will be used. See also -B<--use-cpus-instead-of-cores>. - - -=item B<--jobs> I - -=item B<-j> I - -=item B<--max-procs> I - -=item B<-P> I - -Read parameter from file. Use the content of I as parameter -for I<-j>. E.g. I could contain the string 100% or +2 or -10. If I is changed when a job completes, I is -read again and the new number of jobs is computed. If the number is -lower than before, running jobs will be allowed to finish but new jobs -will not be started until the wanted number of jobs has been reached. 
-This makes it possible to change the number of simultaneous running -jobs while GNU B is running. - - -=item B<--keeporder> - -=item B<-k> - -Keep sequence of output same as the order of input. If jobs 1 2 3 4 -end in the sequence 3 1 4 2 the output will still be 1 2 3 4. - - -=item B<-L> I - -Use at most I nonblank input lines per command line. -Trailing blanks cause an input line to be logically continued on the -next input line. - -Implies B<-X> unless B<-m> is set. - - -=item B<--max-lines>[=I] - -=item B<-l>[I] - -Synonym for the B<-L> option. Unlike B<-L>, the I argument -is optional. If I is not specified, it defaults to one. -The B<-l> option is deprecated since the POSIX standard specifies -B<-L> instead. - -Implies B<-X> unless B<-m> is set. - - -=item B<--load> I (experimental) - -Do not start new jobs on a given machine unless the load is less than -I. I uses the same syntax as B<--jobs>, so I<100%> -is a valid setting. - -The load average is only sampled every 10 seconds to avoid stressing -small machines. - - -=item B<--controlmaster> (experimental) - -=item B<-M> (experimental) - -Use ssh's ControlMaster to make ssh connections faster. Useful if jobs -run remote and are very fast to run. This is disabled for sshlogins -that specify their own ssh command. - - -=item B<--xargs> - -=item B<-m> - -Multiple. Insert as many arguments as the command line length -permits. If B<{}> is not used the arguments will be appended to the -line. If B<{}> is used multiple times each B<{}> will be replaced -with all the arguments. - -Support for B<-m> with B<--sshlogin> is limited and may fail. - -See also B<-X> for context replace. If in doubt use B<-X> as that will -most likely do what is needed. - - -=item B<--progress> - -Show progress of computations. List the computers involved in the task -with number of CPU cores detected and the max number of jobs to -run. 
After that show progress for each computer: number of running -jobs, number of completed jobs, and percentage of all jobs done by -this computer. The percentage will only be available after all jobs -have been scheduled as GNU B only read the next job when -ready to schedule it - this is to avoid wasting time and memory by -reading everything at startup. - -By sending GNU B SIGUSR2 you can toggle turning on/off -B<--progress> on a running GNU B process. - - -=item B<--max-args>=I - -=item B<-n> I - -Use at most I arguments per command line. Fewer than -I arguments will be used if the size (see the B<-s> option) -is exceeded, unless the B<-x> option is given, in which case -GNU B will exit. - -Implies B<-X> unless B<-m> is set. - - -=item B<--max-replace-args>=I - -=item B<-N> I - -Use at most I arguments per command line. Like B<-n> but -also makes replacement strings B<{1}> .. B<{>IB<}> that -represents argument 1 .. I. If too few args the B<{>IB<}> will -be empty. - -This will set the owner of the homedir to the user: - -B - -Implies B<-X> unless B<-m> is set. - - -=item B<--max-line-length-allowed> - -Print the maximal number characters allowed on the command line and -exit (used by GNU B itself to determine the line length -on remote computers). - - -=item B<--number-of-cpus> - -Print the number of physical CPUs and exit (used by GNU B -itself to determine the number of physical CPUs on remote computers). - - -=item B<--number-of-cores> - -Print the number of CPU cores and exit (used by GNU B itself -to determine the number of CPU cores on remote computers). - - -=item B<--nice> I (beta testing) - -Run the command at this niceness. For simple commands you can just add -B in front of the command. But if the command consists of more -sub commands (Like: ls|wc) then prepending B will not always -work. B<--nice> will make sure all sub commands are niced. 
- - -=item B<--interactive> - -=item B<-p> - -Prompt the user about whether to run each command line and read a line -from the terminal. Only run the command line if the response starts -with 'y' or 'Y'. Implies B<-t>. - - -=item B<--profile> I - -=item B<-J> I - -Use profile I for options. This is useful if you want to -have multiple profiles. You could have one profile for running jobs in -parallel on the local machine and a different profile for running jobs -on remote machines. See the section PROFILE FILES for examples. - -I corresponds to the file ~/.parallel/I. - -Default: config - -=item B<--quote> - -=item B<-q> - -Quote I. This will quote the command line so special -characters are not interpreted by the shell. See the section -QUOTING. Most people will never need this. Quoting is disabled by -default. - - -=item B<--no-run-if-empty> - -=item B<-r> - -If the stdin (standard input) only contains whitespace, do not run the command. - - -=item B<--retries> I - -If a job fails, retry it on another computer. Do this I times. If -there are fewer than I computers in B<--sshlogin> GNU parallel will -re-use the computers. This is useful if some jobs fail for no apparent -reason (such as network failure). - - -=item B<--return> I - -Transfer files from remote servers. B<--return> is used with -B<--sshlogin> when the arguments are files on the remote servers. When -processing is done the file I will be transferred -from the remote server using B and will be put relative to -the default login dir. E.g. - - echo foo/bar.txt | parallel \ - --sshlogin server.example.com --return {.}.out touch {.}.out - -This will transfer the file I<$HOME/foo/bar.out> from the server -I to the file I after running -B on I. - - echo /tmp/foo/bar.txt | parallel \ - --sshlogin server.example.com --return {.}.out touch {.}.out - -This will transfer the file I from the server -I to the file I after running -B on I. 
- -Multiple files can be transferred by repeating the options multiple -times: - - echo /tmp/foo/bar.txt | \ - parallel --sshlogin server.example.com \ - --return {.}.out --return {.}.out2 touch {.}.out {.}.out2 - -B<--return> is often used with B<--transfer> and B<--cleanup>. - -B<--return> is ignored when used with B<--sshlogin :> or when not used -with B<--sshlogin>. - - -=item B<--max-chars>=I - -=item B<-s> I - -Use at most I characters per command line, including the -command and initial-arguments and the terminating nulls at the ends of -the argument strings. The largest allowed value is system-dependent, -and is calculated as the argument length limit for exec, less the size -of your environment. The default value is the maximum. - -Implies B<-X> unless B<-m> is set. - - -=item B<--show-limits> - -Display the limits on the command-line length which are imposed by the -operating system and the B<-s> option. Pipe the input from /dev/null -(and perhaps specify --no-run-if-empty) if you don't want GNU B -to do anything. - - -=item B<--semaphore> - -Work as a counting semaphore. B<--semaphore> will cause GNU -B to start I in the background. When the number of -simultaneous jobs is reached, GNU B will wait for one of -these to complete before starting another command. - -B<--semaphore> implies B<--bg> unless B<--fg> is specified. - -B<--semaphore> implies B<--semaphorename `tty`> unless -B<--semaphorename> is specified. - -Used with B<--fg>, B<--wait>, and B<--semaphorename>. - -The command B is an alias for B. - - -=item B<--semaphorename> I - -=item B<--id> I - -The name of the semaphore to use. The semaphore can be shared between -multiple processes. - -Implies B<--semaphore>. - - -=item B<--semaphoretimeout> I (not implemented) - -If the semaphore is not released within secs seconds, take it anyway. - -Implies B<--semaphore>. - - -=item B<--skip-first-line> - -Do not use the first line of input (used by GNU B itself -when called with B<--shebang>). 
- - -=item B<-S> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> - -=item B<--sshlogin> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> - -Distribute jobs to remote servers. The jobs will be run on a list of -remote servers. GNU B will determine the number of CPU -cores on the remote servers and run the number of jobs as specified by -B<-j>. If the number I is given GNU B will use this -number for number of CPU cores on the host. Normally I will not -be needed. - -An I is of the form: - - [sshcommand [options]][username@]hostname - -The sshlogin must not require a password. - -The sshlogin ':' is special, it means 'no ssh' and will therefore run -on the local computer. - -The sshlogin '..' is special, it read sshlogins from ~/.parallel/sshloginfile - -To specify more sshlogins separate the sshlogins by comma or repeat -the options multiple times. - -For examples: see B<--sshloginfile>. - -The remote host must have GNU B installed. - -B<--sshlogin> is known to cause problems with B<-m> and B<-X>. - - -=item B<--sshloginfile> I - -File with sshlogins. The file consists of sshlogins on separate -lines. Empty lines and lines starting with '#' are ignored. Example: - - server.example.com - username@server2.example.com - 8/my-8-core-server.example.com - 2/my_other_username@my-dualcore.example.net - # This server has SSH running on port 2222 - ssh -p 2222 server.example.net - 4/ssh -p 2222 quadserver.example.net - # Use a different ssh program - myssh -p 2222 -l myusername hexacpu.example.net - # Use a different ssh program with default number of cores - //usr/local/bin/myssh -p 2222 -l myusername hexacpu.example.net - # Use a different ssh program with 6 cores - 6//usr/local/bin/myssh -p 2222 -l myusername hexacpu.example.net - # Assume 16 cores on the local computer - 16/: - -When using a different ssh program the last argument must be the hostname. - -The sshloginfile '..' is special, it read sshlogins from -~/.parallel/sshloginfile - - -=item B<--silent> - -Silent. 
The job to be run will not be printed. This is the default. -Can be reversed with B<-v>. - - -=item B<--tmpdir> I - -Directory for temporary files. GNU B normally buffers output -into temporary files in /tmp. By setting B<--tmpdir> you can use a -different dir for the files. Setting B<--tmpdir> is equivalent to -setting $TMPDIR. - - -=item B<--verbose> - -=item B<-t> - -Print the command line on the standard error output before executing -it. - -See also B<-v> and B<-p>. - - -=item B<--transfer> - -Transfer files to remote servers. B<--transfer> is used with -B<--sshlogin> when the arguments are files and should be transferred to -the remote servers. The files will be transferred using B and -will be put relative to the default login dir. E.g. - - echo foo/bar.txt | parallel \ - --sshlogin server.example.com --transfer wc - -This will transfer the file I to the server -I to the file I<$HOME/foo/bar.txt> before running -B on I. - - echo /tmp/foo/bar.txt | parallel \ - --sshlogin server.example.com --transfer wc - -This will transfer the file I to the server -I to the file I before running -B on I. - -B<--transfer> is often used with B<--return> and B<--cleanup>. - -B<--transfer> is ignored when used with B<--sshlogin :> or when not used with B<--sshlogin>. - - -=item B<--trc> I - -Transfer, Return, Cleanup. Short hand for: - -B<--transfer> B<--return> I B<--cleanup> - - -=item B<--trim> - -Trim white space in input. - -=over 4 - -=item n - -No trim. Input is not modified. This is the default. - -=item l - -Left trim. Remove white space from start of input. E.g. " a bc " -> "a bc ". - -=item r - -Right trim. Remove white space from end of input. E.g. " a bc " -> " a bc". - -=item lr - -=item rl - -Both trim. Remove white space from both start and end of input. E.g. " -a bc " -> "a bc". This is the default if B<--colsep> is used. - -=back - - -=item B<--ungroup> - -=item B<-u> - -Ungroup output. Output is printed as soon as possible. 
This may cause -output from different commands to be mixed. GNU B runs -faster with B<-u>. Can be reversed with B<-g>. - - -=item B<--extensionreplace> I - -=item B<-U> I - -Use the replacement string I instead of {.} for input line without extension. - - -=item B<--use-cpus-instead-of-cores> - -Count the number of physical CPUs instead of CPU cores. When computing -how many jobs to run simultaneously relative to the number of CPU cores -you can ask GNU B to instead look at the number of physical -CPUs. This will make sense for computers that have hyperthreading as -two jobs running on one CPU with hyperthreading will run slower than -two jobs running on two physical CPUs. Some multi-core CPUs can run -faster if only one thread is running per physical CPU. Most users will -not need this option. - - -=item B<-v> - -Verbose. Print the job to be run on STDOUT. Can be reversed with -B<--silent>. See also B<-t>. - -Use B<-v> B<-v> to print the wrapping ssh command when running remotely. - - -=item B<--version> - -=item B<-V> - -Print the version GNU B and exit. - - -=item B<--workdir> I (beta testing) - -=item B<-W> I (beta testing) - -Files transferred using B<--transfer> and B<--return> will be relative -to I on remote machines, and the command will be executed in -that dir. The special workdir B<...> will create a workdir in -B<~/.parallel/tmp/> on the remote machines and will be removed if -using B<--cleanup>. - - -=item B<--wait> (beta testing) - -Wait for all commands to complete. - -Implies B<--semaphore>. - - -=item B<-X> - -Multiple arguments with context replace. Insert as many arguments as -the command line length permits. If B<{}> is not used the arguments -will be appended to the line. If B<{}> is used as part of a word -(like I) then the whole word will be repeated. If B<{}> is -used multiple times each B<{}> will be replaced with the arguments. 
- -Normally B<-X> will do the right thing, whereas B<-m> can give -unexpected results if B<{}> is used as part of a word. - -Support for B<-X> with B<--sshlogin> is limited and may fail. - -See also B<-m>. - - -=item B<--exit> - -=item B<-x> - -Exit if the size (see the B<-s> option) is exceeded. - - -=item B<--shebang> - -=item B<--hashbang> - -=item B<-Y> - -GNU B can be called as a shebang (#!) command as the first line of a script. Like this: - - #!/usr/bin/parallel -Yr traceroute - - foss.org.my - debian.org - freenetproject.org - -For this to work B<--shebang> or B<-Y> must be set as the first option. - - -=back - -=head1 EXAMPLE: Working as xargs -n1. Argument appending - -GNU B can work similar to B. - -To compress all html files using B run: - -B - -If the file names may contain a newline use B<-0>. Substitute FOO BAR with -FUBAR in all files in this dir and subdirs: - -B - -Note B<-q> is needed because of the space in 'FOO BAR'. - - -=head1 EXAMPLE: Reading arguments from command line - -GNU B can take the arguments from command line instead of -stdin (standard input). To compress all html files in the current dir -using B run: - -B - -To convert *.wav to *.mp3 using LAME running one process per CPU core -run: - -B - - -=head1 EXAMPLE: Inserting multiple arguments - -When moving a lot of files like this: B you will -sometimes get the error: - -B - -because there are too many files. You can instead do: - -B - -This will run B for each file. It can be done faster if B gets -as many arguments that will fit on the line: - -B - - -=head1 EXAMPLE: Context replace - -To remove the files I .. I you could do: - -B - -You could also do: - -B - -The first will run B 10000 times, while the last will only run -B as many times needed to keep the command line length short -enough to avoid B (it typically runs 1-2 times). - -You could also run: - -B - -This will also only run B as many times needed to keep the command -line length short enough. 
- - -=head1 EXAMPLE: Compute intensive jobs and substitution - -If ImageMagick is installed this will generate a thumbnail of a jpg -file: - -B - -If the system has more than 1 CPU core it can be run with -number-of-cpu-cores jobs in parallel (B<-j> +0). This will do that for -all jpg files in a directory: - -B - -To do it recursively use B<find>: - -B - -Notice how the argument has to start with B<{}> as B<{}> will include path -(e.g. running B would clearly be wrong). The command will -generate files like ./foo/bar.jpg_thumb.jpg. - -Use B<{.}> to avoid the extra .jpg in the file name. This command will -make files like ./foo/bar_thumb.jpg: - -B - - -=head1 EXAMPLE: Substitution and redirection - -This will generate an uncompressed version of .gz-files next to the .gz-file: - -B>B<"{.} ::: *.gz> - -Quoting of > is necessary to postpone the redirection. Another -solution is to quote the whole command: - -B>B<{.}" ::: *.gz> - -Other special shell characters (such as * ; $ > < | >> <<) also need -to be put in quotes, as they may otherwise be interpreted by the shell -and not given to GNU B<parallel>. - -=head1 EXAMPLE: Composed commands - -A job can consist of several commands. This will print the number of -files in each directory: - -B - -To put the output in a file called .dir: - -B> B<{}.dir'> - -Even small shell scripts can be run by GNU B<parallel>: - -B - -Given a list of URLs, list all URLs that fail to download. Print the -line number and the URL. - -B>B - - -=head1 EXAMPLE: Removing file extension when processing files - -When processing files removing the file extension using B<{.}> is -often useful.
- -Create a directory for each zip-file and unzip it in that dir: - -B - -Recompress all .gz files in current directory using B running 1 -job per CPU core in parallel: - -B>B<{.}.bz2 && rm {}" ::: *.gz> - -Convert all WAV files to MP3 using LAME: - -B - -Put all converted in the same directory: - -B - -=head1 EXAMPLE: Removing two file extensions when processing files and -calling GNU Parallel from itself - -If you have directory with tar.gz files and want these extracted in -the corresponding dir (e.g foo.tar.gz will be extracted in the dir -foo) you can do: - -B - -=head1 EXAMPLE: Download 10 images for each of the past 30 days - -Let us assume a website stores images like: - - http://www.example.com/path/to/YYYYMMDD_##.jpg - -where YYYYMMDD is the date and ## is the number 01-10. This will -generate the past 30 days as YYYYMMDD: - -B - -Based on this we can let GNU B generate 10 Bs per day: - -I B<| parallel -I {o} seq -w 1 10 "|" parallel wget -http://www.example.com/path/to/{o}_{}.jpg> - -=head1 EXAMPLE: Rewriting a for-loop and a while-read-loop - -for-loops like this: - - (for x in `cat list` ; do - do_something $x - done) | process_output - -and while-read-loops like this: - - cat list | (while read x ; do - do_something $x - done) | process_output - -can be written like this: - -B - -If the processing requires more steps the for-loop like this: - - (for x in `cat list` ; do - no_extension=${x%.*}; - do_something $x scale $no_extension.jpg - do_step2 <$x $no_extension - done) | process_output - -and while-loops like this: - - cat list | (while read x ; do - no_extension=${x%.*}; - do_something $x scale $no_extension.jpg - do_step2 <$x $no_extension - done) | process_output - -can be written like this: - -B - - -=head1 EXAMPLE: Group output lines - -When running jobs that output data, you often do not want the output -of multiple jobs to run together. GNU B defaults to grouping the -output of each job, so the output is printed when the job finishes. 
If -you want the output to be printed while the job is running you can use -B<-u>. - -Compare the output of: - -B - -to the output of: - -B - - -=head1 EXAMPLE: Keep order of output same as order of input - -Normally the output of a job will be printed as soon as it -completes. Sometimes you want the order of the output to remain the -same as the order of the input. This is often important, if the output -is used as input for another system. B<-k> will make sure the order of -output will be in the same order as input even if later jobs end -before earlier jobs. - -Append a string to every line in a text file: - -B - -If you remove B<-k> some of the lines may come out in the wrong order. - -Another example is B: - -B - -will give traceroute of foss.org.my, debian.org and -freenetproject.org, but it will be sorted according to which job -completed first. - -To keep the order the same as input run: - -B - -This will make sure the traceroute to foss.org.my will be printed -first. - -A bit more complex example is downloading a huge file in chunks in -parallel: Some internet connections will deliver more data if you -download files in parallel. For downloading files in parallel see: -"EXAMPLE: Download 10 images for each of the past 30 days". But if you -are downloading a big file you can download the file in chunks in -parallel. - -To download byte 10000000-19999999 you can use B: - -B > B - -To download a 1 GB file we need 100 10MB chunks downloaded and -combined in the correct order. - -B > B - -=head1 EXAMPLE: Parallel grep - -B greps recursively through directories. On multicore CPUs -GNU B can often speed this up. - -B - -This will run 1.5 job per core, and give 1000 arguments to B. - - -=head1 EXAMPLE: Using remote computers - -To run commands on a remote computer SSH needs to be set up and you -must be able to login without entering a password (B may be -handy). 
- -To run B on B: - - seq 1 10 | parallel --sshlogin server.example.com echo - -To run commands on more than one remote computer run: - - seq 1 10 | parallel --sshlogin server.example.com,server2.example.net echo - -Or: - - seq 1 10 | parallel --sshlogin server.example.com \ - --sshlogin server2.example.net echo - -If the login username is I on I use: - - seq 1 10 | parallel --sshlogin server.example.com \ - --sshlogin foo@server2.example.net echo - -To distribute the commands to a list of computers, make a file -I with all the computers: - - server.example.com - foo@server2.example.com - server3.example.com - -Then run: - - seq 1 10 | parallel --sshloginfile mycomputers echo - -To include the local computer add the special sshlogin ':' to the list: - - server.example.com - foo@server2.example.com - server3.example.com - : - -GNU B will try to determine the number of CPU cores on each -of the remote computers, so B<-j+0> will run one job per CPU core - -even if the remote computers do not have the same number of CPU cores. - -If the number of CPU cores on the remote servers is not identified -correctly the number of CPU cores can be added in front. Here the -server has 8 CPU cores. - - seq 1 10 | parallel --sshlogin 8/server.example.com echo - - -=head1 EXAMPLE: Transferring of files - -To recompress gzipped files with B using a remote server run: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer "zcat {} | bzip2 -9 >{.}.bz2" - -This will list the .gz-files in the I directory and all -directories below. Then it will transfer the files to -I to the corresponding directory in -I<$HOME/logs>. On I the file will be recompressed -using B and B resulting in the corresponding file with -I<.gz> replaced with I<.bz2>. 
- -If you want the resulting bz2-file to be transferred back to the local -computer add I<--return {.}.bz2>: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer --return {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" - -After the recompressing is done the I<.bz2>-file is transferred back to -the local computer and put next to the original I<.gz>-file. - -If you want to delete the transferred files on the remote computer add -I<--cleanup>. This will remove both the file transferred to the remote -computer and the files transferred from the remote computer: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" - -If you want run on several servers add the servers to I<--sshlogin> -either using ',' or multiple I<--sshlogin>: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" - -You can add the local computer using I<--sshlogin :>. This will disable the -removing and transferring for the local computer only: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --sshlogin : \ - --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" - -Often I<--transfer>, I<--return> and I<--cleanup> are used together. 
They can be -shortened to I<--trc>: - - find logs/ -name '*.gz' | \ - parallel --sshlogin server.example.com,server2.example.com \ - --sshlogin server3.example.com \ - --sshlogin : \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" - -With the file I containing the list of computers it becomes: - - find logs/ -name '*.gz' | parallel --sshloginfile mycomputers \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" - -If the file I<~/.parallel/sshloginfile> contains the list of computers -the special short hand I<-S ..> can be used: - - find logs/ -name '*.gz' | parallel -S .. \ - --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" - -=head1 EXAMPLE: Distributing work to local and remote computers - -Convert *.mp3 to *.ogg running one process per CPU core on local computer and server2: - - parallel --trc {.}.ogg -j+0 -S server2,: \ - 'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg' ::: *.mp3 - -=head1 EXAMPLE: Use multiple inputs in one command - -Copy files like foo.es.ext to foo.ext: - -B - -The perl command spits out 2 lines for each input. GNU B -takes 2 inputs (using B<-N2>) and replaces {1} and {2} with the inputs. - -Print the number on the opposing sides of a six sided die: - -B - -Convert files from all subdirs to PNG-files with consecutive numbers -(useful for making input PNG's for B): - -B - -Alternative version: - -B - - -=head1 EXAMPLE: Use a table as input - -Content of table_file.tsv: - - foobar - baz quux - -To run: - - cmd -o bar -i foo - cmd -o quux -i baz - -you can run: - -B - -Note: The default for GNU B is to remove the spaces around the columns. To keep the spaces: - -B - - -=head1 EXAMPLE: Working as cat | sh. Resource inexpensive jobs and evaluation - -GNU B can work similar to B. - -A resource inexpensive job is a job that takes very little CPU, disk -I/O and network I/O. Ping is an example of a resource inexpensive -job. wget is too - if the webpages are small. 
- -The content of the file jobs_to_run: - - ping -c 1 10.0.0.1 - wget http://status-server/status.cgi?ip=10.0.0.1 - ping -c 1 10.0.0.2 - wget http://status-server/status.cgi?ip=10.0.0.2 - ... - ping -c 1 10.0.0.255 - wget http://status-server/status.cgi?ip=10.0.0.255 - -To run 100 processes simultaneously do: - -B - -As there is not a I the option B<--command> is default -because the jobs needs to be evaluated by the shell. - - -=head1 EXAMPLE: Working as mutex and counting semaphore - -The command B is an alias for B. - -A counting semaphore will allow a given number of jobs to be started -in the background. When the number of jobs are running in the -background, GNU B will wait for one of these to complete before -starting another command. B will wait for all jobs to -complete. - -Run 10 jobs concurrently in the background: - - for i in `ls *.log` ; do - echo $i - sem -j10 gzip $i ";" echo done - done - sem --wait - -A mutex is a counting semaphore allowing only one job to run. This -will edit the file I and prepends the file with lines with the -numbers 1 to 3. - - seq 1 3 | parallel sem sed -i -e 'i{}' myfile - -As I can be very big it is important only one process edits -the file at the same time. - -Name the semaphore to have multiple different semaphores active at the -same time: - - seq 1 3 | parallel sem --id mymutex sed -i -e 'i{}' myfile - - -=head1 EXAMPLE: Start editor with filenames from stdin (standard input) - -You can use GNU Parallel to start interactive programs like emacs or vi: - -B - -B - -If there are more files than will fit on a single command line, the -editor will be started again with the remaining files. - - -=head1 EXAMPLE: GNU Parallel as queue system/batch manager - -GNU Parallel can work as a simple job queue system or batch manager. -The idea is to put the jobs into a file and have GNU Parallel read -from that continuously. 
As GNU Parallel will stop at end of file we -use tail to continue reading: - -B>B; B - -To submit your jobs to the queue: - -B>>B< jobqueue> - -You can of course use B<-S> to distribute the jobs to remote -computers: - -B>B; B - - -=head1 EXAMPLE: GNU Parallel as dir processor - -If you have a dir in which users drop files that needs to be processed -you can do this on GNU/Linux (If you know what B is -called on other platforms file a bug report): - -B - -This will run the command B on each file put into B or -subdirs of B. - -The B<-u> is needed because of a small bug in GNU B. If that -proves to be a problem, file a bug report. - -You can of course use B<-S> to distribute the jobs to remote -computers: - -B - - -=head1 QUOTING - -For more advanced use quoting may be an issue. The following will -print the filename for each line that has exactly 2 columns: - -B - -This can be done by GNU B using: - -B - -Notice how \'s, "'s, and $'s needs to be quoted. GNU B can do -the quoting by using option B<-q>: - -B - -However, this means you cannot make the shell interpret special -characters. For example this B: - -B>B<{.}"> - -B>B<{.}.bz2"> - -because > and | need to be interpreted by the shell. - -If you get errors like: - - sh: -c: line 0: syntax error near unexpected token - sh: Syntax error: Unterminated quoted string - sh: -c: line 0: unexpected EOF while looking for matching `'' - sh: -c: line 1: syntax error: unexpected end of file - -then you might try using B<-q>. - -If you are using B process substitution like B<<(cat foo)> then -you may try B<-q> and prepending I with B: - -B - -Or for substituting output: - -B>B<(gzip >>B<{}.tar.gz) | bzip2 >>B<{}.tar.bz2'> - -B: To avoid dealing with the quoting problems it may be -easier just to write a small script and have GNU B call that -script. - - -=head1 LIST RUNNING JOBS - -If you want a list of the jobs currently running you can run: - -B - -GNU B will then print the currently running jobs on STDERR. 
- - -=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS - -If you regret starting a lot of jobs you can simply break GNU B<parallel>, -but if you want to make sure you do not have half-completed jobs you -should send the signal B<SIGTERM> to GNU B<parallel>: - -B - -This will tell GNU B<parallel> to not start any new jobs, but wait until -the currently running jobs are finished before exiting. - - -=head1 ENVIRONMENT VARIABLES - -=over 9 - -=item $PARALLEL_PID - -The environment variable $PARALLEL_PID is set by GNU B<parallel> and -is visible to the jobs started from GNU B<parallel>. This makes it -possible for the jobs to communicate directly to GNU B<parallel>. -Remember to quote the $, so it gets evaluated by the correct -shell. - -B<Example:> If each of the jobs tests a solution and one of the jobs finds -the solution the job can tell GNU B<parallel> not to start more jobs -by: B<kill -TERM $PARALLEL_PID>. This only works on the local -computer. - - -=item $PARALLEL_SEQ - -$PARALLEL_SEQ will be set to the sequence number of the job -running. Remember to quote the $, so it gets evaluated by the correct -shell. - -B - -B - - -=item $TMPDIR - -Directory for temporary files. See: B<--tmpdir>. - - -=item $PARALLEL - -The environment variable $PARALLEL will be used as default options for -GNU B<parallel>. If the variable contains special shell characters -(e.g. $, *, or space) then these need to be escaped with \. - -B - -B - -can be written as: - -B - -B - -can be written as: - -B - -Notice the \ in the middle is needed because 'myssh' and 'user@server' -must be one argument. - -=back - -=head1 DEFAULT PROFILE (CONFIG FILE) - -The file ~/.parallel/config (formerly known as .parallelrc) will be -read if it exists. Lines starting with '#' will be ignored. It can be -formatted like the environment variable $PARALLEL, but it is often -easier to simply put each option on its own line. - -Options on the command line take precedence over the environment -variable $PARALLEL which takes precedence over the file -~/.parallel/config.
- -=head1 PROFILE FILES - -If B<--profile> is set, GNU B<parallel> will read the profile from that file instead of -~/.parallel/config. - -Example: Profile for running every command with B<-j+0> and B<nice> - - echo -j+0 nice > ~/.parallel/nice_profile - parallel -J nice_profile bzip2 -9 ::: * - -Example: Profile for running a perl script before every command: - - echo "perl -e '\$a=\$\$; print \$a,\" \",'\$PARALLEL_SEQ',\" \";';" > ~/.parallel/pre_perl - parallel -J pre_perl echo ::: * - -Note how the $ and " need to be quoted using \. - -Example: Profile for running distributed jobs with B<nice> on the -remote machines: - - echo -S .. nice > ~/.parallel/dist - parallel -J dist --trc {.}.bz2 bzip2 -9 ::: * - - -=head1 EXIT STATUS - -If B<--halt-on-error> 0 or not specified: - -=over 6 - -=item 0 - -All jobs ran without error. - -=item 1-253 - -Some of the jobs failed. The exit status gives the number of failed jobs. - -=item 254 - -More than 253 jobs failed. - -=item 255 - -Other error. - -=back - -If B<--halt-on-error> 1 or 2: Exit status of the failing job. - - -=head1 DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES - -There are a lot of programs with some of the functionality of GNU -B<parallel>. GNU B<parallel> strives to include the best of the -functionality without sacrificing ease of use. - -=head2 SUMMARY TABLE - -The following features are in some of the comparable tools: - -Inputs - I1. Arguments can be read from stdin - I2. Arguments can be read from a file - I3. Arguments can be read from multiple files - I4. Arguments can be read from command line - I5. Arguments can be read from a table - I6. Arguments can be read from the same file using #! (shebang) - I7. Line oriented input as default (Quoting of special chars not needed) - -Manipulation of input - M1. Composed command - M2. Multiple arguments can fill up an execution line - M3. Arguments can be put anywhere in the execution line - M4. Multiple arguments can be put anywhere in the execution line - M5.
Arguments can be replaced with context - M6. Input can be treated as complete execution line - -Outputs - O1. Grouping output so output from different jobs does not mix - O2. Send stderr to stderr - O3. Send stdout to stdout - O4. Order of output can be same as order of input - O5. Stdout only contains stdout from the command - O6. Stderr only contains stderr from the command - -Execution - E1. Running jobs in parallel - E2. List running jobs - E3. Finish running jobs, but do not start new jobs - E4. Number of running jobs can depend on number of cpus - E5. Finish running jobs, but do not start new jobs after first failure - E6. Number of running jobs can be adjusted while running - -Remote execution - R1. Jobs can be run on remote computers - R2. Basefiles can be transferred - R3. Argument files can be transferred - R4. Result files can be transferred - R5. Cleanup of transferred files - R6. No config files needed - R7. Do not run more than SSHD's MaxStartup can handle - R8. Configurable SSH command - R9. Retry if connection breaks occasionally - -Semaphore - S1. Possibility to work as a mutex - S2. Possibility to work as a counting semaphore - -Legend - - = no - x = not applicable - ID = yes - -As not every new version of the programs is tested the table may be -outdated. Please file a bug-report if you find errors (See REPORTING -BUGS). - -parallel: -I1 I2 I3 I4 I5 I6 I7 -M1 M2 M3 M4 M5 M6 -O1 O2 O3 O4 O5 O6 -E1 E2 E3 E4 E5 E6 -R1 R2 R3 R4 R5 R6 R7 R8 R9 -S1 S2 - -xargs: -I1 I2 - - - - - -- M2 M3 - - - -- O2 O3 - O5 O6 -E1 - - - - - -- - - - - x - - - -- - - -find -exec: -- - - x - x - -- M2 M3 - - - - -- O2 O3 O4 O5 O6 -- - - - - - - -- - - - - - - - - -x x - -make -j: -- - - - - - - -- - - - - - -O1 O2 O3 - x O6 -E1 - - - E5 - -- - - - - - - - - -- - - -ppss: -I1 I2 - - - - I7 -M1 - M3 - - M6 -O1 - - x - - -E1 E2 ?E3 E4 - - -R1 R2 R3 R4 - - ?R7 ? ?
-- - - -pexec: -I1 I2 - I4 I5 - - -M1 - M3 - - M6 -O1 O2 O3 - O5 O6 -E1 - - E4 - E6 -R1 - - - - R6 - - - -S1 - - -xjobs: TODO - Please file a bug-report if you know what features xjobs -supports (See REPORTING BUGS). - -prll: TODO - Please file a bug-report if you know what features prll -supports (See REPORTING BUGS). - -dxargs: TODO - Please file a bug-report if you know what features dxargs -supports (See REPORTING BUGS). - -mdm/middleman: TODO - Please file a bug-report if you know what -features mdm/middleman supports (See REPORTING BUGS). - -xapply: TODO - Please file a bug-report if you know what features xapply -supports (See REPORTING BUGS). - -paexec: TODO - Please file a bug-report if you know what features paexec -supports (See REPORTING BUGS). - -ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH -supports (See REPORTING BUGS). - - -=head2 DIFFERENCES BETWEEN xargs AND GNU Parallel - -B<xargs> offers some of the same possibilities as GNU B<parallel>. - -B<xargs> deals badly with special characters (such as space, ' and -"). To see the problem try this: - - touch important_file - touch 'not important_file' - ls not* | xargs rm - mkdir -p '12" records' - ls | xargs rmdir - -You can specify B<-0> or B<-d "\n">, but many input generators are not -optimized for using B<NUL> as separator but are optimized for -B<newline> as separator. E.g. B<head>, B<tail>, B<awk>, B<ls>, B<echo>, -B<sed>, B<tar -v>, B<perl> (B<-0> and \0 instead of \n), B<locate> -(requires using B<-0>), B<find> (requires using B<-print0>), B<grep> -(requires user to use B<-z> or B<-Z>), B<sort> (requires using B<-z>). - -So GNU B<parallel>'s newline separation can be emulated with: - -B<cat | xargs -d "\n" -n1 I<command>> - -B<xargs> can run a given number of jobs in parallel, but has no -support for running number-of-cpu-cores jobs in parallel. - -B<xargs> has no support for grouping the output, therefore output may -run together, e.g. the first half of a line is from one process and -the last half of the line is from another process. The example -B<Parallel grep> cannot be done reliably with B<xargs> because of -this.
To see this in action try: - - parallel perl -e '\$a=\"1{}\"x10000000\;print\ \$a,\"\\n\"' '>' {} ::: a b c d e f - ls -l a b c d e f - parallel -kP4 -n1 grep 1 > out.par ::: a b c d e f - echo a b c d e f | xargs -P4 -n1 grep 1 > out.xargs-unbuf - echo a b c d e f | xargs -P4 -n1 grep --line-buffered 1 > out.xargs-linebuf - echo a b c d e f | xargs -n1 grep --line-buffered 1 > out.xargs-serial - ls -l out* - md5sum out* - -B has no support for keeping the order of the output, therefore -if running jobs in parallel using B the output of the second -job cannot be postponed till the first job is done. - -B has no support for running jobs on remote computers. - -B has no support for context replace, so you will have to create the -arguments. - -If you use a replace string in B (B<-I>) you can not force -B to use more than one argument. - -Quoting in B works like B<-q> in GNU B. This means -composed commands and redirection require using B. - -B> B<{}.wc"> - -becomes - -B>B< {}.wc"> - -and - -B - -becomes - -B - - -=head2 DIFFERENCES BETWEEN find -exec AND GNU Parallel - -B offer some of the same possibilites as GNU B. - -B only works on files. So processing other input (such as -hosts or URLs) will require creating these inputs as files. B has no support for running commands in parallel. - - -=head2 DIFFERENCES BETWEEN make -j AND GNU Parallel - -B can run jobs in parallel, but requires a crafted Makefile -to do this. That results in extra quoting to get filename containing -newline to work correctly. - -B has no support for grouping the output, therefore output -may run together, e.g. the first half of a line is from one process -and the last half of the line is from another process. The example -B cannot be done reliably with B because of -this. - -(Very early versions of GNU B were coincidently implemented -using B). - - -=head2 DIFFERENCES BETWEEN ppss AND GNU Parallel - -B is also a tool for running jobs in parallel. 
- -The output of B is status information and thus not useful for -using as input for another command. The output from the jobs are put -into files. - -The argument replace string ($ITEM) cannot be changed. Arguments must -be quoted - thus arguments containing special characters (space '"&!*) -may cause problems. More than one argument is not supported. File -names containing newlines are not processed correctly. When reading -input from a file null cannot be used terminator. B needs to -read the whole input file before starting any jobs. - -Output and status information is stored in ppss_dir and thus requires -cleanup when completed. If the dir is not removed before running -B again it may cause nothing to happen as B thinks the -task is already done. GNU B will normally not need cleaning -up if running locally and will only need cleaning up if stopped -abnormally and running remote (B<--cleanup> may not complete if -stopped abnormally). The example B would require extra -postprocessing if written using B. - -For remote systems PPSS requires 3 steps: config, deploy, and -start. GNU B only requires one step. 
- -=head3 EXAMPLES FROM ppss MANUAL - -Here are the examples from B's manual page with the equivalent -using GNU B: - -B<1> ./ppss.sh standalone -d /path/to/files -c 'gzip ' - -B<1> find /path/to/files -type f | parallel -j+0 gzip - -B<2> ./ppss.sh standalone -d /path/to/files -c 'cp "$ITEM" /destination/dir ' - -B<2> find /path/to/files -type f | parallel -j+0 cp {} /destination/dir - -B<3> ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q ' - -B<3> parallel -a list-of-urls.txt wget -q - -B<4> ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q "$ITEM"' - -B<4> parallel -a list-of-urls.txt wget -q {} - -B<5> ./ppss config -C config.cfg -c 'encode.sh ' -d /source/dir -m -192.168.1.100 -u ppss -k ppss-key.key -S ./encode.sh -n nodes.txt -o -/some/output/dir --upload --download ; ./ppss deploy -C config.cfg ; -./ppss start -C config - -B<5> # parallel does not use configs. If you want a different username put it in nodes.txt: user@hostname - -B<5> find source/dir -type f | parallel --sshloginfile nodes.txt --trc {.}.mp3 lame -a {} -o {.}.mp3 --preset standard --quiet - -B<6> ./ppss stop -C config.cfg - -B<6> killall -TERM parallel - -B<7> ./ppss pause -C config.cfg - -B<7> Press: CTRL-Z or killall -SIGTSTP parallel - -B<8> ./ppss continue -C config.cfg - -B<8> Enter: fg or killall -SIGCONT parallel - -B<9> ./ppss.sh status -C config.cfg - -B<9> killall -SIGUSR2 parallel - - -=head2 DIFFERENCES BETWEEN pexec AND GNU Parallel - -B is also a tool for running jobs in parallel. 
- -Here are the examples from B's info page with the equivalent -using GNU B: - -B<1> pexec -o sqrt-%s.dat -p "$(seq 10)" -e NUM -n 4 -c -- \ - 'echo "scale=10000;sqrt($NUM)" | bc' - -B<1> seq 10 | parallel -j4 'echo "scale=10000;sqrt({})" | bc > sqrt-{}.dat' - -B<2> pexec -p "$(ls myfiles*.ext)" -i %s -o %s.sort -- sort - -B<2> ls myfiles*.ext | parallel sort {} ">{}.sort" - -B<3> pexec -f image.list -n auto -e B -u star.log -c -- \ - 'fistar $B.fits -f 100 -F id,x,y,flux -o $B.star' - -B<3> parallel -a image.list -j+0 \ - 'fistar {}.fits -f 100 -F id,x,y,flux -o {}.star' 2>star.log - -B<4> pexec -r *.png -e IMG -c -o - -- \ - 'convert $IMG ${IMG%.png}.jpeg ; "echo $IMG: done"' - -B<4> ls *.png | parallel 'convert {} {.}.jpeg; echo {}: done' - -B<5> pexec -r *.png -i %s -o %s.jpg -c 'pngtopnm | pnmtojpeg' - -B<5> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {}.jpg' - -B<6> for p in *.png ; do echo ${p%.png} ; done | \ - pexec -f - -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' - -B<6> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' - -B<7> LIST=$(for p in *.png ; do echo ${p%.png} ; done) - pexec -r $LIST -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' - -B<7> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' - -B<8> pexec -n 8 -r *.jpg -y unix -e IMG -c \ - 'pexec -j -m blockread -d $IMG | \ - jpegtopnm | pnmscale 0.5 | pnmtojpeg | \ - pexec -j -m blockwrite -s th_$IMG' - -B<8> Combining GNU B and GNU B. - -B<8> ls *jpg | parallel -j8 'sem --id blockread cat {} | jpegtopnm |' \ - 'pnmscale 0.5 | pnmtojpeg | sem --id blockwrite cat > th_{}' - -B<8> If reading and writing is done to the same disk, this may be -faster as only one process will be either reading or writing: - -B<8> ls *jpg | parallel -j8 'sem --id diskio cat {} | jpegtopnm |' \ - 'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}' - -=head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel - -B is also a tool for running jobs in parallel. 
It only supports -running jobs on your local computer. - -B deals badly with special characters just like B. See -the section B. - -Here are the examples from B's man page with the equivalent -using GNU B: - -B<1> ls -1 *.zip | xjobs unzip - -B<1> ls *.zip | parallel unzip - -B<2> ls -1 *.zip | xjobs -n unzip - -B<2> ls *.zip | parallel unzip >/dev/null - -B<3> find . -name '*.bak' | xjobs gzip - -B<3> find . -name '*.bak' | parallel gzip - -B<4> ls -1 *.jar | sed 's/\(.*\)/\1 > \1.idx/' | xjobs jar tf - -B<4> ls *.jar | parallel jar tf {} '>' {}.idx - -B<5> xjobs -s script - -B<5> cat script | parallel - -B<6> mkfifo /var/run/my_named_pipe; -xjobs -s /var/run/my_named_pipe & -echo unzip 1.zip >> /var/run/my_named_pipe; -echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe - -B<6> mkfifo /var/run/my_named_pipe; -cat /var/run/my_named_pipe | parallel & -echo unzip 1.zip >> /var/run/my_named_pipe; -echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe - - -=head2 DIFFERENCES BETWEEN prll AND GNU Parallel - -B is also a tool for running jobs in parallel. It does not -support running jobs on remote computers. - -B encourages using BASH aliases and BASH functions instead of -scripts. GNU B can use the aliases and functions that are -defined at login (using: B) but it will -never support running aliases and functions that are defined defined -later (see why -http://www.perlmonks.org/index.pl?node_id=484296). However, scripts or -composed commands work just fine. - -B generates a lot of status information on STDERR which makes it -harder to use the STDERR output of the job directly as input for -another program. - -Here is the example from B's man page with the equivalent -using GNU B: - -prll -s 'mogrify -flip $1' *.jpg - -parallel mogrify -flip ::: *.jpg - - -=head2 DIFFERENCES BETWEEN dxargs AND GNU Parallel - -B is also a tool for running jobs in parallel. - -B does not deal well with more simultaneous jobs than SSHD's -MaxStartup. 
B is only built for remote run jobs, but does not -support transferring of files. - - -=head2 DIFFERENCES BETWEEN mdm/middleman AND GNU Parallel - -middleman(mdm) is also a tool for running jobs in parallel. - -Here are the shellscripts of http://mdm.berlios.de/usage.html ported -to GNU B: - -B>B< result> - -B - -B - -=head2 DIFFERENCES BETWEEN xapply AND GNU Parallel - -B can run jobs in parallel on the local computer. - -Here are the examples from B's man page with the equivalent -using GNU B: - -B<1> xapply '(cd %1 && make all)' */ - -B<1> parallel 'cd {} && make all' ::: */ - -B<2> xapply -f 'diff %1 ../version5/%1' manifest | more - -B<2> parallel diff {} ../version5/{} < manifest | more - -B<3> xapply -p/dev/null -f 'diff %1 %2' manifest1 checklist1 - -B<3> parallel diff {1} {2} :::: manifest1 checklist1 - -B<4> xapply 'indent' *.c - -B<4> parallel indent ::: *.c - -B<5> find ~ksb/bin -type f ! -perm -111 -print | xapply -f -v 'chmod a+x' - - -B<5> find ~ksb/bin -type f ! -perm -111 -print | parallel -v chmod a+x - -B<6> find */ -... | fmt 960 1024 | xapply -f -i /dev/tty 'vi' - - -B<6> sh <(find */ -... | parallel -s 1024 echo vi) - -B<6> find */ -... | parallel -s 1024 -Xuj1 vi - -B<7> find ... | xapply -f -5 -i /dev/tty 'vi' - - - - - - -B<7> sh <(find ... |parallel -n5 echo vi) - -B<7> find ... |parallel -n5 -uj1 vi - -B<8> xapply -fn "" /etc/passwd - -B<8> parallel -k echo < /etc/passwd - -B<9> tr ':' '\012' < /etc/passwd | xapply -7 -nf 'chown %1 %6' - - - - - - - - -B<9> tr ':' '\012' < /etc/passwd | parallel -N7 chown {1} {6} - -B<10> xapply '[ -d %1/RCS ] || echo %1' */ - -B<10> parallel '[ -d {}/RCS ] || echo {}' ::: */ - -B<11> xapply -f '[ -f %1 ] && echo %1' List | ... - -B<11> parallel '[ -f {} ] && echo {}' < List | ... - - -=head2 DIFFERENCES BETWEEN paexec AND GNU Parallel - -B can run jobs in parallel on both the local and remote computers. - -B requires commands to print a blank line as the last -output. 
This means you will have to write a wrapper for most programs. - -B has a job dependency facility so a job can depend on another -job to be executed successfully. Sort of a poor-man's B. - -Here are the examples from B's example catalog with the equivalent -using GNU B: - -=over 1 - -=item 1_div_X_run: - - ../../paexec -s -l -c "`pwd`/1_div_X_cmd" -n +1 <. - -ClusterSSH runs the same command with the same arguments on a list of -machines - one per machine. This is typically used for administrating -several machines that are almost identical. - -GNU B runs the same (or different) commands with different -arguments in parallel possibly using remote machines to help -computing. If more than one machine is listed in B<-S> GNU B may -only use one of these (e.g. if there are 8 jobs to be run and one -machine has 8 cores). - -GNU B can be used as a poor-man's version of ClusterSSH: - -B - - -=head1 BUGS - -=head2 Quoting of newline - -Because of the way newline is quoted this will not work: - -echo 1,2,3 | parallel -vkd, "echo 'a{}'" - -However, this will work: - -echo 1,2,3 | parallel -vkd, echo a{} - -=head2 Startup speed - -GNU B is slow at starting up. Half of the startup time on -the local computer is spent finding the maximal length of a command -line. Setting B<-s> will remove this part of the startup time. - -When using multiple computers GNU B opens B connections -to them to figure out how many connections can be used reliably -simultaneously (Namely SSHD's MaxStartup). This test is done for each -host in serial, so if your --sshloginfile contains many hosts it may -be slow. - -=head2 --nice limits command length - -The current implementation of B<--nice> is too pessimistic in the max -allowed command length. It only uses a little more than half of what -it could. This affects -X and -m. If this becomes a real problem for -you file a bug-report. - - -=head1 REPORTING BUGS - -Report bugs to . 
- - -=head1 AUTHOR - -Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk - -Copyright (C) 2008,2009,2010 Ole Tange, http://ole.tange.dk - -Copyright (C) 2010 Ole Tange, http://ole.tange.dk and Free Software -Foundation, Inc. - -Parts of the manual concerning B compatibility is inspired by -the manual of B from GNU findutils 4.4.2. - - - -=head1 LICENSE - -Copyright (C) 2007,2008,2009,2010 Free Software Foundation, Inc. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3 of the License, or -at your option any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -=head2 Documentation license I - -Permission is granted to copy, distribute and/or modify this documentation -under the terms of the GNU Free Documentation License, Version 1.3 or -any later version published by the Free Software Foundation; with no -Invariant Sections, with no Front-Cover Texts, and with no Back-Cover -Texts. A copy of the license is included in the file fdl.txt. - -=head2 Documentation license II - -You are free: - -=over 9 - -=item B - -to copy, distribute and transmit the work - -=item B - -to adapt the work - -=back - -Under the following conditions: - -=over 9 - -=item B - -You must attribute the work in the manner specified by the author or -licensor (but not in any way that suggests that they endorse you or -your use of the work). - -=item B - -If you alter, transform, or build upon this work, you may distribute -the resulting work only under the same, similar or a compatible -license. 
- -=back - -With the understanding that: - -=over 9 - -=item B - -Any of the above conditions can be waived if you get permission from -the copyright holder. - -=item B - -Where the work or any of its elements is in the public domain under -applicable law, that status is in no way affected by the license. - -=item B - -In no way are any of the following rights affected by the license: - -=over 2 - -=item * - -Your fair dealing or fair use rights, or other applicable -copyright exceptions and limitations; - -=item * - -The author's moral rights; - -=item * - -Rights other persons may have either in the work itself or in -how the work is used, such as publicity or privacy rights. - -=back - -=back - -=over 9 - -=item B - -For any reuse or distribution, you must make clear to others the -license terms of this work. - -=back - -A copy of the full license is included in the file as cc-by-sa.txt. - -=head1 DEPENDENCIES - -GNU B uses Perl, and the Perl modules Getopt::Long, -IPC::Open3, Symbol, IO::File, POSIX, and File::Temp. For remote usage -it also uses Rsync with Ssh. 
- - -=head1 SEE ALSO - -B(1), B(1), B(1), B(1), B(1), -B(1), B(1), B(1), B(1) - -=cut - - use IPC::Open3; use Symbol qw(gensym); use IO::File; @@ -2701,7 +183,7 @@ sub get_options_from_array { sub parse_options { # Returns: N/A # Defaults: - $Global::version = 20101202; + $Global::version = 20101206; $Global::progname = 'parallel'; $Global::debug = 0; $Global::verbose = 0; diff --git a/src/parallel.pod b/src/parallel.pod new file mode 100644 index 00000000..5a550042 --- /dev/null +++ b/src/parallel.pod @@ -0,0 +1,2518 @@ +#!/usr/bin/perl -w + +=head1 NAME + +parallel - build and execute shell command lines from standard input in parallel + +=head1 SYNOPSIS + +B [options] [I [arguments]] < list_of_arguments + +B [options] [I [arguments]] B<:::> arguments + +B [options] [I [arguments]] B<::::> argfile(s) + +B --semaphore [options] I + +B<#!/usr/bin/parallel> --shebang [options] [I [arguments]] + +=head1 DESCRIPTION + +GNU B is a shell tool for executing jobs concurrently locally +or using remote computers. A job is typically a single command or a +small script that has to be run for each of the lines in the +input. The typical input is a list of files, a list of hosts, a list +of users, a list of URLs, or a list of tables. + +If you use B today you will find GNU B very easy to +use as GNU B is written to have the same options as +B. If you write loops in shell, you will find GNU B +may be able to replace most of the loops and make them run faster by +running several jobs simultaneously. If you use B or B you +will find GNU B will often make the command easier to read. + +GNU B makes sure output from the commands is the same output +as you would get had you run the commands sequentially. This makes it +possible to use output from GNU B as input for other +programs. + +For each line of input GNU B will execute I with +the line as arguments. If no I is given, the line of input is +executed. Several lines will be run in parallel. 
GNU B can +often be used as a substitute for B or B. + +Before looking at the options you may want to check out the Bs +after the list of options. That will give you an idea of what GNU +B is capable of. + +You can also watch the intro video for a quick introduction: +http://www.youtube.com/watch?v=OpaiGYxkSuQ or at +http://tinyogg.com/watch/TORaR/ and http://tinyogg.com/watch/hfxKj/ + +=head1 OPTIONS + +=over 9 + +=item I + +Command to execute. If I or the following arguments contain +{} every instance will be substituted with the input line. Setting a +command also invokes B<--file>. + +If I is given, GNU B will behave similar to B. If +I is not given GNU B will behave similar to B. + + +=item B<{}> + +Input line. This is the default replacement string and will normally +be used for putting the argument in the command line. It can be +changed with B<-I>. + + +=item B<{.}> + +Input line without extension. This is a specialized replacement string +with the extension removed. If the input line contains B<.> after the +last B the last B<.> till the end of the string will be removed and +B<{.}> will be replaced with the remaining. E.g. I becomes +I, I becomes I, I +becomes I, I remains I. If the +input line does not contain B<.> it will remain unchanged. + +B<{.}> can be used the same places as B<{}>. The replacement string +B<{.}> can be changed with B<-U>. + + +=item B<{/}> (beta testing) + +Basename of input line. This is a specialized replacement string +with the directory part removed. + +B<{/}> can be used the same places as B<{}>. The replacement string +B<{/}> can be changed with B<--basenamereplace>. + + +=item B<{/.}> (beta testing) + +Basename of input line without extension. This is a specialized +replacement string with the directory and extension part removed. It +is a combination of B<{/}> and B<{.}>. + +B<{/.}> can be used the same places as B<{}>. The replacement string +B<{/.}> can be changed with B<--basenameextensionreplace>. 
+ + +=item B<{>IB<}> + +Argument from argument file I or the I'th argument. See B<-a> +and B<-N>. + +B<{>IB<}> can be used the same places as B<{}>. + + +=item B<{>I.B<}> + +Argument from argument file I or the I'th argument without +extension. It is a combination of B<{>IB<}> and B<{.}>. + +B<{>I.B<}> can be used the same places as B<{>IB<}>. + + +=item B<{>I/B<}> (beta testing) + +Basename of argument from argument file I or the I'th argument. +It is a combination of B<{>IB<}> and B<{/}>. See B<-a> and B<-N>. + +B<{>I/B<}> can be used the same places as B<{>IB<}>. + + +=item B<{>I/.B<}> (beta testing) + +Basename of argument from argument file I or the I'th argument +without extension. It is a combination of B<{>IB<}>, B<{/}>, and +B<{.}>. See B<-a> and B<-N>. + +B<{>I/.B<}> can be used the same places as B<{>IB<}>. + + + +=item B<:::> I + +Use arguments from the command line as input instead of from stdin +(standard input). Unlike other options for GNU B B<:::> is +placed after the I and before the arguments. + +The following are equivalent: + + (echo file1; echo file2) | parallel gzip + parallel gzip ::: file1 file2 + parallel gzip {} ::: file1 file2 + parallel --arg-sep ,, gzip {} ,, file1 file2 + parallel --arg-sep ,, gzip ,, file1 file2 + parallel ::: "gzip file1" "gzip file2" + +To avoid treating B<:::> as special use B<--arg-sep> to set the +argument separator to something else. See also B<--arg-sep>. + +stdin (standard input) will be passed to the first process run. + +If B<--arg-file> is set arguments from that file will be appended. + + +=item B<::::> I + +Another way to write B<-a> I B<-a> I ... + +See B<-a>. + + +=item B<--null> + +=item B<-0> + +Use NUL as delimiter. Normally input lines will end in \n +(newline). If they end in \0 (NUL), then use this option. It is useful +for processing arguments that may contain \n (newline). + + +=item B<--arg-file> I + +=item B<-a> I + +Read items from the file I instead of stdin (standard input). 
If +you use this option, stdin is given to the first process run. +Otherwise, stdin is redirected from /dev/null. + +If multiple B<-a> are given, one line will be read from each of the +files. The arguments can be accessed in the command as B<{1}> +.. B<{>I<n>B<}>, so B<{1}> will be a line from the first file, and +B<{6}> will refer to the line with the same line number from the 6th +file. + + +=item B<--arg-file-sep> I<sep-str> + +Use I<sep-str> instead of B<::::> as separator string between command +and argument files. Useful if B<::::> is used for something else by the +command. + +See also: B<::::>. + + +=item B<--arg-sep> I<sep-str> + +Use I<sep-str> instead of B<:::> as separator string. Useful if B<:::> +is used for something else by the command. + +Also useful if your command uses B<:::> but you still want to read +arguments from stdin (standard input): Simply change B<--arg-sep> to a +string that is not in the command line. + +See also: B<:::>. + + +=item B<--basefile> I<file> + +=item B<-B> I<file> + +I<file> will be transferred to each sshlogin before a job is +started. It will be removed if B<--cleanup> is active. The file may be +a script to run or some common base data needed for the jobs. +Multiple B<-B> can be specified to transfer more basefiles. The +I<file> will be transferred the same way as B<--transfer>. + + +=item B<--basenamereplace> I<replace-str> (beta testing) + +Use the replacement string I<replace-str> instead of B<{/}> for basename of input line. + + +=item B<--basenameextensionreplace> I<replace-str> (beta testing) + +Use the replacement string I<replace-str> instead of B<{/.}> for basename of input line without extension. + + +=item B<--bg> (beta testing) + +Run command in background thus GNU B<parallel> will not wait for +completion of the command before exiting. This is the default if +B<--semaphore> is set. + +See also: B<--fg> + +Implies B<--semaphore>. + + +=item B<--cleanup> + +Remove transferred files. B<--cleanup> will remove the transferred files +on the remote server after processing is done.
+ + find log -name '*gz' | parallel \ + --sshlogin server.example.com --transfer --return {.}.bz2 \ + --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +With B<--transfer> the file transferred to the remote server will be +removed on the remote server. Directories created will not be removed +- even if they are empty. + +With B<--return> the file transferred from the remote server will be +removed on the remote server. Directories created will not be removed +- even if they are empty. + +B<--cleanup> is ignored when not used with B<--transfer> or B<--return>. + + +=item B<--colsep> I<regexp> + +=item B<-C> I<regexp> + +Column separator. The input will be treated as a table with I<regexp> +separating the columns. The n'th column can be accessed using +B<{>I<n>B<}> or B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column. + +B<--colsep> implies B<--trim rl>. + +I<regexp> is a Perl Regular Expression: +http://perldoc.perl.org/perlre.html + + +=item B<--command> + +=item B<-c> (Use B<--command> as B<-c> may be removed in later versions) + +Line is a command. The input line contains more than one argument or +the input line needs to be evaluated by the shell. This is the default +if I<command> is not set. Can be reversed with B<--file>. + +Most people will never need this because GNU B<parallel> normally +selects the correct B<--file> or B<--command>. + + +=item B<--delimiter> I<delim> + +=item B<-d> I<delim> + +Input items are terminated by the specified character. Quotes and +backslash are not special; every character in the input is taken +literally. Disables the end-of-file string, which is treated like any +other argument. This can be used when the input consists of simply +newline-separated items, although it is almost always better to design +your program to use --null where this is possible. The specified +delimiter may be a single character, a C-style character escape such +as \n, or an octal or hexadecimal escape code. Octal and +hexadecimal escape codes are understood as for the printf command. +Multibyte characters are not supported.
+ +=item B<-E> I<eof-str> + +Set the end of file string to eof-str. If the end of file string +occurs as a line of input, the rest of the input is ignored. If +neither B<-E> nor B<-e> is used, no end of file string is used. + + +=item B<--eof>[=I<eof-str>] + +=item B<-e>[I<eof-str>] + +This option is a synonym for the B<-E> option. Use B<-E> instead, +because it is POSIX compliant for B<xargs> while this option is not. +If I<eof-str> is omitted, there is no end of file string. If neither +B<-E> nor B<-e> is used, no end of file string is used. + + +=item B<--eta> + +Show the estimated number of seconds before finishing. This forces GNU +B<parallel> to read all jobs before starting to find the number of +jobs. GNU B<parallel> normally only reads the next job to run. +Implies B<--progress>. + + +=item B<--fg> (beta testing) + +Run command in foreground thus GNU B<parallel> will wait for +completion of the command before exiting. + +See also: B<--bg> + +Implies B<--semaphore>. + + +=item B<--file> + +=item B<-f> (Use B<--file> as B<-f> may be removed in later versions) + +Line is a filename. The input line contains a filename that will be +quoted so it is not evaluated by the shell. This is the default if +I<command> is set. Can be reversed with B<--command>. + +Most people will never need this because GNU B<parallel> normally +selects the correct B<--file> or B<--command>. + + +=item B<--group> + +=item B<-g> + +Group output. Output from each job is grouped together and is only +printed when the command is finished. STDERR first followed by STDOUT. +B<-g> is the default. Can be reversed with B<-u>. + +=item B<--help> + +=item B<-h> + +Print a summary of the options to GNU B<parallel> and exit. + + +=item B<--halt-on-error> <0|1|2> + +=item B<-H> <0|1|2> + +=over 3 + +=item 0 + +Do not halt if a job fails. Exit status will be the number of jobs +failed. This is the default. + +=item 1 + +Do not start new jobs if a job fails, but complete the running jobs +including cleanup. The exit status will be the exit status from the +last failing job.
+ +=item 2 + +Kill off all jobs immediately and exit without cleanup. The exit +status will be the exit status from the failing job. + +=back + + +=item B<-I> I + +Use the replacement string I instead of {}. + + +=item B<--replace>[=I] + +=item B<-i>[I] + +This option is a synonym for B<-I>I if I is +specified, and for B<-I>{} otherwise. This option is deprecated; +use B<-I> instead. + + +=item B<--jobs> I + +=item B<-j> I + +=item B<--max-procs> I + +=item B<-P> I + +Run up to N jobs in parallel. 0 means as many as possible. Default is +9. + +If B<--semaphore> is set default is 1 thus making a mutex. + + +=item B<--jobs> I<+N> + +=item B<-j> I<+N> + +=item B<--max-procs> I<+N> + +=item B<-P> I<+N> + +Add N to the number of CPU cores. Run this many jobs in parallel. For +compute intensive jobs B<-j> +0 is useful as it will run +number-of-cpu-cores jobs simultaneously. See also +B<--use-cpus-instead-of-cores>. + + +=item B<--jobs> I<-N> + +=item B<-j> I<-N> + +=item B<--max-procs> I<-N> + +=item B<-P> I<-N> + +Subtract N from the number of CPU cores. Run this many jobs in parallel. +If the evaluated number is less than 1 then 1 will be used. See also +B<--use-cpus-instead-of-cores>. + + +=item B<--jobs> I% + +=item B<-j> I% + +=item B<--max-procs> I% + +=item B<-P> I% + +Multiply N% with the number of CPU cores. Run this many jobs in parallel. +If the evaluated number is less than 1 then 1 will be used. See also +B<--use-cpus-instead-of-cores>. + + +=item B<--jobs> I + +=item B<-j> I + +=item B<--max-procs> I + +=item B<-P> I + +Read parameter from file. Use the content of I as parameter +for I<-j>. E.g. I could contain the string 100% or +2 or +10. If I is changed when a job completes, I is +read again and the new number of jobs is computed. If the number is +lower than before, running jobs will be allowed to finish but new jobs +will not be started until the wanted number of jobs has been reached. 
+This makes it possible to change the number of simultaneous running +jobs while GNU B is running. + + +=item B<--keeporder> + +=item B<-k> + +Keep sequence of output same as the order of input. If jobs 1 2 3 4 +end in the sequence 3 1 4 2 the output will still be 1 2 3 4. + + +=item B<-L> I + +Use at most I nonblank input lines per command line. +Trailing blanks cause an input line to be logically continued on the +next input line. + +Implies B<-X> unless B<-m> is set. + + +=item B<--max-lines>[=I] + +=item B<-l>[I] + +Synonym for the B<-L> option. Unlike B<-L>, the I argument +is optional. If I is not specified, it defaults to one. +The B<-l> option is deprecated since the POSIX standard specifies +B<-L> instead. + +Implies B<-X> unless B<-m> is set. + + +=item B<--load> I (experimental) + +Do not start new jobs on a given machine unless the load is less than +I. I uses the same syntax as B<--jobs>, so I<100%> +is a valid setting. + +The load average is only sampled every 10 seconds to avoid stressing +small machines. + + +=item B<--controlmaster> (experimental) + +=item B<-M> (experimental) + +Use ssh's ControlMaster to make ssh connections faster. Useful if jobs +run remote and are very fast to run. This is disabled for sshlogins +that specify their own ssh command. + + +=item B<--xargs> + +=item B<-m> + +Multiple. Insert as many arguments as the command line length +permits. If B<{}> is not used the arguments will be appended to the +line. If B<{}> is used multiple times each B<{}> will be replaced +with all the arguments. + +Support for B<-m> with B<--sshlogin> is limited and may fail. + +See also B<-X> for context replace. If in doubt use B<-X> as that will +most likely do what is needed. + + +=item B<--progress> + +Show progress of computations. List the computers involved in the task +with number of CPU cores detected and the max number of jobs to +run. 
After that show progress for each computer: number of running +jobs, number of completed jobs, and percentage of all jobs done by +this computer. The percentage will only be available after all jobs +have been scheduled as GNU B<parallel> only reads the next job when +ready to schedule it - this is to avoid wasting time and memory by +reading everything at startup. + +By sending GNU B<parallel> SIGUSR2 you can toggle turning on/off +B<--progress> on a running GNU B<parallel> process. + + +=item B<--max-args>=I<max-args> + +=item B<-n> I<max-args> + +Use at most I<max-args> arguments per command line. Fewer than +I<max-args> arguments will be used if the size (see the B<-s> option) +is exceeded, unless the B<-x> option is given, in which case +GNU B<parallel> will exit. + +Implies B<-X> unless B<-m> is set. + + +=item B<--max-replace-args>=I<max-args> + +=item B<-N> I<max-args> + +Use at most I<max-args> arguments per command line. Like B<-n> but +also makes replacement strings B<{1}> .. B<{>I<max-args>B<}> that +represent argument 1 .. I<max-args>. If too few args the B<{>I<n>B<}> will +be empty. + +This will set the owner of the homedir to the user: + +B<tr ':' '\012' < /etc/passwd | parallel -N7 chown {1} {6}> + +Implies B<-X> unless B<-m> is set. + + +=item B<--max-line-length-allowed> + +Print the maximal number of characters allowed on the command line and +exit (used by GNU B<parallel> itself to determine the line length +on remote computers). + + +=item B<--number-of-cpus> + +Print the number of physical CPUs and exit (used by GNU B<parallel> +itself to determine the number of physical CPUs on remote computers). + + +=item B<--number-of-cores> + +Print the number of CPU cores and exit (used by GNU B<parallel> itself +to determine the number of CPU cores on remote computers). + + +=item B<--nice> I<niceness> (beta testing) + +Run the command at this niceness. For simple commands you can just add +B<nice> in front of the command. But if the command consists of more +sub commands (Like: ls|wc) then prepending B<nice> will not always +work. B<--nice> will make sure all sub commands are niced.
+ + +=item B<--interactive> + +=item B<-p> + +Prompt the user about whether to run each command line and read a line +from the terminal. Only run the command line if the response starts +with 'y' or 'Y'. Implies B<-t>. + + +=item B<--profile> I + +=item B<-J> I + +Use profile I for options. This is useful if you want to +have multiple profiles. You could have one profile for running jobs in +parallel on the local machine and a different profile for running jobs +on remote machines. See the section PROFILE FILES for examples. + +I corresponds to the file ~/.parallel/I. + +Default: config + +=item B<--quote> + +=item B<-q> + +Quote I. This will quote the command line so special +characters are not interpreted by the shell. See the section +QUOTING. Most people will never need this. Quoting is disabled by +default. + + +=item B<--no-run-if-empty> + +=item B<-r> + +If the stdin (standard input) only contains whitespace, do not run the command. + + +=item B<--retries> I + +If a job fails, retry it on another computer. Do this I times. If +there are fewer than I computers in B<--sshlogin> GNU parallel will +re-use the computers. This is useful if some jobs fail for no apparent +reason (such as network failure). + + +=item B<--return> I + +Transfer files from remote servers. B<--return> is used with +B<--sshlogin> when the arguments are files on the remote servers. When +processing is done the file I will be transferred +from the remote server using B and will be put relative to +the default login dir. E.g. + + echo foo/bar.txt | parallel \ + --sshlogin server.example.com --return {.}.out touch {.}.out + +This will transfer the file I<$HOME/foo/bar.out> from the server +I to the file I after running +B on I. + + echo /tmp/foo/bar.txt | parallel \ + --sshlogin server.example.com --return {.}.out touch {.}.out + +This will transfer the file I from the server +I to the file I after running +B on I. 
+ +Multiple files can be transferred by repeating the options multiple +times: + + echo /tmp/foo/bar.txt | \ + parallel --sshlogin server.example.com \ + --return {.}.out --return {.}.out2 touch {.}.out {.}.out2 + +B<--return> is often used with B<--transfer> and B<--cleanup>. + +B<--return> is ignored when used with B<--sshlogin :> or when not used +with B<--sshlogin>. + + +=item B<--max-chars>=I + +=item B<-s> I + +Use at most I characters per command line, including the +command and initial-arguments and the terminating nulls at the ends of +the argument strings. The largest allowed value is system-dependent, +and is calculated as the argument length limit for exec, less the size +of your environment. The default value is the maximum. + +Implies B<-X> unless B<-m> is set. + + +=item B<--show-limits> + +Display the limits on the command-line length which are imposed by the +operating system and the B<-s> option. Pipe the input from /dev/null +(and perhaps specify --no-run-if-empty) if you don't want GNU B +to do anything. + + +=item B<--semaphore> + +Work as a counting semaphore. B<--semaphore> will cause GNU +B to start I in the background. When the number of +simultaneous jobs is reached, GNU B will wait for one of +these to complete before starting another command. + +B<--semaphore> implies B<--bg> unless B<--fg> is specified. + +B<--semaphore> implies B<--semaphorename `tty`> unless +B<--semaphorename> is specified. + +Used with B<--fg>, B<--wait>, and B<--semaphorename>. + +The command B is an alias for B. + + +=item B<--semaphorename> I + +=item B<--id> I + +The name of the semaphore to use. The semaphore can be shared between +multiple processes. + +Implies B<--semaphore>. + + +=item B<--semaphoretimeout> I (not implemented) + +If the semaphore is not released within secs seconds, take it anyway. + +Implies B<--semaphore>. + + +=item B<--skip-first-line> + +Do not use the first line of input (used by GNU B itself +when called with B<--shebang>). 
+ + +=item B<-S> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> + +=item B<--sshlogin> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> + +Distribute jobs to remote servers. The jobs will be run on a list of +remote servers. GNU B<parallel> will determine the number of CPU +cores on the remote servers and run the number of jobs as specified by +B<-j>. If the number I<ncpu> is given GNU B<parallel> will use this +number for number of CPU cores on the host. Normally I<ncpu> will not +be needed. + +An I<sshlogin> is of the form: + + [sshcommand [options]][username@]hostname + +The sshlogin must not require a password. + +The sshlogin ':' is special, it means 'no ssh' and will therefore run +on the local computer. + +The sshlogin '..' is special, it reads sshlogins from ~/.parallel/sshloginfile + +To specify more sshlogins separate the sshlogins by comma or repeat +the options multiple times. + +For examples: see B<--sshloginfile>. + +The remote host must have GNU B<parallel> installed. + +B<--sshlogin> is known to cause problems with B<-m> and B<-X>. + + +=item B<--sshloginfile> I + +File with sshlogins. The file consists of sshlogins on separate +lines. Empty lines and lines starting with '#' are ignored. Example: + + server.example.com + username@server2.example.com + 8/my-8-core-server.example.com + 2/my_other_username@my-dualcore.example.net + # This server has SSH running on port 2222 + ssh -p 2222 server.example.net + 4/ssh -p 2222 quadserver.example.net + # Use a different ssh program + myssh -p 2222 -l myusername hexacpu.example.net + # Use a different ssh program with default number of cores + //usr/local/bin/myssh -p 2222 -l myusername hexacpu.example.net + # Use a different ssh program with 6 cores + 6//usr/local/bin/myssh -p 2222 -l myusername hexacpu.example.net + # Assume 16 cores on the local computer + 16/: + +When using a different ssh program the last argument must be the hostname. + +The sshloginfile '..' is special, it reads sshlogins from +~/.parallel/sshloginfile + + +=item B<--silent> + +Silent.
The job to be run will not be printed. This is the default. +Can be reversed with B<-v>. + + +=item B<--tmpdir> I + +Directory for temporary files. GNU B normally buffers output +into temporary files in /tmp. By setting B<--tmpdir> you can use a +different dir for the files. Setting B<--tmpdir> is equivalent to +setting $TMPDIR. + + +=item B<--verbose> + +=item B<-t> + +Print the command line on the standard error output before executing +it. + +See also B<-v> and B<-p>. + + +=item B<--transfer> + +Transfer files to remote servers. B<--transfer> is used with +B<--sshlogin> when the arguments are files and should be transferred to +the remote servers. The files will be transferred using B and +will be put relative to the default login dir. E.g. + + echo foo/bar.txt | parallel \ + --sshlogin server.example.com --transfer wc + +This will transfer the file I to the server +I to the file I<$HOME/foo/bar.txt> before running +B on I. + + echo /tmp/foo/bar.txt | parallel \ + --sshlogin server.example.com --transfer wc + +This will transfer the file I to the server +I to the file I before running +B on I. + +B<--transfer> is often used with B<--return> and B<--cleanup>. + +B<--transfer> is ignored when used with B<--sshlogin :> or when not used with B<--sshlogin>. + + +=item B<--trc> I + +Transfer, Return, Cleanup. Short hand for: + +B<--transfer> B<--return> I B<--cleanup> + + +=item B<--trim> + +Trim white space in input. + +=over 4 + +=item n + +No trim. Input is not modified. This is the default. + +=item l + +Left trim. Remove white space from start of input. E.g. " a bc " -> "a bc ". + +=item r + +Right trim. Remove white space from end of input. E.g. " a bc " -> " a bc". + +=item lr + +=item rl + +Both trim. Remove white space from both start and end of input. E.g. " +a bc " -> "a bc". This is the default if B<--colsep> is used. + +=back + + +=item B<--ungroup> + +=item B<-u> + +Ungroup output. Output is printed as soon as possible. 
This may cause +output from different commands to be mixed. GNU B<parallel> runs +faster with B<-u>. Can be reversed with B<-g>. + + +=item B<--extensionreplace> I + +=item B<-U> I + +Use the replacement string I instead of {.} for input line without extension. + + +=item B<--use-cpus-instead-of-cores> + +Count the number of physical CPUs instead of CPU cores. When computing +how many jobs to run simultaneously relative to the number of CPU cores +you can ask GNU B<parallel> to instead look at the number of physical +CPUs. This will make sense for computers that have hyperthreading as +two jobs running on one CPU with hyperthreading will run slower than +two jobs running on two physical CPUs. Some multi-core CPUs can run +faster if only one thread is running per physical CPU. Most users will +not need this option. + + +=item B<-v> + +Verbose. Print the job to be run on STDOUT. Can be reversed with +B<--silent>. See also B<-t>. + +Use B<-v> B<-v> to print the wrapping ssh command when running remotely. + + +=item B<--version> + +=item B<-V> + +Print the version of GNU B<parallel> and exit. + + +=item B<--workdir> I (beta testing) + +=item B<-W> I (beta testing) + +Files transferred using B<--transfer> and B<--return> will be relative +to I on remote machines, and the command will be executed in +that dir. The special workdir B<...> will create a workdir in +B<~/.parallel/tmp/> on the remote machines and will be removed if +using B<--cleanup>. + + +=item B<--wait> (beta testing) + +Wait for all commands to complete. + +Implies B<--semaphore>. + + +=item B<-X> + +Multiple arguments with context replace. Insert as many arguments as +the command line length permits. If B<{}> is not used the arguments +will be appended to the line. If B<{}> is used as part of a word +(like I) then the whole word will be repeated. If B<{}> is +used multiple times each B<{}> will be replaced with the arguments.
+ +Normally B<-X> will do the right thing, whereas B<-m> can give +unexpected results if B<{}> is used as part of a word. + +Support for B<-X> with B<--sshlogin> is limited and may fail. + +See also B<-m>. + + +=item B<--exit> + +=item B<-x> + +Exit if the size (see the B<-s> option) is exceeded. + + +=item B<--shebang> + +=item B<--hashbang> + +=item B<-Y> + +GNU B can be called as a shebang (#!) command as the first line of a script. Like this: + + #!/usr/bin/parallel -Yr traceroute + + foss.org.my + debian.org + freenetproject.org + +For this to work B<--shebang> or B<-Y> must be set as the first option. + + +=back + +=head1 EXAMPLE: Working as xargs -n1. Argument appending + +GNU B can work similar to B. + +To compress all html files using B run: + +B + +If the file names may contain a newline use B<-0>. Substitute FOO BAR with +FUBAR in all files in this dir and subdirs: + +B + +Note B<-q> is needed because of the space in 'FOO BAR'. + + +=head1 EXAMPLE: Reading arguments from command line + +GNU B can take the arguments from command line instead of +stdin (standard input). To compress all html files in the current dir +using B run: + +B + +To convert *.wav to *.mp3 using LAME running one process per CPU core +run: + +B + + +=head1 EXAMPLE: Inserting multiple arguments + +When moving a lot of files like this: B you will +sometimes get the error: + +B + +because there are too many files. You can instead do: + +B + +This will run B for each file. It can be done faster if B gets +as many arguments that will fit on the line: + +B + + +=head1 EXAMPLE: Context replace + +To remove the files I .. I you could do: + +B + +You could also do: + +B + +The first will run B 10000 times, while the last will only run +B as many times needed to keep the command line length short +enough to avoid B (it typically runs 1-2 times). + +You could also run: + +B + +This will also only run B as many times needed to keep the command +line length short enough. 
+ + +=head1 EXAMPLE: Compute intensive jobs and substitution + +If ImageMagick is installed this will generate a thumbnail of a jpg +file: + +B + +If the system has more than 1 CPU core it can be run with +number-of-cpu-cores jobs in parallel (B<-j> +0). This will do that for +all jpg files in a directory: + +B + +To do it recursively use B: + +B + +Notice how the argument has to start with B<{}> as B<{}> will include path +(e.g. running B would clearly be wrong). The command will +generate files like ./foo/bar.jpg_thumb.jpg. + +Use B<{.}> to avoid the extra .jpg in the file name. This command will +make files like ./foo/bar_thumb.jpg: + +B + + +=head1 EXAMPLE: Substitution and redirection + +This will generate an uncompressed version of .gz-files next to the .gz-file: + +B>B<"{.} ::: *.gz> + +Quoting of > is necessary to postpone the redirection. Another +solution is to quote the whole command: + +B>B<{.}" ::: *.gz> + +Other special shell characters (such as * ; $ > < | >> <<) also need +to be put in quotes, as they may otherwise be interpreted by the shell +and not given to GNU B<parallel>. + +=head1 EXAMPLE: Composed commands + +A job can consist of several commands. This will print the number of +files in each directory: + +B + +To put the output in a file called .dir: + +B> B<{}.dir'> + +Even small shell scripts can be run by GNU B<parallel>: + +B + +Given a list of URLs, list all URLs that fail to download. Print the +line number and the URL. + +B>B + + +=head1 EXAMPLE: Removing file extension when processing files + +When processing files removing the file extension using B<{.}> is +often useful.
+ +Create a directory for each zip-file and unzip it in that dir: + +B + +Recompress all .gz files in current directory using B running 1 +job per CPU core in parallel: + +B>B<{.}.bz2 && rm {}" ::: *.gz> + +Convert all WAV files to MP3 using LAME: + +B + +Put all converted in the same directory: + +B + +=head1 EXAMPLE: Removing two file extensions when processing files and +calling GNU Parallel from itself + +If you have directory with tar.gz files and want these extracted in +the corresponding dir (e.g foo.tar.gz will be extracted in the dir +foo) you can do: + +B + +=head1 EXAMPLE: Download 10 images for each of the past 30 days + +Let us assume a website stores images like: + + http://www.example.com/path/to/YYYYMMDD_##.jpg + +where YYYYMMDD is the date and ## is the number 01-10. This will +generate the past 30 days as YYYYMMDD: + +B + +Based on this we can let GNU B generate 10 Bs per day: + +I B<| parallel -I {o} seq -w 1 10 "|" parallel wget +http://www.example.com/path/to/{o}_{}.jpg> + +=head1 EXAMPLE: Rewriting a for-loop and a while-read-loop + +for-loops like this: + + (for x in `cat list` ; do + do_something $x + done) | process_output + +and while-read-loops like this: + + cat list | (while read x ; do + do_something $x + done) | process_output + +can be written like this: + +B + +If the processing requires more steps the for-loop like this: + + (for x in `cat list` ; do + no_extension=${x%.*}; + do_something $x scale $no_extension.jpg + do_step2 <$x $no_extension + done) | process_output + +and while-loops like this: + + cat list | (while read x ; do + no_extension=${x%.*}; + do_something $x scale $no_extension.jpg + do_step2 <$x $no_extension + done) | process_output + +can be written like this: + +B + + +=head1 EXAMPLE: Group output lines + +When running jobs that output data, you often do not want the output +of multiple jobs to run together. GNU B defaults to grouping the +output of each job, so the output is printed when the job finishes. 
If +you want the output to be printed while the job is running you can use +B<-u>. + +Compare the output of: + +B + +to the output of: + +B + + +=head1 EXAMPLE: Keep order of output same as order of input + +Normally the output of a job will be printed as soon as it +completes. Sometimes you want the order of the output to remain the +same as the order of the input. This is often important, if the output +is used as input for another system. B<-k> will make sure the order of +output will be in the same order as input even if later jobs end +before earlier jobs. + +Append a string to every line in a text file: + +B + +If you remove B<-k> some of the lines may come out in the wrong order. + +Another example is B: + +B + +will give traceroute of foss.org.my, debian.org and +freenetproject.org, but it will be sorted according to which job +completed first. + +To keep the order the same as input run: + +B + +This will make sure the traceroute to foss.org.my will be printed +first. + +A bit more complex example is downloading a huge file in chunks in +parallel: Some internet connections will deliver more data if you +download files in parallel. For downloading files in parallel see: +"EXAMPLE: Download 10 images for each of the past 30 days". But if you +are downloading a big file you can download the file in chunks in +parallel. + +To download byte 10000000-19999999 you can use B: + +B > B + +To download a 1 GB file we need 100 10MB chunks downloaded and +combined in the correct order. + +B > B + +=head1 EXAMPLE: Parallel grep + +B greps recursively through directories. On multicore CPUs +GNU B can often speed this up. + +B + +This will run 1.5 job per core, and give 1000 arguments to B. + + +=head1 EXAMPLE: Using remote computers + +To run commands on a remote computer SSH needs to be set up and you +must be able to login without entering a password (B may be +handy). 
+ +To run B on B: + + seq 1 10 | parallel --sshlogin server.example.com echo + +To run commands on more than one remote computer run: + + seq 1 10 | parallel --sshlogin server.example.com,server2.example.net echo + +Or: + + seq 1 10 | parallel --sshlogin server.example.com \ + --sshlogin server2.example.net echo + +If the login username is I on I use: + + seq 1 10 | parallel --sshlogin server.example.com \ + --sshlogin foo@server2.example.net echo + +To distribute the commands to a list of computers, make a file +I with all the computers: + + server.example.com + foo@server2.example.com + server3.example.com + +Then run: + + seq 1 10 | parallel --sshloginfile mycomputers echo + +To include the local computer add the special sshlogin ':' to the list: + + server.example.com + foo@server2.example.com + server3.example.com + : + +GNU B will try to determine the number of CPU cores on each +of the remote computers, so B<-j+0> will run one job per CPU core - +even if the remote computers do not have the same number of CPU cores. + +If the number of CPU cores on the remote servers is not identified +correctly the number of CPU cores can be added in front. Here the +server has 8 CPU cores. + + seq 1 10 | parallel --sshlogin 8/server.example.com echo + + +=head1 EXAMPLE: Transferring of files + +To recompress gzipped files with B using a remote server run: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer "zcat {} | bzip2 -9 >{.}.bz2" + +This will list the .gz-files in the I directory and all +directories below. Then it will transfer the files to +I to the corresponding directory in +I<$HOME/logs>. On I the file will be recompressed +using B and B resulting in the corresponding file with +I<.gz> replaced with I<.bz2>. 
+ +If you want the resulting bz2-file to be transferred back to the local +computer add I<--return {.}.bz2>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer --return {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +After the recompressing is done the I<.bz2>-file is transferred back to +the local computer and put next to the original I<.gz>-file. + +If you want to delete the transferred files on the remote computer add +I<--cleanup>. This will remove both the file transferred to the remote +computer and the files transferred from the remote computer: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +If you want to run on several servers add the servers to I<--sshlogin> +either using ',' or multiple I<--sshlogin>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +You can add the local computer using I<--sshlogin :>. This will disable the +removing and transferring for the local computer only: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --sshlogin : \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +Often I<--transfer>, I<--return> and I<--cleanup> are used together.
They can be +shortened to I<--trc>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --sshlogin : \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +With the file I containing the list of computers it becomes: + + find logs/ -name '*.gz' | parallel --sshloginfile mycomputers \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +If the file I<~/.parallel/sshloginfile> contains the list of computers +the special short hand I<-S ..> can be used: + + find logs/ -name '*.gz' | parallel -S .. \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +=head1 EXAMPLE: Distributing work to local and remote computers + +Convert *.mp3 to *.ogg running one process per CPU core on local computer and server2: + + parallel --trc {.}.ogg -j+0 -S server2,: \ + 'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg' ::: *.mp3 + +=head1 EXAMPLE: Use multiple inputs in one command + +Copy files like foo.es.ext to foo.ext: + +B + +The perl command spits out 2 lines for each input. GNU B +takes 2 inputs (using B<-N2>) and replaces {1} and {2} with the inputs. + +Print the number on the opposing sides of a six sided die: + +B + +Convert files from all subdirs to PNG-files with consecutive numbers +(useful for making input PNG's for B): + +B + +Alternative version: + +B + + +=head1 EXAMPLE: Use a table as input + +Content of table_file.tsv: + + foobar + baz quux + +To run: + + cmd -o bar -i foo + cmd -o quux -i baz + +you can run: + +B + +Note: The default for GNU B is to remove the spaces around the columns. To keep the spaces: + +B + + +=head1 EXAMPLE: Working as cat | sh. Resource inexpensive jobs and evaluation + +GNU B can work similar to B. + +A resource inexpensive job is a job that takes very little CPU, disk +I/O and network I/O. Ping is an example of a resource inexpensive +job. wget is too - if the webpages are small. 
+ +The content of the file jobs_to_run: + + ping -c 1 10.0.0.1 + wget http://status-server/status.cgi?ip=10.0.0.1 + ping -c 1 10.0.0.2 + wget http://status-server/status.cgi?ip=10.0.0.2 + ... + ping -c 1 10.0.0.255 + wget http://status-server/status.cgi?ip=10.0.0.255 + +To run 100 processes simultaneously do: + +B + +As there is not a I the option B<--command> is default +because the jobs needs to be evaluated by the shell. + + +=head1 EXAMPLE: Working as mutex and counting semaphore + +The command B is an alias for B. + +A counting semaphore will allow a given number of jobs to be started +in the background. When the number of jobs are running in the +background, GNU B will wait for one of these to complete before +starting another command. B will wait for all jobs to +complete. + +Run 10 jobs concurrently in the background: + + for i in `ls *.log` ; do + echo $i + sem -j10 gzip $i ";" echo done + done + sem --wait + +A mutex is a counting semaphore allowing only one job to run. This +will edit the file I and prepends the file with lines with the +numbers 1 to 3. + + seq 1 3 | parallel sem sed -i -e 'i{}' myfile + +As I can be very big it is important only one process edits +the file at the same time. + +Name the semaphore to have multiple different semaphores active at the +same time: + + seq 1 3 | parallel sem --id mymutex sed -i -e 'i{}' myfile + + +=head1 EXAMPLE: Start editor with filenames from stdin (standard input) + +You can use GNU Parallel to start interactive programs like emacs or vi: + +B + +B + +If there are more files than will fit on a single command line, the +editor will be started again with the remaining files. + + +=head1 EXAMPLE: GNU Parallel as queue system/batch manager + +GNU Parallel can work as a simple job queue system or batch manager. +The idea is to put the jobs into a file and have GNU Parallel read +from that continuously. 
As GNU Parallel will stop at end of file we +use tail to continue reading: + +B>B; B + +To submit your jobs to the queue: + +B>>B< jobqueue> + +You can of course use B<-S> to distribute the jobs to remote +computers: + +B>B; B + + +=head1 EXAMPLE: GNU Parallel as dir processor + +If you have a dir in which users drop files that needs to be processed +you can do this on GNU/Linux (If you know what B is +called on other platforms file a bug report): + +B + +This will run the command B on each file put into B or +subdirs of B. + +The B<-u> is needed because of a small bug in GNU B. If that +proves to be a problem, file a bug report. + +You can of course use B<-S> to distribute the jobs to remote +computers: + +B + + +=head1 QUOTING + +For more advanced use quoting may be an issue. The following will +print the filename for each line that has exactly 2 columns: + +B + +This can be done by GNU B using: + +B + +Notice how \'s, "'s, and $'s needs to be quoted. GNU B can do +the quoting by using option B<-q>: + +B + +However, this means you cannot make the shell interpret special +characters. For example this B: + +B>B<{.}"> + +B>B<{.}.bz2"> + +because > and | need to be interpreted by the shell. + +If you get errors like: + + sh: -c: line 0: syntax error near unexpected token + sh: Syntax error: Unterminated quoted string + sh: -c: line 0: unexpected EOF while looking for matching `'' + sh: -c: line 1: syntax error: unexpected end of file + +then you might try using B<-q>. + +If you are using B process substitution like B<<(cat foo)> then +you may try B<-q> and prepending I with B: + +B + +Or for substituting output: + +B>B<(gzip >>B<{}.tar.gz) | bzip2 >>B<{}.tar.bz2'> + +B: To avoid dealing with the quoting problems it may be +easier just to write a small script and have GNU B call that +script. + + +=head1 LIST RUNNING JOBS + +If you want a list of the jobs currently running you can run: + +B + +GNU B will then print the currently running jobs on STDERR. 
+ + +=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS + +If you regret starting a lot of jobs you can simply break GNU B<parallel>, +but if you want to make sure you do not have half-completed jobs you +should send the signal B to GNU B<parallel>: + +B + +This will tell GNU B<parallel> to not start any new jobs, but wait until +the currently running jobs are finished before exiting. + + +=head1 ENVIRONMENT VARIABLES + +=over 9 + +=item $PARALLEL_PID + +The environment variable $PARALLEL_PID is set by GNU B<parallel> and +is visible to the jobs started from GNU B<parallel>. This makes it +possible for the jobs to communicate directly to GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct +shell. + +B If each of the jobs tests a solution and one of the jobs finds +the solution the job can tell GNU B<parallel> not to start more jobs +by: B. This only works on the local +computer. + + +=item $PARALLEL_SEQ + +$PARALLEL_SEQ will be set to the sequence number of the job +running. Remember to quote the $, so it gets evaluated by the correct +shell. + +B + +B + + +=item $TMPDIR + +Directory for temporary files. See: B<--tmpdir>. + + +=item $PARALLEL + +The environment variable $PARALLEL will be used as default options for +GNU B<parallel>. If the variable contains special shell characters +(e.g. $, *, or space) then these need to be escaped with \. + +B + +B + +can be written as: + +B + +B + +can be written as: + +B + +Notice the \ in the middle is needed because 'myssh' and 'user@server' +must be one argument. + +=back + +=head1 DEFAULT PROFILE (CONFIG FILE) + +The file ~/.parallel/config (formerly known as .parallelrc) will be +read if it exists. Lines starting with '#' will be ignored. It can be +formatted like the environment variable $PARALLEL, but it is often +easier to simply put each option on its own line. + +Options on the command line take precedence over the environment +variable $PARALLEL which takes precedence over the file +~/.parallel/config.
+ +=head1 PROFILE FILES + +If B<--profile> is set, GNU B<parallel> will read the profile from that file instead of +~/.parallel/config. + +Example: Profile for running every command with B<-j+0> and B<nice> + + echo -j+0 nice > ~/.parallel/nice_profile + parallel -J nice_profile bzip2 -9 ::: * + +Example: Profile for running a perl script before every command: + + echo "perl -e '\$a=\$\$; print \$a,\" \",'\$PARALLEL_SEQ',\" \";';" > ~/.parallel/pre_perl + parallel -J pre_perl echo ::: * + +Note how the $ and " need to be quoted using \. + +Example: Profile for running distributed jobs with B<nice> on the +remote machines: + + echo -S .. nice > ~/.parallel/dist + parallel -J dist --trc {.}.bz2 bzip2 -9 ::: * + + +=head1 EXIT STATUS + +If B<--halt-on-error> 0 or not specified: + +=over 6 + +=item 0 + +All jobs ran without error. + +=item 1-253 + +Some of the jobs failed. The exit status gives the number of failed jobs. + +=item 254 + +More than 253 jobs failed. + +=item 255 + +Other error. + +=back + +If B<--halt-on-error> 1 or 2: Exit status of the failing job. + + +=head1 DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES + +There are a lot of programs with some of the functionality of GNU +B<parallel>. GNU B<parallel> strives to include the best of the +functionality without sacrificing ease of use. + +=head2 SUMMARY TABLE + +The following features are in some of the comparable tools: + +Inputs + I1. Arguments can be read from stdin + I2. Arguments can be read from a file + I3. Arguments can be read from multiple files + I4. Arguments can be read from command line + I5. Arguments can be read from a table + I6. Arguments can be read from the same file using #! (shebang) + I7. Line oriented input as default (Quoting of special chars not needed) + +Manipulation of input + M1. Composed command + M2. Multiple arguments can fill up an execution line + M3. Arguments can be put anywhere in the execution line + M4. Multiple arguments can be put anywhere in the execution line + M5.
Arguments can be replaced with context + M6. Input can be treated as complete execution line + +Outputs + O1. Grouping output so output from different jobs does not mix + O2. Send stderr to stderr + O3. Send stdout to stdout + O4. Order of output can be same as order of input + O5. Stdout only contains stdout from the command + O6. Stderr only contains stderr from the command + +Execution + E1. Running jobs in parallel + E2. List running jobs + E3. Finish running jobs, but do not start new jobs + E4. Number of running jobs can depend on number of cpus + E5. Finish running jobs, but do not start new jobs after first failure + E6. Number of running jobs can be adjusted while running + +Remote execution + R1. Jobs can be run on remote computers + R2. Basefiles can be transferred + R3. Argument files can be transferred + R4. Result files can be transferred + R5. Cleanup of transferred files + R6. No config files needed + R7. Do not run more than SSHD's MaxStartup can handle + R8. Configurable SSH command + R9. Retry if connection breaks occasionally + +Semaphore + S1. Possibility to work as a mutex + S2. Possibility to work as a counting semaphore + +Legend + - = no + x = not applicable + ID = yes + +As not every new version of the programs is tested the table may be +outdated. Please file a bug-report if you find errors (See REPORTING +BUGS). + +parallel: +I1 I2 I3 I4 I5 I6 I7 +M1 M2 M3 M4 M5 M6 +O1 O2 O3 O4 O5 O6 +E1 E2 E3 E4 E5 E6 +R1 R2 R3 R4 R5 R6 R7 R8 R9 +S1 S2 + +xargs: +I1 I2 - - - - - +- M2 M3 - - - +- O2 O3 - O5 O6 +E1 - - - - - +- - - - - x - - - +- - + +find -exec: +- - - x - x - +- M2 M3 - - - - +- O2 O3 O4 O5 O6 +- - - - - - - +- - - - - - - - - +x x + +make -j: +- - - - - - - +- - - - - - +O1 O2 O3 - x O6 +E1 - - - E5 - +- - - - - - - - - +- - + +ppss: +I1 I2 - - - - I7 +M1 - M3 - - M6 +O1 - - x - - +E1 E2 ?E3 E4 - - +R1 R2 R3 R4 - - ?R7 ? ?
+- - + +pexec: +I1 I2 - I4 I5 - - +M1 - M3 - - M6 +O1 O2 O3 - O5 O6 +E1 - - E4 - E6 +R1 - - - - R6 - - - +S1 - + +xjobs: TODO - Please file a bug-report if you know what features xjobs +supports (See REPORTING BUGS). + +prll: TODO - Please file a bug-report if you know what features prll +supports (See REPORTING BUGS). + +dxargs: TODO - Please file a bug-report if you know what features dxargs +supports (See REPORTING BUGS). + +mdm/middelman: TODO - Please file a bug-report if you know what +features mdm/middelman supports (See REPORTING BUGS). + +xapply: TODO - Please file a bug-report if you know what features xapply +supports (See REPORTING BUGS). + +paexec: TODO - Please file a bug-report if you know what features paexec +supports (See REPORTING BUGS). + +ClusterSSH: TODO - Please file a bug-report if you know what features ClusterSSH +supports (See REPORTING BUGS). + + +=head2 DIFFERENCES BETWEEN xargs AND GNU Parallel + +B offer some of the same possibilites as GNU B. + +B deals badly with special characters (such as space, ' and +"). To see the problem try this: + + touch important_file + touch 'not important_file' + ls not* | xargs rm + mkdir -p '12" records' + ls | xargs rmdir + +You can specify B<-0> or B<-d "\n">, but many input generators are not +optimized for using B as separator but are optimized for +B as separator. E.g B, B, B, B, B, +B, B, B (B<-0> and \0 instead of \n), B +(requires using B<-0>), B (requires using B<-print0>), B +(requires user to use B<-z> or B<-Z>), B (requires using B<-z>). + +So GNU B's newline separation can be emulated with: + +B> + +B can run a given number of jobs in parallel, but has no +support for running number-of-cpu-cores jobs in parallel. + +B has no support for grouping the output, therefore output may +run together, e.g. the first half of a line is from one process and +the last half of the line is from another process. The example +B cannot be done reliably with B because of +this. 
To see this in action try: + + parallel perl -e '\$a=\"1{}\"x10000000\;print\ \$a,\"\\n\"' '>' {} ::: a b c d e f + ls -l a b c d e f + parallel -kP4 -n1 grep 1 > out.par ::: a b c d e f + echo a b c d e f | xargs -P4 -n1 grep 1 > out.xargs-unbuf + echo a b c d e f | xargs -P4 -n1 grep --line-buffered 1 > out.xargs-linebuf + echo a b c d e f | xargs -n1 grep --line-buffered 1 > out.xargs-serial + ls -l out* + md5sum out* + +B has no support for keeping the order of the output, therefore +if running jobs in parallel using B the output of the second +job cannot be postponed till the first job is done. + +B has no support for running jobs on remote computers. + +B has no support for context replace, so you will have to create the +arguments. + +If you use a replace string in B (B<-I>) you can not force +B to use more than one argument. + +Quoting in B works like B<-q> in GNU B. This means +composed commands and redirection require using B. + +B> B<{}.wc"> + +becomes + +B>B< {}.wc"> + +and + +B + +becomes + +B + + +=head2 DIFFERENCES BETWEEN find -exec AND GNU Parallel + +B offer some of the same possibilites as GNU B. + +B only works on files. So processing other input (such as +hosts or URLs) will require creating these inputs as files. B has no support for running commands in parallel. + + +=head2 DIFFERENCES BETWEEN make -j AND GNU Parallel + +B can run jobs in parallel, but requires a crafted Makefile +to do this. That results in extra quoting to get filename containing +newline to work correctly. + +B has no support for grouping the output, therefore output +may run together, e.g. the first half of a line is from one process +and the last half of the line is from another process. The example +B cannot be done reliably with B because of +this. + +(Very early versions of GNU B were coincidently implemented +using B). + + +=head2 DIFFERENCES BETWEEN ppss AND GNU Parallel + +B is also a tool for running jobs in parallel. 
+ +The output of B<ppss> is status information and thus not useful for +using as input for another command. The output from the jobs is put +into files. + +The argument replace string ($ITEM) cannot be changed. Arguments must +be quoted - thus arguments containing special characters (space '"&!*) +may cause problems. More than one argument is not supported. File +names containing newlines are not processed correctly. When reading +input from a file null cannot be used as a terminator. B<ppss> needs to +read the whole input file before starting any jobs. + +Output and status information is stored in ppss_dir and thus requires +cleanup when completed. If the dir is not removed before running +B<ppss> again it may cause nothing to happen as B<ppss> thinks the +task is already done. GNU B<parallel> will normally not need cleaning +up if running locally and will only need cleaning up if stopped +abnormally and running remote (B<--cleanup> may not complete if +stopped abnormally). The example B<Parallel grep> would require extra +postprocessing if written using B<ppss>. + +For remote systems PPSS requires 3 steps: config, deploy, and +start. GNU B<parallel> only requires one step.
+ +=head3 EXAMPLES FROM ppss MANUAL + +Here are the examples from B's manual page with the equivalent +using GNU B: + +B<1> ./ppss.sh standalone -d /path/to/files -c 'gzip ' + +B<1> find /path/to/files -type f | parallel -j+0 gzip + +B<2> ./ppss.sh standalone -d /path/to/files -c 'cp "$ITEM" /destination/dir ' + +B<2> find /path/to/files -type f | parallel -j+0 cp {} /destination/dir + +B<3> ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q ' + +B<3> parallel -a list-of-urls.txt wget -q + +B<4> ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q "$ITEM"' + +B<4> parallel -a list-of-urls.txt wget -q {} + +B<5> ./ppss config -C config.cfg -c 'encode.sh ' -d /source/dir -m +192.168.1.100 -u ppss -k ppss-key.key -S ./encode.sh -n nodes.txt -o +/some/output/dir --upload --download ; ./ppss deploy -C config.cfg ; +./ppss start -C config + +B<5> # parallel does not use configs. If you want a different username put it in nodes.txt: user@hostname + +B<5> find source/dir -type f | parallel --sshloginfile nodes.txt --trc {.}.mp3 lame -a {} -o {.}.mp3 --preset standard --quiet + +B<6> ./ppss stop -C config.cfg + +B<6> killall -TERM parallel + +B<7> ./ppss pause -C config.cfg + +B<7> Press: CTRL-Z or killall -SIGTSTP parallel + +B<8> ./ppss continue -C config.cfg + +B<8> Enter: fg or killall -SIGCONT parallel + +B<9> ./ppss.sh status -C config.cfg + +B<9> killall -SIGUSR2 parallel + + +=head2 DIFFERENCES BETWEEN pexec AND GNU Parallel + +B is also a tool for running jobs in parallel. 
+ +Here are the examples from B's info page with the equivalent +using GNU B: + +B<1> pexec -o sqrt-%s.dat -p "$(seq 10)" -e NUM -n 4 -c -- \ + 'echo "scale=10000;sqrt($NUM)" | bc' + +B<1> seq 10 | parallel -j4 'echo "scale=10000;sqrt({})" | bc > sqrt-{}.dat' + +B<2> pexec -p "$(ls myfiles*.ext)" -i %s -o %s.sort -- sort + +B<2> ls myfiles*.ext | parallel sort {} ">{}.sort" + +B<3> pexec -f image.list -n auto -e B -u star.log -c -- \ + 'fistar $B.fits -f 100 -F id,x,y,flux -o $B.star' + +B<3> parallel -a image.list -j+0 \ + 'fistar {}.fits -f 100 -F id,x,y,flux -o {}.star' 2>star.log + +B<4> pexec -r *.png -e IMG -c -o - -- \ + 'convert $IMG ${IMG%.png}.jpeg ; "echo $IMG: done"' + +B<4> ls *.png | parallel 'convert {} {.}.jpeg; echo {}: done' + +B<5> pexec -r *.png -i %s -o %s.jpg -c 'pngtopnm | pnmtojpeg' + +B<5> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {}.jpg' + +B<6> for p in *.png ; do echo ${p%.png} ; done | \ + pexec -f - -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' + +B<6> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' + +B<7> LIST=$(for p in *.png ; do echo ${p%.png} ; done) + pexec -r $LIST -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' + +B<7> ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' + +B<8> pexec -n 8 -r *.jpg -y unix -e IMG -c \ + 'pexec -j -m blockread -d $IMG | \ + jpegtopnm | pnmscale 0.5 | pnmtojpeg | \ + pexec -j -m blockwrite -s th_$IMG' + +B<8> Combining GNU B and GNU B. + +B<8> ls *jpg | parallel -j8 'sem --id blockread cat {} | jpegtopnm |' \ + 'pnmscale 0.5 | pnmtojpeg | sem --id blockwrite cat > th_{}' + +B<8> If reading and writing is done to the same disk, this may be +faster as only one process will be either reading or writing: + +B<8> ls *jpg | parallel -j8 'sem --id diskio cat {} | jpegtopnm |' \ + 'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}' + +=head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel + +B is also a tool for running jobs in parallel. 
It only supports +running jobs on your local computer. + +B<xjobs> deals badly with special characters just like B<xargs>. See +the section B<DIFFERENCES BETWEEN xargs AND GNU Parallel>. + +Here are the examples from B<xjobs>'s man page with the equivalent +using GNU B<parallel>: + +B<1> ls -1 *.zip | xjobs unzip + +B<1> ls *.zip | parallel unzip + +B<2> ls -1 *.zip | xjobs -n unzip + +B<2> ls *.zip | parallel unzip >/dev/null + +B<3> find . -name '*.bak' | xjobs gzip + +B<3> find . -name '*.bak' | parallel gzip + +B<4> ls -1 *.jar | sed 's/\(.*\)/\1 > \1.idx/' | xjobs jar tf + +B<4> ls *.jar | parallel jar tf {} '>' {}.idx + +B<5> xjobs -s script + +B<5> cat script | parallel + +B<6> mkfifo /var/run/my_named_pipe; +xjobs -s /var/run/my_named_pipe & +echo unzip 1.zip >> /var/run/my_named_pipe; +echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe + +B<6> mkfifo /var/run/my_named_pipe; +cat /var/run/my_named_pipe | parallel & +echo unzip 1.zip >> /var/run/my_named_pipe; +echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe + + +=head2 DIFFERENCES BETWEEN prll AND GNU Parallel + +B<prll> is also a tool for running jobs in parallel. It does not +support running jobs on remote computers. + +B<prll> encourages using BASH aliases and BASH functions instead of +scripts. GNU B<parallel> can use the aliases and functions that are +defined at login (using: B) but it will +never support running aliases and functions that are defined +later (see why +http://www.perlmonks.org/index.pl?node_id=484296). However, scripts or +composed commands work just fine. + +B<prll> generates a lot of status information on STDERR which makes it +harder to use the STDERR output of the job directly as input for +another program. + +Here is the example from B<prll>'s man page with the equivalent +using GNU B<parallel>: + +prll -s 'mogrify -flip $1' *.jpg + +parallel mogrify -flip ::: *.jpg + + +=head2 DIFFERENCES BETWEEN dxargs AND GNU Parallel + +B<dxargs> is also a tool for running jobs in parallel. + +B<dxargs> does not deal well with more simultaneous jobs than SSHD's +MaxStartup.
B is only built for remote run jobs, but does not +support transferring of files. + + +=head2 DIFFERENCES BETWEEN mdm/middleman AND GNU Parallel + +middleman(mdm) is also a tool for running jobs in parallel. + +Here are the shellscripts of http://mdm.berlios.de/usage.html ported +to GNU B: + +B>B< result> + +B + +B + +=head2 DIFFERENCES BETWEEN xapply AND GNU Parallel + +B can run jobs in parallel on the local computer. + +Here are the examples from B's man page with the equivalent +using GNU B: + +B<1> xapply '(cd %1 && make all)' */ + +B<1> parallel 'cd {} && make all' ::: */ + +B<2> xapply -f 'diff %1 ../version5/%1' manifest | more + +B<2> parallel diff {} ../version5/{} < manifest | more + +B<3> xapply -p/dev/null -f 'diff %1 %2' manifest1 checklist1 + +B<3> parallel diff {1} {2} :::: manifest1 checklist1 + +B<4> xapply 'indent' *.c + +B<4> parallel indent ::: *.c + +B<5> find ~ksb/bin -type f ! -perm -111 -print | xapply -f -v 'chmod a+x' - + +B<5> find ~ksb/bin -type f ! -perm -111 -print | parallel -v chmod a+x + +B<6> find */ -... | fmt 960 1024 | xapply -f -i /dev/tty 'vi' - + +B<6> sh <(find */ -... | parallel -s 1024 echo vi) + +B<6> find */ -... | parallel -s 1024 -Xuj1 vi + +B<7> find ... | xapply -f -5 -i /dev/tty 'vi' - - - - - + +B<7> sh <(find ... |parallel -n5 echo vi) + +B<7> find ... |parallel -n5 -uj1 vi + +B<8> xapply -fn "" /etc/passwd + +B<8> parallel -k echo < /etc/passwd + +B<9> tr ':' '\012' < /etc/passwd | xapply -7 -nf 'chown %1 %6' - - - - - - - + +B<9> tr ':' '\012' < /etc/passwd | parallel -N7 chown {1} {6} + +B<10> xapply '[ -d %1/RCS ] || echo %1' */ + +B<10> parallel '[ -d {}/RCS ] || echo {}' ::: */ + +B<11> xapply -f '[ -f %1 ] && echo %1' List | ... + +B<11> parallel '[ -f {} ] && echo {}' < List | ... + + +=head2 DIFFERENCES BETWEEN paexec AND GNU Parallel + +B can run jobs in parallel on both the local and remote computers. + +B requires commands to print a blank line as the last +output. 
This means you will have to write a wrapper for most programs. + +B has a job dependency facility so a job can depend on another +job to be executed successfully. Sort of a poor-man's B. + +Here are the examples from B's example catalog with the equivalent +using GNU B: + +=over 1 + +=item 1_div_X_run: + + ../../paexec -s -l -c "`pwd`/1_div_X_cmd" -n +1 <. + +ClusterSSH runs the same command with the same arguments on a list of +machines - one per machine. This is typically used for administrating +several machines that are almost identical. + +GNU B runs the same (or different) commands with different +arguments in parallel possibly using remote machines to help +computing. If more than one machine is listed in B<-S> GNU B may +only use one of these (e.g. if there are 8 jobs to be run and one +machine has 8 cores). + +GNU B can be used as a poor-man's version of ClusterSSH: + +B + + +=head1 BUGS + +=head2 Quoting of newline + +Because of the way newline is quoted this will not work: + +echo 1,2,3 | parallel -vkd, "echo 'a{}'" + +However, this will work: + +echo 1,2,3 | parallel -vkd, echo a{} + +=head2 Startup speed + +GNU B is slow at starting up. Half of the startup time on +the local computer is spent finding the maximal length of a command +line. Setting B<-s> will remove this part of the startup time. + +When using multiple computers GNU B opens B connections +to them to figure out how many connections can be used reliably +simultaneously (Namely SSHD's MaxStartup). This test is done for each +host in serial, so if your --sshloginfile contains many hosts it may +be slow. + +=head2 --nice limits command length + +The current implementation of B<--nice> is too pessimistic in the max +allowed command length. It only uses a little more than half of what +it could. This affects -X and -m. If this becomes a real problem for +you file a bug-report. + + +=head1 REPORTING BUGS + +Report bugs to . 
+ + +=head1 AUTHOR + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008,2009,2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010 Ole Tange, http://ole.tange.dk and Free Software +Foundation, Inc. + +Parts of the manual concerning B<xargs> compatibility are inspired by +the manual of B<xargs> from GNU findutils 4.4.2. + + + +=head1 LICENSE + +Copyright (C) 2007,2008,2009,2010 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this documentation +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the file fdl.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license.
+ +=back + +With the understanding that: + +=over 9 + +=item B + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as cc-by-sa.txt. + +=head1 DEPENDENCIES + +GNU B uses Perl, and the Perl modules Getopt::Long, +IPC::Open3, Symbol, IO::File, POSIX, and File::Temp. For remote usage +it also uses Rsync with Ssh. + + +=head1 SEE ALSO + +B(1), B(1), B(1), B(1), B(1), +B(1), B(1), B(1), B(1) + +=cut diff --git a/src/sem.pod b/src/sem.pod index 2ca64cee..546525b8 100755 --- a/src/sem.pod +++ b/src/sem.pod @@ -217,217 +217,3 @@ Symbol, Fcntl. 
B(1) =cut - -use strict; -use Symbol qw(gensym); -use Getopt::Long; - -Getopt::Long::Configure ("bundling","require_order"); -GetOptions("debug|D" => \$::opt_D, - "id|i=s" => \$::opt_id, - "count|j=i" => \$::opt_count, - "fg" => \$::opt_fg, - "timeout|t=i" => \$::opt_timeout, - "version" => \$::opt_version, - "wait|w" => \$::opt_wait, - ) || die_usage(); -$Global::debug = $::opt_D; -$Global::version = 20100814; -$Global::progname = 'sem'; - -my $count = 1; # Default 1 = mutex -if($::opt_count) { - $count = $::opt_count + 1; -} -if($::opt_wait) { - $count = 1; -} -my $id = $::opt_id; -my $fg = $::opt_fg || $::opt_wait; -$::opt_timeout = $::opt_timeout; -if(defined $::opt_version) { - version(); -} - -if(not defined $id) { - # $id = getppid(); - # does not work with: - # find . -name '*linux*' -exec sem -j1000 "sleep 3; echo `tty` '{}'" \; ; sem --wait echo done - $id = `tty`; -} -$id = "id-$id"; -$id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex -my $sem = Semaphore->new($id,$count); -$sem->acquire(); -debug("run"); -if($fg) { - system @ARGV; - $sem->release(); -} else { - # If run in the background, the PID will change - # therefore release and re-acquire the semaphore - $sem->release(); - if(not fork()) { - # child - # Get a semaphore for this pid - my $child_sem = Semaphore->new($id,$count); - $child_sem->acquire(); - system @ARGV; - $child_sem->release(); - } -} - -sub version { - # Returns: N/A - print join("\n", - "GNU $Global::progname $Global::version", - "Copyright (C) 2010 Ole Tange and Free Software Foundation, Inc.", - "License GPLv3+: GNU GPL version 3 or later ", - "This is free software: you are free to change and redistribute it.", - "GNU $Global::progname comes with no warranty.", - "", - "Web site: http://www.gnu.org/software/parallel\n" - ); -} - -sub usage { - # Returns: N/A - print "Usage:\n"; - print "$Global::progname [options] [command [arguments]] < list_of_arguments)\n"; - print "$Global::progname [options] [command 
[arguments]] ::: arguments\n"; - print "$Global::progname [options] [command [arguments]] :::: argfile(s)\n"; - print "\n"; - print "See 'man $Global::progname' for the options\n"; -} - -sub die_usage { - usage(); - exit(255); -} - -sub debug { - # Returns: N/A - $Global::debug or return; - @_ = grep { defined $_ ? $_ : "" } @_; - print map {$_,"\n" } @_; -} - -package Semaphore; - -# This package provides a counting semaphore -# -# If a process dies without releasing the semaphore the next process -# that needs that entry will clean up dead semaphores -# -# The semaphores are stored in ~/.parallel/semaphores/id- Each -# file in ~/.parallel/semaphores/id-/ is the process ID of the -# process holding the entry. If the process dies, the entry can be -# taken by another process. - -use Fcntl qw(:DEFAULT :flock); - -sub new { - my $class = shift; - my $id = shift; - my $count = shift; - my $parallel_locks = $ENV{'HOME'}."/.parallel/semaphores"; - -d $parallel_locks or mkdir $parallel_locks; - my $lockdir = "$parallel_locks/$id"; - my $lockfile = $lockdir.".lock"; - return bless { - 'lockfile' => $lockfile, - 'lockfh' => Symbol::gensym(), - 'lockdir' => $lockdir, - 'id' => $id, - 'idfile' => $lockdir."/".$id, - 'pid' => $$, - 'pidfile' => $lockdir."/".$$, - 'count' => $count - }, ref($class) || $class; -} - -sub acquire { - my $self = shift; - while(1) { - $self->atomic_link_if_count_less_than() and last; - ::debug("Remove dead locks"); - my $lockdir = $self->{'lockdir'}; - for my $d (<$lockdir/*>) { - $d =~ m:$lockdir/([0-9]+):o or next; - if(not kill 0, $1) { - ::debug("Dead: $d"); - unlink $d; - } else { - ::debug("Alive: $d"); - } - } - # try again - $self->atomic_link_if_count_less_than() and last; - sleep 1; - # TODO if timeout: last - } - ::debug("got $self->{'pid'}"); -} - -sub release { - my ($self) = shift; - unlink $self->{'pidfile'}; - if($self->nlinks() == 1) { - # This is the last link, so atomic cleanup - $self->lock(); - if($self->nlinks() == 1) { - 
unlink $self->{'idfile'}; - rmdir $self->{'lockdir'}; - } - $self->unlock(); - } - ::debug("released $self->{'pid'}"); -} - - -sub atomic_link_if_count_less_than { - # Link $file1 to $file2 if nlinks to $file1 < $count - my ($self) = shift; - my ($retval) = 0; - $self->lock(); - if($self->nlinks() < $count) { - -d $self->{'lockdir'} || mkdir $self->{'lockdir'}; - if(not -e $self->{'idfile'}) { - open (A, ">", $self->{'idfile'}) or die ">$self->{'idfile'}"; - close A; - } - $retval = link $self->{'idfile'}, $self->{'pidfile'}; - } - $self->unlock(); - ::debug("atomic $retval"); - return $retval; -} - -sub nlinks { - my $self = shift; - if(-e $self->{'idfile'}) { - return (stat(_))[3]; - } else { - return 0; - } -} - -sub lock { - my ($self) = shift; - open $self->{'lockfh'}, ">", $self->{'lockfile'} - or die "Can't open semaphore file $self->{'lockfile'}: $!"; - chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw - while(not flock $self->{'lockfh'}, LOCK_EX()|LOCK_NB()) { - ::debug("Cannot lock $self->{'lockfile'}"); - # TODO if timeout: last - sleep 1; - } - ::debug("locked $self->{'lockfile'}"); -} - -sub unlock { - my $self = shift; - unlink $self->{'lockfile'}; - close $self->{'lockfh'}; - ::debug("unlocked"); -} diff --git a/src/sql b/src/sql index 7b128637..68956b5c 100755 --- a/src/sql +++ b/src/sql @@ -528,7 +528,7 @@ $Global::Initfile && unlink $Global::Initfile; exit ($err); sub parse_options { - $Global::version = 20101202; + $Global::version = 20101206; $Global::progname = 'sql'; # This must be done first as this may exec myself diff --git a/testsuite/tests-to-run/niceload01.sh b/testsuite/tests-to-run/niceload01.sh new file mode 100644 index 00000000..1b2b821c --- /dev/null +++ b/testsuite/tests-to-run/niceload01.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +echo '### Test niceload' +niceload -s 1 perl -e '$|=1;do{$l==$r or print "."; $l=$r}until(($r=time-$^T)>10)' +echo + +#echo '### Test niceload -p' +#sleep 3 & +#nice-load -v -p $! 
+ + diff --git a/testsuite/tests-to-run/test43.sh b/testsuite/tests-to-run/test43.sh new file mode 100644 index 00000000..e58b4530 --- /dev/null +++ b/testsuite/tests-to-run/test43.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Assume /dev/shm is easy to fill up +mkdir -p /dev/shm/parallel + +echo '### Test $TMPDIR' +TMPDIR=/dev/shm/parallel stdout timeout -k 1 6 parallel head -c 2000m '<'{} >/dev/null ::: /dev/zero & +seq 1 20 | parallel -j1 "df /dev/shm | parallel -k --colsep ' +' echo {4}|tail -n 1;sleep 1" \ +| stdout timeout -k 1 10 perl -ne 'BEGIN{$a=<>} $b=<>; if ($a-1000 > $b) { die "More than 1 MB gone. Good!" }' +wait +sleep 1 + +echo '### Test --tmpdir' +stdout timeout -k 1 6 parallel --tmpdir /dev/shm/parallel head -c 2000m '<'{} >/dev/null ::: /dev/zero & +seq 1 20 | parallel -j1 "df /dev/shm | parallel -k --colsep ' +' echo {4}|tail -n 1;sleep 1" \ +| stdout timeout -k 1 10 perl -ne 'BEGIN{$a=<>} $b=<>; if ($a-1000 > $b) { die "More than 1 MB gone. Good!" }' +wait +sleep 1 + +echo '### Test $TMPDIR and --tmpdir' +TMPDIR=/tmp stdout timeout -k 1 6 parallel --tmpdir /dev/shm/parallel head -c 2000m '<'{} >/dev/null ::: /dev/zero & +seq 1 20 | parallel -j1 "df /dev/shm | parallel -k --colsep ' +' echo {4}|tail -n 1;sleep 1" \ +| stdout timeout -k 1 10 perl -ne 'BEGIN{$a=<>} $b=<>; if ($a-1000 > $b) { die "More than 1 MB gone. Good!" 
}' +wait +sleep 1 diff --git a/testsuite/tests-to-run/test45.sh b/testsuite/tests-to-run/test45.sh index fce192f2..2a9cf18c 100644 --- a/testsuite/tests-to-run/test45.sh +++ b/testsuite/tests-to-run/test45.sh @@ -1,11 +1,10 @@ #!/bin/bash - SERVER1=parallel-server3 SERVER2=parallel-server2 echo '### Test --load locally' -seq 1 300 | nice timeout -k 1 14 parallel -j0 burnP6 +seq 1 300 | nice timeout -k 1 14 parallel -j0 burnP6 stdout /usr/bin/time -f %e parallel --load 10 sleep ::: 1 | perl -ne '$_ > 10 and print "OK\n"' echo '### Test --load remote' diff --git a/testsuite/wanted-results/test43 b/testsuite/wanted-results/test43 new file mode 100644 index 00000000..fb0fd507 --- /dev/null +++ b/testsuite/wanted-results/test43 @@ -0,0 +1,6 @@ +### Test $TMPDIR +More than 1 MB gone. Good! at -e line 1, <> line 3. +### Test --tmpdir +More than 1 MB gone. Good! at -e line 1, <> line 3. +### Test $TMPDIR and --tmpdir +More than 1 MB gone. Good! at -e line 1, <> line 3.