From 81741d3dbce495f2662f197129e012bdb0acd43b Mon Sep 17 00:00:00 2001
From: Ole Tange
Date: Sun, 28 Apr 2019 22:13:10 +0200
Subject: [PATCH] Re-fixed #56115: Compute max-line-length fast on CygWin.
 Fixed bug #56214: *Q is not set in shell_quote_scalar.

---
 doc/haikus       |  3 +++
 src/parallel     | 24 +++++++++++++++---------
 src/parallel.pod | 51 ++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/doc/haikus b/doc/haikus
index 1a6415fe..875fac7d 100644
--- a/doc/haikus
+++ b/doc/haikus
@@ -1,5 +1,8 @@
 Quote of the month:
 
+Amazingly useful script!
+    -- unxusr@reddit.com
+
 GNU parallel really changed how I do a lot of data processing stuff
     -- Brendan Dolan-Gavitt @moyix@twitter
 
diff --git a/src/parallel b/src/parallel
index 6710bf03..304f8e38 100755
--- a/src/parallel
+++ b/src/parallel
@@ -2524,14 +2524,15 @@ sub shell_quote_scalar($) {
 	*shell_quote_scalar = \&shell_quote_scalar_default;
     }
     # The sub is now redefined. Call it
-    return shell_quote_scalar(@_);
+    return shell_quote_scalar($_[0]);
 }
 
 sub Q($) {
     # Q alias for ::shell_quote_scalar
+    my $ret = shell_quote_scalar($_[0]);
     no warnings 'redefine';
     *Q = \&::shell_quote_scalar;
-    return Q(@_);
+    return $ret;
 }
 
 sub shell_quote_file($) {
@@ -2578,8 +2579,9 @@ sub perl_quote_scalar($) {
 # -w complains about prototype
 sub pQ($) {
     # pQ alias for ::perl_quote_scalar
+    my $ret = perl_quote_scalar($_[0]);
     *pQ = \&::perl_quote_scalar;
-    return pQ(@_);
+    return $ret;
 }
 
 sub unquote_printf() {
@@ -10899,8 +10901,9 @@ sub real_max_length($) {
     # The maximal command line length
     # Use an upper bound of 100 MB if the shell allows for infinite long lengths
     my $upper = 100_000_000;
-    # 268 makes the search faster on CygWin - 1000 is supported everywhere
-    my $len = 268;
+    # 1000 is supported everywhere, so the search can start anywhere 1..999
+    # 324 makes the search much faster on CygWin, so let us use that
+    my $len = 324;
     do {
 	if($len > $upper) { return $len };
 	$len *= 16;
@@ -11483,15 +11486,18 @@ sub new($) {
 
 sub Q($) {
     # Q alias for ::shell_quote_scalar
+    my $ret = ::Q($_[0]);
     no warnings 'redefine';
-    *Q = \&::shell_quote_scalar;
-    return Q(@_);
+    *Q = \&::Q;
+    return $ret;
 }
 
 sub pQ($) {
     # pQ alias for ::perl_quote_scalar
-    *pQ = \&::perl_quote_scalar;
-    return pQ(@_);
+    my $ret = ::pQ($_[0]);
+    no warnings 'redefine';
+    *pQ = \&::pQ;
+    return $ret;
 }
 
 sub total_jobs() {
diff --git a/src/parallel.pod b/src/parallel.pod
index 3453fab7..3fd46010 100644
--- a/src/parallel.pod
+++ b/src/parallel.pod
@@ -62,9 +62,13 @@ or download it at: https://doi.org/10.5281/zenodo.1146014
 Otherwise start by watching the intro videos for a quick introduction:
 http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1
 
-Then browse through the B<EXAMPLE>s after the list of B<OPTIONS> in
-B<man parallel> (Use B<LESS=+/EXAMPLE: man parallel>). That will give
-you an idea of what GNU B<parallel> is capable of.
+If you need a one page printable cheat sheet you can find it on:
+https://www.gnu.org/software/parallel/parallel_cheat.pdf
+
+You can find a lot of B<EXAMPLE>s of use after the list of B<OPTIONS>
+in B<man parallel> (Use B<LESS=+/EXAMPLE: man parallel>). That will
+give you an idea of what GNU B<parallel> is capable of, and you may
+find a solution you can simply adapt to your situation.
 
 If you want to dive even deeper: spend a couple of hours walking
 through the tutorial (B<man parallel_tutorial>). Your command line
@@ -663,11 +667,11 @@ equivalent: B<--delay 100000> and B<--delay 1d3.5h16.6m4s>.
 =item B<--dry-run>
 
 Print the job to run on stdout (standard output), but do not run the
-job. Use B<-v -v> to include the wrapping that GNU Parallel generates
-(for remote jobs, B<--tmux>, B<--nice>, B<--pipe>, B<--pipepart>,
-B<--fifo> and B<--cat>). Do not count on this literally, though, as the
-job may be scheduled on another computer or the local computer if : is
-in the list.
+job. Use B<-v -v> to include the wrapping that GNU B<parallel>
+generates (for remote jobs, B<--tmux>, B<--nice>, B<--pipe>,
+B<--pipepart>, B<--fifo> and B<--cat>). Do not count on this
+literally, though, as the job may be scheduled on another computer or
+the local computer if : is in the list.
 
 =item B<--eof>[=I<eof-str>]
 
@@ -3391,6 +3395,27 @@ If B<-j0> normally spawns 252 jobs, then the above will try to spawn
 this technique with no problems. To raise the 32000 jobs limit raise
 /proc/sys/kernel/pid_max to 4194303.
 
+If you do not need GNU B<parallel> to have control over each job (so
+no need for B<--retries> or B<--joblog> or similar), then it can be
+even faster if you can generate the command lines and pipe those to a
+shell. So if you can do this:
+
+  mygenerator | sh
+
+Then that can be parallelized like this:
+
+  mygenerator | parallel --pipe --block 10M sh
+
+E.g.
+
+  mygenerator() {
+    seq 10000000 | perl -pe 'print "echo This is fast job number "';
+  }
+  mygenerator | parallel --pipe --block 10M sh
+
+The overhead is 100000 times smaller namely around 100 nanoseconds per
+job.
+
 
 =head1 EXAMPLE: Using shell variables
@@ -4308,7 +4333,7 @@ In some cases you can run on more CPUs and computers during the night:
   cp night_server_list ~/.parallel/sshloginfile
   tail -n+0 -f jobqueue | parallel --jobs jobfile -S ..
 
-GNU Parallel discovers if B<jobfile> or B<~/.parallel/sshloginfile>
+GNU B<parallel> discovers if B<jobfile> or B<~/.parallel/sshloginfile>
 changes. There is a a small issue when using GNU B<parallel> as queue
@@ -4346,8 +4371,8 @@ If the files to be processed are in a tar file then unpacking one file
 and processing it immediately may be faster than first unpacking all
 files. Set up the dir processor as above and unpack into the dir.
 
-Using GNU Parallel as dir processor has the same limitations as using
-GNU Parallel as queue system/batch manager.
+Using GNU B<parallel> as dir processor has the same limitations as
+using GNU B<parallel> as queue system/batch manager.
 
 
 =head1 EXAMPLE: Locate the missing package
@@ -4551,7 +4576,7 @@ Options to pass on to B<rsync>. Defaults to: -rlDzR.
 
 =item $PARALLEL_SHELL
 
-Use this shell for the commands run by GNU Parallel:
+Use this shell for the commands run by GNU B<parallel>:
 
 =over 2
 
@@ -4561,7 +4586,7 @@ $PARALLEL_SHELL. If undefined use:
 
 =item *
 
-The shell that started GNU Parallel. If that cannot be determined:
+The shell that started GNU B<parallel>. If that cannot be determined:
 
 =item *
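
The real_max_length() hunk only changes the starting probe length (268
becomes 324) and its comment, but the search it feeds is easy to miss from
the context lines alone: the candidate length grows by a factor of 16 until
a probe fails or the 100 MB upper bound is passed. Below is a rough,
self-contained sketch of that idea, not code from src/parallel;
can_run_length(), max_length_estimate() and the 131072-byte limit are
invented stand-ins (the real code runs an actual command of the candidate
size).

  #!/usr/bin/perl
  # Sketch of a geometric search for the maximal command line length.
  # can_run_length() is a toy probe that pretends the limit is 131072.
  use strict;
  use warnings;

  my $pretend_limit = 131072;
  sub can_run_length($) { return $_[0] <= $pretend_limit; }

  sub max_length_estimate() {
      my $upper = 100_000_000;   # cap if the shell claims "unlimited"
      my $len = 324;             # any start in 1..999 is known to work
      while(can_run_length($len)) {
          if($len > $upper) { return $len }
          $len *= 16;            # geometric growth keeps probe count low
      }
      # $len now fails while $len/16 still worked; the real code goes on
      # to narrow the bound between those two values.
      return $len/16;            # largest probed length that still worked
  }

  print max_length_estimate(), "\n";   # prints 82944 with this toy probe

Because the growth is geometric, the starting value only decides which
lengths get probed on the way up; per the updated comment, 324 is a start
that CygWin answers much faster, and any start in 1..999 is safe because
1000 is supported everywhere.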
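The Q()/pQ() hunks all change the same one-shot alias pattern: the old
wrappers installed the real sub in their own glob and then called
themselves through that glob, which appears to be what the "*Q is not set"
report in the subject refers to; the new wrappers compute the return value
first and only then redefine the alias. A minimal sketch of the new
pattern, assuming an invented slow_quote() in place of parallel's
shell_quote_scalar():

  #!/usr/bin/perl
  # Sketch: compute the result, then replace the alias with the real sub.
  use strict;
  use warnings;

  sub slow_quote($) {
      # Stand-in for the real quoting implementation.
      my $str = $_[0];
      $str =~ s/([^-_.+a-z0-9\/])/\\$1/gi;
      return $str;
  }

  sub Q($) {
      # First call: do the work, then point Q at the real implementation
      # so every later call skips this wrapper entirely.
      my $ret = slow_quote($_[0]);
      no warnings 'redefine';
      *Q = \&slow_quote;
      return $ret;
  }

  print Q("it's a test"), "\n";   # goes through the wrapper once
  print Q('a b;c'), "\n";         # now resolves straight to slow_quote()

The first call pays for the wrapper once; every later call goes through
the reassigned glob directly, which is the point of the alias trick.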