parallel: Attempt 3 at fixing https://lists.gnu.org/archive/html/parallel/2020-08/msg00003.html

2024-11-24 23:17:55 +00:00 · 2020-08-20 23:07:56 +02:00 · 2020-08-20 23:07:56 +02:00 · 19cd8e015f
parent 2ed5279f6a
commit 19cd8e015f
4 changed files with 122 additions and 62 deletions
--- a/1
+++ b/1
@@ -1,5 +1,6 @@
 People who have helped GNU Parallel in different ways.
 Morten Rønne: Donating his old Mac for testing.
 Renan Valieris: Maintaining GNU Parallel for Anaconda Cloud.
 Jakub Kulík: Maintaining GNU Parallel for Solaris-userland.
 Rich Burridge: Maintaining GNU Parallel for Solaris-userland.
--- a/src/parallel
+++ b/src/parallel
@@ -2160,7 +2160,7 @@ sub check_invalid_option_combinations() {
 sub init_globals() {
    # Defaults:
-    $Global::version = 20200815;
+    $Global::version = 20200820;
    $Global::progname = 'parallel';
    $::name = "GNU Parallel";
    $Global::infinity = 2**31;
@@ -5351,7 +5351,7 @@ sub qqx(@) {
 	# CygWin does not respect 2>/dev/null
 	# so we do that by hand
 	# This trick does not work:
-	# https://stackoverflow.com/questions/13833088/why-doesnt-local-work-on-stderr-and-stdout
+	# https://stackoverflow.com/q/13833088/363028
 	# local *STDERR;
 	# open(STDERR, ">", "/dev/null");
 	open(local *CHILD_STDIN,  '<', '/dev/null') or die $!;
@@ -5370,7 +5370,10 @@ sub qqx(@) {
 	    # Make sure $? is set
 	    waitpid($pid, 0);
 	    return wantarray ? @arr : join "",@arr;
-	}
+        } or do {
            # If eval fails, force $?=false
            `false`;
        };
    }
 }
@@ -10481,7 +10484,7 @@ sub slot($) {
 {
    my $already_spread;
-    my $env_size;
+    my $darwin_max_len;
    sub populate($) {
 	# Add arguments from arg_queue until the number of arguments or
@@ -10499,6 +10502,28 @@ sub slot($) {
 	my $next_arg;
 	my $max_len = $Global::minimal_command_line_length
 	    || Limits::Command::max_length();
 	if($^O eq "darwin") {
 	    # env $((260932 - $single - $envc - $envl * 4 + 1064 ))
 	    # Darwin's limit is affected by:
 	    # * number of environment names (variables+functions)
 	    # * size of environment
 	    # * the length of arguments:
 	    #   a one-char argument lowers the limit by 5
 	    #   To be safe assume all arguments are one-char
 	    # The max_len is cached between runs, but if the size of
 	    # the environment is different we need to recompute the
 	    # usable max length for this run of GNU Parallel
 	    # See https://unix.stackexchange.com/a/604943/2972
 	    if(not $darwin_max_len) {
 		my $envc = (keys %ENV);
 		my $envn = length join"",(keys %ENV);
 		my $envv = length join"",(values %ENV);
 		$darwin_max_len = 3+($max_len - $envn - $envv) / 5 - $envc*2;
 		::debug("init",
 			"length: $darwin_max_len 3+($max_len - $envn - $envv) / 5 - $envc*2");
 	    }
 	    $max_len = $darwin_max_len;
 	}
 	if($opt::cat or $opt::fifo) {
 	    # Get the empty arg added by --pipepart (if any)
 	    $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
@@ -11501,33 +11526,9 @@ sub max_length($) {
 sub real_max_length() {
    # Find the max_length of a command line
    # Returns:
-    #   The maximal command line length
+    #   The maximal command line length with 1 byte arguments
-    if($^O eq "darwin") {
+    # return find_max(" x");
-	# env $((-260932 + $envl *4 -1064  + $envc + $single))
+    return find_max("x");
 	# Darwin's limit is affected by:
 	# * number of environment names (variables+functions)
 	# * size of environment
 	# * the length of arguments:
 	#   a one-char argument lowers the limit by 5
 	#   To be safe assume all arguments are one-char
 	my $max_len = find_max("x");
 	my $env_size = (keys %ENV) * 4 + length(join'',%ENV);
 	$max_len -= $env_size;
 	# Small fudge constant I cannot explain
 	$max_len -= 1064;
 	# If all arguments are 1 char, the limit is 5x smaller
 	$max_len /= 5;
 	return $max_len;
    } else {
 	return find_max("x");
    }
 }
 sub real_max_args() {
    # Find the max number of args allowed on a command line
    # Returns:
    #   The maximal number of args allowed on a command line
    return find_max(" x");
 }
 sub find_max($) {
--- a/src/parallel.pod
+++ b/src/parallel.pod
@@ -5192,7 +5192,7 @@ Your bug report should always include:
 The error message you get (if any). If the error message is not from
 GNU B<parallel> you need to show why you think GNU B<parallel> caused
-these.
+this.
 =item *
@@ -5204,15 +5204,16 @@ version.
 =item *
 A minimal, complete, and verifiable example (See description on
-http://stackoverflow.com/help/mcve).
+https://stackoverflow.com/help/mcve).
-It should be a complete example that others can run that shows the
+It should be a complete example that others can run which shows the
 problem including all files needed to run the example. This should
 preferably be small and simple, so try to remove as many options as
 possible. A combination of B<yes>, B<seq>, B<cat>, B<echo>, B<wc>, and
 B<sleep> can reproduce most errors. If your example requires large
 files, see if you can make them with something like B<seq 100000000> >
-B<bigfile> or B<yes | head -n 1000000000> > B<file>.
+B<bigfile> or B<yes | head -n 1000000000> > B<file>. If you need
 multiple columns: B<paste <(seq 1000) <(seq 1000 1999)>.
 If your example requires remote execution, see if you can use
 B<localhost> - maybe using another login.
@@ -5392,9 +5393,9 @@ For remote usage it uses B<rsync> with B<ssh>.
 =head1 SEE ALSO
-B<ssh>(1), B<ssh-agent>(1), B<sshpass>(1), B<ssh-copy-id>(1),
+B<parallel_tutorial>(1), B<env_parallel>(1), B<parset>(1),
-B<rsync>(1), B<find>(1), B<xargs>(1), B<dirname>(1), B<make>(1),
+B<parsort>(1), B<parallel_alternatives>(1), B<parallel_design>(7),
-B<pexec>(1), B<ppss>(1), B<xjobs>(1), B<prll>(1), B<dxargs>(1),
+B<niceload>(1), B<sql>(1), B<ssh>(1), B<ssh-agent>(1), B<sshpass>(1),
-B<mdm>(1)
+B<ssh-copy-id>(1), B<rsync>(1)
 =cut
--- a/testsuite/tests-to-run/parallel-macos.sh
+++ b/testsuite/tests-to-run/parallel-macos.sh
@@ -3,103 +3,160 @@
 . `which env_parallel.bash`
 env_parallel --session
 true <<'EOF'
 #!/bin/bash
 # Find the command line limit formula
 # macosx.p = 10.7.5
 # El capitan = 10.11.4
 . `which binsearch`
 doit() {
    nfunc=$1
    lfunc=$2
    lfuncname=$3
    nvar=$4
    lvar=$5
    lvarname=$6
    varval="$(perl -e 'print "x "x('$lvar'/2)')"
    varname=$(perl -e 'print "x"x'$lvarname)
    funcval="$(perl -e 'print "x "x('$lfunc'/2)')"
    funcname=$(perl -e 'print "x"x'$lfuncname)
    for a in `seq $nvar`; do eval "export v$varname$a='$varval'" ; done
    for a in `seq $nfunc`; do eval "f$funcname$a() { $funcval; }" ; done
    for a in `seq $nfunc`; do eval "export -f f$funcname$a" ; done
    myrun() {
 	/bin/echo $(perl -e 'print " x"x('$1'/2-5)')
    }
    export -f myrun
    binlen=dummy
    binlen=$(binsearch -q myrun)
    perl -e '
    $envc=(keys %ENV);
    $envn=length join"",(keys %ENV);
    $envv=length join"",(values %ENV);
    $maxlen=3+(262144 - $envn - $envv) / 5 - $envc*2;
    print("Max len = $maxlen\n");
    $bin='$binlen';
    print("$bin=",$bin-$maxlen," $envc $envn $envv\n");
       '
 }
 export -f doit
 val="$(seq 2 100 1000)"
 val="10 20 50 100 200 500 1000"
 val="11 23 57 101 207 503 1007"
 parallel --shuf --tag -k doit ::: $val ::: $val ::: $val ::: $val ::: $val ::: $val
 EOF
 # Each should generate at least 2 commands
 par_many_args() {
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
-    gen500k() { yes | head -c 131000; }
+    geny() { yes | head -c $1; }
    for a in `seq 6000`; do eval "export a$a=1" ; done
-    gen500k | stdout parallel --load 27 -Xkj1  'pecho {} {} {} {} | wc' |
+    geny 10000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
-	perl -pe 's/\d{10,}.\d+ //g'
+	perl -pe 's/( y){10,}//g'
 }
 par_many_var() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    for a in `seq 6000`; do eval "export a$a=1" ; done
-    gen500k | stdout parallel --load 4 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 10000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_many_var_func() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
-    for a in `seq 5000`; do eval "export a$a=1" ; done
+    for a in `seq 2000`; do eval "export a$a=1" ; done
-    for a in `seq 5000`; do eval "a$a() { 1; }" ; done
+    for a in `seq 2000`; do eval "a$a() { 1; }" ; done
-    for a in `seq 5000`; do eval export -f a$a ; done
+    for a in `seq 2000`; do eval export -f a$a ; done
-    gen500k | stdout parallel --load 20 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 20000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_many_func() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
    for a in `seq 5000`; do eval "a$a() { 1; }" ; done
    for a in `seq 5000`; do eval export -f a$a ; done
-    gen500k | stdout parallel --load 5 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 10000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_big_func() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
    big=`seq 1000`
    for a in `seq 1`; do eval "a$a() { '$big'; }" ; done
    for a in `seq 1`; do eval export -f a$a ; done
-    gen500k | stdout parallel --load 2 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 20000 | stdout parallel --load 2 -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_many_var_big_func() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
    big=`seq 1000`
    for a in `seq 5000`; do eval "export a$a=1" ; done
    for a in `seq 10`; do eval "a$a() { '$big'; }" ; done
    for a in `seq 10`; do eval export -f a$a ; done
-    gen500k | stdout parallel --load 5 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 10000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_big_func_name() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
    big=`perl -e print\"x\"x10000`
    for a in `seq 10`; do eval "export a$big$a=1" ; done
-    gen500k | stdout parallel --load 5 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 15000 | stdout parallel -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
 par_big_var_func_name() {
    export LC_ALL=C
    rm -f ~/.parallel/tmp/sshlogin/*/linelen
-    gen500k() { seq -f %f 1000000000000000 1000000000050000 | head -c 131000; }
+    gen() { seq -f %f 1000000000000000 1000000000050000 | head -c $1; }
    pecho() { perl -e 'print "@ARGV\n"' "$@"; }
    export -f pecho
    big=`perl -e print\"x\"x10000`
    for a in `seq 10`; do eval "export a$big$a=1" ; done
    for a in `seq 10`; do eval "a$big$a() { 1; }" ; done
    for a in `seq 10`; do eval export -f a$big$a ; done
-    gen500k | stdout parallel --load 4 -Xkj1  'pecho {} {} {} {} | wc' |
+    gen 10000 | stdout parallel --load 4 -Xkj1  'pecho {} {} {} {} | wc' |
 	perl -pe 's/\d{10,}.\d+ //g'
 }
-scp /usr/local/bin/parallel macosx.p:bin/
+#macsshlogin=ota@mac
 macsshlogin=macosx.p
 scp /usr/local/bin/parallel $macsshlogin:bin/
 export LC_ALL=C
 export -f $(compgen -A function | grep par_)
 #compgen -A function |
 compgen -A function |
    grep par_ |
    LC_ALL=C sort |
-    env_parallel --timeout 3000% --tag -k -S macosx.p
+    env_parallel --timeout 1000% --tag -k -S 6/$macsshlogin 'PATH=$HOME/bin:$PATH; {}'