pod2rst-fix: Work-around some pod2rst conversion errors.

parallel: Fix race condition for --limit 'load'.
2024-11-22 05:57:54 +00:00 · 2022-03-17 19:57:09 +01:00 · 2022-03-17 19:57:09 +01:00 · e40b886a9a
parent 7f73c0fb2c
commit e40b886a9a
9 changed files with 232 additions and 70 deletions
--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -274,7 +274,9 @@ New in this release:

 News about GNU Parallel:

- Bash: GNU Parallel with Curl https://gist.github.com/CMCDragonkai/5914e02df62137e47f32?permalink_comment_id=2617456
+* Bash: GNU Parallel with Curl https://gist.github.com/CMCDragonkai/5914e02df62137e47f32?permalink_comment_id=2617456
+
+* The Pipe Operator Explained https://medium.com/geekculture/the-pipe-operator-explained-cbd41e23775a

 <<>>

--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -246,51 +246,51 @@ parsort.texi: parsort
 	|| echo "Warning: pod2texi not found. Using old parsort.texi"

 parallel.rst: parallel.pod
-	pod2rst --outfile "$(srcdir)"/parallel.rst --infile="$(srcdir)"/parallel.pod \
+	./pod2rst-fix < "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.rst \
 	|| echo "Warning: pod2rst not found. Using old parallel.rst"

 env_parallel.rst: env_parallel.pod
-	pod2rst --outfile "$(srcdir)"/env_parallel.rst --infile="$(srcdir)"/env_parallel.pod \
+	./pod2rst-fix < "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.rst \
 	|| echo "Warning: pod2rst not found. Using old env_parallel.rst"

 parallel_tutorial.rst: parallel_tutorial.pod
-	pod2rst --outfile "$(srcdir)"/parallel_tutorial.rst --infile="$(srcdir)"/parallel_tutorial.pod \
+	./pod2rst-fix < "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.rst \
 	|| echo "Warning: pod2rst not found. Using old parallel_tutorial.rst"

 parallel_book.rst: parallel_book.pod
-	pod2rst --outfile "$(srcdir)"/parallel_book.rst --infile="$(srcdir)"/parallel_book.pod \
+	./pod2rst-fix "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.rst \
 	|| echo "Warning: pod2rst not found. Using old parallel_book.rst"

 parallel_design.rst: parallel_design.pod
-	pod2rst --outfile "$(srcdir)"/parallel_design.rst --infile="$(srcdir)"/parallel_design.pod \
+	./pod2rst-fix "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.rst \
 	|| echo "Warning: pod2rst not found. Using old parallel_design.rst"

 parallel_alternatives.rst: parallel_alternatives.pod
-	pod2rst --outfile "$(srcdir)"/parallel_alternatives.rst --infile="$(srcdir)"/parallel_alternatives.pod \
+	./pod2rst-fix "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.rst \
 	|| echo "Warning: pod2rst not found. Using old parallel_alternatives.rst"

 sem.rst: sem.pod
-	pod2rst --outfile "$(srcdir)"/sem.rst --infile="$(srcdir)"/sem.pod \
+	./pod2rst-fix "$(srcdir)"/sem.pod > "$(srcdir)"/sem.rst \
 	|| echo "Warning: pod2rst not found. Using old sem.rst"

 sql.rst: sql
-	pod2rst --outfile "$(srcdir)"/sql.rst --infile="$(srcdir)"/sql \
+	./pod2rst-fix "$(srcdir)"/sql > "$(srcdir)"/sql.rst \
 	|| echo "Warning: pod2rst not found. Using old sql.rst"

 niceload.rst: niceload.pod
-	pod2rst --outfile "$(srcdir)"/niceload.rst --infile="$(srcdir)"/niceload.pod \
+	./pod2rst-fix "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.rst \
 	|| echo "Warning: pod2rst not found. Using old niceload.rst"

 parcat.rst: parcat.pod
-	pod2rst --outfile "$(srcdir)"/parcat.rst --infile="$(srcdir)"/parcat.pod \
+	./pod2rst-fix "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.rst \
 	|| echo "Warning: pod2rst not found. Using old parcat.rst"

 parset.rst: parset.pod
-	pod2rst --outfile "$(srcdir)"/parset.rst --infile="$(srcdir)"/parset.pod \
+	./pod2rst-fix "$(srcdir)"/parset.pod > "$(srcdir)"/parset.rst \
 	|| echo "Warning: pod2rst not found. Using old parset.rst"

 parsort.rst: parsort
-	pod2rst --outfile "$(srcdir)"/parsort.rst --infile="$(srcdir)"/parsort \
+	./pod2rst-fix "$(srcdir)"/parsort > "$(srcdir)"/parsort.rst \
 	|| echo "Warning: pod2rst not found. Using old parsort.rst"

 parallel.pdf: parallel.pod
--- a/src/parallel
+++ b/src/parallel
@ -3819,7 +3819,7 @@ sub progress() {
            ($Global::host{$w}->ncpus() || "-")." / ".
            $Global::host{$w}->max_jobs_running()."\n";
    }
-    $status = "x"x($termcols+1);
+    $status = "c"x($termcols+1);
    # Select an output format that will fit on a single line
    if(length $status > $termcols) {
        # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs
@ -6868,8 +6868,7 @@ sub limit($) {
 	     	     wc -l |
 	     	 perl -ne 'exit ('$limit' < $_)';
 	     };
-	     export -f load;
-	     load %s;
+	     load %s
 	     !,
 	    );
 	my ($cmd,@args) = split /\s+/,$opt::limit;
@ -6908,9 +6907,9 @@ sub swap_activity($) {
    my $self = shift;
    # Should we update the swap_activity file?
    my $update_swap_activity_file = 0;
-    if(-r $self->{'swap_activity_file'}) {
-        open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
-	    ::die_bug("swap_activity_file-r");
+    # Test with (on 64 core machine):
+    # seq 100 | parallel --lb -j100 'seq 1000 | parallel  --noswap -j 1 true'
+    if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) {
        my $swap_out = <$swap_fh>;
        close $swap_fh;
        if($swap_out =~ /^(\d+)$/) {
@ -9556,28 +9555,34 @@ sub sshlogin_wrap($) {
 		    $script = $tmpdir."/par".
 			join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
 		} while(-e $script);
+		# Create a script from the hex code
+		# that removes itself and runs the commands
 		open($fh,">",$script) || die;
 		# \040 = space - but we remove spaces in the script
 		# ' needed due to rc-shell
                print($fh("rm\040\'$script\'\n",$bashfunc.$cmd));
                close $fh;
+		my $parent = getppid;
+		my $done = 0;
                $SIG{CHLD} = sub { $done = 1; };
 		$pid = fork;
                unless($pid) {
 		    # Make own process group to be able to kill HUP it later
 		    eval { setpgrp };
+		    # Set nice value
 		    eval { setpriority(0,0,$nice) };
+		    # Run the script
                    exec($shell,$script);
 		    die("exec\040failed: $!");
 		}
-		my $parent = getppid;
-		do {
+		while((not $done) and (getppid == $parent)) {
 		    # Parent pid is not changed, so sshd is alive
 		    # Exponential sleep up to 1 sec
                    $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
                    select(undef, undef, undef, $s);
-                } until ($done || getppid != $parent);
+                }
 		if(not $done) {
+		    # sshd is dead: User pressed Ctrl-C
 		    # Kill as per --termseq
 		    my @term_seq = split/,/,$termseq;
 		    if(not @term_seq) {
@ -11580,11 +11585,11 @@ sub results_out($) {
 	my $upper = 100_000_000;
 	# Dir length of 8 chars is supported everywhere
 	my $len = 8;
-	my $dir = "x"x$len;
+	my $dir = "d"x$len;
 	do {
 	    rmdir($testdir."/".$dir);
 	    $len *= 16;
-	    $dir = "x"x$len;
+	    $dir = "d"x$len;
 	} while ($len < $upper and mkdir $testdir."/".$dir);
 	# Then search for the actual max length between $len/16 and $len
 	my $min = $len/16;
@ -11593,7 +11598,7 @@ sub results_out($) {
 	    # If we are within 5 chars of the exact value:
 	    # it is not worth the extra time to find the exact value
 	    my $test = int(($min+$max)/2);
-	    $dir = "x"x$test;
+	    $dir = "d"x$test;
 	    if(mkdir $testdir."/".$dir) {
 		rmdir($testdir."/".$dir);
 		$min = $test;
@ -12416,8 +12421,8 @@ sub real_max_length() {
    # Find the max_length of a command line
    # Returns:
    #   The maximal command line length with 1 byte arguments
-    # return find_max(" x");
-    return find_max("x");
+    # return find_max(" c");
+    return find_max("c");
 }

 sub find_max($) {
@ -12507,7 +12512,7 @@ sub tmux_length($) {
 	    my $tmpfile = ::tmpname("tms");
 	    my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
 		" -S $tmpfile new-session -d -n echo $l".
-		("x"x$l). " && echo $l; rm -f $tmpfile";
+		("t"x$l). " && echo $l; rm -f $tmpfile";
 	    push @out, ::qqx($tmuxcmd);
 	    ::rm($tmpfile);
 	}
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -1854,7 +1854,7 @@ where records end.

 =back

-See also: <--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::>
+See also: B<--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::>


 =item B<--plain>
@ -4763,7 +4763,11 @@ transfers files. B<outrun> must be installed on the remote system.

 You can use B<outrun> in an sshlogin this way:

-  parallel -S 'outrun user@server eval' command
+  parallel -S 'outrun user@server' command
+
+or:
+
+  parallel --ssh outrun -S server command


 =head2 EXAMPLE: Slurm cluster
@ -5981,10 +5985,11 @@ installed software that is not in the VirtualBox images.
 If you cannot reproduce the error on any of the VirtualBox images
 above, see if you can build a VirtualBox image on which you can
 reproduce the error. If not you should assume the debugging will be
-done through you. That will put more burden on you and it is extra
-important you give any information that help. In general the problem
-will be fixed faster and with less work for you if you can reproduce
-the error on a VirtualBox.
+done through you. That will put a lot more burden on you and it is
+extra important you give any information that help. In general the
+problem will be fixed faster and with much less work for you if you
+can reproduce the error on a VirtualBox - even if you have to build a
+VirtualBox image.

 =head2 In summary

--- a/src/parallel_design.pod
+++ b/src/parallel_design.pod
@ -617,11 +617,12 @@ unlinked as soon as they are opened.

 The shell shock bug in B<bash> did not affect GNU B<parallel>, but the
 solutions did. B<bash> first introduced functions in variables named:
-I<BASH_FUNC_myfunc()> and later changed that to I<BASH_FUNC_myfunc%%>. When
-transferring functions GNU B<parallel> reads off the function and changes
-that into a function definition, which is copied to the remote system and
-executed before the actual command is executed. Therefore GNU B<parallel>
-needs to know how to read the function.
+I<BASH_FUNC_myfunc()> and later changed that to
+I<BASH_FUNC_myfunc%%>. When transferring functions GNU B<parallel>
+reads off the function and changes that into a function definition,
+which is copied to the remote system and executed before the actual
+command is executed. Therefore GNU B<parallel> needs to know how to
+read the function.

 From version 20150122 GNU B<parallel> tries both the ()-version and
 the %%-version, and the function definition works on both pre- and
@ -633,6 +634,28 @@ post-shell shock versions of B<bash>.
 The remote system wrapper does some initialization before starting the
 command on the remote system.

+=head3 Make quoting unnecessary by hex encoding everything
+
+When you run B<ssh server foo> then B<foo> has to be quoted once:
+
+  ssh server "echo foo; echo bar"
+
+If you run B<ssh server1 ssh server2 foo> then B<foo> has to be quoted
+twice:
+
+  ssh server1 ssh server2 \'"echo foo; echo bar"\'
+
+GNU B<parallel> avoids this by packing everyting into hex values and
+running a command that does not need quoting:
+
+  perl -X -e GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV
+
+This command reads hex from the command line and converts that to
+bytes that are then eval'ed as a Perl expression.
+
+The string B<GNU_Parallel_worker> is not needed. It is simply there to
+let the user know, that this process is GNU B<parallel> working.
+
 =head3 Ctrl-C and standard error (stderr)

 If the user presses Ctrl-C the user expects jobs to stop. This works
@ -668,44 +691,78 @@ B<nice>ing the remote process is done by B<setpriority(0,0,$nice)>. A
 few old systems do not implement this and B<--nice> is unsupported on
 those.

+
 =head3 Setting $PARALLEL_TMP

 B<$PARALLEL_TMP> is used by B<--fifo> and B<--cat> and must point to a
 non-exitent file in B<$TMPDIR>. This file name is computed on the
 remote system.

+
 =head3 The wrapper

 The wrapper looks like this:

  $shell = $PARALLEL_SHELL || $SHELL;
-  $tmpdir = $TMPDIR;
+  $tmpdir = $TMPDIR || $PARALLEL_REMOTE_TMPDIR;
  $nice = $opt::nice;
+  $termseq = $opt::termseq;
+
+  # Check that $tmpdir is writable
+  -w $tmpdir ||
+      die("$tmpdir is not writable.".
+  	" Set PARALLEL_REMOTE_TMPDIR");
  # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
  do {
      $ENV{PARALLEL_TMP} = $tmpdir."/par".
  	join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
  } while(-e $ENV{PARALLEL_TMP});
+  # Set $script to a non-existent file name in $TMPDIR
+  do {
+      $script = $tmpdir."/par".
+  	join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+  } while(-e $script);
+  # Create a script from the hex code
+  # that removes itself and runs the commands
+  open($fh,">",$script) || die;
+  # ' needed due to rc-shell
+  print($fh("rm \'$script\'\n",$bashfunc.$cmd));
+  close $fh;
+  my $parent = getppid;
+  my $done = 0;
  $SIG{CHLD} = sub { $done = 1; };
  $pid = fork;
  unless($pid) {
      # Make own process group to be able to kill HUP it later
-      setpgrp;
+      eval { setpgrp };
+      # Set nice value
      eval { setpriority(0,0,$nice) };
-      exec $shell, "-c", ($bashfunc."@ARGV");
-      die "exec: $!\n";
+      # Run the script
+      exec($shell,$script);
+      die("exec failed: $!");
  }
-  do {
-      # Parent is not init (ppid=1), so sshd is alive
+  while((not $done) and (getppid == $parent)) {
+      # Parent pid is not changed, so sshd is alive
      # Exponential sleep up to 1 sec
      $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
      select(undef, undef, undef, $s);
-  } until ($done || getppid == 1);
-  # Kill HUP the process group if job not done
-  kill(SIGHUP, -${pid}) unless $done;
+  }
+  if(not $done) {
+      # sshd is dead: User pressed Ctrl-C
+      # Kill as per --termseq
+      my @term_seq = split/,/,$termseq;
+      if(not @term_seq) {
+  	@term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
+      }
+      while(@term_seq && kill(0,-$pid)) {
+  	kill(shift @term_seq, -$pid);
+  	select(undef, undef, undef, (shift @term_seq)/1000);
+      }
+  }
  wait;
  exit ($?&127 ? 128+($?&127) : 1+$?>>8)

+
 =head2 Transferring of variables and functions

 Transferring of variables and functions given by B<--env> is done by
@ -728,9 +785,9 @@ into one single word, which often is longer than 1024 chars.

 When the line to run is > 1000 chars, GNU B<parallel> therefore
 encodes the line to run. The encoding B<bzip2>s the line to run,
-converts this to base64, splits the base64 into 1000 char blocks (so B<csh>
-does not fail), and prepends it with this Perl script that decodes,
-decompresses and B<eval>s the line.
+converts this to base64, splits the base64 into 1000 char blocks (so
+B<csh> does not fail), and prepends it with this Perl script that
+decodes, decompresses and B<eval>s the line.

    @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
    eval "@GNU_Parallel";
@ -1142,12 +1199,12 @@ the whole output of a single job and save it as csv/tsv or SQL.

 =head2 Argument separators ::: :::: :::+ ::::+

-The argument separator B<:::> was chosen because I have never seen B<:::>
-used in any command. The natural choice B<--> would be a bad idea since
-it is not unlikely that the template command will contain B<-->. I have
-seen B<::> used in programming languanges to separate classes, and I
-did not want the user to be confused that the separator had anything
-to do with classes.
+The argument separator B<:::> was chosen because I have never seen
+B<:::> used in any command. The natural choice B<--> would be a bad
+idea since it is not unlikely that the template command will contain
+B<-->. I have seen B<::> used in programming languanges to separate
+classes, and I did not want the user to be confused that the separator
+had anything to do with classes.


 B<:::> also makes a visual separation, which is good if there are
@ -1400,14 +1457,4 @@ So to lessen the frustration and the resulting support, B<--tollef>
 was obsoleted 20130222 and removed one year later.


-=head2 Transferring of variables and functions
-
-Until 20150122 variables and functions were transferred by looking at
-$SHELL to see whether the shell was a B<*csh> shell. If so the
-variables would be set using B<setenv>. Otherwise they would be set
-using B<=>. This caused the content of the variable to be repeated:
-
-echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && setenv VAR foo ||
-export VAR=foo
-
 =cut
--- a/src/pod2rst-fix
+++ b/src/pod2rst-fix
@ -0,0 +1,88 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free
+# Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>
+# or write to the Free Software Foundation, Inc., 51 Franklin St,
+# Fifth Floor, Boston, MA 02110-1301 USA
+#
+# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# This fixes problems in pod2rst conversion
+
+# Conversion errors:
+
+# Fixed:
+# ... B<foo>
+# bar
+
+# Fixed:
+# =item - - a
+
+# Not fixed (RST does not support Bold-Italic):
+# B<cat | xargs -d "\n" -n1 I<command>>
+
+sub pipefunc {
+    my $func = pop;
+
+    my $pid = open(my $kid_to_read, "-|");
+    defined($pid) || die "can't fork: $!";
+    if ($pid) {
+	open STDIN, "<&", $kid_to_read or die;
+	&$func();
+    } else { # child
+	close $kid_to_read;
+	if($_[1]) {
+	    # More than one function remaining: Recurse
+	    pipefunc(@_);
+	} else {
+	    # Only one function remaining: Run it
+	    $func = pop;
+	    &$func();
+	}
+	exit 0;
+    }
+}
+
+sub pre {
+    # Remove comments
+    $_=join("", grep { ! /^#/ } <>);
+    # join lines in each paragraph
+    s/(\S)\n(\S)/$1 $2/g;
+    # quote -
+    s/^=item -/=item \001/gm;
+    print $_;
+}
+
+sub pod2rst {
+    exec "pod2rst";
+}
+
+sub post {
+    while(<STDIN>) {
+	# =item in =item
+	s/- \\[*]/- /;
+	# B<*.log>
+	s/\\\\[*]/\\*/g;
+	# - -
+	s/^-(\s+)\001/-$1\\-/g;
+	print;
+    }
+}
+
+# stdin | pre() | pod2rst() | post()
+pipefunc(*pre,*pod2rst,*post);
+
--- a/testsuite/tests-to-run/parallel-local-0.3s.sh
+++ b/testsuite/tests-to-run/parallel-local-0.3s.sh
@ -865,7 +865,6 @@ par_results_json() {
 	perl -pe 's/\d+\.\d{3}/9.999/g'
 }

-
 par_testquote() {
    testquote() {
 	printf '"#&/\n()*=?'"'" |
--- a/testsuite/tests-to-run/parallel-local-load.sh
+++ b/testsuite/tests-to-run/parallel-local-load.sh
@ -8,6 +8,21 @@
 seq 12 |
    stdout parallel --nice 11 --timeout 10 -j0 -N0 "bzip2 < /dev/zero" > /dev/null &

+par_limit_load() {
+    force_load_to_6() {
+	burn() { bzip2 -9 </dev/zero >/dev/null; }
+	export -f burn
+	parallel --timeout 5 -j0 --nice 19 burn ::: 1 2 3 4 5 6 2>/dev/null
+    }
+    force_load_to_6 &
+    # Give load time to start
+    sleep 1
+    # This should only start after the load stops
+    # and thus take > 4 sec
+    stdout /usr/bin/time -f %e parallel --limit "load 4" sleep ::: 0.8 |
+	perl -ne 'print $_ > 5 ? "OK\n" : "Broken: $_\n"'
+}
+
 par_load_more_10s() {
    echo '### Test --load locally - should take >10s'
    stdout /usr/bin/time -f %e parallel --load 10 sleep ::: 1 |
--- a/testsuite/wanted-results/parallel-local-load
+++ b/testsuite/wanted-results/parallel-local-load
@ -1,3 +1,4 @@
+par_limit_load	OK
 par_load_file_less_10s	### Test --load read from a file - less than 10s
 par_load_file_less_10s	OK
 par_load_file_more_10s	### Test --load read from a file - more than 10s