pod2rst-fix: Work-around some pod2rst conversion errors.

parallel: Fix race condition for --limit 'load'.
This commit is contained in:
Ole Tange 2022-03-17 19:57:09 +01:00
parent 7f73c0fb2c
commit e40b886a9a
9 changed files with 232 additions and 70 deletions

View file

@ -274,7 +274,9 @@ New in this release:
News about GNU Parallel:
Bash: GNU Parallel with Curl https://gist.github.com/CMCDragonkai/5914e02df62137e47f32?permalink_comment_id=2617456
* Bash: GNU Parallel with Curl https://gist.github.com/CMCDragonkai/5914e02df62137e47f32?permalink_comment_id=2617456
* The Pipe Operator Explained https://medium.com/geekculture/the-pipe-operator-explained-cbd41e23775a
<<>>

View file

@ -246,51 +246,51 @@ parsort.texi: parsort
|| echo "Warning: pod2texi not found. Using old parsort.texi"
parallel.rst: parallel.pod
pod2rst --outfile "$(srcdir)"/parallel.rst --infile="$(srcdir)"/parallel.pod \
./pod2rst-fix < "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.rst \
|| echo "Warning: pod2rst not found. Using old parallel.rst"
env_parallel.rst: env_parallel.pod
pod2rst --outfile "$(srcdir)"/env_parallel.rst --infile="$(srcdir)"/env_parallel.pod \
./pod2rst-fix < "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.rst \
|| echo "Warning: pod2rst not found. Using old env_parallel.rst"
parallel_tutorial.rst: parallel_tutorial.pod
pod2rst --outfile "$(srcdir)"/parallel_tutorial.rst --infile="$(srcdir)"/parallel_tutorial.pod \
./pod2rst-fix < "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.rst \
|| echo "Warning: pod2rst not found. Using old parallel_tutorial.rst"
parallel_book.rst: parallel_book.pod
pod2rst --outfile "$(srcdir)"/parallel_book.rst --infile="$(srcdir)"/parallel_book.pod \
./pod2rst-fix "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.rst \
|| echo "Warning: pod2rst not found. Using old parallel_book.rst"
parallel_design.rst: parallel_design.pod
pod2rst --outfile "$(srcdir)"/parallel_design.rst --infile="$(srcdir)"/parallel_design.pod \
./pod2rst-fix "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.rst \
|| echo "Warning: pod2rst not found. Using old parallel_design.rst"
parallel_alternatives.rst: parallel_alternatives.pod
pod2rst --outfile "$(srcdir)"/parallel_alternatives.rst --infile="$(srcdir)"/parallel_alternatives.pod \
./pod2rst-fix "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.rst \
|| echo "Warning: pod2rst not found. Using old parallel_alternatives.rst"
sem.rst: sem.pod
pod2rst --outfile "$(srcdir)"/sem.rst --infile="$(srcdir)"/sem.pod \
./pod2rst-fix "$(srcdir)"/sem.pod > "$(srcdir)"/sem.rst \
|| echo "Warning: pod2rst not found. Using old sem.rst"
sql.rst: sql
pod2rst --outfile "$(srcdir)"/sql.rst --infile="$(srcdir)"/sql \
./pod2rst-fix "$(srcdir)"/sql > "$(srcdir)"/sql.rst \
|| echo "Warning: pod2rst not found. Using old sql.rst"
niceload.rst: niceload.pod
pod2rst --outfile "$(srcdir)"/niceload.rst --infile="$(srcdir)"/niceload.pod \
./pod2rst-fix "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.rst \
|| echo "Warning: pod2rst not found. Using old niceload.rst"
parcat.rst: parcat.pod
pod2rst --outfile "$(srcdir)"/parcat.rst --infile="$(srcdir)"/parcat.pod \
./pod2rst-fix "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.rst \
|| echo "Warning: pod2rst not found. Using old parcat.rst"
parset.rst: parset.pod
pod2rst --outfile "$(srcdir)"/parset.rst --infile="$(srcdir)"/parset.pod \
./pod2rst-fix "$(srcdir)"/parset.pod > "$(srcdir)"/parset.rst \
|| echo "Warning: pod2rst not found. Using old parset.rst"
parsort.rst: parsort
pod2rst --outfile "$(srcdir)"/parsort.rst --infile="$(srcdir)"/parsort \
./pod2rst-fix "$(srcdir)"/parsort > "$(srcdir)"/parsort.rst \
|| echo "Warning: pod2rst not found. Using old parsort.rst"
parallel.pdf: parallel.pod

View file

@ -3819,7 +3819,7 @@ sub progress() {
($Global::host{$w}->ncpus() || "-")." / ".
$Global::host{$w}->max_jobs_running()."\n";
}
$status = "x"x($termcols+1);
$status = "c"x($termcols+1);
# Select an output format that will fit on a single line
if(length $status > $termcols) {
# sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs
@ -6868,8 +6868,7 @@ sub limit($) {
wc -l |
perl -ne 'exit ('$limit' < $_)';
};
export -f load;
load %s;
load %s
!,
);
my ($cmd,@args) = split /\s+/,$opt::limit;
@ -6908,9 +6907,9 @@ sub swap_activity($) {
my $self = shift;
# Should we update the swap_activity file?
my $update_swap_activity_file = 0;
if(-r $self->{'swap_activity_file'}) {
open(my $swap_fh, "<", $self->{'swap_activity_file'}) ||
::die_bug("swap_activity_file-r");
# Test with (on 64 core machine):
# seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true'
if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) {
my $swap_out = <$swap_fh>;
close $swap_fh;
if($swap_out =~ /^(\d+)$/) {
@ -9556,28 +9555,34 @@ sub sshlogin_wrap($) {
$script = $tmpdir."/par".
join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
} while(-e $script);
# Create a script from the hex code
# that removes itself and runs the commands
open($fh,">",$script) || die;
# \040 = space - but we remove spaces in the script
# ' needed due to rc-shell
print($fh("rm\040\'$script\'\n",$bashfunc.$cmd));
close $fh;
my $parent = getppid;
my $done = 0;
$SIG{CHLD} = sub { $done = 1; };
$pid = fork;
unless($pid) {
# Make own process group to be able to kill HUP it later
eval { setpgrp };
# Set nice value
eval { setpriority(0,0,$nice) };
# Run the script
exec($shell,$script);
die("exec\040failed: $!");
}
my $parent = getppid;
do {
while((not $done) and (getppid == $parent)) {
# Parent pid is not changed, so sshd is alive
# Exponential sleep up to 1 sec
$s = $s < 1 ? 0.001 + $s * 1.03 : $s;
select(undef, undef, undef, $s);
} until ($done || getppid != $parent);
}
if(not $done) {
# sshd is dead: User pressed Ctrl-C
# Kill as per --termseq
my @term_seq = split/,/,$termseq;
if(not @term_seq) {
@ -11580,11 +11585,11 @@ sub results_out($) {
my $upper = 100_000_000;
# Dir length of 8 chars is supported everywhere
my $len = 8;
my $dir = "x"x$len;
my $dir = "d"x$len;
do {
rmdir($testdir."/".$dir);
$len *= 16;
$dir = "x"x$len;
$dir = "d"x$len;
} while ($len < $upper and mkdir $testdir."/".$dir);
# Then search for the actual max length between $len/16 and $len
my $min = $len/16;
@ -11593,7 +11598,7 @@ sub results_out($) {
# If we are within 5 chars of the exact value:
# it is not worth the extra time to find the exact value
my $test = int(($min+$max)/2);
$dir = "x"x$test;
$dir = "d"x$test;
if(mkdir $testdir."/".$dir) {
rmdir($testdir."/".$dir);
$min = $test;
@ -12416,8 +12421,8 @@ sub real_max_length() {
# Find the max_length of a command line
# Returns:
# The maximal command line length with 1 byte arguments
# return find_max(" x");
return find_max("x");
# return find_max(" c");
return find_max("c");
}
sub find_max($) {
@ -12507,7 +12512,7 @@ sub tmux_length($) {
my $tmpfile = ::tmpname("tms");
my $tmuxcmd = $ENV{'PARALLEL_TMUX'}.
" -S $tmpfile new-session -d -n echo $l".
("x"x$l). " && echo $l; rm -f $tmpfile";
("t"x$l). " && echo $l; rm -f $tmpfile";
push @out, ::qqx($tmuxcmd);
::rm($tmpfile);
}

View file

@ -1854,7 +1854,7 @@ where records end.
=back
See also: <--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::>
See also: B<--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::>
=item B<--plain>
@ -4763,7 +4763,11 @@ transfers files. B<outrun> must be installed on the remote system.
You can use B<outrun> in an sshlogin this way:
parallel -S 'outrun user@server eval' command
parallel -S 'outrun user@server' command
or:
parallel --ssh outrun -S server command
=head2 EXAMPLE: Slurm cluster
@ -5981,10 +5985,11 @@ installed software that is not in the VirtualBox images.
If you cannot reproduce the error on any of the VirtualBox images
above, see if you can build a VirtualBox image on which you can
reproduce the error. If not you should assume the debugging will be
done through you. That will put more burden on you and it is extra
important you give any information that help. In general the problem
will be fixed faster and with less work for you if you can reproduce
the error on a VirtualBox.
done through you. That will put a lot more burden on you and it is
extra important you give any information that help. In general the
problem will be fixed faster and with much less work for you if you
can reproduce the error on a VirtualBox - even if you have to build a
VirtualBox image.
=head2 In summary

View file

@ -617,11 +617,12 @@ unlinked as soon as they are opened.
The shell shock bug in B<bash> did not affect GNU B<parallel>, but the
solutions did. B<bash> first introduced functions in variables named:
I<BASH_FUNC_myfunc()> and later changed that to I<BASH_FUNC_myfunc%%>. When
transferring functions GNU B<parallel> reads off the function and changes
that into a function definition, which is copied to the remote system and
executed before the actual command is executed. Therefore GNU B<parallel>
needs to know how to read the function.
I<BASH_FUNC_myfunc()> and later changed that to
I<BASH_FUNC_myfunc%%>. When transferring functions GNU B<parallel>
reads off the function and changes that into a function definition,
which is copied to the remote system and executed before the actual
command is executed. Therefore GNU B<parallel> needs to know how to
read the function.
From version 20150122 GNU B<parallel> tries both the ()-version and
the %%-version, and the function definition works on both pre- and
@ -633,6 +634,28 @@ post-shell shock versions of B<bash>.
The remote system wrapper does some initialization before starting the
command on the remote system.
=head3 Make quoting unnecessary by hex encoding everything
When you run B<ssh server foo> then B<foo> has to be quoted once:
ssh server "echo foo; echo bar"
If you run B<ssh server1 ssh server2 foo> then B<foo> has to be quoted
twice:
ssh server1 ssh server2 \'"echo foo; echo bar"\'
GNU B<parallel> avoids this by packing everyting into hex values and
running a command that does not need quoting:
perl -X -e GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV
This command reads hex from the command line and converts that to
bytes that are then eval'ed as a Perl expression.
The string B<GNU_Parallel_worker> is not needed. It is simply there to
let the user know, that this process is GNU B<parallel> working.
=head3 Ctrl-C and standard error (stderr)
If the user presses Ctrl-C the user expects jobs to stop. This works
@ -668,44 +691,78 @@ B<nice>ing the remote process is done by B<setpriority(0,0,$nice)>. A
few old systems do not implement this and B<--nice> is unsupported on
those.
=head3 Setting $PARALLEL_TMP
B<$PARALLEL_TMP> is used by B<--fifo> and B<--cat> and must point to a
non-exitent file in B<$TMPDIR>. This file name is computed on the
remote system.
=head3 The wrapper
The wrapper looks like this:
$shell = $PARALLEL_SHELL || $SHELL;
$tmpdir = $TMPDIR;
$tmpdir = $TMPDIR || $PARALLEL_REMOTE_TMPDIR;
$nice = $opt::nice;
$termseq = $opt::termseq;
# Check that $tmpdir is writable
-w $tmpdir ||
die("$tmpdir is not writable.".
" Set PARALLEL_REMOTE_TMPDIR");
# Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
do {
$ENV{PARALLEL_TMP} = $tmpdir."/par".
join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
} while(-e $ENV{PARALLEL_TMP});
# Set $script to a non-existent file name in $TMPDIR
do {
$script = $tmpdir."/par".
join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
} while(-e $script);
# Create a script from the hex code
# that removes itself and runs the commands
open($fh,">",$script) || die;
# ' needed due to rc-shell
print($fh("rm \'$script\'\n",$bashfunc.$cmd));
close $fh;
my $parent = getppid;
my $done = 0;
$SIG{CHLD} = sub { $done = 1; };
$pid = fork;
unless($pid) {
# Make own process group to be able to kill HUP it later
setpgrp;
eval { setpgrp };
# Set nice value
eval { setpriority(0,0,$nice) };
exec $shell, "-c", ($bashfunc."@ARGV");
die "exec: $!\n";
# Run the script
exec($shell,$script);
die("exec failed: $!");
}
do {
# Parent is not init (ppid=1), so sshd is alive
while((not $done) and (getppid == $parent)) {
# Parent pid is not changed, so sshd is alive
# Exponential sleep up to 1 sec
$s = $s < 1 ? 0.001 + $s * 1.03 : $s;
select(undef, undef, undef, $s);
} until ($done || getppid == 1);
# Kill HUP the process group if job not done
kill(SIGHUP, -${pid}) unless $done;
}
if(not $done) {
# sshd is dead: User pressed Ctrl-C
# Kill as per --termseq
my @term_seq = split/,/,$termseq;
if(not @term_seq) {
@term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25);
}
while(@term_seq && kill(0,-$pid)) {
kill(shift @term_seq, -$pid);
select(undef, undef, undef, (shift @term_seq)/1000);
}
}
wait;
exit ($?&127 ? 128+($?&127) : 1+$?>>8)
=head2 Transferring of variables and functions
Transferring of variables and functions given by B<--env> is done by
@ -728,9 +785,9 @@ into one single word, which often is longer than 1024 chars.
When the line to run is > 1000 chars, GNU B<parallel> therefore
encodes the line to run. The encoding B<bzip2>s the line to run,
converts this to base64, splits the base64 into 1000 char blocks (so B<csh>
does not fail), and prepends it with this Perl script that decodes,
decompresses and B<eval>s the line.
converts this to base64, splits the base64 into 1000 char blocks (so
B<csh> does not fail), and prepends it with this Perl script that
decodes, decompresses and B<eval>s the line.
@GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
eval "@GNU_Parallel";
@ -1142,12 +1199,12 @@ the whole output of a single job and save it as csv/tsv or SQL.
=head2 Argument separators ::: :::: :::+ ::::+
The argument separator B<:::> was chosen because I have never seen B<:::>
used in any command. The natural choice B<--> would be a bad idea since
it is not unlikely that the template command will contain B<-->. I have
seen B<::> used in programming languanges to separate classes, and I
did not want the user to be confused that the separator had anything
to do with classes.
The argument separator B<:::> was chosen because I have never seen
B<:::> used in any command. The natural choice B<--> would be a bad
idea since it is not unlikely that the template command will contain
B<-->. I have seen B<::> used in programming languanges to separate
classes, and I did not want the user to be confused that the separator
had anything to do with classes.
B<:::> also makes a visual separation, which is good if there are
@ -1400,14 +1457,4 @@ So to lessen the frustration and the resulting support, B<--tollef>
was obsoleted 20130222 and removed one year later.
=head2 Transferring of variables and functions
Until 20150122 variables and functions were transferred by looking at
$SHELL to see whether the shell was a B<*csh> shell. If so the
variables would be set using B<setenv>. Otherwise they would be set
using B<=>. This caused the content of the variable to be repeated:
echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && setenv VAR foo ||
export VAR=foo
=cut

88
src/pod2rst-fix Executable file
View file

@ -0,0 +1,88 @@
#!/usr/bin/perl
# Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free
# Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>
# or write to the Free Software Foundation, Inc., 51 Franklin St,
# Fifth Floor, Boston, MA 02110-1301 USA
#
# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc.
# SPDX-License-Identifier: GPL-3.0-or-later
# This fixes problems in pod2rst conversion
# Conversion errors:
# Fixed:
# ... B<foo>
# bar
# Fixed:
# =item - - a
# Not fixed (RST does not support Bold-Italic):
# B<cat | xargs -d "\n" -n1 I<command>>
sub pipefunc {
my $func = pop;
my $pid = open(my $kid_to_read, "-|");
defined($pid) || die "can't fork: $!";
if ($pid) {
open STDIN, "<&", $kid_to_read or die;
&$func();
} else { # child
close $kid_to_read;
if($_[1]) {
# More than one function remaining: Recurse
pipefunc(@_);
} else {
# Only one function remaining: Run it
$func = pop;
&$func();
}
exit 0;
}
}
sub pre {
# Remove comments
$_=join("", grep { ! /^#/ } <>);
# join lines in each paragraph
s/(\S)\n(\S)/$1 $2/g;
# quote -
s/^=item -/=item \001/gm;
print $_;
}
sub pod2rst {
exec "pod2rst";
}
sub post {
while(<STDIN>) {
# =item in =item
s/- \\[*]/- /;
# B<*.log>
s/\\\\[*]/\\*/g;
# - -
s/^-(\s+)\001/-$1\\-/g;
print;
}
}
# stdin | pre() | pod2rst() | post()
pipefunc(*pre,*pod2rst,*post);

View file

@ -865,7 +865,6 @@ par_results_json() {
perl -pe 's/\d+\.\d{3}/9.999/g'
}
par_testquote() {
testquote() {
printf '"#&/\n()*=?'"'" |

View file

@ -8,6 +8,21 @@
seq 12 |
stdout parallel --nice 11 --timeout 10 -j0 -N0 "bzip2 < /dev/zero" > /dev/null &
par_limit_load() {
force_load_to_6() {
burn() { bzip2 -9 </dev/zero >/dev/null; }
export -f burn
parallel --timeout 5 -j0 --nice 19 burn ::: 1 2 3 4 5 6 2>/dev/null
}
force_load_to_6 &
# Give load time to start
sleep 1
# This should only start after the load stops
# and thus take > 4 sec
stdout /usr/bin/time -f %e parallel --limit "load 4" sleep ::: 0.8 |
perl -ne 'print $_ > 5 ? "OK\n" : "Broken: $_\n"'
}
par_load_more_10s() {
echo '### Test --load locally - should take >10s'
stdout /usr/bin/time -f %e parallel --load 10 sleep ::: 1 |

View file

@ -1,3 +1,4 @@
par_limit_load OK
par_load_file_less_10s ### Test --load read from a file - less than 10s
par_load_file_less_10s OK
par_load_file_more_10s ### Test --load read from a file - more than 10s