Fixed bug #59453: PARALLEL_HOME with plus sign causes error.

This commit is contained in:
Ole Tange 2020-12-07 16:24:54 +01:00
parent e1a62d362d
commit 8df731b9ab
10 changed files with 103 additions and 49 deletions

View file

@ -13,15 +13,24 @@
# Fixed cpu-speed: 50% spread=0.7-1.5 ms
# 4-cpu: 30% faster: 9 ms -> 6 ms
# Work area for this script: ./tmp below the directory it is started from.
# TMP is exported so that child processes see the same location.
TMP=$(pwd)/tmp
mkdir -p $TMP
export TMP
if ! /tmp/bin/parallel-20140722 --version; then
wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p*
parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig
parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix /tmp/{.}-bin && make && make install' ::: *sig
perl -i -pe 's/qw\(keys/(keys/' parallel*/src/parallel
mkdir /tmp/bin
parallel cp {} /tmp/bin/'{=s:/.*::=}' ::: parallel*/src/parallel
# Bootstrap: if the reference binary $TMP/bin/parallel-20140722 cannot be run,
# download the released tarballs, build them, and copy every version's
# src/parallel into $TMP/bin under the name of its source directory.
if ! $TMP/bin/parallel-20140722 --version; then
    mkdir -p $TMP/ftp
    (
	# Work in a subshell so the caller's working directory is untouched.
	# Stop here if the directory is unusable instead of downloading and
	# patching files in whatever directory we happen to be in.
	cd $TMP/ftp || exit 1
	# wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
	wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p*
	# Run gpg on every signature file; the key is fetched from a keyserver.
	parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig
	# Unpack, configure, build and install each release below $TMP/ftp.
	parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix '$TMP'/ftp/{.}-bin && make && make install' ::: *sig
	# Patch constructs in the old sources: qw(keys -> (keys and
	# defined(@x) / defined @x -> (@x) / @x.
	# NOTE(review): presumably needed so old releases still run on a
	# current Perl - confirm.
	perl -i -pe 's/qw\(keys/(keys/' parallel*/src/parallel
	# The '(' must be escaped: unescaped it opens a group that is never
	# closed and perl aborts with "Unmatched ( in regex", so the
	# substitution would never be applied.
	perl -i -pe 's/defined\(\@/(\@/' parallel*/src/parallel
	perl -i -pe 's/defined\s+\@/ \@/' parallel*/src/parallel
	# -p: do not complain if the directory is left over from an earlier,
	# partially completed run.
	mkdir -p $TMP/bin
	# {=s:/.*::=} strips everything from the first '/', so each script is
	# stored under the name of its top-level source directory.
	parallel cp {} $TMP/bin/'{=s:/.*::=}' ::: parallel*/src/parallel
    )
fi
measure() {
@ -35,30 +44,30 @@ measure() {
MHZ=1700
# Force cpuspeed at 1.7GHz - seems to give tighter results
forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' &
#forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' &
PATH=/tmp/bin:$PATH
cd /tmp/bin
PATH=$TMP/bin:$PATH
cd $TMP/bin
ls parallel-* |
parallel --shuf -j$CORES --joblog ~/tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
parallel --shuf -j$CORES --joblog +$TMP/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
killall forever
Rscript - <<_
jl<-read.csv("$HOME/tmp/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
jl<-read.csv("$TMP/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
jl\$Command <- as.factor(substr(jl\$Command,
nchar(as.character(jl\$Command))-23,
nchar(as.character(jl\$Command))-5))
pdf("/tmp/boxplot.pdf");
nchar(as.character(jl\$Command))-12,
nchar(as.character(jl\$Command))-5));
pdf("$TMP/boxplot.pdf");
par(cex.axis=0.5);
boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F,
ylab="milliseconds/job",main="GNU Parallel overhead for different versions\n$OUTER trials each running $INNER jobs");
_
cp /tmp/boxplot.pdf $HOME/tmp/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf
evince /tmp/boxplot.pdf
cp $TMP/boxplot.pdf $TMP/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf
evince $TMP/boxplot.pdf
}
#measure 3000 1000 2 1
measure 30 10 2 1
measure 300 100 2 1
measure 3000 1000 2 1
measure 30 10 50% 1
measure 300 100 50% 1
measure 3000 1000 50% 1

View file

@ -1,13 +1,13 @@
Quote of the month:
Today I'm grateful for GNU parallel, especially with the --colsep and
--jobs parameters #GiveThanks
Erin Young @ErinYoun
I also prefer gnu parallel. Mainly because it makes embarrassingly
parallel tasks embarrassingly easy to run on the command line.
-- Vincent D. Warmerdam @fishnets88@twitter
GNU parallel should be taught in class, it is one of the best tools
to run grids of experiments
-- no love deep learning @tetraduzione@twitter
It's not a data migration party until GNU Parallel is involved...
involved
involved
@ -75,6 +75,10 @@ https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html
=== Used ===
GNU parallel should be taught in class, it is one of the best tools
to run grids of experiments
-- no love deep learning @tetraduzione@twitter
I get a weird sense of satisfaction every single time I see the
lovely logo of #GNU Parallel (plus, what an underrated piece of
great software!)

View file

@ -105,9 +105,7 @@ git diff
# Recheck OBS https://build.opensuse.org/package/show/home:tange/parallel
export YYYYMMDD=`yyyymmdd`
export YYYYMMDD=${YYYYMMDD:0:6}22
TAG=MyTag
. .last-doitag.txt
echo "Released as $YYYYMMDD ('$TAG')." | grep MyTag && (STOP;STOP;STOP)
echo "$TAG" | grep ' ' && (STOP;STOP;STOP)
echo "Released as $YYYYMMDD ('$TAG')."
@ -192,9 +190,9 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20201122 ('Biden') released <<[stable]>>
Subject: GNU Parallel 20201222 ('Maradona') released <<[stable]>>
GNU Parallel 20201122 ('Biden') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
GNU Parallel 20201222 ('Maradona') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
<<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -208,15 +206,14 @@ Quote of the month:
New in this release:
<<>>
https://www.youtube.com/watch?v=t_v2Otgt87g
* Bug fixes and man page updates.
News about GNU Parallel:
* https://bash-prompt.net/guides/gnu-parallel-multi-server/
https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/

View file

@ -2757,7 +2757,7 @@ sub read_options() {
}
close $in_fh;
} else {
if(grep /^$profile$/, @config_profiles) {
if(grep /^\Q$profile\E$/, @config_profiles) {
# config file is not required to exist
} else {
::error("$profile not readable.");

View file

@ -3800,7 +3800,7 @@ way to know for certain is to test and measure.
=head2 Limiting factor: RAM
The normal B<grep -f regexs.txt bigfile> works no matter the size of
The normal B<grep -f regexps.txt bigfile> works no matter the size of
bigfile, but if regexps.txt is so big it cannot fit into memory, then
you need to split this.
@ -3854,13 +3854,13 @@ If you can live with duplicated lines and wrong order, it is faster to do:
If the CPU is the limiting factor, parallelization should be done on
the regexps:
cat regexp.txt | parallel --pipe -L1000 --roundrobin --compress \
cat regexps.txt | parallel --pipe -L1000 --roundrobin --compress \
grep -f - -n bigfile | \
sort -un | perl -pe 's/^\d+://'
The command will start one B<grep> per CPU and read I<bigfile> one
time per CPU, but as that is done in parallel, all reads except the
first will be cached in RAM. Depending on the size of I<regexp.txt> it
first will be cached in RAM. Depending on the size of I<regexps.txt> it
may be faster to use B<--block 10m> instead of B<-L1000>.
Some storage systems perform better when reading multiple chunks in
@ -3868,13 +3868,13 @@ parallel. This is true for some RAID systems and for some network file
systems. To parallelize the reading of I<bigfile>:
parallel --pipepart --block 100M -a bigfile -k --compress \
grep -f regexp.txt
grep -f regexps.txt
This will split I<bigfile> into 100MB chunks and run B<grep> on each of
these chunks. To parallelize both reading of I<bigfile> and I<regexp.txt>
combine the two using B<--fifo>:
these chunks. To parallelize reading of both I<bigfile> and I<regexps.txt>,
combine the two using B<--cat>:
parallel --pipepart --block 100M -a bigfile --fifo cat regexp.txt \
parallel --pipepart --block 100M -a bigfile --cat cat regexps.txt \
\| parallel --pipe -L1000 --roundrobin grep -f - {}
If a line matches multiple regexps, the line may be duplicated.

View file

@ -850,12 +850,13 @@ be spawned:
parallel "grep -E 'ls | wc >> c' {} | wc >> c" ::: foo
parallel "LANG=C grep -E 'ls | wc >> c' {}" ::: foo
It is impossible to tell the difference between these without parsing
the string (is the B<|> a pipe in shell or an alternation in a B<grep>
regexp? Is B<LANG=C> a command in B<csh> or setting a variable in
B<bash>? Is B<>>> redirection or part of a regexp?).
It is impossible to tell how B<| wc >>>B< c> should be interpreted
without parsing the string (is the B<|> a pipe in shell or an
alternation in a B<grep> regexp? Is B<LANG=C> a command in B<csh> or
setting a variable in B<bash>? Is B<>>> redirection or part of a
regexp?).
On top of this wrapper scripts will often require a shell to be
On top of this, wrapper scripts will often require a shell to be
spawned.
The downside is that you need to quote special shell chars twice:

View file

@ -851,6 +851,15 @@ par_plus_slot_replacement() {
parallel -k --plus echo '{slot}=$PARALLEL_JOBSLOT={%}' ::: A B C
}
par_PARALLEL_HOME_with_+() {
    # Regression test for bug #59453: run a trivial job while PARALLEL_HOME
    # points to a directory whose path contains both spaces and a '+'.
    echo 'bug #59453: PARALLEL_HOME with plus sign causes error: config not readable'
    # Everything lives below a fresh scratch directory that is removed at the end.
    tmp=$(mktemp -d)
    PARALLEL_HOME="$tmp/ space /a+b"
    export PARALLEL_HOME
    mkdir -p "$PARALLEL_HOME"
    parallel echo ::: Parallel_home_with+
    rm -rf "$tmp"
}
export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | LC_ALL=C sort |
parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |

View file

@ -624,6 +624,21 @@ par_test_cpu_detection_cpuinfo() {
YstArr0BOSgXJ4Xmpu4j9PRpQcgRCckdf4fcSFol9GuGecuj5uBxngHakML8
' | unpack
}
# Test fixture: print a one-line label, then pipe an encoded text blob to
# `unpack` (a helper defined outside this hunk).
# NOTE(review): the blob looks like base64 of compressed data, and per the
# hunk header this fixture belongs to par_test_cpu_detection_cpuinfo, so it
# is presumably a captured /proc/cpuinfo - confirm against `unpack` and the
# surrounding test.
# NOTE(review): the leading "1-1-1-1" of the label presumably states the
# expected detection result; the recorded reference output in this commit
# shows "1 1 1 1" right after this label.
cpu15() {
# Label identifying the machine this sample was taken from.
echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
# Payload: must stay byte-for-byte unchanged.
echo '
KLUv/QRo5Q0ABp9XIkBrqwCI2LZJIts7o1loU/RrgCM1Bkm4qbLeX6WzKj6uMAFQAE8ARwAL
JHEgV0hbZFGyfUlxhRq4zDo7PSwsEOTgX8Ao1WnCAxwArpC+BU+AuELIaYsvGVyhh9u3mDvM
ktMJGRSuA1XhCrVVZGxQn4RcIRm7lUXJAdxIyRVy+qh7W4cZjDtZmUOV1ofD60XGIAiTIrxC
jbMozzbmbu1cuGx5XSGVkSXEFeoGrlu2294ttv1gcEJoeIJIPGxfGWyxlEiQAYXQw0KHjdim
06c4zJwl7eT3XO6A14X/5rufaPdlO5g73vsyW+/ZzmjHMY79iY99NXwp5ztrX+FgV/auiy/7
nfW8Nl/b9T13mOlmuSmpCfVps+B6l9qInNxyptxH4D13eULQrbK1NaE+2KxbSoFBSiADCKLg
fbGJS5rL2Omcpyxu7rvh1eh3e2cmwr178WNRVMbZSoJ4FJWTKXcnKADAIKuqBxBI54gcFYp3
xUIMqhrUyHoLjYPwgszI7eGRdSUOFMYQlpP0pOoEAV0WM1zTXUey4OeJUEZtb+UNcgLSAYUj
iyXJQ3TVfIX50ANedGFbHwEc/JQzJup4YQ==
' | unpack
}
export -f $(compgen -A function | grep ^cpu)
test_one() {
@ -809,6 +824,18 @@ par_test_cpu_detection_lscpu() {
YQqnBNP0ggo=
' | unpack
}
# Test fixture: print a one-line label, then pipe an encoded text blob to
# `unpack` (a helper defined outside this hunk).
# NOTE(review): the blob looks like base64 of compressed data, and per the
# hunk header this fixture belongs to par_test_cpu_detection_lscpu, so it is
# presumably captured `lscpu` output - confirm against `unpack` and the
# surrounding test.
# NOTE(review): the leading "1-1-1-1" of the label presumably states the
# expected detection result; the recorded reference output in this commit
# shows "1 1 1 1" right after this label.
cpu15() {
# Label identifying the machine this sample was taken from.
echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
# Payload: must stay byte-for-byte unchanged.
echo '
KLUv/QRohQoANpVBJCDJVgAHS35yk/0LNr7il7y4NiXZTi5J2xh8EMPkH0ICUAugATkAOAA5
AFDzg//KBxEyshyKJFqSPScHvrD7GRyq62rk2xnH6ldBJBQHNqybEUHnKZJoMR7GO7S16rb9
LK0GBJfkrAs3+uHAsJHJKw7BdVNajAx8kU1wW0JWvkpugQDkdTq5CcuZ9Cq0yVtntJzZXpc6
ttWCsJoH7dvil9FXjm/cX6Zf942XV3RiikGCtXiciQQiTWo61iLRADRNY+EhPZR8lG5P1sJD
appGnczbZqUB6zW9exVt564cI78V+thnfmKZXxkzHRPjH23s3O4WruAE2y8ztjOKvtaOp8tv
j7I/mB0gQIaMujr8eNMPYIxoDLDgY+WX46hwjTmVzU7HpqEtYHxLTLzqx8jKta+0nIvY4e1q
oiCpQLqitFXU0Fyo+a4q4SvbmVMCr0burQ==
' | unpack
}
export -f $(compgen -A function | grep ^cpu)
test_one() {
@ -847,15 +874,16 @@ par_block_negative_prefix() {
par_sql_colsep() {
echo '### SQL should add Vn columns for --colsep'
parallel -k -C' ' --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo /{1}/{2}/{3}/{4}/ \
dburl=sqlite3:///%2ftmp%2fparallel-sql-colsep-$$/bar
parallel -k -C' ' --sqlandworker $dburl echo /{1}/{2}/{3}/{4}/ \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
parallel -k -C' ' echo /{1}/{2}/{3}/{4}/ \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
# TODO this is wrong
parallel -k -C' ' -N3 --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo \
parallel -k -C' ' -N3 --sqlandworker $dburl echo \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
parallel -k -C' ' -N3 echo \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
rm /tmp/parallel-sql-colsep-$$
}
par_sql_CSV() {

View file

@ -6,6 +6,8 @@ par_PARALLEL_ENV ### PARALLEL_ENV as file
par_PARALLEL_ENV OK as file
par_PARALLEL_ENV ### PARALLEL_ENV as fifo
par_PARALLEL_ENV OK as fifo
par_PARALLEL_HOME_with_+ bug #59453: PARALLEL_HOME with plus sign causes error: config not readable
par_PARALLEL_HOME_with_+ Parallel_home_with+
par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
par_X_eta_div_zero Computers / CPU cores / Max jobs to run
par_X_eta_div_zero 0:local / 0 / 0

View file

@ -811,6 +811,8 @@ par_test_cpu_detection_cpuinfo 1-2-2-2 AMD Neo N36L Dual-Core Processor
par_test_cpu_detection_cpuinfo 1 2 2 2
par_test_cpu_detection_cpuinfo 1-1-1-1 Intel Xeon X5675 (mandriva.p)
par_test_cpu_detection_cpuinfo 1 1 1 1
par_test_cpu_detection_cpuinfo 1-1-1-1 Intel(R) Celeron(R) M (eee900)
par_test_cpu_detection_cpuinfo 1 1 1 1
par_test_cpu_detection_cpuinfo 1-4-8-4 Core i7-3632QM Acer laptop
par_test_cpu_detection_cpuinfo 1 4 8 4
par_test_cpu_detection_cpuinfo 1-2-4-2 Core i5-2410M laptop firewall
@ -835,6 +837,8 @@ par_test_cpu_detection_lscpu 1-2-2-2 AMD Neo N36L Dual-Core Processor
par_test_cpu_detection_lscpu 1 2 2 2
par_test_cpu_detection_lscpu 1-1-1-1 Intel Xeon X5675 (mandriva.p)
par_test_cpu_detection_lscpu 1 1 1 1
par_test_cpu_detection_lscpu 1-1-1-1 Intel(R) Celeron(R) M (eee900)
par_test_cpu_detection_lscpu 1 1 1 1
par_test_cpu_detection_lscpu 1-4-8-4 Core i7-3632QM Acer laptop
par_test_cpu_detection_lscpu 1 4 8 4
par_test_cpu_detection_lscpu 1-2-4-2 Core i5-2410M laptop firewall