Fixed bug #59453: PARALLEL_HOME with plus sign causes error.

This commit is contained in:
Ole Tange 2020-12-07 16:24:54 +01:00
parent e1a62d362d
commit 8df731b9ab
10 changed files with 103 additions and 49 deletions

View file

@ -13,15 +13,24 @@
# Fixed cpu-speed: 50% spread=0.7-1.5 ms # Fixed cpu-speed: 50% spread=0.7-1.5 ms
# 4-cpu: 30% faster: 9 ms -> 6 ms # 4-cpu: 30% faster: 9 ms -> 6 ms
TMP=`pwd`/tmp
export TMP
mkdir -p $TMP
if ! /tmp/bin/parallel-20140722 --version; then if ! $TMP/bin/parallel-20140722 --version; then
wget -c ftp://ftp.gnu.org/old-gnu/parallel/p* mkdir -p $TMP/ftp
(
cd $TMP/ftp
# wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p* wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p*
parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig
parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix /tmp/{.}-bin && make && make install' ::: *sig parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix '$TMP'/ftp/{.}-bin && make && make install' ::: *sig
perl -i -pe 's/qw\(keys/(keys/' parallel*/src/parallel perl -i -pe 's/qw\(keys/(keys/' parallel*/src/parallel
# The literal "(" must be escaped: unescaped it opens a capture group that is never closed, so perl aborts with "Unmatched ( in regex" and patches nothing.
mkdir /tmp/bin perl -i -pe 's/defined\(\@/(\@/' parallel*/src/parallel
parallel cp {} /tmp/bin/'{=s:/.*::=}' ::: parallel*/src/parallel perl -i -pe 's/defined\s+\@/ \@/' parallel*/src/parallel
mkdir $TMP/bin
parallel cp {} $TMP/bin/'{=s:/.*::=}' ::: parallel*/src/parallel
)
fi fi
measure() { measure() {
@ -35,30 +44,30 @@ measure() {
MHZ=1700 MHZ=1700
# Force cpuspeed at 1.7GHz - seems to give tighter results # Force cpuspeed at 1.7GHz - seems to give tighter results
forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' & #forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' &
PATH=/tmp/bin:$PATH PATH=$TMP/bin:$PATH
cd /tmp/bin cd $TMP/bin
ls parallel-* | ls parallel-* |
parallel --shuf -j$CORES --joblog ~/tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) - parallel --shuf -j$CORES --joblog +$TMP/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
killall forever killall forever
Rscript - <<_ Rscript - <<_
jl<-read.csv("$HOME/tmp/joblog$CORES-$INNER-$OUTER.csv",sep="\t"); jl<-read.csv("$TMP/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
jl\$Command <- as.factor(substr(jl\$Command, jl\$Command <- as.factor(substr(jl\$Command,
nchar(as.character(jl\$Command))-23, nchar(as.character(jl\$Command))-12,
nchar(as.character(jl\$Command))-5)) nchar(as.character(jl\$Command))-5));
pdf("/tmp/boxplot.pdf"); pdf("$TMP/boxplot.pdf");
par(cex.axis=0.5); par(cex.axis=0.5);
boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F, boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F,
ylab="milliseconds/job",main="GNU Parallel overhead for different versions\n$OUTER trials each running $INNER jobs"); ylab="milliseconds/job",main="GNU Parallel overhead for different versions\n$OUTER trials each running $INNER jobs");
_ _
cp /tmp/boxplot.pdf $HOME/tmp/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf cp $TMP/boxplot.pdf $TMP/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf
evince /tmp/boxplot.pdf evince $TMP/boxplot.pdf
} }
#measure 3000 1000 2 1 #measure 3000 1000 2 1
measure 30 10 2 1 measure 30 10 50% 1
measure 300 100 2 1 measure 300 100 50% 1
measure 3000 1000 2 1 measure 3000 1000 50% 1

View file

@ -1,13 +1,13 @@
Quote of the month: Quote of the month:
Today I'm grateful for GNU parallel, especially with the --colsep and
--jobs parameters #GiveThanks
-- Erin Young @ErinYoun
I also prefer gnu parallel. Mainly because it makes embarrassingly I also prefer gnu parallel. Mainly because it makes embarrassingly
parallel tasks embarrassingly easy to run on the command line. parallel tasks embarrassingly easy to run on the command line.
-- Vincent D. Warmerdam @fishnets88@twitter -- Vincent D. Warmerdam @fishnets88@twitter
GNU parallel should be taught in class, it is one of the best tools
to run grids of experiments
-- no love deep learning @tetraduzione@twitter
It's not a data migration party until GNU Parallel is involved... It's not a data migration party until GNU Parallel is involved...
involved involved
involved involved
@ -75,6 +75,10 @@ https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html
=== Used === === Used ===
GNU parallel should be taught in class, it is one of the best tools
to run grids of experiments
-- no love deep learning @tetraduzione@twitter
I get a weird sense of satisfaction every single time I see the I get a weird sense of satisfaction every single time I see the
lovely logo of #GNU Parallel (plus, what an underrated piece of lovely logo of #GNU Parallel (plus, what an underrated piece of
great software!) great software!)

View file

@ -105,9 +105,7 @@ git diff
# Recheck OBS https://build.opensuse.org/package/show/home:tange/parallel # Recheck OBS https://build.opensuse.org/package/show/home:tange/parallel
export YYYYMMDD=`yyyymmdd` . .last-doitag.txt
export YYYYMMDD=${YYYYMMDD:0:6}22
TAG=MyTag
echo "Released as $YYYYMMDD ('$TAG')." | grep MyTag && (STOP;STOP;STOP) echo "Released as $YYYYMMDD ('$TAG')." | grep MyTag && (STOP;STOP;STOP)
echo "$TAG" | grep ' ' && (STOP;STOP;STOP) echo "$TAG" | grep ' ' && (STOP;STOP;STOP)
echo "Released as $YYYYMMDD ('$TAG')." echo "Released as $YYYYMMDD ('$TAG')."
@ -192,9 +190,9 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm> stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20201122 ('Biden') released <<[stable]>> Subject: GNU Parallel 20201222 ('Maradona') released <<[stable]>>
GNU Parallel 20201122 ('Biden') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ GNU Parallel 20201222 ('Maradona') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
<<No new functionality was introduced so this is a good candidate for a stable release.>> <<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -208,15 +206,14 @@ Quote of the month:
New in this release: New in this release:
<<>> <<>>
https://www.youtube.com/watch?v=t_v2Otgt87g
* Bug fixes and man page updates. * Bug fixes and man page updates.
News about GNU Parallel: News about GNU Parallel:
* https://bash-prompt.net/guides/gnu-parallel-multi-server/ https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/

View file

@ -2757,7 +2757,7 @@ sub read_options() {
} }
close $in_fh; close $in_fh;
} else { } else {
if(grep /^$profile$/, @config_profiles) { if(grep /^\Q$profile\E$/, @config_profiles) {
# config file is not required to exist # config file is not required to exist
} else { } else {
::error("$profile not readable."); ::error("$profile not readable.");

View file

@ -3800,7 +3800,7 @@ way to know for certain is to test and measure.
=head2 Limiting factor: RAM =head2 Limiting factor: RAM
The normal B<grep -f regexs.txt bigfile> works no matter the size of The normal B<grep -f regexps.txt bigfile> works no matter the size of
bigfile, but if regexps.txt is so big it cannot fit into memory, then bigfile, but if regexps.txt is so big it cannot fit into memory, then
you need to split this. you need to split this.
@ -3854,13 +3854,13 @@ If you can live with duplicated lines and wrong order, it is faster to do:
If the CPU is the limiting factor parallelization should be done on If the CPU is the limiting factor parallelization should be done on
the regexps: the regexps:
cat regexp.txt | parallel --pipe -L1000 --roundrobin --compress \ cat regexps.txt | parallel --pipe -L1000 --roundrobin --compress \
grep -f - -n bigfile | \ grep -f - -n bigfile | \
sort -un | perl -pe 's/^\d+://' sort -un | perl -pe 's/^\d+://'
The command will start one B<grep> per CPU and read I<bigfile> one The command will start one B<grep> per CPU and read I<bigfile> one
time per CPU, but as that is done in parallel, all reads except the time per CPU, but as that is done in parallel, all reads except the
first will be cached in RAM. Depending on the size of I<regexp.txt> it first will be cached in RAM. Depending on the size of I<regexps.txt> it
may be faster to use B<--block 10m> instead of B<-L1000>. may be faster to use B<--block 10m> instead of B<-L1000>.
Some storage systems perform better when reading multiple chunks in Some storage systems perform better when reading multiple chunks in
@ -3868,13 +3868,13 @@ parallel. This is true for some RAID systems and for some network file
systems. To parallelize the reading of I<bigfile>: systems. To parallelize the reading of I<bigfile>:
parallel --pipepart --block 100M -a bigfile -k --compress \ parallel --pipepart --block 100M -a bigfile -k --compress \
grep -f regexp.txt grep -f regexps.txt
This will split I<bigfile> into 100MB chunks and run B<grep> on each of This will split I<bigfile> into 100MB chunks and run B<grep> on each of
these chunks. To parallelize both reading of I<bigfile> and I<regexp.txt> these chunks. To parallelize both reading of I<bigfile> and I<regexps.txt>
combine the two using B<--fifo>: combine the two using B<--cat>:
parallel --pipepart --block 100M -a bigfile --fifo cat regexp.txt \ parallel --pipepart --block 100M -a bigfile --cat cat regexps.txt \
\| parallel --pipe -L1000 --roundrobin grep -f - {} \| parallel --pipe -L1000 --roundrobin grep -f - {}
If a line matches multiple regexps, the line may be duplicated. If a line matches multiple regexps, the line may be duplicated.

View file

@ -850,12 +850,13 @@ be spawned:
parallel "grep -E 'ls | wc >> c' {} | wc >> c" ::: foo parallel "grep -E 'ls | wc >> c' {} | wc >> c" ::: foo
parallel "LANG=C grep -E 'ls | wc >> c' {}" ::: foo parallel "LANG=C grep -E 'ls | wc >> c' {}" ::: foo
It is impossible to tell the difference between these without parsing It is impossible to tell how B<| wc >>>B< c> should be interpreted
the string (is the B<|> a pipe in shell or an alternation in a B<grep> without parsing the string (is the B<|> a pipe in shell or an
regexp? Is B<LANG=C> a command in B<csh> or setting a variable in alternation in a B<grep> regexp? Is B<LANG=C> a command in B<csh> or
B<bash>? Is B<>>> redirection or part of a regexp?). setting a variable in B<bash>? Is B<>>> redirection or part of a
regexp?).
On top of this wrapper scripts will often require a shell to be On top of this, wrapper scripts will often require a shell to be
spawned. spawned.
The downside is that you need to quote special shell chars twice: The downside is that you need to quote special shell chars twice:

View file

@ -851,6 +851,15 @@ par_plus_slot_replacement() {
parallel -k --plus echo '{slot}=$PARALLEL_JOBSLOT={%}' ::: A B C parallel -k --plus echo '{slot}=$PARALLEL_JOBSLOT={%}' ::: A B C
} }
par_PARALLEL_HOME_with_+() {
    # Regression test for bug #59453: run parallel with a PARALLEL_HOME
    # path that contains a plus sign (and spaces).
    echo 'bug #59453: PARALLEL_HOME with plus sign causes error: config not readable'
    # Scratch directory holding the oddly named PARALLEL_HOME.
    tmp=$(mktemp -d)
    PARALLEL_HOME="$tmp/ space /a+b"
    export PARALLEL_HOME
    mkdir -p "$PARALLEL_HOME"
    # Expected to print just its argument.
    parallel echo ::: Parallel_home_with+
    # Remove the scratch directory again.
    rm -rf "$tmp"
}
export -f $(compgen -A function | grep par_) export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | LC_ALL=C sort | compgen -A function | grep par_ | LC_ALL=C sort |
parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' | parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |

View file

@ -624,6 +624,21 @@ par_test_cpu_detection_cpuinfo() {
YstArr0BOSgXJ4Xmpu4j9PRpQcgRCckdf4fcSFol9GuGecuj5uBxngHakML8 YstArr0BOSgXJ4Xmpu4j9PRpQcgRCckdf4fcSFol9GuGecuj5uBxngHakML8
' | unpack ' | unpack
} }
# cpu15: test fixture for the CPU detection test (cpuinfo variant).
# Prints a label line for the machine, then feeds an encoded data blob to
# `unpack` (defined elsewhere in this file).
# NOTE(review): the leading "1-1-1-1" in the label presumably lists the
# expected detection counts for this machine - confirm against test_one.
cpu15() {
echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
# NOTE(review): blob looks like base64-encoded compressed data captured from
# the machine named above - confirm against the `unpack` helper.
echo '
KLUv/QRo5Q0ABp9XIkBrqwCI2LZJIts7o1loU/RrgCM1Bkm4qbLeX6WzKj6uMAFQAE8ARwAL
JHEgV0hbZFGyfUlxhRq4zDo7PSwsEOTgX8Ao1WnCAxwArpC+BU+AuELIaYsvGVyhh9u3mDvM
ktMJGRSuA1XhCrVVZGxQn4RcIRm7lUXJAdxIyRVy+qh7W4cZjDtZmUOV1ofD60XGIAiTIrxC
jbMozzbmbu1cuGx5XSGVkSXEFeoGrlu2294ttv1gcEJoeIJIPGxfGWyxlEiQAYXQw0KHjdim
06c4zJwl7eT3XO6A14X/5rufaPdlO5g73vsyW+/ZzmjHMY79iY99NXwp5ztrX+FgV/auiy/7
nfW8Nl/b9T13mOlmuSmpCfVps+B6l9qInNxyptxH4D13eULQrbK1NaE+2KxbSoFBSiADCKLg
fbGJS5rL2Omcpyxu7rvh1eh3e2cmwr178WNRVMbZSoJ4FJWTKXcnKADAIKuqBxBI54gcFYp3
xUIMqhrUyHoLjYPwgszI7eGRdSUOFMYQlpP0pOoEAV0WM1zTXUey4OeJUEZtb+UNcgLSAYUj
iyXJQ3TVfIX50ANedGFbHwEc/JQzJup4YQ==
' | unpack
}
export -f $(compgen -A function | grep ^cpu) export -f $(compgen -A function | grep ^cpu)
test_one() { test_one() {
@ -809,6 +824,18 @@ par_test_cpu_detection_lscpu() {
YQqnBNP0ggo= YQqnBNP0ggo=
' | unpack ' | unpack
} }
# cpu15: test fixture for the CPU detection test (lscpu variant).
# Prints a label line for the machine, then feeds an encoded data blob to
# `unpack` (defined elsewhere in this file).
# NOTE(review): the leading "1-1-1-1" in the label presumably lists the
# expected detection counts for this machine - confirm against test_one.
cpu15() {
echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
# NOTE(review): blob looks like base64-encoded compressed data captured from
# the machine named above - confirm against the `unpack` helper.
echo '
KLUv/QRohQoANpVBJCDJVgAHS35yk/0LNr7il7y4NiXZTi5J2xh8EMPkH0ICUAugATkAOAA5
AFDzg//KBxEyshyKJFqSPScHvrD7GRyq62rk2xnH6ldBJBQHNqybEUHnKZJoMR7GO7S16rb9
LK0GBJfkrAs3+uHAsJHJKw7BdVNajAx8kU1wW0JWvkpugQDkdTq5CcuZ9Cq0yVtntJzZXpc6
ttWCsJoH7dvil9FXjm/cX6Zf942XV3RiikGCtXiciQQiTWo61iLRADRNY+EhPZR8lG5P1sJD
appGnczbZqUB6zW9exVt564cI78V+thnfmKZXxkzHRPjH23s3O4WruAE2y8ztjOKvtaOp8tv
j7I/mB0gQIaMujr8eNMPYIxoDLDgY+WX46hwjTmVzU7HpqEtYHxLTLzqx8jKta+0nIvY4e1q
oiCpQLqitFXU0Fyo+a4q4SvbmVMCr0burQ==
' | unpack
}
export -f $(compgen -A function | grep ^cpu) export -f $(compgen -A function | grep ^cpu)
test_one() { test_one() {
@ -847,15 +874,16 @@ par_block_negative_prefix() {
par_sql_colsep() { par_sql_colsep() {
echo '### SQL should add Vn columns for --colsep' echo '### SQL should add Vn columns for --colsep'
parallel -k -C' ' --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo /{1}/{2}/{3}/{4}/ \ dburl=sqlite3:///%2ftmp%2fparallel-sql-colsep-$$/bar
parallel -k -C' ' --sqlandworker $dburl echo /{1}/{2}/{3}/{4}/ \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
parallel -k -C' ' echo /{1}/{2}/{3}/{4}/ \ parallel -k -C' ' echo /{1}/{2}/{3}/{4}/ \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
# TODO this is wrong parallel -k -C' ' -N3 --sqlandworker $dburl echo \
parallel -k -C' ' -N3 --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66' ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
parallel -k -C' ' -N3 echo \ parallel -k -C' ' -N3 echo \
::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66' ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
rm /tmp/parallel-sql-colsep-$$
} }
par_sql_CSV() { par_sql_CSV() {

View file

@ -6,6 +6,8 @@ par_PARALLEL_ENV ### PARALLEL_ENV as file
par_PARALLEL_ENV OK as file par_PARALLEL_ENV OK as file
par_PARALLEL_ENV ### PARALLEL_ENV as fifo par_PARALLEL_ENV ### PARALLEL_ENV as fifo
par_PARALLEL_ENV OK as fifo par_PARALLEL_ENV OK as fifo
par_PARALLEL_HOME_with_+ bug #59453: PARALLEL_HOME with plus sign causes error: config not readable
par_PARALLEL_HOME_with_+ Parallel_home_with+
par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero par_X_eta_div_zero ### bug #34422: parallel -X --eta crashes with div by zero
par_X_eta_div_zero Computers / CPU cores / Max jobs to run par_X_eta_div_zero Computers / CPU cores / Max jobs to run
par_X_eta_div_zero 0:local / 0 / 0 par_X_eta_div_zero 0:local / 0 / 0

View file

@ -811,6 +811,8 @@ par_test_cpu_detection_cpuinfo 1-2-2-2 AMD Neo N36L Dual-Core Processor
par_test_cpu_detection_cpuinfo 1 2 2 2 par_test_cpu_detection_cpuinfo 1 2 2 2
par_test_cpu_detection_cpuinfo 1-1-1-1 Intel Xeon X5675 (mandriva.p) par_test_cpu_detection_cpuinfo 1-1-1-1 Intel Xeon X5675 (mandriva.p)
par_test_cpu_detection_cpuinfo 1 1 1 1 par_test_cpu_detection_cpuinfo 1 1 1 1
par_test_cpu_detection_cpuinfo 1-1-1-1 Intel(R) Celeron(R) M (eee900)
par_test_cpu_detection_cpuinfo 1 1 1 1
par_test_cpu_detection_cpuinfo 1-4-8-4 Core i7-3632QM Acer laptop par_test_cpu_detection_cpuinfo 1-4-8-4 Core i7-3632QM Acer laptop
par_test_cpu_detection_cpuinfo 1 4 8 4 par_test_cpu_detection_cpuinfo 1 4 8 4
par_test_cpu_detection_cpuinfo 1-2-4-2 Core i5-2410M laptop firewall par_test_cpu_detection_cpuinfo 1-2-4-2 Core i5-2410M laptop firewall
@ -835,6 +837,8 @@ par_test_cpu_detection_lscpu 1-2-2-2 AMD Neo N36L Dual-Core Processor
par_test_cpu_detection_lscpu 1 2 2 2 par_test_cpu_detection_lscpu 1 2 2 2
par_test_cpu_detection_lscpu 1-1-1-1 Intel Xeon X5675 (mandriva.p) par_test_cpu_detection_lscpu 1-1-1-1 Intel Xeon X5675 (mandriva.p)
par_test_cpu_detection_lscpu 1 1 1 1 par_test_cpu_detection_lscpu 1 1 1 1
par_test_cpu_detection_lscpu 1-1-1-1 Intel(R) Celeron(R) M (eee900)
par_test_cpu_detection_lscpu 1 1 1 1
par_test_cpu_detection_lscpu 1-4-8-4 Core i7-3632QM Acer laptop par_test_cpu_detection_lscpu 1-4-8-4 Core i7-3632QM Acer laptop
par_test_cpu_detection_lscpu 1 4 8 4 par_test_cpu_detection_lscpu 1 4 8 4
par_test_cpu_detection_lscpu 1-2-4-2 Core i5-2410M laptop firewall par_test_cpu_detection_lscpu 1-2-4-2 Core i5-2410M laptop firewall