Fixed bug #59453: PARALLEL_HOME with plus sign causes error.

2024-11-22 05:57:54 +00:00 · 2020-12-07 16:24:54 +01:00 · 2020-12-07 16:24:54 +01:00 · 8df731b9ab
parent e1a62d362d
commit 8df731b9ab
10 changed files with 103 additions and 49 deletions
--- a/doc/boxplot-overhead
+++ b/doc/boxplot-overhead
@ -13,15 +13,24 @@
 # Fixed cpu-speed: 50% spread=0.7-1.5 ms
 # 4-cpu: 30% faster: 9 ms -> 6 ms
 TMP=`pwd`/tmp
 export TMP
 mkdir -p $TMP
-if ! /tmp/bin/parallel-20140722 --version; then
+if ! $TMP/bin/parallel-20140722 --version; then
-  wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
+    mkdir -p $TMP/ftp
    (
 	cd $TMP/ftp
 #	wget -c ftp://ftp.gnu.org/old-gnu/parallel/p*
 	wget -c ftp://ftp.uni-kl.de/pub/gnu/parallel/p*
 	parallel 'gpg --auto-key-locate keyserver --keyserver-options auto-key-retrieve {}' ::: *.sig
-  parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix /tmp/{.}-bin && make && make install' ::: *sig
+	parallel --plus 'tar xvf {.} && cd {...} && ./configure --prefix '$TMP'/ftp/{.}-bin && make && make install' ::: *sig
 	perl -i -pe 's/qw\(keys/(keys/' parallel*/src/parallel
-  mkdir /tmp/bin
+	perl -i -pe 's/defined\(\@/(\@/' parallel*/src/parallel
-  parallel cp {} /tmp/bin/'{=s:/.*::=}' ::: parallel*/src/parallel
+	perl -i -pe 's/defined\s+\@/ \@/' parallel*/src/parallel
 	mkdir $TMP/bin
 	parallel cp {} $TMP/bin/'{=s:/.*::=}' ::: parallel*/src/parallel
    )
 fi
 measure() {
@ -35,30 +44,30 @@ measure() {
    MHZ=1700
    # Force cpuspeed at 1.7GHz - seems to give tighter results
-    forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' &
+    #forever 'parallel sudo cpufreq-set -g performance -u '$MHZ'MHz -d '$MHZ'MHz -c{} ::: {0..3};sleep 10' &
-    PATH=/tmp/bin:$PATH
+    PATH=$TMP/bin:$PATH
-    cd /tmp/bin
+    cd $TMP/bin
    ls parallel-* |
-	parallel --shuf -j$CORES --joblog ~/tmp/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
+	parallel --shuf -j$CORES --joblog +$TMP/joblog$CORES-$INNER-$OUTER.csv 'seq '$INNER' | {2} true' :::: <(seq $OUTER) -
    killall forever
    Rscript - <<_
-      jl<-read.csv("$HOME/tmp/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
+      jl<-read.csv("$TMP/joblog$CORES-$INNER-$OUTER.csv",sep="\t");
      jl\$Command <- as.factor(substr(jl\$Command,
-				      nchar(as.character(jl\$Command))-23,
+                                      nchar(as.character(jl\$Command))-12,
-				      nchar(as.character(jl\$Command))-5))
+                                      nchar(as.character(jl\$Command))-5));
-      pdf("/tmp/boxplot.pdf");
+      pdf("$TMP/boxplot.pdf");
      par(cex.axis=0.5);
      boxplot(JobRuntime/$INNER*1000~Command,data=jl,las=2,outline=F,
              ylab="milliseconds/job",main="GNU Parallel overhead for different versions\n$OUTER trials each running $INNER jobs");
 _
-    cp /tmp/boxplot.pdf $HOME/tmp/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf
+    cp $TMP/boxplot.pdf $TMP/boxplot-j$CORES-${MHZ}MHz-$OUTER-${INNER}v$VERSION.pdf
-    evince /tmp/boxplot.pdf
+    evince $TMP/boxplot.pdf
 }
 #measure 3000 1000 2 1
-measure 30 10 2 1
+measure 30 10 50% 1
-measure 300 100 2 1
+measure 300 100 50% 1
-measure 3000 1000 2 1
+measure 3000 1000 50% 1
--- a/doc/haikus
+++ b/doc/haikus
@ -1,13 +1,13 @@
 Quote of the month:
  Today I'm grateful for GNU parallel, especially with the --colsep and
  --jobs parameters #GiveThanks
    Erin Young @ErinYoun
  I also prefer gnu parallel. Mainly because it makes embarrassingly
  parallel tasks embarrassingly easy to run on the command line.
    -- Vincent D. Warmerdam @fishnets88@twitter
  GNU parallel should be taught in class, it is one of the best tools
  to run grids of experiments
    -- no love deep learning @tetraduzione@twitter
  It's not a data migration party until GNU Parallel is involved...
  involved
  involved
@ -75,6 +75,10 @@ https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html
 === Used ===
  GNU parallel should be taught in class, it is one of the best tools
  to run grids of experiments
    -- no love deep learning @tetraduzione@twitter
  I get a weird sense of satisfaction every single time I see the
  lovely logo of #GNU Parallel (plus, what an underrated piece of
  great software!)
--- a/doc/release_new_version
+++ b/doc/release_new_version
@ -105,9 +105,7 @@ git diff
 # Recheck OBS https://build.opensuse.org/package/show/home:tange/parallel
-export YYYYMMDD=`yyyymmdd`
+. .last-doitag.txt
 export YYYYMMDD=${YYYYMMDD:0:6}22
 TAG=MyTag
 echo "Released as $YYYYMMDD ('$TAG')." | grep MyTag && (STOP;STOP;STOP)
 echo "$TAG" | grep ' ' && (STOP;STOP;STOP)
 echo "Released as $YYYYMMDD ('$TAG')."
@ -192,9 +190,9 @@ from:tange@gnu.org
 to:parallel@gnu.org, bug-parallel@gnu.org
 stable-bcc: Jesse Alama <jessealama@fastmail.fm>
-Subject: GNU Parallel 20201122 ('Biden') released <<[stable]>>
+Subject: GNU Parallel 20201222 ('Maradona') released <<[stable]>>
-GNU Parallel 20201122 ('Biden') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
+GNU Parallel 20201222 ('Maradona') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
 <<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -208,15 +206,14 @@ Quote of the month:
 New in this release:
 <<>>
-
+https://www.youtube.com/watch?v=t_v2Otgt87g
 * Bug fixes and man page updates.
 News about GNU Parallel:
-* https://bash-prompt.net/guides/gnu-parallel-multi-server/
+https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/
--- a/src/parallel
+++ b/src/parallel
@ -2757,7 +2757,7 @@ sub read_options() {
 		}
 		close $in_fh;
 	    } else {
-		if(grep /^$profile$/, @config_profiles) {
+		if(grep /^\Q$profile\E$/, @config_profiles) {
 		    # config file is not required to exist
 		} else {
 		    ::error("$profile not readable.");
--- a/src/parallel.pod
+++ b/src/parallel.pod
@ -3800,7 +3800,7 @@ way to know for certain is to test and measure.
 =head2 Limiting factor: RAM
-The normal B<grep -f regexs.txt bigfile> works no matter the size of
+The normal B<grep -f regexps.txt bigfile> works no matter the size of
 bigfile, but if regexps.txt is so big it cannot fit into memory, then
 you need to split this.
@ -3854,13 +3854,13 @@ If you can live with duplicated lines and wrong order, it is faster to do:
 If the CPU is the limiting factor, parallelization should be done on
 the regexps:
-  cat regexp.txt | parallel --pipe -L1000 --roundrobin --compress \
+  cat regexps.txt | parallel --pipe -L1000 --roundrobin --compress \
    grep -f - -n bigfile | \
    sort -un | perl -pe 's/^\d+://'
 The command will start one B<grep> per CPU and read I<bigfile> one
 time per CPU, but as that is done in parallel, all reads except the
-first will be cached in RAM. Depending on the size of I<regexp.txt> it
+first will be cached in RAM. Depending on the size of I<regexps.txt> it
 may be faster to use B<--block 10m> instead of B<-L1000>.
 Some storage systems perform better when reading multiple chunks in
@ -3868,13 +3868,13 @@ parallel. This is true for some RAID systems and for some network file
 systems. To parallelize the reading of I<bigfile>:
  parallel --pipepart --block 100M -a bigfile -k --compress \
-    grep -f regexp.txt
+    grep -f regexps.txt
 This will split I<bigfile> into 100MB chunks and run B<grep> on each of
-these chunks. To parallelize both reading of I<bigfile> and I<regexp.txt>
+these chunks. To parallelize both reading of I<bigfile> and I<regexps.txt>
-combine the two using B<--fifo>:
+combine the two using B<--cat>:
-  parallel --pipepart --block 100M -a bigfile --fifo cat regexp.txt \
+  parallel --pipepart --block 100M -a bigfile --cat cat regexps.txt \
    \| parallel --pipe -L1000 --roundrobin grep -f - {}
 If a line matches multiple regexps, the line may be duplicated.
--- a/src/parallel_design.pod
+++ b/src/parallel_design.pod
@ -850,12 +850,13 @@ be spawned:
  parallel "grep -E 'ls | wc >> c' {} | wc >> c" ::: foo
  parallel "LANG=C grep -E 'ls | wc >> c' {}" ::: foo
-It is impossible to tell the difference between these without parsing
+It is impossible to tell how B<| wc >>>B< c> should be interpreted
-the string (is the B<|> a pipe in shell or an alternation in a B<grep>
+without parsing the string (is the B<|> a pipe in shell or an
-regexp?  Is B<LANG=C> a command in B<csh> or setting a variable in
+alternation in a B<grep> regexp?  Is B<LANG=C> a command in B<csh> or
-B<bash>? Is B<>>> redirection or part of a regexp?).
+setting a variable in B<bash>? Is B<>>> redirection or part of a
 regexp?).
-On top of this wrapper scripts will often require a shell to be
+On top of this, wrapper scripts will often require a shell to be
 spawned.
 The downside is that you need to quote special shell chars twice:
--- a/testsuite/tests-to-run/parallel-local-0.3s.sh
+++ b/testsuite/tests-to-run/parallel-local-0.3s.sh
@ -851,6 +851,15 @@ par_plus_slot_replacement() {
    parallel -k --plus echo '{slot}=$PARALLEL_JOBSLOT={%}' ::: A B C
 }
 par_PARALLEL_HOME_with_+() {
    echo 'bug #59453: PARALLEL_HOME with plus sign causes error: config not readable'
    tmp=$(mktemp -d)
    export PARALLEL_HOME="$tmp/  space  /a+b"
    mkdir -p "$PARALLEL_HOME"
    parallel echo ::: Parallel_home_with+
    rm -rf "$tmp"
 }
 export -f $(compgen -A function | grep par_)
 compgen -A function | grep par_ | LC_ALL=C sort |
    parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |
--- a/testsuite/tests-to-run/parallel-local-1s.sh
+++ b/testsuite/tests-to-run/parallel-local-1s.sh
@ -624,6 +624,21 @@ par_test_cpu_detection_cpuinfo() {
 	YstArr0BOSgXJ4Xmpu4j9PRpQcgRCckdf4fcSFol9GuGecuj5uBxngHakML8
 	' | unpack
    }
    cpu15() {
 	echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
 	echo '
 	KLUv/QRo5Q0ABp9XIkBrqwCI2LZJIts7o1loU/RrgCM1Bkm4qbLeX6WzKj6uMAFQAE8ARwAL
 	JHEgV0hbZFGyfUlxhRq4zDo7PSwsEOTgX8Ao1WnCAxwArpC+BU+AuELIaYsvGVyhh9u3mDvM
 	ktMJGRSuA1XhCrVVZGxQn4RcIRm7lUXJAdxIyRVy+qh7W4cZjDtZmUOV1ofD60XGIAiTIrxC
 	jbMozzbmbu1cuGx5XSGVkSXEFeoGrlu2294ttv1gcEJoeIJIPGxfGWyxlEiQAYXQw0KHjdim
 	06c4zJwl7eT3XO6A14X/5rufaPdlO5g73vsyW+/ZzmjHMY79iY99NXwp5ztrX+FgV/auiy/7
 	nfW8Nl/b9T13mOlmuSmpCfVps+B6l9qInNxyptxH4D13eULQrbK1NaE+2KxbSoFBSiADCKLg
 	fbGJS5rL2Omcpyxu7rvh1eh3e2cmwr178WNRVMbZSoJ4FJWTKXcnKADAIKuqBxBI54gcFYp3
 	xUIMqhrUyHoLjYPwgszI7eGRdSUOFMYQlpP0pOoEAV0WM1zTXUey4OeJUEZtb+UNcgLSAYUj
 	iyXJQ3TVfIX50ANedGFbHwEc/JQzJup4YQ==
 	' | unpack
    }
    export -f $(compgen -A function | grep ^cpu)
    test_one() {
@ -809,6 +824,18 @@ par_test_cpu_detection_lscpu() {
 	YQqnBNP0ggo=
 	' | unpack
    }
    cpu15() {
 	echo '1-1-1-1 Intel(R) Celeron(R) M (eee900)'
 	echo '
 	KLUv/QRohQoANpVBJCDJVgAHS35yk/0LNr7il7y4NiXZTi5J2xh8EMPkH0ICUAugATkAOAA5
 	AFDzg//KBxEyshyKJFqSPScHvrD7GRyq62rk2xnH6ldBJBQHNqybEUHnKZJoMR7GO7S16rb9
 	LK0GBJfkrAs3+uHAsJHJKw7BdVNajAx8kU1wW0JWvkpugQDkdTq5CcuZ9Cq0yVtntJzZXpc6
 	ttWCsJoH7dvil9FXjm/cX6Zf942XV3RiikGCtXiciQQiTWo61iLRADRNY+EhPZR8lG5P1sJD
 	appGnczbZqUB6zW9exVt564cI78V+thnfmKZXxkzHRPjH23s3O4WruAE2y8ztjOKvtaOp8tv
 	j7I/mB0gQIaMujr8eNMPYIxoDLDgY+WX46hwjTmVzU7HpqEtYHxLTLzqx8jKta+0nIvY4e1q
 	oiCpQLqitFXU0Fyo+a4q4SvbmVMCr0burQ==
 	' | unpack
    }
    export -f $(compgen -A function | grep ^cpu)
    test_one() {
@ -847,15 +874,16 @@ par_block_negative_prefix() {
 par_sql_colsep() {
    echo '### SQL should add Vn columns for --colsep'
-    parallel -k -C' ' --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo /{1}/{2}/{3}/{4}/ \
+    dburl=sqlite3:///%2ftmp%2fparallel-sql-colsep-$$/bar
    parallel -k -C' ' --sqlandworker $dburl echo /{1}/{2}/{3}/{4}/ \
 	     ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
    parallel -k -C' ' echo /{1}/{2}/{3}/{4}/ \
 	     ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33'
-    # TODO this is wrong
+    parallel -k -C' ' -N3 --sqlandworker $dburl echo \
-    parallel -k -C' ' -N3 --sqlandworker sqlite3:///%2ftmp%2ffoo/bar echo \
 	     ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
    parallel -k -C' ' -N3 echo \
 	     ::: 'a A' 'b B' 'c C' ::: '1 11' '2 22' '3 33' '4 44' '5 55' '6 66'
    rm /tmp/parallel-sql-colsep-$$
 }
 par_sql_CSV() {
--- a/testsuite/wanted-results/parallel-local-0.3s
+++ b/testsuite/wanted-results/parallel-local-0.3s
@ -6,6 +6,8 @@ par_PARALLEL_ENV	### PARALLEL_ENV as file
 par_PARALLEL_ENV	OK as file
 par_PARALLEL_ENV	### PARALLEL_ENV as fifo
 par_PARALLEL_ENV	OK as fifo
 par_PARALLEL_HOME_with_+	bug #59453: PARALLEL_HOME with plus sign causes error: config not readable
 par_PARALLEL_HOME_with_+	Parallel_home_with+
 par_X_eta_div_zero	### bug #34422: parallel -X --eta crashes with div by zero
 par_X_eta_div_zero	Computers / CPU cores / Max jobs to run
 par_X_eta_div_zero	0:local / 0 / 0
--- a/testsuite/wanted-results/parallel-local-1s
+++ b/testsuite/wanted-results/parallel-local-1s
@ -811,6 +811,8 @@ par_test_cpu_detection_cpuinfo	1-2-2-2 AMD Neo N36L Dual-Core Processor
 par_test_cpu_detection_cpuinfo	1 2 2 2
 par_test_cpu_detection_cpuinfo	1-1-1-1 Intel Xeon X5675 (mandriva.p)
 par_test_cpu_detection_cpuinfo	1 1 1 1
 par_test_cpu_detection_cpuinfo	1-1-1-1 Intel(R) Celeron(R) M (eee900)
 par_test_cpu_detection_cpuinfo	1 1 1 1
 par_test_cpu_detection_cpuinfo	1-4-8-4 Core i7-3632QM Acer laptop
 par_test_cpu_detection_cpuinfo	1 4 8 4
 par_test_cpu_detection_cpuinfo	1-2-4-2 Core i5-2410M laptop firewall
@ -835,6 +837,8 @@ par_test_cpu_detection_lscpu	1-2-2-2 AMD Neo N36L Dual-Core Processor
 par_test_cpu_detection_lscpu	1 2 2 2
 par_test_cpu_detection_lscpu	1-1-1-1 Intel Xeon X5675 (mandriva.p)
 par_test_cpu_detection_lscpu	1 1 1 1
 par_test_cpu_detection_lscpu	1-1-1-1 Intel(R) Celeron(R) M (eee900)
 par_test_cpu_detection_lscpu	1 1 1 1
 par_test_cpu_detection_lscpu	1-4-8-4 Core i7-3632QM Acer laptop
 par_test_cpu_detection_lscpu	1 4 8 4
 par_test_cpu_detection_lscpu	1-2-4-2 Core i5-2410M laptop firewall