diff --git a/doc/release_new_version b/doc/release_new_version index 42589718..3402aecb 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -195,7 +195,7 @@ to:parallel@gnu.org, bug-parallel@gnu.org Subject: GNU Parallel 20170422 ('Санкт-Петербу́рг') released <<[stable]>> -GNU Parallel 20170422 ('Санкт-Петербу́рг') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ +GNU Parallel 20170422 ('Stockholm/London/Санкт-Петербу́рг') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ <> @@ -221,6 +221,10 @@ https://github.com/lucascbeyeler/zmbackup http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0174575 +https://www.slideshare.net/sharatsc/data-science-at-the-command-line + +http://www.jianshu.com/p/67b0665490ac + * <> * <> diff --git a/src/Makefile.am b/src/Makefile.am index 08056d23..bfc68f8c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,18 +11,21 @@ install-exec-hook: if DOCUMENTATION man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parallel_tutorial.7 parallel_design.7 parallel_alternatives.7 \ - parcat.1 + parcat.1 parset.1 doc_DATA = parallel.html env_parallel.html sem.html sql.html niceload.html \ parallel_tutorial.html parallel_design.html parallel_alternatives.html \ - parcat.html \ + parcat.html parset.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ parallel_tutorial.texi parallel_design.texi parallel_alternatives.texi \ - parcat.texi \ + parcat.texi parset.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_design.pdf parallel_alternatives.pdf \ - parcat.pdf + parcat.pdf parset.pdf endif +parset: parset.pod + cp parset.pod parset + # Build documentation file if the tool to build exists. 
# Otherwise: Use the distributed version parallel.1: parallel.pod @@ -80,6 +83,12 @@ parcat.1: parcat && mv $(srcdir)/parcat.1n $(srcdir)/parcat.1 \ || echo "Warning: pod2man not found. Using old parcat.1" +parset.1: parset + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 $(srcdir)/parset > $(srcdir)/parset.1n \ + && mv $(srcdir)/parset.1n $(srcdir)/parset.1 \ + || echo "Warning: pod2man not found. Using old parset.1" + parallel.html: parallel.pod pod2html --title "GNU Parallel" $(srcdir)/parallel.pod > $(srcdir)/parallel.htmln \ && mv $(srcdir)/parallel.htmln $(srcdir)/parallel.html \ @@ -142,6 +151,13 @@ parcat.html: parcat niceload.html || echo "Warning: pod2html not found. Using old parcat.html" rm -f $(srcdir)/pod2htm* +# Depending on parcat.html to avoid stupid pod2html race condition +parset.html: parset parcat.html + pod2html --title "GNU parset" $(srcdir)/parset > $(srcdir)/parset.htmln \ + && mv $(srcdir)/parset.htmln $(srcdir)/parset.html \ + || echo "Warning: pod2html not found. Using old parset.html" + rm -f $(srcdir)/pod2htm* + parallel.texi: parallel.pod pod2texi --output=$(srcdir)/parallel.texi $(srcdir)/parallel.pod \ || echo "Warning: pod2texi not found. Using old parallel.texi" @@ -178,6 +194,10 @@ parcat.texi: parcat pod2texi --output=$(srcdir)/parcat.texi $(srcdir)/parcat \ || echo "Warning: pod2texi not found. Using old parcat.texi" +parset.texi: parset + pod2texi --output=$(srcdir)/parset.texi $(srcdir)/parset \ + || echo "Warning: pod2texi not found. Using old parset.texi" + parallel.pdf: parallel.pod pod2pdf --output-file $(srcdir)/parallel.pdf $(srcdir)/parallel.pod --title "GNU Parallel" \ || echo "Warning: pod2pdf not found. Using old parallel.pdf" @@ -214,26 +234,30 @@ parcat.pdf: parcat pod2pdf --output-file $(srcdir)/parcat.pdf $(srcdir)/parcat --title "GNU parcat" \ || echo "Warning: pod2pdf not found. 
Using old parcat.pdf" +parset.pdf: parset + pod2pdf --output-file $(srcdir)/parset.pdf $(srcdir)/parset --title "GNU parset" \ + || echo "Warning: pod2pdf not found. Using old parset.pdf" + sem: parallel ln -fs parallel sem DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parallel_tutorial.7 parallel_design.7 parallel_alternatives.7 \ - parcat.1 \ + parcat.1 parset.1 \ parallel.html env_parallel.html sem.html sql.html niceload.html \ parallel_tutorial.html parallel_design.html parallel_alternatives.html \ - parcat.html \ + parcat.html parset.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ parallel_tutorial.texi parallel_design.texi parallel_alternatives.texi \ - parcat.texi \ + parcat.texi parset.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_design.pdf parallel_alternatives.pdf \ - parcat.pdf + parcat.pdf parset.pdf -EXTRA_DIST = parallel sem sql niceload parcat env_parallel \ - env_parallel.ash env_parallel.bash env_parallel.csh \ - env_parallel.dash env_parallel.fish env_parallel.ksh \ - env_parallel.pdksh env_parallel.sh env_parallel.tcsh \ - env_parallel.zsh sem.pod parallel.pod env_parallel.pod \ - niceload.pod parallel_tutorial.pod parallel_design.pod \ +EXTRA_DIST = parallel sem sql niceload parcat parset env_parallel \ + env_parallel.ash env_parallel.bash env_parallel.csh \ + env_parallel.dash env_parallel.fish env_parallel.ksh \ + env_parallel.pdksh env_parallel.sh env_parallel.tcsh \ + env_parallel.zsh sem.pod parallel.pod env_parallel.pod \ + niceload.pod parallel_tutorial.pod parallel_design.pod \ parallel_alternatives.pod $(DISTCLEANFILES) diff --git a/src/Makefile.in b/src/Makefile.in index 65c1996b..a3731cc6 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -237,37 +237,37 @@ bin_SCRIPTS = parallel sql niceload parcat env_parallel \ @DOCUMENTATION_TRUE@man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ @DOCUMENTATION_TRUE@ 
parallel_tutorial.7 parallel_design.7 parallel_alternatives.7 \ -@DOCUMENTATION_TRUE@ parcat.1 +@DOCUMENTATION_TRUE@ parcat.1 parset.1 @DOCUMENTATION_TRUE@doc_DATA = parallel.html env_parallel.html sem.html sql.html niceload.html \ @DOCUMENTATION_TRUE@ parallel_tutorial.html parallel_design.html parallel_alternatives.html \ @DOCUMENTATION_TRUE@ parcat.html \ @DOCUMENTATION_TRUE@ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ @DOCUMENTATION_TRUE@ parallel_tutorial.texi parallel_design.texi parallel_alternatives.texi \ -@DOCUMENTATION_TRUE@ parcat.texi \ +@DOCUMENTATION_TRUE@ parcat.texi parset.texi \ @DOCUMENTATION_TRUE@ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ @DOCUMENTATION_TRUE@ parallel_tutorial.pdf parallel_design.pdf parallel_alternatives.pdf \ -@DOCUMENTATION_TRUE@ parcat.pdf +@DOCUMENTATION_TRUE@ parcat.pdf parset.pdf DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parallel_tutorial.7 parallel_design.7 parallel_alternatives.7 \ - parcat.1 \ + parcat.1 parset.1 \ parallel.html env_parallel.html sem.html sql.html niceload.html \ parallel_tutorial.html parallel_design.html parallel_alternatives.html \ - parcat.html \ + parcat.html parset.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ parallel_tutorial.texi parallel_design.texi parallel_alternatives.texi \ - parcat.texi \ + parcat.texi parset.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_design.pdf parallel_alternatives.pdf \ - parcat.pdf + parcat.pdf parset.pdf -EXTRA_DIST = parallel sem sql niceload parcat env_parallel \ - env_parallel.ash env_parallel.bash env_parallel.csh \ - env_parallel.dash env_parallel.fish env_parallel.ksh \ - env_parallel.pdksh env_parallel.sh env_parallel.tcsh \ - env_parallel.zsh sem.pod parallel.pod env_parallel.pod \ - niceload.pod parallel_tutorial.pod parallel_design.pod \ +EXTRA_DIST = parallel sem sql niceload parcat parset env_parallel \ + 
env_parallel.ash env_parallel.bash env_parallel.csh \ + env_parallel.dash env_parallel.fish env_parallel.ksh \ + env_parallel.pdksh env_parallel.sh env_parallel.tcsh \ + env_parallel.zsh sem.pod parallel.pod env_parallel.pod \ + niceload.pod parallel_tutorial.pod parallel_design.pod \ parallel_alternatives.pod $(DISTCLEANFILES) all: all-am @@ -611,6 +611,9 @@ install-exec-hook: rm $(DESTDIR)$(bindir)/sem || true $(LN_S) parallel $(DESTDIR)$(bindir)/sem +parset: parset.pod + cp parset.pod parset + # Build documentation file if the tool to build exists. # Otherwise: Use the distributed version parallel.1: parallel.pod @@ -667,6 +670,12 @@ parcat.1: parcat && mv $(srcdir)/parcat.1n $(srcdir)/parcat.1 \ || echo "Warning: pod2man not found. Using old parcat.1" +parset.1: parset + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 $(srcdir)/parset > $(srcdir)/parset.1n \ + && mv $(srcdir)/parset.1n $(srcdir)/parset.1 \ + || echo "Warning: pod2man not found. Using old parset.1" + parallel.html: parallel.pod pod2html --title "GNU Parallel" $(srcdir)/parallel.pod > $(srcdir)/parallel.htmln \ && mv $(srcdir)/parallel.htmln $(srcdir)/parallel.html \ @@ -729,6 +738,13 @@ parcat.html: parcat niceload.html || echo "Warning: pod2html not found. Using old parcat.html" rm -f $(srcdir)/pod2htm* +# Depending on niceload.html to avoid stupid pod2html race condition +parset.html: parset parcat.html + pod2html --title "GNU parset" $(srcdir)/parset > $(srcdir)/parset.htmln \ + && mv $(srcdir)/parset.htmln $(srcdir)/parset.html \ + || echo "Warning: pod2html not found. Using old parset.html" + rm -f $(srcdir)/pod2htm* + parallel.texi: parallel.pod pod2texi --output=$(srcdir)/parallel.texi $(srcdir)/parallel.pod \ || echo "Warning: pod2texi not found. Using old parallel.texi" @@ -765,6 +781,10 @@ parcat.texi: parcat pod2texi --output=$(srcdir)/parcat.texi $(srcdir)/parcat \ || echo "Warning: pod2texi not found. 
Using old parcat.texi" +parset.texi: parset + pod2texi --output=$(srcdir)/parset.texi $(srcdir)/parset \ + || echo "Warning: pod2texi not found. Using old parset.texi" + parallel.pdf: parallel.pod pod2pdf --output-file $(srcdir)/parallel.pdf $(srcdir)/parallel.pod --title "GNU Parallel" \ || echo "Warning: pod2pdf not found. Using old parallel.pdf" @@ -801,6 +821,10 @@ parcat.pdf: parcat pod2pdf --output-file $(srcdir)/parcat.pdf $(srcdir)/parcat --title "GNU parcat" \ || echo "Warning: pod2pdf not found. Using old parcat.pdf" +parset.pdf: parset + pod2pdf --output-file $(srcdir)/parset.pdf $(srcdir)/parset --title "GNU parset" \ + || echo "Warning: pod2pdf not found. Using old parset.pdf" + sem: parallel ln -fs parallel sem diff --git a/src/env_parallel.bash b/src/env_parallel.bash index 8b040bd3..ad15e659 100755 --- a/src/env_parallel.bash +++ b/src/env_parallel.bash @@ -169,3 +169,123 @@ env_parallel() { unset PARALLEL_ENV; return $_parallel_exit_CODE } + +_parset() { + # $1 = variable NAME + # If ${"$1"} is an array: Then put the output into variables with those names + # else put the output into an array named ${"$1"} + # e.g.: + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset -a varnames echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a2 $var_b2 $var_c2 + # varname=var_a2,var_b2,var_c2 + # parset -a varname echo ::: {1..3} + # echo $var_c2 + # + # # Put output into $var_a3 $var_b3 $var_c3 + # varname="var_a3 var_b3 var_c3" + # parset -a varname echo ::: {1..3} + # echo $var_c3 + + # Variable name to store in + local _parset_vname + local _parset_vnames + # Array to fetch names from + local _parset_aname + _parset_vname="$1" + _parset_aname="_nO_sUch_vAr" + shift + if [[ "-a" == "$_parset_vname" ]] ; then + # Option -a given + echo '-a given' + _parset_vname="$1" + _parset_aname="$1" + shift + if [[ "$(declare -p 
$_parset_vname 2>/dev/null)" =~ "declare -a" ]]; then + # OK + true + else + # error + echo "$_parset_vname" must be an array + return 1 + fi + else + local _parset_splitable + _parset_splitable="$(eval echo '$'$_parset_vname)" + if echo "$_parset_splitable" | grep -E ',| ' >/dev/null ; then + # Split on , and space + _parset_vnames=( $(perl -e 'print map { s/,| /\n/g; $_ } @ARGV' "$_parset_splitable" ) ) + _parset_aname="_parset_vnames" + echo first ${_parset_vnames[0]} + else + # _parset_vname should be used as an array + true + fi + fi + + + if [[ "$(declare -p $_parset_aname 2>/dev/null)" =~ "declare -a" ]]; then + # vname refers to an array + # The array elements refers to variable names to put output into + eval $( + parallel --files "$@" | + perl -pe 'chop;$_="\"\`cat $_; rm $_\`\"\n"' | + parallel echo {2}={1} :::: - :::+ $(eval echo '${'$_parset_aname'[@]}') + ) + unset _parset_aname + else + # Put output into array ${$_parset_vname} + eval $_parset_vname="( $( parallel --files "$@" | + perl -pe 'chop;$_="\"\`cat $_; rm $_\`\" "' ) )" + fi +} + + +parset() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_name="$1" + shift + if echo "$_parset_name" | grep -E ',| ' >/dev/null ; then + # $1 contains , or space + # Split on , or space to get the names + eval "$( + # Compute results into files + parallel --files "$@" | + # var1=`cat tmpfile1; rm tmpfile1` + # var2=`cat tmpfile2; rm tmpfile2` + parallel -q echo {2}='`cat {1}; rm {1}`' :::: - :::+ $( + echo "$_parset_name" | + perl -pe 
's/,/ /g' + ) + )" + else + # $1 contains no space or , + # => $1 is the name of the array to put data into + eval $_parset_name="( $( parallel --files "$@" | + perl -pe 'chop;$_="\"\`cat $_; rm $_\`\" "' ) )" + fi +} diff --git a/src/parallel b/src/parallel index deea5b96..c159a797 100755 --- a/src/parallel +++ b/src/parallel @@ -1362,7 +1362,7 @@ sub check_invalid_option_combinations { sub init_globals { # Defaults: - $Global::version = 20170331; + $Global::version = 20170404; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; @@ -1399,25 +1399,25 @@ sub init_globals { # {##} = number of jobs '{##}' => '$_=total_jobs()', # Bash ${a:-myval} - '{:-(.+?)}' => '$_ ||= $$1', + '{:-([^}]+?)}' => '$_ ||= $$1', # Bash ${a:2} '{:(\d+?)}' => 'substr($_,0,$$1) = ""', # Bash ${a:2:3} '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', # Bash ${a#bc} - '{#([^#].*?)}' => 's/^$$1//;', + '{#([^#][^}]*?)}' => 's/^$$1//;', # Bash ${a%def} - '{%(.+?)}' => 's/$$1$//;', + '{%([^}]+?)}' => 's/$$1$//;', # Bash ${a/def/ghi} ${a/def/} - '{/(.+?)/(.*?)}' => 's/$$1/$$2/;', + '{/([^}]+?)/([^}]*?)}' => 's/$$1/$$2/;', # Bash ${a^a} - '{^(.+?)}' => 's/^($$1)/uc($1)/e;', + '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;', # Bash ${a^^a} - '{^^(.+?)}' => 's/($$1)/uc($1)/eg;', + '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;', # Bash ${a,A} - '{,(.+?)}' => 's/^($$1)/lc($1)/e;', + '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;', # Bash ${a,,A} - '{,,(.+?)}' => 's/($$1)/lc($1)/eg;', + '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;', ); # Modifiable copy of %Global::replace %Global::rpl = %Global::replace; @@ -1646,32 +1646,38 @@ sub open_joblog { if($opt::resume || $opt::resume_failed || $opt::retry_failed) { if(open(my $joblog_fh, "<", $opt::joblog)) { # Read the joblog - $append = <$joblog_fh>; # If there is a header: Open as append later + # If there is a header: Open as append later + $append = <$joblog_fh>; my $joblog_regexp; if($opt::retry_failed) { # Make a regexp that only matches commands with 
exit+signal=0 # 4 host 1360490623.067 3.445 1023 1222 0 0 command $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; my @group; - while(<$joblog_fh>) { - if(/$joblog_regexp/o) { - # This is 30% faster than set_job_already_run($1); - vec($Global::job_already_run,($1||0),1) = 1; - $Global::total_completed++; - $group[$1-1] = "true"; - } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { - $group[$1-1] = $3 - } else { - chomp; - ::error("Format of '$opt::joblog' is wrong: $_"); - ::wait_and_exit(255); + { + local $/="\n"; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + # Grab out the command + $group[$1-1] = $3; + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } } } if(@group) { my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); unlink($name); # Put args into argfile - print $outfh map { $_,$/ } @group; + # Replace \0 with ' ' as used in print_joblog() + print $outfh map { s/\0/ /g; $_,$/ } @group; seek $outfh, 0, 0; exit_if_disk_full(); # Set filehandle to -a @@ -2756,13 +2762,15 @@ sub progress { $status = $eta . join(" ",map { - my $completed = ($Global::host{$_}->jobs_completed()||0); - my $running = $Global::host{$_}->jobs_running(); - my $time = $completed ? (time-$^T)/($completed) : "0"; - sprintf("%s:%d/%d/%d%%/%.1fs ", - $workerno{$_}, $running, $completed, - ($running+$completed)*100 - / $Global::total_started, $time); + if($Global::total_started) { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? 
(time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } } @workers); } if(length $status > $termcols) { @@ -2770,13 +2778,17 @@ sub progress { $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map - { sprintf("%s:%d/%d/%d%%", - $sshlogin{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } @workers); } if(length $status > $termcols) { @@ -2784,13 +2796,17 @@ sub progress { $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . 
join(" ",map - { sprintf("%s:%d/%d/%d%%", - $workerno{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } @workers); } if(length $status > $termcols) { @@ -5258,7 +5274,8 @@ sub loadavg_too_high { ); print `$ps{$^O}`; }); - $cmd = "perl -e ".::shell_quote_scalar($ps); + # The command is too long for csh, so base64_wrap the command + $cmd = Job::base64_wrap($ps); } return $cmd; } @@ -8673,6 +8690,8 @@ sub print_joblog { # Verbose level > 1: Print the rsync and stuff $cmd = "@command"; } + # Newlines makes it hard to parse the joblog + $cmd =~ s/\n/\0/g; print $Global::joblog join("\t", $self->seq(), $self->sshlogin()->string(), $self->starttime(), sprintf("%10.3f",$self->runtime()), diff --git a/src/parallel.pod b/src/parallel.pod index 0bc2d2fa..9f70b98a 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -1816,20 +1816,23 @@ Here are a few examples: --rpl '{#0} $_ = seq() - 1' Job slot counting from 2 --rpl '{%1} $_ = slot() + 1' + Remove all extensions + --rpl '{:} s:(\.[^/]+)*$::' You can have dynamic replacement strings by including parenthesis in the replacement string and adding a regular expression between the parenthesis. 
The matching string will be inserted as $$1: parallel --rpl '{%(.*?)} s/$$1//' echo {%.tar.gz} ::: my.tar.gz + parallel --rpl '{:%(.+?)} s:$$1(\.[^/]+)*$::' \ + echo {:%_file} ::: my_file.tar.gz + parallel -n3 --rpl '{/:%(.*?)} s:.*/(.*)$$1(\.[^/]+)*$:$1:' \ + echo job {#}: {2} {2.} {3/:%_1} ::: a/b.c c/d.e f/g_1.h.i You can even use multiple matches: - parallel --rpl '{@(\d+)\S(\d+)\S(\d+)} - if($$3 > 31) { ($$1,$$2,$$3) = ($$3,$$2,$$1) } - if($$2 > 12) { ($$1,$$2,$$3) = ($$1,$$3,$$2) } - $$1 = ($$1%100 + 1900); $_="$$1-$$2-$$3" - ' echo {@99-12-31} {@12.31.99} {@31/12-1999} ::: a + parallel --rpl '{/(.+?)/(.*?)} s/$$1/$$2/;' \ + echo {/replacethis/withthis} {/b/C} ::: a_replacethis_b parallel --rpl '{(.*?)/(.*?)} $_="$$2$_$$1"' \ echo {swap/these} ::: -middle- @@ -4116,7 +4119,7 @@ the currently running jobs are finished before exiting. =item $PARALLEL_HOME Dir where GNU B stores config files, semaphores, and caches -information between invocations. Default: $HOME/.parallel +information between invocations. Default: $HOME/.parallel. =item $PARALLEL_PID diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod index 4a85b6fe..1f2877c7 100644 --- a/src/parallel_alternatives.pod +++ b/src/parallel_alternatives.pod @@ -862,6 +862,73 @@ report this, but finishes with success - thereby risking data loss. Rush (https://github.com/shenwei356/rush) is written in Go and based on gargs. 
+Rush has some string manipulations that can be emulated by putting +this into ~/.parallel/config (% is used instead of ^): + + --rpl '{:} s:(\.[^/]+)*$::' + --rpl '{:%([^}]+?)} s:$$1(\.[^/]+)*$::' + --rpl '{/:%([^}]*?)} s:.*/(.*)$$1(\.[^/]+)*$:$1:' + --rpl '{/:} s:(.*/)?([^/.]+)(\.[^/]+)*$:$2:' + + +Here are the examples from B's website: + +B<1> seq 1 10 | rush echo {} + +B<1> seq 1 10 | parallel echo {} + +B<2> seq 1 10 | rush 'echo {}' -k + +B<2> seq 1 10 | parallel -k 'echo {}' + +B<3> seq 1 | rush 'sleep 2; echo {}' -t 1 + +B<3> seq 1 | parallel --timeout 1 'sleep 2; echo {}' + +B<4> seq 1 | rush 'python script.py' -r 3 + +B<4> seq 1 | parallel --retries 4 'python script.py' + +B<5> echo dir/file_1.txt.gz | rush 'echo {/} {%} {^_1.txt.gz}' + +B<5> echo dir/file_1.txt.gz | parallel --plus 'echo {//} {/} {%_1.txt.gz}' + +B<6> echo dir.d/file.txt.gz | rush 'echo {.} {:} {%.} {%:}' + +B<6> echo dir.d/file.txt.gz | parallel 'echo {.} {:} {/.} {/:}' + +B<7> echo 12 file.txt dir/s_1.fq.gz | rush 'echo job {#}: {2} {2.} {3%:^_1}' + +B<7> echo 12 file.txt dir/s_1.fq.gz | parallel --colsep ' ' 'echo job {#}: {2} {2.} {3/:%_1}' + +B<8> echo a=b=c | rush 'echo {1} {2} {3}' -d = + +B<8> echo a=b=c | parallel --colsep = 'echo {1} {2} {3}' + +B<9> echo a=b=c | rush -D "=" -k 'echo {}' + +B<9> echo -n a=b=c | parallel -d "=" -k 'echo {}' + +B<9a> echo abc | rush -D "" -k 'echo {}' + +B<9a> echo -n abc | parallel --pipe --recend '' --block 1 -k parallel echo + +B<10> seq 1 | rush 'echo Hello, {fname} {lname}!' -v fname=Wei -v lname=Shen + +B<10> seq 1 | parallel -N0 'fname=Wei; lname=Shen; echo Hello, ${fname} ${lname}!' 
+ +B<11> echo read_1.fq.gz | rush -v p={:^_1} 'echo {p} {p}_2.fq.gz' + +B<11> echo read_1.fq.gz | parallel 'p={:%_1}; echo ${p} ${p}_2.fq.gz' + +B<12> seq 1 3 | rush 'sleep {}; echo {}' -c -t 2 + +B<12> seq 1 3 | parallel --joblog mylog --timeout 2 'sleep {}; echo {}' + +B<12> Followed by: + +B<12> seq 1 3 | parallel --joblog mylog --retry-failed 'sleep {}; echo {}' + =head2 DIFFERENCES BETWEEN machma AND GNU Parallel diff --git a/src/parset.pod b/src/parset.pod new file mode 100644 index 00000000..9a98b419 --- /dev/null +++ b/src/parset.pod @@ -0,0 +1,231 @@ +#!/usr/bin/perl -w + +=encoding utf8 + +=head1 NAME + +parset - set shell variables in parallel + + +=head1 SYNOPSIS + +B I [options for GNU Parallel] + + +=head1 DESCRIPTION + +B is a shell function that puts the output from GNU +B into shell variables. + +The B function is defined as part of B. + +If I is a single variable name, this will be treated as +the destination variable and made into an array. + +If I contains multiple names separated by ',' or space, +the names will be the destination variables. + + +=head1 OPTIONS + +Same as GNU B. + + +=head1 SUPPORTED SHELLS + +=head2 Bash + +=head3 Examples + +Put output into B: + + parset myarray seq 3 ::: 4 5 6 + echo "${myarray[1]}" + +Put output into vars B<$seq, $pwd, $ls>: + + parset "seq pwd ls" ::: "seq 10" pwd ls + echo "$ls" + +Put output into vars B<$seq, $pwd, $ls>: + + into_vars=(seq pwd ls) + parset "${into_vars[*]}" ::: "seq 10" pwd ls + echo "$ls" + +The commands to run can be an array: + + cmd=("echo '<>'" "pwd") + parset data ::: "${cmd[@]}" + echo "${data[0]}" + echo "${data[1]}" + +You cannot pipe into B, but must use a tempfile: + + seq 10 > parallel_input + parset res echo :::: parallel_input + echo "${res[0]}" + echo "${res[9]}" + + +=head3 Installation + +Put this in B<$HOME/.bashrc>: + + . `which env_parallel.bash` + +E.g. by doing: + + echo '. `which env_parallel.bash`' >> $HOME/.bashrc + + +=head1 EXIT STATUS + +Same as GNU B. 
+ + +=head1 AUTHOR + +When using GNU B for a publication please cite: + +O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: +The USENIX Magazine, February 2011:42-47. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008,2009,2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017 Ole Tange, +http://ole.tange.dk and Free Software Foundation, Inc. + +Parts of the manual concerning B compatibility is inspired by +the manual of B from GNU findutils 4.4.2. + + +=head1 LICENSE + +Copyright (C) 2016 +Ole Tange and Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this documentation +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the file fdl.txt. 
+ +=head2 Documentation license II + +You are free: + +=over 9 + +=item B + +to copy, distribute and transmit the work + +=item B + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as cc-by-sa.txt. + + +=head1 DEPENDENCIES + +B uses GNU B. + + +=head1 SEE ALSO + +B(1), + +B(1), B(1), B(1), B(1), B(1) B(1), +B(1). + + +=cut diff --git a/testsuite/tests-to-run/parallel-local-1s.sh b/testsuite/tests-to-run/parallel-local-1s.sh index c1274e9d..25f155da 100644 --- a/testsuite/tests-to-run/parallel-local-1s.sh +++ b/testsuite/tests-to-run/parallel-local-1s.sh @@ -196,6 +196,41 @@ par_result_replace() { rm -rf /tmp/par_*_49983-* } +par_parset() { + echo '### test parset' + . 
`which env_parallel.bash` + + echo 'Put output into $myarray' + parset myarray seq 3 ::: 4 5 6 + echo "${myarray[1]}" + + echo 'Put output into vars $seq, $pwd, $ls' + parset "seq pwd ls" ::: "seq 10" pwd ls + echo "$seq" + + echo 'Put output into vars $seq, $pwd, $ls': + into_vars=(seq pwd ls) + parset "${into_vars[*]}" ::: "seq 5" pwd ls + echo "$seq" + + echo 'The commands to run can be an array' + cmd=("echo '<>'" "pwd") + parset data ::: "${cmd[@]}" + echo "${data[0]}" + echo "${data[1]}" + + echo 'You cannot pipe into parset, but must use a tempfile' + seq 10 > parallel_input + parset res echo :::: parallel_input + echo "${res[0]}" + echo "${res[9]}" + + echo 'Commands with newline require -0' + parset var -0 ::: 'echo "line1 +line2"' 'echo "command2"' + echo "${var[0]}" +} + export -f $(compgen -A function | grep par_) compgen -A function | grep par_ | sort | diff --git a/testsuite/wanted-results/parallel-local-1s b/testsuite/wanted-results/parallel-local-1s index 63e62d41..1ac65c0c 100644 --- a/testsuite/wanted-results/parallel-local-1s +++ b/testsuite/wanted-results/parallel-local-1s @@ -4,7 +4,7 @@ echo "### BUG: The length for -X is not close to max (131072)"; seq 1 60000 | seq 1 60000 | parallel -X echo a{}b{}c |head -n 1 |wc 1 5644 65514 seq 1 60000 | parallel -X echo |head -n 1 |wc - 1 12771 65520 + 1 12770 65514 seq 1 60000 | parallel -X echo a{}b{}c {} |head -n 1 |wc 1 8098 65512 seq 1 60000 | parallel -X echo {}aa{} |head -n 1 |wc @@ -453,6 +453,37 @@ par_failing_compressor parallel: Error: cat;false failed. par_failing_compressor parallel -k -k -k -k --compress --compress-program cat\;false --decompress-program cat\;false echo ::: C=cat\;false,D=cat\;false par_failing_compressor C=cat;false,D=cat;false par_failing_compressor parallel: Error: cat;false failed. 
+par_parset ### test parset +par_parset Put output into $myarray +par_parset 3 +par_parset 4 +par_parset 5 +par_parset Put output into vars $seq, $pwd, $ls +par_parset 1 +par_parset 2 +par_parset 3 +par_parset 4 +par_parset 5 +par_parset 6 +par_parset 7 +par_parset 8 +par_parset 9 +par_parset 10 +par_parset Put output into vars $seq, $pwd, $ls: +par_parset 1 +par_parset 2 +par_parset 3 +par_parset 4 +par_parset 5 +par_parset The commands to run can be an array +par_parset <> +par_parset ~/privat/parallel/testsuite +par_parset You cannot pipe into parset, but must use a tempfile +par_parset 1 +par_parset 10 +par_parset Commands with newline require -0 +par_parset line1 +par_parset line2 par_result ### Test --results par_result I III par_result I IIII