From e22467f4dd691b50dcd1a433804eb53a90604950 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 23 Feb 2015 22:32:34 +0100 Subject: [PATCH] parallel: --number-of-cores now respects 'taskset'. --- NEWS | 4 +-- doc/release_new_version | 35 ++++------------------ src/parallel | 51 ++++++++++++++++++++++++------- src/parallel.pod | 66 ++++++++++++++++++++--------------------- src/parallel_design.pod | 46 +++++++++++++++++++++++----- 5 files changed, 120 insertions(+), 82 deletions(-) diff --git a/NEWS b/NEWS index 34d23751..425db9f0 100644 --- a/NEWS +++ b/NEWS @@ -22,7 +22,7 @@ Experiments for Identifying Recommender Differences http://elehack.net/research/thesis/mde-thesis.pdf -* GNU Parallel was using (unfortunately with wrong citation) in: +* GNU Parallel was used (unfortunately with wrong citation) in: Performance and Scaling Comparison Study of RDBMS and NoSQL (MongoDB) http://ijact.in/wp-content/uploads/2014/11/COMPUSOFT-311-1270-1275.pdf @@ -37,7 +37,7 @@ http://biorxiv.org/content/biorxiv/early/2014/12/05/012179.full.pdf * Zip Folders with GNU Parallel - http://fazky.github.io/Linux/2015-01-07-GNU-Parallel.html + http://fazky.github.io/posts/Linux/2015-01-07-GNU-Parallel.html * Using GNU Parallel with Freesurfer http://programminginadarkroom.blogspot.dk/2015/02/using-gnu-parallel-with-freesurfer.html diff --git a/doc/release_new_version b/doc/release_new_version index 4df123e2..dbe8ca31 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -208,44 +208,21 @@ cc:Tim Cuthbertson , Ryoichiro Suzuki , Jesse Alama -Subject: GNU Parallel 20150222 (' (((:~{> Krudttønden') released +Subject: GNU Parallel 20150322 ('') released -GNU Parallel 20150222 (' (((:~{> Krudttønden') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ +GNU Parallel 20150322 ('') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ Haiku of the month: - xargs' space and quote - headache causing behaviour. 
- Use GNU Parallel - -- Ole Tange + <<>> New in this release: -* --tmux has gotten a major overhaul. +* GNU Parallel was cited in: RIG: Recalibration and Interrelation of genomic sequence data with the GATK http://www.g3journal.org/content/early/2015/02/13/g3.115.017012.full.pdf+html -* GNU Parallel was cited in: RaftLib: A C++ Template Library for High Performance Stream Parallel Processing http://www.cs.wustl.edu/~lip/pubs/pmam15_jbeard.pdf +* GNU Parallel was cited in: MPI-blastn and NCBI-TaxCollector: Improving metagenomic analysis with high performance classification and wide taxonomic attachment http://www.worldscientific.com/doi/abs/10.1142/S0219720014500139?af=R& -* GNU Parallel was cited in: Towards Collaborative Exploration and Analysis of Big Data from Mars: A Noachis Terra Case Study http://link.springer.com/chapter/10.1007/978-3-319-13865-7_25 - -* GNU Parallel was cited in: Quantifying properties of hot and dense QCD matter through systematic model-to-data comparison http://arxiv.org/pdf/1502.00339.pdf - -* GNU Parallel was cited in: Towards Collaborative Exploration and Analysis of Big Data from Mars: A Noachis Terra Case Study http://link.springer.com/chapter/10.1007/978-3-319-13865-7_25 - -* GNU Parallel was cited in: Towards Recommender Engineering Tools and Experiments for Identifying Recommender Differences http://elehack.net/research/thesis/mde-thesis.pdf - -* GNU Parallel was using (unfortunately with wrong citation) in: Performance and Scaling Comparison Study of RDBMS and NoSQL (MongoDB) http://ijact.in/wp-content/uploads/2014/11/COMPUSOFT-311-1270-1275.pdf - -* GNU Parallel was used (unfortunately without citation) in: Parallel Implementation of Big Data Pre-Processing Algorithms for Sentiment Analysis of Social Networking Data http://www.researchmathsci.org/IJFMAart/ijfma-v6n2-7.pdf - -* GNU Parallel was used (unfortunately without citation) in: SpeedSeq: Ultra-fast personal genome analysis and interpretation 
http://biorxiv.org/content/biorxiv/early/2014/12/05/012179.full.pdf - -* Zip Folders with GNU Parallel http://fazky.github.io/Linux/2015-01-07-GNU-Parallel.html - -* Using GNU Parallel with Freesurfer http://programminginadarkroom.blogspot.dk/2015/02/using-gnu-parallel-with-freesurfer.html - -* GNU Parallel is used in Velociraptor: https://github.com/ericwhyne/Velociraptor - -* Marcus Beach GNU Parallel http://marcusbeach.co/gnu-parallel/ +* GNU Parallel was used in: https://github.com/alexbyrnes/FCC-Political-Ads_The-Code * Bug fixes and man page updates. diff --git a/src/parallel b/src/parallel index f50f9e49..42bfe338 100755 --- a/src/parallel +++ b/src/parallel @@ -953,7 +953,7 @@ sub parse_options { sub init_globals { # Defaults: - $Global::version = 20150222; + $Global::version = 20150223; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; @@ -4616,20 +4616,35 @@ sub no_of_cpus_gnu_linux { # undef if not GNU/Linux my $no_of_cpus; my $no_of_cores; + my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cpus = 0; $no_of_cores = 0; my %seen; - open(my $in_fh, "<", "/proc/cpuinfo") || return undef; - while(<$in_fh>) { - if(/^physical id.*[:](.*)/ and not $seen{$1}++) { - $no_of_cpus++; - } - /^processor.*[:]/i and $no_of_cores++; - } - close $in_fh; + if(open(my $in_fh, "<", "/proc/cpuinfo")) { + while(<$in_fh>) { + if(/^physical id.*[:](.*)/ and not $seen{$1}++) { + $no_of_cpus++; + } + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } } - return ($no_of_cpus||$no_of_cores); + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores)); } sub no_of_cores_gnu_linux { @@ -4637,6 +4652,7 @@ sub no_of_cores_gnu_linux { # 
Number of CPU cores on GNU/Linux # undef if not GNU/Linux my $no_of_cores; + my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cores = 0; open(my $in_fh, "<", "/proc/cpuinfo") || return undef; @@ -4645,7 +4661,20 @@ sub no_of_cores_gnu_linux { } close $in_fh; } - return $no_of_cores; + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cores,$no_of_active_cores)); } sub no_of_cpus_freebsd { diff --git a/src/parallel.pod b/src/parallel.pod index afbb722a..5c576ce7 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -104,7 +104,7 @@ B<--env> and use B instead of B. The command cannot contain the character \257 (macron: ¯). -=item B<{}> (beta testing) +=item B<{}> Input line. This replacement string will be replaced by a full line read from the input source. The input source is normally stdin @@ -117,7 +117,7 @@ If the command line contains no replacement strings then B<{}> will be appended to the command line. -=item B<{.}> (beta testing) +=item B<{.}> Input line without extension. This replacement string will be replaced by the input with the extension removed. If the input line contains @@ -133,7 +133,7 @@ The replacement string B<{.}> can be changed with B<--er>. To understand replacement strings see B<{}>. -=item B<{/}> (beta testing) +=item B<{/}> Basename of input line. This replacement string will be replaced by the input with the directory part removed. @@ -144,7 +144,7 @@ B<--basenamereplace>. To understand replacement strings see B<{}>. -=item B<{//}> (beta testing) +=item B<{//}> Dirname of input line. This replacement string will be replaced by the dir of the input line. See B(1). @@ -155,7 +155,7 @@ B<--dirnamereplace>. To understand replacement strings see B<{}>. 
-=item B<{/.}> (beta testing) +=item B<{/.}> Basename of input line without extension. This replacement string will be replaced by the input with the directory and extension part @@ -167,7 +167,7 @@ B<--basenameextensionreplace>. To understand replacement strings see B<{}>. -=item B<{#}> (beta testing) +=item B<{#}> Sequence number of the job to run. This replacement string will be replaced by the sequence number of the job being run. It contains the @@ -178,7 +178,7 @@ The replacement string B<{#}> can be changed with B<--seqreplace>. To understand replacement strings see B<{}>. -=item B<{%}> (beta testing) +=item B<{%}> Job slot number. This replacement string will be replaced by the job's slot number between 1 and number of jobs to run in parallel. There @@ -190,7 +190,7 @@ The replacement string B<{%}> can be changed with B<--slotreplace>. To understand replacement strings see B<{}>. -=item B<{>IB<}> (beta testing) +=item B<{>IB<}> Argument from input source I or the I'th argument. This positional replacement string will be replaced by the input from input @@ -201,7 +201,7 @@ I'th last argument. To understand replacement strings see B<{}>. -=item B<{>I.B<}> (beta testing) +=item B<{>I.B<}> Argument from input source I or the I'th argument without extension. It is a combination of B<{>IB<}> and B<{.}>. @@ -214,7 +214,7 @@ extension removed. To understand positional replacement strings see B<{>IB<}>. -=item B<{>I/B<}> (beta testing) +=item B<{>I/B<}> Basename of argument from input source I or the I'th argument. It is a combination of B<{>IB<}> and B<{/}>. @@ -227,7 +227,7 @@ directory (if any) removed. To understand positional replacement strings see B<{>IB<}>. -=item B<{>I//B<}> (beta testing) +=item B<{>I//B<}> Dirname of argument from input source I or the I'th argument. It is a combination of B<{>IB<}> and B<{//}>. @@ -239,7 +239,7 @@ the I'th argument (when used with B<-N>). See B(1). To understand positional replacement strings see B<{>IB<}>. 
-=item B<{>I/.B<}> (beta testing) +=item B<{>I/.B<}> Basename of argument from input source I or the I'th argument without extension. It is a combination of B<{>IB<}>, B<{/}>, and @@ -253,7 +253,7 @@ directory (if any) and extension removed. To understand positional replacement strings see B<{>IB<}>. -=item B<{=>IB<=}> (beta testing) +=item B<{=>IB<=}> Replace with calculated I. B<$_> will contain the same as B<{}>. After evaluating I B<$_> will be used @@ -266,7 +266,7 @@ The B<{=>IB<=}> must be given as a single string. See also: B<--rpl> B<--parens> -=item B<{=>I IB<=}> (beta testing) +=item B<{=>I IB<=}> Positional equivalent to B<{= perl expression =}>. To understand positional replacement strings see B<{>IB<}>. @@ -444,7 +444,7 @@ I defaults to 1M. See B<--pipe> and B<--pipepart> for use of this. -=item B<--cat> (beta testing) +=item B<--cat> Create a temporary file with content. Normally B<--pipe>/B<--pipepart> will give data to the program on stdin (standard input). With B<--cat> @@ -454,7 +454,7 @@ you can do: B. See also B<--fifo>. -=item B<--cleanup> (beta testing) +=item B<--cleanup> Remove transferred files. B<--cleanup> will remove the transferred files on the remote computer after processing is done. @@ -568,7 +568,7 @@ If I is omitted, there is no end of file string. If neither B<-E> nor B<-e> is used, no end of file string is used. -=item B<--env> I (beta testing) +=item B<--env> I Copy environment variable I. This will copy I to the environment that the command is run in. This is especially useful for @@ -635,7 +635,7 @@ Implies B<--semaphore>. See also B<--bg>, B. -=item B<--fifo> (beta testing) +=item B<--fifo> Create a temporary fifo with content. Normally B<--pipe> and B<--pipepart> will give data to the program on stdin (standard @@ -687,9 +687,9 @@ See also: B<--line-buffer> B<--ungroup> Print a summary of the options to GNU B and exit. 
-=item B<--halt-on-error> I (beta testing) +=item B<--halt-on-error> I -=item B<--halt> I (beta testing) +=item B<--halt> I How should GNU B terminate? @@ -925,7 +925,7 @@ limiting factor. See also: B<--group> B<--ungroup> -=item B<--load> I (beta testing) +=item B<--load> I Do not start new jobs on a given computer unless the number of running processes on the computer is less than I. I uses @@ -973,7 +973,7 @@ See also B<-X> for context replace. If in doubt use B<-X> as that will most likely do what is needed. -=item B<--memfree> I (beta testing) +=item B<--memfree> I Minimum memory free when starting another job. The I can be postfixed with K, M, G, T, P, k, m, g, t, or p which would multiply @@ -1169,7 +1169,7 @@ Print the number of CPU cores and exit (used by GNU B itself to determine the number of CPU cores on remote computers). -=item B<--no-keep-order> (beta testing) +=item B<--no-keep-order> Overrides an earlier B<--keep-order> (e.g. if set in B<~/.parallel/config>). @@ -1392,7 +1392,7 @@ useful if some jobs fail for no apparent reason (such as network failure). -=item B<--return> I (alpha testing) +=item B<--return> I (beta testing) Transfer files from remote computers. B<--return> is used with B<--sshlogin> when the arguments are files on the remote computers. When @@ -1493,7 +1493,7 @@ operating system and the B<-s> option. Pipe the input from /dev/null to do anything. -=item B<--semaphore> (beta testing) +=item B<--semaphore> Work as a counting semaphore. B<--semaphore> will cause GNU B to start I in the background. When the number of @@ -1530,9 +1530,9 @@ Implies B<--semaphore>. See also B. -=item B<--semaphoretimeout> I (beta testing) +=item B<--semaphoretimeout> I -=item B<--st> I (beta testing) +=item B<--st> I If I > 0: If the semaphore is not released within I seconds, take it anyway. @@ -1628,9 +1628,9 @@ I seconds after starting each ssh. I can be less than 1 seconds. 
-=item B<-S> I<[@hostgroups/][ncpu/]sshlogin[,[@hostgroups/][ncpu/]sshlogin[,...]]> (beta testing) +=item B<-S> I<[@hostgroups/][ncpu/]sshlogin[,[@hostgroups/][ncpu/]sshlogin[,...]]> -=item B<--sshlogin> I<[@hostgroups/][ncpu/]sshlogin[,[@hostgroups/][ncpu/]sshlogin[,...]]> (beta testing) +=item B<--sshlogin> I<[@hostgroups/][ncpu/]sshlogin[,[@hostgroups/][ncpu/]sshlogin[,...]]> Distribute jobs to remote computers. The jobs will be run on a list of remote computers. @@ -1768,7 +1768,7 @@ the lines will be prepended with the sshlogin instead. B<--tag> is ignored when using B<-u>. -=item B<--tagstring> I (alpha testing) +=item B<--tagstring> I (beta testing) Tag lines with a string. Each output line will be prepended with I and TAB (\t). I can contain replacement strings such as @@ -1785,7 +1785,7 @@ different dir for the files. Setting B<--tmpdir> is equivalent to setting $TMPDIR. -=item B<--tmux> (alpha testing) +=item B<--tmux> (beta testing) Use B for output. Start a B session and run each job in a window in that session. No other output will be produced. @@ -1811,7 +1811,7 @@ Print the job to be run on stderr (standard error). See also B<-v>, B<-p>. -=item B<--transfer> (beta testing) +=item B<--transfer> Transfer files to remote computers. B<--transfer> is used with B<--sshlogin> when the arguments are files and should be transferred @@ -1838,7 +1838,7 @@ B<--transfer> is often used with B<--return> and B<--cleanup>. B<--transfer> is ignored when used with B<--sshlogin :> or when not used with B<--sshlogin>. -=item B<--trc> I (beta testing) +=item B<--trc> I Transfer, Return, Cleanup. Short hand for: diff --git a/src/parallel_design.pod b/src/parallel_design.pod index e476e60e..f4de82dd 100644 --- a/src/parallel_design.pod +++ b/src/parallel_design.pod @@ -198,20 +198,52 @@ shell is B (which cannot hide stderr). 
=item --tmux +mkfifo I; +tmux -S new-session -s pI -d 'sleep .2' >&/dev/null; +tmux -S new-window -t pI -n <> \(<>\)\;\ perl\ -e\ \'while\(\$t++\<3\)\{\ print\ \$ARGV\[0\],\"\\n\"\ \}\'\ \$\?h/\$status\ \>\>\ I\&echo\ <>\;echo\ \Job\ finished\ at:\ \`date\`\;sleep\ 10; +exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' I -mkfifo I; tmux new-session -s pI -d -n <> \(<>\)\;\ perl\ -e\ \'while\(\$t++\<3\)\{\ print\ \$ARGV\[0\],\"\\n\"\ \}\'\ \$\?h/\$status/255\ \>\>\ I\&echo\ <>\;echo\ \Job\ finished\ at:\ \`date\`\;sleep\ 10; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' I +First a FIFO is made (.tmx). It is used for communicating exit +value. Next a new tmux session is made. This may fail if there is +already a session, so the output is ignored. If all job slots finish +at the same time, then B<tmux> will close the session. A temporary +socket is made (.tms) to avoid a race condition in B<tmux>. It is +cleaned up when GNU B<parallel> finishes. The input is used as the name of the windows in B<tmux>. When the job -inside B<tmux> finishes, the exit value is printed to a fifo. This -fifo is opened by perl outside B<tmux>, and perl then removes the fifo -(but keeping it open). Perl blocks until the first value is read from -the fifo, and this value is used as exit value. +inside B<tmux> finishes, the exit value is printed to the FIFO (.tmx). +This FIFO is opened by B<perl> outside B<tmux>, and B<perl> then +removes the FIFO. B<Perl> blocks until the first value is read from +the FIFO, and this value is used as exit value. To make it compatible with B<csh> and B<bash> the exit value is -printed as: $?h/$status/255 and this is parsed by perl. +printed as: $?h/$status and this is parsed by B<perl>. + +Works in B. There is a bug that makes it necessary to print the exit value 3 -times. Works in B. +times. + +Another bug in B<tmux> requires the length of the tmux title and +command to not be within certain limits. When inside these limits, 75 '\ ' +are added to the title to force it to be outside the limits. 
+ +You can map the bad limits using: + +perl -e 'map { $a=$_; print map { "$a,$_\n" } (1..17000) } (1..17000)' | shuf > ab; + +cat ab | parallel --colsep , --tagstring '{1}{=$_="\t"=}{2}' +tmux -S /tmp/p{%} new-session -d -n '{=1 $_="O"x$_ =}' true'\ {=2 $_="O"x$_ =};echo $?;rm /tmp/p{%}' +> value.csv 2>/dev/null + +R -e 'a<-read.table("value.csv");X11();plot(a[,1],a[,2],col=a[,3]+5,cex=0.1);Sys.sleep(1000)' + +For B 17000 can be lowered to 2100. + +The interesting areas are title 0..1000 with (title + whole command) +in 996..1127 and 9331..9636. + + =back