diff --git a/doc/release_new_version b/doc/release_new_version index 7ffa8786..45fcf896 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -201,50 +201,21 @@ cc:Sandro Cazzaniga , Ryoichiro Suzuki , Jesse Alama -Subject: GNU Parallel 20131122 ('Haiyan') released +Subject: GNU Parallel 20131222 ('') released -GNU Parallel 20131122 ('Haiyan') has been released. It is +GNU Parallel 20131222 ('') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ New in this release: -* A citation notice is printed on stderr only if stderr is a terminal, - the user has not specified --no-notice and the user has not run - --bibtex once. This makes the release alpha quality. +* Parallel rsync + http://pastebin.com/JmnB9ffq -* --compress will compress temporary files. If the output is big and - very compressible this will take up less disk space in $TMPDIR and - possibly be faster due to less disk I/O. +* Gnu Parallel for fun and profit + https://gist.github.com/celoyd/f7eb55ad69c9b33fd8c3 -* --compress-program comtrols which program to use for compressing - temporary files. - -* --bar show progress as a progress bar compatible with zenity. - -* --resume can now be used with --result: Jobs already run will be - skipped. - -* --transfer and --basefile support paths relative to the --workdir by - inserting /./ into the path. 
- -* GNU Parallel was used (unfortunately with improper citation) in: - 'fastphylo: Fast tools for phylogenetics' - http://www.biomedcentral.com/1471-2105/14/334/abstract - -* Using GNU parallel - http://davetang.org/muse/2013/11/18/using-gnu-parallel/ - -* Techlux - GNU - Parallel (German) - https://techlux.de/blog/2013/11/07/gnu-parallel/ - -* awk, sed, bzip2, grep, wc на всех ядрах - http://vk.com/page-30666517_45528467 - -* 如何利用多核CPU來加速你的Linux命令 — awk, sed, bzip2, grep, wc等 - http://www.hksilicon.com/kb/articles/290543/CPULinuxawk-sed-bzip2-grep-wc - -* GNU Parallel (Japanese) - http://jarp.does.notwork.org/diary/201311b.html#20131117 +* Procesando la contabilidad del PP + http://www.neorazorx.com/2013/07/procesando-la-contabilidad-del-pp.html * Bug fixes and man page updates. diff --git a/src/parallel b/src/parallel index 2e5412cc..bbdce162 100755 --- a/src/parallel +++ b/src/parallel @@ -119,6 +119,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh"); print $fh @cores, @cpus, @maxline, @echo; close $fh; +# my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null"; my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 1000 --joblog - --plain --tag --tagstring {1} --colsep '\t' -k eval {2} 2>/dev/null"; ::debug($cmd."\n"); open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); @@ -127,6 +128,7 @@ if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { my @col = split /\t/, $_; if(defined $col[6]) { # This is a line from --joblog + # seq host time spent sent received exit signal command # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores if($col[0] eq "Seq" and $col[1] eq "Host" and $col[2] eq "Starttime" and $col[3] eq "Runtime") { @@ -259,7 +261,7 @@ if($opt::nonall or $opt::onall) { (@opt::env ? 
map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), ); ::debug("| $0 $options\n"); - open(my $parallel_fh, "|-", "$0 -j0 $options") || + open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || ::die_bug("This does not run GNU Parallel: $0 $options"); my @joblogs; for my $sshlogin (values %Global::host) { @@ -2190,7 +2192,7 @@ sub reaper { $job->set_exitstatus($? >> 8); $job->set_exitsignal($? & 127); debug("died (".$job->exitstatus()."): ".$job->seq()); - $job->set_endtime(); + $job->set_endtime(::now()); if($stiff == $Global::tty_taken) { # The process that died had the tty => release it $Global::tty_taken = 0; @@ -2313,11 +2315,13 @@ sub usage { sub citation_notice { - # if --no-notice: do nothing + # if --no-notice or --plain: do nothing # if stderr redirected: do nothing # if ~/.parallel/will-cite: do nothing # else: print citation notice to stderr if($opt::no_notice + or + $opt::plain or not -t $Global::original_stderr or @@ -2394,7 +2398,7 @@ sub bibtex { print "WARNING: YOU ARE USING --tollef. 
IF THINGS ARE ACTING WEIRD USE --gnu.\n"; } print join("\n", - "When using GNU Parallel to process data for publication please cite:", + "When using programs that use GNU Parallel to process data for publication please cite:", "", "\@article{Tange2011a,", " title = {GNU Parallel - The Command-Line Power Tool},", @@ -2409,6 +2413,8 @@ sub bibtex { " pages = {42-47}", "}", "", + "(Feel free to use \\nocite{Tange2011a})", + "", "This helps funding further development.", "" ); @@ -4190,7 +4196,7 @@ sub endtime { sub set_endtime { my $self = shift; - my $endtime = shift || ::now(); + my $endtime = shift; $self->{'endtime'} = $endtime; } @@ -4207,6 +4213,7 @@ sub kill { # Record this jobs as failed $self->set_exitstatus(-1); # Send two TERMs to give time to clean up + ::debug("Kill seq ".$self->seq()."\n"); for my $signal ("TERM", "TERM", "KILL") { my $alive = 0; for my $pid (@family_pids) { @@ -4766,6 +4773,7 @@ sub should_be_retried { return 0; } else { # This command should be retried + $self->set_endtime(undef); $Global::JobQueue->unget($self); ::debug("Retry ".$self->seq()."\n"); return 1; diff --git a/src/parallel.pdf b/src/parallel.pdf index 5b08908b..364b388c 100644 Binary files a/src/parallel.pdf and b/src/parallel.pdf differ diff --git a/src/parallel.pod b/src/parallel.pod index 7a3ff917..7efa47c3 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -614,7 +614,7 @@ Logfile for executed jobs. Save a list of the executed jobs to I<logfile> in the following TAB separated format: sequence number, sshlogin, start time as seconds since epoch, run time in seconds, bytes in files transferred, bytes in files returned, exit status, -and command run. +signal, and command run. To convert the times into ISO-8601 strict do: @@ -2496,6 +2496,22 @@ files before it removes the files. The output is saved to B<bigfile.sort>. 
+=head1 EXAMPLE: Running more than 500 jobs workaround + +If you need to run a massive amount of jobs in parallel, then you will +likely hit the filehandle limit which is often around 500 jobs. If you +are super user you can raise the limit in /etc/security/limits.conf +but you can also use this workaround. The filehandle limit is per +process. That means that if you just spawn more GNU B<parallel>s then +each of them can run 500 jobs. This will spawn up to 2500 jobs: + +B<cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg> + +This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this): + +B<cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg> + + =head1 EXAMPLE: Working as mutex and counting semaphore The command B<sem> is an alias for B<parallel --semaphore>. diff --git a/src/parallel.texi b/src/parallel.texi index 3a4a86a0..f39b9966 100644 --- a/src/parallel.texi +++ b/src/parallel.texi @@ -647,7 +647,7 @@ Logfile for executed jobs. Save a list of the executed jobs to @emph{logfile} in the following TAB separated format: sequence number, sshlogin, start time as seconds since epoch, run time in seconds, bytes in files transferred, bytes in files returned, exit status, -and command run. +signal, and command run. To convert the times into ISO-8601 strict do: @@ -2691,6 +2691,22 @@ files are passed to the second @strong{parallel} that runs @strong{sort -m} on t files before it removes the files. The output is saved to @strong{bigfile.sort}. +@chapter EXAMPLE: Running more than 500 jobs workaround +@anchor{EXAMPLE: Running more than 500 jobs workaround} + +If you need to run a massive amount of jobs in parallel, then you will +likely hit the filehandle limit which is often around 500 jobs. If you +are super user you can raise the limit in /etc/security/limits.conf +but you can also use this workaround. The filehandle limit is per +process. That means that if you just spawn more GNU @strong{parallel}s then +each of them can run 500 jobs. 
This will spawn up to 2500 jobs: + +@strong{cat myinput | parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg} + +This will spawn up to 250000 jobs (use with caution - you need 250 GB RAM to do this): + +@strong{cat myinput | parallel --pipe -N 500 --round-robin -j500 parallel -j500 your_prg} + @chapter EXAMPLE: Working as mutex and counting semaphore @anchor{EXAMPLE: Working as mutex and counting semaphore}