diff --git a/parallel b/parallel index c8d467bb..36b42746 100755 --- a/parallel +++ b/parallel @@ -241,7 +241,7 @@ amount of files running: B<ls | sort | parallel -v "ls {} | wc"> -will give the output of each dir, but it will be sorted accoring to +will give the output of each dir, but it will be sorted according to which job completed first. To keep the order the same as input run: @@ -294,6 +294,18 @@ easier just to write a small script and have B<parallel> call that script. +=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS + +If you regret starting a lot of jobs you can simply break B<parallel>, +but if you want to make sure you do not have half-completed jobs you +should send the signal B<-USR1> to B<parallel>: + +B<killall -USR1 parallel> + +This will tell B<parallel> to not start any new jobs, but wait until +the currently running jobs are finished. + + =head1 DIFFERENCES BETWEEN xargs/find -exec AND parallel B<xargs> and B<find -exec> offer some of the same possibilites as @@ -323,7 +335,8 @@ run together, e.g. the first half of a line is from one process and the last half of the line is from another process. B<xargs> has no support for keeping the order of the output, therefore -output of the second job cannot be postponed till the first job is done. +if running jobs in parallel using B<xargs> the output of the second +job cannot be postponed till the first job is done. B<xargs> has no support for context replace, so you will have to create the arguments. @@ -348,6 +361,19 @@ becomes B<ls | xargs -d "\n" -P10 -I {} bash -c "echo {}; ls {}|wc"> + +=head1 DIFFERENCES BETWEEN mdm/middleman AND parallel + +middleman(mdm) is also a tool for running jobs in parallel. + +Here are the shellscripts of http://mdm.berlios.de/usage.html ported +to parallel use: + +B<seq 1 19 | parallel -j+0 buffon -o - | sort -n >>B< result> + +B<cat files | parallel -j+0 cmd> + + =head1 BUGS Filenames beginning with '-' can cause some commands to give @@ -362,6 +388,29 @@ Report bugs to . xargs dropin-replacement. Implement the missing --features +monitor to see which jobs are currently running +http://code.google.com/p/ppss/ + +accept signal USR1 to complete current running jobs but do not start +new jobs. 
+ +distribute jobs to computers with different speeds/no_of_cpu using ssh +ask the computers how many cpus they have and spawn appropriately +according to -j setting + +Parallelize so this can be done: +mdm.screen find dir -execdir mdm-run cmd {} \; +Maybe: +find dir -execdir parallel --communication-file /tmp/comfile cmd {} \; + +=head2 Comfile + +This will put a lock on /tmp/comfile. The number of locks is the number of running commands. +If the number is smaller than -j then it will start a process in the background ( cmd & ), +otherwise wait. + +parallel --wait /tmp/comfile will wait until no more locks on the file + =head1 AUTHOR Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk @@ -423,14 +472,8 @@ $Global::input_is_filename = (@ARGV); $/="\n"; $Global::debug = (defined $::opt_d); -if(defined $::opt_x) { - $Global::xargs = 1; - $Global::command_line_max_len = max_length_of_command_line(); -} -if(defined $::opt_X) { - $Global::Xargs = 1; - $Global::command_line_max_len = max_length_of_command_line(); -} +if(defined $::opt_x) { $Global::xargs = 1; } +if(defined $::opt_X) { $Global::Xargs = 1; } if(defined $::opt_v) { $Global::verbose = 1; } if(defined $::opt_s) { $Global::verbose = 0; } if(defined $::opt_k) { $Global::keeporder = 1; } @@ -501,7 +544,7 @@ sub generate_command_line { # debug("arglen $arg_length = $number_of_substitution * (1 + length ($next_arg)) + $length_of_context\n"); my $job_line_length = $length_of_command_no_args + 1 + $arg_length; # debug("linelen $job_line_length = $length_of_command_no_args + 1 + $arg_length\n"); - if($job_line_length >= $Global::command_line_max_len) { + if($job_line_length >= max_length_of_command_line()) { unget_arg(pop @quoted_args); if($quoted_args[0]) { last; @@ -571,12 +614,15 @@ sub shell_quote { sub max_length_of_command_line { # Find the max_length of a command line # First find an upper bound - my $len = 2; - do { - $len += $len+1; - } while (is_acceptable_command_line_length($len)); - # Then search for 
the actual max length between 0 and upper bound - return binary_find_max_length(0,$len); + if(not $Global::command_line_max_len) { + my $len = 2; + do { + $len += $len+1; + } while (is_acceptable_command_line_length($len)); + # Then search for the actual max length between 0 and upper bound + $Global::command_line_max_len = binary_find_max_length(0,$len); + } + return $Global::command_line_max_len; } sub binary_find_max_length { @@ -755,15 +801,16 @@ sub user_requested_processes { } sub no_of_cpus { - my $no_of_cpus = - (no_of_cpus_gnu_linux() || - no_of_cpus_solaris()); - if($no_of_cpus) { - return $no_of_cpus; - } else { - warn("Cannot figure out no of cpus. Using 1"); - return 1; + if(not $Global::no_of_cpus) { + my $no_of_cpus = (no_of_cpus_gnu_linux() || no_of_cpus_solaris()); + if($no_of_cpus) { + $Global::no_of_cpus = $no_of_cpus; + } else { + warn("Cannot figure out no of cpus. Using 1"); + $Global::no_of_cpus = 1; + } } + return $Global::no_of_cpus; } sub no_of_cpus_gnu_linux { @@ -805,6 +852,7 @@ sub init_run_jobs { open $Global::original_stdout, ">&STDOUT" or die "Can't dup STDOUT: $!"; open $Global::original_stderr, ">&STDERR" or die "Can't dup STDERR: $!"; $Global::running_jobs=0; + $SIG{USR1} = \&StartNoNewJobs; } sub next_command_line { @@ -854,10 +902,12 @@ sub drain_job_queue { sub start_more_jobs { my $jobs_started = 0; - while($Global::running_jobs < $Global::processes_to_run - and - start_another_job()) { - $jobs_started++; + if(not $Global::StartNoNewJobs) { + while($Global::running_jobs < $Global::processes_to_run + and + start_another_job()) { + $jobs_started++; + } } return $jobs_started; } @@ -967,6 +1017,10 @@ sub print_job { # Signal handling stuff # +sub StartNoNewJobs { + $Global::StartNoNewJobs++; +} + sub CountSigChild { $Global::SigChildCaught++; } diff --git a/parallel.1 b/parallel.1 index 6d9bfea8..c16bfa21 100644 --- a/parallel.1 +++ b/parallel.1 @@ -1,4 +1,4 @@ -.\" Automatically generated by Pod::Man 2.1801 (Pod::Simple 3.05) 
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07) .\" .\" Standard preamble: .\" ======================================================================== @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "PARALLEL 1" -.TH PARALLEL 1 "2009-09-02" "perl v5.10.0" "User Contributed Perl Documentation" +.TH PARALLEL 1 "2009-10-26" "perl v5.10.1" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -343,7 +343,7 @@ amount of files running: .PP \&\fBls | sort | parallel \-v \*(L"ls {} | wc\*(R"\fR .PP -will give the output of each dir, but it will be sorted accoring to +will give the output of each dir, but it will be sorted according to which job completed first. .PP To keep the order the same as input run: @@ -392,6 +392,16 @@ Or for substituting output: \&\fBConclusion\fR: To avoid dealing with the quoting problems it may be easier just to write a small script and have \fBparallel\fR call that script. +.SH "COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS" +.IX Header "COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS" +If you regret starting a lot of jobs you can simply break \fBparallel\fR, +but if you want to make sure you do not have half-completed jobs you +should send the signal \fB\-USR1\fR to \fBparallel\fR: +.PP +\&\fBkillall \-USR1 parallel\fR +.PP +This will tell \fBparallel\fR to not start any new jobs, but wait until +the currently running jobs are finished. .SH "DIFFERENCES BETWEEN xargs/find \-exec AND parallel" .IX Header "DIFFERENCES BETWEEN xargs/find -exec AND parallel" \&\fBxargs\fR and \fBfind \-exec\fR offer some of the same possibilites as @@ -421,7 +431,8 @@ run together, e.g. the first half of a line is from one process and the last half of the line is from another process. 
.PP \&\fBxargs\fR has no support for keeping the order of the output, therefore -output of the second job cannot be postponed till the first job is done. +if running jobs in parallel using \fBxargs\fR the output of the second +job cannot be postponed till the first job is done. .PP \&\fBxargs\fR has no support for context replace, so you will have to create the arguments. @@ -445,6 +456,16 @@ and becomes .PP \&\fBls | xargs \-d \*(L"\en\*(R" \-P10 \-I {} bash \-c \*(L"echo {}; ls {}|wc\*(R"\fR +.SH "DIFFERENCES BETWEEN mdm/middleman AND parallel" +.IX Header "DIFFERENCES BETWEEN mdm/middleman AND parallel" +middleman(mdm) is also a tool for running jobs in parallel. +.PP +Here are the shellscripts of http://mdm.berlios.de/usage.html ported +to parallel use: +.PP +\&\fBseq 1 19 | parallel \-j+0 buffon \-o \- | sort \-n \fR>\fB result\fR +.PP +\&\fBcat files | parallel \-j+0 cmd\fR .SH "BUGS" .IX Header "BUGS" Filenames beginning with '\-' can cause some commands to give @@ -456,6 +477,28 @@ Report bugs to . .IX Header "IDEAS" xargs dropin-replacement. Implement the missing \-\-features +.PP +monitor to see which jobs are currently running +http://code.google.com/p/ppss/ +.PP +accept signal \s-1USR1\s0 to complete current running jobs but do not start +new jobs. +.PP +distribute jobs to computers with different speeds/no_of_cpu using ssh +ask the computers how many cpus they have and spawn appropriately +according to \-j setting +.PP +Parallelize so this can be done: +mdm.screen find dir \-execdir mdm-run cmd {} \e; +Maybe: +find dir \-execdir parallel \-\-communication\-file /tmp/comfile cmd {} \e; +.SS "Comfile" +.IX Subsection "Comfile" +This will put a lock on /tmp/comfile. The number of locks is the number of running commands. +If the number is smaller than \-j then it will start a process in the background ( cmd & ), +otherwise wait. 
+.PP +parallel \-\-wait /tmp/comfile will wait until no more locks on the file .SH "AUTHOR" .IX Header "AUTHOR" Copyright (C) 2007\-10\-18 Ole Tange, http://ole.tange.dk diff --git a/unittest/actual-results/test13 b/unittest/actual-results/test13 index 098b563b..6ae4e313 100644 --- a/unittest/actual-results/test13 +++ b/unittest/actual-results/test13 @@ -30,3 +30,23 @@ begin 29 30 end +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 diff --git a/unittest/tests-to-run/test13.sh b/unittest/tests-to-run/test13.sh index 48be71ec..0b4f3ba8 100644 --- a/unittest/tests-to-run/test13.sh +++ b/unittest/tests-to-run/test13.sh @@ -4,3 +4,6 @@ ulimit -n 50 (echo "sleep 3; echo begin"; seq 1 30 | parallel -kq echo "sleep 1; echo {}"; echo "echo end") \ | parallel -k -j0 + +# Test SIGUSR1 +(sleep 5; killall parallel -USR1) & seq 1 100 | parallel -k sleep 3';' echo diff --git a/unittest/wanted-results/test13 b/unittest/wanted-results/test13 index 098b563b..6ae4e313 100644 --- a/unittest/wanted-results/test13 +++ b/unittest/wanted-results/test13 @@ -30,3 +30,23 @@ begin 29 30 end +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20