From 167332902bde4233c54ceed976a231658af8cdb7 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Tue, 15 Jun 2010 00:05:47 +0200 Subject: [PATCH] --progress implemented. Fixed bug if transfered file contains :. --- doc/FUTURE_IDEAS | 28 ++++- doc/release_new_version | 6 +- src/parallel | 181 +++++++++++++++++++++++++++++--- unittest/Start.sh | 1 + unittest/actual-results/test18 | 11 ++ unittest/actual-results/test23 | 22 ++-- unittest/tests-to-run/test18.sh | 11 +- unittest/tests-to-run/test23.sh | 17 +-- unittest/wanted-results/test18 | 11 ++ unittest/wanted-results/test23 | 22 ++-- 10 files changed, 271 insertions(+), 39 deletions(-) diff --git a/doc/FUTURE_IDEAS b/doc/FUTURE_IDEAS index 7830fe26..da118c15 100644 --- a/doc/FUTURE_IDEAS +++ b/doc/FUTURE_IDEAS @@ -1,3 +1,26 @@ +--progress +Fixed bug if transfered file contains : + + +* 100% options complete with xargs. All options for xargs can now + be used in GNU Parallel - even the more exotic. + +* --basefile for transfering basedata. When running jobs on remote + computers --basefile will transfer files before the first jobs is + run. It can be used to transfer data that remains the same for each + job such as scripts or lookup tables. + +* --progress shows progress. To see how many jobs is running on each + server use --progress. It can be turned on even after GNU Parallel + is started. + +* --halt-on-error stops if an error occurs. GNU Parallel will default + to run all jobs - even if some of them fail. With --halt-on-error + GNU Parallel can ignore errors, wait for the currently running jobs + to finish, or stop immediately when an error occurs. + +* New video showing the new options. + =head1 YouTube video GNU Parallel is a tool with lots of uses in shell. Every time you use @@ -100,10 +123,7 @@ En ssh med 20% loss og 900 ms delay, så kan login nås på 15 sek. 
Test if -0 works on filenames ending in '\n' -monitor to see which jobs are currently running -http://code.google.com/p/ppss/ - -If there are nomore jobs (STDIN is closed) then make sure to +If there are no more jobs (STDIN is eof) then make sure to distribute the arguments evenly if running -X. =head1 options diff --git a/doc/release_new_version b/doc/release_new_version index fa900505..d8b3c4eb 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -67,6 +67,8 @@ http://freshmeat.net/projects/parallel/releases/new == Send announce == +Newsgroups: comp.unix.shell,comp.unix.admin + <<<<< to:parallel@gnu.org, bug-parallel@gnu.org, info-gnu@gnu.org, bug-directory@gnu.org @@ -98,7 +100,9 @@ GNU Parallel makes sure output from the commands is the same output as you would get had you run the commands sequentially. This makes it possible to use output from GNU Parallel as input for other programs. - You can find more about GNU Parallel at: http://www.gnu.org/software/parallel/ + +Watch the intro video on http://www.youtube.com/watch?v=LlXDtd_pRaY + >>>>> diff --git a/src/parallel b/src/parallel index 0737f144..5d51c9c0 100755 --- a/src/parallel +++ b/src/parallel @@ -309,6 +309,21 @@ run remote and are very fast to run. This is disabled for sshlogins that specify their own ssh command. +=item B<--progress> + +Show progress of computations. List the computers involved in the task +with number of CPU cores detected and the max number of jobs to +run. After that show progress for each computer: number of running +jobs, number of completed jobs, and percentage of all jobs done by +this computer. The percentage will only be available after all jobs +have been scheduled as GNU B<parallel> only reads the next job when +ready to schedule it - this is to avoid wasting time and memory by +reading everything at startup. + +By sending GNU B<parallel> SIGUSR2 you can toggle turning on/off +B<--progress> on a running GNU B<parallel> process. 
+ + =item B<--max-args>=I =item B<-n> I @@ -1225,6 +1240,9 @@ abnormally and running remote (B<--cleanup> may not complete if stopped abnormally). The example B would require extra postprocessing if written using B. +For remote systems PPSS requires 3 steps: config, deploy, and +start. GNU B only requires one step. + =head3 EXAMPLES FROM ppss MANUAL Here are the examples from B's manual page with the equivalent @@ -1641,6 +1659,7 @@ sub parse_options { "cleanup" => \$::opt_cleanup, "basefile|B=s" => \@::opt_basefile, "halt-on-error|H=s" => \$::opt_halt_on_error, + "progress" => \$::opt_progress, # xargs-compatibility - implemented, man, unittest "max-procs|P=s" => \$::opt_P, "delimiter|d=s" => \$::opt_d, @@ -2112,6 +2131,7 @@ sub simultaneous_sshlogin { my $wanted_processes = shift; my ($sshcmd,$serverlogin) = sshcommand_of_sshlogin($sshlogin); my $cmd = "$sshcmd $serverlogin echo simultaneouslogin 2>&1 &"x$wanted_processes; + debug("Trying $wanted_processes logins at $serverlogin"); open (SIMUL, "($cmd)|grep simultaneouslogin | wc -l|") or die; my $ssh_limit = ; close SIMUL; @@ -2371,20 +2391,27 @@ sub min { # $Global::running{$pid}{sshlogin} = server to run on # $Global::running{$pid}{'exitstatus'} = exit status # $Global::host{$sshlogin}{'no_of_running'} = number of currently running jobs +# $Global::host{$sshlogin}{'completed'} = number of completed jobs # $Global::host{$sshlogin}{'ncpus'} = number of CPUs (or CPU cores) # $Global::host{$sshlogin}{'maxlength'} = max line length (currently buggy for remote) -# $Global::host{$sshlogin}{'max_no_of_running'} = number of currently running jobs +# $Global::host{$sshlogin}{'max_no_of_running'} = max parallel running jobs # $Global::host{$sshlogin}{'sshcmd'} = command to use as ssh # $Global::host{$sshlogin}{'serverlogin'} = username@hostname -# $Global::running_jobs = total number of running jobs +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# 
$Global::total_jobs = total jobs to be started at all +# $Global::total_completed = total jobs completed sub init_run_jobs { # Remember the original STDOUT and STDERR open $Global::original_stdout, ">&STDOUT" or die "Can't dup STDOUT: $!"; open $Global::original_stderr, ">&STDERR" or die "Can't dup STDERR: $!"; open $Global::original_stdin, "<&STDIN" or die "Can't dup STDIN: $!"; - $Global::running_jobs=0; + $Global::total_running = 0; + $Global::total_started = 0; + $Global::total_completed = 0; $SIG{USR1} = \&ListRunningJobs; + $SIG{USR2} = \&toggle_progress; $Global::original_sigterm = $SIG{TERM}; $SIG{TERM} = \&StartNoNewJobs; if(@::opt_basefile) { @@ -2486,10 +2513,127 @@ sub unget_arg { } sub drain_job_queue { - while($Global::running_jobs > 0) { - debug("jobs running: $Global::running_jobs Memory usage:".my_memory_usage()."\n"); - sleep 1; + if($::opt_progress) { + print init_progress(); } + my $last_header=""; + while($Global::total_running > 0) { + debug("jobs running: $Global::total_running Memory usage:".my_memory_usage()."\n"); + sleep 1; + if($::opt_progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + print "\n",$progress{'header'},"\n"; + $last_header = $progress{'header'}; + } + print "\r",$progress{'status'}; + } + } + if($::opt_progress) { + print "\n"; + } +} + +sub toggle_progress { + # Turn on/off progress view + $::opt_progress = not $::opt_progress; + if($::opt_progress) { + print init_progress(); + } +} + +sub init_progress { + $|=1; + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'},"\n"); +} + +sub progress { + my $termcols = columns(); + my ($status, $header)=("x"x($termcols+1),""); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = join("\n", map { + $workerno{$_}.":".$sshlogin{$_} ." / ". 
+                          ($Global::host{$_}{'ncpus'} || "-") ." / ".
+                          $Global::host{$_}{'max_no_of_running'}
+                      } @workers);
+    if(eof $Global::argfile) {
+        # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
+        $header = "Computer:jobs running/jobs completed/%completed of all jobs";
+        $status = join(" ",map
+                       { sprintf("%s:%d/%d/%d%%",
+                                 $sshlogin{$_}, $Global::host{$_}{'no_of_running'},
+                                 ($Global::host{$_}{'completed'}||0),
+                                 ($Global::host{$_}{'completed'}||0)*100
+                                 / ($Global::total_started || 1)) } # "|| 1": no division by zero while nothing has been started
+                       @workers);
+        if(length $status > $termcols) {
+            # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
+            $header = "Computer:jobs running/jobs completed/%completed of all jobs";
+            $status = join(" ",map
+                           { sprintf("%s:%d/%d/%d%%",
+                                     $workerno{$_}, $Global::host{$_}{'no_of_running'},
+                                     ($Global::host{$_}{'completed'}||0),
+                                     ($Global::host{$_}{'completed'}||0)*100
+                                     / ($Global::total_started || 1)) } # same guard as above
+                           @workers);
+        }
+
+    }
+    if(length $status > $termcols) {
+        # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
+        $header = "Computer:jobs running/jobs completed";
+        $status = join(" ",map
+                       { sprintf("%s:%d/%d",
+                                 $sshlogin{$_}, $Global::host{$_}{'no_of_running'},
+                                 ($Global::host{$_}{'completed'}||0)) }
+                       @workers);
+    }
+    if(length $status > $termcols) {
+        # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
+        $header = "Computer:jobs running/jobs completed";
+        $status = join(" ",map
+                       { sprintf("%s:%d/%d",
+                                 $workerno{$_}, $Global::host{$_}{'no_of_running'},
+                                 ($Global::host{$_}{'completed'}||0)) }
+                       @workers);
+    }
+    if(length $status > $termcols) {
+        # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
+        $header = "Computer:jobs completed";
+        $status = join(" ",map
+                       { sprintf("%s:%d", # two conversions for the two arguments: name, completed count
+                                 $sshlogin{$_},
+                                 ($Global::host{$_}{'completed'}||0)) }
+                       @workers);
+    }
+    if(length $status > $termcols) {
+        # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
+        $header = "Computer:jobs completed";
+        $status = join(" ",map
+                       { sprintf("%s:%d", # two conversions for the two arguments: worker number, completed count
+                                 $workerno{$_},
+ 
($Global::host{$_}{'completed'}||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, "status" => $status); +} + +sub columns { + # Get the number of columns of the display + if(not $Global::columns) { + $Global::columns = $ENV{'COLUMNS'}; + if(not $Global::columns) { + my $resize = qx{ resize }; + $resize =~ /COLUMNS=(\d+);/ and do { $Global::columns = $1; }; + } + $Global::columns ||= 80; + } + return $Global::columns; } sub start_more_jobs { @@ -2582,8 +2726,9 @@ sub start_job { if($Global::verbose and not $Global::grouped) { print STDOUT $command,"\n"; } - $Global::running_jobs++; - debug("$Global::running_jobs processes. Starting: $command\n"); + $Global::total_running++; + $Global::total_started++; + debug("$Global::total_running processes. Starting: $command\n"); #print STDERR "LEN".length($command)."\n"; $Global::job_start_sequence++; @@ -2689,7 +2834,7 @@ sub parse_sshlogin { $Global::host{$sshlogin}{'maxlength'} = max_length_of_command_line(); } debug("sshlogin: ", my_dump(%Global::host),"\n"); - if($::opt_transfer or @::opt_return or $::opt_cleanup) { + if($::opt_transfer or @::opt_return or $::opt_cleanup or @::opt_basefile) { if(not remote_hosts()) { # There are no remote hosts if(defined @::opt_trc) { @@ -2700,6 +2845,8 @@ sub parse_sshlogin { print STDERR "Warning: --return ignored as there are no remote --sshlogin\n"; } elsif (defined $::opt_cleanup) { print STDERR "Warning: --cleanup ignored as there are no remote --sshlogin\n"; + } elsif (defined @::opt_basefile) { + print STDERR "Warning: --basefile ignored as there are no remote --sshlogin\n"; } } } @@ -2784,6 +2931,9 @@ sub sshtransferreturn { if($transfer) { # Abs path: rsync -rlDzRE /home/tange/dir/subdir/file.gz server:/ # Rel path: rsync -rlDzRE ./subdir/file.gz server:./ + if($relpath) { + $file = "./".$file; + } if(-r shell_unquote($file)) { return "rsync $rsync_opt $file $serverlogin:$rsync_destdir"; } else { @@ -2792,13 +2942,12 @@ sub sshtransferreturn { } 
} else { my $noext = no_extension($file); # Remove .ext before prepending ./ - # If relative path: prepend ./ (to avoid problems with ':') - $noext = ($relpath ? "./".$noext : $noext); - $file = ($relpath ? "./".$file : $file); my @cmd = (); for my $ret_file (@Global::ret_files) { my $remove = $removesource ? "--remove-source-files" : ""; - my $replaced = context_replace($ret_file,[$file],[$noext]); + # If relative path: prepend ./ (to avoid problems with ':') + my $replaced = ($relpath ? "./" : "") . + context_replace($ret_file,[$file],[$noext]); # --return # Abs path: rsync -rlDzRE server:/home/tange/dir/subdir/file.gz / # Rel path: rsync -rlDzRE server:./subsir/file.gz ./ @@ -2940,7 +3089,9 @@ sub Reaper { } my $sshlogin = $Global::running{$stiff}{'sshlogin'}; $Global::host{$sshlogin}{'no_of_running'}--; - $Global::running_jobs--; + $Global::host{$sshlogin}{'completed'}++; + $Global::total_running--; + $Global::total_completed++; delete $Global::running{$stiff}; start_more_jobs(); } @@ -3072,9 +3223,9 @@ $Global::control_path = 0; # TODO --max-number-of-jobs print the system limited number of jobs # TODO Debian package -# TODO try transfer + return of file beginning with : # TODO to kill from a run script parallel should set PARALLEL_PID that can be sig termed # TAGS: parallel | parallel processing | multicore | multiprocessor | Clustering/Distributed Networks # job control | multiple jobs | parallelization | text processing | cluster | filters # Clustering Tools | Command Line Tools | Utilities | System Administration +# Bash parallel diff --git a/unittest/Start.sh b/unittest/Start.sh index ef840cbe..9e4c77fd 100644 --- a/unittest/Start.sh +++ b/unittest/Start.sh @@ -1,5 +1,6 @@ #!/bin/bash +export LANG=C SHFILE=/tmp/unittest-parallel.sh ls -t tests-to-run/test*.sh \ diff --git a/unittest/actual-results/test18 b/unittest/actual-results/test18 index 0aa06faf..eae7e07f 100644 --- a/unittest/actual-results/test18 +++ b/unittest/actual-results/test18 @@ -1,6 +1,17 
@@ ### Check warning if --transfer but file not found Warning: /tmp/noexistant/file is not readable and will not be transferred /tmp/noexistant/file +### Transfer for file starting with : +remote-:.: +content-: +remote-file:name.file:name +content-file:name +remote-file:name.foo.file:name +content-file:name.foo +remote-file: name.foo.file: name +content-file: name.foo +remote-file : name.foo.file : name +content-file : name.foo ### Check warning if --transfer but not --sshlogin Warning: --transfer ignored as there are no remote --sshlogin diff --git a/unittest/actual-results/test23 b/unittest/actual-results/test23 index 6d43299f..b89b8637 100644 --- a/unittest/actual-results/test23 +++ b/unittest/actual-results/test23 @@ -1,11 +1,21 @@ +### Test --basefile with no --sshlogin +Warning: --basefile ignored as there are no remote --sshlogin + ### Test --basefile + --cleanup + permissions -scriptrun 1 -scriptrun 2 -scriptrun 3 -scriptrun 4 -scriptrun 5 +script1 run 1 +script2 run 1 +script1 run 2 +script2 run 2 +script1 run 3 +script2 run 3 +script1 run 4 +script2 run 4 +script1 run 5 +script2 run 5 good if no file -ls: cannot access script: No such file or directory +ls: cannot access script1: No such file or directory +OK +ls: cannot access script2: No such file or directory OK ### Test --basefile + --sshlogin : 1 diff --git a/unittest/tests-to-run/test18.sh b/unittest/tests-to-run/test18.sh index 962701a9..f233857a 100644 --- a/unittest/tests-to-run/test18.sh +++ b/unittest/tests-to-run/test18.sh @@ -8,6 +8,15 @@ SERVER2=parallel-server2 echo '### Check warning if --transfer but file not found' echo /tmp/noexistant/file | stdout $PAR -k -S $SERVER1 --transfer echo +echo '### Transfer for file starting with :' +cd /tmp +(echo ':'; echo file:name; echo file:name.foo; echo file: name.foo; echo file : name.foo;) \ + > /tmp/test18 +cat /tmp/test18 | parallel echo content-{} ">" {} +cat /tmp/test18 | parallel -j1 --trc {}.{.} -S $SERVER1,parallel@$SERVER2,: \ + '(echo 
remote-{}.{.};cat {}) > {}.{.}' +cat /tmp/test18 | parallel -j1 -k 'cat {}.{.}' + echo '### Check warning if --transfer but not --sshlogin' echo | stdout $PAR -k --transfer echo @@ -28,7 +37,7 @@ echo "#2/ssh -l tange nothing" >>/tmp/parallel-sshlogin seq 1 10 | $PAR -k --sshloginfile /tmp/parallel-sshlogin echo echo '### Check forced number of CPUs being respected' -stdout cat /tmp/test17 | parallel -k -j+0 -S 1/:,9/$SERVER1 "hostname; echo {} >/dev/null" +stdout seq 1 20 | parallel -k -j+0 -S 1/:,9/$SERVER1 "hostname; echo {} >/dev/null" echo '### Check more than 9 simultaneous sshlogins' seq 1 11 | $PAR -k -j0 -S "/ssh $SERVER1" echo diff --git a/unittest/tests-to-run/test23.sh b/unittest/tests-to-run/test23.sh index de43c00b..ac5be479 100644 --- a/unittest/tests-to-run/test23.sh +++ b/unittest/tests-to-run/test23.sh @@ -5,13 +5,18 @@ SERVER1=parallel-server1 SERVER2=parallel-server2 cd /tmp -( + +echo '### Test --basefile with no --sshlogin' +echo | stdout parallel --basefile foo echo + echo '### Test --basefile + --cleanup + permissions' -echo echo scriptrun '"$@"' > script -chmod 755 script -seq 1 5 | parallel -kS $SERVER1 --cleanup -B script ./script +echo echo script1 run '"$@"' > script1 +echo echo script2 run '"$@"' > script2 +chmod 755 script1 script2 +seq 1 5 | parallel -kS $SERVER1 --cleanup -B script1 --basefile script2 "./script1 {};./script2 {}" echo good if no file -stdout ssh $SERVER1 ls 'script' || echo OK +stdout ssh $SERVER1 ls 'script1' || echo OK +stdout ssh $SERVER1 ls 'script2' || echo OK echo '### Test --basefile + --sshlogin :' echo cat '"$@"' > my_script @@ -22,4 +27,4 @@ seq 1 13 | parallel echo {} '>' parallel_{}.test ls parallel_*.test | parallel -j+0 --trc {.}.out -B my_script \ -S parallel-server1,parallel@parallel-server2,: "./my_script {} > {.}.out" cat parallel_*.test parallel_*.out -) + diff --git a/unittest/wanted-results/test18 b/unittest/wanted-results/test18 index 0aa06faf..eae7e07f 100644 --- 
a/unittest/wanted-results/test18 +++ b/unittest/wanted-results/test18 @@ -1,6 +1,17 @@ ### Check warning if --transfer but file not found Warning: /tmp/noexistant/file is not readable and will not be transferred /tmp/noexistant/file +### Transfer for file starting with : +remote-:.: +content-: +remote-file:name.file:name +content-file:name +remote-file:name.foo.file:name +content-file:name.foo +remote-file: name.foo.file: name +content-file: name.foo +remote-file : name.foo.file : name +content-file : name.foo ### Check warning if --transfer but not --sshlogin Warning: --transfer ignored as there are no remote --sshlogin diff --git a/unittest/wanted-results/test23 b/unittest/wanted-results/test23 index 6d43299f..b89b8637 100644 --- a/unittest/wanted-results/test23 +++ b/unittest/wanted-results/test23 @@ -1,11 +1,21 @@ +### Test --basefile with no --sshlogin +Warning: --basefile ignored as there are no remote --sshlogin + ### Test --basefile + --cleanup + permissions -scriptrun 1 -scriptrun 2 -scriptrun 3 -scriptrun 4 -scriptrun 5 +script1 run 1 +script2 run 1 +script1 run 2 +script2 run 2 +script1 run 3 +script2 run 3 +script1 run 4 +script2 run 4 +script1 run 5 +script2 run 5 good if no file -ls: cannot access script: No such file or directory +ls: cannot access script1: No such file or directory +OK +ls: cannot access script2: No such file or directory OK ### Test --basefile + --sshlogin : 1