mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parallel: --pipe now forks instead of busy looping.
This commit is contained in:
parent
91899593fc
commit
22b295cef1
|
@ -1,6 +1,6 @@
|
||||||
Summary: Shell tool for executing jobs in parallel
|
Summary: Shell tool for executing jobs in parallel
|
||||||
Name: parallel
|
Name: parallel
|
||||||
Version: 20120422
|
Version: 20120522
|
||||||
Release: 1
|
Release: 1
|
||||||
License: GPL
|
License: GPL
|
||||||
Group: Productivity/File utilities
|
Group: Productivity/File utilities
|
||||||
|
|
161
src/parallel
161
src/parallel
|
@ -236,16 +236,12 @@ sub spreadstdin {
|
||||||
if($Global::max_number_of_args) {
|
if($Global::max_number_of_args) {
|
||||||
# -N => (start..*?end){n}
|
# -N => (start..*?end){n}
|
||||||
while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) {
|
while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) {
|
||||||
$record = $header.$1;
|
write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1);
|
||||||
::debug("Read record -N: ".length($record)."\n");
|
|
||||||
write_record_to_pipe(\$record,$recstart,$recend);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
# Find the last recend-recstart in $buf
|
# Find the last recend-recstart in $buf
|
||||||
if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
|
if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
|
||||||
$record = $header.$1;
|
write_record_to_pipe(\$header,\$1,$recstart,$recend,length $1);
|
||||||
::debug("Matched record: ".length($record)."/".length($buf)."\n");
|
|
||||||
write_record_to_pipe(\$record,$recstart,$recend);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -254,20 +250,16 @@ sub spreadstdin {
|
||||||
my $i = 0;
|
my $i = 0;
|
||||||
while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) {
|
while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) {
|
||||||
$i += length $recend; # find the actual splitting location
|
$i += length $recend; # find the actual splitting location
|
||||||
my $record = $header.substr($buf,0,$i);
|
write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i);
|
||||||
substr($buf,0,$i) = "";
|
substr($buf,0,$i) = "";
|
||||||
::debug("Read record: ".length($record)."\n");
|
|
||||||
write_record_to_pipe(\$record,$recstart,$recend);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
# Find the last recend-recstart in $buf
|
# Find the last recend-recstart in $buf
|
||||||
my $i = rindex($buf,$recendrecstart);
|
my $i = rindex($buf,$recendrecstart);
|
||||||
if($i != -1) {
|
if($i != -1) {
|
||||||
$i += length $recend; # find the actual splitting location
|
$i += length $recend; # find the actual splitting location
|
||||||
my $record = $header.substr($buf,0,$i);
|
write_record_to_pipe(\$header,\$buf,$recstart,$recend,$i);
|
||||||
substr($buf,0,$i) = "";
|
substr($buf,0,$i) = "";
|
||||||
# ::debug("Read record: ".length($record)."\n");
|
|
||||||
write_record_to_pipe(\$record,$recstart,$recend);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -275,12 +267,10 @@ sub spreadstdin {
|
||||||
}
|
}
|
||||||
|
|
||||||
# If there is anything left in the buffer write it
|
# If there is anything left in the buffer write it
|
||||||
substr($buf,0,0) = $header;
|
substr($buf,0,0) = "";
|
||||||
write_record_to_pipe(\$buf,$recstart,$recend);
|
write_record_to_pipe(\$header,\$buf,$recstart,$recend,length $buf);
|
||||||
|
|
||||||
::debug("Done reading input\n");
|
::debug("Done reading input\n");
|
||||||
flush_and_close_pipes();
|
|
||||||
::debug("Done flushing to children\n");
|
|
||||||
$Global::start_no_new_jobs = 1;
|
$Global::start_no_new_jobs = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,74 +289,45 @@ sub nindex {
|
||||||
return $i;
|
return $i;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub flush_and_close_pipes {
|
|
||||||
# Flush the data that is cached to the open pipes
|
|
||||||
# and close them.
|
|
||||||
my $flush_done;
|
|
||||||
my $sleep = 0.05;
|
|
||||||
do {
|
|
||||||
$flush_done = 1;
|
|
||||||
# Make sure everything is written to the jobs
|
|
||||||
for my $job (values %Global::running) {
|
|
||||||
if($job->remaining()) {
|
|
||||||
if($job->complete_write()) {
|
|
||||||
# Some data was written - reset sleep timer
|
|
||||||
$sleep = 0.05;
|
|
||||||
}
|
|
||||||
$flush_done = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$sleep = ::reap_usleep($sleep);
|
|
||||||
} while (not $flush_done);
|
|
||||||
for my $job (values %Global::running) {
|
|
||||||
my $fh = $job->stdin();
|
|
||||||
close $fh;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sub write_record_to_pipe {
|
sub write_record_to_pipe {
|
||||||
|
# Fork then
|
||||||
|
# Write record from pos 0 .. $endpos to pipe
|
||||||
|
my $header_ref = shift;
|
||||||
my $record_ref = shift;
|
my $record_ref = shift;
|
||||||
my $recstart = shift;
|
my $recstart = shift;
|
||||||
my $recend = shift;
|
my $recend = shift;
|
||||||
|
my $endpos = shift;
|
||||||
if(length $$record_ref == 0) { return; }
|
if(length $$record_ref == 0) { return; }
|
||||||
if($::opt_remove_rec_sep) {
|
# Find the minimal seq $job that has no data written == virgin
|
||||||
# Remove record separator
|
# If no virgin found, backoff
|
||||||
$$record_ref =~ s/$recend$recstart//gos;
|
my $sleep = 0.0001; # 0.01 ms - better performance on highend
|
||||||
$$record_ref =~ s/^$recstart//os;
|
while(not @Global::virgin_jobs) {
|
||||||
$$record_ref =~ s/$recend$//os;
|
::debug("No virgin jobs");
|
||||||
|
$sleep = ::reap_usleep($sleep);
|
||||||
|
start_more_jobs(); # These jobs may not be started because of loadavg
|
||||||
}
|
}
|
||||||
# Keep the pipes hot, but if nothing happens sleep should back off
|
my $job = shift @Global::virgin_jobs;
|
||||||
my $sleep = 0.00001; # 0.00001 ms - better performance on highend
|
if(fork()) {
|
||||||
write_record: while(1) {
|
# Skip
|
||||||
# Sorting according to sequence is necessary for -k to work
|
} else {
|
||||||
for my $job (sort { $a->seq() <=> $b->seq() } values %Global::running) {
|
# Chop off at $endpos as we do not know how many rec_sep will
|
||||||
::debug("Looking at ",$job->seq(),"-",$job->remaining(),"-",$job->datawritten(),"\n");
|
# be removed.
|
||||||
if($job->remaining()) {
|
my $record = substr($$record_ref,0,$endpos);
|
||||||
# Part of the job's last record has not finished being written
|
# Remove rec_sep
|
||||||
if($job->complete_write()) {
|
if($::opt_remove_rec_sep) {
|
||||||
# Something got written - reset sleep timer
|
# Remove record separator
|
||||||
$sleep = 0.00001;
|
$record =~ s/$recend$recstart//gos;
|
||||||
}
|
$record =~ s/^$recstart//os;
|
||||||
} else {
|
$record =~ s/$recend$//os;
|
||||||
if($job->datawritten() > 0) {
|
}
|
||||||
# There is no data remaining and we have written data before:
|
$job->write($header_ref);
|
||||||
# So this means we have completed writing a block.
|
$job->write(\$record);
|
||||||
# close stdin
|
my $fh = $job->stdin();
|
||||||
# This will cause the job to finish and when it dies we will spawn another job
|
close $fh;
|
||||||
my $fh = $job->stdin();
|
exit;
|
||||||
close $fh;
|
}
|
||||||
} else {
|
my $fh = $job->stdin();
|
||||||
$job->write($record_ref);
|
close $fh;
|
||||||
# Something got written - reset sleep timer
|
|
||||||
$sleep = 0.00001;
|
|
||||||
last write_record;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# Maybe this should be in an if statement: if sleep > 0.001: start more
|
|
||||||
start_more_jobs(); # These jobs may not be started because of loadavg
|
|
||||||
$sleep = ::reap_usleep($sleep);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -538,7 +499,7 @@ sub get_options_from_array {
|
||||||
sub parse_options {
|
sub parse_options {
|
||||||
# Returns: N/A
|
# Returns: N/A
|
||||||
# Defaults:
|
# Defaults:
|
||||||
$Global::version = 20120522;
|
$Global::version = 20120523;
|
||||||
$Global::progname = 'parallel';
|
$Global::progname = 'parallel';
|
||||||
$Global::infinity = 2**31;
|
$Global::infinity = 2**31;
|
||||||
$Global::debug = 0;
|
$Global::debug = 0;
|
||||||
|
@ -1055,6 +1016,7 @@ sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
|
||||||
# Variable structure:
|
# Variable structure:
|
||||||
#
|
#
|
||||||
# $Global::running{$pid} = Pointer to Job-object
|
# $Global::running{$pid} = Pointer to Job-object
|
||||||
|
# @Global::virgin_jobs = Pointers to Job-objects that have received no input
|
||||||
# $Global::host{$sshlogin} = Pointer to SSHLogin-object
|
# $Global::host{$sshlogin} = Pointer to SSHLogin-object
|
||||||
# $Global::total_running = total number of running jobs
|
# $Global::total_running = total number of running jobs
|
||||||
# $Global::total_started = total jobs started
|
# $Global::total_started = total jobs started
|
||||||
|
@ -1159,6 +1121,9 @@ sub start_another_job {
|
||||||
debug("Command to run on '".$job->sshlogin()."': '".$job->replaced()."'\n");
|
debug("Command to run on '".$job->sshlogin()."': '".$job->replaced()."'\n");
|
||||||
if($job->start()) {
|
if($job->start()) {
|
||||||
$Global::running{$job->pid()} = $job;
|
$Global::running{$job->pid()} = $job;
|
||||||
|
if($::opt_pipe) {
|
||||||
|
push(@Global::virgin_jobs,$job);
|
||||||
|
}
|
||||||
debug("Started as seq ",$job->seq()," pid:",$job->pid(),"\n");
|
debug("Started as seq ",$job->seq()," pid:",$job->pid(),"\n");
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -2036,7 +2001,6 @@ sub my_dump {
|
||||||
|
|
||||||
sub __OBJECT_ORIENTED_PARTS__ {}
|
sub __OBJECT_ORIENTED_PARTS__ {}
|
||||||
|
|
||||||
|
|
||||||
package SSHLogin;
|
package SSHLogin;
|
||||||
|
|
||||||
sub new {
|
sub new {
|
||||||
|
@ -3051,49 +3015,14 @@ sub stdin {
|
||||||
sub set_stdin {
|
sub set_stdin {
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $stdin = shift;
|
my $stdin = shift;
|
||||||
# set non-blocking
|
|
||||||
fcntl($stdin, ::F_SETFL, ::O_NONBLOCK) or
|
|
||||||
::die_bug("Couldn't set flags for HANDLE: $!");
|
|
||||||
$self->{'stdin'} = $stdin;
|
$self->{'stdin'} = $stdin;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub write {
|
sub write {
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $remaining_ref = shift;
|
my $remaining_ref = shift;
|
||||||
if(length($$remaining_ref)) {
|
|
||||||
$self->{'remaining'} .= $$remaining_ref;
|
|
||||||
$self->complete_write();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sub complete_write {
|
|
||||||
# Returns:
|
|
||||||
# number of bytes written (see syswrite)
|
|
||||||
my $self = shift;
|
|
||||||
my $in = $self->{'stdin'};
|
my $in = $self->{'stdin'};
|
||||||
my $len = syswrite($in,$self->{'remaining'});
|
syswrite($in,$$remaining_ref);
|
||||||
if (!defined($len) && $! == &::EAGAIN) {
|
|
||||||
# write would block;
|
|
||||||
} else {
|
|
||||||
# Remove the part that was written
|
|
||||||
substr($self->{'remaining'},0,$len) = "";
|
|
||||||
$self->{'datawritten'} += $len;
|
|
||||||
}
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
|
|
||||||
sub remaining {
|
|
||||||
my $self = shift;
|
|
||||||
if(defined $self->{'remaining'}) {
|
|
||||||
return length $self->{'remaining'};
|
|
||||||
} else {
|
|
||||||
return undef;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sub datawritten {
|
|
||||||
my $self = shift;
|
|
||||||
return $self->{'datawritten'};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub pid {
|
sub pid {
|
||||||
|
|
|
@ -538,7 +538,7 @@ specified, and for B<-I>{} otherwise. This option is deprecated;
|
||||||
use B<-I> instead.
|
use B<-I> instead.
|
||||||
|
|
||||||
|
|
||||||
=item B<--joblog> I<logfile>
|
=item B<--joblog> I<logfile> (alpha testing)
|
||||||
|
|
||||||
Logfile for executed jobs. Save a list of the executed jobs to
|
Logfile for executed jobs. Save a list of the executed jobs to
|
||||||
I<logfile> in the following TAB separated format: sequence number,
|
I<logfile> in the following TAB separated format: sequence number,
|
||||||
|
@ -660,7 +660,7 @@ B<-l 0> is an alias for B<-l 1>.
|
||||||
Implies B<-X> unless B<-m> is set.
|
Implies B<-X> unless B<-m> is set.
|
||||||
|
|
||||||
|
|
||||||
=item B<--load> I<max-load>
|
=item B<--load> I<max-load> (alpha testing)
|
||||||
|
|
||||||
Do not start new jobs on a given computer unless the load is less than
|
Do not start new jobs on a given computer unless the load is less than
|
||||||
I<max-load>. I<max-load> uses the same syntax as B<--jobs>, so I<100%>
|
I<max-load>. I<max-load> uses the same syntax as B<--jobs>, so I<100%>
|
||||||
|
@ -755,9 +755,9 @@ Instead of printing the output to stdout (standard output) the output
|
||||||
of each job is saved in a file and the filename is then printed.
|
of each job is saved in a file and the filename is then printed.
|
||||||
|
|
||||||
|
|
||||||
=item B<--pipe>
|
=item B<--pipe> (alpha testing)
|
||||||
|
|
||||||
=item B<--spreadstdin>
|
=item B<--spreadstdin> (alpha testing)
|
||||||
|
|
||||||
Spread input to jobs on stdin (standard input). Read a block of data
|
Spread input to jobs on stdin (standard input). Read a block of data
|
||||||
from stdin (standard input) and give one block of data as input to one
|
from stdin (standard input) and give one block of data as input to one
|
||||||
|
|
|
@ -570,8 +570,8 @@ This option is a synonym for @strong{-I}@emph{replace-str} if @emph{replace-str}
|
||||||
specified, and for @strong{-I}@{@} otherwise. This option is deprecated;
|
specified, and for @strong{-I}@{@} otherwise. This option is deprecated;
|
||||||
use @strong{-I} instead.
|
use @strong{-I} instead.
|
||||||
|
|
||||||
@item @strong{--joblog} @emph{logfile}
|
@item @strong{--joblog} @emph{logfile} (alpha testing)
|
||||||
@anchor{@strong{--joblog} @emph{logfile}}
|
@anchor{@strong{--joblog} @emph{logfile} (alpha testing)}
|
||||||
|
|
||||||
Logfile for executed jobs. Save a list of the executed jobs to
|
Logfile for executed jobs. Save a list of the executed jobs to
|
||||||
@emph{logfile} in the following TAB separated format: sequence number,
|
@emph{logfile} in the following TAB separated format: sequence number,
|
||||||
|
@ -712,8 +712,8 @@ The @strong{-l} option is deprecated since the POSIX standard specifies
|
||||||
|
|
||||||
Implies @strong{-X} unless @strong{-m} is set.
|
Implies @strong{-X} unless @strong{-m} is set.
|
||||||
|
|
||||||
@item @strong{--load} @emph{max-load}
|
@item @strong{--load} @emph{max-load} (alpha testing)
|
||||||
@anchor{@strong{--load} @emph{max-load}}
|
@anchor{@strong{--load} @emph{max-load} (alpha testing)}
|
||||||
|
|
||||||
Do not start new jobs on a given computer unless the load is less than
|
Do not start new jobs on a given computer unless the load is less than
|
||||||
@emph{max-load}. @emph{max-load} uses the same syntax as @strong{--jobs}, so @emph{100%}
|
@emph{max-load}. @emph{max-load} uses the same syntax as @strong{--jobs}, so @emph{100%}
|
||||||
|
@ -810,11 +810,11 @@ all the output from one server will be grouped together.
|
||||||
Instead of printing the output to stdout (standard output) the output
|
Instead of printing the output to stdout (standard output) the output
|
||||||
of each job is saved in a file and the filename is then printed.
|
of each job is saved in a file and the filename is then printed.
|
||||||
|
|
||||||
@item @strong{--pipe}
|
@item @strong{--pipe} (alpha testing)
|
||||||
@anchor{@strong{--pipe}}
|
@anchor{@strong{--pipe} (alpha testing)}
|
||||||
|
|
||||||
@item @strong{--spreadstdin}
|
@item @strong{--spreadstdin} (alpha testing)
|
||||||
@anchor{@strong{--spreadstdin}}
|
@anchor{@strong{--spreadstdin} (alpha testing)}
|
||||||
|
|
||||||
Spread input to jobs on stdin (standard input). Read a block of data
|
Spread input to jobs on stdin (standard input). Read a block of data
|
||||||
from stdin (standard input) and give one block of data as input to one
|
from stdin (standard input) and give one block of data as input to one
|
||||||
|
|
|
@ -3,8 +3,9 @@
|
||||||
export LANG=C
|
export LANG=C
|
||||||
SHFILE=/tmp/unittest-parallel.sh
|
SHFILE=/tmp/unittest-parallel.sh
|
||||||
|
|
||||||
|
# Try a failing test twice.
|
||||||
ls -t tests-to-run/*${1}*.sh \
|
ls -t tests-to-run/*${1}*.sh \
|
||||||
| perl -pe 's:(.*/(.*)).sh:bash $1.sh > actual-results/$2; diff -Naur wanted-results/$2 actual-results/$2:' \
|
| perl -pe 's:(.*/(.*)).sh:bash $1.sh > actual-results/$2; diff -Naur wanted-results/$2 actual-results/$2 >/dev/null || bash $1.sh > actual-results/$2; diff -Naur wanted-results/$2 actual-results/$2:' \
|
||||||
>$SHFILE
|
>$SHFILE
|
||||||
|
|
||||||
mkdir -p actual-results
|
mkdir -p actual-results
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
rm -rf tmp 2>/dev/null
|
TMP=/tmp/parallel_local105
|
||||||
cd input-files
|
rm -rf $TMP 2>/dev/null
|
||||||
tar xjf random_dirs_with_newline.tar.bz2
|
mkdir -p $TMP
|
||||||
cd ..
|
tar -C $TMP -xf input-files/random_dirs_with_newline.tar.bz2
|
||||||
cp -a input-files/random_dirs_with_newline tmp
|
|
||||||
cd tmp
|
cd $TMP/random_dirs_with_newline
|
||||||
|
|
||||||
# tests if special dir names causes problems
|
# tests if special dir names causes problems
|
||||||
find . -type d -print0 | perl -0 -pe 's:^./::' | parallel -0 -v touch -- {}/abc-{}-{} 2>&1 \
|
find . -type d -print0 | perl -0 -pe 's:^./::' | parallel -0 -v touch -- {}/abc-{}-{} 2>&1 \
|
||||||
|
@ -33,4 +33,5 @@ find . -type f -print0 | perl -0 -ne '$a++;END{print $a}'
|
||||||
echo ' files'
|
echo ' files'
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
rm -rf tmp
|
rm -rf $TMP
|
||||||
|
|
||||||
|
|
|
@ -165,7 +165,7 @@ echo "echo a" | parallel
|
||||||
parallel -j1 -I :: -X echo 'a::b::^c::[.}c' ::: 1
|
parallel -j1 -I :: -X echo 'a::b::^c::[.}c' ::: 1
|
||||||
|
|
||||||
echo "### BUG: The length for -X is not close to max (131072)"
|
echo "### BUG: The length for -X is not close to max (131072)"
|
||||||
seq 1 4000 | parallel -X echo {.} aa {}{.} {}{}d{} {}dd{}d{.} |head -n 1 |wc
|
seq 1 4000 | parallel -k -X echo {.} aa {}{.} {}{}d{} {}dd{}d{.} |head -n 1 |wc
|
||||||
|
|
||||||
echo "### BUG: empty lines with --show-limit"
|
echo "### BUG: empty lines with --show-limit"
|
||||||
echo | parallel --show-limits
|
echo | parallel --show-limits
|
||||||
|
|
|
@ -54,10 +54,6 @@ h2
|
||||||
21xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
21xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
22xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
22xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
Stop
|
Stop
|
||||||
Start
|
|
||||||
h1
|
|
||||||
h2
|
|
||||||
Stop
|
|
||||||
### Test --header with multiple :::
|
### Test --header with multiple :::
|
||||||
a2 b1 b1 a2
|
a2 b1 b1 a2
|
||||||
### Test --shellquote
|
### Test --shellquote
|
||||||
|
|
Loading…
Reference in a new issue