mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-26 07:57:58 +00:00
parallel: Implemented --pipepart
This commit is contained in:
parent
f2c1f65a5a
commit
34e131b894
|
@ -158,6 +158,11 @@ git commit -a
|
||||||
|
|
||||||
Released as 20140X22 ('').
|
Released as 20140X22 ('').
|
||||||
|
|
||||||
|
TAG=MyTag
|
||||||
|
YYYYMMDD=`yyyymmdd`
|
||||||
|
git tag -s -m "Released as $YYYYMMDD ('$TAG')" $TAG
|
||||||
|
git tag -s -m "Released as $YYYYMMDD ('$TAG')" $YYYYMMDD
|
||||||
|
|
||||||
|
|
||||||
== Update Savannah ==
|
== Update Savannah ==
|
||||||
|
|
||||||
|
|
258
src/parallel
258
src/parallel
|
@ -68,9 +68,13 @@ if(@ARGV) {
|
||||||
}
|
}
|
||||||
|
|
||||||
my @fhlist;
|
my @fhlist;
|
||||||
@fhlist = map { open_or_exit($_) } @opt::a;
|
if($opt::pipepart) {
|
||||||
if(not @fhlist and not $opt::pipe) {
|
@fhlist = map { open_or_exit($_) } "/dev/null";
|
||||||
|
} else {
|
||||||
|
@fhlist = map { open_or_exit($_) } @opt::a;
|
||||||
|
if(not @fhlist and not $opt::pipe) {
|
||||||
@fhlist = (*STDIN);
|
@fhlist = (*STDIN);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if($opt::skip_first_line) {
|
if($opt::skip_first_line) {
|
||||||
# Skip the first line for the first file handle
|
# Skip the first line for the first file handle
|
||||||
|
@ -121,7 +125,7 @@ if($opt::nonall or $opt::onall) {
|
||||||
|
|
||||||
$Global::JobQueue = JobQueue->new(
|
$Global::JobQueue = JobQueue->new(
|
||||||
$command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
|
$command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
|
||||||
if($opt::pipe and @opt::a) {
|
if(0 and $opt::pipe and @opt::a) {
|
||||||
# ... | parallel --pipe cmd ::: arg1 arg2
|
# ... | parallel --pipe cmd ::: arg1 arg2
|
||||||
# The command to run is:
|
# The command to run is:
|
||||||
# tee >((cmd arg1) >/tmp/tmp1 2>/tmp/err1) >((cmd arg2) >/tmp/tmp2 2>/tmp/err2) >/dev/null
|
# tee >((cmd arg1) >/tmp/tmp1 2>/tmp/err1) >((cmd arg2) >/tmp/tmp2 2>/tmp/err2) >/dev/null
|
||||||
|
@ -153,6 +157,13 @@ if($opt::eta or $opt::bar) {
|
||||||
# Count the number of jobs before starting any
|
# Count the number of jobs before starting any
|
||||||
$Global::JobQueue->total_jobs();
|
$Global::JobQueue->total_jobs();
|
||||||
}
|
}
|
||||||
|
if($opt::pipepart) {
|
||||||
|
my @cmdlines;
|
||||||
|
for(@opt::a) {
|
||||||
|
push(@cmdlines, pipe_part_files($_));
|
||||||
|
}
|
||||||
|
$Global::JobQueue->{'commandlinequeue'}->unget(@cmdlines);
|
||||||
|
}
|
||||||
for my $sshlogin (values %Global::host) {
|
for my $sshlogin (values %Global::host) {
|
||||||
$sshlogin->max_jobs_running();
|
$sshlogin->max_jobs_running();
|
||||||
}
|
}
|
||||||
|
@ -163,9 +174,12 @@ if($Global::semaphore) {
|
||||||
$sem = acquire_semaphore();
|
$sem = acquire_semaphore();
|
||||||
}
|
}
|
||||||
$SIG{TERM} = \&start_no_new_jobs;
|
$SIG{TERM} = \&start_no_new_jobs;
|
||||||
|
|
||||||
start_more_jobs();
|
start_more_jobs();
|
||||||
if($opt::pipe) {
|
if(not $opt::pipepart) {
|
||||||
|
if($opt::pipe) {
|
||||||
spreadstdin();
|
spreadstdin();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
::debug("Start draining\n");
|
::debug("Start draining\n");
|
||||||
drain_job_queue();
|
drain_job_queue();
|
||||||
|
@ -197,6 +211,170 @@ if($opt::halt_on_error) {
|
||||||
|
|
||||||
sub __PIPE_MODE__ {}
|
sub __PIPE_MODE__ {}
|
||||||
|
|
||||||
|
# parallel --pipepart -a bigfile cat
|
||||||
|
# =>
|
||||||
|
# (dd 1) | cat
|
||||||
|
# (dd 2) | cat
|
||||||
|
# (dd 3) | cat
|
||||||
|
|
||||||
|
|
||||||
|
sub pipe_part_files {
    # Turn one input file into a list of jobs, one per part of the file.
    # Input:
    #   $file = the file to split into parts
    # Uses:
    #   $opt::blocksize
    #   $Global::JobQueue
    # Returns:
    #   @cmdlines = entries taken from the command line queue whose
    #     'replaced' command has been prefixed with a command that
    #     writes one part of $file into it through a pipe
    my $file = shift;
    # Offsets where the parts begin/end; neighbours delimit one part
    my @boundary = find_split_positions($file,$opt::blocksize);
    my @cmdlines;
    for my $n (1 .. $#boundary) {
        my ($from, $to) = @boundary[$n-1, $n];
        my $job = $Global::JobQueue->{'commandlinequeue'}->get();
        # TODO prepend --header (how?)
        $job->{'replaced'} =
            join("|", cat_partial($file, $from, $to), $job->{'replaced'});
        ::debug("Unget ".$job->{'replaced'}."\n");
        push @cmdlines, $job;
    }
    return @cmdlines;
}
|
||||||
|
|
||||||
|
sub find_split_positions {
    # Find byte offsets that split $file into chunks of at least $block
    # bytes each, cutting only between two records.
    # Input:
    #   $file = the file to read
    #   $block = (minimal) --block-size of each chunk
    # Uses:
    #   $opt::recstart
    #   $opt::recend
    # Returns:
    #   @positions of block start/end: 0, the offset of every split
    #   (strictly increasing), and finally the size of the file
    my($file, $block) = @_;
    $block = defined $block ? int($block) : 0;
    # A block size of 0 would never advance through the file
    $block >= 1 or die "find_split_positions: block size must be at least 1\n";
    # -s gives a false value (not 0) for an empty file
    my $size = (-s $file) || 0;
    # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
    # The optimal dd blocksize for freebsd = 2^15..2^17
    my $dd_block_size = 131072; # 2^17
    my ($recstart,$recend) = recstartrecend();
    $recstart = "" if not defined $recstart;
    $recend = "" if not defined $recend;
    my $recendrecstart = $recend.$recstart;
    my $seplen = length $recendrecstart;
    open(my $fh, "<", $file) || die "Cannot open $file for reading: $!\n";
    my @pos = (0);
    for(my $pos = $block; $pos < $size; $pos += $block) {
        my $buf = "";
        my $found = 0;
        # Where in $buf a not yet examined match could start
        my $search_from = 0;
        seek($fh, $pos, 0) || die "Cannot seek to $pos in $file: $!\n";
        # Append to $buf until it contains $recend$recstart
        while(read($fh,$buf,$dd_block_size,length $buf)) {
            my $i = index($buf,$recendrecstart,$search_from);
            if($i != -1) {
                # Split _after_ $recend: the record end belongs to the
                # record it terminates; $recstart opens the next chunk.
                # Start looking for next record _after_ this match
                $pos += $i + length($recend);
                # A split at the very end would only add an empty chunk
                push(@pos,$pos) if $pos < $size;
                $found = 1;
                last;
            }
            # Only a match overlapping the newly read data can be new
            $search_from = length($buf) - $seplen + 1;
            $search_from = 0 if $search_from < 0;
        }
        # No separator between here and end of file: none further on either
        last if not $found;
    }
    push(@pos,$size);
    close $fh;
    return @pos;
}
|
||||||
|
|
||||||
|
sub cat_partial {
    # Input:
    #   $file = the file to read
    #   $start = start byte
    #   $end = end byte
    # Returns:
    #   Efficient perl command to copy $start..$end to stdout
    my($file, $start, $end) = @_;
    # Script run by the generated command. Its arguments are the offset
    # and the number of bytes: it seeks stdin to the offset and copies
    # that many bytes to stdout, reading at most 32768 bytes at a time.
    my $copy_script = q{sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); }};
    # The file is attached as stdin of the script by shell redirection
    return sprintf("<%s perl -e '%s'  %s %s",
                   shell_quote_scalar($file),
                   $copy_script,
                   $start,
                   $end - $start);
}
|
||||||
|
|
||||||
|
sub _dd_prefix_part_job {
    # Input:
    #   $file = the file to read
    #   $start = start byte
    #   $end = end byte (the first byte that is not copied)
    # Returns:
    #   Efficient dd command(s) to copy $start..$end to stdout, each
    #   terminated by ';'. Empty string if the range is empty.
    my($file, $start, $end) = @_;
    return "" if $end <= $start;
    # The optimal blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
    # The optimal blocksize for freebsd = 2^15..2^17
    my $big_block = 131072;
    my $small_block = 512;
    # dd can only skip whole blocks, so the range is cut into up to 5
    # segments that each start on a multiple of their own block size:
    #   start .. head_end:        1 byte at a time
    #   head_end .. big_start:    512 bytes at a time
    #   big_start .. big_end:     131072 bytes at a time
    #   big_end .. tail_start:    512 bytes at a time
    #   tail_start .. end:        1 byte at a time
    # Every boundary is clamped to the neighbouring ones, so short
    # ranges never read outside $start..$end.
    #
    # Example: start = 1234, end = 4321 (len = 3087)
    #   1234 .. 1536: bs=1   skip=1234 count=302
    #   1536 .. 4096: bs=512 skip=3    count=5
    #   4096 .. 4321: bs=1   skip=4096 count=225
    my $round_up = sub {
        my ($n, $unit) = @_;
        return $n + ($unit - $n % $unit) % $unit;
    };
    my $round_down = sub {
        my ($n, $unit) = @_;
        return $n - $n % $unit;
    };
    my $min = sub { return $_[0] < $_[1] ? $_[0] : $_[1] };
    my $max = sub { return $_[0] > $_[1] ? $_[0] : $_[1] };

    my $head_end = $min->($end, $round_up->($start, $small_block));
    my $tail_start = $max->($head_end, $round_down->($end, $small_block));
    my $big_start = $min->($tail_start, $round_up->($head_end, $big_block));
    my $big_end = $max->($big_start, $round_down->($tail_start, $big_block));

    # The command is run by a shell: protect special chars in the name
    my $quoted_file = shell_quote_scalar($file);
    my @segments = (
        [ 1,            $start,      $head_end   ],
        [ $small_block, $head_end,   $big_start  ],
        [ $big_block,   $big_start,  $big_end    ],
        [ $small_block, $big_end,    $tail_start ],
        [ 1,            $tail_start, $end        ],
        );
    my @dd;
    for my $segment (@segments) {
        my ($bs, $from, $to) = @$segment;
        # Segments that are empty need no dd
        next if $to <= $from;
        # $from and $to are multiples of $bs here, so both are integers
        my $skip = $from / $bs;
        my $count = ($to - $from) / $bs;
        push(@dd, "dd if=$quoted_file bs=$bs skip=$skip count=$count iflag=fullblock;");
    }
    return join("", @dd);
}
|
||||||
|
|
||||||
sub spreadstdin {
|
sub spreadstdin {
|
||||||
# read a record
|
# read a record
|
||||||
# Spawn a job and print the record to it.
|
# Spawn a job and print the record to it.
|
||||||
|
@ -213,30 +391,7 @@ sub spreadstdin {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
my ($recstart,$recend);
|
my ($recstart,$recend) = recstartrecend();
|
||||||
if(defined($opt::recstart) and defined($opt::recend)) {
|
|
||||||
# If both --recstart and --recend is given then both must match
|
|
||||||
$recstart = $opt::recstart;
|
|
||||||
$recend = $opt::recend;
|
|
||||||
} elsif(defined($opt::recstart)) {
|
|
||||||
# If --recstart is given it must match start of record
|
|
||||||
$recstart = $opt::recstart;
|
|
||||||
$recend = "";
|
|
||||||
} elsif(defined($opt::recend)) {
|
|
||||||
# If --recend is given then it must match end of record
|
|
||||||
$recstart = "";
|
|
||||||
$recend = $opt::recend;
|
|
||||||
}
|
|
||||||
|
|
||||||
if($opt::regexp) {
|
|
||||||
# If $recstart/$recend contains '|' this should only apply to the regexp
|
|
||||||
$recstart = "(?:".$recstart.")";
|
|
||||||
$recend = "(?:".$recend.")";
|
|
||||||
} else {
|
|
||||||
# $recstart/$recend = printf strings (\n)
|
|
||||||
$recstart =~ s/\\([rnt\'\"\\])/"qq|\\$1|"/gee;
|
|
||||||
$recend =~ s/\\([rnt\'\"\\])/"qq|\\$1|"/gee;
|
|
||||||
}
|
|
||||||
my $recendrecstart = $recend.$recstart;
|
my $recendrecstart = $recend.$recstart;
|
||||||
my $chunk_number = 1;
|
my $chunk_number = 1;
|
||||||
my $one_time_through;
|
my $one_time_through;
|
||||||
|
@ -355,6 +510,39 @@ sub spreadstdin {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sub recstartrecend {
    # Uses:
    #   $opt::recstart
    #   $opt::recend
    #   $opt::regexp
    # Returns:
    #   $recstart,$recend with default values and regexp conversion
    #
    # If both --recstart and --recend is given then both must match
    # If only --recstart is given it must match start of record
    # If only --recend is given then it must match end of record
    # An option that is not given defaults to the empty string
    my $recstart = defined $opt::recstart ? $opt::recstart : "";
    my $recend = defined $opt::recend ? $opt::recend : "";

    if($opt::regexp) {
        # If $recstart/$recend contains '|' this should only apply to the regexp
        $recstart = "(?:".$recstart.")";
        $recend = "(?:".$recend.")";
    } else {
        # $recstart/$recend = printf strings (\n)
        # Expand the supported backslash escapes by table lookup
        my %unescaped = (
            'r'  => "\r",
            'n'  => "\n",
            't'  => "\t",
            "'"  => "'",
            '"'  => '"',
            "\\" => "\\",
            );
        for my $rec ($recstart, $recend) {
            $rec =~ s/\\([rnt\'\"\\])/$unescaped{$1}/g;
        }
    }
    return ($recstart,$recend);
}
|
||||||
|
|
||||||
sub nindex {
|
sub nindex {
|
||||||
# See if string is in buffer N times
|
# See if string is in buffer N times
|
||||||
# Returns:
|
# Returns:
|
||||||
|
@ -591,6 +779,7 @@ sub options_hash {
|
||||||
"header=s" => \$opt::header,
|
"header=s" => \$opt::header,
|
||||||
"cat" => \$opt::cat,
|
"cat" => \$opt::cat,
|
||||||
"fifo" => \$opt::fifo,
|
"fifo" => \$opt::fifo,
|
||||||
|
"pipepart" => \$opt::pipepart,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -630,7 +819,7 @@ sub get_options_from_array {
|
||||||
sub parse_options {
|
sub parse_options {
|
||||||
# Returns: N/A
|
# Returns: N/A
|
||||||
# Defaults:
|
# Defaults:
|
||||||
$Global::version = 20140401;
|
$Global::version = 20140409;
|
||||||
$Global::progname = 'parallel';
|
$Global::progname = 'parallel';
|
||||||
$Global::infinity = 2**31;
|
$Global::infinity = 2**31;
|
||||||
$Global::debug = 0;
|
$Global::debug = 0;
|
||||||
|
@ -3842,7 +4031,6 @@ sub sshcommand_of_sshlogin {
|
||||||
# Run a sleep that outputs data, so it will discover if the ssh connection closes.
|
# Run a sleep that outputs data, so it will discover if the ssh connection closes.
|
||||||
my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
|
my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
|
||||||
my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
|
my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
|
||||||
::debug("@master\n");
|
|
||||||
exec(@master);
|
exec(@master);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5206,9 +5394,11 @@ sub new {
|
||||||
$command = $Global::replace{'{}'};
|
$command = $Global::replace{'{}'};
|
||||||
} elsif($opt::pipe) {
|
} elsif($opt::pipe) {
|
||||||
# With --pipe you can have ::: or not
|
# With --pipe you can have ::: or not
|
||||||
if(@opt::a) {
|
# if(@opt::a) {
|
||||||
$command .=" ".$Global::replace{'{}'};
|
# $command .=" ".$Global::replace{'{}'};
|
||||||
}
|
# }
|
||||||
|
} elsif($opt::pipepart) {
|
||||||
|
# With --pipe-part you can have nothing
|
||||||
} else {
|
} else {
|
||||||
# Add {} to the command if there are no {...}'s
|
# Add {} to the command if there are no {...}'s
|
||||||
$command .=" ".$Global::replace{'{}'};
|
$command .=" ".$Global::replace{'{}'};
|
||||||
|
@ -5871,7 +6061,7 @@ sub get {
|
||||||
);
|
);
|
||||||
$cmd_line->populate();
|
$cmd_line->populate();
|
||||||
::debug("cmd_line->number_of_args ".$cmd_line->number_of_args()."\n");
|
::debug("cmd_line->number_of_args ".$cmd_line->number_of_args()."\n");
|
||||||
if($opt::pipe) {
|
if($opt::pipe or $opt::pipepart) {
|
||||||
if($cmd_line->replaced() eq "") {
|
if($cmd_line->replaced() eq "") {
|
||||||
# Empty command - pipe requires a command
|
# Empty command - pipe requires a command
|
||||||
::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
|
::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
|
||||||
|
|
Loading…
Reference in a new issue