mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parsort: --parallel=N does better what is expected.
This commit is contained in:
parent
5d5cdcf77f
commit
c8e203dfeb
43
src/parsort
43
src/parsort
|
@ -24,6 +24,21 @@ multicore machine.
|
||||||
Hopefully these ideas will make it into GNU B<sort> in the future.
|
Hopefully these ideas will make it into GNU B<sort> in the future.
|
||||||
|
|
||||||
|
|
||||||
|
=head1 OPTIONS
|
||||||
|
|
||||||
|
Same as B<sort>. Except:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item B<--parallel=>I<N>
|
||||||
|
|
||||||
|
Change the number of sorts run concurrently to I<N>. I<N> will be
|
||||||
|
increased to the number of files if B<parsort> is given more than I<N>
|
||||||
|
files.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
|
||||||
=head1 EXAMPLE
|
=head1 EXAMPLE
|
||||||
|
|
||||||
Sort files:
|
Sort files:
|
||||||
|
@ -37,7 +52,7 @@ Sort stdin (standard input) numerically:
|
||||||
|
|
||||||
=head1 PERFORMANCE
|
=head1 PERFORMANCE
|
||||||
|
|
||||||
B<parsort> is faster on a file than on stdin (standard input), because
|
B<parsort> is faster on files than on stdin (standard input), because
|
||||||
different parts of a file can be read in parallel.
|
different parts of a file can be read in parallel.
|
||||||
|
|
||||||
On a 48 core machine you should see a speedup of 3x over B<sort>.
|
On a 48 core machine you should see a speedup of 3x over B<sort>.
|
||||||
|
@ -115,7 +130,7 @@ GetOptions(
|
||||||
"C" => \$opt::dummy,
|
"C" => \$opt::dummy,
|
||||||
"compress-program=s" => \$opt::dummy,
|
"compress-program=s" => \$opt::dummy,
|
||||||
"T|temporary-directory=s" => \$opt::dummy,
|
"T|temporary-directory=s" => \$opt::dummy,
|
||||||
"parallel=s" => \$opt::dummy,
|
"parallel=s" => \$opt::parallel,
|
||||||
"u|unique" => \$opt::dummy,
|
"u|unique" => \$opt::dummy,
|
||||||
"S|buffer-size=s" => \$opt::dummy,
|
"S|buffer-size=s" => \$opt::dummy,
|
||||||
"s|stable" => \$opt::dummy,
|
"s|stable" => \$opt::dummy,
|
||||||
|
@ -124,9 +139,20 @@ GetOptions(
|
||||||
$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
|
$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
|
||||||
$Global::version = 20230122;
|
$Global::version = 20230122;
|
||||||
if($opt::version) { version(); exit 0; }
|
if($opt::version) { version(); exit 0; }
|
||||||
@Global::sortoptions = grep { ! /^-D$/ }
|
# Remove -D and --parallel=N
|
||||||
shell_quote(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]);
|
my @s = (grep { ! /^-D$|^--parallel=\S+$/ }
|
||||||
|
@ARGV_before[0..($#ARGV_before-$#ARGV-1)]);
|
||||||
|
my @sortoptions;
|
||||||
|
while(@s) {
|
||||||
|
my $o = shift @s;
|
||||||
|
# Remove '--parallel N'
|
||||||
|
if($o eq "--parallel") {
|
||||||
|
$o = shift @s;
|
||||||
|
} else {
|
||||||
|
push @sortoptions, $o;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Global::sortoptions = shell_quote(@sortoptions);
|
||||||
$ENV{'TMPDIR'} ||= "/tmp";
|
$ENV{'TMPDIR'} ||= "/tmp";
|
||||||
|
|
||||||
sub merge {
|
sub merge {
|
||||||
|
@ -158,7 +184,9 @@ sub sort_files {
|
||||||
# Let GNU Parallel generate the commands to read parts of files
|
# Let GNU Parallel generate the commands to read parts of files
|
||||||
# The commands split at \n (or \0)
|
# The commands split at \n (or \0)
|
||||||
# and there will be at least one for each CPU thread
|
# and there will be at least one for each CPU thread
|
||||||
my @subopt = $opt::zero_terminated ? qw(--recend "\0") : ();
|
my @subopt;
|
||||||
|
if($opt::zero_terminated) { push @subopt, qw(--recend "\0"); }
|
||||||
|
if($opt::parallel) { push @subopt, qw(--jobs), $opt::parallel; }
|
||||||
# $uniq is needed because @files could contain \n
|
# $uniq is needed because @files could contain \n
|
||||||
my $uniq = join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
|
my $uniq = join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20);
|
||||||
open(my $par,"-|",qw(parallel), @subopt,
|
open(my $par,"-|",qw(parallel), @subopt,
|
||||||
|
@ -182,7 +210,8 @@ sub sort_stdin {
|
||||||
# Input is stdin
|
# Input is stdin
|
||||||
# Spread the input between n processes that each sort
|
# Spread the input between n processes that each sort
|
||||||
# n = number of CPU threads
|
# n = number of CPU threads
|
||||||
my $numthreads = `parallel --number-of-threads`;
|
my $numthreads;
|
||||||
|
$numthreads = $opt::parallel || `parallel --number-of-threads`;
|
||||||
my @fifos = map { tmpfifo() } 1..$numthreads;
|
my @fifos = map { tmpfifo() } 1..$numthreads;
|
||||||
map { mkfifo($_,0600) } @fifos;
|
map { mkfifo($_,0600) } @fifos;
|
||||||
# This trick removes the fifo as soon as it is connected in the other end
|
# This trick removes the fifo as soon as it is connected in the other end
|
||||||
|
|
Loading…
Reference in a new issue