mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-24 23:17:55 +00:00
parallel: --line-buffer memory usage changed from 2x to 1x.
This commit is contained in:
parent
fd4694c66b
commit
5e1e3775fc
11
doc/haikus
11
doc/haikus
|
@ -2,10 +2,6 @@ Quote of the month:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
With GNU Parallel you sure can!
|
|
||||||
I like getting things done
|
|
||||||
|
|
||||||
--Kyle Lady @kylelady@twitter
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,7 +33,12 @@ It's the MapReduce of our generation!
|
||||||
|
|
||||||
|
|
||||||
=== Used ===
|
=== Used ===
|
||||||
Ok! GNU Parallel is one of the best things out there. Almost as good as vanilla ice cream.
|
With GNU Parallel you sure can!
|
||||||
|
I like getting things done
|
||||||
|
|
||||||
|
--Kyle Lady @kylelady@twitter
|
||||||
|
|
||||||
|
Ok! GNU Parallel is one of the best things out there. Almost as good as vanilla ice cream.
|
||||||
-- @coffe@mastodon.art
|
-- @coffe@mastodon.art
|
||||||
|
|
||||||
HOLY STUFF I LOVE GNU PARALLEL
|
HOLY STUFF I LOVE GNU PARALLEL
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
use strict;
|
use strict;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
$Global::progname="niceload";
|
$Global::progname="niceload";
|
||||||
$Global::version = 20190222;
|
$Global::version = 20190223;
|
||||||
Getopt::Long::Configure("bundling","require_order");
|
Getopt::Long::Configure("bundling","require_order");
|
||||||
get_options_from_array(\@ARGV) || die_usage();
|
get_options_from_array(\@ARGV) || die_usage();
|
||||||
if($opt::version) {
|
if($opt::version) {
|
||||||
|
|
21
src/parallel
21
src/parallel
|
@ -200,13 +200,10 @@ sub pipe_tee_setup() {
|
||||||
sub parcat_script() {
|
sub parcat_script() {
|
||||||
# TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
|
# TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
|
||||||
my $script = q'{
|
my $script = q'{
|
||||||
use Symbol qw(gensym);
|
|
||||||
use IPC::Open3;
|
|
||||||
use POSIX qw(:errno_h);
|
use POSIX qw(:errno_h);
|
||||||
use IO::Select;
|
use IO::Select;
|
||||||
use strict;
|
use strict;
|
||||||
use threads;
|
use threads;
|
||||||
use threads::shared;
|
|
||||||
use Thread::Queue;
|
use Thread::Queue;
|
||||||
use Fcntl qw(:DEFAULT :flock);
|
use Fcntl qw(:DEFAULT :flock);
|
||||||
|
|
||||||
|
@ -369,7 +366,7 @@ sub parcat_script() {
|
||||||
fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
|
fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
|
||||||
}
|
}
|
||||||
}';
|
}';
|
||||||
return ::spacefree(1, $script);
|
return ::spacefree(3, $script);
|
||||||
}
|
}
|
||||||
|
|
||||||
sub sharder_script() {
|
sub sharder_script() {
|
||||||
|
@ -432,7 +429,7 @@ sub pipe_shard_setup() {
|
||||||
open STDOUT, ">","/dev/null";
|
open STDOUT, ">","/dev/null";
|
||||||
# The PERL_HASH_SEED must be the same for all sharders
|
# The PERL_HASH_SEED must be the same for all sharders
|
||||||
# so B::hash will return the same value for any given input
|
# so B::hash will return the same value for any given input
|
||||||
$ENV{PERL_HASH_SEED} = $$;
|
$ENV{'PERL_HASH_SEED'} = $$;
|
||||||
exec qw(parallel --block 100k -q --pipe -j), $njobs,
|
exec qw(parallel --block 100k -q --pipe -j), $njobs,
|
||||||
qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
|
qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
|
||||||
$opt::shard, '{}', (map { (':::+', @{$_}) } @parcatfifos);
|
$opt::shard, '{}', (map { (':::+', @{$_}) } @parcatfifos);
|
||||||
|
@ -1698,7 +1695,7 @@ sub check_invalid_option_combinations() {
|
||||||
|
|
||||||
sub init_globals() {
|
sub init_globals() {
|
||||||
# Defaults:
|
# Defaults:
|
||||||
$Global::version = 20190222;
|
$Global::version = 20190223;
|
||||||
$Global::progname = 'parallel';
|
$Global::progname = 'parallel';
|
||||||
$Global::infinity = 2**31;
|
$Global::infinity = 2**31;
|
||||||
$Global::debug = 0;
|
$Global::debug = 0;
|
||||||
|
@ -5054,6 +5051,10 @@ sub spacefree($$) {
|
||||||
# Keep newlines
|
# Keep newlines
|
||||||
$s =~ s/\n\n+/\n/sg;
|
$s =~ s/\n\n+/\n/sg;
|
||||||
$s =~ s/[ \t]+/ /mg;
|
$s =~ s/[ \t]+/ /mg;
|
||||||
|
} elsif(3 == $spaces) {
|
||||||
|
# Keep perl code required space
|
||||||
|
$s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
|
||||||
|
$s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
|
||||||
} else {
|
} else {
|
||||||
$s =~ s/\s//mg;
|
$s =~ s/\s//mg;
|
||||||
}
|
}
|
||||||
|
@ -8516,7 +8517,7 @@ sub sshlogin_wrap($) {
|
||||||
} else {
|
} else {
|
||||||
$bashfuncset = '$bashfunc = "";'
|
$bashfuncset = '$bashfunc = "";'
|
||||||
}
|
}
|
||||||
if($ENV{"parallel_bash_environment"}) {
|
if($ENV{'parallel_bash_environment'}) {
|
||||||
$bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
|
$bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
|
||||||
}
|
}
|
||||||
::debug("base64",$envset,$bashfuncset,"\n");
|
::debug("base64",$envset,$bashfuncset,"\n");
|
||||||
|
@ -9527,7 +9528,11 @@ sub print_linebuffer($) {
|
||||||
# read remaining
|
# read remaining
|
||||||
my $halfline_ref = $self->{'halfline'}{$fdno};
|
my $halfline_ref = $self->{'halfline'}{$fdno};
|
||||||
if(grep /./, @$halfline_ref) {
|
if(grep /./, @$halfline_ref) {
|
||||||
$self->add_returnsize(length join("",@$halfline_ref));
|
my $returnsize = 0;
|
||||||
|
for(@{$self->{'halfline'}{$fdno}}) {
|
||||||
|
$returnsize += length $_;
|
||||||
|
}
|
||||||
|
$self->add_returnsize($returnsize);
|
||||||
if($opt::tag or defined $opt::tagstring) {
|
if($opt::tag or defined $opt::tagstring) {
|
||||||
# Prepend $tag to the remaining half line
|
# Prepend $tag to the remaining half line
|
||||||
unshift @$halfline_ref, $self->tag();
|
unshift @$halfline_ref, $self->tag();
|
||||||
|
|
|
@ -2148,7 +2148,7 @@ E.g.
|
||||||
B<--shebang-wrap> must be set as the first option.
|
B<--shebang-wrap> must be set as the first option.
|
||||||
|
|
||||||
|
|
||||||
=item B<--shellquote> (alpha testing)
|
=item B<--shellquote> (beta testing)
|
||||||
|
|
||||||
Does not run the command but quotes it. Useful for making quoted
|
Does not run the command but quotes it. Useful for making quoted
|
||||||
composed commands for GNU B<parallel>.
|
composed commands for GNU B<parallel>.
|
||||||
|
|
|
@ -1959,6 +1959,46 @@ https://github.com/codingo/Interlace can be run with GNU B<parallel>:
|
||||||
|
|
||||||
https://github.com/codingo/Interlace (Last checked: 2019-02)
|
https://github.com/codingo/Interlace (Last checked: 2019-02)
|
||||||
|
|
||||||
|
|
||||||
|
=head2 DIFFERENCES BETWEEN otonvm Parallel AND GNU Parallel
|
||||||
|
|
||||||
|
I have been unable to get the code to run at all. It seems unfinished.
|
||||||
|
|
||||||
|
https://github.com/otonvm/Parallel (Last checked: 2019-02)
|
||||||
|
|
||||||
|
|
||||||
|
=head2 DIFFERENCES BETWEEN k-bx par AND GNU Parallel
|
||||||
|
|
||||||
|
B<par> requires Haskell to work. This limits the number of platforms
|
||||||
|
this can work on.
|
||||||
|
|
||||||
|
B<par> does line buffering in memory. The memory usage is 3x the
|
||||||
|
longest line (compared to 1x for B<parallel --lb>). Commands must be
|
||||||
|
given as arguments. There is no template.
|
||||||
|
|
||||||
|
These are the examples from https://github.com/k-bx/par with the
|
||||||
|
corresponding GNU B<parallel> command.
|
||||||
|
|
||||||
|
par "echo foo; sleep 1; echo foo; sleep 1; echo foo" \
|
||||||
|
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||||
|
parallel --lb ::: "echo foo; sleep 1; echo foo; sleep 1; echo foo" \
|
||||||
|
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||||
|
|
||||||
|
par "echo foo; sleep 1; foofoo" \
|
||||||
|
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||||
|
parallel --lb --halt 1 ::: "echo foo; sleep 1; foofoo" \
|
||||||
|
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||||
|
|
||||||
|
par "PARPREFIX=[fooechoer] echo foo" "PARPREFIX=[bar] echo bar"
|
||||||
|
parallel --lb --colsep , --tagstring {1} {2} \
|
||||||
|
::: "[fooechoer],echo foo" "[bar],echo bar"
|
||||||
|
|
||||||
|
par --succeed "foo" "bar" && echo 'wow'
|
||||||
|
parallel ::: "foo" "bar"; true && echo 'wow'
|
||||||
|
|
||||||
|
https://github.com/k-bx/par (Last checked: 2019-02)
|
||||||
|
|
||||||
|
|
||||||
=head2 Todo
|
=head2 Todo
|
||||||
|
|
||||||
Url for spread
|
Url for spread
|
||||||
|
@ -1979,13 +2019,6 @@ https://github.com/xuchenCN/go-pssh
|
||||||
|
|
||||||
https://github.com/amritb/with-this.git
|
https://github.com/amritb/with-this.git
|
||||||
|
|
||||||
https://github.com/fd0/machma Requires Go >= 1.7.
|
|
||||||
|
|
||||||
https://github.com/k-bx/par requires Haskell to work. This limits the
|
|
||||||
number of platforms this can work on.
|
|
||||||
|
|
||||||
https://github.com/otonvm/Parallel
|
|
||||||
|
|
||||||
https://github.com/flesler/parallel
|
https://github.com/flesler/parallel
|
||||||
|
|
||||||
https://github.com/Julian/Verge
|
https://github.com/Julian/Verge
|
||||||
|
|
|
@ -20,6 +20,14 @@ a single file: No need to mess around with environment variables like
|
||||||
PERL5LIB.
|
PERL5LIB.
|
||||||
|
|
||||||
|
|
||||||
|
=head2 Interpreted language
|
||||||
|
|
||||||
|
GNU B<parallel> is designed to be able to run on old systems. That
|
||||||
|
means that it cannot depend on a compiler being installed - and
|
||||||
|
especially not a compiler for a language that is less than 20 years
|
||||||
|
old.
|
||||||
|
|
||||||
|
|
||||||
=head2 Old Perl style
|
=head2 Old Perl style
|
||||||
|
|
||||||
GNU B<parallel> uses some old, deprecated constructs. This is due to a
|
GNU B<parallel> uses some old, deprecated constructs. This is due to a
|
||||||
|
@ -526,6 +534,63 @@ The real killer comes when you try to combine several of these: Doing
|
||||||
that correctly for all corner cases is next to impossible to do by
|
that correctly for all corner cases is next to impossible to do by
|
||||||
hand.
|
hand.
|
||||||
|
|
||||||
|
=head2 --shard
|
||||||
|
|
||||||
|
The simple way to implement sharding would be to:
|
||||||
|
|
||||||
|
=over 5
|
||||||
|
|
||||||
|
=item 1
|
||||||
|
|
||||||
|
start n jobs,
|
||||||
|
|
||||||
|
=item 2
|
||||||
|
|
||||||
|
split each line into columns,
|
||||||
|
|
||||||
|
=item 3
|
||||||
|
|
||||||
|
select the data from the relevant column
|
||||||
|
|
||||||
|
=item 4
|
||||||
|
|
||||||
|
compute a hash value from the data
|
||||||
|
|
||||||
|
=item 5
|
||||||
|
|
||||||
|
take the modulo n of the hash value
|
||||||
|
|
||||||
|
=item 6
|
||||||
|
|
||||||
|
pass the full line to the jobslot that has the computed value
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
Unfortunately Perl is rather slow at computing the hash value (and
|
||||||
|
somewhat slow at splitting into columns).
|
||||||
|
|
||||||
|
One solution is to use a compiled language for the splitting and
|
||||||
|
hashing, but that would go against the design criteria of not
|
||||||
|
depending on a compiler.
|
||||||
|
|
||||||
|
Luckily those tasks can be parallelized. So GNU B<parallel> starts n
|
||||||
|
sharders that do steps 2-6, and passes blocks of 100k to each of those
|
||||||
|
in a round robin manner. To make sure these sharders compute the hash
|
||||||
|
the same way, $PERL_HASH_SEED is set to the same value for all sharders.
|
||||||
|
|
||||||
|
Running n sharders poses a new problem: Instead of having n outputs
|
||||||
|
(one for each computed value) you now have n outputs for each of the n
|
||||||
|
values, so in total n*n outputs; and you need to merge these n*n
|
||||||
|
outputs together into n outputs.
|
||||||
|
|
||||||
|
This can be done by simply running 'parallel -j0 --lb cat :::
|
||||||
|
outputs_for_one_value', but that is rather inefficient, as it spawns a
|
||||||
|
process for each file. Instead the core code from 'parcat' is run,
|
||||||
|
which is also a bit faster.
|
||||||
|
|
||||||
|
All the sharders and parcats communicate through named pipes that are
|
||||||
|
unlinked as soon as they are opened.
|
||||||
|
|
||||||
|
|
||||||
=head2 Shell shock
|
=head2 Shell shock
|
||||||
|
|
||||||
|
|
2
src/sql
2
src/sql
|
@ -574,7 +574,7 @@ $Global::Initfile && unlink $Global::Initfile;
|
||||||
exit ($err);
|
exit ($err);
|
||||||
|
|
||||||
sub parse_options {
|
sub parse_options {
|
||||||
$Global::version = 20190222;
|
$Global::version = 20190223;
|
||||||
$Global::progname = 'sql';
|
$Global::progname = 'sql';
|
||||||
|
|
||||||
# This must be done first as this may exec myself
|
# This must be done first as this may exec myself
|
||||||
|
|
|
@ -275,6 +275,27 @@ par_test_diff_roundrobin_k() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
par_lb_mem_usage() {
|
||||||
|
long_line() {
|
||||||
|
perl -e 'print "x"x100_000_000'
|
||||||
|
}
|
||||||
|
export -f long_line
|
||||||
|
memusage() {
|
||||||
|
round=$1
|
||||||
|
shift
|
||||||
|
/usr/bin/time -v "$@" 2>&1 >/dev/null |
|
||||||
|
perl -ne '/Maximum resident set size .kbytes.: (\d+)/ and print $1,"\n"' |
|
||||||
|
perl -pe '$_ = int($_/'$round')."\n"'
|
||||||
|
}
|
||||||
|
# 1 line - RAM usage 1 x 100 MB
|
||||||
|
memusage 100000 parallel --lb ::: long_line
|
||||||
|
# 2 lines - RAM usage 1 x 100 MB
|
||||||
|
memusage 100000 parallel --lb ::: 'long_line; echo; long_line'
|
||||||
|
# 1 double length line - RAM usage 2 x 100 MB
|
||||||
|
memusage 100000 parallel --lb ::: 'long_line; long_line'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
export -f $(compgen -A function | grep par_)
|
export -f $(compgen -A function | grep par_)
|
||||||
compgen -A function | grep par_ | LC_ALL=C sort |
|
compgen -A function | grep par_ | LC_ALL=C sort |
|
||||||
parallel -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
parallel -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
||||||
|
|
|
@ -27,8 +27,8 @@ par_interactive sleep 0.1; echo opt-p 2 ?...n
|
||||||
par_interactive sleep 0.1; echo opt-p 3 ?...y
|
par_interactive sleep 0.1; echo opt-p 3 ?...y
|
||||||
par_interactive spawn /tmp/parallel-script-for-expect
|
par_interactive spawn /tmp/parallel-script-for-expect
|
||||||
par_k ### Test -k
|
par_k ### Test -k
|
||||||
par_k parallel: Warning: Only enough file handles to run 8 jobs in parallel.
|
par_k parallel: Warning: Only enough file handles to run 9 jobs in parallel.
|
||||||
par_k parallel: Warning: Running 'parallel -j0 -N 8 --pipe parallel -j0' or
|
par_k parallel: Warning: Running 'parallel -j0 -N 9 --pipe parallel -j0' or
|
||||||
par_k parallel: Warning: raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf
|
par_k parallel: Warning: raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf
|
||||||
par_k parallel: Warning: or /proc/sys/fs/file-max may help.
|
par_k parallel: Warning: or /proc/sys/fs/file-max may help.
|
||||||
par_k begin
|
par_k begin
|
||||||
|
|
|
@ -62,6 +62,9 @@ par_kill_term_twice parallel: bash -c 'sleep 120 & pid=$!; wait $pid' 1
|
||||||
par_kill_term_twice bash-+-perl---bash---sleep
|
par_kill_term_twice bash-+-perl---bash---sleep
|
||||||
par_kill_term_twice `-pstree
|
par_kill_term_twice `-pstree
|
||||||
par_kill_term_twice bash---pstree
|
par_kill_term_twice bash---pstree
|
||||||
|
par_lb_mem_usage 1
|
||||||
|
par_lb_mem_usage 1
|
||||||
|
par_lb_mem_usage 2
|
||||||
par_multiline_commands bug #50781: joblog format with multiline commands
|
par_multiline_commands bug #50781: joblog format with multiline commands
|
||||||
par_multiline_commands 1
|
par_multiline_commands 1
|
||||||
par_multiline_commands finish 1
|
par_multiline_commands finish 1
|
||||||
|
|
|
@ -61,7 +61,7 @@ echo '### Check that 4 processes are really used'
|
||||||
echo '### --version must have higher priority than retired options'
|
echo '### --version must have higher priority than retired options'
|
||||||
### --version must have higher priority than retired options
|
### --version must have higher priority than retired options
|
||||||
$NICEPAR --version -g -Y -U -W -T | tail
|
$NICEPAR --version -g -Y -U -W -T | tail
|
||||||
GNU parallel 20190123
|
GNU parallel 20190223
|
||||||
Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
|
Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
|
||||||
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||||
This is free software: you are free to change and redistribute it.
|
This is free software: you are free to change and redistribute it.
|
||||||
|
|
Loading…
Reference in a new issue