mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-24 23:17:55 +00:00
parallel: --line-buffer memory usage changed from 2x to 1x.
This commit is contained in:
parent
fd4694c66b
commit
5e1e3775fc
|
@ -2,10 +2,6 @@ Quote of the month:
|
|||
|
||||
|
||||
|
||||
With GNU Parallel you sure can!
|
||||
I like getting things done
|
||||
|
||||
--Kyle Lady @kylelady@twitter
|
||||
|
||||
|
||||
|
||||
|
@ -37,6 +33,11 @@ It's the MapReduce of our generation!
|
|||
|
||||
|
||||
=== Used ===
|
||||
With GNU Parallel you sure can!
|
||||
I like getting things done
|
||||
|
||||
--Kyle Lady @kylelady@twitter
|
||||
|
||||
Ok! GNU Parallel is one of the best things out there. Almost as good as vanilla ice cream.
|
||||
-- @coffe@mastodon.art
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
use strict;
|
||||
use Getopt::Long;
|
||||
$Global::progname="niceload";
|
||||
$Global::version = 20190222;
|
||||
$Global::version = 20190223;
|
||||
Getopt::Long::Configure("bundling","require_order");
|
||||
get_options_from_array(\@ARGV) || die_usage();
|
||||
if($opt::version) {
|
||||
|
|
21
src/parallel
21
src/parallel
|
@ -200,13 +200,10 @@ sub pipe_tee_setup() {
|
|||
sub parcat_script() {
|
||||
# TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos
|
||||
my $script = q'{
|
||||
use Symbol qw(gensym);
|
||||
use IPC::Open3;
|
||||
use POSIX qw(:errno_h);
|
||||
use IO::Select;
|
||||
use strict;
|
||||
use threads;
|
||||
use threads::shared;
|
||||
use Thread::Queue;
|
||||
use Fcntl qw(:DEFAULT :flock);
|
||||
|
||||
|
@ -369,7 +366,7 @@ sub parcat_script() {
|
|||
fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
|
||||
}
|
||||
}';
|
||||
return ::spacefree(1, $script);
|
||||
return ::spacefree(3, $script);
|
||||
}
|
||||
|
||||
sub sharder_script() {
|
||||
|
@ -432,7 +429,7 @@ sub pipe_shard_setup() {
|
|||
open STDOUT, ">","/dev/null";
|
||||
# The PERL_HASH_SEED must be the same for all sharders
|
||||
# so B::hash will return the same value for any given input
|
||||
$ENV{PERL_HASH_SEED} = $$;
|
||||
$ENV{'PERL_HASH_SEED'} = $$;
|
||||
exec qw(parallel --block 100k -q --pipe -j), $njobs,
|
||||
qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","),
|
||||
$opt::shard, '{}', (map { (':::+', @{$_}) } @parcatfifos);
|
||||
|
@ -1698,7 +1695,7 @@ sub check_invalid_option_combinations() {
|
|||
|
||||
sub init_globals() {
|
||||
# Defaults:
|
||||
$Global::version = 20190222;
|
||||
$Global::version = 20190223;
|
||||
$Global::progname = 'parallel';
|
||||
$Global::infinity = 2**31;
|
||||
$Global::debug = 0;
|
||||
|
@ -5054,6 +5051,10 @@ sub spacefree($$) {
|
|||
# Keep newlines
|
||||
$s =~ s/\n\n+/\n/sg;
|
||||
$s =~ s/[ \t]+/ /mg;
|
||||
} elsif(3 == $spaces) {
|
||||
# Keep perl code required space
|
||||
$s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg;
|
||||
$s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg;
|
||||
} else {
|
||||
$s =~ s/\s//mg;
|
||||
}
|
||||
|
@ -8516,7 +8517,7 @@ sub sshlogin_wrap($) {
|
|||
} else {
|
||||
$bashfuncset = '$bashfunc = "";'
|
||||
}
|
||||
if($ENV{"parallel_bash_environment"}) {
|
||||
if($ENV{'parallel_bash_environment'}) {
|
||||
$bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
|
||||
}
|
||||
::debug("base64",$envset,$bashfuncset,"\n");
|
||||
|
@ -9527,7 +9528,11 @@ sub print_linebuffer($) {
|
|||
# read remaining
|
||||
my $halfline_ref = $self->{'halfline'}{$fdno};
|
||||
if(grep /./, @$halfline_ref) {
|
||||
$self->add_returnsize(length join("",@$halfline_ref));
|
||||
my $returnsize = 0;
|
||||
for(@{$self->{'halfline'}{$fdno}}) {
|
||||
$returnsize += length $_;
|
||||
}
|
||||
$self->add_returnsize($returnsize);
|
||||
if($opt::tag or defined $opt::tagstring) {
|
||||
# Prepend $tag to the remaining half line
|
||||
unshift @$halfline_ref, $self->tag();
|
||||
|
|
|
@ -2148,7 +2148,7 @@ E.g.
|
|||
B<--shebang-wrap> must be set as the first option.
|
||||
|
||||
|
||||
=item B<--shellquote> (alpha testing)
|
||||
=item B<--shellquote> (beta testing)
|
||||
|
||||
Does not run the command but quotes it. Useful for making quoted
|
||||
composed commands for GNU B<parallel>.
|
||||
|
|
|
@ -1959,6 +1959,46 @@ https://github.com/codingo/Interlace can be run with GNU B<parallel>:
|
|||
|
||||
https://github.com/codingo/Interlace (Last checked: 2019-02)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN otonvm Parallel AND GNU Parallel
|
||||
|
||||
I have been unable to get the code to run at all. It seems unfinished.
|
||||
|
||||
https://github.com/otonvm/Parallel (Last checked: 2019-02)
|
||||
|
||||
|
||||
=head2 DIFFERENCES BETWEEN k-bx par AND GNU Parallel
|
||||
|
||||
B<par> requires Haskell to work. This limits the number of platforms
|
||||
this can work on.
|
||||
|
||||
B<par> does line buffering in memory. The memory usage is 3x the
|
||||
longest line (compared to 1x for B<parallel --lb>). Commands must be
|
||||
given as arguments. There is no template.
|
||||
|
||||
These are the examples from https://github.com/k-bx/par with the
|
||||
corresponding GNU B<parallel> command.
|
||||
|
||||
par "echo foo; sleep 1; echo foo; sleep 1; echo foo" \
|
||||
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||
parallel --lb ::: "echo foo; sleep 1; echo foo; sleep 1; echo foo" \
|
||||
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||
|
||||
par "echo foo; sleep 1; foofoo" \
|
||||
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||
parallel --lb --halt 1 ::: "echo foo; sleep 1; foofoo" \
|
||||
"echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success"
|
||||
|
||||
par "PARPREFIX=[fooechoer] echo foo" "PARPREFIX=[bar] echo bar"
|
||||
parallel --lb --colsep , --tagstring {1} {2} \
|
||||
::: "[fooechoer],echo foo" "[bar],echo bar"
|
||||
|
||||
par --succeed "foo" "bar" && echo 'wow'
|
||||
parallel ::: "foo" "bar"; true && echo 'wow'
|
||||
|
||||
https://github.com/k-bx/par (Last checked: 2019-02)
|
||||
|
||||
|
||||
=head2 Todo
|
||||
|
||||
Url for spread
|
||||
|
@ -1979,13 +2019,6 @@ https://github.com/xuchenCN/go-pssh
|
|||
|
||||
https://github.com/amritb/with-this.git
|
||||
|
||||
https://github.com/fd0/machma Requires Go >= 1.7.
|
||||
|
||||
https://github.com/k-bx/par requires Haskell to work. This limits the
|
||||
number of platforms this can work on.
|
||||
|
||||
https://github.com/otonvm/Parallel
|
||||
|
||||
https://github.com/flesler/parallel
|
||||
|
||||
https://github.com/Julian/Verge
|
||||
|
|
|
@ -20,6 +20,14 @@ a single file: No need to mess around with environment variables like
|
|||
PERL5LIB.
|
||||
|
||||
|
||||
=head2 Interpreted language
|
||||
|
||||
GNU B<parallel> is designed to be able to run on old systems. That
|
||||
means that it cannot depend on a compiler being installed - and
|
||||
especially not a compiler for a language that is younger than 20 years
|
||||
old.
|
||||
|
||||
|
||||
=head2 Old Perl style
|
||||
|
||||
GNU B<parallel> uses some old, deprecated constructs. This is due to a
|
||||
|
@ -526,6 +534,63 @@ The real killer comes when you try to combine several of these: Doing
|
|||
that correctly for all corner cases is next to impossible to do by
|
||||
hand.
|
||||
|
||||
=head2 --shard
|
||||
|
||||
The simple way to implement sharding would be to:
|
||||
|
||||
=over 5
|
||||
|
||||
=item 1
|
||||
|
||||
start n jobs,
|
||||
|
||||
=item 2
|
||||
|
||||
split each line into columns,
|
||||
|
||||
=item 3
|
||||
|
||||
select the data from the relevant column
|
||||
|
||||
=item 4
|
||||
|
||||
compute a hash value from the data
|
||||
|
||||
=item 5
|
||||
|
||||
take the modulo n of the hash value
|
||||
|
||||
=item 6
|
||||
|
||||
pass the full line to the jobslot that has the computed value
|
||||
|
||||
=back
|
||||
|
||||
Unfortunately Perl is rather slow at computing the hash value (and
|
||||
somewhat slow at splitting into columns).
|
||||
|
||||
One solution is to use a compiled language for the splitting and
|
||||
hashing, but that would go against the design criteria of not
|
||||
depending on a compiler.
|
||||
|
||||
Luckily those tasks can be parallelized. So GNU B<parallel> starts n
|
||||
sharders that do steps 2-6, and passes blocks of 100k to each of those
|
||||
in a round robin manner. To make sure these sharders compute the hash
|
||||
the same way, $PERL_HASH_SEED is set to the same value for all sharders.
|
||||
|
||||
Running n sharders poses a new problem: Instead of having n outputs
|
||||
(one for each computed value) you now have n outputs for each of the n
|
||||
values, so in total n*n outputs; and you need to merge these n*n
|
||||
outputs together into n outputs.
|
||||
|
||||
This can be done by simply running 'parallel -j0 --lb cat :::
|
||||
outputs_for_one_value', but that is rather inefficient, as it spawns a
|
||||
process for each file. Instead the core code from 'parcat' is run,
|
||||
which is also a bit faster.
|
||||
|
||||
All the sharders and parcats communicate through named pipes that are
|
||||
unlinked as soon as they are opened.
|
||||
|
||||
|
||||
=head2 Shell shock
|
||||
|
||||
|
|
2
src/sql
2
src/sql
|
@ -574,7 +574,7 @@ $Global::Initfile && unlink $Global::Initfile;
|
|||
exit ($err);
|
||||
|
||||
sub parse_options {
|
||||
$Global::version = 20190222;
|
||||
$Global::version = 20190223;
|
||||
$Global::progname = 'sql';
|
||||
|
||||
# This must be done first as this may exec myself
|
||||
|
|
|
@ -275,6 +275,27 @@ par_test_diff_roundrobin_k() {
|
|||
fi
|
||||
}
|
||||
|
||||
par_lb_mem_usage() {
|
||||
long_line() {
|
||||
perl -e 'print "x"x100_000_000'
|
||||
}
|
||||
export -f long_line
|
||||
memusage() {
|
||||
round=$1
|
||||
shift
|
||||
/usr/bin/time -v "$@" 2>&1 >/dev/null |
|
||||
perl -ne '/Maximum resident set size .kbytes.: (\d+)/ and print $1,"\n"' |
|
||||
perl -pe '$_ = int($_/'$round')."\n"'
|
||||
}
|
||||
# 1 line - RAM usage 1 x 100 MB
|
||||
memusage 100000 parallel --lb ::: long_line
|
||||
# 2 lines - RAM usage 1 x 100 MB
|
||||
memusage 100000 parallel --lb ::: 'long_line; echo; long_line'
|
||||
# 1 double length line - RAM usage 2 x 100 MB
|
||||
memusage 100000 parallel --lb ::: 'long_line; long_line'
|
||||
}
|
||||
|
||||
|
||||
export -f $(compgen -A function | grep par_)
|
||||
compgen -A function | grep par_ | LC_ALL=C sort |
|
||||
parallel -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1'
|
||||
|
|
|
@ -27,8 +27,8 @@ par_interactive sleep 0.1; echo opt-p 2 ?...n
|
|||
par_interactive sleep 0.1; echo opt-p 3 ?...y
|
||||
par_interactive spawn /tmp/parallel-script-for-expect
|
||||
par_k ### Test -k
|
||||
par_k parallel: Warning: Only enough file handles to run 8 jobs in parallel.
|
||||
par_k parallel: Warning: Running 'parallel -j0 -N 8 --pipe parallel -j0' or
|
||||
par_k parallel: Warning: Only enough file handles to run 9 jobs in parallel.
|
||||
par_k parallel: Warning: Running 'parallel -j0 -N 9 --pipe parallel -j0' or
|
||||
par_k parallel: Warning: raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf
|
||||
par_k parallel: Warning: or /proc/sys/fs/file-max may help.
|
||||
par_k begin
|
||||
|
|
|
@ -62,6 +62,9 @@ par_kill_term_twice parallel: bash -c 'sleep 120 & pid=$!; wait $pid' 1
|
|||
par_kill_term_twice bash-+-perl---bash---sleep
|
||||
par_kill_term_twice `-pstree
|
||||
par_kill_term_twice bash---pstree
|
||||
par_lb_mem_usage 1
|
||||
par_lb_mem_usage 1
|
||||
par_lb_mem_usage 2
|
||||
par_multiline_commands bug #50781: joblog format with multiline commands
|
||||
par_multiline_commands 1
|
||||
par_multiline_commands finish 1
|
||||
|
|
|
@ -61,7 +61,7 @@ echo '### Check that 4 processes are really used'
|
|||
echo '### --version must have higher priority than retired options'
|
||||
### --version must have higher priority than retired options
|
||||
$NICEPAR --version -g -Y -U -W -T | tail
|
||||
GNU parallel 20190123
|
||||
GNU parallel 20190223
|
||||
Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc.
|
||||
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
Loading…
Reference in a new issue