diff --git a/doc/haikus b/doc/haikus index 067c3ddf..8364cdac 100644 --- a/doc/haikus +++ b/doc/haikus @@ -2,10 +2,6 @@ Quote of the month: -With GNU Parallel you sure can! -I like getting things done - ---Kyle Lady @kylelady@twitter @@ -37,7 +33,12 @@ It's the MapReduce of our generation! === Used === - Ok! GNU Parallel is one of the best things out there. Almost as good as vanilla ice cream. +With GNU Parallel you sure can! +I like getting things done + +--Kyle Lady @kylelady@twitter + +Ok! GNU Parallel is one of the best things out there. Almost as good as vanilla ice cream. -- @coffe@mastodon.art HOLY STUFF I LOVE GNU PARALLEL diff --git a/src/niceload b/src/niceload index f9399e67..8e6b15fc 100755 --- a/src/niceload +++ b/src/niceload @@ -23,7 +23,7 @@ use strict; use Getopt::Long; $Global::progname="niceload"; -$Global::version = 20190222; +$Global::version = 20190223; Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV) || die_usage(); if($opt::version) { diff --git a/src/parallel b/src/parallel index 110e33aa..79161e76 100755 --- a/src/parallel +++ b/src/parallel @@ -200,13 +200,10 @@ sub pipe_tee_setup() { sub parcat_script() { # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos my $script = q'{ - use Symbol qw(gensym); - use IPC::Open3; use POSIX qw(:errno_h); use IO::Select; use strict; use threads; - use threads::shared; use Thread::Queue; use Fcntl qw(:DEFAULT :flock); @@ -369,7 +366,7 @@ sub parcat_script() { fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle } }'; - return ::spacefree(1, $script); + return ::spacefree(3, $script); } sub sharder_script() { @@ -432,7 +429,7 @@ sub pipe_shard_setup() { open STDOUT, ">","/dev/null"; # The PERL_HASH_SEED must be the same for all sharders # so B::hash will return the same value for any given input - $ENV{PERL_HASH_SEED} = $$; + $ENV{'PERL_HASH_SEED'} = $$; exec qw(parallel --block 100k -q --pipe -j), $njobs, qw(--roundrobin -u perl -e), 
$script, ($opt::colsep || ","), $opt::shard, '{}', (map { (':::+', @{$_}) } @parcatfifos); @@ -1698,7 +1695,7 @@ sub check_invalid_option_combinations() { sub init_globals() { # Defaults: - $Global::version = 20190222; + $Global::version = 20190223; $Global::progname = 'parallel'; $Global::infinity = 2**31; $Global::debug = 0; @@ -5054,6 +5051,10 @@ sub spacefree($$) { # Keep newlines $s =~ s/\n\n+/\n/sg; $s =~ s/[ \t]+/ /mg; + } elsif(3 == $spaces) { + # Keep perl code required space + $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg; + $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg; } else { $s =~ s/\s//mg; } @@ -8516,7 +8517,7 @@ sub sshlogin_wrap($) { } else { $bashfuncset = '$bashfunc = "";' } - if($ENV{"parallel_bash_environment"}) { + if($ENV{'parallel_bash_environment'}) { $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; } ::debug("base64",$envset,$bashfuncset,"\n"); @@ -9527,7 +9528,11 @@ sub print_linebuffer($) { # read remaining my $halfline_ref = $self->{'halfline'}{$fdno}; if(grep /./, @$halfline_ref) { - $self->add_returnsize(length join("",@$halfline_ref)); + my $returnsize = 0; + for(@{$self->{'halfline'}{$fdno}}) { + $returnsize += length $_; + } + $self->add_returnsize($returnsize); if($opt::tag or defined $opt::tagstring) { # Prepend $tag the the remaining half line unshift @$halfline_ref, $self->tag(); diff --git a/src/parallel.pod b/src/parallel.pod index 673add58..ce6a9594 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -2148,7 +2148,7 @@ E.g. B<--shebang-wrap> must be set as the first option. -=item B<--shellquote> (alpha testing) +=item B<--shellquote> (beta testing) Does not run the command but quotes it. Useful for making quoted composed commands for GNU B<parallel>. 
diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod index 9e3d9163..49a0f51f 100644 --- a/src/parallel_alternatives.pod +++ b/src/parallel_alternatives.pod @@ -1959,6 +1959,46 @@ https://github.com/codingo/Interlace can be run with GNU B<parallel>: https://github.com/codingo/Interlace (Last checked: 2019-02) + +=head2 DIFFERENCES BETWEEN otonvm Parallel AND GNU Parallel + +I have been unable to get the code to run at all. It seems unfinished. + +https://github.com/otonvm/Parallel (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN k-bx par AND GNU Parallel + +B<par> requires Haskell to work. This limits the number of platforms +this can work on. + +B<par> does line buffering in memory. The memory usage is 3x the +longest line (compared to 1x for B<parallel --lb>). Commands must be +given as arguments. There is no template. + +These are the examples from https://github.com/k-bx/par with the +corresponding GNU B<parallel> command. + + par "echo foo; sleep 1; echo foo; sleep 1; echo foo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + parallel --lb ::: "echo foo; sleep 1; echo foo; sleep 1; echo foo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + + par "echo foo; sleep 1; foofoo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + parallel --lb --halt 1 ::: "echo foo; sleep 1; foofoo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + + par "PARPREFIX=[fooechoer] echo foo" "PARPREFIX=[bar] echo bar" + parallel --lb --colsep , --tagstring {1} {2} \ + ::: "[fooechoer],echo foo" "[bar],echo bar" + + par --succeed "foo" "bar" && echo 'wow' + parallel ::: "foo" "bar"; true && echo 'wow' + +https://github.com/k-bx/par (Last checked: 2019-02) + + =head2 Todo Url for spread @@ -1979,13 +2019,6 @@ https://github.com/xuchenCN/go-pssh https://github.com/amritb/with-this.git -https://github.com/fd0/machma Requires Go >= 1.7. - -https://github.com/k-bx/par requires Haskell to work. 
This limits the -number of platforms this can work on. - -https://github.com/otonvm/Parallel - https://github.com/flesler/parallel https://github.com/Julian/Verge diff --git a/src/parallel_design.pod b/src/parallel_design.pod index 7ae4868d..a7211060 100644 --- a/src/parallel_design.pod +++ b/src/parallel_design.pod @@ -20,6 +20,14 @@ a single file: No need to mess around with environment variables like PERL5LIB. +=head2 Interpreted language + +GNU B<parallel> is designed to be able to run on old systems. That +means that it cannot depend on a compiler being installed - and +especially not a compiler for a language that is younger than 20 years +old. + + =head2 Old Perl style GNU B<parallel> uses some old, deprecated constructs. This is due to a @@ -526,6 +534,63 @@ The real killer comes when you try to combine several of these: Doing that correctly for all corner cases is next to impossible to do by hand. +=head2 --shard + +The simple way to implement sharding would be to: + +=over 5 + +=item 1 + +start n jobs, + +=item 2 + +split each line into columns, + +=item 3 + +select the data from the relevant column + +=item 4 + +compute a hash value from the data + +=item 5 + +take the modulo n of the hash value + +=item 6 + +pass the full line to the jobslot that has the computed value + +=back + +Unfortunately Perl is rather slow at computing the hash value (and +somewhat slow at splitting into columns). + +One solution is to use a compiled language for the splitting and +hashing, but that would go against the design criterion of not +depending on a compiler. + +Luckily those tasks can be parallelized. So GNU B<parallel> starts n +sharders that do steps 2-6, and passes blocks of 100k to each of those +in a round robin manner. To make sure these sharders compute the hash +the same way, $PERL_HASH_SEED is set to the same value for all sharders. 
+ +Running n sharders poses a new problem: Instead of having n outputs +(one for each computed value) you now have n outputs for each of the n +values, so in total n*n outputs; and you need to merge these n*n +outputs together into n outputs. + +This can be done by simply running 'parallel -j0 --lb cat ::: +outputs_for_one_value', but that is rather inefficient, as it spawns a +process for each file. Instead the core code from 'parcat' is run, +which is also a bit faster. + +All the sharders and parcats communicate through named pipes that are +unlinked as soon as they are opened. + =head2 Shell shock diff --git a/src/sql b/src/sql index 50ffbdad..1dc617a3 100755 --- a/src/sql +++ b/src/sql @@ -574,7 +574,7 @@ $Global::Initfile && unlink $Global::Initfile; exit ($err); sub parse_options { - $Global::version = 20190222; + $Global::version = 20190223; $Global::progname = 'sql'; # This must be done first as this may exec myself diff --git a/testsuite/tests-to-run/parallel-local-3s.sh b/testsuite/tests-to-run/parallel-local-3s.sh index 91ca0fbb..9b7adce3 100644 --- a/testsuite/tests-to-run/parallel-local-3s.sh +++ b/testsuite/tests-to-run/parallel-local-3s.sh @@ -275,6 +275,27 @@ par_test_diff_roundrobin_k() { fi } +par_lb_mem_usage() { + long_line() { + perl -e 'print "x"x100_000_000' + } + export -f long_line + memusage() { + round=$1 + shift + /usr/bin/time -v "$@" 2>&1 >/dev/null | + perl -ne '/Maximum resident set size .kbytes.: (\d+)/ and print $1,"\n"' | + perl -pe '$_ = int($_/'$round')."\n"' + } + # 1 line - RAM usage 1 x 100 MB + memusage 100000 parallel --lb ::: long_line + # 2 lines - RAM usage 1 x 100 MB + memusage 100000 parallel --lb ::: 'long_line; echo; long_line' + # 1 double length line - RAM usage 2 x 100 MB + memusage 100000 parallel --lb ::: 'long_line; long_line' +} + + export -f $(compgen -A function | grep par_) compgen -A function | grep par_ | LC_ALL=C sort | parallel -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' diff --git 
a/testsuite/wanted-results/parallel-local-10s b/testsuite/wanted-results/parallel-local-10s index bafc765a..39c74a3d 100644 --- a/testsuite/wanted-results/parallel-local-10s +++ b/testsuite/wanted-results/parallel-local-10s @@ -27,8 +27,8 @@ par_interactive sleep 0.1; echo opt-p 2 ?...n par_interactive sleep 0.1; echo opt-p 3 ?...y par_interactive spawn /tmp/parallel-script-for-expect par_k ### Test -k -par_k parallel: Warning: Only enough file handles to run 8 jobs in parallel. -par_k parallel: Warning: Running 'parallel -j0 -N 8 --pipe parallel -j0' or +par_k parallel: Warning: Only enough file handles to run 9 jobs in parallel. +par_k parallel: Warning: Running 'parallel -j0 -N 9 --pipe parallel -j0' or par_k parallel: Warning: raising 'ulimit -n' or 'nofile' in /etc/security/limits.conf par_k parallel: Warning: or /proc/sys/fs/file-max may help. par_k begin diff --git a/testsuite/wanted-results/parallel-local-3s b/testsuite/wanted-results/parallel-local-3s index 7c4d7c7c..44e4ad01 100644 --- a/testsuite/wanted-results/parallel-local-3s +++ b/testsuite/wanted-results/parallel-local-3s @@ -62,6 +62,9 @@ par_kill_term_twice parallel: bash -c 'sleep 120 & pid=$!; wait $pid' 1 par_kill_term_twice bash-+-perl---bash---sleep par_kill_term_twice `-pstree par_kill_term_twice bash---pstree +par_lb_mem_usage 1 +par_lb_mem_usage 1 +par_lb_mem_usage 2 par_multiline_commands bug #50781: joblog format with multiline commands par_multiline_commands 1 par_multiline_commands finish 1 diff --git a/testsuite/wanted-results/parallel-local23 b/testsuite/wanted-results/parallel-local23 index ab4c9321..9279e6ff 100644 --- a/testsuite/wanted-results/parallel-local23 +++ b/testsuite/wanted-results/parallel-local23 @@ -61,7 +61,7 @@ echo '### Check that 4 processes are really used' echo '### --version must have higher priority than retired options' ### --version must have higher priority than retired options $NICEPAR --version -g -Y -U -W -T | tail -GNU parallel 20190123 +GNU parallel 
20190223 Copyright (C) 2007-2019 Ole Tange and Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> This is free software: you are free to change and redistribute it.