diff --git a/NEWS b/NEWS index f1f89e40..345e4814 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,28 @@ +20201222 + +* --pipe engine changed making --pipe alpha quality. + +* --results -.json outputs results as JSON objects on stdout (standard + output). + +* --delay 123auto will auto-adjust --delay. If jobs fail due to being + spawned too quickly, --delay will exponentially increase. + +* Bug fixes and man page updates. + +News about GNU Parallel: + +* Best practices for accelerating data migrations using AWS Snowball + Edge + https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/ + +* Pass the Output of a Command as an Argument for Another + https://www.baeldung.com/linux/pass-cmd-output-as-an-argument + +* Warwick RSE Drop-in - Workflow Management Part 2 - GNU Parallel + https://www.youtube.com/watch?v=t_v2Otgt87g + + 20201122 * Bug fixes and man page updates. diff --git a/doc/haikus b/doc/haikus index f4ba24f4..d477bfdc 100644 --- a/doc/haikus +++ b/doc/haikus @@ -1,8 +1,12 @@ Quote of the month: + GNU Parallel is my single favourite tool for batch processing data + from the command line. + -- Jeff Wintersinger @jwintersinger + Today I'm grateful for GNU parallel, especially with the --colsep and --jobs parameters #GiveThanks - Erin Young @ErinYoun + -- Erin Young @ErinYoun I also prefer gnu parallel. Mainly because it makes embarrassingly parallel tasks embarrassingly easy to run on the command line. diff --git a/doc/release_new_version b/doc/release_new_version index bc68a656..071201cd 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -190,7 +190,7 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20201222 ('Maradona/Yeager') released <<[stable]>> +Subject: GNU Parallel 20201222 ('Vaccine/Maradona/Yeager/Le Carre') released <<[stable]>> GNU Parallel 20201222 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ @@ -206,16 +206,17 @@ Quote of the month: New in this release: <<>> -https://www.youtube.com/watch?v=t_v2Otgt87g * Bug fixes and man page updates. News about GNU Parallel: -https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/ +* Best practices for accelerating data migrations using AWS Snowball Edge https://aws.amazon.com/blogs/storage/best-practices-for-accelerating-data-migrations-using-aws-snowball-edge/ +* Pass the Output of a Command as an Argument for Another https://www.baeldung.com/linux/pass-cmd-output-as-an-argument +* Warwick RSE Drop-in - Workflow Management Part 2 - GNU Parallel https://www.youtube.com/watch?v=t_v2Otgt87g Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html diff --git a/src/parallel b/src/parallel index 685c489f..32978be5 100755 --- a/src/parallel +++ b/src/parallel @@ -1292,7 +1292,6 @@ sub nindex($$) { $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); $block_passed = 1; - $job->set_virgin(0); $written += $job->non_blocking_write(); last; } @@ -1355,7 +1354,7 @@ sub rindex64($@) { # Default: search from end $pos = defined $pos ? $pos : $strlen; # No point in doing extra work if we don't need to. - if($strlen < $block_size) { + if($strlen < $block_size or $] > 5.022) { return rindex($$ref, $match, $pos); } @@ -1433,42 +1432,8 @@ sub write_record_to_pipe($$$$$$) { start_more_jobs(); } my $job = shift @Global::virgin_jobs; - # Job is no longer virgin - $job->set_virgin(0); - - if($opt::retries) { - # Copy $buffer[0..$endpos] to $job->{'block'} - # Remove rec_sep - # Run $job->add_transfersize - $job->set_block($header_ref, $buffer_ref, $endpos, - $recstart, $recend); - if(fork()) { - # Skip - } else { - $job->write($job->block_ref()); - close $job->fh(0,"w"); - exit(0); - } - } else { - # We ignore the removed rec_sep which is technically wrong. - $job->add_transfersize($endpos + length $$header_ref); - if(fork()) { - # Skip - } else { - # Chop of at $endpos as we do not know how many rec_sep will - # be removed. - substr($$buffer_ref,$endpos,length $$buffer_ref) = ""; - # Remove rec_sep - if($opt::remove_rec_sep) { - Job::remove_rec_sep($buffer_ref, $recstart, $recend); - } - $job->write($header_ref); - $job->write($buffer_ref); - close $job->fh(0,"w"); - exit(0); - } - } - close $job->fh(0,"w"); + $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); + $job->write_block(); return 1; } @@ -2179,7 +2144,7 @@ sub check_invalid_option_combinations() { sub init_globals() { # Defaults: - $Global::version = 20201207; + $Global::version = 20201212; $Global::progname = 'parallel'; $::name = "GNU Parallel"; $Global::infinity = 2**31; @@ -3455,15 +3420,7 @@ sub init_run_jobs() { push(@Global::virgin_jobs,$job); } else { # Block already set: This is a retry - if(fork()) { - ::debug("pipe","\n\nWriting ",length ${$job->block_ref()}, - " to ", $job->seq(),"\n"); - close $job->fh(0,"w"); - } else { - $job->write($job->block_ref()); - close $job->fh(0,"w"); - exit(0); - } + $job->write_block(); } } debug("start", "Started as seq ", $job->seq(), @@ -8468,8 +8425,6 @@ sub filter_through_compress($) { } } - - sub set_fh($$$$) { # Set file handle my ($self, $fd_no, $key, $fh) = @_; @@ -8482,6 +8437,32 @@ sub fh($) { return $self->{'fd'}{$fd_no,$key}; } +sub write_block($) { + my $self = shift; + my $stdin_fh = $self->fh(0,"w"); + if(fork()) { + # Close in parent + close $stdin_fh; + } else { + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + + for my $part ( + grep { defined $_ } + $self->{'header'},$self->{'block'}) { + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$part)) { + substr($$part,0,$written) = ""; + } + } + close $stdin_fh; + exit(0); + } +} + sub write($) { my $self = shift; my $remaining_ref = shift; @@ -8514,22 +8495,37 @@ sub set_block($$$$$$) { # N/A my $self = shift; my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; - $self->{'block'} = ($self->virgin() ? $$header_ref : ""). - substr($$buffer_ref,0,$endpos); - if($opt::remove_rec_sep) { - remove_rec_sep(\$self->{'block'},$recstart,$recend); + $self->{'header'} = $header_ref; + if($opt::roundrobin or $opt::remove_rec_sep or $opt::retries) { + my $a = ""; + if(($opt::roundrobin or $opt::retries) and $self->virgin()) { + $a .= $$header_ref; + } + # Job is no longer virgin + $self->set_virgin(0); + # Make a full copy because $buffer will change + $a .= substr($$buffer_ref,0,$endpos); + $self->{'block'} = \$a; + if($opt::remove_rec_sep) { + remove_rec_sep($self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length ${$self->{'block'}}; + } else { + $self->set_virgin(0); + for(substr($$buffer_ref,0,$endpos)) { + $self->{'block'} = \$_; + } + $self->{'block_length'} = $endpos + length ${$self->{'header'}}; } - $self->{'block_length'} = length $self->{'block'}; $self->{'block_pos'} = 0; $self->add_transfersize($self->{'block_length'}); } sub block_ref($) { my $self = shift; - return \$self->{'block'}; + return $self->{'block'}; } - sub block_length($) { my $self = shift; return $self->{'block_length'}; @@ -8564,7 +8560,7 @@ sub non_blocking_write($) { my $in = $self->fh(0,"w"); my $rv = syswrite($in, - substr($self->{'block'},$self->{'block_pos'})); + substr(${$self->{'block'}},$self->{'block_pos'})); if (!defined($rv) && $! == ::EAGAIN()) { # would block - but would have written $something_written = 0; diff --git a/testsuite/REQUIREMENTS b/testsuite/REQUIREMENTS index dc120110..405ddc30 100644 --- a/testsuite/REQUIREMENTS +++ b/testsuite/REQUIREMENTS @@ -12,15 +12,14 @@ install_packages() { test_pkgs="$test_pkgs xterm libc6-i386 libcrypt1:i386" test_pkgs="$test_pkgs libtest-nowarnings-perl" - # DEBIAN package + # Debian package packaging_pkgs="dpkg-dev build-essential debhelper osc cvs automake-1.15" packaging_pkgs="$packaging_pkgs python3-m2crypto alien" - # SHEBANG TOOLS + # Shebang Tools shebang_pkgs="gnuplot octave ruby r-base-core" - # BUILD TOOLS - build_pkgs="bison" - # SQL TOOLS + + # SQL Tools sql_pkgs="libdbd-pg-perl libdbd-sqlite3-perl libdbd-csv-perl" sql_pkgs="$sql_pkgs libdbd-mysql-perl rlwrap" @@ -37,8 +36,8 @@ install_packages() { # Databases database_pkgs="postgresql mysql-server sqlite" - # Build packages - build_pkgs="libxxhash-dev libzstd-dev liblz4-dev libssl-dev" + # Build Tools + build_pkgs="bison libxxhash-dev libzstd-dev liblz4-dev libssl-dev" build_pkgs="$build_pkgs python3-cmarkgfm" sudo dpkg --add-architecture i386; sudo apt update @@ -351,11 +350,13 @@ rsync_versions() { cd rsync-$1 git reset --hard git checkout $1 + perl -i -pe 's/AC_DEFINE_UNQUOTED.HAVE_REMSH, .HAVE_REMSH./AC_DEFINE_UNQUOTED(HAVE_REMSH, \$HAVE_REMSH,[dummy])/; + s/AC_DEFINE.HAVE_ERRNO_DECL.,/AC_DEFINE(HAVE_ERRNO_DECL,[1],[dummy]),/; + s/AC_DEFINE.HAVE_FNMATCH.,/AC_DEFINE(HAVE_FNMATCH,[1],[dummy]),/;' configure.in autoreconf --install -W gnu - make proto # Make "lib/addrinfo.h" ? LDFLAGS=-static ./configure && - make -j2 && + (make proto; make -j2) && sudo cp rsync /usr/local/bin/rsync-$1 } export -f make_one diff --git a/testsuite/tests-to-run/parallel-local-0.3s.sh b/testsuite/tests-to-run/parallel-local-0.3s.sh index 0894945c..ada4be49 100644 --- a/testsuite/tests-to-run/parallel-local-0.3s.sh +++ b/testsuite/tests-to-run/parallel-local-0.3s.sh @@ -232,13 +232,17 @@ par_delimiter() { par_argfile() { echo '### Test -a and --arg-file: Read input from file instead of stdin' - seq 1 10 >/tmp/parallel_$$-1; parallel -k -a /tmp/parallel_$$-1 echo; rm /tmp/parallel_$$-1 - seq 1 10 >/tmp/parallel_$$-2; parallel -k --arg-file /tmp/parallel_$$-2 echo; rm /tmp/parallel_$$-2 + tmp=$(mktemp) + seq 1 10 >$tmp + parallel -k -a $tmp echo + parallel -k --arg-file $tmp echo + rm $tmp } par_pipe_unneeded_procs() { echo '### Test bug #34241: --pipe should not spawn unneeded processes' - seq 3 | parallel -j30 --pipe --block-size 10 cat\;echo o 2> >(grep -Ev 'Warning: Starting|Warning: Consider') + seq 3 | + parallel -j30 --pipe --block-size 10 cat\;echo o 2> >(grep -Ev 'Warning: Starting|Warning: Consider') } par_results_arg_256() { @@ -257,21 +261,28 @@ par_pipe_to_func() { echo pipefunc and more OK | parallel --pipe 'myfunc {#};echo and more OK' } +par_roundrobin_k() { + echo '### Test -k --round-robin' + seq 1000000 | parallel -j4 -k --round-robin --pipe wc +} + par_pipepart_roundrobin() { echo '### bug #45769: --round-robin --pipepart gives wrong results' - seq 10000 >/tmp/seq10000 - parallel -j2 --pipepart -a /tmp/seq10000 --block 14 --round-robin wc | wc -l - rm /tmp/seq10000 + tmp=$(mktemp) + seq 10000 >$tmp + parallel -j2 --pipepart -a $tmp --block 14 --round-robin wc | wc -l + rm $tmp } par_pipepart_header() { echo '### bug #44614: --pipepart --header off by one' - seq 10 >/tmp/parallel_44616 - parallel --pipepart -a /tmp/parallel_44616 -k --block 5 'echo foo; cat' - parallel --pipepart -a /tmp/parallel_44616 -k --block 2 --regexp --recend 3'\n' 'echo foo; cat' - rm /tmp/parallel_44616 + tmp=$(mktemp) + seq 10 >$tmp + parallel --pipepart -a $tmp -k --block 5 'echo foo; cat' + parallel --pipepart -a $tmp -k --block 2 --regexp --recend 3'\n' 'echo foo; cat' + rm $tmp } par_quote() { @@ -297,9 +308,10 @@ par_read_from_stdin() { par_total_from_joblog() { echo 'bug #47086: [PATCH] Initialize total_completed from joblog' - rm -f /tmp/parallel-47086 - parallel -j1 --joblog /tmp/parallel-47086 --halt now,fail=1 echo '{= $_=$Global::total_completed =};exit {}' ::: 0 0 0 1 0 0 - parallel -j1 --joblog /tmp/parallel-47086 --halt now,fail=1 --resume echo '{= $_=$Global::total_completed =};exit {}' ::: 0 0 0 1 0 0 + tmp=$(mktemp) + parallel -j1 --joblog $tmp --halt now,fail=1 echo '{= $_=$Global::total_completed =};exit {}' ::: 0 0 0 1 0 0 + parallel -j1 --joblog $tmp --halt now,fail=1 --resume echo '{= $_=$Global::total_completed =};exit {}' ::: 0 0 0 1 0 0 + rm $tmp } par_xapply() { @@ -430,9 +442,11 @@ par_empty_line() { par_append_joblog() { echo '### can you append to a joblog using +' - parallel --joblog /tmp/parallel_append_joblog echo ::: 1 - parallel --joblog +/tmp/parallel_append_joblog echo ::: 1 - wc -l /tmp/parallel_append_joblog + tmp=$(mktemp) + parallel --joblog $tmp echo ::: 1 + parallel --joblog +$tmp echo ::: 1 + wc -l < $tmp + rm $tmp } par_file_ending_in_newline() { @@ -441,7 +455,7 @@ par_file_ending_in_newline() { echo >/tmp/parallel_f2' ' echo /tmp/parallel_f1 /tmp/parallel_f2 | - stdout parallel -kv --delimiter ' ' gzip + stdout parallel -kv --delimiter ' ' gzip rm /tmp/parallel_f* } @@ -606,11 +620,12 @@ par_pipe_tag_v() { par_dryrun_append_joblog() { echo '--dry-run should not append to joblog' - rm -f /tmp/jl.$$ - parallel -k --jl /tmp/jl.$$ echo ::: 1 2 3 - parallel --dryrun -k --jl +/tmp/jl.$$ echo ::: 1 2 3 4 + tmp=$(mktemp) + parallel -k --jl $tmp echo ::: 1 2 3 + parallel --dryrun -k --jl +$tmp echo ::: 1 2 3 4 # Job 4 should not show up: 3 lines + header = 4 - wc -l < /tmp/jl.$$ + wc -l < $tmp + rm $tmp } par_0_no_newline() { @@ -660,8 +675,9 @@ par_slow_pipe_regexp() { par_results() { echo "### --results test.csv" - parallel -k --results /tmp/$$.csv echo ::: a b c - rm /tmp/$$.csv + tmp=$(mktemp) + parallel -k --results "$tmp"-dir echo ::: a b c + rm -r $tmp "$tmp"-dir } par_testquote() { @@ -874,7 +890,7 @@ par_group-by_colsep_space() { } par_json() { - printf '"\t\\"' | parallel --results -.json echo :::: - ::: '"' '\\' | + printf '"\t\\"' | parallel -k --results -.json echo :::: - ::: '"' '\\' | perl -pe 's/\d/0/g' } diff --git a/testsuite/tests-to-run/parallel-local-race01.sh b/testsuite/tests-to-run/parallel-local-race01.sh index 15889db5..0fa5a6bf 100755 --- a/testsuite/tests-to-run/parallel-local-race01.sh +++ b/testsuite/tests-to-run/parallel-local-race01.sh @@ -111,17 +111,17 @@ par_delay_Xauto() { exit $m;' $1; } export -f doit - #seq 1000 | ppar --jl - -v --delay 0.1auto -q doit "$tmp" before=`date +%s` out=$(seq 30 | parallel --delay 0.03 -q doit "$tmp") after=`date +%s` - normaldiff=$((after-before)) + # Round to 5 seconds + normaldiff=$(( (after-before)/5 )) echo $normaldiff before=`date +%s` out=$(seq 30 | parallel --delay 0.03auto -q doit "$tmp") after=`date +%s` - autodiff=$((after-before)) + autodiff=$(( (after-before)/5 )) echo $autodiff rm "$tmp" diff --git a/testsuite/wanted-results/parallel-local-0.3s b/testsuite/wanted-results/parallel-local-0.3s index d5042b01..e78dd711 100644 --- a/testsuite/wanted-results/parallel-local-0.3s +++ b/testsuite/wanted-results/parallel-local-0.3s @@ -14,7 +14,7 @@ par_X_eta_div_zero 0:local / 0 / 0 par_append_joblog ### can you append to a joblog using + par_append_joblog 1 par_append_joblog 1 -par_append_joblog 3 /tmp/parallel_append_joblog +par_append_joblog 3 par_argfile ### Test -a and --arg-file: Read input from file instead of stdin par_argfile 1 par_argfile 2 @@ -828,6 +828,11 @@ par_retries_replacement_string 22 par_retries_replacement_string 33 par_retries_replacement_string 33 par_retries_replacement_string 33 +par_roundrobin_k ### Test -k --round-robin +par_roundrobin_k 315464 315464 2097143 +par_roundrobin_k 299592 299592 2097144 +par_roundrobin_k 235148 235148 1646037 +par_roundrobin_k 149796 149796 1048572 par_rpl_that_is_substring_of_longer_rpl ### --rpl % that is a substring of longer --rpl %D par_rpl_that_is_substring_of_longer_rpl a.b/c.d/e.f=a.b/c.d/e.f par_rpl_that_is_substring_of_longer_rpl a.b/c.d=a.b/c.d diff --git a/testsuite/wanted-results/parallel-local-race01 b/testsuite/wanted-results/parallel-local-race01 index b7c41b2a..d67c97d0 100644 --- a/testsuite/wanted-results/parallel-local-race01 +++ b/testsuite/wanted-results/parallel-local-race01 @@ -7,8 +7,8 @@ par_bug56403 1 job1b par_bug56403 2 job2cjob2d par_delay_Xauto TODO: --retries for those that fail and --sshdelay par_delay_Xauto ### bug #58911: --delay Xauto -par_delay_Xauto 1 -par_delay_Xauto 25 +par_delay_Xauto 0 +par_delay_Xauto 5 par_distribute_input_by_ability ### bug #48290: round-robin does not distribute data based on business par_distribute_input_by_ability ### Distribute input to jobs that are ready par_distribute_input_by_ability Job-slot n is 50% slower than n+1, so the order should be 1..7