From 987cc8c30b215dc29a7fc08e2b47b6c3fcd61672 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Wed, 14 Dec 2016 15:04:57 +0100 Subject: [PATCH] Fixed bug #49664: --round-robin does not complete. --- src/parallel | 63 +++--- testsuite/tests-to-run/parallel-local-10s.sh | 16 ++ testsuite/tests-to-run/parallel-local-3s.sh | 119 +++++----- testsuite/wanted-results/parallel-local-10s | 11 + testsuite/wanted-results/parallel-local-3s | 215 ++++++++----------- 5 files changed, 225 insertions(+), 199 deletions(-) diff --git a/src/parallel b/src/parallel index dcef9552..251d2e81 100755 --- a/src/parallel +++ b/src/parallel @@ -453,7 +453,9 @@ sub spreadstdin { } } } - if(not $anything_written and not eof($in)) { + if(not $anything_written + and not eof($in) + and not $Global::no_autoexpand_block) { # Nothing was written - maybe the block size < record size? # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) if($blocksize < $two_gb) { @@ -555,6 +557,7 @@ sub nindex { { my @robin_queue; + my $sleep = 1; sub round_robin_write { # Input: @@ -568,31 +571,41 @@ sub nindex { # Returns: # $something_written = amount of bytes written my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; - my $something_written = 0; + my $written = 0; my $block_passed = 0; - my $sleep = 1; while(not $block_passed) { # Continue flushing existing buffers # until one is empty and a new block is passed - # Make a queue to spread the blocks evenly - if(not @robin_queue) { + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another block + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly push @robin_queue, (sort { $a->seq() <=> $b->seq() } values %Global::running); } - while(my $job = shift @robin_queue) { - if($job->block_length() > 0) { - $something_written += $job->non_blocking_write(); - } else { - $job->set_block($header_ref,$buffer_ref,$endpos,$recstart,$recend); - $block_passed = 1; - $job->set_virgin(0); - $something_written += $job->non_blocking_write(); - last; + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref,$buffer_ref, + $endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $written += $job->non_blocking_write(); + last; + } } - } + if($written) { + $sleep = $sleep/1.5+0.001; + } + } while($written and not $block_passed); $sleep = ::reap_usleep($sleep); } - return $something_written; + return $written; } } @@ -1572,18 +1585,18 @@ sub find_compression_program { # git clone https://github.com/facebook/zstd.git # cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz - # chmod +x /usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz # seq 120000000|shuf > 1gb # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz" # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread # sort -nk4 jl-? - # 1-core: - # 2-cores: - # 4-cores: + # 1-core: + # 2-cores: + # 4-cores: # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip - # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip my @prg = qw(pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz @@ -4735,7 +4748,7 @@ sub memfree_recompute { # Pageins: 1798068. # Pageouts: 257. # Object cache: 6603 hits of 1713223 lookups (0% hit rate) - 'darwin' => + 'darwin' => q[ $vm = `vm_stat`; print (($vm =~ /page size of (\d+)/)[0] * (($vm =~ /Pages free:\s+(\d+)/)[0] + @@ -6753,7 +6766,9 @@ sub non_blocking_write { substr($self->{'block'},$self->{'block_pos'})); if (!defined($rv) && $! == EAGAIN) { # would block - but would have written - $something_written = 1; + $something_written = 0; + # avoid triggering auto expanding block + $Global::no_autoexpand_block ||= 1; } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { # incomplete write # Remove the written part @@ -9932,7 +9947,7 @@ sub total_jobs { # shorthand for $job->slot(); $job->slot(); } - + sub replace { # Calculates the corresponding value for a given perl expression # Returns: diff --git a/testsuite/tests-to-run/parallel-local-10s.sh b/testsuite/tests-to-run/parallel-local-10s.sh index 1233cd11..cb7eb71a 100644 --- a/testsuite/tests-to-run/parallel-local-10s.sh +++ b/testsuite/tests-to-run/parallel-local-10s.sh @@ -132,5 +132,21 @@ par_compress_fail() { seq 12 | parallel --compress -k seq {} 1000000 | md5sum } +par_distribute_input_by_ability() { + echo "### Distribute input to jobs that are ready" + echo "Job-slot n is 50% slower than n+1, so the order should be 1..7" + seq 20000000 | + parallel --tagstring {#} -j7 --block 300k --round-robin --pipe \ + 'pv -qL{=$_=$job->seq()**3+9=}0000 |wc -c' | + sort -nk2 | field 1 +} + +par_round_robin_blocks() { + echo "bug #49664: --round-robin does not complete" + + seq 20000000 | parallel --block 10M --round-robin --pipe wc -c | wc -l +} + + export -f $(compgen -A function | grep par_) compgen -A function | grep par_ | sort | parallel -j6 --tag -k '{} 2>&1' diff --git a/testsuite/tests-to-run/parallel-local-3s.sh b/testsuite/tests-to-run/parallel-local-3s.sh index dcb55ed6..dc1d0ae8 100644 --- a/testsuite/tests-to-run/parallel-local-3s.sh +++ b/testsuite/tests-to-run/parallel-local-3s.sh @@ -3,73 +3,82 @@ # Simple jobs that never fails # Each should be taking 3-10s and be possible to run in parallel # I.e.: No race conditions, no logins -cat <<'EOF' | sed -e 's/;$/; /;s/$SERVER1/'$SERVER1'/;s/$SERVER2/'$SERVER2'/' | stdout parallel -vj0 -k --joblog /tmp/jl-`basename $0` -L1 -echo '### bug #42089: --results with arg > 256 chars (should be 1 char shorter)' - parallel --results parallel_test_dir echo ::: 1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456; - ls parallel_test_dir/1/ - rm -rf parallel_test_dir -echo '**' +par_results_arg_256() { + echo '### bug #42089: --results with arg > 256 chars (should be 1 char shorter)' + parallel --results parallel_test_dir echo ::: 1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456; + ls parallel_test_dir/1/ + rm -rf parallel_test_dir +} -echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; - seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo +par_slow_args_generation() { + echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834' + seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo +} -echo '**' +par_kill_term_twice() { + echo '### Are children killed if GNU Parallel receives TERM twice? There should be no sleep at the end' -echo '### Are children killed if GNU Parallel receives TERM twice? There should be no sleep at the end' + parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & + T=$! + sleep 5 + pstree $$ + kill -TERM $T + sleep 1 + pstree $$ + kill -TERM $T + sleep 1 + pstree $$ +} - parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & - T=$!; - sleep 5; - pstree $$; - kill -TERM $T; - sleep 1; - pstree $$; - kill -TERM $T; - sleep 1; - pstree $$; +par_kill_int_twice() { + echo '### Are children killed if GNU Parallel receives INT twice? There should be no sleep at the end' -echo '**' + parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & + T=$! + sleep 5 + pstree $$ + kill -INT $T + sleep 1 + pstree $$ +} -echo '### Are children killed if GNU Parallel receives INT twice? There should be no sleep at the end' +par_children_receive_sig() { + echo '### Do children receive --termseq signals' - parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & - T=$!; - sleep 5; - pstree $$; - kill -INT $T; - sleep 1; - pstree $$; + show_signals() { + perl -e 'for(keys %SIG) { $SIG{$_} = eval "sub { print STDERR \"Got $_\\n\"; }";} while(1){sleep 1}'; + } + export -f show_signals + echo | stdout parallel --termseq TERM,200,TERM,100,TERM,50,KILL,25 -u \ + --timeout 1 show_signals -echo '**' + echo | stdout parallel --termseq INT,200,TERM,100,KILL,25 -u \ + --timeout 1 show_signals + sleep 3 +} -echo '### Do children receive --termseq signals' +par_wrong_slot_rpl_resume() { + echo '### bug #47644: Wrong slot number replacement when resuming' + seq 0 20 | + parallel -kj 4 --delay 0.2 --joblog /tmp/parallel-bug-47558 \ + 'sleep 1; echo {%} {=$_==10 and exit =}' + seq 0 20 | + parallel -kj 4 --resume --delay 0.2 --joblog /tmp/parallel-bug-47558 \ + 'sleep 1; echo {%} {=$_==110 and exit =}' +} - show_signals() { - perl -e 'for(keys %SIG) { $SIG{$_} = eval "sub { print STDERR \"Got $_\\n\"; }";} while(1){sleep 1}'; - }; - export -f show_signals; - echo | stdout parallel --termseq TERM,200,TERM,100,TERM,50,KILL,25 -u --timeout 1 show_signals; - echo | stdout parallel --termseq INT,200,TERM,100,KILL,25 -u --timeout 1 show_signals; - sleep 3; +par_pipepart_block() { + echo '### --pipepart --block -# (# < 0)' -echo '**' - -echo '### bug #47644: Wrong slot number replacement when resuming' - seq 0 20 | parallel -kj 4 --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==10 and exit =}'; - seq 0 20 | parallel -kj 4 --resume --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==110 and exit =}' - -echo '**' - -echo '### --pipepart --block -# (# < 0)' - - seq 1000 > /run/shm/parallel$$; - parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$; - parallel -j2 -k --block -1 --pipepart echo {#}-2 :::: /run/shm/parallel$$; - parallel -j2 -k --block -2 --pipepart echo {#}-4 :::: /run/shm/parallel$$; - parallel -j2 -k --block -10 --pipepart echo {#}-20 :::: /run/shm/parallel$$; + seq 1000 > /run/shm/parallel$$ + parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$ + parallel -j2 -k --block -1 --pipepart echo {#}-2 :::: /run/shm/parallel$$ + parallel -j2 -k --block -2 --pipepart echo {#}-4 :::: /run/shm/parallel$$ + parallel -j2 -k --block -10 --pipepart echo {#}-20 :::: /run/shm/parallel$$ rm /run/shm/parallel$$ +} -echo '**' -EOF +export -f $(compgen -A function | grep par_) +compgen -A function | grep par_ | sort | parallel -j6 --tag -k '{} 2>&1' diff --git a/testsuite/wanted-results/parallel-local-10s b/testsuite/wanted-results/parallel-local-10s index fb7a35b5..02851895 100644 --- a/testsuite/wanted-results/parallel-local-10s +++ b/testsuite/wanted-results/parallel-local-10s @@ -1,6 +1,15 @@ par_compress_fail ### bug #41609: --compress fails par_compress_fail 24812dd0f24a26d08a780f988b9d5ad2 - par_compress_fail 24812dd0f24a26d08a780f988b9d5ad2 - +par_distribute_input_by_ability ### Distribute input to jobs that are ready +par_distribute_input_by_ability Job-slot n is 50% slower than n+1, so the order should be 1..7 +par_distribute_input_by_ability 1 +par_distribute_input_by_ability 2 +par_distribute_input_by_ability 3 +par_distribute_input_by_ability 4 +par_distribute_input_by_ability 5 +par_distribute_input_by_ability 6 +par_distribute_input_by_ability 7 par_first_print_halt_on_error_1 ### Test first dying print --halt-on-error 1 par_first_print_halt_on_error_1 0 par_first_print_halt_on_error_1 parallel: This job failed: @@ -529,6 +538,8 @@ par_print_before_halt_on_error 2 exit code 1 par_print_before_halt_on_error 2 0 0 par_print_before_halt_on_error 2 parallel: This job failed: par_print_before_halt_on_error 2 perl -e sleep\ \$ARGV\[0\]\;print\ STDERR\ @ARGV,\"\\n\"\;\ 2\ \>\ 0\ \?\ exit\ shift\ :\ exit\ not\ shift\; 1 +par_round_robin_blocks bug #49664: --round-robin does not complete +par_round_robin_blocks 2 par_testhalt ### testhalt --halt now,fail=0 par_testhalt parallel: This job failed: par_testhalt sleep 1.3; exit 1 diff --git a/testsuite/wanted-results/parallel-local-3s b/testsuite/wanted-results/parallel-local-3s index 61a7c88f..c1b41843 100644 --- a/testsuite/wanted-results/parallel-local-3s +++ b/testsuite/wanted-results/parallel-local-3s @@ -1,120 +1,95 @@ -echo '### bug #42089: --results with arg > 256 chars (should be 1 char shorter)' -### bug #42089: --results with arg > 256 chars (should be 1 char shorter) - parallel --results parallel_test_dir echo ::: 1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456; ls parallel_test_dir/1/ -1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 -123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 - rm -rf parallel_test_dir -echo '**' -** -echo '### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834'; seq 1 3 | parallel -j1 "sleep 2; echo {}" | parallel -kj2 echo -### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834 -1 -2 -3 -echo '**' -** -echo '### Are children killed if GNU Parallel receives TERM twice? There should be no sleep at the end' -### Are children killed if GNU Parallel receives TERM twice? There should be no sleep at the end - parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & T=$!; sleep 5; pstree $$; kill -TERM $T; sleep 1; pstree $$; kill -TERM $T; sleep 1; pstree $$; echo '**' -bash-+-perl---bash---sleep - `-pstree -bash-+-perl---bash---sleep - `-pstree -bash---pstree -** -parallel: SIGTERM received. No new jobs will be started. -parallel: Waiting for these 1 jobs to finish. Send SIGTERM again to stop now. -parallel: bash -c sleep\ 120\ \&\ pid\=\$\!\;\ wait\ \$pid 1 -echo '### Are children killed if GNU Parallel receives INT twice? There should be no sleep at the end' -### Are children killed if GNU Parallel receives INT twice? There should be no sleep at the end - parallel -q bash -c 'sleep 120 & pid=$!; wait $pid' ::: 1 & T=$!; sleep 5; pstree $$; kill -INT $T; sleep 1; pstree $$; echo '**' -bash-+-perl---bash---sleep - `-pstree -bash---pstree -** -echo '### Do children receive --termseq signals' -### Do children receive --termseq signals - show_signals() { perl -e 'for(keys %SIG) { $SIG{$_} = eval "sub { print STDERR \"Got $_\\n\"; }";} while(1){sleep 1}'; }; export -f show_signals; echo | stdout parallel --termseq TERM,200,TERM,100,TERM,50,KILL,25 -u --timeout 1 show_signals; echo | stdout parallel --termseq INT,200,TERM,100,KILL,25 -u --timeout 1 show_signals; sleep 3; echo '**' -Got TERM -Got TERM -Got TERM -Got INT -Got TERM -** -echo '### bug #47644: Wrong slot number replacement when resuming' -### bug #47644: Wrong slot number replacement when resuming - seq 0 20 | parallel -kj 4 --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==10 and exit =}'; seq 0 20 | parallel -kj 4 --resume --delay 0.2 --joblog /tmp/parallel-bug-47558 'sleep 1; echo {%} {=$_==110 and exit =}' -1 0 -2 1 -3 2 -4 3 -1 4 -2 5 -3 6 -4 7 -1 8 -2 9 -3 10 -4 11 -1 12 -2 13 -3 14 -4 15 -1 16 -2 17 -3 18 -4 19 -1 20 -echo '**' -** -echo '### --pipepart --block -# (# < 0)' -### --pipepart --block -# (# < 0) - seq 1000 > /run/shm/parallel$$; parallel -j2 -k --pipepart echo {#} :::: /run/shm/parallel$$; parallel -j2 -k --block -1 --pipepart echo {#}-2 :::: /run/shm/parallel$$; parallel -j2 -k --block -2 --pipepart echo {#}-4 :::: /run/shm/parallel$$; parallel -j2 -k --block -10 --pipepart echo {#}-20 :::: /run/shm/parallel$$; rm /run/shm/parallel$$ -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -1-2 -2-2 -1-4 -2-4 -3-4 -4-4 -1-20 -2-20 -3-20 -4-20 -5-20 -6-20 -7-20 -8-20 -9-20 -10-20 -11-20 -12-20 -13-20 -14-20 -15-20 -16-20 -17-20 -18-20 -19-20 -20-20 -echo '**' -** +par_children_receive_sig ### Do children receive --termseq signals +par_children_receive_sig Got TERM +par_children_receive_sig Got TERM +par_children_receive_sig Got TERM +par_children_receive_sig Got INT +par_children_receive_sig Got TERM +par_kill_int_twice ### Are children killed if GNU Parallel receives INT twice? There should be no sleep at the end +par_kill_int_twice bash-+-perl---bash---sleep +par_kill_int_twice `-pstree +par_kill_int_twice bash---pstree +par_kill_term_twice ### Are children killed if GNU Parallel receives TERM twice? There should be no sleep at the end +par_kill_term_twice bash-+-perl---bash---sleep +par_kill_term_twice `-pstree +par_kill_term_twice parallel: SIGTERM received. No new jobs will be started. +par_kill_term_twice parallel: Waiting for these 1 jobs to finish. Send SIGTERM again to stop now. +par_kill_term_twice parallel: bash -c sleep\ 120\ \&\ pid\=\$\!\;\ wait\ \$pid 1 +par_kill_term_twice bash-+-perl---bash---sleep +par_kill_term_twice `-pstree +par_kill_term_twice bash---pstree +par_pipepart_block ### --pipepart --block -# (# < 0) +par_pipepart_block 1 +par_pipepart_block 2 +par_pipepart_block 3 +par_pipepart_block 4 +par_pipepart_block 5 +par_pipepart_block 6 +par_pipepart_block 7 +par_pipepart_block 8 +par_pipepart_block 9 +par_pipepart_block 10 +par_pipepart_block 11 +par_pipepart_block 12 +par_pipepart_block 13 +par_pipepart_block 14 +par_pipepart_block 15 +par_pipepart_block 16 +par_pipepart_block 17 +par_pipepart_block 18 +par_pipepart_block 19 +par_pipepart_block 20 +par_pipepart_block 1-2 +par_pipepart_block 2-2 +par_pipepart_block 1-4 +par_pipepart_block 2-4 +par_pipepart_block 3-4 +par_pipepart_block 4-4 +par_pipepart_block 1-20 +par_pipepart_block 2-20 +par_pipepart_block 3-20 +par_pipepart_block 4-20 +par_pipepart_block 5-20 +par_pipepart_block 6-20 +par_pipepart_block 7-20 +par_pipepart_block 8-20 +par_pipepart_block 9-20 +par_pipepart_block 10-20 +par_pipepart_block 11-20 +par_pipepart_block 12-20 +par_pipepart_block 13-20 +par_pipepart_block 14-20 +par_pipepart_block 15-20 +par_pipepart_block 16-20 +par_pipepart_block 17-20 +par_pipepart_block 18-20 +par_pipepart_block 19-20 +par_pipepart_block 20-20 +par_results_arg_256 ### bug #42089: --results with arg > 256 chars (should be 1 char shorter) +par_results_arg_256 1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 +par_results_arg_256 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 +par_slow_args_generation ### Test slow arguments generation - https://savannah.gnu.org/bugs/?32834 +par_slow_args_generation 1 +par_slow_args_generation 2 +par_slow_args_generation 3 +par_wrong_slot_rpl_resume ### bug #47644: Wrong slot number replacement when resuming +par_wrong_slot_rpl_resume 1 0 +par_wrong_slot_rpl_resume 2 1 +par_wrong_slot_rpl_resume 3 2 +par_wrong_slot_rpl_resume 4 3 +par_wrong_slot_rpl_resume 1 4 +par_wrong_slot_rpl_resume 2 5 +par_wrong_slot_rpl_resume 3 6 +par_wrong_slot_rpl_resume 4 7 +par_wrong_slot_rpl_resume 1 8 +par_wrong_slot_rpl_resume 2 9 +par_wrong_slot_rpl_resume 3 10 +par_wrong_slot_rpl_resume 4 11 +par_wrong_slot_rpl_resume 1 12 +par_wrong_slot_rpl_resume 2 13 +par_wrong_slot_rpl_resume 3 14 +par_wrong_slot_rpl_resume 4 15 +par_wrong_slot_rpl_resume 1 16 +par_wrong_slot_rpl_resume 2 17 +par_wrong_slot_rpl_resume 3 18 +par_wrong_slot_rpl_resume 4 19 +par_wrong_slot_rpl_resume 1 20