From f07696cc004ac6f76fb3bdc9614d4dbd3782eead Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Thu, 15 Dec 2022 22:30:15 +0100 Subject: [PATCH] parallel: --ll should not print empty lines. --- NEWS | 17 ++++++++ doc/haikus | 6 +-- doc/release_new_version | 18 +++++---- src/parallel | 19 ++++++++- src/parallel.pod | 31 +++++++++------ testsuite/tests-to-run/parallel-freebsd.sh | 1 + testsuite/tests-to-run/parallel-local-10s.sh | 42 ++++++++++++++++++++ testsuite/tests-to-run/parallel-local-30s.sh | 42 -------------------- testsuite/wanted-results/parallel-local-10s | 5 +++ testsuite/wanted-results/parallel-local-30s | 5 --- 10 files changed, 115 insertions(+), 71 deletions(-) diff --git a/NEWS b/NEWS index 84d98c81..ad02be75 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,20 @@ +20221222 + +New in this release: + +* --results works on more file systems (e.g. fat) + +* Joblog gives the same exit code as bash. + +News about GNU Parallel: + +* Programação Shell Linux: Paralelismo de processos com GNU parallel + https://www.youtube.com/watch?v=duheTWLIrp8 + +* Talk Python: Data Science from the Command Line + https://talkpython.fm/episodes/show/392/data-science-from-the-command-line + + 20221122 New in this release: diff --git a/doc/haikus b/doc/haikus index 82a38abc..a090a768 100644 --- a/doc/haikus +++ b/doc/haikus @@ -4,9 +4,6 @@ Quote of the month: - GNU Parallel absolutely rocks. - -- Austin Mordahl@Stackoverflow - Got around to using GNU parallel for the first time from a suggestion by @jdwasmuth ... now I'm wishing I started using this years ago -- Stefan Gavriliuc @GavriliucStefan@twitter @@ -207,6 +204,9 @@ https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html === Used === + GNU Parallel absolutely rocks. + -- Austin Mordahl@Stackoverflow + [GNU Parallel] is the most amazing tool ever invented for bioinformatics! 
-- Istvan Albert https://www.ialbert.me/ diff --git a/doc/release_new_version b/doc/release_new_version index 039ee42b..e53d4a30 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -23,6 +23,7 @@ With the same things that goes in the announce mail Start termux on Android Start: wssh mac ping fi.dk +niceload --prg firefox & cd testsuite; make mem; make polar; make long; make short == Update version == @@ -232,9 +233,9 @@ http://freshmeat.net/projects/parallel/releases/new == Update Mastodon Twitter == -https://hostux.social/web/@GNU_Parallel +https://hostux.social/@GNU_Parallel -https://joindiaspora.com/stream +# Dead: https://joindiaspora.com/stream New release of #GNU Parallel pi.dk/0 New in this release pi.dk/2 @@ -261,24 +262,25 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20221222 ('ChatGPT/Mauneloa/#A4Revolution/#A4革命/#MahsaAmini') released +Subject: GNU Parallel 20221222 ('ChatGPT') released -GNU Parallel 20221222 ('') has been released. It is available for download at: lbry://@GnuParallel:4 +GNU Parallel 20221222 ('ChatGPT') has been released. It is available for download at: lbry://@GnuParallel:4 Quote of the month: - <<>> + GNU Parallel absolutely rocks. + -- Austin Mordahl@Stackoverflow New in this release: * --results works on more file systems (e.g. fat) -* <<>> +* Joblog gives the same exit code as bash. News about GNU Parallel: -https://www.youtube.com/watch?v=duheTWLIrp8 -https://talkpython.fm/episodes/show/392/data-science-from-the-command-line +* Programação Shell Linux: Paralelismo de processos com GNU parallel https://www.youtube.com/watch?v=duheTWLIrp8 +* Talk Python: Data Science from the Command Line https://talkpython.fm/episodes/show/392/data-science-from-the-command-line GNU Parallel - For people who live life in the parallel lane. 
diff --git a/src/parallel b/src/parallel index 1a4622e4..25d7602d 100755 --- a/src/parallel +++ b/src/parallel @@ -11335,6 +11335,7 @@ sub print_files($) { sub print_latest_line($) { my $self = shift; my $out_fh = shift; + if(not defined $self->{$out_fh,'latestline'}) { return; } my $row = $self->row(); # Is row visible? if(not ($minvisible <= $row @@ -11440,6 +11441,12 @@ sub print_files($) { # Keep the latest full line my $l = join('', @$halfline_ref, substr($buf,0,$i-1)); + # "ab\rb\n" = "bb", but we cannot process that correctly. + # Line may be: + # foo \r bar \n + # foo \r bar \r baz \r + # If so: Remove 'foo \r' + $l =~ s/.*\r//g; my $j = ((rindex($l,"\n")+1) || (rindex($l,"\r")+1)); $self->{$out_fh,'latestline'} = substr($l,$j); @@ -11507,7 +11514,17 @@ sub print_files($) { if($opt::latestline) { # Force re-computing color if --colorfailed if($opt::colorfailed) { delete $self->{'color'}; } - $self->print_latest_line($out_fh); + if($self->{$out_fh,'latestline'} ne "") { + $self->print_latest_line($out_fh); + } + if(@{$self->{'halfline'}{$fdno}}) { + my $l = join('', @{$self->{'halfline'}{$fdno}}); + if($l ne "") { + $self->{$out_fh,'latestline'} = $l; + } + } else { + $self->{$out_fh,'latestline'} = undef; + } # Print latest line from jobs that are already done while($print_later{$minvisible}) { $print_later{$minvisible}->print_latest_line($out_fh); diff --git a/src/parallel.pod b/src/parallel.pod index 92d402bb..dcf81cbf 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -61,7 +61,7 @@ often be used as a substitute for B<xargs> or B<cat | bash>. =head2 Reader's guide GNU B<parallel> includes the 4 types of documentation: Tutorial, -how-to, reference and explanation. +how-to, reference and explanation/design. =head3 Tutorial @@ -86,13 +86,19 @@ parallel_examples>. They will give you an idea of what GNU B<parallel> is capable of, and you may find a solution you can simply adapt to your situation. 
+If the examples do not cover your exact needs, the options map +(https://www.gnu.org/software/parallel/parallel_options_map.pdf) can +help you identify options that are related, so you can look these up +in the man page. + =head3 Reference If you need a one page printable cheat sheet you can find it on: https://www.gnu.org/software/parallel/parallel_cheat.pdf -The man page is the reference for all options. +The man page is the reference for all options, and reading the man +page from cover to cover is probably not what you need. =head3 Design discussion @@ -1025,7 +1031,7 @@ temporary fifo with the name in B<{}>, so you can do: Beware: If the fifo is never opened for reading, the job will block forever: - seq 1000000 | parallel --fifo echo This will block + seq 1000000 | parallel --fifo echo This will block forever seq 1000000 | parallel --fifo 'echo This will not block < {}' By using B<--fifo> instead of B<--cat> you may save I/O as B<--cat> @@ -1075,7 +1081,7 @@ Behave like GNU B<parallel>. This option historically took precedence over B<--tollef>. The B<--tollef> option is now retired, and therefore may not be -used. B<--gnu> is kept for compatibility. +used. B<--gnu> is kept for compatibility, but does nothing. =item B<--group> @@ -1087,10 +1093,9 @@ command is finished. Stdout (standard output) first followed by stderr (standard error). This takes in the order of 0.5ms CPU time per job and depends on the -speed of your disk for larger output. It can be disabled with B<-u>, -but this means output from different commands can get mixed. +speed of your disk for larger output. -B<--group> is the default. Can be reversed with B<-u>. +B<--group> is the default. See also: B<--line-buffer> B<--ungroup> B<--tag> @@ -1627,9 +1632,9 @@ job continuously while it is running, then lines from the second job while that is running. It will buffer full lines, but jobs will not mix. 
Compare: - parallel -j0 'echo {};sleep {};echo {}' ::: 1 3 2 4 - parallel -j0 --lb 'echo {};sleep {};echo {}' ::: 1 3 2 4 - parallel -j0 -k --lb 'echo {};sleep {};echo {}' ::: 1 3 2 4 + parallel -j0 'echo [{};sleep {};echo {}]' ::: 1 3 2 4 + parallel -j0 --lb 'echo [{};sleep {};echo {}]' ::: 1 3 2 4 + parallel -j0 -k --lb 'echo [{};sleep {};echo {}]' ::: 1 3 2 4 See also: B<--group> B<--ungroup> B<--keep-order> B<--tag> @@ -1948,8 +1953,10 @@ See also: B<--profile> Add more replacement strings. Activate additional replacement strings: {+/} {+.} {+..} {+...} {..} -{...} {/..} {/...} {##}. The idea being that '{+foo}' matches the opposite of -'{foo}' and {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} = +{...} {/..} {/...} {##}. The idea being that '{+foo}' matches the +opposite of '{foo}' so that: + +{} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} = {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...} B<{##}> is the total number of jobs to be run. It is incompatible with diff --git a/testsuite/tests-to-run/parallel-freebsd.sh b/testsuite/tests-to-run/parallel-freebsd.sh index 93510334..bff12950 100644 --- a/testsuite/tests-to-run/parallel-freebsd.sh +++ b/testsuite/tests-to-run/parallel-freebsd.sh @@ -69,6 +69,7 @@ par_load() { echo '### Test --load (must give 1=true)' parallel -j0 -N0 --timeout 5 --nice 10 'bzip2 < /dev/zero >/dev/null' ::: 1 2 3 4 5 6 & parallel --argsep ,, --joblog - -N0 parallel --load 100% echo ::: 1 ,, 1 | + # Must take > 5 sec parallel -k --colsep '\t' --header : echo '{=4 $_=$_>5=}' } diff --git a/testsuite/tests-to-run/parallel-local-10s.sh b/testsuite/tests-to-run/parallel-local-10s.sh index 9b3e7a51..d80a2fe0 100644 --- a/testsuite/tests-to-run/parallel-local-10s.sh +++ b/testsuite/tests-to-run/parallel-local-10s.sh @@ -8,6 +8,48 @@ # Each should be taking 10-30s and be possible to run in parallel # I.e.: No race conditions, no logins +par_keeporder_roundrobin() { + echo 'bug #50081: --keep-order --round-robin should give 
predictable results' + . `which env_parallel.bash` + + run_roundrobin() { + random1G() { + < /dev/zero openssl enc -aes-128-ctr -K 1234 -iv 1234 2>/dev/null | + head -c 1G; + } + random1G | + parallel $1 -j13 --block 1m --pipe --roundrobin 'echo {#} $(md5sum)' | + sort + } + env_parset a,b,c run_roundrobin ::: -k -k '' + + if [ "$a" == "$b" ] ; then + # Good: -k should be == -k + if [ "$a" == "$c" ] ; then + # Bad: without -k the command should give different output + echo 'Broken: a == c' + printf "$a\n$b\n$c\n" + else + echo OK + fi + else + echo 'Broken: a <> b' + printf "$a\n$b\n$c\n" + fi +} + +par_load_from_PARALLEL() { + echo "### Test reading load from PARALLEL" + export PARALLEL="--load 300%" + # Ignore stderr due to 'Starting processes took > 2 sec' + seq 1 1000000 | + parallel -kj200 --recend "\n" --spreadstdin gzip -1 2>/dev/null | + zcat | sort -n | md5sum + seq 1 1000000 | + parallel -kj20 --recend "\n" --spreadstdin gzip -1 | + zcat | sort -n | md5sum +} + par_quote_special_results() { doit() { mkfs=$1 diff --git a/testsuite/tests-to-run/parallel-local-30s.sh b/testsuite/tests-to-run/parallel-local-30s.sh index 10c79543..f640b29a 100755 --- a/testsuite/tests-to-run/parallel-local-30s.sh +++ b/testsuite/tests-to-run/parallel-local-30s.sh @@ -112,18 +112,6 @@ par_shard() { perl -pe 's/(.*\d{5,}){3}/OK/' } -par_load_from_PARALLEL() { - echo "### Test reading load from PARALLEL" - export PARALLEL="--load 300%" - # Ignore stderr due to 'Starting processes took > 2 sec' - seq 1 1000000 | - parallel -kj200 --recend "\n" --spreadstdin gzip -1 2>/dev/null | - zcat | sort -n | md5sum - seq 1 1000000 | - parallel -kj20 --recend "\n" --spreadstdin gzip -1 | - zcat | sort -n | md5sum -} - par_exit_code() { echo 'bug #52207: Exit status 0 when child job is killed, even with "now,fail=1"' in_shell_run_command() { @@ -558,36 +546,6 @@ par_plus_dyn_repl() { echo $myvar | parallel --plus echo {} {/%def/} } -par_keeporder_roundrobin() { - echo 'bug #50081: --keep-order 
--round-robin should give predictable results' - . `which env_parallel.bash` - - run_roundrobin() { - random1G() { - < /dev/zero openssl enc -aes-128-ctr -K 1234 -iv 1234 2>/dev/null | - head -c 1G; - } - random1G | - parallel $1 -j13 --block 1m --pipe --roundrobin 'echo {#} $(md5sum)' | - sort - } - env_parset a,b,c run_roundrobin ::: -k -k '' - - if [ "$a" == "$b" ] ; then - # Good: -k should be == -k - if [ "$a" == "$c" ] ; then - # Bad: without -k the command should give different output - echo 'Broken: a == c' - printf "$a\n$b\n$c\n" - else - echo OK - fi - else - echo 'Broken: a <> b' - printf "$a\n$b\n$c\n" - fi -} - par_test_ipv6_format() { echo '### Host as IPv6 address' ( diff --git a/testsuite/wanted-results/parallel-local-10s b/testsuite/wanted-results/parallel-local-10s index c47fb978..4e964c69 100644 --- a/testsuite/wanted-results/parallel-local-10s +++ b/testsuite/wanted-results/parallel-local-10s @@ -518,6 +518,8 @@ par_k_linebuffer 20 20 par_k_linebuffer stdout top par_k_linebuffer stderr in the middle par_k_linebuffer stdout end +par_keeporder_roundrobin bug #50081: --keep-order --round-robin should give predictable results +par_keeporder_roundrobin OK par_kill_children_timeout ### Test killing children with --timeout and exit value (failed if timed out) par_kill_children_timeout 0 0 0 par_kill_children_timeout parallel: Warning: This job was killed because it timed out: @@ -594,6 +596,9 @@ par_ll_long_line par_ll_long_line 30xxxxxxxxxxxxxxxxxxxxxxxxxxx> par_load_blocks ### Test if --load blocks. Bug. 
par_load_blocks 53d025127ae99ab79e8502aae2d9bea6 - par_load_blocks 53d025127ae99ab79e8502aae2d9bea6 - +par_load_from_PARALLEL ### Test reading load from PARALLEL +par_load_from_PARALLEL 8a7095c1c23bfadc311fe6b16d950582 - +par_load_from_PARALLEL 8a7095c1c23bfadc311fe6b16d950582 - par_long_line_remote ### Deal with long command lines on remote servers par_long_line_remote 6 6 30006 par_long_line_remote 6 50 250050 diff --git a/testsuite/wanted-results/parallel-local-30s b/testsuite/wanted-results/parallel-local-30s index 7890ca7a..b15176c4 100644 --- a/testsuite/wanted-results/parallel-local-30s +++ b/testsuite/wanted-results/parallel-local-30s @@ -1464,11 +1464,6 @@ par_groupby_pipepart csv , s/^(\d+[\t ,]+){2}(\d+).*/$2/ NewRec par_groupby_pipepart csv , s/^(\d+[\t ,]+){2}(\d+).*/$2/ 90001 90001 1170031 par_groupby_pipepart csv , s/^(\d+[\t ,]+){2}(\d+).*/$2/ NewRec par_groupby_pipepart csv , s/^(\d+[\t ,]+){2}(\d+).*/$2/ 90001 90001 1170031 -par_keeporder_roundrobin bug #50081: --keep-order --round-robin should give predictable results -par_keeporder_roundrobin OK -par_load_from_PARALLEL ### Test reading load from PARALLEL -par_load_from_PARALLEL 8a7095c1c23bfadc311fe6b16d950582 - -par_load_from_PARALLEL 8a7095c1c23bfadc311fe6b16d950582 - par_macron ### See if \257\256 \257<\257> is replaced correctly par_macron -X par_macron -X