From b5b3d5dc3e128c162f0a7cc7d9d693401c01dae2 Mon Sep 17 00:00:00 2001
From: Ole Tange
Date: Sun, 31 May 2020 16:42:04 +0200
Subject: [PATCH] release procedure updated.

parallel: --plus --onall now works.
parallel: --blocktimeout must be >= 1.
---
 Makefile.am                                   |  23 ++
 Makefile.in                                   |  22 ++
 doc/haikus                                    |  12 +-
 doc/release_new_version                       | 155 +++-------
 packager/obs/home:tange/parallel/.osc/_files  |  10 +-
 packager/obs/home:tange/parallel/.osc/_meta   |   4 +-
 .../home:tange/parallel/.osc/parallel.spec    |   7 +-
 packager/releasescripts/updateversion         |  58 ++++
 src/parallel                                  |  10 +-
 src/parallel.pod                              |  12 +-
 src/parallel_alternatives.pod                 |   2 +
 src/parsort                                   | 278 ++++++++++++++++++
 testsuite/tests-to-run/parallel-local-ssh1.sh |   6 +
 testsuite/wanted-results/parallel-local-ssh1  |   3 +
 14 files changed, 476 insertions(+), 126 deletions(-)
 create mode 100755 packager/releasescripts/updateversion
 create mode 100755 src/parsort

diff --git a/Makefile.am b/Makefile.am
index 697daa1b..70da1c35 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -123,4 +123,27 @@ testurls:
 	mkdir -p urls
 	cd urls && grep -v '(dead)' ../src/* | grep -h -Po 'https?://[^ $$<>")}]+' | perl -pe 's/(>|\{).*//;s/\\-/-/g;s/\\n//g;s/&/&/g;s/&#.*//;'"s/'.*//" | grep -Ev 'parallel-(20)?$$|coolwebsite.biz' | sort -u | egrep -v 'example.com|##|\*\(' | parallel -j0 --timeout 33 --bar --tag --joblog joblog --retries 3 neno wget -m -l1 -Q1 '{=$$_=Q($$_)=}'
 
+# Regenerate the autotools files from scratch, then build and install.
+reconf:
+	rm -fr autom4te.cache aclocal.m4 config.h config.h.in config.log Makefile.in missing install-sh
+	rm -rf src/Makefile.in
+	autoreconf --install -W gnu
+	./configure
+	$(MAKE) -j
+	sudo $(MAKE) install
+
+# Build the tar.bz2, unpack it in /tmp, and build and install from the unpacked copy.
+pack_unpack_and_test_build:
+	echo '### Building tar.bz2'
+	./configure
+	$(MAKE) dist
+	$(MAKE) dist-bzip2
+	echo "### Unpack parallel-$(YYYYMMDD).tar.bz2"
+	cp parallel-$(YYYYMMDD).tar.bz2 /tmp
+	cd /tmp && \
+	tar xjf parallel-$(YYYYMMDD).tar.bz2 && \
+	cd parallel-$(YYYYMMDD) && \
+	./configure && $(MAKE) -j && sudo $(MAKE) -j install
+
+
 EXTRA_DIST = CITATION CREDITS cc-by-sa.txt fdl.txt
diff --git a/Makefile.in b/Makefile.in
index 538c1ea4..55c2ee69 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -871,6 +871,28 @@ testurls:
 	mkdir -p urls
 	cd urls && grep -v '(dead)' ../src/* | grep -h -Po 'https?://[^ $$<>")}]+' | perl -pe 's/(>|\{).*//;s/\\-/-/g;s/\\n//g;s/&/&/g;s/&#.*//;'"s/'.*//" | grep -Ev 'parallel-(20)?$$|coolwebsite.biz' | sort -u | egrep -v 'example.com|##|\*\(' | parallel -j0 --timeout 33 --bar --tag --joblog joblog --retries 3 neno wget -m -l1 -Q1 '{=$$_=Q($$_)=}'
 
+# Regenerate the autotools files from scratch, then build and install.
+reconf:
+	rm -fr autom4te.cache aclocal.m4 config.h config.h.in config.log Makefile.in missing install-sh
+	rm -rf src/Makefile.in
+	autoreconf --install -W gnu
+	./configure
+	$(MAKE) -j
+	sudo $(MAKE) install
+
+# Build the tar.bz2, unpack it in /tmp, and build and install from the unpacked copy.
+pack_unpack_and_test_build:
+	echo '### Building tar.bz2'
+	./configure
+	$(MAKE) dist
+	$(MAKE) dist-bzip2
+	echo "### Unpack parallel-$(YYYYMMDD).tar.bz2"
+	cp parallel-$(YYYYMMDD).tar.bz2 /tmp
+	cd /tmp && \
+	tar xjf parallel-$(YYYYMMDD).tar.bz2 && \
+	cd parallel-$(YYYYMMDD) && \
+	./configure && $(MAKE) -j && sudo $(MAKE) -j install
+
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/doc/haikus b/doc/haikus
index 2f33214f..c004213d 100644
--- a/doc/haikus
+++ b/doc/haikus
@@ -1,5 +1,9 @@
 Quote of the month:
-
+
+GNU parallel, which works a little bit like xargs, but has a much more friendly way of handling files with spaces and automatically parallelises calls. This tool has saved me a great deal of coding because it makes it so easy to write a program which does just one part of a task and then run it in parallel with load balancing and a nice progress bar. I cannot recommend this tool enough.
+ +https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html + Who needs spark when GNU Parallel exists -- MatthijsB @MatthijsBrs@twitter @@ -57,6 +61,12 @@ Quote of the month: === Used === + GNU Parallel: dead simple process-level parallelization of ad hoc + tasks. Write for a chunk, let gnu manage the splitting, permutations + and pool concurrency. + -- Nick Ursa @nickursa@twitter + + I wish more command line software had example pages as robust as GNU Parallel -- Lucidbeaming @lucidbeaming diff --git a/doc/release_new_version b/doc/release_new_version index 2a451206..60a5b041 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -10,116 +10,66 @@ Unmodified beta since last version => production Unmodified alpha since last version => beta Modified => alpha -== Update version == +== Update NEWS == -Get DOI: -https://zenodo.org/deposit/new (Reserve DOI) - -configure.ac: AC_INIT([parallel], [20100422], [bug-parallel@gnu.org]) -src/parallel: $Global::version = 20100422; -README: parallel-20130222 - -DOINO=3840974 -TAG=Kraftwerk -YYYYMMDD=$(echo `yyyymmdd`-1 | bc) -YYYYMMDD=$(echo `yyyymmdd`+1 | bc) -YYYYMMDD=`yyyymmdd` - -updater() { - export DOINO - export TAG - export YYYYMMDD - export DOI=10.5281/zenodo.$DOINO - export YYYY=${YYYYMMDD:0:4} - export MON=`date +%b` - export MONTH=`date +%B` - echo Tag=$TAG Date:$YYYYMMDD Year:$YYYY Mon:$MON Month:$MONTH DOI:$DOI - export TITLE="GNU Parallel $YYYYMMDD ('$TAG')" - - perl -i -pe "s/20\d\d\d\d\d\d/$YYYYMMDD/" configure.ac - perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/sql - perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/niceload - perl -i -pe "s/parallel-20\d\d\d\d\d\d/parallel-$YYYYMMDD/" README - perl -i -pe ' - # Update version 20209999 - /version/ and s/20\d\d\d\d\d\d/$ENV{YYYYMMDD}/; - # Update: 10.5281/zenodo.1146014 - s:10.5281/zenodo.\d+:$ENV{DOI}:; - # Update "@software{tange_2015_16303," - s:tange_\d+_\d+:tange_$ENV{YYYY}_$ENV{DOINO}:; - # Update 
month = mar, - s/(month\s+=\s+)\S+,",/$1$ENV{MON},",/; - # Update title = {GNU Parallel 20200522 ('Kraftwerk')},", - / title\s+= / and s/\{.*\}/{$ENV{TITLE}}/; - # Tange, O. (2020, May 22). GNU Parallel 20200522 ('Kraftwerk'). - s/(Tange, O. .).*(.. )(GNU.*[)])/$1$ENV{YYYY}, $ENV{MONTH} 22$2$ENV{TITLE}/; - ' src/parallel README - ( - ppar --help - ppar --citation - grep -i 'zenodo|tange' README - ) 2>&1 | grep -E '^ |^}|tange' - mv ~/.parallel/will-cite ~/.parallel/will-cite. - ppar ::: true - mv ~/.parallel/will-cite. ~/.parallel/will-cite -} -updater - -=== Autoconf/automake === - -rm -fr autom4te.cache aclocal.m4 config.h config.h.in config.log Makefile.in missing install-sh -rm -rf src/Makefile.in -autoreconf --install -W gnu -./configure -make -j && sudo make install +With the same things that goes in the announce mail == Testsuite == cd testsuite; make mem; make -== Update NEWS == +== Update version == -With the same things that goes in the announce mail +https://zenodo.org/deposit/new -== Package == +(*) Software +(Reserve DOI) +https://orcid.org/0000-0002-6345-1437 +Description +GNU Parallel is a general parallelizer to run multiple serial command line programs in parallel without changing them. -./configure -make dist -make dist-bzip2 +License: +gpl v3 -== Test the package == +[Save] -YYYYMMDD=`yyyymmdd` -cp parallel-$YYYYMMDD.tar.bz2 /tmp -pushd /tmp -tar xjvf parallel-$YYYYMMDD.tar.bz2 -cd parallel-$YYYYMMDD -./configure && make -j && sudo make -j install -pushd +DOINO=3841377 +TAG=Kraftwerk + +# Update version +. 
packager/releasescripts/updateversion + +=== Reconfig autoconf/automake, build tar.bz2 and test it === + +make reconf && +make pack_unpack_and_test_build == Upload == -YYYYMMDD=`yyyymmdd` -export YYYYMMDD +export YYYYMMDD=`yyyymmdd` +export YYYYMMDD=${YYYYMMDD:0:6}22 eval `gpg-agent --daemon` # Takes up to 8 minutes make upload # Only needed for alpha: -YYYYMMDD=`yyyymmdd` -export YYYYMMDD +export YYYYMMDD=`yyyymmdd` +export YYYYMMDD=${YYYYMMDD:0:6}22 eval `gpg-agent --daemon` # Takes up to 8 minutes make alphaupload == Update OpenSUSE build system == - +export YYYYMMDD=`yyyymmdd` +export YYYYMMDD=${YYYYMMDD:0:6}22 YYYYMMDD=`yyyymmdd` export YYYYMMDD cd ~/privat/parallel/packager/obs -find home:tange/parallel/* -type f | grep -v parallel.spec | parallel -Xj1 osc rm {} +find home:tange/parallel/* -type f | + grep -v parallel.spec | + parallel -j1 'osc rm {} || rm {}' # This should not create new files osc up home:tange/parallel/ make @@ -127,23 +77,6 @@ make https://build.opensuse.org/package/show/home:tange/parallel # Check that one .deb (Debian 5.0) and one .rpm (CentOS) end with 'succeeded' -== Download and test == - -# Only needed for alpha (part of 'make upload') - -YYYYMMDD=`yyyymmdd` -pushd /tmp -rm -rf parallel-${YYYYMMDD}* -# This can take 7 minutes -#while ! wget http://ftp.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2 ; do sleep 2; done -while ! wget http://alpha.gnu.org/gnu/parallel/parallel-$YYYYMMDD.tar.bz2 ; do sleep 2; done -tar xjvf parallel-$YYYYMMDD.tar.bz2 -cd parallel-$YYYYMMDD -./configure -make -j && sudo make -j install -pushd -sudo cp /usr/local/bin/parallel /usr/local/bin/parallel-$YYYYMMDD - == Update website == http://www.gnu.org/software/parallel/ @@ -172,7 +105,8 @@ git diff # Recheck OBS https://build.opensuse.org/package/show/home:tange/parallel -YYYYMMDD=`yyyymmdd` +export YYYYMMDD=`yyyymmdd` +export YYYYMMDD=${YYYYMMDD:0:6}22 TAG=MyTag echo "Released as $YYYYMMDD ('$TAG')." 
| grep MyTag && (STOP;STOP;STOP) echo "$TAG" | grep ' ' && (STOP;STOP;STOP) @@ -186,12 +120,16 @@ torsocks git push torsocks git push origin $TAG torsocks git push origin $YYYYMMDD +== Zenodo == + +Add tar.bz2 and publish. == Update documentation == Update version number + 1 -YYYYMMDD=$(echo `yyyymmdd`+1 | bc) +export YYYYMMDD=`yyyymmdd` +export YYYYMMDD=${YYYYMMDD:0:6}23 echo $YYYYMMDD perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/parallel perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/sql @@ -250,9 +188,9 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20200522 ('Kraftwerk') released <<[stable]>> +Subject: GNU Parallel 20200622 ('SpaceX') released <<[stable]>> -GNU Parallel 20200522 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ +GNU Parallel 20200622 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ <> @@ -262,23 +200,14 @@ Quote of the month: New in this release: -* While running a job $PARALLEL_JOBSLOT is the jobslot of the job. It is equal to {%} unless the job is being retried. See {%} for details. - -* While running a job $PARALLEL_SSHLOGIN is the sshlogin line with number of cores removed. E.g. '4//usr/bin/specialssh user@host' becomes: '/usr/bin/specialssh user@host' - -* While running a job $PARALLEL_SSHHOST is the host part of an sshlogin line. E.g. '4//usr/bin/specialssh user@host' becomes: 'host' - -* --plus activates the replacement strings {slot} = $PARALLEL_JOBSLOT, {sshlogin} = $PARALLEL_SSHLOGIN, {host} = $PARALLEL_SSHHOST +* * Bug fixes and man page updates. 
News about GNU Parallel: -* Portable Batch System (PBS) & GNU Parallel - Running a Program Multiple Times in Parallel https://www.youtube.com/watch?v=6ccbWu6Befo -* GNU Parallel przykład https://www.youtube.com/watch?v=gs_wG4Kt2G4 -* demo of LINUX APP - GNU PARALLEL - running multiple Gstreamer webcam .sh scripts with only 1 command https://www.youtube.com/watch?v=trQuA_wmWjg - +https://negfeedback.blogspot.com/2020/05/indispensable-command-line-tools.html Get the book: GNU Parallel 2018 http://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html diff --git a/packager/obs/home:tange/parallel/.osc/_files b/packager/obs/home:tange/parallel/.osc/_files index 951fe8c2..c4b6b112 100644 --- a/packager/obs/home:tange/parallel/.osc/_files +++ b/packager/obs/home:tange/parallel/.osc/_files @@ -1,6 +1,6 @@ - - - - - + + + + + diff --git a/packager/obs/home:tange/parallel/.osc/_meta b/packager/obs/home:tange/parallel/.osc/_meta index 3ab2baea..18e84c62 100644 --- a/packager/obs/home:tange/parallel/.osc/_meta +++ b/packager/obs/home:tange/parallel/.osc/_meta @@ -13,9 +13,7 @@ You can find more about GNU Parallel at: http://www.gnu.org/s/parallel/ Watch the intro video on http://www.youtube.com/playlist?list=PL284C9FF2488BC6D1 or walk through the tutorial http://www.gnu.org/software/parallel/parallel_tutorial.html -When using GNU Parallel for a publication please cite: - -O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: The USENIX Magazine, February 2011:42-47. 
+When using GNU Parallel for a publication please cite as per: 'parallel --citation' = About GNU SQL = diff --git a/packager/obs/home:tange/parallel/.osc/parallel.spec b/packager/obs/home:tange/parallel/.osc/parallel.spec index f6f0b422..37cb0cbc 100644 --- a/packager/obs/home:tange/parallel/.osc/parallel.spec +++ b/packager/obs/home:tange/parallel/.osc/parallel.spec @@ -1,7 +1,7 @@ Summary: Shell tool for executing jobs in parallel Name: parallel -Version: 20200322 +Version: 20200522 Release: 1.3 License: GPL-3.0-or-later Group: Productivity/File utilities @@ -66,6 +66,7 @@ rm $RPM_BUILD_ROOT%{_docdir}/sem.html rm $RPM_BUILD_ROOT%{_docdir}/sql.html rm $RPM_BUILD_ROOT%{_docdir}/parcat.html rm $RPM_BUILD_ROOT%{_docdir}/parset.html +rm $RPM_BUILD_ROOT%{_docdir}/parsort.html rm $RPM_BUILD_ROOT%{_docdir}/parallel.texi rm $RPM_BUILD_ROOT%{_docdir}/env_parallel.texi rm $RPM_BUILD_ROOT%{_docdir}/parallel_tutorial.texi @@ -77,6 +78,7 @@ rm $RPM_BUILD_ROOT%{_docdir}/sem.texi rm $RPM_BUILD_ROOT%{_docdir}/sql.texi rm $RPM_BUILD_ROOT%{_docdir}/parcat.texi rm $RPM_BUILD_ROOT%{_docdir}/parset.texi +rm $RPM_BUILD_ROOT%{_docdir}/parsort.texi rm $RPM_BUILD_ROOT%{_docdir}/parallel.pdf rm $RPM_BUILD_ROOT%{_docdir}/env_parallel.pdf rm $RPM_BUILD_ROOT%{_docdir}/parallel_tutorial.pdf @@ -88,6 +90,7 @@ rm $RPM_BUILD_ROOT%{_docdir}/sem.pdf rm $RPM_BUILD_ROOT%{_docdir}/sql.pdf rm $RPM_BUILD_ROOT%{_docdir}/parcat.pdf rm $RPM_BUILD_ROOT%{_docdir}/parset.pdf +rm $RPM_BUILD_ROOT%{_docdir}/parsort.pdf rm $RPM_BUILD_ROOT%{_docdir}/parallel_cheat_bw.pdf %clean @@ -98,7 +101,7 @@ rm -rf $RPM_BUILD_ROOT /usr/bin/* /usr/share/man/man1/* /usr/share/man/man7/* -%doc README NEWS src/parallel.html src/env_parallel.html src/parallel_tutorial.html src/parallel_design.html src/parallel_alternatives.html src/parallel_book.html src/sem.html src/sql.html src/parcat.html src/parset.html src/niceload.html src/parallel.texi src/env_parallel.texi src/parallel_tutorial.texi src/parallel_design.texi 
src/parallel_alternatives.texi src/parallel_book.texi src/niceload.texi src/sem.texi src/sql.texi src/parcat.texi src/parset.texi src/parallel.pdf src/env_parallel.pdf src/parallel_tutorial.pdf src/parallel_design.pdf src/parallel_alternatives.pdf src/parallel_book.pdf src/niceload.pdf src/sem.pdf src/sql.pdf src/parcat.pdf src/parset.pdf src/parallel_cheat_bw.pdf
+%doc README NEWS src/parallel.html src/env_parallel.html src/parallel_tutorial.html src/parallel_design.html src/parallel_alternatives.html src/parallel_book.html src/sem.html src/sql.html src/parcat.html src/parset.html src/parsort.html src/niceload.html src/parallel.texi src/env_parallel.texi src/parallel_tutorial.texi src/parallel_design.texi src/parallel_alternatives.texi src/parallel_book.texi src/niceload.texi src/sem.texi src/sql.texi src/parcat.texi src/parset.texi src/parsort.texi src/parallel.pdf src/env_parallel.pdf src/parallel_tutorial.pdf src/parallel_design.pdf src/parallel_alternatives.pdf src/parallel_book.pdf src/niceload.pdf src/sem.pdf src/sql.pdf src/parcat.pdf src/parset.pdf src/parsort.pdf src/parallel_cheat_bw.pdf
 
 %changelog
 * Sat Jan 22 2011 Ole Tange
diff --git a/packager/releasescripts/updateversion b/packager/releasescripts/updateversion
new file mode 100755
index 00000000..db7df9ae
--- /dev/null
+++ b/packager/releasescripts/updateversion
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Update the version number, the release date and the DOI in the
+# source files. Source this file from the top of the source tree
+# with DOINO (the number of the reserved Zenodo DOI) and TAG (the
+# name of the release) set.
+
+updater() {
+    export DOINO
+    export TAG
+    export YYYYMMDD=`yyyymmdd`
+    export DOI=10.5281/zenodo.$DOINO
+    export YYYY=${YYYYMMDD:0:4}
+    export YYYYMM=${YYYYMMDD:0:6}
+    # The release is dated the 22nd of the current month
+    export YYYYMMDD=${YYYYMM}22
+    export MON=`date +%b`
+    export MONTH=`date +%B`
+    echo Tag=$TAG Date:$YYYYMMDD Year:$YYYY Mon:$MON Month:$MONTH DOI:$DOI
+    export TITLE="GNU Parallel $YYYYMMDD ('$TAG')"
+
+    if [ -z "$DOINO" ] ; then
+        echo '*** Set DOINO and try again ***'
+        echo 'https://zenodo.org/deposit/new (Reserve DOI)'
+        echo '[Save]'
+        # Suggest the value from the last run (if there was one)
+        echo DOINO=$(cat .last-doi.txt 2>/dev/null)
+        return 1
+    fi
+    echo "$DOINO" > .last-doi.txt
+    if [ -z "$TAG" ] ; then
+        echo '*** Set TAG and try again ***'
+        echo TAG=$(cat .last-tag.txt 2>/dev/null)
+        return 1
+    fi
+    echo "$TAG" > .last-tag.txt
+
+    perl -i -pe "s/20\d\d\d\d\d\d/$YYYYMMDD/" configure.ac
+    perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/sql
+    perl -i -pe "/version/ and s/20\d\d\d\d\d\d/$YYYYMMDD/" src/niceload
+    perl -i -pe "s/parallel-20\d\d\d\d\d\d/parallel-$YYYYMMDD/" README
+    # Do not use apostrophes in the Perl comments below: they would
+    # end the quoted shell string that holds the Perl program
+    perl -i -pe '
+      # Update version 20209999
+      /version/ and s/20\d\d\d\d\d\d/$ENV{YYYYMMDD}/;
+      # Update: 10.5281/zenodo.1146014
+      s:10\.5281/zenodo\.\d+:$ENV{DOI}:;
+      # Update "@software{tange_2015_16303,"
+      s:tange_\d+_\d+:tange_$ENV{YYYY}_$ENV{DOINO}:;
+      # Update month = mar,
+      s/(month\s+=\s+)\S+,",/$1$ENV{MON},",/;
+      # Update title = {GNU Parallel 20200522 ("Kraftwerk")},",
+      / title\s+= / and s/\{.*\}/{$ENV{TITLE}}/;
+      # Tange, O. (2020, May 22). GNU Parallel 20200522 ("Kraftwerk").
+      s/(Tange, O. .).*(.. )(GNU.*[)])/$1$ENV{YYYY}, $ENV{MONTH} 22$2$ENV{TITLE}/;
+    ' src/parallel README
+    # Print the version and citation lines, presumably for a visual check
+    (
+        ppar --help
+        ppar --citation
+        # -E is needed for | to mean "or"
+        grep -Ei 'zenodo|tange' README
+    ) 2>&1 | grep -E '^ |^}|tange'
+    # NOTE(review): presumably will-cite is moved away so that this
+    # run shows the citation notice - confirm
+    mv ~/.parallel/will-cite ~/.parallel/will-cite.
+    ppar ::: true
+    mv ~/.parallel/will-cite. ~/.parallel/will-cite
+}
+updater
diff --git a/src/parallel b/src/parallel
index 3b81fffa..c09d247c 100755
--- a/src/parallel
+++ b/src/parallel
@@ -948,7 +948,7 @@ sub spreadstdin() {
     my $two_gb = 2**31-1;
     my $blocksize = $Global::blocksize;
     my $in = *STDIN;
-    my $timeout = ::multiply_time_units($opt::blocktimeout);
+    my $timeout = $Global::blocktimeout;
     my $header = find_header(\$buf,$in);
     my $anything_written;
 
@@ -1762,6 +1762,13 @@ sub parse_options(@) {
     if(defined $opt::max_args) {
         $Global::max_number_of_args = $opt::max_args;
     }
+    if(defined $opt::blocktimeout) {
+        $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout));
+        if($Global::blocktimeout < 1) {
+            ::error("--block-timeout must be at least 1");
+            wait_and_exit(255);
+        }
+    }
     if(defined $opt::timeout) {
         $Global::timeoutq = TimeoutQueue->new($opt::timeout);
     }
@@ -4535,6 +4542,7 @@ sub onall($@) {
          ((defined $opt::linebuffer) ? "--linebuffer" : ""),
          ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
          ((defined $opt::plain) ? "--plain" : ""),
+         ((defined $opt::plus) ? "--plus" : ""),
          ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
          ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
          ((defined $opt::ungroup) ? "-u" : ""),
diff --git a/src/parallel.pod b/src/parallel.pod
index 29c707bf..78a47be2 100644
--- a/src/parallel.pod
+++ b/src/parallel.pod
@@ -2910,7 +2910,6 @@ most likely do what is needed.
 
 =back
 
-
 =head1 EXAMPLE: Working as xargs -n1. Argument appending
 
 GNU B can work similar to B.
@@ -3144,6 +3143,17 @@ If B fails a red FAIL will be printed followed by the failing
 command; otherwise a green OK will be printed followed by the command.
 
 
+=head1 EXAMPLE: Continuously show the latest line of output
+
+It can be useful to monitor the output of running jobs.
+
+This shows the most recent output line until a job finishes. 
After
+which the output of the job is printed in full:
+
+  parallel '{} | tee >(cat >&3)' ::: 'command 1' 'command 2' \
+    3> >(perl -ne '$|=1;chomp;printf"%.'$COLUMNS's\r",$_." "x100')
+
+
 =head1 EXAMPLE: Log rotate
 
 Log rotation renames a logfile to an extension with a higher number:
diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod
index 8b450ca6..605498e7 100644
--- a/src/parallel_alternatives.pod
+++ b/src/parallel_alternatives.pod
@@ -2616,6 +2616,8 @@ https://pypi.org/project/papply/ (Last checked: 2020-04)
 
 =head2 Todo
 
+https://gitlab.com/netikras/bthread
+
 https://github.com/JeiKeiLim/simple_distribute_job
 
 https://github.com/reggi/pkgrun
diff --git a/src/parsort b/src/parsort
new file mode 100755
index 00000000..132a5646
--- /dev/null
+++ b/src/parsort
@@ -0,0 +1,363 @@
+#!/usr/bin/perl
+
+=pod
+
+=head1 NAME
+
+parsort - Sort (big files) in parallel
+
+
+=head1 SYNOPSIS
+
+B<parsort> I<options for sort>
+
+
+=head1 DESCRIPTION
+
+B<parsort> uses GNU B<parallel> to sort in parallel. It works just like
+B<sort> but faster on inputs with more than 1 M lines, if you have a
+multicore machine.
+
+Hopefully these ideas will make it into GNU Sort in the future.
+
+
+=head1 EXAMPLE
+
+Sort files:
+
+  parsort *.txt > sorted.txt
+
+Sort stdin (standard input) numerically:
+
+  cat numbers | parsort -n > sorted.txt
+
+
+=head1 PERFORMANCE
+
+B<parsort> is faster on files, because these can be read in parallel.
+
+On a 48 core machine you should see a speedup of 3x over B<sort>.
+
+
+=head1 AUTHOR
+
+Copyright (C) 2020 Ole Tange,
+http://ole.tange.dk and Free Software Foundation, Inc.
+
+
+=head1 LICENSE
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+at your option any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+=head1 DEPENDENCIES
+
+B<parsort> uses B<sort>, B<bash>, B<parallel>, and B<mbuffer>.
+
+
+=head1 SEE ALSO
+
+B<sort>
+
+
+=cut
+
+use strict;
+use Getopt::Long;
+use POSIX qw(mkfifo);
+
+Getopt::Long::Configure("bundling","require_order");
+
+my @ARGV_before = @ARGV;
+GetOptions(
+    "debug|D" => \$opt::D,
+    "version" => \$opt::version,
+    "verbose|v" => \$opt::verbose,
+    "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks,
+    "d|dictionary-order" => \$opt::dictionary_order,
+    "f|ignore-case" => \$opt::ignore_case,
+    "g|general-numeric-sort" => \$opt::general_numeric_sort,
+    "i|ignore-nonprinting" => \$opt::ignore_nonprinting,
+    "M|month-sort" => \$opt::month_sort,
+    "h|human-numeric-sort" => \$opt::human_numeric_sort,
+    "n|numeric-sort" => \$opt::numeric_sort,
+    "N|numascii" => \$opt::numascii,
+    "r|reverse" => \$opt::reverse,
+    "R|random-sort" => \$opt::random_sort,
+    "sort=s" => \$opt::sort,
+    "V|version-sort" => \$opt::version_sort,
+    "k|key=s" => \@opt::key,
+    "t|field-separator=s" => \$opt::field_separator,
+    "z|zero-terminated" => \$opt::zero_terminated,
+    "files0-from=s" => \$opt::files0_from,
+    "random-source=s" => \$opt::dummy,
+    "batch-size=s" => \$opt::dummy,
+    "check=s" => \$opt::dummy,
+    "c" => \$opt::dummy,
+    "C" => \$opt::dummy,
+    "compress-program=s" => \$opt::dummy,
+    "T|temporary-directory=s" => \$opt::dummy,
+    "parallel=s" => \$opt::dummy,
+    "u|unique" => \$opt::dummy,
+    "S|buffer-size=s" => \$opt::dummy,
+    "s|stable" => \$opt::dummy,
+    "help" => \$opt::dummy,
+    ) || exit(255);
+$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
+$Global::version = 20200412;
+if($opt::version) { version(); exit 0; }
+# The options that GetOptions consumed are passed on to sort
+@Global::sortoptions =
+    sort_options(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]);
+#if($opt::zero_terminated) { $/ = "\0"; }
+
+$ENV{'TMPDIR'} ||= "/tmp";
+
+sub version {
+    # Print the name and the version of the program
+    print "GNU $Global::progname $Global::version\n";
+}
+
+sub error {
+    # Print error messages on stderr
+    # Input:
+    #   @msg = lines to print, each prefixed with the program name
+    print STDERR map { "$Global::progname: Error: $_\n" } @_;
+}
+
+sub shell_quote {
+    # Quote strings so bash reads each of them as a single word
+    # Input:
+    #   @strings = strings to quote
+    # Returns:
+    #   @quoted = strings of only safe characters are returned unchanged,
+    #     the rest are wrapped in '...' with every ' written as '\''
+    my @quoted;
+    for my $string (@_) {
+        my $quoted = $string;
+        if($quoted eq "" or $quoted =~ m{[^-A-Za-z0-9_.,:=+/\@%]}) {
+            $quoted =~ s/'/'\\''/g;
+            $quoted = "'$quoted'";
+        }
+        push @quoted, $quoted;
+    }
+    return @quoted;
+}
+
+sub sort_options {
+    # Select the options to pass on to sort
+    # Input:
+    #   @options = the options given to parsort
+    # Returns:
+    #   @sortoptions = shell quoted options for sort
+    my @options = @_;
+    my @sortoptions;
+    while(@options) {
+        my $option = shift @options;
+        # parsort reads the file list itself and gives sort the files as
+        # operands: sort refuses operands combined with --files0-from
+        if($option eq "--files0-from") {
+            # The file name is in the next argument
+            shift @options;
+        } elsif($option !~ /^--files0-from=/) {
+            push @sortoptions, $option;
+        }
+    }
+    return shell_quote(@sortoptions);
+}
+
+sub merge {
+    # Combine commands into a single command that merges their output
+    # Input:
+    #   @cmd = commands to 'cat' (part of) a file
+    # Returns:
+    #   @cmd = one bash command using nested 'sort -m <(...) <(...)'
+    my @cmd = @_;
+    chomp(@cmd);
+    while($#cmd > 0) {
+        my @tmp;
+        while($#cmd >= 0) {
+            # Merge the commands in pairs. If the number of commands is
+            # odd, the last one has no partner and $right is empty
+            my $left = shift @cmd;
+            my $right = shift @cmd;
+            $left &&= "<($left)";
+            $right &&= "<($right)";
+            # Ignore errors from mbuffer - it gives errors when a pipe is closed
+            push @tmp, "sort -m @Global::sortoptions $left $right | ".buffer();
+        }
+        @cmd = @tmp;
+    }
+    return @cmd;
+}
+
+sub sort_files {
+    # Sort files and print the merged result on stdout
+    # Input:
+    #   @files = files to sort
+    my @files = @_;
+    # Let GNU Parallel generate the commands to read parts of files
+    # The commands split at \n and there will be at least one for each CPU thread
+    open(my $par,"-|",qw(parallel --pipepart --block -1 --dryrun -vv sort),
+         @Global::sortoptions, '::::', @files) || die;
+    my @cmd = merge(<$par>);
+    close $par;
+    # The command uses <(...) so it is incompatible with /bin/sh
+    open(my $bash,"|-","bash") || die;
+    print $bash @cmd;
+    close $bash;
+}
+
+sub sort_stdin {
+    # Sort stdin and print the merged result on stdout
+    # Spread the input between n processes that each sort
+    # n = number of CPU threads
+    my $numthreads = `parallel --number-of-threads`;
+    # Remove the newline: the number is used as an argument for -j
+    chomp($numthreads);
+    if($numthreads < 1) {
+        error("Cannot get the number of CPU threads from GNU Parallel.");
+        exit(255);
+    }
+    # tmpfifo() creates the fifos, so do not mkfifo them again here
+    my @fifos = map { tmpfifo() } 1..$numthreads;
+    # This trick removes the fifo as soon as it is connected in the other end
+    # (rm fifo; ...) < fifo
+    my @cmd = map { "(rm $_; sort @Global::sortoptions) < $_" }
+              shell_quote(@fifos);
+    @cmd = merge(@cmd);
+    my $pid = fork;
+    defined($pid) || die "Cannot fork: $!";
+    if(not $pid) {
+        # Child: split stdin between the fifos
+        exec(qw(parallel -j),$numthreads,
+             # 1M 30M = 43s
+             # 3M 30M = 59s
+             # 300k 30M = 40-45s
+             # 100k 30M = 47s
+             # 500k 30M = 44s
+             # 300k 10M = 41-45s
+             # 256k 10M = 42-44s
+             # 300k 3M = 42-45s
+             # 300k - = 47s
+             # 286k is the best mean value after testing 250..350
+             qw(--block 286k --pipe --roundrobin ),buffer(),qw(> {} :::),@fifos);
+        die "Cannot run parallel: $!";
+    }
+    # The command uses <(...) so it is incompatible with /bin/sh
+    open(my $bash,"|-","bash") || die;
+    print $bash @cmd;
+    close $bash;
+}
+
+sub tmpname {
+    # Select a name that does not exist
+    # Do not create the file: the caller does that (tmpfifo runs mkfifo)
+    # Input:
+    #   $name = prefix for the name
+    # Returns:
+    #   $tmpname = $TMPDIR/$name followed by 5 random characters
+    my $name = shift;
+    my($tmpname);
+    if(not -w $ENV{'TMPDIR'}) {
+        if(not -e $ENV{'TMPDIR'}) {
+            error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
+        } else {
+            error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
+        }
+        exit(255);
+    }
+    do {
+        $tmpname = $ENV{'TMPDIR'}."/".$name.
+            join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+    } while(-e $tmpname);
+    return $tmpname;
+}
+
+sub tmpfifo {
+    # Find an unused name and mkfifo on it
+    # Returns:
+    #   $tmpfifo = name of the new fifo
+    my $tmpfifo = tmpname("psort");
+    mkfifo($tmpfifo,0600) || die "Cannot mkfifo $tmpfifo: $!";
+    return $tmpfifo;
+}
+
+{
+    my $buffer;
+
+    sub buffer {
+        # Returns:
+        #   $buffer = command to buffer data between two commands
+        if(not defined $buffer) {
+            if(which("mbuffer")) {
+                # Use mbuffer if installed
+                # 30M = 43s
+                # 10M = 41-45s
+                # 3M = 42-45s
+                # Ignore errors from mbuffer - it gives errors when a pipe is closed
+                $buffer = "mbuffer -v0 -q -m 30M";
+            } else {
+                $buffer = "cat";
+            }
+        }
+        return $buffer;
+    }
+}
+
+sub which {
+    # Input:
+    #   @programs = programs to find the path to
+    # Returns:
+    #   @full_path = full paths to @programs. Nothing if not found
+    my @which;
+    for my $prg (@_) {
+        push(@which, grep { not -d $_ and -x $_ }
+             map { $_."/".$prg } split(":",$ENV{'PATH'}));
+        if($prg =~ m:/:) {
+            # Including path
+            push(@which, grep { not -d $_ and -x $_ } $prg);
+        }
+    }
+    return wantarray ? @which : $which[0];
+}
+
+
+if(@ARGV) {
+    sort_files(@ARGV);
+} elsif(length $opt::files0_from) {
+    open(my $fh,"<",$opt::files0_from) || die;
+    my @files = do {
+        # Only the file list is NUL separated: keep the change of $/ local,
+        # because sort_files() reads lines separated by \n
+        local $/ = "\0";
+        my @names = <$fh>;
+        chomp(@names);
+        @names;
+    };
+    close $fh;
+    sort_files(@files);
+} else {
+    sort_stdin();
+}
+
+# Test
+# -z
+# OK: cat bigfile | parsort
+# OK: parsort -k4n files*.txt
+# OK: parsort files*.txt
+# OK: parsort "file with space"
+
diff --git a/testsuite/tests-to-run/parallel-local-ssh1.sh b/testsuite/tests-to-run/parallel-local-ssh1.sh
index d1c6d08d..1ed6dd9d 100644
--- a/testsuite/tests-to-run/parallel-local-ssh1.sh
+++ b/testsuite/tests-to-run/parallel-local-ssh1.sh
@@ -168,6 +168,12 @@ par_onall_transfer() {
     echo Cleanup failed
 }
 
+par_--onall_--plus() {
+    echo '### Test --plus is respected with --onall/--nonall'
+    parallel -S bash@lo --onall --plus echo {host} ::: OK
+    parallel -S bash@lo --nonall --plus echo {host}
+}
+
 par_remote_load() {
     echo '### Test --load remote'
     ssh parallel@lo 'seq 10 | parallel --nice 19 --timeout 15 -j0 -qN0 perl -e while\(1\)\{\ \}' &
diff --git a/testsuite/wanted-results/parallel-local-ssh1 b/testsuite/wanted-results/parallel-local-ssh1
index c4ddd21a..d986ab7e 100644
--- a/testsuite/wanted-results/parallel-local-ssh1
+++ b/testsuite/wanted-results/parallel-local-ssh1
@@ -1,6 +1,9 @@
 echo TODO
 TODO
 ## echo '### Test --trc --basefile --/./--foo7 :/./:foo8 " "/./" "foo9 ./foo11/./foo11'
+par_--onall_--plus	### Test --plus is respected with --onall/--nonall
+par_--onall_--plus	lo
+par_--onall_--plus	lo
 par_PARALLEL_SSH_function	### use function as $PARALLEL_SSH
 par_PARALLEL_SSH_function	Run through FOOSSH?
 par_PARALLEL_SSH_function	FOOSSH