From 73f554ad8f243651a3334ef70a9b897ad1232692 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Wed, 9 Oct 2019 19:18:53 +0200 Subject: [PATCH] parallel: Give better error message if file not found when using --pipepart. --- 10seconds_install | 78 +++++++++++++++++++---------------------- NEWS | 8 +++-- doc/haikus | 10 ++++-- doc/release_new_version | 30 ++++------------ src/parallel | 78 ++++++++++++++++++++++++----------------- src/parallel.pod | 13 +++---- 6 files changed, 108 insertions(+), 109 deletions(-) diff --git a/10seconds_install b/10seconds_install index f7177d81..45213255 100644 --- a/10seconds_install +++ b/10seconds_install @@ -23,18 +23,12 @@ # $ bash install.sh run() { - # tail on openindiana must be /usr/xpg4/bin/tail - tail=$(echo | tail -n 1 2>/dev/null && echo tail || - (echo | /usr/xpg4/bin/tail -n 1 && echo /usr/xpg4/bin/tail)) - # grep on openindiana must be /usr/xpg4/bin/grep - grep=$(echo | grep -vE . 2>/dev/null && echo grep || - (echo | /usr/xpg4/bin/grep -vE . && echo /usr/xpg4/bin/grep)) # FreeBSD prefers 'fetch', MacOS prefers 'curl', Linux prefers 'wget' get=$( (lynx -source /dev/null && echo lynx -source) || (fetch -o /dev/null file:///bin/sh && echo fetch -o -) || (curl -h >/dev/null && echo curl -L) || - (wget -h >/dev/null && echo wget -qO -) || + (wget -h >/dev/null && echo wget -qO -) || echo 'No lynx, wget, curl, fetch: Please inform parallel@gnu.org what you use for downloading URLs' >&2 ) if test "$get" = ""; then @@ -42,17 +36,17 @@ run() { fi if ! perl -e 1; then - echo No perl installed. GNU Parallel depends on perl. Install perl and retry. + echo No perl installed. GNU Parallel depends on perl. Install perl and retry. exit 1 fi LANG=C latest=$($get http://ftpmirror.gnu.org/parallel | - perl -ne '/.*(parallel-\d{8})/ and print $1."\n"' | - sort | $tail -n1) + perl -ne '/.*(parallel-\d{8})/ and print $1."\n"' | + perl -e 'print ((reverse sort <>)[0])') if test \! 
-e $latest.tar.bz2; then - # Source tar does not exist - rm -f $latest.tar.bz2 $latest.tar.bz2.sig + # Source tar does not exist + rm -f $latest.tar.bz2 $latest.tar.bz2.sig $get http://ftpmirror.gnu.org/parallel/$latest.tar.bz2 > $latest.tar.bz2 $get http://ftpmirror.gnu.org/parallel/$latest.tar.bz2.sig > $latest.tar.bz2.sig fi @@ -60,49 +54,49 @@ run() { fetch_keys() { if gpg -h 2>/dev/null >/dev/null ; then # GnuPG installed - # Setup .gnupg/gpg.conf if not already done - echo | gpg 2>/dev/null >/dev/null + # Setup .gnupg/gpg.conf if not already done + echo | gpg 2>/dev/null >/dev/null keyserver1=keys.gnupg.net keyserver2=pool.sks-keyservers.net - if gpg --keyserver $keyserver1 --recv-key 0xFFFFFFF1 || - gpg --keyserver $keyserver2 --recv-key 0xFFFFFFF1 ; then + if gpg --keyserver $keyserver1 --recv-key 0xFFFFFFF1 || + gpg --keyserver $keyserver2 --recv-key 0xFFFFFFF1 ; then if gpg --keyserver $keyserver1 --recv-key 0x88888888 || - gpg --keyserver $keyserver2 --recv-key 0x88888888; then + gpg --keyserver $keyserver2 --recv-key 0x88888888; then # OK return 0 else - echo + echo echo "Cannot fetch keyID 0x88888888, so the signature cannot be checked." return 1 fi - else + else echo echo "Cannot fetch keyID 0xFFFFFFF1, so the signature cannot be checked." return 1 fi else # GnuPG not installed - echo + echo echo "GnuPG (gpg) is not installed so the signature cannot be checked." 
return 1 - fi + fi } # Check signature - in case ftpmirror.gnu.org is compromised if fetch_keys; then - if gpg --with-fingerprint $latest.tar.bz2.sig 2>&1 | - $grep -E '^Primary key fingerprint: BE9C B493 81DE 3166 A3BC 66C1 2C62 29E2 FFFF FFF1|^Primary key fingerprint: CDA0 1A42 08C4 F745 0610 7E7B D1AB 4516 8888 8888' ; then - # Source code signed by Ole Tange + if gpg --with-fingerprint $latest.tar.bz2.sig 2>&1 | + perl -e 'exit not grep /^Primary key fingerprint: BE9C B493 81DE 3166 A3BC 66C1 2C62 29E2 FFFF FFF1|^Primary key fingerprint: CDA0 1A42 08C4 F745 0610 7E7B D1AB 4516 8888 8888/, <>'; then + # Source code signed by Ole Tange # KeyID FFFFFFF1/88888888 true else - # GnuPG signature failed + # GnuPG signature failed echo - echo "The signature on $latest.tar.bz2 is wrong. This may indicate that a criminal has changed the code." + echo "The signature on $latest.tar.bz2 is wrong. This may indicate that a criminal has changed the code." echo "THIS IS BAD AND THE CODE WILL NOT BE INSTALLED." echo echo "See http://git.savannah.gnu.org/cgit/parallel.git/tree/README for other installation methods." - exit 1 + exit 1 fi else # GnuPG not installed or public keys not downloaded @@ -111,12 +105,12 @@ run() { echo "Continue anyway? (y/n)" read YN /dev/null; then # $HOME/bin is already in $PATH true - else + else # Add $HOME/bin to $PATH for both bash and csh echo 'PATH=$PATH:$HOME/bin' >> $HOME/.bashrc echo 'setenv PATH ${PATH}:${HOME}/bin' >> $HOME/.cshrc - fi + fi # Is $HOME/share/man already in $MANPATH? 
- if echo $MANPATH | grep $HOME/share/man >/dev/null; then + if echo $MANPATH | grep $HOME/share/man >/dev/null; then # $HOME/share/man is already in $MANPATH true else # Add $HOME/share/man to $MANPATH for both bash and csh - echo 'MANPATH=$MANPATH:$HOME/share/man' >> $HOME/.bashrc - echo 'setenv MANPATH ${MANPATH}:${HOME}/share/man' >> $HOME/.cshrc - fi + echo 'MANPATH=$MANPATH:$HOME/share/man' >> $HOME/.bashrc + echo 'setenv MANPATH ${MANPATH}:${HOME}/share/man' >> $HOME/.cshrc + fi fi } diff --git a/NEWS b/NEWS index 89537fcd..a73ac0f3 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -20190822 +20190922 * --nice is now inherited by the nice level that GNU Parallel is started at. So 'nice -n10 parallel' will also cause remote jobs to @@ -9,9 +9,11 @@ * GNU Parallel will be presented at Driving IT 2019: https://ida.dk/arrangementer-og-kurser/konferencer/driving-it/tools -* Greenland Ice Sheet solid ice discharge from 1986 through 2017 https://www.earth-syst-sci-data.net/11/769/2019/essd-11-769-2019.pdf +* Greenland Ice Sheet solid ice discharge from 1986 through 2017 + https://www.earth-syst-sci-data.net/11/769/2019/essd-11-769-2019.pdf -* App Center Cloud Build Machines > Installed Software > Utilities https://docs.microsoft.com/en-us/appcenter/build/software +* App Center Cloud Build Machines > Installed Software > Utilities + https://docs.microsoft.com/en-us/appcenter/build/software * Curryfinger - SNI & Host header spoofing utility https://dualuse.io/blog/curryfinger/ diff --git a/doc/haikus b/doc/haikus index 7b370cbc..372441b2 100644 --- a/doc/haikus +++ b/doc/haikus @@ -1,7 +1,10 @@ Quote of the month: - IMHO, SQLite and GNU Parallel are among the world's great software. - -- singe@reddit + Well anyway, It was blazingly fast and astonished by performance. guess I'll never use xargs. + -- (Not) Akaming @_Akamig@twitter + +GNU parallel has helped me kill a Hadoop cluster before. 
+ -- Travis Campbell @hcoyote@twitter Yeah, GNU parallel is a beast when used accordingly. -- @lsde@twitter @@ -55,6 +58,9 @@ Quote of the month: === Used === + IMHO, SQLite and GNU Parallel are among the world's great software. + -- singe@reddit + It is, beyond absolutely any doubt whatsoever, the single most important tool I use in making me a productive bioinformatician. -- A-N-Other@reddit.com diff --git a/doc/release_new_version b/doc/release_new_version index 60eb2b1a..893adeec 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -209,9 +209,9 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20190822 ('Dorian/Stallman') released <<[stable]>> +Subject: GNU Parallel 20191022 ('Driving IT') released <<[stable]>> -GNU Parallel 20190822 ('Dorian') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ +GNU Parallel 20191022 ('Driving IT') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ <> @@ -225,32 +225,16 @@ Quote of the month: New in this release: -* --delay is now accurate to around 5 ms. +Uses GNU Parallel https://github.com/pirovc/genome_updater -* --nice is now inherited by the nice level that GNU Parallel is started at. So 'nice -n10 parallel' will also cause remote jobs to be run at nice level 10. 
+Using GNU-Parallel for bioinformatics https://www.danielecook.com/using-gnu-parallel-for-bioinformatics/ -* GNU Parallel will be presented at Driving IT 2019: https://ida.dk/arrangementer-og-kurser/konferencer/driving-it/tools +Speeding up PostgreSQL ETL pipeline with the help of GODS https://cfengine.com/company/blog-detail/speeding-up-postgresql-etl-pipeline-with-the-help-of-gods/ -* Greenland Ice Sheet solid ice discharge from 1986 through 2017 https://www.earth-syst-sci-data.net/11/769/2019/essd-11-769-2019.pdf -* App Center Cloud Build Machines > Installed Software > Utilities https://docs.microsoft.com/en-us/appcenter/build/software +https://readthedocs.org/projects/curc/downloads/pdf/latest/ -https://dualuse.io/blog/curryfinger/ - -https://wiki.cac.washington.edu/display/hyakusers/Hyak+parallel-sql - -https://www.reddit.com/r/perl/comments/cyo2qc/a_meditation_on_analysing_data_with_perl_in/ - -https://www.eventbrite.ca/e/inscription-accelerer-ses-taches-avec-gnu-parallel-et-les-lots-de-taches-ul-71412840729 - -https://curc.readthedocs.io/en/latest/software/GNUParallel.html - -https://petelawson.com/post/parallel-in-shell/ - -https://www.usenix.org/conference/lisa19/presentation/maheshwari - -http://atomicer.cn/2018/01/18/gnu-parallel-nfs-%E5%AE%9E%E7%8E%B0%E5%85%B1%E4%BA%AB%E6%96%87%E4%BB%B6%E7%BD%91%E7%BB%9C%E7%9A%84%E5%B9%B6%E5%8F%91%E8%AE%A1%E7%AE%97%E7%AD%89%E6%93%8D%E4%BD%9C/#more -Gnu/parallel + Nfs 实现共享文件网络的并发计算等操作 +* 如何使用Parallel在Shell中并行执行命令https://www.myfreax.com/gnu-parallel/ * Bug fixes and man page updates. 
diff --git a/src/parallel b/src/parallel index 7d86dd47..0587c366 100755 --- a/src/parallel +++ b/src/parallel @@ -149,9 +149,12 @@ sub pipepart_setup() { $size += -s $_; } elsif(-b $_) { $size += size_of_block_dev($_); - } else { + } elsif(-e $_) { ::error("$_ is neither a file nor a block device"); wait_and_exit(255); + } else { + ::error("File not found: $_"); + wait_and_exit(255); } } # Run in total $job_slots*(- $blocksize) jobs @@ -1528,6 +1531,7 @@ sub options_hash() { "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, # Before changing this line, please read # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt "bibtex|citation" => \$opt::citation, "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite, # Termination and retries @@ -1636,6 +1640,7 @@ sub parse_options(@) { # Before changing this line, please read # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt if(defined $opt::citation) { citation(\@argv_before,\@ARGV); wait_and_exit(0); @@ -1683,7 +1688,7 @@ sub parse_options(@) { if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts || $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR'; - # Default: Same nice level as GNU Parallel is started at + # Default: Same nice level as GNU Parallel is started at $opt::nice ||= eval { getpriority(0,0) } || 0; if(defined $opt::help) { usage(); exit(0); } if(defined $opt::embed) { embed(); exit(0); } @@ -1937,11 +1942,14 @@ sub parse_options(@) { # the alternatives instead? # See a list in: 'man parallel_alternatives' # - # If you want GNU Parallel to be maintained in the future you should not - # change this line. + # If you want GNU Parallel to be maintained in the future keep + # this line. 
citation_notice(); - # Seriously: You will be harming free software by removing the notice. - # You make it harder to justify spending time developing it. + # Seriously: YOU will be harming free software by removing the + # notice. You make it harder to justify spending time developing + # it. If you *do* remove the line, please email + # hallofshame@tange.dk if you want to avoid being put in a hall of + # shame. parse_halt(); @@ -2060,6 +2068,7 @@ sub init_globals() { # Defaults: $Global::version = 20190922; $Global::progname = 'parallel'; + $::name = "GNU Parallel"; $Global::infinity = 2**31; $Global::debug = 0; $Global::verbose = 0; @@ -2274,7 +2283,7 @@ sub parse_replacement_string_options() { sub parse_semaphore() { # Semaphore defaults # Must be done before computing number of processes and max_line_length - # because when running as a semaphore GNU Parallel does not read args + # because when running as a semaphore GNU Parallel does not read args # Uses: # $opt::semaphore # $Global::semaphore @@ -3103,7 +3112,7 @@ sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {} # $Global::total_started = total number of jobs started # $Global::joblog = filehandle of joblog # $Global::debug = Is debugging on? 
-# $Global::exitstatus = status code of GNU Parallel +# $Global::exitstatus = status code of GNU Parallel # $Global::quoting = quote the command to run sub init_run_jobs() { @@ -3403,10 +3412,11 @@ sub drain_job_queue(@) { my $sleep = 0.2; do { while($Global::total_running > 0) { - debug($Global::total_running, "==", scalar + debug("init",$Global::total_running, "==", scalar keys %Global::running," slots: ", $Global::max_jobs_running); if($opt::pipe) { - # When using --pipe sometimes file handles are not closed properly + # When using --pipe sometimes file handles are not + # closed properly for my $job (values %Global::running) { close $job->fh(0,"w"); } @@ -4011,7 +4021,7 @@ sub setup_basefile() { } debug("init", "basesetup: @cmd\n"); my ($exitstatus,$stdout_ref,$stderr_ref) = - run_parallel((join "\n",@cmd),"-j0","--retries",5); + run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5); if($exitstatus) { my @stdout = @$stdout_ref; my @stderr = @$stderr_ref; @@ -4041,7 +4051,7 @@ sub cleanup_basefile() { } debug("init", "basecleanup: @cmd\n"); my ($exitstatus,$stdout_ref,$stderr_ref) = - run_parallel(join("\n",@cmd),"-j0","--retries",5); + run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5); if($exitstatus) { my @stdout = @$stdout_ref; my @stderr = @$stderr_ref; @@ -4050,22 +4060,22 @@ sub cleanup_basefile() { } } -sub run_parallel() { +sub run_gnu_parallel() { my ($stdin,@args) = @_; my $cmd = join "",map { " $_ & " } split /\n/, $stdin; print $Global::original_stderr ` $cmd wait` ; return 0 } -sub _run_parallel() { - # Run GNU Parallel +sub _run_gnu_parallel() { + # Run GNU Parallel # This should ideally just fork an internal copy # and not start it through a shell # Input: - # $stdin = data to provide on stdin for GNU Parallel + # $stdin = data to provide on stdin for GNU Parallel # @args = command line arguments # Returns: - # $exitstatus = exitcode of GNU Parallel run + # $exitstatus = exitcode of GNU Parallel run # \@stdout = standard output # \@stderr = 
standard error my ($stdin,@args) = @_; @@ -4441,7 +4451,7 @@ sub onall($@) { ); ::debug("init", "| $0 $options\n"); open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") || - ::die_bug("This does not run GNU Parallel: $0 $options"); + ::die_bug("This does not run GNU Parallel: $0 $options"); my @joblogs; for my $host (sort keys %Global::host) { my $sshlogin = $Global::host{$host}; @@ -4505,14 +4515,14 @@ sub sigpipe() { sub signal_children() { # Send signal to all children process groups - # and GNU Parallel itself + # and GNU Parallel itself # Uses: # %SIG # Returns: N/A my $signal = shift; debug("run", "Sending $signal "); kill $signal, map { -$_ } keys %Global::running; - # Use default signal handler for GNU Parallel itself + # Use default signal handler for GNU Parallel itself $SIG{$signal} = undef; kill $signal, $$; } @@ -4793,6 +4803,7 @@ sub usage() { "", # Before changing this line, please read # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt "This helps funding further development; AND IT WON'T COST YOU A CENT.", "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", "", @@ -4823,6 +4834,7 @@ sub citation_notice() { "", # Before changing this line, please read # https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt "This helps funding further development; AND IT WON'T COST YOU A CENT.", "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", "", @@ -4833,7 +4845,7 @@ sub citation_notice() { "" ); mkdir $Global::config_dir; - # Number of times the user has run GNU Parallel without showing + # Number of times the user has run GNU Parallel without showing # willingness to cite my $runs = 0; if(open (my $fh, "<", $Global::config_dir. 
@@ -4986,7 +4998,7 @@ sub citation() { "", "If you use '--will-cite' in scripts to be run by others you are making", "it harder for others to see the citation notice. The development of", - "GNU parallel is indirectly financed through citations, so if users", + "GNU Parallel is indirectly financed through citations, so if users", "do not know they should cite then you are making it harder to finance", "development. However, if you pay 10000 EUR, you should feel free to", "use '--will-cite' in scripts.", @@ -5008,7 +5020,7 @@ sub show_limits() { } sub embed() { - # Give an embeddable version of GNU Parallel + # Give an embeddable version of GNU Parallel # Tested with: bash, zsh, ksh, ash, dash, sh my $randomstring = "cut-here-".join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); @@ -5053,9 +5065,9 @@ sub embed() { "; print q! -# Embedded GNU Parallel created with --embed +# Embedded GNU Parallel created with --embed parallel() { - # Start GNU Parallel without leaving temporary files + # Start GNU Parallel without leaving temporary files # # Not all shells support 'perl <(cat ...)' # This is a complex way of doing: @@ -5066,7 +5078,7 @@ parallel() { # [1]+ Done cat # Make a temporary fifo that perl can read from - _fifo_with_parallel_source=`perl -e 'use POSIX qw(mkfifo); + _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo); do { $f = "/tmp/parallel-".join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); @@ -5075,9 +5087,9 @@ parallel() { print $f;'` # Put source code into temporary file # so it is easy to copy to the fifo - _file_with_parallel_source=`mktemp`; + _file_with_GNU_Parallel_source=`mktemp`; !, - "cat <<'$randomstring' > \$_file_with_parallel_source\n", + "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n", @source, $randomstring,"\n", q! 
@@ -5085,10 +5097,10 @@ parallel() { # and remove the file and fifo ASAP # 'sh -c' is needed to avoid # [1]+ Done cat - sh -c "(rm $_file_with_parallel_source; cat >$_fifo_with_parallel_source; rm $_fifo_with_parallel_source) < $_file_with_parallel_source &" + sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &" # Read the source from the fifo - perl $_fifo_with_parallel_source "$@" + perl $_fifo_with_GNU_Parallel_source "$@" } !, @env_parallel_source, @@ -6530,7 +6542,7 @@ sub loadavg($) { # Recompute a new one in the background # The load average is computed as the number of processes waiting for disk # or CPU right now. So it is the server load this instant and not averaged over - # several minutes. This is needed so GNU Parallel will at most start one job + # several minutes. This is needed so GNU Parallel will at most start one job # that will push the load over the limit. # # Returns: @@ -8751,7 +8763,7 @@ sub base64_eval($) { # Will be wrapped in ' so single quote is forbidden. # Spaces are stripped so spaces cannot be significant. # The funny 'use IPC::Open3'-syntax is to avoid spaces and - # to make it clear that this is a GNU Parallel command + # to make it clear that this is a GNU Parallel command # when looking at the process table. 
# Returns: # $script = 1-liner for perl -e @@ -11420,7 +11432,7 @@ sub get($) { my $ret = $self->{'arg_sub_queue'}->get(); if($ret) { if(grep { index($_->orig(),"\0") > 0 } @$ret) { - # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" + # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" # to mean no-string ::warning("A NUL character in the input was replaced with \\0.", "NUL cannot be passed through in the argument list.", diff --git a/src/parallel.pod b/src/parallel.pod index d31187d3..ca5729b2 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -1610,8 +1610,8 @@ so you are unlikely to ever use this option. Setting B<--nice> will override this nice level. If the nice level is smaller than the current nice level, it will only affect remote jobs -(e.g. current level is 10 and B<--nice 5> will cause local jobs to be -run at level 10, but remote jobs run at nice level 5). +(e.g. if current level is 10 then B<--nice 5> will cause local jobs to +be run at level 10, but remote jobs run at nice level 5). =item B<--interactive> @@ -2786,13 +2786,14 @@ Print the version GNU B<parallel> and exit. =item B<--wd> I<mydir> +Jobs will be run in the dir I<mydir>. + Files transferred using B<--transferfile> and B<--return> will be -relative to I<mydir> on remote computers, and the command will be -executed in the dir I<mydir>. +relative to I<mydir> on remote computers. The special I<mydir> value B<...> will create working dirs under -B<~/.parallel/tmp/> on the remote computers. If B<--cleanup> is given -these dirs will be removed. +B<~/.parallel/tmp/>. If B<--cleanup> is given these dirs will be +removed. The special I<mydir> value B<.> uses the current working dir. If the current working dir is beneath your home dir, the value B<.> is