From b8039a27649a5cf642c7977d1569a475a1adb315 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Mon, 4 Mar 2019 03:10:40 +0100 Subject: [PATCH] parallel_cheat.fodt: Initial cheat sheet. --- doc/release_new_version | 26 +-------- src/Makefile.am | 12 +++- src/Makefile.in | 12 +++- src/parallel.pod | 3 + src/parallel_alternatives.pod | 105 +++++++++++++++++++++++++++++++--- 5 files changed, 121 insertions(+), 37 deletions(-) diff --git a/doc/release_new_version b/doc/release_new_version index b6fc414e..18f2697c 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -207,7 +207,7 @@ from:tange@gnu.org to:parallel@gnu.org, bug-parallel@gnu.org stable-bcc: Jesse Alama -Subject: GNU Parallel 20190222 ('INF/Al-Baghuz Fawqani') released <<[stable]>> +Subject: GNU Parallel 20190222 ('indien pakistan Kashmir') released <<[stable]>> GNU Parallel 20190222 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ @@ -220,29 +220,9 @@ Quote of the month: New in this release: -* --shard makes it possible to send input to a the same jobslot based on the value in one column of the input. It is similar to sharding in databases. +https://calendar.colorado.edu/event/high_throughput_computing_on_rmacc_summit_and_beyond#.XH2NBhB7mV4 -* --shellquote --shellquote will shell quote the input twice. 
- -* GNU Parallel is available in Termux https://github.com/termux/termux-packages/tree/master/packages/parallel - -* Linux, command line & MetaCentrum https://trapa.cz/sites/default/files/linux_bash_metacentrum_course_4.pdf - -* How to supercharge your bash workflows with GNU parallel https://medium.freecodecamp.org/how-to-supercharge-your-bash-workflows-with-gnu-parallel-53aab0aea141 - -* GNU Parallel Spring 2017 https://www.youtube.com/watch?v=LHb29uW_KyI - -* Parallelizing Freesurfer blog.cogneurostats.com/?p=148 - -* FreeSurfer #4: Running recon-all in Parallel https://www.youtube.com/watch?v=XHN2tm3tNaw - -* FreeSurfer: recon-all https://www.neurotrivial.com/2019/01/24/freesurfer-recon-all/ - -* Come ridimensionare 10k foto con la riga di comando https://pigrecoinfinito.wordpress.com/2018/12/22/come-ridimensionare-10k-foto-con-la-riga-di-comando/ - -* 并行处理 – Gnu Parallel:嵌套并行 https://codeday.me/bug/20190122/552902.html - -* シェルスクリプトでコマンドを並列実行する方法https://www.kwbtblog.com/entry/2019/01/12/020423 +https://www.cheatography.com/cpriest/cheat-sheets/gnu-parallel/ * Bug fixes and man page updates. diff --git a/src/Makefile.am b/src/Makefile.am index 20e5bd0c..327d6e02 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,9 +20,10 @@ doc_DATA = parallel.html env_parallel.html sem.html sql.html \ niceload.texi parallel_tutorial.texi parallel_book.texi \ parallel_design.texi parallel_alternatives.texi parcat.texi \ parset.texi \ - parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ + parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ - parallel_alternatives.pdf parcat.pdf parset.pdf + parallel_alternatives.pdf parcat.pdf parset.pdf \ + parallel_cheat.pdf endif # Build documentation file if the tool to build exists. @@ -257,6 +258,10 @@ parset.pdf: parset.pod pod2pdf --output-file $(srcdir)/parset.pdf $(srcdir)/parset.pod --title "GNU parset" \ || echo "Warning: pod2pdf not found. 
Using old parset.pdf" +parallel_cheat.pdf: parallel_cheat.fodt + libreoffice --headless --convert-to pdf parallel_cheat.fodt \ + || echo "Warning: libreoffice failed. Using old parallel_cheat.pdf" + sem: parallel ln -fs parallel sem @@ -273,7 +278,8 @@ DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parset.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ - parallel_alternatives.pdf parcat.pdf parset.pdf + parallel_alternatives.pdf parcat.pdf parset.pdf \ + parallel_cheat.pdf EXTRA_DIST = parallel sem sql niceload parcat parset env_parallel \ env_parallel.ash env_parallel.bash env_parallel.csh \ diff --git a/src/Makefile.in b/src/Makefile.in index 94f7dff0..87f69c9b 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -247,9 +247,10 @@ bin_SCRIPTS = parallel sql niceload parcat parset env_parallel \ @DOCUMENTATION_TRUE@ niceload.texi parallel_tutorial.texi parallel_book.texi \ @DOCUMENTATION_TRUE@ parallel_design.texi parallel_alternatives.texi parcat.texi \ @DOCUMENTATION_TRUE@ parset.texi \ -@DOCUMENTATION_TRUE@ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ +@DOCUMENTATION_TRUE@ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ @DOCUMENTATION_TRUE@ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ -@DOCUMENTATION_TRUE@ parallel_alternatives.pdf parcat.pdf parset.pdf +@DOCUMENTATION_TRUE@ parallel_alternatives.pdf parcat.pdf parset.pdf \ +@DOCUMENTATION_TRUE@ parallel_cheat.pdf DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parallel_tutorial.7 parallel_book.7 parallel_design.7 \ @@ -264,7 +265,8 @@ DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ parset.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ parallel_tutorial.pdf parallel_book.pdf parallel_design.pdf \ - parallel_alternatives.pdf parcat.pdf parset.pdf + parallel_alternatives.pdf parcat.pdf parset.pdf \ + 
parallel_cheat.pdf EXTRA_DIST = parallel sem sql niceload parcat parset env_parallel \ env_parallel.ash env_parallel.bash env_parallel.csh \ @@ -848,6 +850,10 @@ parset.pdf: parset.pod pod2pdf --output-file $(srcdir)/parset.pdf $(srcdir)/parset.pod --title "GNU parset" \ || echo "Warning: pod2pdf not found. Using old parset.pdf" +parallel_cheat.pdf: parallel_cheat.fodt + libreoffice --headless --convert-to pdf parallel_cheat.fodt \ + || echo "Warning: libreoffice failed. Using old parallel_cheat.pdf" + sem: parallel ln -fs parallel sem diff --git a/src/parallel.pod b/src/parallel.pod index ce6a9594..4cb88f1e 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -1060,6 +1060,9 @@ sshlogin in sorted order. If used with B<--pipe --roundrobin> and the same input, the jobslots will get the same blocks in the same order in every run. +B<-k> only affects the order in which the output is printed - not the +order in which jobs are run. + =item B<-L> I<recsize> diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod index 49a0f51f..832902f8 100644 --- a/src/parallel_alternatives.pod +++ b/src/parallel_alternatives.pod @@ -1998,6 +1998,74 @@ corresponding GNU B<parallel> command. https://github.com/k-bx/par (Last checked: 2019-02) +=head2 DIFFERENCES BETWEEN parallelshell AND GNU Parallel + +B<parallelshell> does not allow for composed commands: + + # This does not work + parallelshell 'echo foo;echo bar' 'echo baz;echo quuz' + +Instead you have to wrap that in a shell: + + parallelshell 'sh -c "echo foo;echo bar"' 'sh -c "echo baz;echo quuz"' + +It buffers output in RAM. All commands must be given on the command +line and all commands are started in parallel at the same time. This +will cause the system to freeze if there are so many jobs that there +is not enough memory to run them all at the same time.
+ +https://github.com/keithamus/parallelshell (Last checked: 2019-02) + +https://github.com/darkguy2008/parallelshell (Last checked: 2019-03) + + +=head2 DIFFERENCES BETWEEN shell-executor AND GNU Parallel + +B<shell-executor> does not allow for composed commands: + + # This does not work + sx 'echo foo;echo bar' 'echo baz;echo quuz' + +Instead you have to wrap that in a shell: + + sx 'sh -c "echo foo;echo bar"' 'sh -c "echo baz;echo quuz"' + +It buffers output in RAM. All commands must be given on the command +line and all commands are started in parallel at the same time. This +will cause the system to freeze if there are so many jobs that there +is not enough memory to run them all at the same time. + +https://github.com/royriojas/shell-executor (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN non-GNU par AND GNU Parallel + +B<par> buffers in memory to avoid mixing of jobs. It takes 1s per 1 +million output lines. + +B<par> needs to have all commands before starting the first job. The +jobs are read from stdin (standard input) so any quoting will have to +be done by the user. + +Stdout (standard output) is prepended with o:. Stderr (standard error) +is sent to stdout (standard output) and prepended with e:. + +For short jobs with little output B<par> is 20% faster than GNU B<parallel>. + +http://savannah.nongnu.org/projects/par (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN fd AND GNU Parallel + +B<fd> does not support composed commands, so commands must be wrapped +in B<sh -c>. + +It buffers output in RAM. + +It only takes file names from the filesystem as input (similar to B<find>).
+ +https://github.com/sharkdp/fd (Last checked: 2019-02) + =head2 Todo @@ -2005,16 +2073,10 @@ Url for spread https://github.com/reggi/pkgrun -https://github.com/benoror/better-npm-run +https://github.com/benoror/better-npm-run - not obvious how to use https://github.com/bahmutov/with-package -https://github.com/spion/npm-parallel - -https://github.com/royriojas/shell-executor - -https://github.com/darkguy2008/parallelshell - https://github.com/xuchenCN/go-pssh https://github.com/amritb/with-this.git @@ -2028,7 +2090,7 @@ https://github.com/Julian/Verge There are certain issues that are very common on parallelizing tools. Here are a few stress tests. Be warned: If the tool is badly -coded it may overload you machine. +coded it may overload your machine. =head2 MIX: Output mixes @@ -2069,6 +2131,33 @@ This test stresses whether output mixes. # and there should only be a single line per job +=head2 STDERRMERGE: Stderr is merged with stdout + +Output from stdout and stderr should not be merged, but kept separated. + +This test shows whether stdout is mixed with stderr. + + #!/bin/bash + + paralleltool="parallel -j0" + + cat <<-EOF > mycommand + #!/bin/bash + + echo stdout + echo stderr >&2 + echo stdout + echo stderr >&2 + EOF + chmod +x mycommand + + # Run one job + echo | + $paralleltool ./mycommand > stdout 2> stderr + cat stdout + cat stderr + + =head2 RAM: Output limited by RAM Some tools cache output in RAM. This makes them extremely slow if the