2011-01-18 17:15:42 +00:00
|
|
|
parallel: --joblog implemented. Testsuite missing.
|
|
|
|
parallel: --spreadstdin prototype. Testsuite missing.
|
2011-01-02 00:01:21 +00:00
|
|
|
|
|
|
|
codecoverage
|
2010-12-22 09:21:58 +00:00
|
|
|
|
2011-01-02 00:01:21 +00:00
|
|
|
Testsuite: sem without ~/.parallel
|
2010-12-21 17:08:16 +00:00
|
|
|
|
2010-12-19 00:38:36 +00:00
|
|
|
Til QUOTING:
|
|
|
|
|
|
|
|
FN="two spaces"
|
|
|
|
echo 1 | parallel -q echo {} "$FN"
|
|
|
|
# Prints 2 spaces between 'two' and 'spaces'
|
|
|
|
|
|
|
|
-q will not work with composed commands as it will quote the ; as
|
|
|
|
well. So composed commands have to be quoted by hand:
|
|
|
|
|
|
|
|
# Using export:
|
|
|
|
FN2="two spaces"
|
|
|
|
export FN2
|
|
|
|
echo 1 | parallel echo {} \"\$FN2\" \; echo \"\$FN2\" {}
|
|
|
|
# Prints 2 spaces between 'two' and 'spaces'
|
|
|
|
|
|
|
|
# Without export:
|
|
|
|
FN3="two spaces"
|
|
|
|
echo 1 | parallel echo {} \""$FN3"\" \; echo \'"$FN3"\' {}
|
|
|
|
|
|
|
|
# By quoting the space in the variable
|
|
|
|
FN4='two\ \ spaces'
|
|
|
|
echo 1 | parallel echo {} $FN4 \; echo $FN4 {}
|
|
|
|
|
|
|
|
|
|
|
|
|
2010-12-15 23:12:02 +00:00
|
|
|
== Bug? ==
|
2010-11-29 22:59:16 +00:00
|
|
|
|
|
|
|
locate .gz | parallel -X find {} -size +1000 -size -2000 | parallel --workdir ... -S .. --trc {/}.bz2 'zcat {} | bzip2 > {/}.bz2'
|
2010-11-22 09:35:53 +00:00
|
|
|
|
|
|
|
|
2010-10-26 23:50:58 +00:00
|
|
|
== Compare ==
|
|
|
|
|
|
|
|
http://code.google.com/p/spawntool/
|
|
|
|
http://code.google.com/p/push/
|
|
|
|
|
|
|
|
== Bug? ==
|
|
|
|
|
|
|
|
.parallel/config with --long-options
|
|
|
|
|
|
|
|
time find . -type f | parallel -j+0 --eta -S..,: --progress --trc {}.gz gzip {}
|
|
|
|
|
2010-09-14 16:37:26 +00:00
|
|
|
== SQL ==
|
|
|
|
|
|
|
|
Example with %0a as newline
|
2010-09-21 23:17:05 +00:00
|
|
|
sql :my_postgres?'\dt %0a SELECT * FROM users'
|
2010-09-14 16:37:26 +00:00
|
|
|
|
2010-09-22 21:54:42 +00:00
|
|
|
cat ~/.sql/aliases | parallel --colsep '\s' sql {1} '"select 0.14+3;" | grep -q 3.14 || (echo dead: {1}; exit 1)'
|
2010-09-14 16:37:26 +00:00
|
|
|
|
2010-09-01 13:26:45 +00:00
|
|
|
== FEX ==
|
|
|
|
|
2010-08-14 18:39:33 +00:00
|
|
|
fex syntax for splitting fields
|
|
|
|
http://www.semicomplete.com/projects/fex/
|
|
|
|
sql :foo 'select * from bar' | parallel --fex '|{1,2}' do_stuff {2} {1}
|
2010-08-01 18:09:31 +00:00
|
|
|
|
2010-07-18 02:17:49 +00:00
|
|
|
|
|
|
|
--autocolsep: Læs alle linjer.
|
|
|
|
Prøv fastlængde: Find tegn, som står i alle linjer på de samme pladser. Risiko for falske positiver
|
|
|
|
Prøv fieldsep: Find eet tegn, som optræder det samme antal gange i alle linjer (tab sep)
|
|
|
|
Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
|
|
|
|
Fjern whitespace før og efter kolonne
|
|
|
|
|
2010-07-18 16:04:07 +00:00
|
|
|
hvis der er n af tegn A og 2n af tegn B, så
|
|
|
|
|
|
|
|
a | b | c
|
|
|
|
|
|
|
|
Simpleste: tab sep
|
|
|
|
|
|
|
|
for hver linje
|
|
|
|
max,min count for hver char
|
|
|
|
|
|
|
|
for hver char
|
|
|
|
if max == min :
|
|
|
|
potentiel
|
|
|
|
min_potentiel = min(min_potentiel,min)
|
|
|
|
|
|
|
|
for potentiel:
|
|
|
|
if min % min_potentiel == 0: sepchars += potentiel, no_of_sepchars += min / min_potentiel
|
|
|
|
|
|
|
|
colsep = [sepchars]{no_of_sepchars}
|
|
|
|
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# Hvordan udregnes system limits på remote systems hvis jeg ikke ved, hvor mange
|
|
|
|
# argumenter, der er? Lav system limits lokalt og lad det være max
|
|
|
|
|
|
|
|
# TODO max_line_length on remote
|
|
|
|
# TODO compute how many can be transferred within max_line_length
|
|
|
|
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
2010-06-14 22:05:47 +00:00
|
|
|
|
2010-07-09 12:53:56 +00:00
|
|
|
=head1 YouTube video2
|
|
|
|
|
|
|
|
Converting WAV files to MP3 using GNU Parallel
|
|
|
|
|
|
|
|
# Run one job per CPU core
|
|
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
|
|
|
|
find music-files -type f | parallel -j+0 lame {} -o {.}.mp3
|
|
|
|
|
|
|
|
# Run one job per CPU core
|
|
|
|
# Run on local computer + 2 remote computers
|
|
|
|
# Give us progress information
|
|
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
|
2010-09-21 20:00:30 +00:00
|
|
|
find music-files -type f | parallel -j+0 -S :,server1,server2 \
|
2010-07-09 12:53:56 +00:00
|
|
|
--eta --trc {.}.mp3 lame {} -o {.}.mp3
|
|
|
|
|
2010-08-21 23:29:26 +00:00
|
|
|
# Colsep
|
|
|
|
# sem
|
2010-09-21 20:00:30 +00:00
|
|
|
# --retry
|
2010-08-21 23:29:26 +00:00
|
|
|
|
2010-10-05 20:22:52 +00:00
|
|
|
(echo a1.txt; echo b1.txt; echo c1.txt; echo a2.txt; echo b2.txt; echo c2.txt;)| \
|
|
|
|
parallel -X -N 3 my-program --file={}
|
|
|
|
|
|
|
|
(echo a1.txt; echo b1.txt; echo c1.txt; echo d1.txt; echo e1.txt; echo f1.txt;)| \
|
|
|
|
parallel -X my-program --file={}
|
|
|
|
|
|
|
|
# First job controls the tty
|
|
|
|
# -u needed because output should not be saved for later
|
|
|
|
|
|
|
|
find . -type f | parallel -uXj1 vim
|
|
|
|
find . -type f | parallel -uXj1 emacs
|
|
|
|
|
|
|
|
# If you have 1000 files and only one contains 'foobar'
|
|
|
|
# stop when this one is found
|
|
|
|
|
|
|
|
find . -type f | parallel grep -l foobar | head -1
|
|
|
|
|
|
|
|
|
|
|
|
# To test whether a list of hosts is up and pingable, save this
|
|
|
|
# to a file called machinesup
|
|
|
|
|
|
|
|
#!/usr/bin/parallel --shebang --no-run-if-empty ping -c 3 {} >/dev/null 2>&1
|
|
|
|
|
|
|
|
google.com
|
|
|
|
yahoo.com
|
|
|
|
nowhere.gone
|
|
|
|
|
|
|
|
# Then:
|
|
|
|
# chmod 755 machinesup
|
|
|
|
# ./machinesup || echo Some machines are down
|
|
|
|
|
|
|
|
|
2010-07-09 12:53:56 +00:00
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
=head1 YouTube video
|
|
|
|
|
|
|
|
GNU Parallel is a tool with lots of uses in shell. Every time you use
|
|
|
|
xargs or a for-loop GNU Parallel can probably do that faster, safer
|
|
|
|
and more readable.
|
|
|
|
|
|
|
|
If you have access to more computers through ssh, GNU Parallel makes
|
|
|
|
it easy to distribute jobs to these.
|
|
|
|
|
|
|
|
terminal2: ssh parallel@vh2.pi.dk
|
|
|
|
ssh parallel@vh2.pi.dk
|
2010-06-16 03:03:52 +00:00
|
|
|
and
|
2010-06-12 23:24:25 +00:00
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
|
2010-06-12 23:24:25 +00:00
|
|
|
gunzip logs/*gz
|
2010-06-16 03:03:52 +00:00
|
|
|
rm -f logs/*bz2*
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf zip/*[^p]
|
2010-06-16 03:03:52 +00:00
|
|
|
rm -rf dirs/*
|
|
|
|
rm -rf parallel-*bz2
|
|
|
|
|
|
|
|
xvidcap
|
2010-06-22 13:24:55 +00:00
|
|
|
ffmpeg -i 20100616_002.mp4 -ab 320k -ar 44100 speak.mp3
|
2010-06-16 03:03:52 +00:00
|
|
|
# Merge video using youtube
|
|
|
|
#ffmpeg -i speak.mp3 -i xvidcap.mpeg -target mpeg -hq -minrate 8000000 \
|
2010-06-22 13:24:55 +00:00
|
|
|
#-title "GNU Parallel" -author "Ole Tange" -copyright "(CC-By-SA) 2010" -comment "Intro video of GNU Parallel 20100616" videoaudio.mpg
|
2010-06-16 03:03:52 +00:00
|
|
|
|
|
|
|
# GNU PARALLEL - BASIC USAGE
|
|
|
|
# A GNU tool for parallelizing shell commands
|
2010-06-12 23:24:25 +00:00
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
## Ole Tange Author
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# GET GNU PARALLEL
|
2010-06-22 13:24:55 +00:00
|
|
|
wget ftp://ftp.gnu.org/gnu/parallel/parallel-20100620.tar.bz2
|
|
|
|
tar xjf parallel-20100620.tar.bz2
|
|
|
|
cd parallel-20100620
|
2010-06-12 23:24:25 +00:00
|
|
|
./configure && make ##
|
|
|
|
su
|
|
|
|
make install
|
|
|
|
exit
|
|
|
|
cd
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
## scp /usr/local/bin/parallel root@parallel:/usr/local/bin/
|
|
|
|
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# YOUR FIRST PARALLEL JOBS
|
|
|
|
cd logs
|
|
|
|
du
|
2010-06-16 03:03:52 +00:00
|
|
|
/usr/bin/time gzip -1 *
|
|
|
|
## 24 sek - 22 sek
|
2010-06-12 23:24:25 +00:00
|
|
|
/usr/bin/time gunzip *
|
2010-06-16 03:03:52 +00:00
|
|
|
## 24 sek - 18
|
|
|
|
ls | time parallel gzip -1
|
|
|
|
## 17 sek - 10
|
2010-06-12 23:24:25 +00:00
|
|
|
ls | time parallel gunzip
|
2010-06-16 03:03:52 +00:00
|
|
|
## 25 sek - 19
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# RECOMPRESS gz TO bz2
|
2010-06-16 03:03:52 +00:00
|
|
|
ls | time parallel gzip -1
|
2010-06-22 13:24:55 +00:00
|
|
|
ls *.gz | time parallel -j+0 --eta 'zcat {} | bzip2 -9 >{.}.bz2'
|
2010-06-16 03:03:52 +00:00
|
|
|
## Explain command line
|
2010-06-12 23:24:25 +00:00
|
|
|
## vis top local
|
2010-06-16 03:03:52 +00:00
|
|
|
## Man that is boring
|
2010-06-22 13:24:55 +00:00
|
|
|
## 2m41s - 2m - 3m35s
|
2010-06-16 03:03:52 +00:00
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# RECOMPRESS gz TO bz2 USING local(:) AND REMOTE server1-4
|
|
|
|
ls *.gz |time parallel -j+0 --eta -Sserver1,server2,server3,server4,: \
|
|
|
|
--transfer --return {.}.bz2 --cleanup 'zcat {} | bzip2 -9 > {.}.bz2'
|
2010-06-16 03:03:52 +00:00
|
|
|
## Explain command line
|
|
|
|
## Explain server config
|
2010-06-12 23:24:25 +00:00
|
|
|
## vis top local
|
2010-06-16 03:03:52 +00:00
|
|
|
## vis top remote1-3
|
|
|
|
## 49 sek
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# RECOMPRESS gz TO bz2 USING A SCRIPT ON local AND REMOTE server1-2,4
|
2010-06-16 03:03:52 +00:00
|
|
|
# (imagine the script is way more complex)
|
|
|
|
cp ../recompress /tmp
|
|
|
|
cat /tmp/recompress
|
2010-06-22 13:24:55 +00:00
|
|
|
ls *.gz |time parallel -j+0 --progress -Sserver1,server2,server4,: \
|
2010-06-16 03:03:52 +00:00
|
|
|
--trc {.}.bz2 --basefile /tmp/recompress '/tmp/recompress {} {.}.bz2'
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# MAKING SMALL SCRIPTS
|
|
|
|
cd ../zip
|
|
|
|
ls -l
|
|
|
|
ls *.zip | parallel 'mkdir {.} && cd {.} && unzip ../{}' ###
|
|
|
|
ls -l
|
|
|
|
|
|
|
|
# GROUP OUTPUT
|
|
|
|
traceroute debian.org
|
|
|
|
traceroute debian.org & traceroute freenetproject.org ###
|
2010-06-16 03:03:52 +00:00
|
|
|
(echo debian.org; echo freenetproject.org) | parallel traceroute ###
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# KEEP ORDER
|
2010-06-16 03:03:52 +00:00
|
|
|
(echo debian.org; echo freenetproject.org) | parallel -k traceroute ###
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# RUN MANY JOBS. USE OUTPUT
|
|
|
|
# Find the number of hosts responding to ping
|
2010-06-16 03:03:52 +00:00
|
|
|
ping -c 1 178.63.11.1
|
|
|
|
ping -c 1 178.63.11.1 | grep '64 bytes'
|
2010-06-12 23:24:25 +00:00
|
|
|
seq 1 255 | parallel -j255 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
seq 1 255 | parallel -j0 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
|
|
|
|
# MULTIPLE ARGUMENTS
|
2010-06-22 13:24:55 +00:00
|
|
|
# make dir: test-(1-5000).dir
|
2010-06-12 23:24:25 +00:00
|
|
|
cd ../dirs
|
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 10 | parallel echo mkdir test-{}.dir
|
|
|
|
seq 1 5000 | time parallel mkdir test-{}.dir
|
|
|
|
## 15 sek
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 10 | parallel -X echo mkdir test-{}.dir
|
|
|
|
seq 1 5000 | time parallel -X mkdir test-{}.dir
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# CALLING GNU PARALLEL FROM GNU PARALLEL
|
2010-06-16 03:03:52 +00:00
|
|
|
# make dir: top-(1-100)/sub-(1-100)
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 100 | time parallel -I @@ \
|
|
|
|
'mkdir top-@@; seq 1 100 | parallel -X mkdir top-@@/sub-{}'
|
2010-06-12 23:24:25 +00:00
|
|
|
find | wc -l
|
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
cd
|
|
|
|
# Thank you for watching
|
|
|
|
#
|
|
|
|
# If you like GNU Parallel:
|
|
|
|
# * Post this video on your blog/Twitter/Facebook/Linkedin
|
|
|
|
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
|
|
|
# * Request or write a review for your favourite magazine
|
|
|
|
# * Request or build a package for your favourite distribution
|
|
|
|
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
|
|
|
#
|
|
|
|
# If GNU Parallel saves you money:
|
|
|
|
# * Donate to FSF https://my.fsf.org/donate/
|
|
|
|
#
|
2010-06-12 23:24:25 +00:00
|
|
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# GIVE ME THE FIRST RESULT
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -H2 traceroute {}";false"
|
|
|
|
|
|
|
|
find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING {}
|
|
|
|
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute
|
2010-06-09 22:39:35 +00:00
|
|
|
|
|
|
|
|
2010-04-19 07:07:12 +00:00
|
|
|
=head1 IDEAS
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
Kan vi lave flere ssh'er, hvis vi venter lidt?
|
|
|
|
En ssh med 20% loss og 900 ms delay, så kan login nås på 15 sek.
|
2010-04-19 07:07:12 +00:00
|
|
|
|
|
|
|
Test if -0 works on filenames ending in '\n'
|
|
|
|
|
2010-06-14 22:05:47 +00:00
|
|
|
If there are no more jobs (STDIN is eof) then make sure to
|
2010-04-19 07:07:12 +00:00
|
|
|
distribute the arguments evenly if running -X.
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
=head1 options
|
2010-04-19 07:07:12 +00:00
|
|
|
|
2011-01-02 00:01:21 +00:00
|
|
|
One char options not used: A F G K O Q R Z c f
|
2010-04-19 07:07:12 +00:00
|
|
|
|
2010-11-02 16:10:19 +00:00
|
|
|
=head1 sem
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
Parallelize so this can be done:
|
|
|
|
mdm.screen find dir -execdir mdm-run cmd {} \;
|
|
|
|
Maybe:
|
|
|
|
find dir -execdir par$ --communication-file /tmp/comfile cmd {} \;
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
find dir -execdir mutex -j4 -b cmd {} \;
|
|
|
|
|
|
|
|
|
|
|
|
=head1 Unlikely
|
|
|
|
|
|
|
|
Accept signal INT instead of TERM to complete current running jobs but
|
|
|
|
do not start new jobs. Print out the number of jobs waiting to
|
|
|
|
complete on STDERR. Accept sig INT again to kill now. This seems to be
|
|
|
|
hard, as all foreground processes get the INT from the shell.
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-08-14 18:39:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Gzip all files in parallel
|
|
|
|
parallel gzip ::: *
|
|
|
|
|
|
|
|
# Convert *.wav to *.mp3 using LAME running one process per CPU core:
|
|
|
|
parallel -j+0 lame {} -o {.}.mp3 ::: *.wav
|
|
|
|
|
|
|
|
# Make an uncompressed version of all *.gz
|
|
|
|
parallel zcat {} ">"{.} ::: *.gz
|
|
|
|
|
|
|
|
# Recompress all .gz files using bzip2 running 1 job per CPU core:
|
|
|
|
find . -name '*.gz' | parallel -j+0 "zcat {} | bzip2 >{.}.bz2 && rm {}"
|
|
|
|
|
|
|
|
# Create a directory for each zip-file and unzip it in that dir
|
|
|
|
parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
|
|
|
|
|
|
|
|
# Convert all *.mp3 in subdirs to *.ogg running
|
|
|
|
# one process per CPU core on local computer and server2
|
|
|
|
find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
|
|
|
|
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
|
|
|
|
|
|
|
|
# Run mycmd on columns 1-3 of each row of TAB separated values
|
|
|
|
parallel -a table_file.tsv --colsep '\t' mycmd -o {2} {3} -i {1}
|
|
|
|
|
|
|
|
# Run traceroute in parallel, but keep the output order the same
|
|
|
|
parallel -k traceroute ::: foss.org.my debian.org freenetproject.org
|