2010-08-14 18:39:33 +00:00
|
|
|
fex syntax for splitting fields
|
|
|
|
http://www.semicomplete.com/projects/fex/
|
|
|
|
sql :foo 'select * from bar' | parallel --fex '|{1,2}' do_stuff {2} {1}
|
2010-08-01 18:09:31 +00:00
|
|
|
|
2010-08-28 00:46:44 +00:00
|
|
|
Example:
|
2010-08-01 18:09:31 +00:00
|
|
|
sql :foo 'select * from bar' | parallel --colsep '\|' do_stuff {4} {1}
|
|
|
|
|
2010-08-28 00:46:44 +00:00
|
|
|
Virker shebang?
|
|
|
|
#!/usr/bin/sql -t
|
|
|
|
|
|
|
|
GNU kræver:
|
|
|
|
--version
|
|
|
|
--help
|
|
|
|
-h
|
|
|
|
|
2010-07-18 02:17:49 +00:00
|
|
|
|
|
|
|
--autocolsep: Læs alle linjer.
|
|
|
|
Prøv fastlængde: Find tegn, som står i alle linjer på de samme pladser. Risiko for falske pos
|
|
|
|
Prøv fieldsep: Find eet tegn, som optræder det samme antal gange i alle linjer (tab sep)
|
|
|
|
Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
|
|
|
|
Fjern whitespace før og efter colonne
|
|
|
|
|
2010-07-18 16:04:07 +00:00
|
|
|
hvis der er n af tegn A og 2n af tegn B, så
|
|
|
|
|
|
|
|
a | b | c
|
|
|
|
|
|
|
|
Simpleste: tab sep
|
|
|
|
|
|
|
|
for hver linje
|
|
|
|
max,min count for hver char
|
|
|
|
|
|
|
|
for hver char
|
|
|
|
if max == min :
|
|
|
|
potentiel
|
|
|
|
min_potentiel = min(min_potentiel,min)
|
|
|
|
|
|
|
|
for potentiel:
|
|
|
|
if min % min_potentiel = 0: sepchars += potentiel,no of sepchars += min / min_potentiel
|
|
|
|
|
|
|
|
colsep = [sepchars]{no_of_sepchars}
|
|
|
|
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# Hvordan udregnes system limits på remote systems hvis jeg ikke ved, hvor mange
|
|
|
|
# argumenter, der er? Lav system limits lokalt og lad det være max
|
|
|
|
|
|
|
|
# TODO max_line_length on remote
|
|
|
|
# TODO compute how many can be transferred within max_line_length
|
|
|
|
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
|
|
|
# TODO --max-number-of-jobs print the system limited number of jobs
|
|
|
|
|
|
|
|
# TODO Debian package
|
|
|
|
|
|
|
|
# TODO To allow killing from a run script, parallel should set PARALLEL_PID, which can then be sent SIGTERM
|
|
|
|
# TAGS: parallel | parallel processing | multicore | multiprocessor | Clustering/Distributed Networks
|
|
|
|
# job control | multiple jobs | parallelization | text processing | cluster | filters
|
|
|
|
# Clustering Tools | Command Line Tools | Utilities | System Administration
|
|
|
|
# Bash parallel
|
2010-06-14 22:05:47 +00:00
|
|
|
|
2010-07-09 12:53:56 +00:00
|
|
|
=head1 YouTube video2
|
|
|
|
|
|
|
|
Converting WAV files to MP3 using GNU Parallel
|
|
|
|
|
|
|
|
# Run one job per CPU core
|
|
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
|
|
|
|
find music-files -type f | parallel -j+0 lame {} -o {.}.mp3
|
|
|
|
|
|
|
|
# Run one job per CPU core
|
|
|
|
# Run on local computer + 2 remote computers
|
|
|
|
# Give us progress information
|
|
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
|
|
|
|
find music-files -type f | parallel -j+0 -S :,computer1.example.com,computer2.example.com \
|
|
|
|
--eta --trc {.}.mp3 lame {} -o {.}.mp3
|
|
|
|
|
2010-08-21 23:29:26 +00:00
|
|
|
# Colsep
|
|
|
|
# sem
|
|
|
|
|
2010-07-09 12:53:56 +00:00
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
=head1 YouTube video
|
|
|
|
|
|
|
|
GNU Parallel is a tool with lots of uses in shell. Every time you use
|
|
|
|
xargs or a for-loop GNU Parallel can probably do that faster, safer
|
|
|
|
and more readable.
|
|
|
|
|
|
|
|
If you have access to more computers through ssh, GNU Parallel makes
|
|
|
|
it easy to distribute jobs to these.
|
|
|
|
|
|
|
|
terminal2: ssh parallel@vh2.pi.dk
|
|
|
|
ssh parallel@vh2.pi.dk
|
2010-06-16 03:03:52 +00:00
|
|
|
and
|
2010-06-12 23:24:25 +00:00
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
|
2010-06-12 23:24:25 +00:00
|
|
|
gunzip logs/*gz
|
2010-06-16 03:03:52 +00:00
|
|
|
rm -f logs/*bz2*
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf zip/*[^p]
|
2010-06-16 03:03:52 +00:00
|
|
|
rm -rf dirs/*
|
|
|
|
rm -rf parallel-*bz2
|
|
|
|
|
|
|
|
xvidcap
|
2010-06-22 13:24:55 +00:00
|
|
|
ffmpeg -i 20100616_002.mp4 -ab 320k -ar 44100 speak.mp3
|
2010-06-16 03:03:52 +00:00
|
|
|
# Merge video using youtube
|
|
|
|
#ffmpeg -i speak.mp3 -i xvidcap.mpeg -target mpeg -hq -minrate 8000000 \
|
2010-06-22 13:24:55 +00:00
|
|
|
#-title "GNU Parallel" -author "Ole Tange" -copyright "(CC-By-SA) 2010" -comment "Intro video of GNU Parallel 20100616" videoaudio.mpg
|
2010-06-16 03:03:52 +00:00
|
|
|
|
|
|
|
# GNU PARALLEL - BASIC USAGE
|
|
|
|
# A GNU tool for parallelizing shell commands
|
2010-06-12 23:24:25 +00:00
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
## Ole Tange Author
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# GET GNU PARALLEL
|
2010-06-22 13:24:55 +00:00
|
|
|
wget ftp://ftp.gnu.org/gnu/parallel/parallel-20100620.tar.bz2
|
|
|
|
tar xjf parallel-20100620.tar.bz2
|
|
|
|
cd parallel-20100620
|
2010-06-12 23:24:25 +00:00
|
|
|
./configure && make ##
|
|
|
|
su
|
|
|
|
make install
|
|
|
|
exit
|
|
|
|
cd
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
## scp /usr/local/bin/parallel root@parallel:/usr/local/bin/
|
|
|
|
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# YOUR FIRST PARALLEL JOBS
|
|
|
|
cd logs
|
|
|
|
du
|
2010-06-16 03:03:52 +00:00
|
|
|
/usr/bin/time gzip -1 *
|
|
|
|
## 24 sek - 22 sek
|
2010-06-12 23:24:25 +00:00
|
|
|
/usr/bin/time gunzip *
|
2010-06-16 03:03:52 +00:00
|
|
|
## 24 sek - 18
|
|
|
|
ls | time parallel gzip -1
|
|
|
|
## 17 sek - 10
|
2010-06-12 23:24:25 +00:00
|
|
|
ls | time parallel gunzip
|
2010-06-16 03:03:52 +00:00
|
|
|
## 25 sek - 19
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# RECOMPRESS gz TO bz2
|
2010-06-16 03:03:52 +00:00
|
|
|
ls | time parallel gzip -1
|
2010-06-22 13:24:55 +00:00
|
|
|
ls *.gz | time parallel -j+0 --eta 'zcat {} | bzip2 -9 >{.}.bz2'
|
2010-06-16 03:03:52 +00:00
|
|
|
## Explain command line
|
2010-06-12 23:24:25 +00:00
|
|
|
## vis top local
|
2010-06-16 03:03:52 +00:00
|
|
|
## Man that is boring
|
2010-06-22 13:24:55 +00:00
|
|
|
## 2m41s - 2m - 3m35s
|
2010-06-16 03:03:52 +00:00
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# RECOMPRESS gz TO bz2 USING local(:) AND REMOTE server1-4
|
|
|
|
ls *.gz |time parallel -j+0 --eta -Sserver1,server2,server3,server4,: \
|
|
|
|
--transfer --return {.}.bz2 --cleanup 'zcat {} | bzip2 -9 > {.}.bz2'
|
2010-06-16 03:03:52 +00:00
|
|
|
## Explain command line
|
|
|
|
## Explain server config
|
2010-06-12 23:24:25 +00:00
|
|
|
## vis top local
|
2010-06-16 03:03:52 +00:00
|
|
|
## vis top remote1-3
|
|
|
|
## 49 sek
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
# RECOMPRESS gz TO bz2 USING A SCRIPT ON local AND REMOTE server1-2,4
|
2010-06-16 03:03:52 +00:00
|
|
|
# (imagine the script is way more complex)
|
|
|
|
cp ../recompress /tmp
|
|
|
|
cat /tmp/recompress
|
2010-06-22 13:24:55 +00:00
|
|
|
ls *.gz |time parallel -j+0 --progress -Sserver1,server2,server4,: \
|
2010-06-16 03:03:52 +00:00
|
|
|
--trc {.}.bz2 --basefile /tmp/recompress '/tmp/recompress {} {.}.bz2'
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# MAKING SMALL SCRIPTS
|
|
|
|
cd ../zip
|
|
|
|
ls -l
|
|
|
|
ls *.zip | parallel 'mkdir {.} && cd {.} && unzip ../{}' ###
|
|
|
|
ls -l
|
|
|
|
|
|
|
|
# GROUP OUTPUT
|
|
|
|
traceroute debian.org
|
|
|
|
traceroute debian.org & traceroute freenetproject.org ###
|
2010-06-16 03:03:52 +00:00
|
|
|
(echo debian.org; echo freenetproject.org) | parallel traceroute ###
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# KEEP ORDER
|
2010-06-16 03:03:52 +00:00
|
|
|
(echo debian.org; echo freenetproject.org) | parallel -k traceroute ###
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# RUN MANY JOBS. USE OUTPUT
|
|
|
|
# Find the number of hosts responding to ping
|
2010-06-16 03:03:52 +00:00
|
|
|
ping -c 1 178.63.11.1
|
|
|
|
ping -c 1 178.63.11.1 | grep '64 bytes'
|
2010-06-12 23:24:25 +00:00
|
|
|
seq 1 255 | parallel -j255 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
seq 1 255 | parallel -j0 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
|
|
|
|
# MULTIPLE ARGUMENTS
|
2010-06-22 13:24:55 +00:00
|
|
|
# make dir: test-(1-5000).dir
|
2010-06-12 23:24:25 +00:00
|
|
|
cd ../dirs
|
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 10 | parallel echo mkdir test-{}.dir
|
|
|
|
seq 1 5000 | time parallel mkdir test-{}.dir
|
|
|
|
## 15 sek
|
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 10 | parallel -X echo mkdir test-{}.dir
|
|
|
|
seq 1 5000 | time parallel -X mkdir test-{}.dir
|
2010-06-12 23:24:25 +00:00
|
|
|
|
|
|
|
# CALLING GNU PARALLEL FROM GNU PARALLEL
|
2010-06-16 03:03:52 +00:00
|
|
|
# make dir: top-(1-100)/sub-(1-100)
|
2010-06-12 23:24:25 +00:00
|
|
|
rm -rf *; sync
|
2010-06-16 03:03:52 +00:00
|
|
|
seq 1 100 | time parallel -I @@ \
|
|
|
|
'mkdir top-@@; seq 1 100 | parallel -X mkdir top-@@/sub-{}'
|
2010-06-12 23:24:25 +00:00
|
|
|
find | wc -l
|
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
cd
|
|
|
|
# Thank you for watching
|
|
|
|
#
|
|
|
|
# If you like GNU Parallel:
|
|
|
|
# * Post this video on your blog/Twitter/Facebook/Linkedin
|
|
|
|
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
|
|
|
# * Request or write a review for your favourite magazine
|
|
|
|
# * Request or build a package for your favourite distribution
|
|
|
|
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
|
|
|
#
|
|
|
|
# If GNU Parallel saves you money:
|
|
|
|
# * Donate to FSF https://my.fsf.org/donate/
|
|
|
|
#
|
2010-06-12 23:24:25 +00:00
|
|
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
|
|
|
|
2010-06-16 03:03:52 +00:00
|
|
|
|
2010-06-12 23:24:25 +00:00
|
|
|
# GIVE ME THE FIRST RESULT
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -H2 traceroute {}";false"
|
|
|
|
|
|
|
|
find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING {}
|
|
|
|
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute
|
2010-06-09 22:39:35 +00:00
|
|
|
|
|
|
|
|
2010-04-19 07:07:12 +00:00
|
|
|
=head1 IDEAS
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
Kan vi lave flere ssh'er, hvis vi venter lidt?
|
|
|
|
En ssh med 20% loss og 900 ms delay, så kan login nås på 15 sek.
|
2010-04-19 07:07:12 +00:00
|
|
|
|
|
|
|
Test if -0 works on filenames ending in '\n'
|
|
|
|
|
2010-06-14 22:05:47 +00:00
|
|
|
If there are no more jobs (STDIN is at EOF) then make sure to
|
2010-04-19 07:07:12 +00:00
|
|
|
distribute the arguments evenly if running -X.
|
|
|
|
|
2010-06-22 13:24:55 +00:00
|
|
|
|
|
|
|
=head1 search terms
|
|
|
|
|
|
|
|
GNU parallel execution shell bash script simultaneous concurrent linux
|
|
|
|
scripting run xargs ppss code.google.com/p/ppss/
|
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
=head1 options
|
2010-04-19 07:07:12 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
One char options not used: F G J K P Q Y
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
Skilletegn i sshlogin:
|
|
|
|
#=item B<--sshlogin> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> (beta testing)
|
|
|
|
# Skilletegn:
|
|
|
|
# No: "#!&()?\<>|;*'~ shellspecial
|
|
|
|
# No: @.- part of user@i.p.n.r i.p.n.r host-name
|
|
|
|
# No: , separates different sshlogins
|
|
|
|
# No: space Will make it hard to do: 8/server1,server2
|
|
|
|
# Maybe: / 8//usr/bin/myssh,//usr/bin/ssh
|
|
|
|
# %/=:_^
|
2010-04-19 07:07:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
=head2 mutex
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
mutex -b -n -l lockid -m max_locks [command]
|
2010-04-19 07:07:12 +00:00
|
|
|
mutex -u lockid
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
-b run command in background
|
2010-04-19 07:07:12 +00:00
|
|
|
-l lockfile will lock using the lockid
|
|
|
|
-n nonblocking
|
|
|
|
-m maximal number of locks (default 1)
|
|
|
|
-u unlock
|
|
|
|
|
|
|
|
If command given works like: mutex -l lockfile -m number_of_locks ; command; mutex -u lockfile
|
2010-06-05 23:03:39 +00:00
|
|
|
If -b given works like: mutex -l lockfile -m number_of_locks ; (command; mutex -u lockfile)&
|
|
|
|
|
|
|
|
Kan vi finde på lockid som giver mening?
|
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
Parallelize so this can be done:
|
|
|
|
mdm.screen find dir -execdir mdm-run cmd {} \;
|
|
|
|
Maybe:
|
|
|
|
find dir -execdir par$ --communication-file /tmp/comfile cmd {} \;
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
find dir -execdir mutex -j4 -b cmd {} \;
|
|
|
|
|
|
|
|
=head2 Comfile
|
|
|
|
|
|
|
|
This will put a lock on /tmp/comfile. The number of locks is the number of running commands.
|
|
|
|
If the number is smaller than -j then it will start a process in the background ( cmd & ),
|
|
|
|
otherwise wait.
|
|
|
|
|
|
|
|
par$ --wait /tmp/comfile will wait until no more locks on the file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
=head1 Unlikely
|
|
|
|
|
|
|
|
Accept signal INT instead of TERM to complete current running jobs but
|
|
|
|
do not start new jobs. Print out the number of jobs waiting to
|
|
|
|
complete on STDERR. Accept sig INT again to kill now. This seems to be
|
|
|
|
hard, as all foreground processes get the INT from the shell.
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-08-14 18:39:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Gzip all files in parallel
|
|
|
|
parallel gzip ::: *
|
|
|
|
|
|
|
|
# Convert *.wav to *.mp3 using LAME running one process per CPU core:
|
|
|
|
parallel -j+0 lame {} -o {.}.mp3 ::: *.wav
|
|
|
|
|
|
|
|
# Make an uncompressed version of all *.gz
|
|
|
|
parallel zcat {} ">"{.} ::: *.gz
|
|
|
|
|
|
|
|
# Recompress all .gz files using bzip2 running 1 job per CPU core:
|
|
|
|
find . -name '*.gz' | parallel -j+0 "zcat {} | bzip2 >{.}.bz2 && rm {}"
|
|
|
|
|
|
|
|
# Create a directory for each zip-file and unzip it in that dir
|
|
|
|
parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
|
|
|
|
|
|
|
|
# Convert all *.mp3 in subdirs to *.ogg running
|
|
|
|
# one process per CPU core on local computer and server2
|
|
|
|
find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
|
|
|
|
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
|
|
|
|
|
|
|
|
# Run mycmd on columns 1-3 of each row of TAB separated values
|
|
|
|
parallel -a table_file.tsv --colsep '\t' mycmd -o {2} {3} -i {1}
|
|
|
|
|
|
|
|
# Run traceroute in parallel, but keep the output order the same
|
|
|
|
parallel -k traceroute ::: foss.org.my debian.org freenetproject.org
|