2010-06-12 23:24:25 +00:00
|
|
|
=head1 YouTube video
|
|
|
|
|
|
|
|
GNU Parallel is a tool with lots of uses in the shell. Every time you use
|
|
|
|
xargs or a for-loop, GNU Parallel can probably do that faster, safer
|
|
|
|
and more readably.
|
|
|
|
|
|
|
|
If you have access to more computers through ssh, GNU Parallel makes
|
|
|
|
it easy to distribute jobs to these.
|
|
|
|
|
|
|
|
terminal2: ssh parallel@vh2.pi.dk
|
|
|
|
ssh parallel@vh2.pi.dk
|
|
|
|
|
|
|
|
|
|
|
|
PS1="\e[7mGNU Parallel:\[\033[01;34m\]\w\[\033[00m\]\e[27m$ "
|
|
|
|
gunzip logs/*gz
|
|
|
|
rm logs/*bz2
|
|
|
|
rm -rf zip/*[^p]
|
|
|
|
|
|
|
|
# GET GNU PARALLEL
|
|
|
|
wget ftp://ftp.gnu.org/gnu/parallel/parallel-20100601.tar.bz2
|
|
|
|
tar xvjf parallel-20100601.tar.bz2
|
|
|
|
cd parallel-20100601
|
|
|
|
./configure && make ##
|
|
|
|
su
|
|
|
|
make install
|
|
|
|
exit
|
|
|
|
cd
|
|
|
|
|
|
|
|
# YOUR FIRST PARALLEL JOBS
|
|
|
|
cd logs
|
|
|
|
du
|
|
|
|
/usr/bin/time gzip *
|
|
|
|
/usr/bin/time gunzip *
|
|
|
|
ls | time parallel gzip
|
|
|
|
ls | time parallel gunzip
|
|
|
|
|
|
|
|
# RECOMPRESS gz TO bz2
|
|
|
|
ls | time parallel gzip
|
|
|
|
ls *.gz | time parallel 'zcat {} | bzip2 > {.}.bz2'
|
|
|
|
## show top local
|
|
|
|
# RECOMPRESS gz TO bz2 USING local(:) AND REMOTE server1
|
|
|
|
ls *.gz | time parallel -S server1,: --trc {.}.bz2 \
|
|
|
|
'zcat {} | bzip2 > {.}.bz2'
|
|
|
|
## show top local
|
|
|
|
## show top remote
|
|
|
|
|
|
|
|
# MAKING SMALL SCRIPTS
|
|
|
|
cd ../zip
|
|
|
|
ls -l
|
|
|
|
ls *.zip | parallel 'mkdir {.} && cd {.} && unzip ../{}' ###
|
|
|
|
ls -l
|
|
|
|
|
|
|
|
# GROUP OUTPUT
|
|
|
|
traceroute debian.org
|
|
|
|
traceroute debian.org & traceroute freenetproject.org ###
|
|
|
|
(echo debian.org; echo freenetproject.org) \
|
|
|
|
| parallel traceroute
|
|
|
|
|
|
|
|
# KEEP ORDER
|
|
|
|
(echo debian.org; echo freenetproject.org) \
|
|
|
|
| parallel -k traceroute
|
|
|
|
|
|
|
|
# RUN MANY JOBS. USE OUTPUT
|
|
|
|
# Find the number of hosts responding to ping
|
|
|
|
seq 1 255 | parallel -j255 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
seq 1 255 | parallel -j0 ping -c 1 178.63.11.{} 2>&1 \
|
|
|
|
| grep '64 bytes' | wc -l
|
|
|
|
|
|
|
|
# MULTIPLE ARGUMENTS
|
|
|
|
# make dir: (1-20000).dir
|
|
|
|
cd ../dirs
|
|
|
|
rm -rf *; sync
|
|
|
|
seq 1 20000 | time parallel mkdir {}.dir
|
|
|
|
rm -rf *; sync
|
|
|
|
seq 1 20000 | time parallel -X mkdir {}.dir
|
|
|
|
|
|
|
|
# CALLING GNU PARALLEL FROM GNU PARALLEL
|
|
|
|
# make dir: top-(1-100)/sub-(1-300)
|
|
|
|
rm -rf *; sync
|
|
|
|
seq 1 100 | time parallel -I /// \
|
|
|
|
'mkdir top-///;cd top-///; seq 1 300 | parallel -X mkdir sub-{}'
|
|
|
|
find | wc -l
|
|
|
|
|
|
|
|
# Thanks for watching
|
|
|
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
|
|
|
|
|
|
|
# GIVE ME THE FIRST RESULT
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -H2 traceroute {}";false"
|
|
|
|
|
|
|
|
find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING {}
|
|
|
|
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute
|
2010-06-09 22:39:35 +00:00
|
|
|
|
|
|
|
|
2010-04-19 07:07:12 +00:00
|
|
|
=head1 IDEAS
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
Can we start more ssh connections if we wait a little?
|
|
|
|
With an ssh connection with 20% loss and 900 ms delay, login can be reached in 15 sec.
|
2010-04-19 07:07:12 +00:00
|
|
|
|
|
|
|
Test if -0 works on filenames ending in '\n'
|
|
|
|
|
|
|
|
monitor to see which jobs are currently running
|
|
|
|
http://code.google.com/p/ppss/
|
|
|
|
|
|
|
|
If there are no more jobs (STDIN is closed) then make sure to
|
|
|
|
distribute the arguments evenly if running -X.
|
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
=head1 options
|
2010-04-19 07:07:12 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
One char options not used: F G J K P Q Y
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
Separator character in sshlogin:
|
|
|
|
#=item B<--sshlogin> I<[ncpu/]sshlogin[,[ncpu/]sshlogin[,...]]> (beta testing)
|
|
|
|
# Separator character:
|
|
|
|
# No: "#!&()?\<>|;*'~ shellspecial
|
|
|
|
# No: @.- part of user@i.p.n.r i.p.n.r host-name
|
|
|
|
# No: , separates different sshlogins
|
|
|
|
# No: space Will make it hard to do: 8/server1,server2
|
|
|
|
# Maybe: / 8//usr/bin/myssh,//usr/bin/ssh
|
|
|
|
# %/=:_^
|
2010-04-19 07:07:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
=head2 mutex
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
mutex -b -n -l lockid -m max_locks [command]
|
2010-04-19 07:07:12 +00:00
|
|
|
mutex -u lockid
|
|
|
|
|
2010-06-05 23:03:39 +00:00
|
|
|
-b run command in background
|
2010-04-19 07:07:12 +00:00
|
|
|
-l lockfile will lock using the lockid
|
|
|
|
-n nonblocking
|
|
|
|
-m maximal number of locks (default 1)
|
|
|
|
-u unlock
|
|
|
|
|
|
|
|
If a command is given, it works like: mutex -l lockfile -m number_of_locks ; command; mutex -u lockfile
|
2010-06-05 23:03:39 +00:00
|
|
|
If -b is given, it works like: mutex -l lockfile -m number_of_locks ; (command; mutex -u lockfile)&
|
|
|
|
|
|
|
|
Can we come up with a lockid that makes sense?
|
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
Parallelize so this can be done:
|
|
|
|
mdm.screen find dir -execdir mdm-run cmd {} \;
|
|
|
|
Maybe:
|
|
|
|
find dir -execdir par$ --communication-file /tmp/comfile cmd {} \;
|
2010-06-05 23:03:39 +00:00
|
|
|
|
2010-06-09 20:26:59 +00:00
|
|
|
find dir -execdir mutex -j4 -b cmd {} \;
|
|
|
|
|
|
|
|
=head2 Comfile
|
|
|
|
|
|
|
|
This will put a lock on /tmp/comfile. The number of locks is the number of running commands.
|
|
|
|
If the number is smaller than -j then it will start a process in the background ( cmd & ),
|
|
|
|
otherwise wait.
|
|
|
|
|
|
|
|
par$ --wait /tmp/comfile will wait until there are no more locks on the file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
=head1 Unlikely
|
|
|
|
|
|
|
|
Accept signal INT instead of TERM to complete currently running jobs but
|
|
|
|
do not start new jobs. Print out the number of jobs waiting to
|
|
|
|
complete on STDERR. Accept sig INT again to kill now. This seems to be
|
|
|
|
hard, as all foreground processes get the INT from the shell.
|
2010-06-05 23:03:39 +00:00
|
|
|
|