mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2025-01-04 11:07:55 +00:00
602 lines
16 KiB
Plaintext
602 lines
16 KiB
Plaintext
Postkort:
|
|
- Forside kun figur
|
|
- Bagside:
|
|
- Logo med figur - evt gnu.org/s/parallel
|
|
- kort grå tekst, der forklarer hvad det er.
|
|
- Eet eksempel: parallel gzip ::: *
|
|
- Link til video. http://nd.gd/0s
|
|
|
|
GNU parallel is a UNIX-tool for running commands in parallel.
|
|
To gzip all files running one job per CPU write:
|
|
parallel gzip ::: *
|
|
Watch the intro video to learn more: http://nd.gd/0s
|
|
Or read more about GNU parallel: www.gnu.org/s/parallel
|
|
|
|
job->start():
|
|
$jobslot = Global::jobslot->$sshlogin
|
|
|
|
# Hand out a job slot id for a given sshlogin.
# A previously released id for that sshlogin is reused if one is
# available; otherwise a new id is allocated by incrementing
# $Global::max_jobslot_id.
# Input:
#   $sshlogin = key into %Global::jobslots
# Returns:
#   $jobslot_id = the id of the slot handed out
sub get_jobslot {
    my $sshlogin = shift;
    # %Global::jobslots maps sshlogin => arrayref of free slot ids
    my $jobslot_id = pop @{$Global::jobslots{$sshlogin}};
    if(not defined $jobslot_id) {
        # No free slot for this sshlogin: allocate a new id
        $jobslot_id = ++$Global::max_jobslot_id;
    }
    return $jobslot_id;
}
|
|
|
|
# Give a job slot id back so it can be reused for the same sshlogin.
# Input:
#   $sshlogin = key into %Global::jobslots
#   $jobslot_id = the slot id being released
# Returns: N/A
sub release_jobslot {
    my $sshlogin = shift;
    my $jobslot_id = shift;
    # %Global::jobslots maps sshlogin => arrayref of free slot ids;
    # the arrayref is created on first release.
    push @{$Global::jobslots{$sshlogin}}, $jobslot_id;
}
|
|
|
|
Test sshlogins in parallel. Assume parallel is in path
|
|
|
|
seq 1 10 | parallel -I {o} 'seq 1 255 | parallel echo ssh -oNoHostAuthenticationForLocalhost=true 127.0.{o}.{}' >/tmp/sshloginfile
|
|
seq 1 1000 | parallel --sshloginfile /tmp/sshloginfile echo
|
|
|
|
ssh -F /tmp/
|
|
|
|
Example:
|
|
Chop mbox into emails
|
|
Parallel sort
|
|
|
|
codecoverage
|
|
|
|
Testsuite: sem without ~/.parallel
|
|
|
|
Til QUOTING:
|
|
|
|
FN="two spaces"
|
|
echo 1 | parallel -q echo {} "$FN"
|
|
# Prints 2 spaces between 'two' and 'spaces'
|
|
|
|
-q will not work with composed commands as it will quote the ; as
|
|
well. So composed commands have to be quoted by hand:
|
|
|
|
# Using export:
|
|
FN2="two spaces"
|
|
export FN2
|
|
echo 1 | parallel echo {} \"\$FN2\" \; echo \"\$FN2\" {}
|
|
# Prints 2 spaces between 'two' and 'spaces'
|
|
|
|
# Without export:
|
|
FN3="two spaces"
|
|
echo 1 | parallel echo {} \""$FN3"\" \; echo \'"$FN3"\' {}
|
|
|
|
# By quoting the space in the variable
|
|
FN4='two\ \ spaces'
|
|
echo 1 | parallel echo {} $FN4 \; echo $FN4 {}
|
|
|
|
|
|
|
|
== Bug? ==
|
|
|
|
locate .gz | parallel -X find {} -size +1000 -size -2000 | parallel --workdir ... -S .. --trc {/}.bz2 'zcat {} | bzip2 > {/}.bz2'
|
|
|
|
|
|
== Compare ==
|
|
|
|
http://code.google.com/p/spawntool/
|
|
http://code.google.com/p/push/
|
|
|
|
== Bug? ==
|
|
|
|
.parallel/config with --long-options
|
|
|
|
time find . -type f | parallel -j+0 --eta -S..,: --progress --trc {}.gz gzip {}
|
|
|
|
== SQL ==
|
|
|
|
Example with %0a as newline
|
|
sql :my_postgres?'\dt %0a SELECT * FROM users'
|
|
|
|
cat ~/.sql/aliases | parallel --colsep '\s' sql {1} '"select 0.14+3;" | grep -q 3.14 || (echo dead: {1}; exit 1)'
|
|
|
|
== FEX ==
|
|
|
|
fex syntax for splitting fields
|
|
http://www.semicomplete.com/projects/fex/
|
|
sql :foo 'select * from bar' | parallel --fex '|{1,2}' do_stuff {2} {1}
|
|
|
|
|
|
--autocolsep: Læs alle linjer.
|
|
Prøv fastlængde: Find tegn, som står i alle linjer på de samme pladser. Risiko for falske pos
|
|
Prøv fieldsep: Find eet tegn, som optræder det samme antal gange i alle linjer (tab sep)
|
|
Prøv klyngesep: Find den samme klynge tegn, som står samme antal gange i alle linjer (' | ' sep)
|
|
Fjern whitespace før og efter colonne
|
|
|
|
hvis der er n af tegn A og 2n af tegn B, så
|
|
|
|
a | b | c
|
|
|
|
Simpleste: tab sep
|
|
|
|
for hver linje
|
|
max,min count for hver char
|
|
|
|
for hver char
|
|
if max == min :
|
|
potentiel
|
|
min_potentiel = min(min_potentiel,min)
|
|
|
|
for potentiel:
|
|
if min % min_potentiel = 0: sepchars += potentiel,no of sepchars += min / min_potentiel
|
|
|
|
colsep = [sepchars]{no_of_sepchars}
|
|
|
|
|
|
# Hvordan udregnes system limits på remote systems hvis jeg ikke ved, hvormange
|
|
# argumenter, der er? Lav system limits lokalt og lad det være max
|
|
|
|
# TODO max_line_length on remote
|
|
# TODO compute how many can be transferred within max_line_length
|
|
# TODO Unittest with filename that is long and requires a lot of quoting. Will there be too many
|
|
|
|
=head1 YouTube video --pipe
|
|
|
|
cp parallel.fasta parallel.mbox lucene.tar
|
|
|
|
# GNU Parallel 20110205 - The FOSDEM Release
|
|
|
|
I assume you already know GNU Parallel. If not watch the intro video first.
|
|
|
|
GNU Parallel has so far worked similar to xargs. But the FOSDEM
|
|
release of GNU Parallel introduces the new --pipe option. It makes GNU
|
|
Parallel work similar to tee.
|
|
|
|
tee pipes a copy of the output to a file and a copy to another
|
|
program.
|
|
|
|
seq 1 5 | tee myfile | wc
|
|
|
|
Here it pipes a copy to the file myfile and to the command word count (wc).
|
|
|
|
cat myfile
|
|
|
|
and we can see the content is what we expected.
|
|
|
|
The pipe option of GNU Parallel splits data into records and pipes a
|
|
block of records into a program:
|
|
|
|
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
|
|
|
Here we pipe each number to the command cat and print foo after
|
|
running cat.
|
|
|
|
GNU Parallel does this in parallel starting one process per cpu so the
|
|
order may be different because one command may finish before another.
|
|
|
|
# RECORD SEPARATORS
|
|
|
|
GNU Parallel splits on record separators.
|
|
|
|
seq 1 5 | parallel --pipe --recend '\n' -N1 cat';' echo foo
|
|
|
|
This is the example we saw before: the record separator is \n and
|
|
--recend will keep the record separator at the end of the record.
|
|
|
|
But what if your records start with a record separator? Here is a
|
|
fast-a file:
|
|
|
|
cat parallel.fasta
|
|
|
|
Every record starts with a >. To keep that with the record you use
|
|
--recstart:
|
|
|
|
cat parallel.fasta | parallel --pipe --recstart '>' -N1 cat';' echo foo
|
|
|
|
But what if you have both? An mbox file is an example that has both an
|
|
ending and starting separator:
|
|
|
|
cat parallel.mbox |
|
|
parallel --pipe --recend '\n\n' --recstart 'From ' -N1 cat';' echo foo | less #
|
|
|
|
The two newlines stay with the preceding email and the From_ stays with the next record.
|
|
|
|
GNU Parallel cannot guarantee the first record will start with record
|
|
separator and it cannot guarantee the last record will end with record
|
|
separator. You will simply get what is first and last.
|
|
|
|
But GNU Parallel _does_ guarantee that it will only split at record
|
|
separators.
|
|
|
|
# NUMBER OF RECORDS
|
|
|
|
So far we have used -N1. This tells GNU Parallel to pipe one record to
|
|
the program.
|
|
|
|
seq 1 5 | parallel --pipe -N1 cat';' echo foo
|
|
|
|
But we can choose any amount:
|
|
|
|
seq 1 5 | parallel --pipe -N3 cat';' echo foo
|
|
|
|
This will pipe blocks of 3 records into cat and if there is not enough the last will only get two.
|
|
|
|
# BLOCKSIZE
|
|
|
|
However, using -N is inefficient. It is faster to pipe a full block into the program.
|
|
|
|
cat /usr/share/dict/words | parallel --pipe --blocksize 500k wc
|
|
|
|
We here tell GNU Parallel to split on \n and pipe blocks of 500 KB to
|
|
wc. 1 MB is the default:
|
|
|
|
cat /usr/share/dict/words | parallel --pipe wc
|
|
|
|
If you just have a bunch of bytes you often do not care about the
|
|
record separator. To split input into chunks you can disable the
|
|
--recend
|
|
|
|
ls -l lucene.tar
|
|
cat lucene.tar | parallel --pipe --recend '' -k gzip > lucene.tar.gz
|
|
|
|
GNU Parallel will then split the input into 1 MB blocks; pipe that to
|
|
gzip and -k will make sure the order of the output is kept before
|
|
saving to the tar.gz file.
|
|
|
|
The beauty of gzip is that if you concatenate two gzip files it is a
|
|
valid gzip file. To test this:
|
|
|
|
tar tvzf lucene.tar.gz #
|
|
|
|
# OUTPUT AS FILE
|
|
|
|
Sometimes the output of GNU Parallel cannot be mixed in a single stream like this:
|
|
|
|
seq 1 10 | shuf | parallel --pipe -N 3 sort -n
|
|
|
|
As you can see each block of 3 is sorted but the whole output is not sorted.
|
|
|
|
GNU Parallel can give the output in files. GNU Parallel will then list the
|
|
files created:
|
|
|
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n
|
|
|
|
Each of these files contains a sorted block:
|
|
|
|
cat
|
|
|
|
Sort has -m to merge sorted files into a sorted stream
|
|
|
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n | parallel -mj1 sort -nm
|
|
|
|
-m will append all the files behind the sort command and the -j1 will
|
|
make sure we only run one command. The only part missing now is
|
|
cleaning up by removing the temporary files. We can do that by
|
|
appending rm
|
|
|
|
seq 1 10 | shuf | parallel --pipe --files -N 3 sort -n |
|
|
parallel -mj1 sort -nm {} ";"rm {}
|
|
|
|
|
|
# Thank you for watching
|
|
#
|
|
# If you like GNU Parallel:
|
|
# * Post this video on forums/blogs/Twitter/Facebook/Linkedin
|
|
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
|
# * Request or write a review for your favourite magazine
|
|
# * Request or build a package for your favourite distribution
|
|
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
|
#
|
|
# If GNU Parallel saves you money:
|
|
# * (Have your company) donate to FSF https://my.fsf.org/donate/
|
|
#
|
|
# If you use GNU Parallel for a publication please cite:
|
|
# O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login:
|
|
# The USENIX Magazine, February 2011:42-47.
|
|
#
|
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
|
|
|
|
|
|
|
=head1 YouTube video2
|
|
|
|
Converting of WAV files to MP3 using GNU Parallel
|
|
|
|
# Run one job per CPU core
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
find music-files -type f | parallel -j+0 lame {} -o {.}.mp3
|
|
|
|
# Run one job per CPU core
|
|
# Run on local computer + 2 remote computers
|
|
# Give us progress information
|
|
# For 'foo.wav' call the output file 'foo.mp3'
|
|
|
|
find music-files -type f | parallel -j+0 -S :,server1,server2 \
|
|
--eta --trc {.}.mp3 lame {} -o {.}.mp3
|
|
|
|
# Colsep
|
|
# sem
|
|
# --retry
|
|
|
|
(echo a1.txt; echo b1.txt; echo c1.txt; echo a2.txt; echo b2.txt; echo c2.txt;)| \
|
|
parallel -X -N 3 my-program --file={}
|
|
|
|
(echo a1.txt; echo b1.txt; echo c1.txt; echo d1.txt; echo e1.txt; echo f1.txt;)| \
|
|
parallel -X my-program --file={}
|
|
|
|
# First job controls the tty
|
|
# -u needed because output should not be saved for later
|
|
|
|
find . -type f | parallel -uXj1 vim
|
|
find . -type f | parallel -uXj1 emacs
|
|
|
|
# If you have 1000 files and only one contains 'foobar',
|
|
# stop when this one is found
|
|
|
|
find . -type f | parallel grep -l foobar | head -1
|
|
|
|
|
|
# To test whether a list of hosts is up and pingable, save this
|
|
# to a file called machinesup
|
|
|
|
#!/usr/bin/parallel --shebang --no-run-if-empty ping -c 3 {} >/dev/null 2>&1
|
|
|
|
google.com
|
|
yahoo.com
|
|
nowhere.gone
|
|
|
|
# Then:
|
|
# chmod 755 machinesup
|
|
# ./machinesup || echo Some machines are down
|
|
|
|
|
|
|
|
=head1 YouTube video
|
|
|
|
GNU Parallel is a tool with lots of uses in shell. Every time you use
|
|
xargs or a for-loop GNU Parallel can probably do that faster, safer
|
|
and more readable.
|
|
|
|
If you have access to more computers through ssh, GNU Parallel makes
|
|
it easy to distribute jobs to these.
|
|
|
|
terminal2: ssh parallel@vh2.pi.dk
|
|
ssh parallel@vh2.pi.dk
|
|
and
|
|
|
|
PS1="\[\e[7m\]GNU Parallel:\[\033[01;34m\]\w\[\033[00m\e[27m\]$ "
|
|
gunzip logs/*gz
|
|
rm -f logs/*bz2*
|
|
rm -rf zip/*[^p]
|
|
rm -rf dirs/*
|
|
rm -rf parallel-*bz2
|
|
|
|
xvidcap
|
|
ffmpeg -i 20100616_002.mp4 -ab 320k -ar 44100 speak.mp3
|
|
# Merge video using youtube
|
|
#ffmpeg -i speak.mp3 -i xvidcap.mpeg -target mpeg -hq -minrate 8000000 \
|
|
#-title "GNU Parallel" -author "Ole Tange" -copyright "(CC-By-SA) 2010" -comment "Intro video of GNU Parallel 20100616" videoaudio.mpg
|
|
|
|
# GNU PARALLEL - BASIC USAGE
|
|
# A GNU tool for parallelizing shell commands
|
|
|
|
## Ole Tange Author
|
|
|
|
# GET GNU PARALLEL
|
|
wget ftp://ftp.gnu.org/gnu/parallel/parallel-20100620.tar.bz2
|
|
tar xjf parallel-20100620.tar.bz2
|
|
cd parallel-20100620
|
|
./configure && make ##
|
|
su
|
|
make install
|
|
exit
|
|
cd
|
|
|
|
## scp /usr/local/bin/parallel root@parallel:/usr/local/bin/
|
|
|
|
|
|
# YOUR FIRST PARALLEL JOBS
|
|
cd logs
|
|
du
|
|
/usr/bin/time gzip -1 *
|
|
## 24 sek - 22 sek
|
|
/usr/bin/time gunzip *
|
|
## 24 sek - 18
|
|
ls | time parallel gzip -1
|
|
## 17 sek - 10
|
|
ls | time parallel gunzip
|
|
## 25 sek - 19
|
|
|
|
# RECOMPRESS gz TO bz2
|
|
ls | time parallel gzip -1
|
|
ls *.gz | time parallel -j+0 --eta 'zcat {} | bzip2 -9 >{.}.bz2'
|
|
## Explain command line
|
|
## vis top local
|
|
## Man that is boring
|
|
## 2m41s - 2m - 3m35s
|
|
|
|
# RECOMPRESS gz TO bz2 USING local(:) AND REMOTE server1-4
|
|
ls *.gz |time parallel -j+0 --eta -Sserver1,server2,server3,server4,: \
|
|
--transfer --return {.}.bz2 --cleanup 'zcat {} | bzip2 -9 > {.}.bz2'
|
|
## Explain command line
|
|
## Explain server config
|
|
## vis top local
|
|
## vis top remote1-3
|
|
## 49 sek
|
|
|
|
# RECOMPRESS gz TO bz2 USING A SCRIPT ON local AND REMOTE server1-2,4
|
|
# (imagine the script is way more complex)
|
|
cp ../recompress /tmp
|
|
cat /tmp/recompress
|
|
ls *.gz |time parallel -j+0 --progress -Sserver1,server2,server4,: \
|
|
--trc {.}.bz2 --basefile /tmp/recompress '/tmp/recompress {} {.}.bz2'
|
|
|
|
|
|
# MAKING SMALL SCRIPTS
|
|
cd ../zip
|
|
ls -l
|
|
ls *.zip | parallel 'mkdir {.} && cd {.} && unzip ../{}' ###
|
|
ls -l
|
|
|
|
# GROUP OUTPUT
|
|
traceroute debian.org
|
|
traceroute debian.org & traceroute freenetproject.org ###
|
|
(echo debian.org; echo freenetproject.org) | parallel traceroute ###
|
|
|
|
# KEEP ORDER
|
|
(echo debian.org; echo freenetproject.org) | parallel -k traceroute ###
|
|
|
|
# RUN MANY JOBS. USE OUTPUT
|
|
# Find the number of hosts responding to ping
|
|
ping -c 1 178.63.11.1
|
|
ping -c 1 178.63.11.1 | grep '64 bytes'
|
|
seq 1 255 | parallel -j255 ping -c 1 178.63.11.{} 2>&1 \
|
|
| grep '64 bytes' | wc -l
|
|
seq 1 255 | parallel -j0 ping -c 1 178.63.11.{} 2>&1 \
|
|
| grep '64 bytes' | wc -l
|
|
|
|
# MULTIPLE ARGUMENTS
|
|
# make dir: test-(1-5000).dir
|
|
cd ../dirs
|
|
rm -rf *; sync
|
|
seq 1 10 | parallel echo mkdir test-{}.dir
|
|
seq 1 5000 | time parallel mkdir test-{}.dir
|
|
## 15 sek
|
|
|
|
rm -rf *; sync
|
|
seq 1 10 | parallel -X echo mkdir test-{}.dir
|
|
seq 1 5000 | time parallel -X mkdir test-{}.dir
|
|
|
|
# CALLING GNU PARALLEL FROM GNU PARALLEL
|
|
# make dir: top-(1-100)/sub-(1-100)
|
|
rm -rf *; sync
|
|
seq 1 100 | time parallel -I @@ \
|
|
'mkdir top-@@; seq 1 100 | parallel -X mkdir top-@@/sub-{}'
|
|
find | wc -l
|
|
|
|
cd
|
|
# Thank you for watching
|
|
#
|
|
# If you like GNU Parallel:
|
|
# * Post this video on your blog/Twitter/Facebook/Linkedin
|
|
# * Join the mailing list http://lists.gnu.org/mailman/listinfo/parallel
|
|
# * Request or write a review for your favourite magazine
|
|
# * Request or build a package for your favourite distribution
|
|
# * Invite me for your next conference (Contact http://ole.tange.dk)
|
|
#
|
|
# If GNU Parallel saves you money:
|
|
# * Donate to FSF https://my.fsf.org/donate/
|
|
#
|
|
# Find GNU Parallel at http://www.gnu.org/software/parallel/
|
|
|
|
|
|
# GIVE ME THE FIRST RESULT
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel -H2 traceroute {}";false"
|
|
|
|
find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING {}
|
|
|
|
(echo foss.org.my; echo debian.org; echo freenetproject.org) | parallel traceroute
|
|
|
|
|
|
=head1 IDEAS
|
|
|
|
Kan vi lave flere ssh'er, hvis vi venter lidt?
|
|
En ssh med 20% loss og 900 ms delay, så kan login nås på 15 sek.
|
|
|
|
Test if -0 works on filenames ending in '\n'
|
|
|
|
If there are no more jobs (STDIN is eof) then make sure to
|
|
distribute the arguments evenly if running -X.
|
|
|
|
|
|
=head1 options
|
|
|
|
One char options not used: A F G K O Q R Z c f
|
|
|
|
=head1 sem
|
|
|
|
Parallelize so this can be done:
|
|
mdm.screen find dir -execdir mdm-run cmd {} \;
|
|
Maybe:
|
|
find dir -execdir par$ --communication-file /tmp/comfile cmd {} \;
|
|
|
|
find dir -execdir mutex -j4 -b cmd {} \;
|
|
|
|
|
|
=head1 Unlikely
|
|
|
|
Accept signal INT instead of TERM to complete current running jobs but
|
|
do not start new jobs. Print out the number of jobs waiting to
|
|
complete on STDERR. Accept sig INT again to kill now. This seems to be
|
|
hard, as all foreground processes get the INT from the shell.
|
|
|
|
|
|
|
|
# Gzip all files in parallel
|
|
parallel gzip ::: *
|
|
|
|
# Convert *.wav to *.mp3 using LAME running one process per CPU core:
|
|
parallel -j+0 lame {} -o {.}.mp3 ::: *.wav
|
|
|
|
# Make an uncompressed version of all *.gz
|
|
parallel zcat {} ">"{.} ::: *.gz
|
|
|
|
# Recompress all .gz files using bzip2 running 1 job per CPU core:
|
|
find . -name '*.gz' | parallel -j+0 "zcat {} | bzip2 >{.}.bz2 && rm {}"
|
|
|
|
# Create a directory for each zip-file and unzip it in that dir
|
|
parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip
|
|
|
|
# Convert all *.mp3 in subdirs to *.ogg running
|
|
# one process per CPU core on local computer and server2
|
|
find . -name '*.mp3' | parallel --trc {.}.ogg -j+0 -S server2,: \
|
|
'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg'
|
|
|
|
# Run mycmd on column 1-3 of each row of TAB separated values
|
|
parallel -a table_file.tsv --colsep '\t' mycmd -o {2} {3} -i {1}
|
|
|
|
# Run traceroute in parallel, but keep the output order the same
|
|
parallel -k traceroute ::: foss.org.my debian.org freenetproject.org
|
|
|
|
|
|
Test of signal passing through ssh
|
|
#!/bin/bash
|
|
|
|
# Hosts used by the signal-passing test.
SERVER1="parallel-server3"
SERVER2="parallel-server2"

# PID of the backgrounded local parallel; set by start_remote_sleep.
declare -x BG_PROC
|
|
|
|
# Start 'sleep 370' on $SERVER2 through a backgrounded GNU Parallel,
# record the PID of that local parallel in BG_PROC, and poll the
# remote process list until the sleep shows up there.
start_remote_sleep() {
    local _target=parallel@$SERVER2
    parallel -D -u -S $_target sleep ::: 370 &
    BG_PROC=$!
    # Poll every 0.3 s until the remote ps lists the sleep command.
    until ssh $_target ps -A -o cmd | grep -q '^sleep 370'; do
        sleep 0.3
    done
}
|
|
|
|
# Send SIGKILL to the backgrounded local parallel whose PID is in BG_PROC.
stop_local_parallel() {
    kill -KILL $BG_PROC
}
|
|
|
|
# Print the remote 'sleep 370' process line if it is still listed on
# $SERVER2, then run 'killall sleep' there.
check_and_stop_remote_sleep() {
    local _target=parallel@$SERVER2
    ssh $_target ps -A -o cmd | grep '^sleep 370'
    ssh $_target killall sleep
}
|
|
|
|
# Start the remote sleep, send SIGHUP (signal 1) to the local parallel,
# then show whether the remote sleep is still listed and clean it up.
printf '%s\n' '### Test kill signals'
start_remote_sleep 2>/dev/null
kill -HUP $BG_PROC
check_and_stop_remote_sleep
|
|
|
|
# Pass a signal on to everything listed in %Global::running and then
# call the handler stored in %Global::original_sig for that signal,
# if one is defined.
# Input:
#   $signal = name of the signal to send
sub propagate_signal {
    my ($sig) = @_;
    # $sig = "KILL";
    # The keys of %Global::running are handed to kill as process ids.
    my @targets = keys %Global::running;
    ::debug("Sending $sig to ",@targets);
    kill $sig, @targets;
    my $previous_handler = $Global::original_sig{$sig};
    defined $previous_handler and $previous_handler->();
}
|
|
|
|
# Install propagate_signal as the handler for every signal in %SIG,
# except job-control signals and the __WARN__/__DIE__ pseudo-signals.
my %do_not_propagate = map { $_ => 1 } qw(TTOU TTIN CONT TSTP __WARN__ __DIE__);
for my $signame (keys %SIG) {
    $do_not_propagate{$signame} and next;
    # A closure over the per-iteration $signame replaces the former
    # string eval: no code is generated at runtime and a failed eval
    # can no longer leave the handler silently undefined.
    $SIG{$signame} = sub { propagate_signal($signame); };
}
|
|
|