parallel: --compress passes tests.

This commit is contained in:
Ole Tange 2014-02-21 04:07:56 +01:00
parent 18ce76f598
commit 9aaa763447
5 changed files with 70 additions and 58 deletions

View file

@ -33,7 +33,7 @@ if test "$GET" = ""; then
fi
LANG=C
LATEST=$($GET http://ftpmirror.gnu.org/parallel | perl -ne '/(parallel-\d{8})/ and print $1."\n"' | sort | $TAIL -n1)
LATEST=$($GET http://ftpmirror.gnu.org/parallel | perl -ne '/.*(parallel-\d{8})/ and print $1."\n"' | sort | $TAIL -n1)
if test \! -e $LATEST.tar.bz2; then
# Source tar does not exist
rm -f $LATEST.tar.bz2 $LATEST.tar.bz2.sig

View file

@ -204,51 +204,52 @@ cc:Sandro Cazzaniga <kharec@mandriva.org>,
Ryoichiro Suzuki <ryoichiro.suzuki@gmail.com>,
Jesse Alama <jesse.alama@gmail.com>
Subject: GNU Parallel 20140222 ('Со́чи') released
Subject: GNU Parallel 20140222 ('Со́чи/Euromaidan') released
GNU Parallel 20140222 ('Со́чи') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/
GNU Parallel 20140222 ('Со́чи/Euromaidan') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/
Packagers that default to --tollef should take note that --tollef is now retired after a full year of being obsolete.
The --compress change is so big that this release is of alpha quality.
New in this release:
* --tollef has been retired.
* --compress has been redesigned due to bugs.
* GNU Parallel was cited in: Speaker Verification and Keyword Spotting
Systems for Forensic Applications
http://lantana.tenet.res.in/website_files/thesis/Phd/srikanth.pdf
* GNU Parallel was cited in: Speaker Verification and Keyword Spotting Systems for Forensic Applications http://lantana.tenet.res.in/website_files/thesis/Phd/srikanth.pdf
* GNU Parallel was cited in: Scaleable Code Clone Detection
http://scg.unibe.ch/archive/phd/schwarz-phd.pdf
* GNU Parallel was cited in: Scaleable Code Clone Detection http://scg.unibe.ch/archive/phd/schwarz-phd.pdf
* GNU Parallel was used (unfortunately without citation) in:
Performance and Energy Efficiency of Common Compression /
Decompression Utilities
http://www.researchgate.net/publication/243962643_Performance_and_Energy_Efficiency_of_Common_CompressionDecompression_Utilities_An_Experimental_Study_in_Mobile_and_Workstation_Computer_Platforms/file/3deec51d1dbc0474f9.pdf
* GNU Parallel was cited in: Clone Detection that scales http://scg.unibe.ch/archive/masters/Vogt14a.pdf
* GNU Parallel was recommended (without citation) in: Redesigning the
Specificity of Protein-DNA Interactions with Rosetta
http://link.springer.com/protocol/10.1007/978-1-62703-968-0_17
* GNU Parallel was cited in: Scalable Positional Analysis for Studying Evolution of Nodes in Networks http://arxiv-web3.library.cornell.edu/pdf/1402.3797
* GNU Parallel is co-distributed with RepeatExplorer
http://www.vcru.wisc.edu/simonlab/bioinformatics/programs/repeatexplorer/README.txt
* GNU Parallel was cited in: Aggregate Characterization of User Behavior in Twitter and Analysis of the Retweet Graph http://arxiv.org/pdf/1402.2671v1
* Convert MP3 to a scrolling spectrum waterfall plot video
http://jdesbonnet.blogspot.dk/2014/02/convert-mp3-to-scrolling-spectrum.html
* GNU Parallel was used (unfortunately without citation) in: Causal Network Models for Predicting Compound Targets and Driving Pathways in Cancer http://jbx.sagepub.com/content/early/2014/02/10/1087057114522690.full
* PHP wrapper class for the GNU Parallel tool
https://github.com/geopal-solutions/gnu-parallel-wrapper/tree/master
* GNU Parallel was used (unfortunately without citation) in: Performance and Energy Efficiency of Common Compression / Decompression Utilities http://www.researchgate.net/publication/243962643_Performance_and_Energy_Efficiency_of_Common_CompressionDecompression_Utilities_An_Experimental_Study_in_Mobile_and_Workstation_Computer_Platforms/file/3deec51d1dbc0474f9.pdf
* Exploratory Data Analysis
http://www.slideshare.net/thinrhino/gnunify
* GNU Parallel was used (unfortunately without citation) in: SpringFS: Bridging Agility and Performance in Elastic Distributed Storage http://www.istc-cc.cmu.edu/publications/papers/2014/SpringFS-FAST14.pdf
* Single-Thread-Programme auf Multicore-Rechnern parallelisieren
http://www.adlerweb.info/blog/2014/02/08/linux-single-thread-programme-auf-multicore-rechnern-parallelisieren
* GNU Parallel was recommended (without citation) in: Redesigning the Specificity of Protein-DNA Interactions with Rosetta http://link.springer.com/protocol/10.1007/978-1-62703-968-0_17
* Распараллеливаем процессы для ускорения вычислений и выполнения заданий в Linux
http://savepearlharbor.com/?p=210480
* GNU Parallel is co-distributed with RepeatExplorer http://www.vcru.wisc.edu/simonlab/bioinformatics/programs/repeatexplorer/README.txt
* Convert MP3 to a scrolling spectrum waterfall plot video http://jdesbonnet.blogspot.dk/2014/02/convert-mp3-to-scrolling-spectrum.html
* PHP wrapper class for the GNU Parallel tool https://github.com/geopal-solutions/gnu-parallel-wrapper/tree/master
* Exploratory Data Analysis http://www.slideshare.net/thinrhino/gnunify
* Copias de archivos con rsync y parallel http://linuxmanr4.com/2014/02/20/copias-de-archivos-con-rsync-y-parallel
* Single-Thread-Programme auf Multicore-Rechnern parallelisieren http://www.adlerweb.info/blog/2014/02/08/linux-single-thread-programme-auf-multicore-rechnern-parallelisieren
* Распараллеливаем процессы для ускорения вычислений и выполнения заданий в Linux http://savepearlharbor.com/?p=210480
* Bug fixes and man page updates.

View file

@ -4037,21 +4037,17 @@ sub seq {
sub cattail {
# Returns:
# $cattail = perl program for: cattail "decompress program" [file_to_decompress or stdin]
# $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
my $cattail = q{
# cat followed by tail.
# SIGHUP says there will be appended no more, so just finish after this round.
# If $writerpid dead: finish after this round
use Fcntl;
$SIG{HUP} = sub { $Global::sighup = 1; };
$|=1;
$cmd = shift;
if(@ARGV) {
open(IN,"<",$ARGV[0]) || die("Cannot open $ARGV[0]");
my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
if($read_file) {
open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
} else {
*IN = *STDIN;
}
@ -4060,14 +4056,16 @@ sub cattail {
fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
$flags |= O_NONBLOCK; # Add non-blocking to the flags
fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
open(OUT,"|-",$cmd) || die;
open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
while(1) {
# clear EOF
seek(IN,0,1);
my $writer_running = kill 0, $writerpid;
$read = sysread(IN,$buf,1_000_000);
if($read) {
# We can unlink the file now: The writer has written something
-e $unlink_file and unlink $unlink_file;
# Blocking print
while($buf) {
my $bytes_written = syswrite(OUT,$buf);
@ -4077,12 +4075,14 @@ sub cattail {
# Something printed: Wait less next time
$sleep /= 2;
} else {
if(eof(IN) and $Global::sighup) {
# SIGHUP received: There will never be more to read => exit
if(eof(IN) and not $writer_running) {
# Writer dead: There will never be more to read => exit
exit;
}
# Nothing read: Wait longer next time
$sleep = 1.05*$sleep + 0.01;
# TODO This could probably be done more efficiently using select(2)
# Nothing read: Wait longer before next read
# Up to 30 milliseconds
$sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
usleep($sleep);
}
}
@ -4159,12 +4159,13 @@ sub openoutputfiles {
# cattail get pid i $pid = $self->fh($fdno,'rpid');
my $cattail = cattail();
for my $fdno (1,2) {
my $wpid = open(my $fdw,"|-","$opt::compress_program >".
my $wpid = open(my $fdw,"|-","$opt::compress_program >>".
$self->fh($fdno,'name')) || die $?;
$self->set_fh($fdno,'w',$fdw);
$self->set_fh($fdno,'wpid',$wpid);
my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail,
$opt::decompress_program, $self->fh($fdno,'name')) || die $?;
$opt::decompress_program, $wpid,
$self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
$self->set_fh($fdno,'r',$fdr);
$self->set_fh($fdno,'rpid',$rpid);
# Unlink if required but only when cattail and compress_program has started.
@ -4927,7 +4928,6 @@ sub print {
if($opt::pipe and $self->virgin()) {
} else {
if($Global::joblog) { $self->print_joblog() }
}
# Printing is only relevant for grouped output.
$Global::grouped or return;
@ -4950,6 +4950,7 @@ sub print {
# so flush to avoid STDOUT being buffered
flush STDOUT;
}
}
for my $fdno (sort { $a <=> $b } keys %Global::fd) {
# Sort by file descriptor numerically: 1,2,3,..,9,10,11
$fdno == 0 and next;
@ -4961,7 +4962,7 @@ sub print {
}
next;
}
::debug("File descriptor $fdno:");
::debug("File descriptor $fdno (".$self->fh($fdno,"name")."):");
if($opt::files) {
# If --compress: $in_fh must be closed first.
close $self->fh($fdno,"w");
@ -4978,8 +4979,13 @@ sub print {
} elsif($opt::linebuffer) {
# Line buffered print out
my $partial = \$self->{'partial_line',$fdno};
if(defined $self->{'exitstatus'}) {
# If the job is dead: close printing fh. Needed for --compress
close $self->fh($fdno,"w");
}
# This seek will clear EOF
seek $in_fh, tell($in_fh), 0;
# The read is non-blocking: The $in_fh is set to non-blocking.
while(read($in_fh,substr($$partial,length $$partial),1_000_000)) {
# Append to $$partial
# Find the last \n
@ -5007,8 +5013,11 @@ sub print {
}
}
if(defined $self->{'exitstatus'} and length $$partial > 0) {
if($self->fh($fdno,"rpid") and kill 0, $self->fh($fdno,"rpid")) {
# decompress still running
} elsif(defined $self->{'exitstatus'} and length $$partial > 0) {
# If the job is dead: print the remaining partial line
# read remaining
if($opt::tag or defined $opt::tagstring) {
my $tag = $self->tag();
$$partial =~ s/^/$tag/gm;
@ -5021,16 +5030,7 @@ sub print {
} else {
my $buf;
close $self->fh($fdno,"w");
if($self->fh($fdno,"wpid")) {
# This is --compress
# Wait for $compress_program to finish
waitpid($self->fh($fdno,"wpid"),0);
# Then tell cattail this the last reading
CORE::kill "HUP", $self->fh($fdno,"rpid");
} else {
# Seek to start
seek $in_fh, 0, 0;
}
# $in_fh is now ready for reading at position 0
if($opt::tag or defined $opt::tagstring) {
my $tag = $self->tag();
@ -5064,8 +5064,8 @@ sub print {
close $in_fh;
}
flush $out_fd;
::debug("<<joboutput $command\n");
}
::debug("<<joboutput $command\n");
}
sub print_joblog {

View file

@ -35,6 +35,12 @@ echo "### bug #41609: --compress fails"
seq 12 | parallel --compress --compress-program bzip2 -k seq {} 1000000 | md5sum
seq 12 | parallel --compress -k seq {} 1000000 | md5sum
echo "### --compress race condition (use nice): Fewer than 400 would run"
seq 400| nice parallel -j200 --compress echo | wc
echo "### -v --pipe: Dont spawn too many - 1 is enough"
seq 1 | parallel -j10 -v --pipe cat
echo "### Test -I";
seq 1 10 | parallel -k 'seq 1 {} | parallel -k -I :: echo {} ::'

View file

@ -41,6 +41,11 @@ echo 4
### bug #41609: --compress fails
24812dd0f24a26d08a780f988b9d5ad2 -
24812dd0f24a26d08a780f988b9d5ad2 -
### --compress race condition (use nice): Fewer than 400 would run
400 400 1492
### -v --pipe: Dont spawn too many - 1 is enough
cat
1
### Test -I
1 1
2 1