diff --git a/doc/release_new_version b/doc/release_new_version index 41b2c29f..d55f3155 100644 --- a/doc/release_new_version +++ b/doc/release_new_version @@ -217,60 +217,24 @@ cc:Tim Cuthbertson , Ryoichiro Suzuki , Jesse Alama -Subject: GNU Parallel 20140722 ('MH17') released +Subject: GNU Parallel 20140822 ('Argentina/Gaza') released -GNU Parallel 20140722 ('MH17') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ - -This release contains a major change in central parts of the code and should be considered beta quality. As always it passes the testsuite, so most functionality clearly works. - -Thanks to Malcolm Cook for the brilliant idea to use a general perl expression as a replacement string. +GNU Parallel 20140822 ('Argentina/Gaza') has been released. It is available for download at: http://ftp.gnu.org/gnu/parallel/ Haiku of the month: - Are you tired of + >>>Are you tired of inflexible replacements? Use Perl expressions. -- Ole Tange New in this release: -* {= perl expression =} can be used as replacement string. The expression should modify $_. E.g. {= s/\.gz$// =} to remove .gz from the string. This makes replacement strings extremely flexible. +* GNU Parallel was cited in: A Web Service for Scholarly Big Data Information Extraction http://patshih.ist.psu.edu/publications/Williams-CiteSeerExtractor-ICWS14.pdf -* Positional perl expressions (similar to {2}) are given as {=2 perl expression=} where 2 is the position. +* GNU Parallel was covered in the webcast 2014-08-20: Data Science at the Command Line http://www.oreilly.com/pub/e/3115 -* One small backwards incompatability: {1}_{2} will replace {2} with the empty string if there is only one argument. Previously {2} would have been left untouched. - -* Replacement strings can be defined using --rpl. E.g. parallel --rpl '{.gz} s/\.gz$//' echo {.gz} ::: *.gz - -* The parenthesis around {= perl expression =} can be changed with --parens. - -* --tmux will direct the output to a tmux session instead of files. Each running jobs will be in its own window. - -* --halt 10% will stop spawning new jobs if 10% failed so far. - -* GNU Parallel was cited in: bammds: A tool for assessing the ancestry of low depth whole genome data using multidimensional scaling (MDS) http://bioinformatics.oxfordjournals.org/content/early/2014/06/28/bioinformatics.btu410.abstract - -* GNU Parallel was cited in: Molecular ferroelectric contributions to anomalous hysteresis in hybrid perovskite solar cells http://people.bath.ac.uk/aw558/publications/2014/arxiv_hysteresis_14.pdf - -* GNU Parallel was cited in: Energy Sorghum-a genetic model for the design of C4 grass bioenergy crops http://jxb.oxfordjournals.org/content/early/2014/06/20/jxb.eru229.short - -* GNU Parallel was cited in: Web-scale Content Reuse Detection www.isi.edu/~johnh/PAPERS/Ardi14a.pdf - -* Tell your friends to sign up for the Webcast at 2014-08-20 covering GNU Parallel: Data Science at the Command Line http://www.oreilly.com/pub/e/3115 - -* GNU Parallel all the things! http://longwayaround.org.uk/notes/gnu-parallel-all-the-things/ - -* Shell command composition and dispatch http://lukeluo.blogspot.dk/2014/07/linux-virtual-console6-shell-command.html - -* Parallelising plink (or anything else) the easy way http://chrisladroue.com/2012/03/parallelising-plink-or-anything-else-the-easy-way/ - -* Easy and cheap cluster building on AWS https://grapeot.me/easy-and-cheap-cluster-building-on-aws.html - -* Paralelizace běžných činností v konzoli pomocí GNU Parallel http://www.abclinuxu.cz/clanky/paralelizace-beznych-cinnosti-v-konzoli-pomoci-gnu-parallel - -* [原] Ubuntu 下使用 parallel 命令的注意事项 http://blog.ailms.me/2014/06/28/ubuntu-with-parallel.html - -* 简单的并行, 可使用GNU parallel http://jerkwin.github.io/2014/06/10/GNU-parallel/ +* Сборка GNU parallel для CentOS/RHEL http://www.stableit.ru/2014/07/gnu-parallel-centosrhel.html * Bug fixes and man page updates. diff --git a/src/parallel b/src/parallel index ecab950b..933641dd 100755 --- a/src/parallel +++ b/src/parallel @@ -230,6 +230,7 @@ sub find_split_positions { # @positions of block start/end my($file, $block, $headerlen) = @_; my $size = -s $file; + $block = int $block; # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 # The optimal dd blocksize for freebsd = 2^15..2^17 my $dd_block_size = 131072; # 2^17 @@ -770,6 +771,7 @@ sub parse_options { $Global::trim = 'n'; $Global::max_jobs_running = 0; $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; @ARGV=read_options(); @@ -1315,7 +1317,7 @@ sub read_args_from_command_line { # Group of arguments on the command line. # Put them into a file. # Create argfile - my ($outfh,$name) = ::tempfile(SUFFIX => ".arg"); + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); unlink($name); # Put args into argfile print $outfh map { $_,$/ } @group; @@ -1820,7 +1822,7 @@ sub progress { $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; # [\011\013\014] messes up display in the terminal $arg =~ tr/[\011-\016\033\302-\365]//d; - my $bar_text = + my $bar_text = sprintf("%d%% %d:%d=%ds %s", $pctcomplete*100, $completed, $left, $this_eta, $arg); my $rev = "\033[7m"; @@ -2173,7 +2175,7 @@ sub filter_hosts { # 'echo' is used to get the best possible value for an ssh login time push(@echo, $host."\t".$sshcmd." echo\n"); } - my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".ssh"); + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); print $fh @cores, @cpus, @maxline, @echo; close $fh; # --timeout 5: Setting up an SSH connection and running a simple @@ -2289,7 +2291,7 @@ sub onall { if(not defined $joblog) { return undef; } - my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".log"); + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); close $fh; return $tmpfile; } @@ -2301,7 +2303,7 @@ sub onall { # Copy all @fhlist into tempfiles my @argfiles = (); for my $fh (@fhlist) { - my ($outfh, $name) = ::tempfile(SUFFIX => ".all", UNLINK => 1); + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); print $outfh (<$fh>); close $outfh; push @argfiles, $name; @@ -2857,6 +2859,9 @@ sub multiply_binary_prefix { return $s; } +sub tmpfile { + return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); +} sub __DEBUGGING__ {} sub debug { @@ -3100,7 +3105,7 @@ sub swap_activity { # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'swap_activity_file'}; - my ($dummy_fh, $tmpfile) = ::tempfile(SUFFIX => ".swp"); + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); ::debug("swap", "\n", $swap_activity, "\n"); qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } @@ -3322,7 +3327,7 @@ sub loadavg { # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'loadavg_file'}; - my ($dummy_fh, $tmpfile) = ::tempfile(SUFFIX => ".loa"); + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } return $self->{'loadavg'}; @@ -4574,14 +4579,14 @@ sub openoutputfiles { if(@Global::tee_jobs) { # files must be removed when the tee is done } elsif($opt::files) { - ($outfhw, $outname) = ::tempfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tempfile(SUFFIX => ".par"); + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); # --files => only remove stderr $self->set_fh(1,"unlink",""); $self->set_fh(2,"unlink",$errname); } else { - ($outfhw, $outname) = ::tempfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tempfile(SUFFIX => ".par"); + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); $self->set_fh(1,"unlink",$outname); $self->set_fh(2,"unlink",$errname); } @@ -5320,7 +5325,7 @@ sub start { # Bug: # If the command does not read the first char, the temp file # is not deleted. - my ($dummy_fh, $tmpfile) = ::tempfile(SUFFIX => ".chr"); + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); $command = qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; @@ -5406,7 +5411,7 @@ sub tmux_wrap { my $self = shift; my $actual_command = shift; # Temporary file name. Used for fifo to communicate exit val - my ($fh, $tmpfile) = ::tempfile(SUFFIX => ".tmx"); + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx"); $Global::unlink{$tmpfile}=1; close $fh; unlink $tmpfile; @@ -5719,25 +5724,42 @@ sub set_exitsignal { } { - my ($disk_full_fh,$error_printed); + my ($disk_full_fh,$b8193); sub exit_if_disk_full { # Checks if $TMPDIR is full by writing 8kb to a tmpfile # If the disk is full: Exit immediately. # Returns: # N/A if(not $disk_full_fh) { - $disk_full_fh = ::tempfile(); + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "x"x8193; } my $pos = tell $disk_full_fh; - print $disk_full_fh "x"x8193; + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; if(not $disk_full_fh or tell $disk_full_fh == $pos) { - ::error("Output is incomplete. Cannot append to buffer file in \$TMPDIR. Is the disk full?\n"); + ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); ::wait_and_exit(255); } - truncate $disk_full_fh, $pos; + truncate $disk_full_fh, 0; } } @@ -5802,7 +5824,7 @@ sub populate { if($opt::cat or $opt::fifo) { # Get a tempfile name - my($outfh,$name) = ::tempfile(SUFFIX => ".pip"); + my($outfh,$name) = ::tmpfile(SUFFIX => ".pip"); close $outfh; # Unlink is needed if: ssh otheruser@localhost unlink $name; diff --git a/testsuite/tests-to-run/parallel-local22.sh b/testsuite/tests-to-run/parallel-local22.sh index b27ac92b..f24111be 100755 --- a/testsuite/tests-to-run/parallel-local22.sh +++ b/testsuite/tests-to-run/parallel-local22.sh @@ -49,16 +49,21 @@ echo '### bug #42041: Implement $PARALLEL_JOBSLOT' echo '### bug #42363: --pipepart and --fifo/--cat does not work' seq 100 > /tmp/bug42363; - parallel --pipepart --block 31 -a /tmp/bug42363 -k --fifo wc | perl -pe s:/tmp/...........pip:/tmp/XXXX: ; - parallel --pipepart --block 31 -a /tmp/bug42363 -k --cat wc | perl -pe s:/tmp/...........pip:/tmp/XXXX: ; + parallel --pipepart --block 31 -a /tmp/bug42363 -k --fifo wc | perl -pe s:/tmp/.........pip:/tmp/XXXX: ; + parallel --pipepart --block 31 -a /tmp/bug42363 -k --cat wc | perl -pe s:/tmp/.........pip:/tmp/XXXX: ; echo '### bug #42055: --pipe -a bigfile should not require sequential reading of bigfile' parallel --pipepart -a /etc/passwd -L 1 should not be run parallel --pipepart -a /etc/passwd -N 1 should not be run parallel --pipepart -a /etc/passwd -l 1 should not be run -echo '### --tmux test - check termination' - perl -e 'map {printf "$_%o%c\n",$_,$_}1..255' | stdout parallel --tmux echo {} :::: - ::: a b | perl -pe 's/\d/0/g' +# TODO This is too unstable +# echo '### --tmux test - check termination' +# perl -e 'map {printf "$_%o%c\n",$_,$_}1..255' | stdout parallel --tmux echo {} :::: - ::: a b | perl -pe 's/\d/0/g' +echo '### bug #42893: --block should not cause decimals in cat_partial' + seq 100000 >/tmp/parallel-decimal; + parallel --dry-run -kvv --pipepart --block 0.12345M -a /tmp/parallel-decimal true; + rm /tmp/parallel-decimal EOF diff --git a/testsuite/wanted-results/parallel-local-30s b/testsuite/wanted-results/parallel-local-30s index 2d1c59ab..1e23b616 100644 --- a/testsuite/wanted-results/parallel-local-30s +++ b/testsuite/wanted-results/parallel-local-30s @@ -15,7 +15,7 @@ echo '**' echo "### Test --tmpdir running full. bug #40733 was caused by this" ### Test --tmpdir running full. bug #40733 was caused by this stdout parallel -j1 --tmpdir $SHM cat /dev/zero ::: dummy -parallel: Error: Output is incomplete. Cannot append to buffer file in $TMPDIR. Is the disk full? +parallel: Error: Output is incomplete. Cannot append to buffer file in /tmp/shm/parallel. Is the disk full? parallel: Error: Change $TMPDIR with --tmpdir or use --compress. echo '**' ** diff --git a/testsuite/wanted-results/parallel-local22 b/testsuite/wanted-results/parallel-local22 index 97053448..e80fe016 100644 --- a/testsuite/wanted-results/parallel-local22 +++ b/testsuite/wanted-results/parallel-local22 @@ -47,7 +47,7 @@ echo '### bug #42041: Implement $PARALLEL_JOBSLOT' 2 echo '### bug #42363: --pipepart and --fifo/--cat does not work' ### bug #42363: --pipepart and --fifo/--cat does not work - seq 100 > /tmp/bug42363; parallel --pipepart --block 31 -a /tmp/bug42363 -k --fifo wc | perl -pe s:/tmp/...........pip:/tmp/XXXX: ; parallel --pipepart --block 31 -a /tmp/bug42363 -k --cat wc | perl -pe s:/tmp/...........pip:/tmp/XXXX: ; echo '### bug #42055: --pipe -a bigfile should not require sequential reading of bigfile' + seq 100 > /tmp/bug42363; parallel --pipepart --block 31 -a /tmp/bug42363 -k --fifo wc | perl -pe s:/tmp/.........pip:/tmp/XXXX: ; parallel --pipepart --block 31 -a /tmp/bug42363 -k --cat wc | perl -pe s:/tmp/.........pip:/tmp/XXXX: ; echo '### bug #42055: --pipe -a bigfile should not require sequential reading of bigfile' 13 14 32 /tmp/XXXX 11 11 33 /tmp/XXXX 11 11 33 /tmp/XXXX @@ -73,7 +73,14 @@ parallel: Error: --pipepart is incompatible with --max-replace-args, --max-lines parallel: Error: --pipepart is incompatible with --max-replace-args, --max-lines, and -L. parallel --pipepart -a /etc/passwd -l 1 should not be run parallel: Error: --pipepart is incompatible with --max-replace-args, --max-lines, and -L. -echo '### --tmux test - check termination' -### --tmux test - check termination - perl -e 'map {printf "$_%o%c\n",$_,$_}1..255' | stdout parallel --tmux echo {} :::: - ::: a b | perl -pe 's/\d/0/g' -See output with: tmux attach -t p00000 +# TODO This is too unstable +# echo '### --tmux test - check termination' +# perl -e 'map {printf "$_%o%c\n",$_,$_}1..255' | stdout parallel --tmux echo {} :::: - ::: a b | perl -pe 's/\d/0/g' +echo '### bug #42893: --block should not cause decimals in cat_partial' +### bug #42893: --block should not cause decimals in cat_partial + seq 100000 >/tmp/parallel-decimal; parallel --dry-run -kvv --pipepart --block 0.12345M -a /tmp/parallel-decimal true; rm /tmp/parallel-decimal + 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 129449|(true) + 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 129449 129450|(true) + 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 258899 129450|(true) + 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 388349 129450|(true) + 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 517799 71096|(true)