From 196afa553b4cf20890084c6ba74bcc33d23f1837 Mon Sep 17 00:00:00 2001 From: Ole Tange Date: Sun, 4 Sep 2016 10:42:04 +0200 Subject: [PATCH] parcat: Initial version. --- src/Makefile.am | 41 ++++-- src/Makefile.in | 41 ++++-- src/parallel.pod | 28 ++-- src/parallel_tutorial.pod | 25 ++++ src/parcat | 303 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 405 insertions(+), 33 deletions(-) create mode 100644 src/parcat diff --git a/src/Makefile.am b/src/Makefile.am index fba9bc98..6df36999 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -bin_SCRIPTS = parallel sql niceload \ +bin_SCRIPTS = parallel sql niceload parcat \ env_parallel env_parallel.bash env_parallel.zsh env_parallel.fish \ env_parallel.ksh env_parallel.pdksh env_parallel.csh env_parallel.tcsh @@ -8,13 +8,13 @@ install-exec-hook: if DOCUMENTATION man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ - parallel_tutorial.7 parallel_design.7 + parallel_tutorial.7 parallel_design.7 parcat.1 doc_DATA = parallel.html env_parallel.html sem.html sql.html niceload.html \ - parallel_tutorial.html parallel_design.html \ + parallel_tutorial.html parallel_design.html parcat.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ - parallel_tutorial.texi parallel_design.texi \ + parallel_tutorial.texi parallel_design.texi parcat.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ - parallel_tutorial.pdf parallel_design.pdf + parallel_tutorial.pdf parallel_design.pdf parcat.pdf endif # Build documentation file if the tool to build exists. @@ -61,6 +61,12 @@ niceload.1: niceload.pod && mv $(srcdir)/niceload.1n $(srcdir)/niceload.1 \ || echo "Warning: pod2man not found. Using old niceload.1" +parcat.1: parcat + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 $(srcdir)/parcat > $(srcdir)/parcat.1n \ + && mv $(srcdir)/parcat.1n $(srcdir)/parcat.1 \ + || echo "Warning: pod2man not found. 
Using old parcat.1" + parallel.html: parallel.pod pod2html --title "GNU Parallel" $(srcdir)/parallel.pod > $(srcdir)/parallel.htmln \ && mv $(srcdir)/parallel.htmln $(srcdir)/parallel.html \ @@ -109,6 +115,13 @@ niceload.html: niceload.pod sql.html || echo "Warning: pod2html not found. Using old niceload.html" rm -f $(srcdir)/pod2htm* +# Depending on niceload.html to avoid stupid pod2html race condition +parcat.html: parcat niceload.html + pod2html --title "GNU parcat" $(srcdir)/parcat > $(srcdir)/parcat.htmln \ + && mv $(srcdir)/parcat.htmln $(srcdir)/parcat.html \ + || echo "Warning: pod2html not found. Using old parcat.html" + rm -f $(srcdir)/pod2htm* + parallel.texi: parallel.pod pod2texi --output=$(srcdir)/parallel.texi $(srcdir)/parallel.pod \ || echo "Warning: pod2texi not found. Using old parallel.texi" @@ -137,6 +150,10 @@ niceload.texi: niceload.pod pod2texi --output=$(srcdir)/niceload.texi $(srcdir)/niceload.pod \ || echo "Warning: pod2texi not found. Using old niceload.texi" +parcat.texi: parcat + pod2texi --output=$(srcdir)/parcat.texi $(srcdir)/parcat \ + || echo "Warning: pod2texi not found. Using old parcat.texi" + parallel.pdf: parallel.pod pod2pdf --output-file $(srcdir)/parallel.pdf $(srcdir)/parallel.pod --title "GNU Parallel" \ || echo "Warning: pod2pdf not found. Using old parallel.pdf" @@ -165,19 +182,23 @@ niceload.pdf: niceload.pod pod2pdf --output-file $(srcdir)/niceload.pdf $(srcdir)/niceload.pod --title "GNU niceload" \ || echo "Warning: pod2pdf not found. Using old niceload.pdf" +parcat.pdf: parcat + pod2pdf --output-file $(srcdir)/parcat.pdf $(srcdir)/parcat --title "GNU parcat" \ + || echo "Warning: pod2pdf not found. 
Using old parcat.pdf" + sem: parallel ln -fs parallel sem DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ - parallel_tutorial.7 parallel_design.7 \ + parallel_tutorial.7 parallel_design.7 parcat.1 \ parallel.html env_parallel.html sem.html sql.html niceload.html \ - parallel_tutorial.html parallel_design.html \ + parallel_tutorial.html parallel_design.html parcat.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ - parallel_tutorial.texi parallel_design.texi \ + parallel_tutorial.texi parallel_design.texi parcat.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ - parallel_tutorial.pdf parallel_design.pdf + parallel_tutorial.pdf parallel_design.pdf parcat.pdf -EXTRA_DIST = parallel sem sql niceload env_parallel \ +EXTRA_DIST = parallel sem sql niceload parcat env_parallel \ env_parallel.bash env_parallel.zsh env_parallel.fish env_parallel.ksh \ env_parallel.pdksh env_parallel.csh env_parallel.tcsh \ sem.pod parallel.pod env_parallel.pod niceload.pod parallel_tutorial.pod \ diff --git a/src/Makefile.in b/src/Makefile.in index bd7b377d..a0dc2138 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -229,30 +229,30 @@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -bin_SCRIPTS = parallel sql niceload \ +bin_SCRIPTS = parallel sql niceload parcat \ env_parallel env_parallel.bash env_parallel.zsh env_parallel.fish \ env_parallel.ksh env_parallel.pdksh env_parallel.csh env_parallel.tcsh @DOCUMENTATION_TRUE@man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ -@DOCUMENTATION_TRUE@ parallel_tutorial.7 parallel_design.7 +@DOCUMENTATION_TRUE@ parallel_tutorial.7 parallel_design.7 parcat.1 @DOCUMENTATION_TRUE@doc_DATA = parallel.html env_parallel.html sem.html sql.html niceload.html \ -@DOCUMENTATION_TRUE@ parallel_tutorial.html parallel_design.html \ +@DOCUMENTATION_TRUE@ parallel_tutorial.html parallel_design.html parcat.html \ 
@DOCUMENTATION_TRUE@ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ -@DOCUMENTATION_TRUE@ parallel_tutorial.texi parallel_design.texi \ +@DOCUMENTATION_TRUE@ parallel_tutorial.texi parallel_design.texi parcat.texi \ @DOCUMENTATION_TRUE@ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ -@DOCUMENTATION_TRUE@ parallel_tutorial.pdf parallel_design.pdf +@DOCUMENTATION_TRUE@ parallel_tutorial.pdf parallel_design.pdf parcat.pdf DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ - parallel_tutorial.7 parallel_design.7 \ + parallel_tutorial.7 parallel_design.7 parcat.1 \ parallel.html env_parallel.html sem.html sql.html niceload.html \ - parallel_tutorial.html parallel_design.html \ + parallel_tutorial.html parallel_design.html parcat.html \ parallel.texi env_parallel.texi sem.texi sql.texi niceload.texi \ - parallel_tutorial.texi parallel_design.texi \ + parallel_tutorial.texi parallel_design.texi parcat.texi \ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ - parallel_tutorial.pdf parallel_design.pdf + parallel_tutorial.pdf parallel_design.pdf parcat.pdf -EXTRA_DIST = parallel sem sql niceload env_parallel \ +EXTRA_DIST = parallel sem sql niceload parcat env_parallel \ env_parallel.bash env_parallel.zsh env_parallel.fish env_parallel.ksh \ env_parallel.pdksh env_parallel.csh env_parallel.tcsh \ sem.pod parallel.pod env_parallel.pod niceload.pod parallel_tutorial.pod \ @@ -644,6 +644,12 @@ niceload.1: niceload.pod && mv $(srcdir)/niceload.1n $(srcdir)/niceload.1 \ || echo "Warning: pod2man not found. Using old niceload.1" +parcat.1: parcat + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 $(srcdir)/parcat > $(srcdir)/parcat.1n \ + && mv $(srcdir)/parcat.1n $(srcdir)/parcat.1 \ + || echo "Warning: pod2man not found. 
Using old parcat.1" + parallel.html: parallel.pod pod2html --title "GNU Parallel" $(srcdir)/parallel.pod > $(srcdir)/parallel.htmln \ && mv $(srcdir)/parallel.htmln $(srcdir)/parallel.html \ @@ -692,6 +698,13 @@ niceload.html: niceload.pod sql.html || echo "Warning: pod2html not found. Using old niceload.html" rm -f $(srcdir)/pod2htm* +# Depending on niceload.html to avoid stupid pod2html race condition +parcat.html: parcat niceload.html + pod2html --title "GNU parcat" $(srcdir)/parcat > $(srcdir)/parcat.htmln \ + && mv $(srcdir)/parcat.htmln $(srcdir)/parcat.html \ + || echo "Warning: pod2html not found. Using old parcat.html" + rm -f $(srcdir)/pod2htm* + parallel.texi: parallel.pod pod2texi --output=$(srcdir)/parallel.texi $(srcdir)/parallel.pod \ || echo "Warning: pod2texi not found. Using old parallel.texi" @@ -720,6 +733,10 @@ niceload.texi: niceload.pod pod2texi --output=$(srcdir)/niceload.texi $(srcdir)/niceload.pod \ || echo "Warning: pod2texi not found. Using old niceload.texi" +parcat.texi: parcat + pod2texi --output=$(srcdir)/parcat.texi $(srcdir)/parcat \ + || echo "Warning: pod2texi not found. Using old parcat.texi" + parallel.pdf: parallel.pod pod2pdf --output-file $(srcdir)/parallel.pdf $(srcdir)/parallel.pod --title "GNU Parallel" \ || echo "Warning: pod2pdf not found. Using old parallel.pdf" @@ -748,6 +765,10 @@ niceload.pdf: niceload.pod pod2pdf --output-file $(srcdir)/niceload.pdf $(srcdir)/niceload.pod --title "GNU niceload" \ || echo "Warning: pod2pdf not found. Using old niceload.pdf" +parcat.pdf: parcat + pod2pdf --output-file $(srcdir)/parcat.pdf $(srcdir)/parcat --title "GNU parcat" \ + || echo "Warning: pod2pdf not found. Using old parcat.pdf" + sem: parallel ln -fs parallel sem diff --git a/src/parallel.pod b/src/parallel.pod index 097a9fc5..051c9419 100644 --- a/src/parallel.pod +++ b/src/parallel.pod @@ -86,9 +86,7 @@ B: Use B. B: Use B. -B: Use B. - -B: No solution. +B: Use B. B: Use B. @@ -96,7 +94,6 @@ B: Use B. B: Use B. 
- The command cannot contain the character \257 (macron: ¯). =item B<{}> @@ -254,7 +251,8 @@ Replace with calculated I. B<$_> will contain the same as B<{}>. After evaluating I B<$_> will be used as the value. It is recommended to only change $_ but you have full access to all of GNU B's internal functions and data -structures. A few convenience functions have been made: +structures. A few convenience functions and data structures have been +made: =over 15 @@ -274,6 +272,10 @@ slot number of job sequence number of job +=item Z<> B<@arg> + +the arguments + =back Example: @@ -4805,19 +4807,19 @@ lacks many functions. All of these fail: # -q to protect quoted $ and space parallel -q perl -e '$a=shift; print "$a"x10000000' ::: a b c - # Argument is command - parallel {} 10 ::: echo seq # Generation of combination of inputs parallel echo {1} {2} ::: red green blue ::: S M L XL XXL - # Composed commands - parallel echo {} '|' wc ::: a - # Output is mixed - parallel (-q) perl -e 'print"{}"x10000000' ::: a b c | - perl -pe '$_=join "\n",split //, $_' | uniq -c # Show what would be executed parallel --dry-run echo ::: a + # Run different shell dialects + zsh -c 'parallel echo \={} ::: zsh && true' + csh -c 'parallel echo \$\{\} ::: shell && true' + bash -c 'parallel echo \$\({}\) ::: pwd && true' -Rust parallel has no remote facilities. +Rust parallel lacks B<::::>, B<--pipe>, and has no remote facilities. + +Rust parallel buffers in RAM like gargs. This can cause +death-by-swapping. See B. =head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod index 6476da6d..f574629c 100644 --- a/src/parallel_tutorial.pod +++ b/src/parallel_tutorial.pod @@ -904,6 +904,31 @@ Output: pre-A-post +=head2 Respecting the shell + +This tutorial uses Bash as the shell. 
GNU B<parallel> respects which +shell you are using, so in B<zsh> you can do: + + parallel echo \={} ::: zsh bash ls + +Output: + + /usr/bin/zsh + /bin/bash + /bin/ls + +In B<csh> you can do: + + parallel 'set a="{}"; if( { test -d "$a" } ) echo "$a is a dir"' ::: * + +Output: + + [somedir] is a dir + +This also becomes useful if you use GNU B<parallel> in a shell script: +GNU B<parallel> will use the same shell as the shell script. + + =head1 Controlling the output The output can prefixed with the argument: diff --git a/src/parcat b/src/parcat new file mode 100644 index 00000000..5d9c943a --- /dev/null +++ b/src/parcat @@ -0,0 +1,303 @@ +#!/usr/bin/perl + +=head1 NAME + +parcat - cat files or fifos in parallel + +=head1 SYNOPSIS + +B<parcat> file(s) + +=head1 DESCRIPTION + +GNU B<parcat> reads files or fifos in parallel. It writes full lines +so there will be no problem with mixed-half-lines which you risk if +you use: + + (cat file1 & cat file2 &) | ... + + +=head1 EXAMPLES + +=head2 To be done + + mkfifo slot-{1..5}-digit-{0..9} + parallel -j5 'seq 100000 | grep {} > slot-{%}-digit-{}' ::: {0..9} & + parallel parcat slot-{1..5}-digit-{} '>' digit-{} ::: {0..9} + +=head1 REPORTING BUGS + +GNU B<parcat> is part of GNU B<parallel>. Report bugs to <bug-parallel@gnu.org>. + + +=head1 AUTHOR + +When using GNU B<parcat> for a publication please cite: + +O. Tange (2011): GNU SQL - A Command Line Tool for Accessing Different +Databases Using DBURLs, ;login: The USENIX Magazine, April 2011:29-32. + +Copyright (C) 2008,2009,2010 Ole Tange http://ole.tange.dk + +Copyright (C) 2010,2011 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +=head1 LICENSE + +Copyright (C) 2007,2008,2009,2010,2011 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this documentation +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, with no Front-Cover Texts, and with no Back-Cover +Texts. A copy of the license is included in the file fdl.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 9 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. 
+
+=back
+
+=item B<Notice>
+
+For any reuse or distribution, you must make clear to others the
+license terms of this work.
+
+=back
+
+A copy of the full license is included in the file as cc-by-sa.txt.
+
+=head1 DEPENDENCIES
+
+GNU B<parcat> uses Perl.
+
+
+=head1 SEE ALSO
+
+B<cat>(1), B<parallel>(1)
+
+=cut
+
+
+use strict;
+use warnings;
+use Symbol qw(gensym);
+use IPC::Open3;
+use POSIX qw(:errno_h);
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use IO::Select;
+use threads;
+use threads::shared;
+use Thread::Queue;
+
+
+# Overview:
+#   One producer thread per argument opens the file/fifo (presumably
+#   in a thread because opening a fifo blocks until a writer shows
+#   up), and passes the file descriptor number to the main thread.
+#   The main thread adopts the descriptor, multiplexes all adopted
+#   files with select() and writes only complete lines to stdout.
+
+# File descriptors opened by producers, waiting to be adopted
+my $q = Thread::Queue->new();
+# File descriptors that have been adopted: tells the owning producer
+# that it may release its own file handle
+my $okq = Thread::Queue->new();
+my @producers;
+
+# Reader state - only touched by the main thread
+my $s = IO::Select->new();
+my %fhr;          # file handles still being read (stringified handle => 1)
+my %buffer;       # file handle => chunks of a line that is not yet complete
+my $adopted = 0;  # number of files adopted by the main thread so far
+
+if(not @ARGV) {
+    print "Usage:\n";
+    print " parcat file(s)\n";
+    # Without files no producer will ever deliver a file descriptor,
+    # so the reader below would wait forever
+    exit(1);
+}
+
+for my $file (@ARGV) {
+    push @producers, threads->create(\&producer, $file);
+}
+
+# Counting the adopted files in the main thread (instead of a counter
+# incremented by the producers) avoids both an unlocked shared
+# variable and the race where a file is read to EOF before its
+# producer has had time to increment the counter.
+while($adopted < @ARGV) {
+    # Wait until at least one more file is opened
+    add_files_block();
+    while($q->pending() or keys %fhr) {
+        add_files();
+        while(keys %fhr) {
+            my @ready = $s->can_read(0.01);
+            if(not @ready) {
+                # Nothing to read right now: adopt newly opened files
+                add_files();
+            }
+            for my $fh (@ready) {
+                if(drain_file($fh)) {
+                    finish_file($fh);
+                    add_files();
+                }
+            }
+        }
+    }
+}
+
+for my $thr (@producers) {
+    $thr->join();
+}
+
+sub producer {
+    # Open a file/fifo, set non blocking, enqueue fileno of the file handle
+    # Runs in its own thread.
+    # Inputs:
+    #   $file = name of file/fifo to open
+    # Returns:
+    #   N/A
+    my $file = shift;
+    open(my $fh, "<", $file) or do {
+        print STDERR "parcat: Cannot open $file\n";
+        # exit() in a thread terminates the whole program
+        exit(1);
+    };
+    set_fh_non_blocking($fh);
+    my $fd = fileno($fh);
+    $q->enqueue($fd);
+    # Get an OK that the main thread has opened $fd, too.
+    # Only then is it safe to release $fh.
+    while(1) {
+        my $ok = $okq->dequeue();
+        if($ok == $fd) { last; }
+        # Not ours - very unlikely to happen
+        $okq->enqueue($ok);
+    }
+    return;
+}
+
+sub add_file {
+    # Adopt a file descriptor opened by a producer
+    # Inputs:
+    #   $fd = file descriptor number
+    # Returns:
+    #   N/A
+    my $fd = shift;
+    open(my $fh, "<&=", $fd) or die "parcat: Cannot open fd $fd: $!\n";
+    $s->add($fh);
+    $fhr{$fh}++;
+    # Initialize the buffer
+    @{$buffer{$fh}} = ();
+    $adopted++;
+    # Tell the producer now opened here and can be released
+    $okq->enqueue($fd);
+    return;
+}
+
+sub add_files {
+    # Adopt all file descriptors that are ready (non-blocking dequeue)
+    while(defined(my $fd = $q->dequeue_nb())) {
+        add_file($fd);
+    }
+    return;
+}
+
+sub add_files_block {
+    # Wait for a single file descriptor and adopt it (blocking dequeue)
+    my $fd = $q->dequeue();
+    add_file($fd);
+    return;
+}
+
+sub drain_file {
+    # Read from a file handle until it would block or is finished.
+    # Full lines are written to stdout. A trailing half line is kept
+    # in %buffer until the rest of the line arrives.
+    # Inputs:
+    #   $fh = non-blocking file handle to read from
+    # Returns:
+    #   1 if the file is done (EOF or read error), 0 otherwise
+    my $fh = shift;
+    while(1) {
+        my $buf;
+        my $rv = sysread($fh, $buf, 65536);
+        if(not defined $rv) {
+            # $! is only meaningful when sysread failed
+            if($! == EAGAIN or $! == EWOULDBLOCK or $! == EINTR) {
+                # Would block: Nothing read
+                return 0;
+            }
+            # Any other read error: Treat the file as done
+            return 1;
+        }
+        if($rv == 0) {
+            # EOF
+            return 1;
+        }
+        # Find \n for full line
+        my $i = rindex($buf,"\n") + 1;
+        if($i) {
+            # Print full line(s): buffered half line + this chunk up to \n
+            for my $chunk (@{$buffer{$fh}}, substr($buf,0,$i)) {
+                write_all($chunk);
+            }
+            # buffer = remaining half line
+            my $rest = substr($buf,$i);
+            @{$buffer{$fh}} = length($rest) ? ($rest) : ();
+        } else {
+            # Something read, but not a full line
+            push @{$buffer{$fh}}, $buf;
+        }
+    }
+}
+
+sub finish_file {
+    # Stop reading a file that is done and flush its last
+    # (unterminated) line
+    # Inputs:
+    #   $fh = file handle that is done
+    # Returns:
+    #   N/A
+    my $fh = shift;
+    $s->remove($fh);
+    delete $fhr{$fh};
+    # Use the same unbuffered writer as for full lines: mixing
+    # print and syswrite on STDOUT can reorder the output
+    for my $chunk (@{$buffer{$fh}}) {
+        write_all($chunk);
+    }
+    delete $buffer{$fh};
+    # Closing the $fh causes it to block
+    # close $fh;
+    return;
+}
+
+sub write_all {
+    # Write a string to stdout, retrying short writes
+    # Inputs:
+    #   $data = string to write
+    # Returns:
+    #   N/A (dies if stdout cannot be written to)
+    my $data = shift;
+    my $len = length $data;
+    my $offset = 0;
+    while($offset < $len) {
+        my $written = syswrite(STDOUT, $data, $len - $offset, $offset);
+        if(not defined $written) {
+            if($! == EINTR) { next; }
+            die "parcat: Cannot write to stdout: $!\n";
+        }
+        $offset += $written;
+    }
+    return;
+}
+
+sub set_fh_non_blocking {
+    # Set filehandle as non-blocking
+    # Inputs:
+    #   $fh = filehandle to be made non-blocking
+    # Returns:
+    #   N/A
+    my $fh = shift;
+    # fcntl returns the flags ("0 but true" if no flags are set);
+    # it does not store them in its third argument
+    my $flags = fcntl($fh, F_GETFL, 0) or die $!;
+    # Add non-blocking to the current flags
+    fcntl($fh, F_SETFL, $flags | O_NONBLOCK) or die $!;
+    return;
+}
+