mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 05:57:54 +00:00
parcat: --rm, reading args from stdin.
This commit is contained in:
parent
bbd336643c
commit
67c4377715
3
CREDITS
3
CREDITS
|
@ -1,3 +1,6 @@
|
||||||
|
People who have helped GNU Parallel in different ways.
|
||||||
|
|
||||||
|
John Rusnak: Feedback on all documentation.
|
||||||
FrithMartin: Bug patch for orphan blocks.
|
FrithMartin: Bug patch for orphan blocks.
|
||||||
Rasmus Villemoes: Code snips for signal processing.
|
Rasmus Villemoes: Code snips for signal processing.
|
||||||
Martin d'Anjou: Code snips for signal processing.
|
Martin d'Anjou: Code snips for signal processing.
|
||||||
|
|
|
@ -119,4 +119,4 @@ monitorman:
|
||||||
# If man page changed: open new pdfman
|
# If man page changed: open new pdfman
|
||||||
inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f . | parallel -uj1 'echo {=/\.pod$$/ or skip()=};make -j && sudo make install; pdfman {/.} &'
|
inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f . | parallel -uj1 'echo {=/\.pod$$/ or skip()=};make -j && sudo make install; pdfman {/.} &'
|
||||||
|
|
||||||
EXTRA_DIST = CITATION CREDITS
|
EXTRA_DIST = CITATION CREDITS cc-by-sa.txt fdl.txt
|
||||||
|
|
|
@ -275,7 +275,7 @@ top_build_prefix = @top_build_prefix@
|
||||||
top_builddir = @top_builddir@
|
top_builddir = @top_builddir@
|
||||||
top_srcdir = @top_srcdir@
|
top_srcdir = @top_srcdir@
|
||||||
SUBDIRS = src
|
SUBDIRS = src
|
||||||
EXTRA_DIST = CITATION CREDITS
|
EXTRA_DIST = CITATION CREDITS cc-by-sa.txt fdl.txt
|
||||||
all: config.h
|
all: config.h
|
||||||
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
||||||
|
|
||||||
|
|
|
@ -195,6 +195,8 @@ file:///home/tange/privat/parallel/doc/release_new_version
|
||||||
|
|
||||||
from:tange@gnu.org
|
from:tange@gnu.org
|
||||||
to:parallel@gnu.org, bug-parallel@gnu.org
|
to:parallel@gnu.org, bug-parallel@gnu.org
|
||||||
|
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
|
||||||
|
|
||||||
|
|
||||||
Subject: GNU Parallel 20170822 ('<<>>') released <<[stable]>>
|
Subject: GNU Parallel 20170822 ('<<>>') released <<[stable]>>
|
||||||
|
|
||||||
|
@ -214,10 +216,24 @@ New in this release:
|
||||||
http://meta.askubuntu.com/a/16750/22307
|
http://meta.askubuntu.com/a/16750/22307
|
||||||
http://meta.serverfault.com/a/9040/45704
|
http://meta.serverfault.com/a/9040/45704
|
||||||
|
|
||||||
* GNU Parallel was cited in:
|
* GNU Parallel was cited in: https://springerplus.springeropen.com/articles/10.1186/s40064-016-2022-y
|
||||||
|
|
||||||
|
https://dzone.com/articles/running-bash-commands-in-parallel-1
|
||||||
|
|
||||||
* https://medium.com/@nornagon/today-i-learned-gnu-parallel-plate-tectonics-9fcf24045e63
|
* https://medium.com/@nornagon/today-i-learned-gnu-parallel-plate-tectonics-9fcf24045e63
|
||||||
|
|
||||||
|
* https://www.upf.edu/web/sct-sit/gnu-parallel-tutorial
|
||||||
|
|
||||||
|
|
||||||
|
http://blogs.fluidinfo.com/terry/2017/08/05/do-stuff-on-things-in-parallel/
|
||||||
|
|
||||||
|
http://ino.pm/outreach/presentations/2014/03/genomics-wranglers/index.html#/5
|
||||||
|
http://www.ettemalab.org/using-for-loop-vs-gnu-parallel-for-blast/
|
||||||
|
|
||||||
|
https://medium.com/@nornagon/today-i-learned-gnu-parallel-plate-tectonics-9fcf24045e63
|
||||||
|
|
||||||
|
https://gxnotes.com/article/175866.html
|
||||||
|
|
||||||
<<Citation not OK: BAMClipper: removing primers from alignments to minimize false-negative mutations in amplicon next-generation sequencing https://www.nature.com/articles/s41598-017-01703-6>>
|
<<Citation not OK: BAMClipper: removing primers from alignments to minimize false-negative mutations in amplicon next-generation sequencing https://www.nature.com/articles/s41598-017-01703-6>>
|
||||||
|
|
||||||
<<Wrong citation https://iris.sissa.it/retrieve/handle/20.500.11767/36149/10823/And%C3%B2_tesi.pdf>>
|
<<Wrong citation https://iris.sissa.it/retrieve/handle/20.500.11767/36149/10823/And%C3%B2_tesi.pdf>>
|
||||||
|
|
121
src/parcat
121
src/parcat
|
@ -16,24 +16,44 @@ my $okq = Thread::Queue->new();
|
||||||
my @producers;
|
my @producers;
|
||||||
|
|
||||||
if(not @ARGV) {
|
if(not @ARGV) {
|
||||||
print "Usage:\n";
|
if(-t *STDIN) {
|
||||||
print " parcat file(s)\n";
|
print "Usage:\n";
|
||||||
|
print " parcat file(s)\n";
|
||||||
|
print " cat argfile | parcat\n";
|
||||||
|
} else {
|
||||||
|
# Read arguments from stdin
|
||||||
|
chomp(@ARGV = <STDIN>);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
my $files_to_open = 0;
|
||||||
|
# Default: fd = stdout
|
||||||
|
my $fd = 1;
|
||||||
for (@ARGV) {
|
for (@ARGV) {
|
||||||
push @producers, threads->create('producer', $_);
|
# --rm = remove file when opened
|
||||||
|
/^--rm$/ and do { $opt::rm = 1; next; };
|
||||||
|
# -1 = output to fd 1, -2 = output to fd 2
|
||||||
|
/^-(\d+)$/ and do { $fd = $1; next; };
|
||||||
|
push @producers, threads->create('producer', $_, $fd);
|
||||||
|
$files_to_open++;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub producer {
|
sub producer {
|
||||||
# Open a file/fifo, set non blocking, enqueue fileno of the file handle
|
# Open a file/fifo, set non blocking, enqueue fileno of the file handle
|
||||||
my $file = shift;
|
my $file = shift;
|
||||||
|
my $output_fd = shift;
|
||||||
open(my $fh, "<", $file) || do {
|
open(my $fh, "<", $file) || do {
|
||||||
print STDERR "parcat: Cannot open $file\n";
|
print STDERR "parcat: Cannot open $file\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
};
|
};
|
||||||
|
# Remove file when it has been opened
|
||||||
|
if($opt::rm) {
|
||||||
|
unlink $file;
|
||||||
|
}
|
||||||
set_fh_non_blocking($fh);
|
set_fh_non_blocking($fh);
|
||||||
$q->enqueue(fileno($fh));
|
|
||||||
$opened++;
|
$opened++;
|
||||||
|
# Pass the fileno to parent
|
||||||
|
$q->enqueue(fileno($fh),$output_fd);
|
||||||
# Get an OK that the $fh is opened and we can release the $fh
|
# Get an OK that the $fh is opened and we can release the $fh
|
||||||
while(1) {
|
while(1) {
|
||||||
my $ok = $okq->dequeue();
|
my $ok = $okq->dequeue();
|
||||||
|
@ -48,31 +68,38 @@ my $s = IO::Select->new();
|
||||||
my %buffer;
|
my %buffer;
|
||||||
|
|
||||||
sub add_file {
|
sub add_file {
|
||||||
my $fd = shift;
|
my $infd = shift;
|
||||||
open(my $fh, "<&=", $fd) || die;
|
my $outfd = shift;
|
||||||
$s->add($fh);
|
open(my $infh, "<&=", $infd) || die;
|
||||||
|
open(my $outfh, ">&=", $outfd) || die;
|
||||||
|
$s->add($infh);
|
||||||
# Tell the producer now opened here and can be released
|
# Tell the producer now opened here and can be released
|
||||||
$okq->enqueue($fd);
|
$okq->enqueue($infd);
|
||||||
# Initialize the buffer
|
# Initialize the buffer
|
||||||
@{$buffer{$fh}} = ();
|
@{$buffer{$infh}{$outfd}} = ();
|
||||||
|
$Global::fh{$outfd} = $outfh;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub add_files {
|
sub add_files {
|
||||||
# Non-blocking dequeue
|
# Non-blocking dequeue
|
||||||
while(defined(my $fd = $q->dequeue_nb())) {
|
my ($infd,$outfd);
|
||||||
add_file($fd);
|
do {
|
||||||
}
|
($infd,$outfd) = $q->dequeue_nb(2);
|
||||||
|
if(defined($outfd)) {
|
||||||
|
add_file($infd,$outfd);
|
||||||
|
}
|
||||||
|
} while(defined($outfd));
|
||||||
}
|
}
|
||||||
|
|
||||||
sub add_files_block {
|
sub add_files_block {
|
||||||
# Blocking dequeue
|
# Blocking dequeue
|
||||||
my $fd = $q->dequeue();
|
my ($infd,$outfd) = $q->dequeue(2);
|
||||||
add_file($fd);
|
add_file($infd,$outfd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
my $fd;
|
my $fd;
|
||||||
my (@ready,$file,$rv,$buf);
|
my (@ready,$infh,$rv,$buf);
|
||||||
do {
|
do {
|
||||||
# Wait until at least one file is opened
|
# Wait until at least one file is opened
|
||||||
add_files_block();
|
add_files_block();
|
||||||
|
@ -83,43 +110,46 @@ do {
|
||||||
if(not @ready) {
|
if(not @ready) {
|
||||||
add_files();
|
add_files();
|
||||||
}
|
}
|
||||||
for $file (@ready) {
|
for $infh (@ready) {
|
||||||
$rv = sysread($file, $buf, 65536);
|
# There is only one key, namely the output file descriptor
|
||||||
if (!$rv) {
|
for my $outfd (keys %{$buffer{$infh}}) {
|
||||||
if($! == EAGAIN) {
|
$rv = sysread($infh, $buf, 65536);
|
||||||
# Would block: Nothing read
|
if (!$rv) {
|
||||||
next;
|
if($! == EAGAIN) {
|
||||||
|
# Would block: Nothing read
|
||||||
|
next;
|
||||||
|
} else {
|
||||||
|
# Nothing read, but would not block:
|
||||||
|
# This file is done
|
||||||
|
$s->remove($infh);
|
||||||
|
syswrite($Global::fh{$outfd},"@{$buffer{$infh}{$outfd}}");
|
||||||
|
delete $buffer{$infh};
|
||||||
|
# Closing the $infh causes it to block
|
||||||
|
# close $infh;
|
||||||
|
add_files();
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Something read.
|
||||||
|
# Find \n for full line
|
||||||
|
my $i = (rindex($buf,"\n")+1);
|
||||||
|
if($i) {
|
||||||
|
# Print full line
|
||||||
|
for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) {
|
||||||
|
syswrite($Global::fh{$outfd},$_);
|
||||||
|
}
|
||||||
|
# @buffer = remaining half line
|
||||||
|
$buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)];
|
||||||
} else {
|
} else {
|
||||||
# This file is done
|
# Something read, but not a full line
|
||||||
$s->remove($file);
|
push @{$buffer{$infh}{$outfd}}, $buf;
|
||||||
print @{$buffer{$file}};
|
|
||||||
delete $buffer{$file};
|
|
||||||
# Closing the $file causes it to block
|
|
||||||
# close $file;
|
|
||||||
add_files();
|
|
||||||
next;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
# Find \n for full line
|
|
||||||
my $i = (rindex($buf,"\n")+1);
|
|
||||||
if($i) {
|
|
||||||
# Print full line
|
|
||||||
for(@{$buffer{$file}}, substr($buf,0,$i)) {
|
|
||||||
syswrite(STDOUT,$_);
|
|
||||||
}
|
|
||||||
# @buffer = remaining half line
|
|
||||||
@{$buffer{$file}} = (substr($buf,$i,$rv-$i));
|
|
||||||
redo;
|
|
||||||
} else {
|
|
||||||
# Something read, but not a full line
|
|
||||||
push @{$buffer{$file}}, $buf;
|
|
||||||
redo;
|
redo;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while($opened <= $#ARGV);
|
} while($opened < $files_to_open);
|
||||||
|
|
||||||
|
|
||||||
for (@producers) {
|
for (@producers) {
|
||||||
|
@ -139,4 +169,3 @@ sub set_fh_non_blocking {
|
||||||
$flags |= &O_NONBLOCK; # Add non-blocking to the flags
|
$flags |= &O_NONBLOCK; # Add non-blocking to the flags
|
||||||
fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
|
fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,15 +19,20 @@ you use:
|
||||||
|
|
||||||
=head1 EXAMPLES
|
=head1 EXAMPLES
|
||||||
|
|
||||||
=head2 Do be done
|
=head2 Simple line buffered output
|
||||||
|
|
||||||
mkfifo slot-{1..5}-digit-{0..9}
|
GNU Parallel saves output to tempfiles. If the amount of data is
|
||||||
parallel -j5 'seq 100000 | grep {} > slot-{%}-digit-{}' ::: {0..9} &
|
bigger than the free disk space, then you can use this technique to do
|
||||||
parallel parcat slot-{1..5}-digit-{} '>' digit-{} ::: {0..9}
|
line buffering without saving to disk:
|
||||||
|
|
||||||
|
mkfifo slot-{1..5}
|
||||||
|
seq 10000000 | parallel -j5 --round --pipe 'cat > slot-{%}' &
|
||||||
|
parcat slot-{1..5} | wc
|
||||||
|
|
||||||
=head1 REPORTING BUGS
|
=head1 REPORTING BUGS
|
||||||
|
|
||||||
GNU B<parcat> is part of GNU B<parallel>. Report bugs to <bug-parallel@gnu.org>.
|
GNU B<parcat> is part of GNU B<parallel>. Report bugs to
|
||||||
|
<bug-parallel@gnu.org>.
|
||||||
|
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
|
@ -728,6 +728,25 @@ par_X_eta_div_zero() {
|
||||||
perl -pe 's/\d+/0/g'
|
perl -pe 's/\d+/0/g'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
par_parcat_args_stdin() {
|
||||||
|
echo 'bug #51690: parcat: read args from stdin'
|
||||||
|
tmp1=$(tempfile)
|
||||||
|
tmp2=$(tempfile)
|
||||||
|
echo OK1 > $tmp1
|
||||||
|
echo OK2 > $tmp2
|
||||||
|
(echo $tmp1
|
||||||
|
echo $tmp2) | parcat
|
||||||
|
rm $tmp1 $tmp2
|
||||||
|
}
|
||||||
|
|
||||||
|
par_parcat_rm() {
|
||||||
|
echo 'bug #51691: parcat --rm remove fifo when opened'
|
||||||
|
tmp1=$(tempfile)
|
||||||
|
echo OK1 > $tmp1
|
||||||
|
parcat --rm $tmp1
|
||||||
|
rm $tmp1 2>/dev/null || echo OK file removed
|
||||||
|
}
|
||||||
|
|
||||||
export -f $(compgen -A function | grep par_)
|
export -f $(compgen -A function | grep par_)
|
||||||
compgen -A function | grep par_ | sort |
|
compgen -A function | grep par_ | sort |
|
||||||
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
|
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'
|
||||||
|
|
|
@ -152,5 +152,28 @@ newlines"' ::: a b c d e | sort
|
||||||
) | perl -pe 's/\0/<null>/g;s/\d+/./g'
|
) | perl -pe 's/\0/<null>/g;s/\d+/./g'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
par_parcat_mixing() {
|
||||||
|
echo 'parcat output should mix: a b a b'
|
||||||
|
mktmpfifo() {
|
||||||
|
tmp=$(tempfile)
|
||||||
|
rm $tmp
|
||||||
|
mkfifo $tmp
|
||||||
|
echo $tmp
|
||||||
|
}
|
||||||
|
slow_output() {
|
||||||
|
string=$1
|
||||||
|
perl -e 'print "'$string'"x9000,"start\n"'
|
||||||
|
sleep 1
|
||||||
|
perl -e 'print "'$string'"x9000,"end\n"'
|
||||||
|
}
|
||||||
|
tmp1=$(mktmpfifo)
|
||||||
|
tmp2=$(mktmpfifo)
|
||||||
|
slow_output a > $tmp1 &
|
||||||
|
sleep 0.5
|
||||||
|
slow_output b > $tmp2 &
|
||||||
|
parcat $tmp1 $tmp2 | tr -s ab
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
export -f $(compgen -A function | grep par_)
|
export -f $(compgen -A function | grep par_)
|
||||||
compgen -A function | grep par_ | sort | parallel -j6 --tag -k '{} 2>&1'
|
compgen -A function | grep par_ | sort | parallel -j6 --tag -k '{} 2>&1'
|
||||||
|
|
|
@ -52,6 +52,11 @@ par_multiline_commands finish 2
|
||||||
par_multiline_commands parallel: Warning: Command lines contain newline. Forcing --null.
|
par_multiline_commands parallel: Warning: Command lines contain newline. Forcing --null.
|
||||||
par_multiline_commands 3
|
par_multiline_commands 3
|
||||||
par_multiline_commands finish 3
|
par_multiline_commands finish 3
|
||||||
|
par_parcat_mixing parcat output should mix: a b a b
|
||||||
|
par_parcat_mixing astart
|
||||||
|
par_parcat_mixing bstart
|
||||||
|
par_parcat_mixing bend
|
||||||
|
par_parcat_mixing aend
|
||||||
par_pipepart_block ### --pipepart --block -# (# < 0)
|
par_pipepart_block ### --pipepart --block -# (# < 0)
|
||||||
par_pipepart_block 1
|
par_pipepart_block 1
|
||||||
par_pipepart_block 2
|
par_pipepart_block 2
|
||||||
|
|
Loading…
Reference in a new issue