parallel: Fixed bug #54328: --pipe --recend '' blocks.

This commit is contained in:
Ole Tange 2018-07-18 09:49:24 +02:00
parent 896048efbc
commit e1e8693797
5 changed files with 87 additions and 19 deletions

View file

@ -199,9 +199,9 @@ to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm> stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20180622 ('Kim Trump/GDPR/Guatamala') released <<[stable]>> Subject: GNU Parallel 20180722 ('?') released <<[stable]>>
GNU Parallel 20180622 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/ GNU Parallel 20180722 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
<<No new functionality was introduced so this is a good candidate for a stable release.>> <<No new functionality was introduced so this is a good candidate for a stable release.>>
@ -214,6 +214,11 @@ Quote of the month:
New in this release: New in this release:
* The CPU calculation has changed. By default GNU Parallel uses the number of CPU threads as the number of CPUs. This can be changed to the number of CPU cores or number of CPU sockets with --use-cores-instead-of-threads or --use-sockets-instead-of-threads.
* The detected number of sockets, cores, and threads can be shown with --number-of-sockets, --number-of-cores, and --number-of-threads.
* env_parallel now supports mksh using env_parallel.mksh.
* GNU Parallel is distributed as part of Snippy https://github.com/tseemann/snippy * GNU Parallel is distributed as part of Snippy https://github.com/tseemann/snippy

View file

@ -519,7 +519,8 @@ sub spreadstdin {
$Global::max_number_of_args * ($Global::max_lines || 1); $Global::max_number_of_args * ($Global::max_lines || 1);
# (?!negative lookahead) is needed to avoid backtracking # (?!negative lookahead) is needed to avoid backtracking
# See: https://unix.stackexchange.com/questions/439356/ # See: https://unix.stackexchange.com/questions/439356/
while($buf =~ /# From start up till recend while($buf =~
/# From start up till recend
^((?:(?!$recend$recstart).)*?$recend ^((?:(?!$recend$recstart).)*?$recend
# Then n-1 times recstart.*recend # Then n-1 times recstart.*recend
(?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines}) (?:$recstart(?:(?!$recend$recstart).)*?$recend){$read_n_lines})
@ -561,7 +562,9 @@ sub spreadstdin {
my $i = 0; my $i = 0;
my $read_n_lines = my $read_n_lines =
$Global::max_number_of_args * ($Global::max_lines || 1); $Global::max_number_of_args * ($Global::max_lines || 1);
while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1
and
length $buf) {
$i += length $recend; # find the actual splitting location $i += length $recend; # find the actual splitting location
$anything_written += $anything_written +=
write_record_to_pipe($chunk_number++,\$header,\$buf, write_record_to_pipe($chunk_number++,\$header,\$buf,
@ -657,6 +660,10 @@ sub recstartrecend {
# If --recend is given then it must match end of record # If --recend is given then it must match end of record
$recstart = ""; $recstart = "";
$recend = $opt::recend; $recend = $opt::recend;
if($opt::regexp and $recend eq '') {
# --regexp --recend ''
$recend = '.';
}
} }
if($opt::regexp) { if($opt::regexp) {
@ -1385,10 +1392,10 @@ sub parse_options {
if(defined $opt::eta) { $opt::progress = $opt::eta; } if(defined $opt::eta) { $opt::progress = $opt::eta; }
if(defined $opt::bar) { $opt::progress = $opt::bar; } if(defined $opt::bar) { $opt::progress = $opt::bar; }
# Funding a free software project is hard. GNU parallel is no # Funding a free software project is hard. GNU Parallel is no
# exception. On top of that it seems the less visible a project # exception. On top of that it seems the less visible a project
# is, the harder it is to get funding. And the nature of GNU # is, the harder it is to get funding. And the nature of GNU
# parallel is that it will never be seen by "the guy with the # Parallel is that it will never be seen by "the guy with the
# checkbook", but only by the people doing the actual work. # checkbook", but only by the people doing the actual work.
# #
# This problem has been covered by others - though no solution has # This problem has been covered by others - though no solution has
@ -1402,7 +1409,7 @@ sub parse_options {
# #
# Having to spend 10 seconds on running 'parallel --citation' once # Having to spend 10 seconds on running 'parallel --citation' once
# is no doubt not an ideal solution, but no one has so far come up # is no doubt not an ideal solution, but no one has so far come up
# with an ideal solution - neither for funding GNU parallel nor # with an ideal solution - neither for funding GNU Parallel nor
# other free software. # other free software.
# #
# If you believe you have the perfect solution, you should try it # If you believe you have the perfect solution, you should try it
@ -1418,9 +1425,11 @@ sub parse_options {
# #
# Description of the xt:Commerce case in OLG Duesseldorf # Description of the xt:Commerce case in OLG Duesseldorf
# http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx # http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
# https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx
# #
# The verdict in German # The verdict in German
# https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
# https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09
# #
# Other free software limiting derivatives by the same name # Other free software limiting derivatives by the same name
# https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects
@ -4263,6 +4272,26 @@ sub citation_notice {
"To silence this citation notice: run 'parallel --citation' once.", "To silence this citation notice: run 'parallel --citation' once.",
"" ""
); );
mkdir $Global::config_dir;
# Number of times the user has run GNU Parallel without showing
# willingness to cite
my $runs = 0;
if(open (my $fh, "<", $Global::config_dir.
"/runs-without-willing-to-cite")) {
$runs = <$fh>;
close $fh;
}
$runs++;
if(open (my $fh, ">", $Global::config_dir.
"/runs-without-willing-to-cite")) {
print $fh $runs;
close $fh;
if($runs >= 10) {
::status("Come on: You have run parallel $runs times. Isn't it about time ",
"you run 'parallel --citation' once to silence the citation notice?",
"");
}
}
} }
} }
@ -9175,6 +9204,7 @@ sub print_linebuffer {
my ($buf,$i,$rv); my ($buf,$i,$rv);
while($rv = sysread($in_fh, $buf, 131072)) { while($rv = sysread($in_fh, $buf, 131072)) {
$outputlength += $rv; $outputlength += $rv;
# TODO --recend
# Treat both \n and \r as line end # Treat both \n and \r as line end
$i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1); $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);
if($i) { if($i) {
@ -9182,6 +9212,7 @@ sub print_linebuffer {
if($opt::tag or defined $opt::tagstring) { if($opt::tag or defined $opt::tagstring) {
# Replace ^ with $tag within the full line # Replace ^ with $tag within the full line
my $tag = $self->tag(); my $tag = $self->tag();
# TODO --recend
substr($buf,0,$i-1) =~ s/(?<=[\n\r])/$tag/gm; substr($buf,0,$i-1) =~ s/(?<=[\n\r])/$tag/gm;
# The length changed, so find the new ending pos # The length changed, so find the new ending pos
$i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1); $i = (rindex($buf,"\n")+1) || (rindex($buf,"\r")+1);

View file

@ -1056,11 +1056,11 @@ If used with B<--pipe --roundrobin> and the same input, the jobslots
will get the same blocks in the same order in every run. will get the same blocks in the same order in every run.
=item B<-L> I<max-lines> =item B<-L> I<recsize>
When used with B<--pipe>: Read records of I<max-lines>. When used with B<--pipe>: Read records of I<recsize> lines.
When used otherwise: Use at most I<max-lines> nonblank input lines per When used otherwise: Use at most I<recsize> nonblank input lines per
command line. Trailing blanks cause an input line to be logically command line. Trailing blanks cause an input line to be logically
continued on the next input line. continued on the next input line.
@ -1070,14 +1070,14 @@ line.
Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set.
=item B<--max-lines>[=I<max-lines>] =item B<--max-lines>[=I<recsize>]
=item B<-l>[I<max-lines>] =item B<-l>[I<recsize>]
When used with B<--pipe>: Read records of I<max-lines>. When used with B<--pipe>: Read records of I<recsize> lines.
When used otherwise: Synonym for the B<-L> option. Unlike B<-L>, the When used otherwise: Synonym for the B<-L> option. Unlike B<-L>, the
I<max-lines> argument is optional. If I<max-lines> is not specified, I<recsize> argument is optional. If I<recsize> is not specified,
it defaults to one. The B<-l> option is deprecated since the POSIX it defaults to one. The B<-l> option is deprecated since the POSIX
standard specifies B<-L> instead. standard specifies B<-L> instead.
@ -3027,6 +3027,19 @@ B<parallel> finally uses B<wget> to fetch the images.
parallel wget parallel wget
=head1 EXAMPLE: Download video playlist in parallel
B<youtube-dl> is an excellent tool to download videos. It cannot,
however, download videos in parallel. This example takes a playlist and
downloads 10 videos in parallel.
url='https://youtu.be/watch?v=0wOf2Fgi3DE&list=UU_cznB5YZZmvAmeq7Y3EriQ'
export url
youtube-dl --flat-playlist "$url" |
parallel --tagstring {#} --lb -j10 \
LC_ALL= youtube-dl --playlist-start {#} --playlist-end {#} '"$url"'
=head1 EXAMPLE: Copy files as last modified date (ISO8601) with added =head1 EXAMPLE: Copy files as last modified date (ISO8601) with added
random digits random digits

View file

@ -819,6 +819,12 @@ par_PARALLEL_ENV() {
rm $PARALLEL_ENV rm $PARALLEL_ENV
} }
# Regression test for bug #54328: --pipe combined with an empty --recend ('')
# used to block. Pipes the output of `seq 3` into parallel with -n 1 and dumps
# what each job receives with xxd, once with --regexp and once without it.
par_pipe_recend() {
# The adjacent '' inside the single-quoted string only closes and reopens the
# quoting, so the printed banner contains no quote characters.
echo 'bug #54328: --pipe --recend '' blocks'
# Empty --recend interpreted as a regular expression (--regexp).
seq 3 | parallel -k --pipe --regexp --recend '' -n 1 xxd
# Empty --recend interpreted as a plain string.
seq 3 | parallel -k --pipe --recend '' -n 1 xxd
}
export -f $(compgen -A function | grep par_) export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | sort | compgen -A function | grep par_ | sort |
parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1' parallel -j6 --tag -k --joblog +/tmp/jl-`basename $0` '{} 2>&1'

View file

@ -1388,6 +1388,19 @@ par_parcat_rm OK1
par_parcat_rm OK file removed par_parcat_rm OK file removed
par_pipe_no_command ### --pipe without command par_pipe_no_command ### --pipe without command
par_pipe_no_command parallel: Error: --pipe/--pipepart must have a command to pipe into (e.g. 'cat'). par_pipe_no_command parallel: Error: --pipe/--pipepart must have a command to pipe into (e.g. 'cat').
par_pipe_recend bug #54328: --pipe --recend blocks
par_pipe_recend 00000000: 31 1
par_pipe_recend 00000000: 0a .
par_pipe_recend 00000000: 32 2
par_pipe_recend 00000000: 0a .
par_pipe_recend 00000000: 33 3
par_pipe_recend 00000000: 0a .
par_pipe_recend 00000000: 31 1
par_pipe_recend 00000000: 0a .
par_pipe_recend 00000000: 32 2
par_pipe_recend 00000000: 0a .
par_pipe_recend 00000000: 33 3
par_pipe_recend 00000000: 0a .
par_pipe_record_size_in_lines bug #34958: --pipe with record size measured in lines par_pipe_record_size_in_lines bug #34958: --pipe with record size measured in lines
par_pipe_record_size_in_lines 1 par_pipe_record_size_in_lines 1
par_pipe_record_size_in_lines 2 par_pipe_record_size_in_lines 2