mirror of
https://git.savannah.gnu.org/git/parallel.git
synced 2024-11-22 14:07:55 +00:00
parallel: --group-by supports --colsep ' '.
This commit is contained in:
parent
8df731b9ab
commit
fca3e928d8
|
@ -190,7 +190,7 @@ from:tange@gnu.org
|
||||||
to:parallel@gnu.org, bug-parallel@gnu.org
|
to:parallel@gnu.org, bug-parallel@gnu.org
|
||||||
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
|
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
|
||||||
|
|
||||||
Subject: GNU Parallel 20201222 ('Maradona') released <<[stable]>>
|
Subject: GNU Parallel 20201222 ('Maradona/Yeager') released <<[stable]>>
|
||||||
|
|
||||||
GNU Parallel 20201222 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
|
GNU Parallel 20201222 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/
|
||||||
|
|
||||||
|
|
14
src/parallel
14
src/parallel
|
@ -860,14 +860,15 @@ sub column_perlexpr($$$) {
|
||||||
sub group_by_loop($$) {
|
sub group_by_loop($$) {
|
||||||
# Generate perl code for group-by loop
|
# Generate perl code for group-by loop
|
||||||
# Insert a $recsep when the column value changes
|
# Insert a $recsep when the column value changes
|
||||||
# The column value can be computed with $perexpr
|
# The column value can be computed with $perlexpr
|
||||||
my($fh,$recsep) = @_;
|
my($fh,$recsep) = @_;
|
||||||
my $groupby = $opt::groupby;
|
my $groupby = $opt::groupby;
|
||||||
if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
|
if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
|
||||||
# Group by column name
|
# Group by column name
|
||||||
# (Yes, this will also wrongly match a perlexpr like: chop)
|
# (Yes, this will also wrongly match a perlexpr like: chop)
|
||||||
my($read,$char,@line);
|
my($read,$char,@line);
|
||||||
# A full line, but nothing more (the rest must be read by the child)
|
# Read a full line, but nothing more
|
||||||
|
# (the rest must be read by the child)
|
||||||
# $Global::header used to prepend block to each job
|
# $Global::header used to prepend block to each job
|
||||||
do {
|
do {
|
||||||
$read = sysread($fh,$char,1);
|
$read = sysread($fh,$char,1);
|
||||||
|
@ -918,12 +919,19 @@ sub group_by_stdin_filter() {
|
||||||
my $sep = $opt::colsep;
|
my $sep = $opt::colsep;
|
||||||
$sep =~ s/\t/\\t/g;
|
$sep =~ s/\t/\\t/g;
|
||||||
$sep =~ s/\"/\\"/g;
|
$sep =~ s/\"/\\"/g;
|
||||||
|
# man perlrun: -Fpattern [...] You can't use literal whitespace
|
||||||
|
$sep =~ s/ /\\040{1}/g;
|
||||||
push @filter, "-F$sep";
|
push @filter, "-F$sep";
|
||||||
}
|
}
|
||||||
push @filter, "-pe";
|
push @filter, "-pe";
|
||||||
push @filter, group_by_loop(*STDIN,$opt::recstart);
|
push @filter, group_by_loop(*STDIN,$opt::recstart);
|
||||||
::debug("init", "@filter\n");
|
::debug("init", "@filter\n");
|
||||||
open(STDIN, '-|', @filter) || die ("Cannot start @filter");
|
open(STDIN, '-|', @filter) || die ("Cannot start @filter");
|
||||||
|
if(which("mbuffer")) {
|
||||||
|
# You get a speed up of 30% by going through mbuffer
|
||||||
|
open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
|
||||||
|
die ("Cannot start mbuffer");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sub spreadstdin() {
|
sub spreadstdin() {
|
||||||
|
@ -2164,7 +2172,7 @@ sub check_invalid_option_combinations() {
|
||||||
|
|
||||||
sub init_globals() {
|
sub init_globals() {
|
||||||
# Defaults:
|
# Defaults:
|
||||||
$Global::version = 20201122;
|
$Global::version = 20201207;
|
||||||
$Global::progname = 'parallel';
|
$Global::progname = 'parallel';
|
||||||
$::name = "GNU Parallel";
|
$::name = "GNU Parallel";
|
||||||
$Global::infinity = 2**31;
|
$Global::infinity = 2**31;
|
||||||
|
|
|
@ -860,6 +860,19 @@ par_PARALLEL_HOME_with_+() {
|
||||||
rm -rf "$tmp"
|
rm -rf "$tmp"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
par_group-by_colsep_space() {
|
||||||
|
echo '### --colsep " " should work like ","'
|
||||||
|
input() {
|
||||||
|
sep="$1"
|
||||||
|
printf "a\t${sep}b\n"
|
||||||
|
printf "a${sep}${sep}b\n"
|
||||||
|
printf "b${sep}${sep}a\n"
|
||||||
|
printf "b${sep}a${sep}b\n"
|
||||||
|
}
|
||||||
|
input ',' | parallel --pipe --group-by 2 --colsep ',' -kN1 wc
|
||||||
|
input ' ' | parallel --pipe --group-by 2 --colsep ' ' -kN1 wc
|
||||||
|
}
|
||||||
|
|
||||||
export -f $(compgen -A function | grep par_)
|
export -f $(compgen -A function | grep par_)
|
||||||
compgen -A function | grep par_ | LC_ALL=C sort |
|
compgen -A function | grep par_ | LC_ALL=C sort |
|
||||||
parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |
|
parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |
|
||||||
|
|
|
@ -154,6 +154,13 @@ par_file_ending_in_newline gzip '/tmp/parallel_f2
|
||||||
par_file_ending_in_newline '
|
par_file_ending_in_newline '
|
||||||
par_fish ### https://github.com/fish-shell/fish-shell/issues/5582
|
par_fish ### https://github.com/fish-shell/fish-shell/issues/5582
|
||||||
par_fish OK
|
par_fish OK
|
||||||
|
par_group-by_colsep_space ### --colsep " " should work like ","
|
||||||
|
par_group-by_colsep_space 1 2 5
|
||||||
|
par_group-by_colsep_space 2 2 10
|
||||||
|
par_group-by_colsep_space 1 1 6
|
||||||
|
par_group-by_colsep_space 1 2 5
|
||||||
|
par_group-by_colsep_space 2 4 10
|
||||||
|
par_group-by_colsep_space 1 3 6
|
||||||
par_halt_on_error_division_by_zero ### --halt-on-error soon,fail=100% with no input should not give division by zero
|
par_halt_on_error_division_by_zero ### --halt-on-error soon,fail=100% with no input should not give division by zero
|
||||||
par_halt_on_error_division_by_zero 0
|
par_halt_on_error_division_by_zero 0
|
||||||
par_halt_one_job # Halt soon if there is a single job
|
par_halt_one_job # Halt soon if there is a single job
|
||||||
|
|
Loading…
Reference in a new issue