parallel: --group-by supports --colsep ' '.

This commit is contained in:
Ole Tange 2020-12-08 18:59:10 +01:00
parent 8df731b9ab
commit fca3e928d8
4 changed files with 32 additions and 4 deletions

View file

@ -190,7 +190,7 @@ from:tange@gnu.org
to:parallel@gnu.org, bug-parallel@gnu.org
stable-bcc: Jesse Alama <jessealama@fastmail.fm>
Subject: GNU Parallel 20201222 ('Maradona') released <<[stable]>>
Subject: GNU Parallel 20201222 ('Maradona/Yeager') released <<[stable]>>
GNU Parallel 20201222 ('') <<[stable]>> has been released. It is available for download at: http://ftpmirror.gnu.org/parallel/

View file

@ -860,14 +860,15 @@ sub column_perlexpr($$$) {
sub group_by_loop($$) {
# Generate perl code for group-by loop
# Insert a $recsep when the column value changes
# The column value can be computed with $perexpr
# The column value can be computed with $perlexpr
my($fh,$recsep) = @_;
my $groupby = $opt::groupby;
if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) {
# Group by column name
# (Yes, this will also wrongly match a perlexpr like: chop)
my($read,$char,@line);
# A full line, but nothing more (the rest must be read by the child)
# Read a full line, but nothing more
# (the rest must be read by the child)
# $Global::header used to prepend block to each job
do {
$read = sysread($fh,$char,1);
@ -918,12 +919,19 @@ sub group_by_stdin_filter() {
my $sep = $opt::colsep;
$sep =~ s/\t/\\t/g;
$sep =~ s/\"/\\"/g;
# man perlrun: -Fpattern [...] You can't use literal whitespace
$sep =~ s/ /\\040{1}/g;
push @filter, "-F$sep";
}
push @filter, "-pe";
push @filter, group_by_loop(*STDIN,$opt::recstart);
::debug("init", "@filter\n");
open(STDIN, '-|', @filter) || die ("Cannot start @filter");
if(which("mbuffer")) {
# You get a speed up of 30% by going through mbuffer
open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") ||
die ("Cannot start mbuffer");
}
}
sub spreadstdin() {
@ -2164,7 +2172,7 @@ sub check_invalid_option_combinations() {
sub init_globals() {
# Defaults:
$Global::version = 20201122;
$Global::version = 20201207;
$Global::progname = 'parallel';
$::name = "GNU Parallel";
$Global::infinity = 2**31;

View file

@ -860,6 +860,19 @@ par_PARALLEL_HOME_with_+() {
rm -rf "$tmp"
}
par_group-by_colsep_space() {
    # Regression test: --group-by combined with --colsep ' ' must behave
    # like --group-by combined with --colsep ','.
    # The same four records are generated once per separator and piped
    # through 'parallel --pipe --group-by 2', with 'wc' run on each chunk.
    echo '### --colsep " " should work like ","'
    # input SEP
    #   Print four test records to stdout, using SEP as column separator.
    #   The first record also contains a TAB before the separator.
    input() {
	local sep="$1"
	# Pass the separator as a %s argument instead of interpolating it
	# into the format string: a separator containing '%' or '\' would
	# otherwise be interpreted by printf as a directive/escape.
	printf 'a\t%sb\n' "$sep"
	printf 'a%s%sb\n' "$sep" "$sep"
	printf 'b%s%sa\n' "$sep" "$sep"
	printf 'b%sa%sb\n' "$sep" "$sep"
    }
    input ',' | parallel --pipe --group-by 2 --colsep ',' -kN1 wc
    input ' ' | parallel --pipe --group-by 2 --colsep ' ' -kN1 wc
}
export -f $(compgen -A function | grep par_)
compgen -A function | grep par_ | LC_ALL=C sort |
parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |

View file

@ -154,6 +154,13 @@ par_file_ending_in_newline gzip '/tmp/parallel_f2
par_file_ending_in_newline '
par_fish ### https://github.com/fish-shell/fish-shell/issues/5582
par_fish OK
par_group-by_colsep_space ### --colsep " " should work like ","
par_group-by_colsep_space 1 2 5
par_group-by_colsep_space 2 2 10
par_group-by_colsep_space 1 1 6
par_group-by_colsep_space 1 2 5
par_group-by_colsep_space 2 4 10
par_group-by_colsep_space 1 3 6
par_halt_on_error_division_by_zero ### --halt-on-error soon,fail=100% with no input should not give division by zero
par_halt_on_error_division_by_zero 0
par_halt_one_job # Halt soon if there is a single job