parsort: shell-quote the sort options (bug #59779)

@Global::sortoptions is now passed through shell_quote(), so option
values that contain whitespace (e.g. -t ' ' or -t TAB) are quoted.
The quoting helpers (shell_quote, shell_quote_scalar*, Q) are added to
src/parsort, together with a regression test and its expected output.

Note: the blob ids on the "index" lines are those of the original commit.

diff --git a/src/parsort b/src/parsort
index d48a1996..78404a4d 100755
--- a/src/parsort
+++ b/src/parsort
@@ -41,7 +41,7 @@ On a 48 core machine you should see a speedup of 3x over B.
 
 =head1 AUTHOR
 
-Copyright (C) 2020 Ole Tange,
+Copyright (C) 2020-2021 Ole Tange,
 http://ole.tange.dk and Free Software Foundation, Inc.
 
 
@@ -118,9 +118,10 @@ GetOptions(
     "help" => \$opt::dummy,
     ) || exit(255);
 $Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1];
-$Global::version = 20201223;
+$Global::version = 20210102;
 if($opt::version) { version(); exit 0; }
-@Global::sortoptions = @ARGV_before[0..($#ARGV_before-$#ARGV-1)];
+@Global::sortoptions =
+    shell_quote(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]);
 #if($opt::zero_terminated) { $/ = "\0"; }
 
 $ENV{'TMPDIR'} ||= "/tmp";
@@ -217,7 +218,7 @@ sub version() {
     print join
         ("\n",
          "GNU $Global::progname $Global::version",
-         "Copyright (C) 2020 Ole Tange, http://ole.tange.dk and Free Software",
+         "Copyright (C) 2020-2021 Ole Tange, http://ole.tange.dk and Free Software",
          "Foundation, Inc.",
          "License GPLv3+: GNU GPL version 3 or later ",
          "This is free software: you are free to change and redistribute it.",
@@ -227,6 +228,109 @@ sub version() {
         );
 }
 
+sub shell_quote(@) {
+    # Input:
+    #   @strings = strings to be quoted
+    # Returns:
+    #   @shell_quoted_strings = strings quoted as needed by the shell
+    #   (in scalar context: the quoted strings joined by a single space)
+    return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_);
+}
+
+sub shell_quote_scalar_rc($) {
+    # Quote for the rc-shell
+    my $a = $_[0];
+    if(defined $a) {
+        if(($a =~ s/'/''/g)
+           +
+           ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) {
+            # A string was replaced
+            # No need to test for "" or \0
+        } elsif($a eq "") {
+            $a = "''";
+        } elsif($a eq "\0") {
+            $a = "";
+        }
+    }
+    return $a;
+}
+
+sub shell_quote_scalar_csh($) {
+    # Quote for (t)csh
+    my $a = $_[0];
+    if(defined $a) {
+        # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
+        # This is 1% faster than the above
+        if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go)
+           +
+           # quote newline in csh as \\\n
+           ($a =~ s/[\n]/"\\\n"/go)) {
+            # A string was replaced
+            # No need to test for "" or \0
+        } elsif($a eq "") {
+            $a = "''";
+        } elsif($a eq "\0") {
+            $a = "";
+        }
+    }
+    return $a;
+}
+
+sub shell_quote_scalar_default($) {
+    # Quote for other shells (Bourne compatibles)
+    # Inputs:
+    #   $string = string to be quoted
+    # Returns:
+    #   $shell_quoted = string quoted as needed by the shell
+    my $s = $_[0];
+    if($s =~ /[^-_.+a-z0-9\/]/i) {
+        $s =~ s/'/'"'"'/g; # "-quote single quotes
+        $s = "'$s'";       # '-quote entire string
+        $s =~ s/^''//;     # Remove unneeded '' at ends
+        $s =~ s/''$//;     # (faster than s/^''|''$//g)
+        return $s;
+    } elsif ($s eq "") {
+        return "''";
+    } else {
+        # No quoting needed
+        return $s;
+    }
+}
+
+sub shell_quote_scalar($) {
+    # Quote the string so the shell will not expand any special chars
+    # Inputs:
+    #   $string = string to be quoted
+    # Returns:
+    #   $shell_quoted = string quoted as needed by the shell
+
+    # Speed optimization: Choose the correct shell_quote_scalar_*
+    # and call that directly from now on
+    no warnings 'redefine';
+    if($Global::cshell) {
+        # (t)csh
+        *shell_quote_scalar = \&shell_quote_scalar_csh;
+    } elsif(defined($Global::shell) && $Global::shell =~ m:(^|/)rc$:) {
+        # rc-shell
+        # (defined() avoids an 'uninitialized value' warning if $Global::shell is unset)
+        *shell_quote_scalar = \&shell_quote_scalar_rc;
+    } else {
+        # other shells
+        *shell_quote_scalar = \&shell_quote_scalar_default;
+    }
+    # The sub is now redefined. Call it
+    return shell_quote_scalar($_[0]);
+}
+
+sub Q($) {
+    # Q alias for ::shell_quote_scalar
+    my $ret = shell_quote_scalar($_[0]);
+    no warnings 'redefine';
+    *Q = \&::shell_quote_scalar;
+    return $ret;
+}
+
+
 if(@ARGV) {
     sort_files(@ARGV);
 } elsif(length $opt::files0_from) {
diff --git a/testsuite/tests-to-run/parsort-local-01.sh b/testsuite/tests-to-run/parsort-local-01.sh
new file mode 100644
index 00000000..40c36fc7
--- /dev/null
+++ b/testsuite/tests-to-run/parsort-local-01.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+par_whitespace_delimiter() {
+    echo 'bug #59779: parsort does not work with white characters as delimiters'
+    doit() {
+        del="$1"
+        # mktemp instead of tempfile: tempfile is Debian-only and deprecated
+        tmp=$(mktemp)
+        (
+            printf "a%s8%se\n" "$del" "$del"
+            printf "b%s7%sf\n" "$del" "$del"
+            printf "c%s3%sg\n" "$del" "$del"
+            printf "d%s5%sh\n" "$del" "$del"
+        ) > "$tmp"
+        parsort -t "$del" -k2 "$tmp"
+        status=$?
+        rm -f "$tmp"   # do not leave the temp file behind
+        return $status
+    }
+    doit ','
+    doit ' '
+    tab="$(printf '\t')"
+    doit "$tab"
+}
+
+export -f $(compgen -A function | grep par_)
+compgen -A function | grep par_ | LC_ALL=C sort |
+    parallel --timeout 1000% -j6 --tag -k --joblog /tmp/jl-`basename $0` '{} 2>&1' |
+    perl -pe 's:/usr/bin:/bin:g'
+
+
diff --git a/testsuite/wanted-results/parsort-local-01 b/testsuite/wanted-results/parsort-local-01
new file mode 100644
index 00000000..2baa88c7
--- /dev/null
+++ b/testsuite/wanted-results/parsort-local-01
@@ -0,0 +1,13 @@
+par_whitespace_delimiter	bug #59779: parsort does not work with white characters as delimiters
+par_whitespace_delimiter	c,3,g
+par_whitespace_delimiter	d,5,h
+par_whitespace_delimiter	b,7,f
+par_whitespace_delimiter	a,8,e
+par_whitespace_delimiter	c 3 g
+par_whitespace_delimiter	d 5 h
+par_whitespace_delimiter	b 7 f
+par_whitespace_delimiter	a 8 e
+par_whitespace_delimiter	c	3	g
+par_whitespace_delimiter	d	5	h
+par_whitespace_delimiter	b	7	f
+par_whitespace_delimiter	a	8	e