parallel: --header with regexp (try: --header '\n')

This commit is contained in:
Ole Tange 2012-01-11 00:08:23 +01:00
parent 08e1366c5d
commit c34e6c489c
4 changed files with 85 additions and 10 deletions

View file

@ -72,7 +72,7 @@ if($::opt_skip_first_line) {
my $fh = $fhlist[0];
<$fh>;
}
if($::opt_header) {
if($::opt_header and not $::opt_pipe) {
my $fh = $fhlist[0];
my $line = <$fh>;
chomp($line);
@ -177,6 +177,17 @@ sub spreadstdin {
# Spawn a job and print the record to it.
my $record;
my $buf = "";
my $header = "";
if($::opt_header) {
my $non_greedy_regexp = $::opt_header;
# Make every quantifier non-greedy: ? , * , + , {} => ?? , *? , +? , {}?
$non_greedy_regexp =~ s/(\?|\*|\+|\})/$1\?/g;
while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
if($buf=~s/^(.*?$non_greedy_regexp)//) {
$header = $1; last;
}
}
}
my ($recstart,$recend,$recerror);
if(defined($::opt_recstart) and defined($::opt_recend)) {
# If both --recstart and --recend are given then both must match
@ -205,8 +216,9 @@ sub spreadstdin {
$recend =~ s/\\([rnt'"\\])/"qq|\\$1|"/gee;
}
my $recendrecstart = $recend.$recstart;
while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
# Force one pass through the while-loop in case the header reading already consumed all of STDIN
my $force_one_time_through = 0;
while(!$force_one_time_through++ or read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
# substr above = append to $buf
reap_if_needed(); # Re-enable reaping after read() (Bug#33352)
if($::opt_r) {
@ -220,14 +232,14 @@ sub spreadstdin {
if($Global::max_number_of_args) {
# -N => (start..*?end){n}
while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) {
$record = $1;
$record = $header.$1;
::debug("Read record -N: ".length($record)."\n");
write_record_to_pipe(\$record,$recstart,$recend);
}
} else {
# Find the last recend-recstart in $buf
if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
$record = $1;
$record = $header.$1;
::debug("Matched record: ".length($record)."/".length($buf)."\n");
write_record_to_pipe(\$record,$recstart,$recend);
}
@ -238,7 +250,7 @@ sub spreadstdin {
my $i = 0;
while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) {
$i += length $recend; # find the actual splitting location
my $record = substr($buf,0,$i);
my $record = $header.substr($buf,0,$i);
substr($buf,0,$i) = "";
::debug("Read record: ".length($record)."\n");
write_record_to_pipe(\$record,$recstart,$recend);
@ -248,7 +260,7 @@ sub spreadstdin {
my $i = rindex($buf,$recendrecstart);
if($i != -1) {
$i += length $recend; # find the actual splitting location
my $record = substr($buf,0,$i);
my $record = $header.substr($buf,0,$i);
substr($buf,0,$i) = "";
::debug("Read record: ".length($record)."\n");
write_record_to_pipe(\$record,$recstart,$recend);
@ -258,6 +270,7 @@ sub spreadstdin {
do_not_reap(); # Disable reaping before read(STDIN) (Bug#33352)
}
# Write whatever is left in the buffer, with the header prepended
substr($buf,0,0) = $header;
write_record_to_pipe(\$buf,$recstart,$recend);
::debug("Done reading STDIN\n");
@ -498,7 +511,7 @@ sub options_hash {
"shebang|hashbang" => \$::opt_shebang,
"Y" => \$::opt_retired,
"skip-first-line" => \$::opt_skip_first_line,
"header" => \$::opt_header,
"header=s" => \$::opt_header,
);
}
@ -795,7 +808,6 @@ sub open_joblog {
}
}
sub read_options {
# Read options from command line, profile and $PARALLEL
# Returns:

View file

@ -59,7 +59,7 @@ echo '### Test --resume --joblog followed by --resume --joblog';
rm -f /tmp/joblog2;
echo '### Test --header';
printf "a\tb\n1.2\t3/4.5" | parallel --header echo {b} {a} {b.} {b/} {b//} {b/.};
printf "a\tb\n1.2\t3/4.5" | parallel --header "\n" echo {b} {a} {b.} {b/} {b//} {b/.};
echo '### 64-bit wierdness - this did not complete on a 64-bit machine';
seq 1 2 | parallel -j1 'seq 1 1 | parallel true'

View file

@ -12,6 +12,12 @@ mem300=$(echo $out300 | tr -cd 0-9);
echo "Test if memory consumption(300 jobs) < memory consumption(30 jobs) * 150% ";
echo $(($mem300*100 < $mem30 * 150))
echo "### Test --header with -N";
(echo h1; echo h2; echo 1a;echo 1b; echo 2a;echo 2b; echo 3a)| parallel -j1 --pipe -N2 -k --header '\n.*\n' echo Start\;cat \; echo Stop
echo "### Test --header with --block 1k";
(echo h1; echo h2; perl -e '$a="x"x110;for(1..22){print $_,$a,"\n"'})| parallel -j1 --pipe -k --block 1k --header '\n.*\n' echo Start\;cat \; echo Stop
echo '### Test --shellquote'
cat <<'_EOF' | parallel --shellquote
awk -v FS="\",\"" '{print $1, $3, $4, $5, $9, $14}' | grep -v "#" | sed -e '1d' -e 's/\"//g' -e 's/\/\/\//\t/g' | cut -f1-6,11 | sed -e 's/\/\//\t/g' -e 's/ /\t/g

View file

@ -1,5 +1,62 @@
### Test memory consumption stays (almost) the same for 30 and 300 jobs
Test if memory consumption(300 jobs) < memory consumption(30 jobs) * 150%
1
### Test --header with -N
Start
h1
h2
1a
1b
Stop
Start
h1
h2
2a
2b
Stop
Start
h1
h2
3a
Stop
### Test --header with --block 1k
Start
h1
h2
1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
3xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
4xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
5xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
6xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
7xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
8xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Stop
Start
h1
h2
9xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
10xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
11xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
12xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
13xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
14xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
15xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
16xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
17xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Stop
Start
h1
h2
18xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
19xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
20xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
21xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
22xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Stop
Start
h1
h2
Stop
### Test --shellquote
awk\ -v\ FS=\"\\\",\\\"\"\ \'\{print\ \$1,\ \$3,\ \$4,\ \$5,\ \$9,\ \$14\}\'\ \|\ grep\ -v\ \"\#\"\ \|\ sed\ -e\ \'1d\'\ -e\ \'s/\\\"//g\'\ -e\ \'s/\\/\\/\\//\\t/g\'\ \|\ cut\ -f1-6,11\ \|\ sed\ -e\ \'s/\\/\\//\\t/g\'\ -e\ \'s/\ /\\t/g