Code refactor: move :::: and ::: processing.

This commit is contained in:
Ole Tange 2010-07-21 13:47:59 +02:00
parent ad25eb3f5a
commit e8a30d9fac
4 changed files with 124 additions and 80 deletions

View file

@ -1,3 +1,20 @@
Code refactor: move :::: and ::: processing.
Example:
Given a list of URLs, list all URLs that fail to download. Print the
line number and the URL.
cat urlfile | parallel "wget {} 2>>/dev/null || grep -n {} urlfile"
EXAMPLE:
Copy foo.es.ext to foo.ext:
ls *.es.* | perl -pe 'print; s/\.es//' | parallel -N2 cp {1} {2}
Import sql Import sql
inputfile tabel, Split colonner til {n} inputfile tabel, Split colonner til {n}

View file

@ -210,6 +210,9 @@ Line is a command. The input line contains more than one argument or
the input line needs to be evaluated by the shell. This is the default the input line needs to be evaluated by the shell. This is the default
if I<command> is not set. Can be reversed with B<-f>. if I<command> is not set. Can be reversed with B<-f>.
Most people will never need this because GNU B<parallel> normally
selects the correct B<-f> or B<-c>.
=item B<--delimiter> I<delim> =item B<--delimiter> I<delim>
@ -259,6 +262,9 @@ Line is a filename. The input line contains a filename that will be
quoted so it is not evaluated by the shell. This is the default if quoted so it is not evaluated by the shell. This is the default if
I<command> is set. Can be reversed with B<-c>. I<command> is set. Can be reversed with B<-c>.
Most people will never need this because GNU B<parallel> normally
selects the correct B<-f> or B<-c>.
=item B<--group> =item B<--group>
@ -1952,94 +1958,31 @@ sub parse_options {
} }
if(grep /^$Global::arg_sep$/o, @ARGV) { if(grep /^$Global::arg_sep$/o, @ARGV) {
# Arguments on the command line. @ARGV=read_args_from_command_line();
# Ignore STDIN by reading from /dev/null
# or another file if user has given --arg-file
if(not @::opt_a) { push @::opt_a, "/dev/null"; }
# Input: @ARGV = command option ::: arg arg arg
my @new_argv = ();
while(@ARGV) {
my $arg = shift @ARGV;
if($arg eq $Global::arg_sep) {
$Global::input_is_filename = (@new_argv);
if($Global::input_is_filename) {
unget_arg(shell_quote(@ARGV));
} else {
unget_arg(@ARGV);
}
$Global::total_jobs += @ARGV;
@ARGV=();
last;
} else {
push @new_argv, $arg;
}
}
# Output: @ARGV = command option
@ARGV=@new_argv;
} }
if(grep /^$Global::arg_file_sep$/o, @ARGV) { if(grep /^$Global::arg_file_sep$/o, @ARGV) {
# convert :::: to multiple -a @ARGV=convert_argfiles_from_command_line_to_multiple_opt_a();
my @new_argv = ();
my @argument_files;
while(@ARGV) {
my $arg = shift @ARGV;
if($arg eq $Global::arg_file_sep) {
@argument_files = @ARGV;
@ARGV=();
} else {
push @new_argv, $arg;
}
}
# Output: @ARGV = command option
@ARGV=@new_argv;
push @::opt_a, @argument_files;
} }
# must be done after ::: and :::: because they mess with @ARGV # must be done after ::: and :::: because they mess with @ARGV
$Global::input_is_filename ||= (@ARGV); $Global::input_is_filename ||= (@ARGV);
if(@::opt_a) { if(@::opt_a) {
# must be done after opt_arg_sep # must be done after
# convert_argfiles_from_command_line_to_multiple_opt_a
if($#::opt_a == 0) { if($#::opt_a == 0) {
# One -a => xargs compatibility # One -a => xargs compatibility
$Global::argfile = open_or_exit($::opt_a[0]); $Global::argfile = open_or_exit($::opt_a[0]);
} else { } else {
# Multiple -a => xapply style # Multiple -a => xapply style
$Global::argfile = open_or_exit("/dev/null"); argfiles_xapply_style();
$::opt_N = $#::opt_a+1;
$Global::max_number_of_args = $#::opt_a+1;
# read the files
my @content;
my $max_lineno = 0;
my $in_fh = gensym;
for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
$in_fh = open_or_exit($::opt_a[$fileno]);
for (my $lineno=0;
$content[$fileno][$lineno] = get_next_arg_from_fh($in_fh);
$lineno++) {
$max_lineno = max($max_lineno,$lineno);
}
close $in_fh;
}
for (my $lineno=0; $lineno <= $max_lineno; $lineno++) {
for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
my $arg = $content[$fileno][$lineno];
if(defined $arg) {
unget_arg($arg);
} else {
unget_arg("");
}
}
}
$Global::total_jobs += $max_lineno;
} }
} }
if(($::opt_l || $::opt_L || $::opt_n || $::opt_N || $::opt_s) and not if(($::opt_l || $::opt_L || $::opt_n || $::opt_N || $::opt_s) and not
($::opt_m or $::opt_X)) { ($::opt_m or $::opt_X)) {
# These --max-line, -l, -L, --max-args, -n, --max-chars, -s # The options --max-line, -l, -L, --max-args, -n, --max-chars, -s
# do not make sense without -X or -m # do not make sense without -X or -m
# so default to -X # so default to -X
# Needs to be done after :::: and @opt_a, as they can set $::opt_N # Needs to be done after :::: and @opt_a, as they can set $::opt_N
@ -2069,6 +2012,9 @@ sub parse_options {
if(remote_hosts() and ($Global::xargs or $Global::Xargs) if(remote_hosts() and ($Global::xargs or $Global::Xargs)
and not $::opt_N) { and not $::opt_N) {
# As we do not know the max line length on the remote machine
# long commands generated by xargs may fail
# If opt_N is set, it is probably safe
print STDERR ("Warning: using -X or -m with --sshlogin may fail\n"); print STDERR ("Warning: using -X or -m with --sshlogin may fail\n");
} }
@ -2087,6 +2033,92 @@ sub parse_options {
} }
} }
sub read_args_from_command_line {
    # Handle arguments given on the command line after the argument
    # separator ($Global::arg_sep, i.e. ':::').
    # Input:
    #   @ARGV = command option ::: arg arg arg
    # Side effects:
    #   If no -a file has been given, "/dev/null" is pushed onto
    #   @::opt_a (so input is not taken from STDIN).
    #   The words after the separator are handed to unget_arg();
    #   they are passed through shell_quote() first if at least one
    #   word precedes the separator.
    #   $Global::input_is_filename is set to the number of words
    #   before the separator.
    #   $Global::total_jobs is increased by the number of arguments.
    #   @ARGV is left empty.
    # Returns:
    #   the words before the separator (command option); all of
    #   @ARGV if there is no separator
    @::opt_a or push @::opt_a, "/dev/null";

    # Locate the first separator
    my $sep_index;
    for my $i (0 .. $#ARGV) {
        if($ARGV[$i] eq $Global::arg_sep) {
            $sep_index = $i;
            last;
        }
    }

    if(not defined $sep_index) {
        # No separator: everything is command + options
        my @everything = splice(@ARGV);
        return @everything;
    }

    # Words before the separator are the command and its options
    my @command = splice(@ARGV, 0, $sep_index);
    # Drop the separator itself; @ARGV now holds only the arguments
    shift @ARGV;

    $Global::input_is_filename = @command;
    unget_arg($Global::input_is_filename ? shell_quote(@ARGV) : @ARGV);
    $Global::total_jobs += @ARGV;
    @ARGV = ();

    return @command;
}
sub convert_argfiles_from_command_line_to_multiple_opt_a {
    # Treat everything after the argument file separator
    # ($Global::arg_file_sep, i.e. '::::') as if it had been given
    # with -a.
    # Input:
    #   @ARGV = command option :::: file file file
    # Side effects:
    #   The words after the separator are appended to @::opt_a.
    #   @ARGV is left empty.
    # Returns:
    #   the words before the separator (command option)
    my (@command, @files);
    while(@ARGV) {
        my $word = shift @ARGV;
        if($word ne $Global::arg_file_sep) {
            push @command, $word;
            next;
        }
        # Separator found: the rest of @ARGV are argument files.
        # splice empties @ARGV, which also ends the loop.
        @files = splice(@ARGV);
    }
    push @::opt_a, @files;
    return @command;
}
sub argfiles_xapply_style {
    # Multiple -a => xapply style
    # Convert the n files in @::opt_a into one argument queue in which
    # every n'th entry is from the same file.
    # Side effects:
    #   $Global::argfile is opened on /dev/null.
    #   $::opt_N and $Global::max_number_of_args are set to n, so n
    #   entries are read per invocation.
    #   Every argument is handed to unget_arg(); files that are shorter
    #   than the longest one are padded with "".
    #   $Global::total_jobs is increased.
    $Global::argfile = open_or_exit("/dev/null");
    my $number_of_files = $#::opt_a+1;
    $::opt_N = $number_of_files;
    $Global::max_number_of_args = $number_of_files;

    # Read the files: $content[file number][line number]
    my @content;
    my $max_lineno = 0;
    for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
        my $in_fh = open_or_exit($::opt_a[$fileno]);
        # Test with defined(): an argument that is "0" or "" is false
        # but is still an argument and must not end the reading.
        # Assumes get_next_arg_from_fh returns undef when the input is
        # exhausted - TODO confirm against its definition.
        for (my $lineno = 0;
             defined($content[$fileno][$lineno] =
                     get_next_arg_from_fh($in_fh));
             $lineno++) {
            $max_lineno = max($max_lineno,$lineno);
        }
        close $in_fh;
    }

    # Interleave: line 0 of every file, then line 1 of every file, ...
    for (my $lineno = 0; $lineno <= $max_lineno; $lineno++) {
        for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
            my $arg = $content[$fileno][$lineno];
            # Pad short files so the columns stay aligned
            unget_arg(defined $arg ? $arg : "");
        }
    }
    # NOTE(review): $max_lineno is the highest 0-based line index, so
    # this adds one less than the number of rows queued above - confirm
    # whether that is intended.
    $Global::total_jobs += $max_lineno;
}
sub open_or_exit { sub open_or_exit {
# Returns: # Returns:
# file handle to read-opened file # file handle to read-opened file

View file

@ -211,7 +211,7 @@ stdout xargs -eEOF echo < eofstr.xi
stdout parallel -k -eEOF echo < eofstr.xi stdout parallel -k -eEOF echo < eofstr.xi
echo '### -e echo < eof_.xi' echo '### -e echo < eof_.xi'
stdout xargs -e echo < eof_.xi stdout xargs -e echo < eof_.xi
stdout parallel -k -e echo < eof_.xi stdout parallel -e -k echo < eof_.xi
echo '### -E_ echo < eof1.xi' echo '### -E_ echo < eof1.xi'
stdout xargs -E_ echo < eof1.xi stdout xargs -E_ echo < eof1.xi
stdout parallel -k -E_ echo < eof1.xi stdout parallel -k -E_ echo < eof1.xi

View file

@ -877,16 +877,11 @@ firstline
secondline secondline
### -e echo < eof_.xi ### -e echo < eof_.xi
one two _ three four one two _ three four
Can't exec "one": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168. one
open3: exec of one failed at /usr/local/bin/parallel line 3349 two
Can't exec "two": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168. _
open3: exec of two failed at /usr/local/bin/parallel line 3349 three
Can't exec "_": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168. four
open3: exec of _ failed at /usr/local/bin/parallel line 3349
Can't exec "three": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of three failed at /usr/local/bin/parallel line 3349
Can't exec "four": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of four failed at /usr/local/bin/parallel line 3349
### -E_ echo < eof1.xi ### -E_ echo < eof1.xi
firstline secondline firstline secondline
firstline firstline