Code refactor: move :::: and ::: processing.

This commit is contained in:
Ole Tange 2010-07-21 13:47:59 +02:00
parent ad25eb3f5a
commit e8a30d9fac
4 changed files with 124 additions and 80 deletions

View file

@ -1,3 +1,20 @@
Code refactor: move :::: and ::: processing.
Example:
Given a list of URLs, list all URLs that fail to download. Print the
line number and the URL.
cat urlfile | parallel "wget {} 2>>/dev/null || grep -n {} urlfile"
EXAMPLE:
Copy foo.es.ext to foo.ext:
ls *.es.* | perl -pe 'print; s/\.es//' | parallel -N2 cp {1} {2}
Import sql
input file = table, split columns into {n}

View file

@ -210,6 +210,9 @@ Line is a command. The input line contains more than one argument or
the input line needs to be evaluated by the shell. This is the default
if I<command> is not set. Can be reversed with B<-f>.
Most people will never need this because GNU B<parallel> normally
selects the correct B<-f> or B<-c>.
=item B<--delimiter> I<delim>
@ -259,6 +262,9 @@ Line is a filename. The input line contains a filename that will be
quoted so it is not evaluated by the shell. This is the default if
I<command> is set. Can be reversed with B<-c>.
Most people will never need this because GNU B<parallel> normally
selects the correct B<-f> or B<-c>.
=item B<--group>
@ -1952,94 +1958,31 @@ sub parse_options {
}
if(grep /^$Global::arg_sep$/o, @ARGV) {
# Arguments on the command line.
# Ignore STDIN by reading from /dev/null
# or another file if user has given --arg-file
if(not @::opt_a) { push @::opt_a, "/dev/null"; }
# Input: @ARGV = command option ::: arg arg arg
my @new_argv = ();
while(@ARGV) {
my $arg = shift @ARGV;
if($arg eq $Global::arg_sep) {
$Global::input_is_filename = (@new_argv);
if($Global::input_is_filename) {
unget_arg(shell_quote(@ARGV));
} else {
unget_arg(@ARGV);
}
$Global::total_jobs += @ARGV;
@ARGV=();
last;
} else {
push @new_argv, $arg;
}
}
# Output: @ARGV = command option
@ARGV=@new_argv;
@ARGV=read_args_from_command_line();
}
if(grep /^$Global::arg_file_sep$/o, @ARGV) {
# convert :::: to multiple -a
my @new_argv = ();
my @argument_files;
while(@ARGV) {
my $arg = shift @ARGV;
if($arg eq $Global::arg_file_sep) {
@argument_files = @ARGV;
@ARGV=();
} else {
push @new_argv, $arg;
}
}
# Output: @ARGV = command option
@ARGV=@new_argv;
push @::opt_a, @argument_files;
@ARGV=convert_argfiles_from_command_line_to_multiple_opt_a();
}
# must be done after ::: and :::: because they mess with @ARGV
$Global::input_is_filename ||= (@ARGV);
if(@::opt_a) {
# must be done after opt_arg_sep
# must be done after
# convert_argfiles_from_command_line_to_multiple_opt_a
if($#::opt_a == 0) {
# One -a => xargs compatibility
$Global::argfile = open_or_exit($::opt_a[0]);
} else {
# Multiple -a => xapply style
$Global::argfile = open_or_exit("/dev/null");
$::opt_N = $#::opt_a+1;
$Global::max_number_of_args = $#::opt_a+1;
# read the files
my @content;
my $max_lineno = 0;
my $in_fh = gensym;
for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
$in_fh = open_or_exit($::opt_a[$fileno]);
for (my $lineno=0;
$content[$fileno][$lineno] = get_next_arg_from_fh($in_fh);
$lineno++) {
$max_lineno = max($max_lineno,$lineno);
}
close $in_fh;
}
for (my $lineno=0; $lineno <= $max_lineno; $lineno++) {
for (my $fileno = 0; $fileno <= $#::opt_a; $fileno++) {
my $arg = $content[$fileno][$lineno];
if(defined $arg) {
unget_arg($arg);
} else {
unget_arg("");
}
}
}
$Global::total_jobs += $max_lineno;
argfiles_xapply_style();
}
}
if(($::opt_l || $::opt_L || $::opt_n || $::opt_N || $::opt_s) and not
($::opt_m or $::opt_X)) {
# These --max-line, -l, -L, --max-args, -n, --max-chars, -s
# The options --max-line, -l, -L, --max-args, -n, --max-chars, -s
# do not make sense without -X or -m
# so default to -X
# Needs to be done after :::: and @opt_a, as they can set $::opt_N
@ -2069,6 +2012,9 @@ sub parse_options {
if(remote_hosts() and ($Global::xargs or $Global::Xargs)
and not $::opt_N) {
# As we do not know the max line length on the remote machine
# long commands generated by xargs may fail
# If opt_N is set, it is probably safe
print STDERR ("Warning: using -X or -m with --sshlogin may fail\n");
}
@ -2087,6 +2033,92 @@ sub parse_options {
}
}
sub read_args_from_command_line {
    # Handle arguments given on the command line after :::
    # ($Global::arg_sep).
    # Everything before the first ::: is kept as the command; everything
    # after it is pushed onto the argument queue with unget_arg() and
    # counted in $Global::total_jobs. The arguments are passed through
    # shell_quote() first if there was a command in front of the :::.
    # STDIN is ignored by reading from /dev/null, unless the user has
    # already given an --arg-file (@::opt_a is non-empty).
    # Uses:
    #   @ARGV = command option ::: arg arg arg
    # Side effects:
    #   @ARGV is emptied
    #   $Global::input_is_filename = number of words before the :::
    # Returns:
    #   @ARGV without ::: and following args
    push @::opt_a, "/dev/null" unless @::opt_a;
    my @command = ();
    while(@ARGV) {
        my $word = shift @ARGV;
        if($word ne $Global::arg_sep) {
            # Still in front of the separator: part of the command
            push @command, $word;
            next;
        }
        # Found the separator: what is left in @ARGV are the arguments
        $Global::input_is_filename = (@command);
        if(not $Global::input_is_filename) {
            unget_arg(@ARGV);
        } else {
            unget_arg(shell_quote(@ARGV));
        }
        $Global::total_jobs += scalar(@ARGV);
        @ARGV = ();
        last;
    }
    return @command;
}
sub convert_argfiles_from_command_line_to_multiple_opt_a {
    # Convert :::: ($Global::arg_file_sep) to multiple -a
    # The words before the first :::: are kept as the command; every
    # word after it (including any later ::::) is appended to @::opt_a
    # as an argument file.
    # Uses:
    #   @ARGV = command option :::: file file file
    # Side effects:
    #   @ARGV is emptied
    #   @::opt_a gets the files appended
    # Returns:
    #   @ARGV without :::: and following args
    my @command = ();
    my @files_after_sep = ();
    ARG: while(@ARGV) {
        my $word = shift @ARGV;
        if($word ne $Global::arg_file_sep) {
            push @command, $word;
            next ARG;
        }
        # Separator found: take the rest of @ARGV in one go.
        # This leaves @ARGV empty, which ends the loop.
        @files_after_sep = splice(@ARGV);
    }
    push @::opt_a, @files_after_sep;
    return @command;
}
sub argfiles_xapply_style {
    # Multiple -a => xapply style
    # Convert the n files in @::opt_a into one queue
    # Every n'th entry is from the same file
    # Set opt_N to read n entries per invocation
    # Files with fewer lines than the longest file contribute "" for
    # the missing entries.
    # Uses:
    #   @::opt_a = the argument files
    # Side effects:
    #   $Global::argfile is opened on /dev/null
    #   $::opt_N and $Global::max_number_of_args = number of files
    #   the arguments are pushed onto the queue with unget_arg()
    #   $Global::total_jobs is incremented
    $Global::argfile = open_or_exit("/dev/null");
    $::opt_N = $#::opt_a+1;
    $Global::max_number_of_args = $#::opt_a+1;
    # read the files
    my @content;
    my $max_lineno = 0;
    for my $fileno (0 .. $#::opt_a) {
        my $in_fh = open_or_exit($::opt_a[$fileno]);
        # Test with defined(): an argument that is false in Perl ("0" or
        # "") must not be mistaken for end of file and cut the file short.
        # Assumes get_next_arg_from_fh returns undef at end of file
        # - TODO confirm
        for (my $lineno = 0;
             defined($content[$fileno][$lineno] =
                     get_next_arg_from_fh($in_fh));
             $lineno++) {
            $max_lineno = max($max_lineno,$lineno);
        }
        close $in_fh;
    }
    # Interleave: line 0 of every file, then line 1 of every file, ...
    for my $lineno (0 .. $max_lineno) {
        for my $fileno (0 .. $#::opt_a) {
            my $arg = $content[$fileno][$lineno];
            unget_arg(defined $arg ? $arg : "");
        }
    }
    # NOTE(review): $max_lineno is the highest line index, so this adds
    # one less than the number of rows queued above - confirm that this
    # is what the users of $Global::total_jobs expect.
    $Global::total_jobs += $max_lineno;
}
sub open_or_exit {
# Returns:
# file handle to read-opened file

View file

@ -211,7 +211,7 @@ stdout xargs -eEOF echo < eofstr.xi
stdout parallel -k -eEOF echo < eofstr.xi
echo '### -e echo < eof_.xi'
stdout xargs -e echo < eof_.xi
stdout parallel -k -e echo < eof_.xi
stdout parallel -e -k echo < eof_.xi
echo '### -E_ echo < eof1.xi'
stdout xargs -E_ echo < eof1.xi
stdout parallel -k -E_ echo < eof1.xi

View file

@ -877,16 +877,11 @@ firstline
secondline
### -e echo < eof_.xi
one two _ three four
Can't exec "one": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of one failed at /usr/local/bin/parallel line 3349
Can't exec "two": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of two failed at /usr/local/bin/parallel line 3349
Can't exec "_": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of _ failed at /usr/local/bin/parallel line 3349
Can't exec "three": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of three failed at /usr/local/bin/parallel line 3349
Can't exec "four": No such file or directory at /usr/share/perl/5.10/IPC/Open3.pm line 168.
open3: exec of four failed at /usr/local/bin/parallel line 3349
one
two
_
three
four
### -E_ echo < eof1.xi
firstline secondline
firstline