App-Jawk
view release on metacpan or search on metacpan
" -e 'print \"\@F\n\"' : more like awk but with \@F, and perl\n" .
" -w means 'use warnings' for perlcode run via -e\n" .
" -v means show version of jawk, and quit\n";
#" -a is for old (deprecated) 'awky' mode with \$1 \$2 etc (from perl)\n" .
}
############################################
sub main {
#memoize( 'convert_args_to_fields' ); # this speeds up named-query-stats from 2.33 to 1.7 minutes (~25%).
my @args;
my $exe = ""; # whatever perl code we should execute for each line
# HANDLE Command Line Processing... Manually!
#
# shift items off @ARGV, processing as we go along,
# putting fields like 1, 1.. or 1..2 etc into @args
#
# With all the code below, we've finally replicated most of Getopt::Long's -- option :).
# Except Getopt::Long doesn't store away @args and @ARGV separately like we do.
# (although it would be nice to support named options like --version).
ARGVLOOP:
while( defined($_ = shift @ARGV) ) { # manual handling of command-line options
if (/^-x/) { $exclude = 1; } # -x option
#elsif (/^-a/) { $awky = 1; } # -a is DEPRECATED # -a option, $1 instead of @F # AWK MODE IS DISABLED
elsif (/^-D/) { $debug = 1; } # debug
elsif (/^-w/) { $warnings = 1; } # turn on perl warnings -e code
elsif (/^-v/) { print "$prog: version $VERSION\n"; exit(0); }
elsif (/^-e$/) { $exe .= shift( @ARGV ) || die "$prog: -e (exe) needs param\n"; }
elsif (/^-e=(.*)$/) { $exe .= $1 || die "$prog: -e (exe) needs param\n"; }
elsif (/^-d$/) { $delimiter = shift( @ARGV ) || die "$prog: -d (delim) needs param\n"; }
elsif (/^-d=(.*)$/) { $delimiter = $1 || die "$prog: -d (delim) needs param\n"; }
elsif (/^-j$/) { $joiner = shift( @ARGV ) || die "$prog: -j (joiner) needs param\n"; }
elsif (/^-j=(.*)$/) { $joiner = $1 || die "$prog: -j (joiner) needs param\n"; }
elsif (/^-\d+/) { push(@args, $_); } # negative digits
elsif (/^--?$/) { last ARGVLOOP; } # stop processing at - or --
elsif (/^-/) { die "$prog: Option not understood: $_\n" . Usage(); } # other -options
else { push(@args, $_); }
# ok; non-hyphenated option like digit or ..
}
#print "$prog: args passed: @ARGV\n" if $debug;
warn "$prog: Doesn't make sense to use numbered fields and -e, fields ignored\n" if ($exe && @args);
my $quote_meta_delimiter = defined($delimiter) ? quotemeta( $delimiter ) : "";
# read lines with the magical diamond operator. note use of '--' option, documented above.
while( defined( my $line = <> ) ) {
chomp($line);
# split the line into parts
my @parts;
if ($delimiter eq ' ' ) {
@parts = split( ' ', $line ); # ' ' is a special case with split, which acts special. look it up!
} else {
@parts = split(/$quote_meta_delimiter/, $line); # so you can split on chars like "("
#shift(@parts) while (@parts && $parts[0] =~ /^\s*$/); # should we strip leading blank fields?
}
if ($exe) { # if we have an exe from the command line, run it for each input line
# if they passed a line to execute, then run it for each line we read
my $exe_expanded = replace_exe_vars( $exe, \@parts ); # expand to perl script
eval "$exe_expanded"; # string eval.
warn "$prog: Error running: $exe_expanded: $@\n" if $@;
} else { # otherwise, pull out fields via numbered args.
# convert the args (things from @ARGV that don't look like command-line options)
# into fields. Must be done for each line, because we need the
# number of elements.
print STDERR "args are @args, parts are @parts\n" if $debug;
my @fields = convert_args_to_fields( \@args, scalar(@parts) );
# if we're in -x mode, invert the fields to
# figure out which are left after exclusions.
if ($exclude) {
@fields = invert_fields( \@fields, scalar(@parts) );
}
print (join($joiner, @parts[@fields]) . "\n");
}
}
exit(0); # done
}
############################################
# my $exe_expanded = replace_exe_vars( $exe, \@fields )
sub replace_exe_vars {
my ($exe, $fieldsref) = @_;
my @fields = @$fieldsref;
# AWK MODE IS DISABLED ABOVE
if ($awky) {
# awky style, to be deprecated. Replace $1 $2 $3 etc.
# since we can't actually assign to $1, $2, $3, etc easily,
# we manually parse out $\d+ and ${\d+} sequences from the exe string they pass
# and pass back a string to be eval'ed :)
#print "incoming exe: $exe; [@fields]\n";
#$exe =~ s/ \$ ([0-9]+) /my $c = $fields->[\$1-1];\\$c/geex; # $fields->[$1-1]/xg;
while( $exe =~ m/ \$ ([0-9]+) /x) {
my $field = $1;
my $search = '\$' . $field;
my $replace = ($field > 0 && $field <= @$fieldsref) ? $fieldsref->[$field-1] : "";
#print "$prog: Replacing field $field: $search with $replace\n";
$exe =~ s/ $search /$replace/xg;
}
} else { # non awky style, this is recommended
# $exe holds the code to run on @F
use vars qw( @F ); # make a global @F
@F = @$fieldsref; # yes we 0-th element, because @F IS NORMAL PERL!
my $tmpexe = "no strict; ";
$tmpexe .= "no warnings; " unless $warnings;
$tmpexe .= $exe;
$exe = $tmpexe;
}
#print "outgoing exe: $exe\n";
return $exe;
}
#########################################################
# convert_args_to_fields( $args_ref, $numparts_in_line )
# args come in 1-based (or negative), and are returned 0-based
# handles ranges like 1..3 or 3..1
# as well as negative args alone or in ranges,
# like -2..1, or like -1..1
# note: could probably be optimized 20-30% by doing this all in one giant map{}
sub convert_args_to_fields {
my ($args_ref, $numparts) = @_;
return () unless $numparts;
print STDERR "$prog: 0: initially (@$args_ref)\n" if $debug;
my $numreg = '-?\d+'; # our 'number regex'
# convert the user's fields to field numbers within the fields
# parse the command line arguments for ints and ranges like a..b , a.. , and b.. .
# skip indexes that we don't have a value for, and
# shift each int down by one; 1-based to 0-based.
# (This was originally more broken up for clarity, but we optimized (and shortened)
# it to this.)
my @ret = map { $_ - 1 }
grep {$_ <= $numparts && $_ >= 1 }
map { /^$numreg$/ ? (_converted_version($_, $numparts)) : # ** a single int
/^($numreg)\.\.($numreg)$/ ? (get_range($1, $2, $numparts)) : # ** an int range
/^($numreg)\.\.$/ ? (get_range($1, $numparts, $numparts)) : # ** an integer and up
( run in 1.724 second using v1.01-cache-2.11-cpan-5623c5533a1 )