App-RecordStream
view release on metacpan or search on metacpan
lib/App/RecordStream/KeySpec.pm view on Meta::CPAN
package App::RecordStream::KeySpec;
=head1 NAME
App::RecordStream::KeySpec
=head1 AUTHOR
Benjamin Bernard <perlhacker@benjaminbernard.com>
Keith Amling <keith.amling@gmail.com>
=head1 DESCRIPTION
This class knows out to look up a keyspec in a datastructure
=head1 SYNOPSIS
use App::RecordStream::KeySpec;
my $data_ref = App::RecordStream::KeySpec::find_key($r, 'Foo/Bar');
=cut
our $VERSION = "4.0.25";
use strict;
use warnings;
use App::RecordStream::KeySpec;
use Data::Dumper;
my $registry = {};
sub find_key {
my ($data, $spec, $no_vivify, $throw_error) = @_;
my $spec_obj = __PACKAGE__->new($spec);
return $spec_obj->guess_key($data, $no_vivify, $throw_error);
}
sub new
{
my $class = shift;
my $spec = shift;
if ( exists $registry->{$spec} ) {
return $registry->{$spec};
}
my $this = {
SPEC => $spec,
};
bless $this, $class;
$this->init();
$registry->{$spec} = $this;
return $this;
}
sub init
{
my $this = shift;
$this->_parse_key_spec();
}
{
my $guessed_keys = {};
sub _search_string_to_key {
my $key_chain = shift;
my $string = shift;
return $guessed_keys->{join('-', @$key_chain)}->{$string};
}
sub _add_string_key_mapping {
my $key_chain = shift;
my $string = shift;
my $key = shift;
$guessed_keys->{join('-', @$key_chain)}->{$string} = $key;
}
}
sub _guess_key_name_raw {
my ($this, $data, $key_chain, $search_string) = @_;
my $fuzzy = $this->{'FUZZY'};
if ( UNIVERSAL::isa($data, 'ARRAY') ) {
if ( $search_string =~ m/^#(\d+)$/ ) {
return $1;
}
else {
die "Cannot select non-numeric index: $search_string (did you forget to prefix with a '#'?) for array: " . Dumper($data);
}
}
return $search_string if ( ! $fuzzy );
my $found_key;
if ( my $key = _search_string_to_key($key_chain, $search_string) ) {
return $key;
}
# First check exact match
if ( defined $data->{$search_string} ) {
$found_key = $search_string;
}
else {
# Next check prefixes, no interpolation
foreach my $key ( CORE::sort(CORE::keys %$data) ) {
if ( $key =~ m/^\Q$search_string\E/i ) {
$found_key = $key;
}
}
}
if ( !$found_key ) {
# Check for match anywhere in the keys, allow regex interpolation
foreach my $key ( CORE::sort(CORE::keys %$data) ) {
if ( $key =~ m/$search_string/i ) {
$found_key = $key;
}
}
}
if ( !$found_key ) {
$found_key = $search_string;
}
_add_string_key_mapping($key_chain, $search_string, $found_key);
return $found_key
}
sub has_key_spec {
my ($this, $data) = @_;
eval { $this->guess_key($data, 0, 1) };
if ( $@ =~ m/^NoSuchKey/ ) {
return 0;
}
elsif ( $@ ) {
#Rethrow if a different error
die $@;
}
return 1;
}
sub get_key_list_for_spec {
my ($this, $data) = @_;
return $this->_guess_key_recurse(
$data,
[],
1,
0,
1,
@{$this->{'PARSED_KEYS'}},
);
}
sub _parse_key_spec {
my ($this) = @_;
my $spec = $this->{'SPEC'};
my $fuzzy = 0;
my $spec_name = $spec;
if ( substr($spec, 0, 1) eq '@' ) {
$fuzzy = 1;
$spec = substr($spec, 1);
}
my $keys = [];
my $current_key = '';
my $last_char = '';
for (my $index = 0; $index < length($spec); $index++) {
my $current_char = substr($spec, $index, 1);
if ( $current_char eq '/' && $last_char ne '\\' ) {
push @$keys, $current_key;
$current_key = '';
$last_char = '';
next;
}
else {
if ( $current_char eq '/' ) {
chop $current_key;
}
$current_key .= $current_char;
$last_char = $current_char;
next;
}
}
if ( $current_key ne '' ) {
push @$keys, $current_key;
}
$this->{'PARSED_KEYS'} = $keys;
$this->{'FUZZY'} = $fuzzy;
}
{
my $keylookup_hash = {};
sub guess_key {
my ($this, $data, $no_vivify, $throw_error) = @_;
my @args = @{$this->{'PARSED_KEYS'}};
$no_vivify ||= 0;
$throw_error ||= 0;
my $args_string = join('-', @args, $no_vivify, $throw_error);
if ( my $code = $keylookup_hash->{$args_string} ) {
return $code->($data);
}
my $keys = $this->_guess_key_recurse(
$data,
[],
$no_vivify,
$throw_error,
1,
@args,
);
my $code = $this->_generate_keylookup_sub($keys, $no_vivify);
$keylookup_hash->{$args_string} = $code;
return $code->($data);
}
}
# Performance! Oh god, performance. Generate a lookup subroutine that will
# lookup the passed keys, for execution later
sub _generate_keylookup_sub {
my $this = shift;
my $keys = shift;
my $no_vivify = shift;
my $throw_error = shift;
if ( scalar @$keys == 0 ) {
return eval 'sub { if ( \$throw_error ) { die "NoSuchKey"; } return ""; }';
}
my $code_string = 'sub { my $record = shift;';
my $key_accessor = '$record';
my $action = "return ''";
$action = "die 'NoSuchKey'" if ( $throw_error );
my $check_actions = '';
foreach my $key (@$keys) {
if ( $key =~ m/^#(\d+)$/ ) {
my $index = $1;
$key_accessor .= "->[$index]";
}
else {
my @hex_bytes = unpack('C*', $key);
my $hex_string = '';
foreach my $byte (@hex_bytes) {
$hex_string .= "\\x" . sprintf ("%lx", $byte);
}
$key_accessor .= "->{\"$hex_string\"}";
}
$check_actions .= "$action if ( ! exists $key_accessor );";
}
if ( $no_vivify || $throw_error ) {
$code_string .= $check_actions;
}
$code_string .= "return \\($key_accessor)}";
my $sub_ref = eval $code_string;
if ( $@ ) {
warn "Unexpected error in creating key lookup!\n";
die $@;
}
return $sub_ref;
}
sub _guess_key_recurse {
my ($this, $data, $key_chain, $no_vivify, $throw_error,
$return_key_chain, $search_string, @next_strings) = @_;
my $type = ref($data);
if ( $type eq 'SCALAR' || UNIVERSAL::isa(\$data, 'SCALAR') ) {
die "Cannot look for $search_string in scalar: " . Dumper($data);
}
my $key = $this->_guess_key_name_raw($data, $key_chain, $search_string);
my $value;
if ( $type eq 'ARRAY' ) {
$value = \($data->[$key]);
$key = "#$key";
}
else {
if ( $no_vivify && (!exists $data->{$key}) ) {
return $return_key_chain ? [] : '';
}
$value = \($data->{$key})
}
if ( scalar @next_strings > 0 ) {
if ( ! defined $$value ) {
die "NoSuchKey" if ( $throw_error );
if ( $no_vivify ) {
return $return_key_chain ? [] : '';
}
if ( substr($next_strings[0], 0, 1) eq '#' ) {
$$value = [];
}
else {
$$value = {};
}
}
return $this->_guess_key_recurse(
$$value,
[@$key_chain, $key],
$no_vivify,
$throw_error,
$return_key_chain,
@next_strings,
);
}
return $return_key_chain ? [@$key_chain, $key] : $value;
}
sub keyspec_help {
return <<KEYSPECS_HELP;
KEY SPECS
__FORMAT_TEXT__
A key spec is short way of specifying a field with prefixes or regular
expressions, it may also be nested into hashes and arrays. Use a '/' to nest
into a hash and a '#NUM' to index into an array (i.e. #2)
An example is in order, take a record like this:
__FORMAT_TEXT__
{"biz":["a","b","c"],"foo":{"bar 1":1},"zap":"blah1"}
{"biz":["a","b","c"],"foo":{"bar 1":2},"zap":"blah2"}
{"biz":["a","b","c"],"foo":{"bar 1":3},"zap":"blah3"}
__FORMAT_TEXT__
In this case a key spec of 'foo/bar 1' would have the values 1,2, and 3
in the respective records.
Similarly, 'biz/#0' would have the value of 'a' for all 3 records
You can also prefix key specs with '\@' to engage the fuzzy matching logic
__FORMAT_TEXT__
__FORMAT_TEXT__
Fuzzy matching works like this in order, first key to match wins
__FORMAT_TEXT__
1. Exact match ( eq )
2. Prefix match ( m/^/ )
3. Match anywehre in the key (m//)
__FORMAT_TEXT__
So, in the above example '\@b/#2', the 'b' portion would expand to 'biz' and 2
would be the index into the array, so all records would have the value of 'c'
Simiarly, \@f/b would have values 1, 2, and 3
You can escape / with a \\. For example, if you have a record:
__FORMAT_TEXT__
{"foo/bar":2}
__FORMAT_TEXT__
You can address that key with foo\\/bar
__FORMAT_TEXT__
KEYSPECS_HELP
}
1;
( run in 0.826 second using v1.01-cache-2.11-cpan-39bf76dae61 )