Business-CompanyDesignator
view release on metacpan or search on metacpan
lib/Business/CompanyDesignator.pm view on Meta::CPAN
package Business::CompanyDesignator;
# Require perl 5.010 because the 'track' functionality of Regexp::Assemble
# is unsafe for earlier versions.
use 5.010001;
use Moose;
use utf8;
use warnings qw(FATAL utf8);
use FindBin qw($Bin);
use YAML;
use File::ShareDir qw(dist_file);
use List::MoreUtils qw(uniq);
use Regexp::Assemble;
use Unicode::Normalize;
use Carp;
use Business::CompanyDesignator::Record;
use Business::CompanyDesignator::SplitResult;
our $VERSION = '0.17';
# Language codes that are treated as 'continuous': for these languages a
# non-ascii designator does not need a word break before or after it.
# The set is fixed here rather than being read from the datafile.
our %LANG_CONTINUA = (
    zh => 1,
    ja => 1,
    ko => 1,
);
has 'datafile' => ( is => 'ro', default => sub {
    # Look in the share directory next to the running script's directory
    # first: the development/test dataset takes precedence over the
    # regular one.
    for my $name (qw(company_designator_dev.yml company_designator.yml)) {
        my $candidate = "$Bin/../share/$name";
        return $candidate if -f $candidate;
    }

    # Neither local file exists, so fall back to the installed version
    return dist_file('Business-CompanyDesignator', 'company_designator.yml');
});
# data: the raw dataset as loaded from datafile, keyed by long designator.
# Built lazily on first access.
has 'data' => (
    is         => 'ro',
    lazy_build => 1,
);

# regex_cache: cache of regexes by language and type, kept because the
# regexes are expensive to build.
has 'regex_cache' => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub { {} },
);

# abbr_long_map: hash mapping each abbreviation (string) back to an arrayref
# of long designators - an arrayref because abbreviations are not necessarily
# unique. Built lazily on first access.
has 'abbr_long_map' => (
    is         => 'ro',
    isa        => 'HashRef',
    lazy_build => 1,
);

# pattern_string_map: hash mapping patterns back to their source string,
# needed because additional patterns (e.g. without diacritics) get added.
has 'pattern_string_map' => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub { {} },
);

# pattern_string_map_lang: hash of hashes, mapping language codes to hashes
# of patterns back to their source string.
has 'pattern_string_map_lang' => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub { {} },
);
# Builder for the lazily-built 'data' attribute: loads the YAML file named
# by the 'datafile' attribute and returns whatever YAML::LoadFile yields.
sub _build_data {
    my ($self) = @_;
    return YAML::LoadFile($self->datafile);
}
# Builder for the lazily-built 'abbr_long_map' attribute.
#
# Walks every entry in 'data' and returns a hashref mapping each abbreviation
# to an arrayref of the long designators that use it. Abbreviations come from
# the entry's 'abbr_std' value (if true) and from its 'abbr' value, which may
# be either a single string or an arrayref of strings.
#
# Each long designator appears at most once per abbreviation, even if the
# same abbreviation is given in both 'abbr_std' and 'abbr', and the long
# designators are listed in sorted order so results are repeatable.
sub _build_abbr_long_map {
    my $self = shift;
    my $data = $self->data;

    my %map;
    my %seen;    # $seen{$abbr}{$long} is true once that pair is recorded

    # Iterate over sorted keys rather than using each(): hash order is not
    # stable between runs, and each() depends on the hash's shared iterator.
    for my $long (sort keys %$data) {
        my $entry = $data->{$long};

        my @abbrs;
        push @abbrs, $entry->{abbr_std} if $entry->{abbr_std};
        if (my $abbr = $entry->{abbr}) {
            push @abbrs, ref $abbr ? @$abbr : $abbr;
        }

        for my $abbr (@abbrs) {
            next if $seen{$abbr}{$long}++;
            push @{ $map{$abbr} }, $long;
        }
    }

    return \%map;
}
# Return the sorted list of long designators, i.e. the keys of 'data'.
sub long_designators {
    my ($self) = @_;
    my $data = $self->data;
    return sort keys %$data;
}
# Return the sorted list of abbreviations, i.e. the keys of 'abbr_long_map'.
sub abbreviations {
    my ($self) = @_;
    my $abbr_map = $self->abbr_long_map;
    return sort keys %$abbr_map;
}
# Return one sorted list holding both the long designators and the
# abbreviations.
sub designators {
    my ($self) = @_;
    my @combined = ( $self->long_designators, $self->abbreviations );
    return sort @combined;
}
# Return the Business::CompanyDesignator::Record for the long designator
# $long. Croaks if $long has no (true) entry in 'data'.
sub record {
    my $self = shift;
    my ($long) = @_;

    my $entry = $self->data->{$long};
    croak "Invalid long designator '$long'" unless $entry;

    return Business::CompanyDesignator::Record->new(
        long   => $long,
        record => $entry,
    );
}
# Return a list of B::CD::Records for $designator
sub records {
my ($self, $designator) = @_;
croak "Missing designator" if ! $designator;
if (exists $self->data->{$designator}) {
return ( $self->record($designator) );
}
elsif (my $long_set = $self->abbr_long_map->{$designator}) {
return map { $self->record($_) } @$long_set
}
else {
croak "Invalid designator '$designator'";
}
( run in 1.281 second using v1.01-cache-2.11-cpan-97f6503c9c8 )