Text-UnAbbrev

 view release on metacpan or  search on metacpan

lib/Text/UnAbbrev.pm  view on Meta::CPAN

package Text::UnAbbrev;

use common::sense;
use charnames q(:full);
use Carp;
use English qw[-no_match_vars];
use File::Find;
use File::Spec::Functions ();
use IO::File;
use Moo;
use File::ShareDir::ProjectDistDir;
use Unicode::CaseFold;

our $VERSION = '0.03'; # VERSION

has dict => ( is => q(rw), default => sub { {}; }, );
has language  => ( is => q(rw) );
has domain    => ( is => q(rw) );
has subdomain => ( is => q(rw) );

sub BUILD {
    my $self = shift;
    my $args = shift;

    my $pkg = __PACKAGE__;
    $pkg =~ s{::}{-}g;
    my $share_dir = dist_dir($pkg);
    my @dict_file;
    find( sub { push @dict_file, $File::Find::name if -e }, $share_dir, );

    while ( my $dict_file = shift @dict_file ) {
        $self->_load_dict($dict_file);
    }

    if ( ref $args eq q(HASH) ) {
        foreach my $method ( keys %{$args} ) {
            if ( __PACKAGE__->can($method) ) {
                my $value = delete $args->{$method};
                $self->$method($value);
            }
            else { croak( sprintf q(method unknown: '%s'), $method ); }
        }
    }

    return 1;
} ## end sub BUILD

sub _load_dict {
    my $self      = shift;
    my $dict_file = shift;

    my ( $language, $domain, $subdomain )
        = ( File::Spec::Functions::splitdir($dict_file) )[ -3, -2, -1 ];

    my $fh = IO::File->new( $dict_file, q(<:utf8) );
    while ( my $line = $fh->getline() ) {
        chomp $line;
        my ( $abbrev, $expansion ) = split m{\t+|\N{SPACE}{2,}}msx, $line;
        $abbrev = $self->_norm_abbrev($abbrev);
        push @{ $self->dict->{$language}{$domain}{$subdomain}{$abbrev} },
            $expansion;
    }
    $fh->close();

    return 1;
} ## end sub _load_dict

sub lookup {
    my $self   = shift;
    my $abbrev = shift;
    my $mode   = shift;

    return unless defined $abbrev;

    my $query = $self->_norm_abbrev($abbrev);
    my @result;
    my @language = $self->language() || keys %{ $self->dict() };
    foreach my $language (@language) {
        my $language_node = $self->dict->{$language};
        my @domain = $self->domain() || keys %{$language_node};
        foreach my $domain (@domain) {
            my $domain_node = $language_node->{$domain};
            my @subdomain = $self->subdomain() || keys %{$domain_node};
            foreach my $subdomain (@subdomain) {
                my $subdomain_node = $domain_node->{$subdomain};
                if ( exists $subdomain_node->{$query} ) {
                    my $origin = {
                        language  => $language,
                        domain    => $domain,
                        subdomain => $subdomain,
                    };
                    push @result,
                        $self->_proc_results( $subdomain_node->{$query},
                        $mode, $origin, );
                }
            }
        } ## end foreach my $domain (@domain)
    } ## end foreach my $language (@language)

    return @result;
} ## end sub lookup

sub _proc_results {
    my $self    = shift;
    my $results = shift;
    my $mode    = shift;
    my $origin  = shift;

    my @result;
    foreach my $result ( @{$results} ) {
        if ( defined $mode && $mode eq q(with_origin) ) {
            push @result, { $result => $origin };
        }
        else {
            push @result, $result;
        }
    }



( run in 1.175 second using v1.01-cache-2.11-cpan-71847e10f99 )