Lingua-Han-PinYin

 view release on metacpan or  search on metacpan

lib/Lingua/Han/PinYin.pm  view on Meta::CPAN

package Lingua::Han::PinYin;

use strict;
use warnings;
our $VERSION = '0.23';

use File::Spec ();
use Lingua::Han::Utils qw/Unihan_value/;

sub new {
    my $class = shift;

    my $dir   = __FILE__;
    $dir =~ s/\.pm//o;
    -d $dir or die "Directory $dir does not exists, please consider to reinstall this module.";

    my %args = (@_ % 2 == 1) ? %{ $_[0] } : (@_);

    my %py;
    my $file = File::Spec->catfile( $dir, 'Mandarin.dat' );
    open(my $fh, '<', $file) or die "Can't open $file: $!";
    while (my $line = <$fh>) {
        chomp($line);
        my ( $uni, $py ) = split(/\s+/, $line);
        $py{$uni} = $py;
    }
    close($fh);

    $args{'py'} = \%py;

    return bless \%args => $class;
}

sub han2pinyin1 {
    my ($self, $word) = @_;
    my $code = Unihan_value($word);
    my $value = $self->{'py'}->{$code};
    if (defined $value) {
        $value = $self->_fix_val( $value );
    } else {
        # not found in dictionary, return original word
        $value = $word;
    }
    return $value;
}

sub han2pinyin {
    my ( $self, $hanzi ) = @_;

    my @code = Unihan_value($hanzi);

    my @result;
    foreach my $code (@code) {
        my $value = $self->{'py'}->{$code};
        if ( defined $value ) {
            $value = $self->_fix_val( $value );
        }
        else {
            # if it's not a Chinese, return original word
            $value = pack( "U*", hex $code );
        }
        push @result, ($self->{capitalize} ? ucfirst $value : $value);
    }

    return wantarray ? @result : join( '', @result );

}

sub gb2pinyin {
    my ($self, $hanzi) = @_;

    # convert only normal Chinese letter. Ignore Chinese symbols
    # which fall within [0xa1,0xb0) region. 0xb0==0260
    # if it is not normal Chinese, retain original characters
    $hanzi =~ s/[\260-\377][\200-\377]/$self->han2pinyin1($&)/ge;
    return $hanzi;
}

sub _fix_val {
    my ( $self, $value ) = @_;

    if ($self->{unicode}) {
        return $value;
    }

    # convert into ascii
    $value =~ s/Å«/u/g and $value .= '1';
    $value =~ s/Ç–/u/g and $value .= '1';
    $value =~ s/Ä«/i/g and $value .= '1';
    $value =~ s/ō/o/g and $value .= '1';
    $value =~ s/ā/a/g and $value .= '1';

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.051 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )