Lingua-Han-PinYin
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Lingua/Han/PinYin.pm view on Meta::CPAN
package Lingua::Han::PinYin;
use strict;
use warnings;
our $VERSION = '0.23';
use File::Spec ();
use Lingua::Han::Utils qw/Unihan_value/;
sub new {
my $class = shift;
my $dir = __FILE__;
$dir =~ s/\.pm//o;
-d $dir or die "Directory $dir does not exists, please consider to reinstall this module.";
my %args = (@_ % 2 == 1) ? %{ $_[0] } : (@_);
my %py;
my $file = File::Spec->catfile( $dir, 'Mandarin.dat' );
open(my $fh, '<', $file) or die "Can't open $file: $!";
while (my $line = <$fh>) {
chomp($line);
my ( $uni, $py ) = split(/\s+/, $line);
$py{$uni} = $py;
}
close($fh);
$args{'py'} = \%py;
return bless \%args => $class;
}
sub han2pinyin1 {
my ($self, $word) = @_;
my $code = Unihan_value($word);
my $value = $self->{'py'}->{$code};
if (defined $value) {
$value = $self->_fix_val( $value );
} else {
# not found in dictionary, return original word
$value = $word;
}
return $value;
}
sub han2pinyin {
my ( $self, $hanzi ) = @_;
my @code = Unihan_value($hanzi);
my @result;
foreach my $code (@code) {
my $value = $self->{'py'}->{$code};
if ( defined $value ) {
$value = $self->_fix_val( $value );
}
else {
# if it's not a Chinese, return original word
$value = pack( "U*", hex $code );
}
push @result, ($self->{capitalize} ? ucfirst $value : $value);
}
return wantarray ? @result : join( '', @result );
}
sub gb2pinyin {
my ($self, $hanzi) = @_;
# convert only normal Chinese letter. Ignore Chinese symbols
# which fall within [0xa1,0xb0) region. 0xb0==0260
# if it is not normal Chinese, retain original characters
$hanzi =~ s/[\260-\377][\200-\377]/$self->han2pinyin1($&)/ge;
return $hanzi;
}
sub _fix_val {
my ( $self, $value ) = @_;
if ($self->{unicode}) {
return $value;
}
# convert into ascii
$value =~ s/Å«/u/g and $value .= '1';
$value =~ s/Ç/u/g and $value .= '1';
$value =~ s/Ä«/i/g and $value .= '1';
$value =~ s/Å/o/g and $value .= '1';
$value =~ s/Ä/a/g and $value .= '1';
view all matches for this distributionview release on metacpan - search on metacpan
( run in 1.051 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )