Lingua-Han-Cantonese
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Lingua/Han/Cantonese.pm view on Meta::CPAN
package Lingua::Han::Cantonese;
use warnings;
use strict;
use vars qw($VERSION);
$VERSION = '0.12';
use File::Spec;
use Lingua::Han::Utils qw/Unihan_value/;
# Constructor: builds a converter object and loads the lookup table that
# han2Cantonese() reads from $self->{'ct'}.
#
# The table is read from the file 'Cantonese.dat' inside the directory named
# after this module file (the module's path with its trailing '.pm' removed).
# Each data line is split on whitespace; the first field becomes the hash key
# and the second field becomes the stored value.
#
# Arguments: an optional key/value list that is stored in the object as-is
#            (han2Cantonese() looks at the 'tone' key).
# Returns:   a hash-based object blessed into $class.
# Dies:      when the data directory does not exist, or when the data file
#            cannot be opened.
sub new {
    my $class = shift;

    # Remove only the trailing '.pm'. An unanchored pattern would strip the
    # first '.pm' found anywhere in the path (e.g. in a parent directory name).
    my $dir = __FILE__;
    $dir =~ s/\.pm\z//;
    -d $dir or die "Directory $dir nonexistent!";

    my $self = { @_ };
    my %ct;
    my $file = File::Spec->catfile($dir, 'Cantonese.dat');

    # Three-argument open with a lexical handle: the path is never parsed for
    # mode characters, and no package-global FH is shared with other code.
    open(my $fh, '<', $file) or die "$file: $!";
    while (my $line = <$fh>) {
        # split ' ' ignores leading whitespace, so an indented line still
        # yields the key as the first field.
        my ($uni, $ct) = split ' ', $line;

        # A blank line has no fields; skip it instead of storing an entry
        # under an undefined key.
        next unless defined $uni;

        $ct{$uni} = $ct;
    }
    close($fh);

    $self->{'ct'} = \%ct;
    return bless $self => $class;
}
# Convert a string into its Cantonese spelling, one entry per code returned
# by Unihan_value() for $hanzi.
#
# For every code, the value stored under that code in $self->{'ct'} is used.
# Digits are removed from that value unless the object's 'tone' option is
# true. A code with no defined entry is turned back into the character it
# denotes (the code is read as a hexadecimal code point). Every entry is
# lowercased before it is returned.
#
# Returns the list of entries in list context, or the entries concatenated
# into a single string in scalar context.
sub han2Cantonese {
    my ($self, $hanzi) = @_;
    my $keep_tone = $self->{'tone'};

    my @spelt = map {
        # Work on a copy so the stored table entry is never modified.
        my $reading = $self->{'ct'}->{$_};

        if (!defined $reading) {
            # Not in the table: hand back the original character.
            $reading = pack("U*", hex $_);
        }
        elsif (!$keep_tone) {
            # Tone output is off, so drop the digits from the reading.
            $reading =~ s/\d//isg;
        }

        lc $reading;
    } Unihan_value($hanzi);

    return wantarray ? @spelt : join('', @spelt);
}
1;
__END__
=encoding utf8
=head1 NAME
Lingua::Han::Cantonese - Retrieve the Cantonese (GuangDongHua) romanization of Chinese characters (HanZi).
=head1 SYNOPSIS
use Lingua::Han::Cantonese;
my $h2p = Lingua::Han::Cantonese->new();
print $h2p->han2Cantonese("我"); # ngo
my @result = $h2p->han2Cantonese("爱你"); # @result = ('ngoi', 'nei');
# we can set the tone up
my $h2p = Lingua::Han::Cantonese->new(tone => 1);
print $h2p->han2Cantonese("我"); # ngo5
my @result = $h2p->han2Cantonese("爱你"); # @result = ('ngoi3', 'nei5');
print $h2p->han2Cantonese("林道"); # lam4dou3
print $h2p->han2Cantonese("I love 余瑞华 a"); # i love jyu4seoi6waa4 a
=head1 DESCRIPTION
Retrieve the Cantonese (GuangDongHua) romanization of Chinese characters (HanZi).
=head1 RETURN VALUE
Normally it returns the Cantonese romanization (spelling) of each character. The bundled table covers more than 20,000 characters (from Unicode.org Unihan.txt, version 4.1.0).
If a character is not a Chinese character, the original character is returned instead, lowercased.
=head1 OPTION
=over 4
view all matches for this distributionview release on metacpan - search on metacpan
( run in 0.518 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )