Lingua-ZH-CEDICT
view release on metacpan or search on metacpan
lib/Lingua/ZH/CEDICT.pm view on Meta::CPAN
$self->{_findPos} = 0;
$self->{_findTerm} = $term;
}
# Returns a reference to the next entry that matches the current search
# term, or undef once the entry list has been exhausted.
#
# The term is read from $self->{_findTerm} and the scan resumes at
# $self->{_findPos}, so calling find() repeatedly walks through all
# matches one by one.  An entry matches when
#   - field 0 or field 1 is string-equal to the term, or
#   - field 2, 3 or 4 equals the term ignoring case.
sub find {
    my ($self) = @_;
    my $term = $self->{_findTerm};

    # Compare literally: \Q...\E stops characters such as "+", "(" or "."
    # in the term from being interpreted as regex syntax (which previously
    # gave wrong hits or a fatal regex compilation error).
    my $exact = qr/^\Q$term\E$/i;

    while ($self->{_findPos} < $self->numEntries) {
        # Advance the cursor first so the next call resumes after this entry.
        my $e = $self->{entry}->[ $self->{_findPos}++ ];
        return $e
            if (($e->[0] eq $term) or
                ($e->[1] eq $term) or
                ($e->[2] =~ $exact) or
                ($e->[3] =~ $exact) or
                ($e->[4] =~ $exact));
    }

    # nothing found (explicit undef kept so existing callers see the same value)
    return undef;
}
# Formatting ****************************************************************
# Tone-marked vowels keyed by "<vowel><tone number>".  'v' stands for
# u-umlaut, and tone 5 maps to the unmarked vowel.
# NOTE(review): the values are non-ASCII literals; this file must be stored
# as UTF-8 or the table silently turns into mojibake.
my %xlat =
    (a1 => "ā", e1 => "ē", i1 => "ī",
     o1 => "ō", u1 => "ū", 'v1' => "ǖ",
     a2 => "á", e2 => "é", i2 => "í",
     o2 => "ó", u2 => "ú", 'v2' => "ǘ",
     a3 => "ǎ", e3 => "ě", i3 => "ǐ",
     o3 => "ǒ", u3 => "ǔ", 'v3' => "ǚ",
     a4 => "à", e4 => "è", i4 => "ì",
     o4 => "ò", u4 => "ù", 'v4' => "ǜ",
     a5 => 'a', e5 => 'e', i5 => 'i',
     o5 => 'o', u5 => 'u', 'v5' => 'ü');

# Converts numbered pinyin ("ni3 hao3") into pinyin with tone marks.
#
# May be called as an object method ($obj->utf8Pinyin($p)) or as a plain
# function (utf8Pinyin($p)); in the latter case the first argument is the
# pinyin string.  Returns the converted string.
sub utf8Pinyin {
    my ($self, $p) = @_;
    $p = $self unless ref($self);

    # normalize u: and v to v
    $p =~ s/u:/v/g;

    # $1 = optional medial (i/u/v), $2 = vowel that carries the tone mark,
    # $3 = rest of the syllable, $4 = tone number.  A medial "v" (as in
    # "lve4") must also be rendered as u-umlaut instead of a literal "v".
    $p =~ s{([iuv]?)([aeiouv])([a-z]*)([1-5])}
           {($1 eq 'v' ? $xlat{v5} : $1) . $xlat{"$2$4"} . $3}ge;

    return $p;
}
# Formats a "/"-separated list of English definitions for HTML display.
#
# Each definition's trailing parenthesised remark is wrapped in <i>...</i>
# (the parentheses themselves are dropped) and the definitions are joined
# with a middle dot.  Empty fields, e.g. from a leading or doubled "/",
# are skipped so no dangling separators appear.
#
# May be called as an object method or as a plain function taking the
# string as its only argument.  Returns the formatted string.
sub formatEnglish {
    my ($self, $en) = @_;
    $en = $self unless ref($self);

    # Middle dot (U+00B7) surrounded by spaces.
    my $separator = " · ";

    my @terms = grep { $_ ne '' } split m|/|, $en;
    foreach my $term (@terms) {
        $term =~ s|\(([^(]+)\)$|<i>$1</i>|;
    }

    return join($separator, @terms);
}
# Strips the tone digits 1-5 from a numbered pinyin string and rewrites
# both "u:" and "v" as a plain "u".  Returns the modified copy.
sub removePinyinTones {
    my $self   = shift;
    my $pinyin = shift;

    for ($pinyin) {
        s/[1-5]+//g;      # drop tone numbers
        s/u:|v/u/g;       # flatten both umlaut notations to "u"
    }

    return $pinyin;
}
# Derives upper-case search keywords from a "/"-separated list of English
# definitions.
#
# For every definition:
#   - definitions consisting only of a parenthesised remark are skipped;
#   - a trailing parenthesised explanation is removed;
#   - a leading "to be", "to", "the", "an" or "a" (optionally wrapped in
#     parentheses) is removed;
#   - a definition of the form "(a sense of) uncertainty" yields two
#     keywords: the part after the parentheses, and the full phrase;
#   - everything except letters, digits, space, "-", "/" and "." is removed
#     and whitespace is trimmed and collapsed.
#
# Returns the list of keywords that contain at least one word character.
sub englishToKeywords {
    my ($self, $en) = @_;
    my @kw;

    # "to be" has to come before "to", otherwise it can never match.
    my $article = qr/(?:to be|to|the|an|a)/i;

    foreach my $term (split(m|/|, $en)) {
        next if $term =~ /^\([^()]+\)$/;

        # remove trailing explanation in brackets
        $term =~ s/\s+\([^(]+\)$//;

        # Strip a leading article; parentheses must be balanced so that
        # "(a sense of) ..." is not mistaken for the article "(a".
        $term =~ s/^(?:\($article\)|$article)\s+//;

        # definitions like "(a sense of) uncertainty" -- this has to be
        # checked before the parentheses are filtered out below
        my @variants = $term =~ /^\((.+?)\)\s+(.+)$/
            ? ($2, "$1 $2")
            : ($term);

        foreach my $keyword (@variants) {
            # remove characters we don't like in keywords
            $keyword =~ s|[^-a-zA-Z0-9 /.]||g;
            $keyword =~ s|^\.+||;

            # remove leading and trailing and multiple whitespace
            $keyword =~ s/^\s+//;
            $keyword =~ s/\s+$//;
            $keyword =~ s/\s\s+/ /g;

            push @kw, uc($keyword);
        }
    }

    # return non-empty keywords
    return grep /\w/, @kw;
}
1;
__END__
=head1 NAME
Lingua::ZH::CEDICT - Interface for CEDICT, a Chinese-English dictionary
=head1 SYNOPSIS
( run in 0.593 second using v1.01-cache-2.11-cpan-71847e10f99 )