Acme-Lou
view release on metacpan or search on metacpan
author/en2kana.csv view on Meta::CPAN
maker,メーカー
making,メイキング
mama,ママ
man,マン
manage,マネージ
manager,マネージャー
mankind,マンカインド
manner,マナー
manual,マニュアル
many,メニー
map,マップ
march,マーチ
mark,マーク
market,マーケット
married,マリード
marry,マリー
marvelous,マーベラス
mass,マス
master,マスター
match,マッチ
mate,メイト
author/ja2kana.pl view on Meta::CPAN
# Lookup table: lower-cased English word => kana string, read from
# lou-en2kana.csv under $Bin.
my %en2kana;
# Die if the dictionary cannot be opened; an unchecked open would leave
# %en2kana silently empty and the rest of the script would run on no data.
open my $en2kana, '<:encoding(utf8)', "$Bin/lou-en2kana.csv"
    or die "cannot open $Bin/lou-en2kana.csv: $!";
while (my $line = <$en2kana>) {
    chomp $line;
    next unless $line;        # skip empty lines
    next if $line =~ /^#/;    # skip comment lines
    # Only the first two comma-separated fields are used.
    my ($en, $kana) = split ',', $line;
    $en2kana{lc $en} = $kana;
}
# Set of words to skip: every listed word is a key whose value is 1.
my %skip_word;
$skip_word{$_} = 1 for qw(
    now say new be come see is as
    one two three four five
    six seven eight nine ten
    law raw row whole weigh
    hurt hut firm fare flesh
    youth lack role waste worth
    few pray health sex
);
# Progress notice on STDERR; the message names lou-ja2kana.csv under $Bin.
warn "make $Bin/lou-ja2kana.csv...\n";
author/loucsv.pl view on Meta::CPAN
usage:
cat mecab-ipadic-2.7.0-20060707/*.csv | perl loucsv.pl > lou.csv
original:
http://chasen.org/~taku/blog/archives/2007/01/_mecab.html
=cut
my $ja2kana = get_ja2kana();
my $pos_form_okuri_map = {
'åè©-*' => '',
'æåè©-*' => '',
'æ¥ç¶è©-*' => '',
'é£ä½è©-*' => '',
'åè©-ä»®å®å½¢' => 'ãã',
'åè©-ä»®å®ç¸®ç´ï¼' => 'ãã',
'åè©-åºæ¬å½¢' => 'ãã',
'åè©-ä½è¨æ¥ç¶' => 'ãã',
'åè©-ä½è¨æ¥ç¶ç¹æ®ï¼' => 'ã',
'åè©-æèªåºæ¬å½¢' => 'ãã',
author/loucsv.pl view on Meta::CPAN
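# Feature columns: surface form, left context ID, right context ID, cost,
# part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3,
# conjugation type, conjugation form, base form, reading, pronunciation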
my ($c, $d, $e, $cost, $pos, $f, $g, $h, $type, $form, $lemma, $i, $j) = split /,/, $feature;
my $kana = $ja2kana->{$lemma};
if (not defined $kana) {
return;
}
my $pos_form = "$pos-$form";
my $okuri = $pos_form_okuri_map->{$pos_form};
if (not defined $okuri) {
return;
}
$cost -= 1000;
$cost = 1 if $cost <= 0;
return join ",", $c, $d, $e, $cost, $pos, $f, $g, $h, $type, $form, $lemma, $i, $j, $kana, $okuri, "\n";
}
( run in 1.015 second using v1.01-cache-2.11-cpan-49f99fa48dc )