Acme-Lou
view release on metacpan or search on metacpan
author/loucsv.pl view on Meta::CPAN
use strict;
use warnings;
use utf8;
use open OUT => qw/:utf8 :std/;
use autodie;
use Encode;
use FindBin;
=head1 USAGE
usage:
cat mecab-ipadic-2.7.0-20060707/*.csv | perl loucsv.pl > lou.csv
original:
http://chasen.org/~taku/blog/archives/2007/01/_mecab.html
=cut
# Result of get_ja2kana(); that helper is not visible in this part of the file.
# NOTE(review): judging by the name this is presumably a Japanese-to-kana
# lookup -- confirm against the helper's definition.
my $ja2kana = get_ja2kana();
# Hash reference mapping "<part of speech>-<conjugation form>" keys to a
# string value; entries that are commented out are simply absent from the map.
# NOTE(review): the name suggests the values are okurigana suffixes chosen per
# POS/form pair -- confirm against lou(), which splits the POS and form columns
# out of each CSV record.
my $pos_form_okuri_map = {
'åè©-*' => '',
'æåè©-*' => '',
'æ¥ç¶è©-*' => '',
'é£ä½è©-*' => '',
'åè©-ä»®å®å½¢' => 'ãã',
'åè©-ä»®å®ç¸®ç´ï¼' => 'ãã',
'åè©-åºæ¬å½¢' => 'ãã',
'åè©-ä½è¨æ¥ç¶' => 'ãã',
'åè©-ä½è¨æ¥ç¶ç¹æ®ï¼' => 'ã',
'åè©-æèªåºæ¬å½¢' => 'ãã',
'åè©-æªç¶ã¬ã«æ¥ç¶' => 'ã',
#'åè©-æªç¶å½¢' => '',
#'åè©-æªç¶ç¹æ®' => '',
'åè©-å½ä»¤ï½
' => '',
'åè©-å½ä»¤ï½ï½' => '',
'åè©-å½ä»¤ï½ï½' => '',
#'åè©-é£ç¨ã¿æ¥ç¶' => '',
'形容è©-ã¬ã«æ¥ç¶' => '',
'åè©-é£ç¨å½¢' => 'ã',
'形容è©-ä»®å®å½¢' => 'ãªã',
'形容è©-ä»®å®ç¸®ç´ï¼' => 'ãªã',
'形容è©-ä»®å®ç¸®ç´ï¼' => 'ãªã',
'形容è©-åºæ¬å½¢' => 'ãª',
'形容è©-ä½è¨æ¥ç¶' => 'ãª',
'形容è©-æèªåºæ¬å½¢' => '',
'形容è©-æªç¶ã¦æ¥ç¶' => 'ã ã',
'形容è©-æªç¶ãæ¥ç¶' => 'ãããã',
'形容è©-å½ä»¤ï½
' => 'ã§ãã',
'形容è©-é£ç¨ã´ã¶ã¤æ¥ç¶' => '',
'形容è©-é£ç¨ã¿æ¥ç¶' => 'ã ã£',
'形容è©-é£ç¨ãæ¥ç¶' => 'ã«',
};
# Main driver: read EUC-JP encoded CSV records from the files named on the
# command line (or STDIN), convert each one with lou(), and print every
# record for which lou() returns a defined value.
while (defined(my $raw = <>)) {
    chomp $raw;

    # Input is raw octets; turn it into Perl characters before processing.
    my $text = decode('euc-jp', $raw);

    my $converted = lou($text);
    print $converted if defined $converted;
}

# Emit the contents of this script's DATA section after the converted records.
print <DATA>;
exit;
sub lou {
my $feature = shift or return;
# 表層形,å·¦æèID,峿èID,ã³ã¹ã,åè©,åè©ç´°åé¡1,åè©ç´°åé¡2,åè©ç´°åé¡3,æ´»ç¨å,æ´»ç¨å½¢,åå½¢,èªã¿,çºé³
my ($c, $d, $e, $cost, $pos, $f, $g, $h, $type, $form, $lemma, $i, $j) = split /,/, $feature;
( run in 1.002 second using v1.01-cache-2.11-cpan-98e64b0badf )