Acme-Lou

 view release on metacpan or  search on metacpan

author/loucsv.pl  view on Meta::CPAN

use strict;
use warnings;
use utf8;
use open OUT => qw/:utf8 :std/;
use autodie;
use Encode;
use FindBin;

=head1 USAGE

usage:
    cat mecab-ipadic-2.7.0-20060707/*.csv | perl loucsv.pl > lou.csv

original:
    http://chasen.org/~taku/blog/archives/2007/01/_mecab.html

=cut

# Conversion data returned by get_ja2kana(), which is defined outside this
# excerpt.
# NOTE(review): judging by the name it maps Japanese words to kana spellings --
# confirm against the helper's definition.
my $ja2kana = get_ja2kana();

# Lookup table keyed by "<part of speech>-<conjugation form>" strings; each
# value is a hiragana suffix (possibly the empty string).
# NOTE(review): presumably lou() joins its $pos and $form fields to look up the
# okurigana to append to a converted word, and combinations absent from this
# table are left unconverted -- the lookup itself is outside this excerpt, so
# verify there.
my $pos_form_okuri_map = {
    # Entries whose conjugation form is '*': no suffix.
    '名詞-*'                => '',
    '感動詞-*'              => '',
    '接続詞-*'              => '',
    '連体詞-*'              => '',
    # Verb forms.
    '動詞-仮定形'           => 'すれ',
    '動詞-仮定縮約1'       => 'すれ',
    '動詞-基本形'           => 'する',
    '動詞-体言接続'         => 'する',
    '動詞-体言接続特殊2'   => 'す',
    '動詞-文語基本形'       => 'する',
    '動詞-未然レル接続'     => 'せ',
    # The commented-out verb forms below are disabled in the original; the
    # reason is not recorded in this file excerpt.
   #'動詞-未然形'           => '',
   #'動詞-未然特殊'         => '',
    '動詞-命令e'           => '',
    '動詞-命令ro'         => '',
    '動詞-命令yo'         => '',
   #'動詞-連用タ接続'       => '',
    # Adjective forms (the first one sits between verb entries in the original
    # ordering; hash order does not affect lookups).
    '形容詞-ガル接続'       => '',
    '動詞-連用形'           => 'し',
    '形容詞-仮定形'         => 'なら',
    '形容詞-仮定縮約1'     => 'なら',
    '形容詞-仮定縮約2'     => 'なら',
    '形容詞-基本形'         => 'な',
    '形容詞-体言接続'       => 'な',
    '形容詞-文語基本形'     => '',
    '形容詞-未然ウ接続'     => 'だろ',
    '形容詞-未然ヌ接続'     => 'らしから',
    '形容詞-命令e'         => 'であれ',
    '形容詞-連用ゴザイ接続' => '',
    '形容詞-連用タ接続'     => 'だっ',
    '形容詞-連用テ接続'     => 'に',
};

# Driver: read dictionary rows from STDIN (or the files named in @ARGV), hand
# each one to lou(), and print whatever it returns.
while (defined(my $raw = <>)) {
    chomp $raw;

    # Input rows are EUC-JP bytes; turn them into Perl character strings first.
    my $decoded   = decode('euc-jp', $raw);
    my $converted = lou($decoded);

    # Rows for which lou() returns undef produce no output.
    print $converted if defined $converted;
}

# Finally emit, unchanged, everything readable from the script's DATA handle
# (the data section itself is outside this excerpt).
print <DATA>;

exit;

sub lou {
    my $feature = shift or return;

    # 表層形,左文脈ID,右文脈ID,コスト,品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用型,活用形,原形,読み,発音
    my ($c, $d, $e, $cost, $pos, $f, $g, $h, $type, $form, $lemma, $i, $j) = split /,/, $feature;
    



( run in 1.002 second using v1.01-cache-2.11-cpan-98e64b0badf )