Lingua-NATools
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Lingua/NATools.pm view on Meta::CPAN
use IPC::Open2;
use Compress::Zlib;
use MLDBM qw/DB_File Storable/;
use Fcntl;
use Storable;
use Time::HiRes;
use XML::TMX::Reader;
use Lingua::PT::PLNbase;
use Lingua::Identify qw/:all/;
# Debug switch: while this is true, DEBUG() prints its arguments to STDERR.
our $DEBUG = 0;
use parent 'DynaLoader';
# Hand over to DynaLoader's bootstrap for this package, passing $VERSION
# (presumably this loads the compiled part of the module -- TODO confirm).
bootstrap Lingua::NATools $VERSION;
# 'bindir' and 'libdir' entries as reported by Lingua::NATools::ConfigData.
my $BINPREFIX = Lingua::NATools::ConfigData->config('bindir');
my $LIBPREFIX = Lingua::NATools::ConfigData->config('libdir');
# Current logging callback; codify() assigns it from _new_logger().
our $LOG;
# Debug trace helper.
#
# While the package variable $DEBUG is true, writes all arguments to STDERR
# joined by single spaces and terminated by a newline. While it is false,
# nothing is printed.
sub DEBUG {
    print STDERR join(" ", @_), "\n" if $DEBUG;
}
# Accessor: returns the "homedir" entry of this object's configuration
# (the value stored under that key by init()).
sub homedir {
    my ($self) = @_;
    my $conf = $self->{conf};
    return $conf->param("homedir");
}
# Constructor: creates an empty object directory plus its configuration file.
#
# Usage: CLASS->init([$ops,] $dir, $name, @langs)
#
#   $ops   - optional leading reference; its "csize" entry, when true, is
#            stored as the "csize" setting (otherwise 70000 is stored).
#   $dir   - directory that $name is joined to when $name is not absolute.
#   $name  - object name; an absolute $name is used as the directory itself.
#   @langs - optional; when non-empty, the first two entries are stored as
#            "source-language" and "target-language", and
#            "noLanguageIdentification" is set to 1.
#
# An already existing directory at the target location is removed first.
# Dies when that removal or the creation of the new directory fails.
# The settings are written to "nat.cnf" inside the new directory.
#
# Returns the new object, blessed into CLASS.
sub init {
    my $class = shift;
    my $ops   = ref($_[0]) ? shift @_ : {};
    my ($dir, $name, @langs) = @_;

    my $homedir = file_name_is_absolute($name) ? $name : catfile($dir, $name);

    # Start from a clean slate: drop whatever is there, then recreate it.
    if ((-d $homedir) && !remove_tree($homedir)) {
        die "Can not delete existing '$homedir'\n";
    }
    make_path($homedir) or die "Can'not create directory '$homedir'\n";

    my $cfg  = catfile($homedir, "nat.cnf");
    my $conf = Lingua::NATools::Config->new();

    # Kept as an ordered list so settings are stored in a fixed sequence.
    my @settings = (
        [ "nr-chunks" => 0 ],
        [ "name"      => $name ],
        [ "homedir"   => $homedir ],
        [ "cfg"       => $cfg ],
        [ "nr-tus"    => 0 ],
        [ "csize"     => $ops->{csize} || 70000 ],
    );
    if (@langs) {
        push @settings,
            [ "source-language"          => $langs[0] ],
            [ "target-language"          => $langs[1] ],
            [ "noLanguageIdentification" => 1 ];
    }
    $conf->param(@$_) for @settings;

    $conf->write($conf->param("cfg"));

    return bless { conf => $conf }, $class;
}
# Constructor: opens an object from an existing directory.
#
# Usage: CLASS->load($dir)
#
# Returns undef when $dir is false or is not a directory. When the directory
# holds no "nat.cnf" file, a message is printed to STDERR and undef is
# returned. Otherwise the configuration is read from that file with
# Lingua::NATools::Config->new and the object, blessed into CLASS, is returned.
sub load {
    my ($class, $dir) = @_;
    return undef if !$dir || !-d $dir;

    my $conf = catfile($dir, "nat.cnf");
    unless (-f $conf) {
        print STDERR "Couldn't open config file [$conf]\n";
        return undef;
    }

    my $config = Lingua::NATools::Config->new($conf);
    return bless { conf => $config }, $class;
}
# Builds a logging callback.
#
# Arguments:
#   $verbose - when false, the returned callback does nothing at all.
#   $file    - optional path of a log file.
#
# Returns a code reference. In verbose mode the callback prints its arguments
# to STDERR; when $file is true it first appends them to that file. The file
# is opened and closed on every call, so each message has been handed to the
# operating system by the time the callback returns.
#
# The callback dies, naming the log file, when the file cannot be opened,
# written to, or closed. Buffered write errors only surface at close time,
# which is why the result of close is checked as well.
sub _new_logger {
    my ($verbose, $file) = @_;

    return sub { } unless $verbose;

    return sub {
        if ($file) {
            open my $fh, '>>', $file
                or die "Cannot open log file '$file' for appending: $!";
            print {$fh} @_
                or die "Cannot write to log file '$file': $!";
            close $fh
                or die "Cannot close log file '$file': $!";
        }
        print STDERR @_;
    };
}
sub codify {
my ($self, $ops, $txt1, $txt2) = @_;
$LOG = _new_logger($ops->{verbose} || 0, $ops->{log_file});
# If true, the texts will be tokenized.
my $tokenize = $self->{tokenize};
die "Not a valid NATools object\n" unless $self->isa('Lingua::NATools');
die "File '$txt1' does not exist\n" unless -f $txt1;
die "File '$txt2' does not exist\n" unless -f $txt2;
unless ($self->{conf}->param("noLanguageIdentification")) {
$LOG->(" Identifying languages...\n");
# Identify source language
my $source_language = uc(langof_file($txt1));
$LOG->(" - Source language: $source_language\n");
if (!$self->{conf}->param("source-language")) {
$self->{conf}->param("source-language", $source_language);
} elsif ($self->{conf}->param("source-language") ne $source_language) {
print STDERR "Warning!! Source-language seems different from previous chunk\n"
}
# Identify target language
my $target_language = uc(langof_file($txt2));
$LOG->(" - Target language: $target_language\n");
if (!$self->{conf}->param("target-language")) {
$self->{conf}->param("target-language", $target_language);
} elsif ($self->{conf}->param("target-language") ne $target_language) {
print STDERR "Warning!! Target-language seems different from previous chunk\n"
}
$LOG->("\n");
}
view all matches for this distribution
view release on metacpan - search on metacpan
( run in 0.919 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )