Lingua-NATools

 view release on metacpan or  search on metacpan

lib/Lingua/NATools.pm  view on Meta::CPAN

use IPC::Open2;
use Compress::Zlib;
use MLDBM qw/DB_File Storable/;
use Fcntl;
use Storable;
use Time::HiRes;
use XML::TMX::Reader;
use Lingua::PT::PLNbase;
use Lingua::Identify qw/:all/;

# Debug switch: while true, DEBUG() prints its arguments to STDERR.
our $DEBUG = 0;

# Inherit from DynaLoader and bootstrap this package through it.
# NOTE(review): $VERSION is not declared in the visible part of the file;
# presumably it is set earlier -- confirm.
use parent 'DynaLoader';
bootstrap Lingua::NATools $VERSION;

# Values of the 'bindir' and 'libdir' keys from Lingua::NATools::ConfigData.
# NOTE(review): presumably the install locations of the NATools binaries and
# libraries; their use is not visible in this part of the file -- confirm.
my $BINPREFIX = Lingua::NATools::ConfigData->config('bindir');
my $LIBPREFIX = Lingua::NATools::ConfigData->config('libdir');

# Current logging callback; codify() assigns it the closure returned by
# _new_logger() and then calls it as $LOG->(...).
our $LOG;

# Emit a diagnostic line on STDERR -- the arguments joined by single spaces,
# followed by a newline -- but only while the package-level $DEBUG flag is
# true.  Returns the (false) flag value when silent, otherwise print's result.
sub DEBUG {
    return $DEBUG unless $DEBUG;

    my $line = join " ", @_;
    return print STDERR $line, "\n";
}

# Accessor: returns the "homedir" parameter held by the object's
# configuration (the value stored under that key by init()).
sub homedir {
    my ($self) = @_;

    my $conf = $self->{conf};
    return $conf->param("homedir");
}


# Class method: create a brand-new, empty corpus directory together with its
# "nat.cnf" configuration, and return the blessed object.
#
# Arguments (after the class):
#   - optional leading reference with options; only `csize` is read here
#     (falls back to 70000 when missing or false);
#   - $dir   : parent directory, used only when $name is not absolute;
#   - $name  : corpus name, or an absolute path used verbatim as the home;
#   - @langs : optional; when present, the first two entries are stored as
#     source and target language and language identification is disabled.
#
# Dies if an existing home directory cannot be removed or the new one cannot
# be created.
sub init {
    my ($class, @args) = @_;
    my $ops = ref($args[0]) ? shift @args : {};
    my ($dir, $name, @langs) = @args;

    # An absolute corpus name is taken as-is; otherwise it lives under $dir.
    my $homedir = file_name_is_absolute($name) ? $name : catfile($dir, $name);

    # Always start from scratch: a previous directory of that name is wiped.
    if (-d $homedir) {
        remove_tree($homedir)
            or die "Can not delete existing '$homedir'\n";
    }
    make_path($homedir)
        or die "Can'not create directory '$homedir'\n";

    my $conf = Lingua::NATools::Config->new();

    # Parameters are written in this fixed order.
    my @settings = (
        [ "nr-chunks" => 0 ],
        [ "name"      => $name ],
        [ "homedir"   => $homedir ],
        [ "cfg"       => catfile($homedir, "nat.cnf") ],
        [ "nr-tus"    => 0 ],
        [ "csize"     => $ops->{csize} || 70000 ],
    );

    # Explicit languages switch automatic language identification off.
    if (@langs) {
        push @settings,
            [ "source-language"          => $langs[0] ],
            [ "target-language"          => $langs[1] ],
            [ "noLanguageIdentification" => 1 ];
    }

    for my $pair (@settings) {
        $conf->param(@{$pair});
    }

    # Persist the configuration at the path just recorded under "cfg".
    $conf->write($conf->param("cfg"));

    return bless { conf => $conf }, $class;
}



# Class method: open an existing corpus directory.
#
# Returns a blessed object whose `conf` entry is built from "$dir/nat.cnf".
# Returns undef when $dir is false or not a directory (silently), or when the
# configuration file is missing (after printing a message to STDERR).
sub load {
    my ($class, $dir) = @_;

    if (!$dir || !-d $dir) {
        return undef;
    }

    my $conf_path = catfile($dir, "nat.cnf");

    unless (-f $conf_path) {
        print STDERR "Couldn't open config file [$conf_path]\n";
        return undef;
    }

    my $config = Lingua::NATools::Config->new($conf_path);
    return bless { conf => $config }, $class;
}

# Build the logging callback used through $LOG.
#
# Parameters:
#   $verbose - when false, the returned callback does nothing at all (the
#              log file is not touched either);
#   $file    - optional path; when true, every message is appended to it.
#
# Returns a code reference.  In verbose mode the callback appends its
# arguments to $file (if given) and then prints them to STDERR.
#
# The callback dies, naming the file, if the log file cannot be opened or if
# closing it fails (which is where buffered write errors surface).
sub _new_logger {
    my ($verbose, $file) = @_;

    # Quiet mode: hand back a no-op so callers can invoke it unconditionally.
    return sub { } unless $verbose;

    return sub {
        if ($file) {
            # The file is reopened in append mode for every message, so it
            # is never held open between calls.
            open my $fh, ">>", $file
                or die "Can not open log file '$file': $!";
            print {$fh} @_;
            close $fh
                or die "Can not close log file '$file': $!";
        }

        print STDERR @_;
    };
}

sub codify {
    my ($self, $ops, $txt1, $txt2) = @_;
    $LOG = _new_logger($ops->{verbose} || 0, $ops->{log_file});

    # If true, the texts will be tokenized.
    my $tokenize = $self->{tokenize};

    die "Not a valid NATools object\n"   unless $self->isa('Lingua::NATools');

    die "File '$txt1' does not exist\n"  unless -f $txt1;
    die "File '$txt2' does not exist\n"  unless -f $txt2;

    unless ($self->{conf}->param("noLanguageIdentification")) {

        $LOG->(" Identifying languages...\n");

        # Identify source language
        my $source_language = uc(langof_file($txt1));
        $LOG->(" - Source language: $source_language\n");

        if (!$self->{conf}->param("source-language")) {
            $self->{conf}->param("source-language", $source_language);
        } elsif ($self->{conf}->param("source-language") ne $source_language) {
            print STDERR "Warning!! Source-language seems different from previous chunk\n"
        }

        # Identify target language
        my $target_language = uc(langof_file($txt2));
        $LOG->(" - Target language: $target_language\n");

        if (!$self->{conf}->param("target-language")) {
            $self->{conf}->param("target-language", $target_language);
        } elsif ($self->{conf}->param("target-language") ne $target_language) {
            print STDERR "Warning!! Target-language seems different from previous chunk\n"
        }

        $LOG->("\n");
    }

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.919 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )