DTA-CAB

 view release on metacpan or  search on metacpan

CAB/Analyzer/LangId.pm  view on Meta::CPAN

## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::LangId.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: Lingua::LangId::Map wrapper

package DTA::CAB::Analyzer::LangId;
use DTA::CAB::Analyzer;
use DTA::CAB::Datum ':all';
use Lingua::LangId::Map;

use Encode qw(encode decode);
use IO::File;
use Carp;

use strict;

##==============================================================================
## Globals
##==============================================================================

our @ISA = qw(DTA::CAB::Analyzer);

##==============================================================================
## Constructors etc.
##==============================================================================

## $obj = CLASS_OR_OBJ->new(%args)
##  + constructor: delegates to SUPER::new(), passing the defaults listed
##    below first and the caller's %args after them
##  + object structure:
##    (
##     ##-- Filename Options
##     mapFile => $filename,     ##-- default: undef (documented as REQUIRED)
##
##     ##-- Analysis Options
##     analyzeWhich     => $which, ##-- one of 'token', 'sentence', 'document'; default='document'
##     vlabel           => $label, ##-- verbose destination key (default='langid')
##     label            => $label, ##-- simple destination key (default='lang')
##
##     ##-- Analysis Objects
##     map            => $map,   ##-- a Lingua::LangId::Map object (no default is set here)
##    )
sub new {
  my ($that, @userArgs) = @_;

  ##-- default settings; the 'map' analysis object is deliberately left unset
  my @defaults = (
                  mapFile      => undef,       ##-- filenames
                  analyzeWhich => 'document',  ##-- options
                  vlabel       => 'langid',
                  label        => 'lang',
                 );

  ##-- user args go last in the list handed to the parent constructor
  my $self = $that->SUPER::new(@defaults, @userArgs);
  return $self;
}

## $lid = $lid->clear()
##  + calls dropClosures() on the object and removes its 'map' entry
##  + returns the object itself
sub clear {
  my ($self) = @_;

  $self->dropClosures();   ##-- analysis sub(s)
  delete $self->{map};     ##-- analysis objects

  return $self;
}

##==============================================================================
## Methods: Generic
##==============================================================================

## $bool = $lid->mapOk()
##  + returns true (1) iff $lid->{map} is defined and its 'sigs' entry is a
##    non-empty HASH
##  + returns false ('') otherwise, including when 'sigs' is missing or is
##    not a HASH reference
##  + always returns a single boolean scalar, whatever the calling context
sub mapOk {
  my $map = $_[0]{map};
  return '' if !defined($map);

  ##-- guard: dereferencing a missing 'sigs' as a hash is fatal under 'use strict'
  my $sigs = $map->{sigs};
  return '' if !UNIVERSAL::isa($sigs,'HASH');

  ##-- force a boolean: a bare %$sigs would flatten to key/value pairs in list context
  return (%{$sigs} ? 1 : '');
}

## @keys = $anl->typeKeys(\%opts)
##  + returns list of type-wise keys to be expanded for this analyzer by expandTypes()
##  + returns @{$anl->{typeKeys}} if that entry is set (true), otherwise the empty list
sub typeKeys {
  my $keys = $_[0]{typeKeys};
  return $keys ? @$keys : ();
}

##==============================================================================
## Methods: I/O
##==============================================================================

##--------------------------------------------------------------
## Methods: I/O: Input: all

## $bool = $lid->ensureLoaded()
##  + returns 1 without loading anything if no 'mapFile' is set
##    (empty models are allowed) or if mapOk() already reports a usable map
##  + otherwise returns the result of $lid->loadMap($lid->{mapFile})
sub ensureLoaded {
  my ($self) = @_;

  ##-- no file configured: allow empty models
  return 1 unless defined $self->{mapFile};

  ##-- map already present: nothing to load
  return 1 if $self->mapOk;

  ##-- ensure: map
  return $self->loadMap($self->{mapFile});
}

CAB/Analyzer/LangId.pm  view on Meta::CPAN

 ## Methods: Analysis
 
 $bool = $anl->canAnalyze();
 $thingy = $lid->analyzeThingy($thingy, \$str, \%opts);
 $doc = $anl->analyzeDocument($doc,\%opts);
 

=cut

##========================================================================
## DESCRIPTION
=pod

=head1 DESCRIPTION

DTA::CAB::Analyzer::LangId provides a
L<DTA::CAB::Analyzer|DTA::CAB::Analyzer>
interface to the L<Lingua::LangId|Lingua::LangId>
language-guessing library.
Its current implementation only has proof-of-concept status.

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Globals
=pod

=head2 Globals

=over 4

=item Variable: @ISA

DTA::CAB::Analyzer::LangId
inherits from L<DTA::CAB::Analyzer|DTA::CAB::Analyzer>
and implements the L<DTA::CAB::Analyzer|DTA::CAB::Analyzer> API.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Constructors etc.
=pod

=head2 Constructors etc.

=over 4

=item new

 $obj = CLASS_OR_OBJ->new(%args);

object structure:

    (
     ##-- Filename Options
     mapFile => $filename,     ##-- default: none (REQUIRED)
     ##-- Analysis Options
     analyzeWhich     => $which, ##-- one of 'token', 'sentence', 'document'; default='document'
     vlabel           => $label, ##-- verbose destination key (default='langid')
     label            => $label, ##-- simple destination key (default='lang')
     ##-- Analysis Objects
     map            => $map,   ##-- a Lingua::LangId::Map object
    )

=item clear

 $lid = $lid->clear();

Calls C<dropClosures()>, deletes the C<map> analysis object from C<$lid>, and returns C<$lid>.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Methods: Generic
=pod

=head2 Methods: Generic

=over 4

=item mapOk

 $bool = $lid->mapOk();


=over 4


=item *

should return false iff map is undefined or "empty"

=item *

default version checks for non-empty 'map' and 'sigs'

=back

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Methods: I/O: Input: all
=pod

=head2 Methods: I/O: Input: all

=over 4

=item ensureLoaded

 $bool = $lid->ensureLoaded();

ensures model data is loaded from default files (if available)

=back



( run in 3.711 seconds using v1.01-cache-2.11-cpan-5735350b133 )