DTA-CAB
view release on metacpan or search on metacpan
CAB/Analyzer/LangId.pm view on Meta::CPAN
## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::LangId.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: Lingua::LangId::Map wrapper
package DTA::CAB::Analyzer::LangId;
use DTA::CAB::Analyzer;
use DTA::CAB::Datum ':all';
use Lingua::LangId::Map;
use Encode qw(encode decode);
use IO::File;
use Carp;
use strict;
##==============================================================================
## Globals
##==============================================================================
our @ISA = qw(DTA::CAB::Analyzer);
##==============================================================================
## Constructors etc.
##==============================================================================
## $obj = CLASS_OR_OBJ->new(%args)
##  + constructor; delegates to the inherited new() with the defaults below,
##    followed by the caller's %args (so user-supplied values come last)
##  + recognized keys:
##    (
##     ##-- Filename Options
##     mapFile      => $filename, ##-- default: none (REQUIRED)
##
##     ##-- Analysis Options
##     analyzeWhich => $which,    ##-- one of 'token', 'sentence', 'document'; default='document'
##     vlabel       => $label,    ##-- verbose destination key (default='langid')
##     label        => $label,    ##-- simple destination key (default='lang')
##
##     ##-- Analysis Objects
##     map          => $map,      ##-- a Lingua::LangId::Map object (no default is set here)
##    )
sub new {
  my $that = shift;

  ##-- class defaults, kept as an ordered list so they precede the user args
  my @defaults = (
    mapFile      => undef,       ##-- filenames
    analyzeWhich => 'document',  ##-- options
    vlabel       => 'langid',
    label        => 'lang',
  );

  my $lid = $that->SUPER::new(@defaults, @_);
  return $lid;
}
## $lid = $lid->clear()
##  + calls dropClosures() on the object, then removes its 'map' entry
##  + returns the object itself
sub clear {
  my ($self) = @_;

  $self->dropClosures();   ##-- analysis sub(s)
  delete $self->{map};     ##-- analysis objects

  return $self;
}
##==============================================================================
## Methods: Generic
##==============================================================================
## $bool = $lid->mapOk()
##  + returns 1 iff $lid->{map} is defined and its 'sigs' hash is non-empty,
##    otherwise 0
##  + always returns a single boolean scalar, even in list context
##    (a bare "defined(...) && %hash" would flatten the hash there)
##  + a defined map with no 'sigs' entry counts as "empty" instead of dying
##    under 'use strict'
sub mapOk {
  my $lid = shift;

  my $map = $lid->{map};
  return 0 if !defined($map);

  ##-- guard against a missing 'sigs' before dereferencing it
  my $sigs = $map->{sigs};
  my $ok   = defined($sigs) && %$sigs;
  return $ok ? 1 : 0;
}
## @keys = $anl->typeKeys(\%opts)
##  + returns list of type-wise keys to be expanded for this analyzer by expandTypes()
##  + this version returns @{$anl->{typeKeys}} if that entry is true,
##    otherwise the empty list (it does not fall back to $anl->{label})
sub typeKeys {
  my $anl  = shift;
  my $keys = $anl->{typeKeys};
  return @$keys if $keys;
  return;
}
##==============================================================================
## Methods: I/O
##==============================================================================
##--------------------------------------------------------------
## Methods: I/O: Input: all
## $bool = $lid->ensureLoaded()
##  + loads the map from $lid->{mapFile} via loadMap() when a map file is
##    configured and mapOk() reports that no usable map is present
##  + in that case the result of loadMap() is returned; otherwise returns 1
sub ensureLoaded {
  my ($lid) = @_;

  ##-- nothing configured: allow empty models
  return 1 unless defined($lid->{mapFile});

  ##-- map already present: nothing to do
  return 1 if $lid->mapOk;

  ##-- ensure: map
  return $lid->loadMap($lid->{mapFile});
}
CAB/Analyzer/LangId.pm view on Meta::CPAN
## Methods: Analysis
$bool = $anl->canAnalyze();
$thingy = $lid->analyzeThingy($thingy, \$str, \%opts);
$doc = $anl->analyzeDocument($doc,\%opts);
=cut
##========================================================================
## DESCRIPTION
=pod
=head1 DESCRIPTION
DTA::CAB::Analyzer::LangId provides a
L<DTA::CAB::Analyzer|DTA::CAB::Analyzer>
interface to the L<Lingua::LangId|Lingua::LangId>
language-guessing library.
Its current implementation only has proof-of-concept status.
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Globals
=pod
=head2 Globals
=over 4
=item Variable: @ISA
DTA::CAB::Analyzer::LangId
inherits from L<DTA::CAB::Analyzer|DTA::CAB::Analyzer>
and implements the L<DTA::CAB::Analyzer|DTA::CAB::Analyzer> API.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Constructors etc.
=pod
=head2 Constructors etc.
=over 4
=item new
$obj = CLASS_OR_OBJ->new(%args);
object structure:
(
##-- Filename Options
mapFile => $filename, ##-- default: none (REQUIRED)
##-- Analysis Options
analyzeWhich => $which, ##-- one of 'token', 'sentence', 'document'; default='document'
vlabel => $label, ##-- verbose destination key (default='langid')
label => $label, ##-- simple destination key (default='lang')
##-- Analysis Objects
map => $map, ##-- a Lingua::LangId::Map object
)
=item clear
$lid = $lid->clear();
Drops the analyzer's analysis closures (via dropClosures()) and deletes the 'map' object, then returns $lid.
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Methods: Generic
=pod
=head2 Methods: Generic
=over 4
=item mapOk
$bool = $lid->mapOk();
=over 4
=item *
should return false iff map is undefined or "empty"
=item *
the default implementation returns true only if 'map' is defined and its 'sigs' hash is non-empty
=back
=back
=cut
##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::LangId: Methods: I/O: Input: all
=pod
=head2 Methods: I/O: Input: all
=over 4
=item ensureLoaded
$bool = $lid->ensureLoaded();
ensures model data is loaded from default files (if available)
=back
( run in 3.711 seconds using v1.01-cache-2.11-cpan-5735350b133 )