Lingua-Identify-CLD

 view release on metacpan or  search on metacpan

lib/Lingua/Identify/CLD.pm  view on Meta::CPAN

=cut

# Module version. It is declared here but given its value inside the BEGIN
# block below: BEGIN blocks execute at compile time, before any ordinary
# run-time assignment, so assigning in the declaration would leave the
# version undefined when it is handed to XSLoader::load().
our $VERSION;

use XSLoader;
BEGIN {
    $VERSION = '0.10';

    # Load the compiled (XS) part of the module at compile time, passing the
    # module version to the loader.
    XSLoader::load('Lingua::Identify::CLD', $VERSION);
}

=head1 SYNOPSIS

    use Lingua::Identify::CLD;

    # Vanilla identification
    my $cld = Lingua::Identify::CLD->new();
    my $lang = $cld->identify("Text");

    # using TLD hint
    $lang = $cld->identify("Text", tld => 'by');

    # obtaining further information
    my @lang = $cld->identify("Text");
    # $lang[0] -> language name
    # $lang[1] -> language id
    # $lang[2] -> confidence
    # $lang[3] -> is_reliable (bool)

    # CLD object can also be created with this option
    my $bycld = Lingua::Identify::CLD->new(tld => 'by');

=head1 METHODS

=head2 new

Constructs a CLD object. You can pass some global configuration
options. Currently supported options are listed below:

=over 4

=item tld

A top-level domain (TLD) used as a hint to help with language identification.

=item isPlainText

Set to true by default. If the text contains HTML/XML markup, set it to
false.

=item allowExtendedLanguages

Set to true by default. Lets you control whether extended languages,
such as Klingon or Pig Latin, should be checked.

=back

=cut

sub new {
    # Invoked as a class method: the first argument is the class name and
    # the remaining arguments form a flat key/value list of options.
    my $class   = shift;
    my %options = @_;

    # The object is a plain hash holding a copy of the supplied options.
    my $object = { %options };
    return bless($object, $class);
}

=head2 identify

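Receives a string and returns a language name. In list context it
returns the language name, the language id, the confidence and the
is_reliable flag. A set of key/value options may be supplied after the
text. The supported options are the same as those of C<new>.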

=cut

sub identify {
    my $self    = shift;
    my $text    = shift;
    my %options = @_;

    # Values used for any option that was given neither to the constructor
    # nor to this call.
    my %defaults = (
        tld                    => "",
        isPlainText            => 1,
        allowExtendedLanguages => 1,
    );

    # Later entries win: defaults, then the options stored on the object,
    # then the per-call options. A key that is present with an undefined
    # value still overrides the default.
    my %cfg = ( %defaults, %$self, %options );

    my ($tld, $plaintext, $extended)
        = @cfg{qw(tld isPlainText allowExtendedLanguages)};

    # Initial values for the extra results. These variables are passed to
    # _identify (defined outside this block) and returned afterwards;
    # presumably _identify fills them in through its arguments -- confirm
    # against its definition.
    my ($id, $confidence, $is_reliable) = ('', 0, 0);

    my $detected = _identify(
        $text, $tld, $plaintext, $extended,
        $id, $confidence, $is_reliable,
    );

    # The language name is always returned upper-cased.
    my $name = uc $detected;

    # Scalar (or void) context: only the language name.
    return $name unless wantarray;

    # List context: name, id, confidence and reliability flag.
    return ($name, $id, $confidence, $is_reliable);
}

=head1 AUTHOR

Alberto Simoes, C<< <ambs at cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to
L<http://github.com/ambs/Lingua-Identify-CLD>.  I will be notified,
and then you'll automatically be notified of progress on your bug as I
make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Lingua::Identify::CLD


You can also look for information at:

=over 4

=item * Git repository and ticket tracker

L<http://github.com/ambs/Lingua-Identify-CLD>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Lingua-Identify-CLD>

=item * CPAN Ratings

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.531 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )