AI-Classifier

 view release on metacpan or  search on metacpan

MANIFEST  view on Meta::CPAN

META.json
META.yml
Makefile.PL
README
README.pod
dist.ini
lib/AI/Classifier/Text.pm
lib/AI/Classifier/Text/Analyzer.pm
lib/AI/Classifier/Text/FileLearner.pm
t/data/training_cache/predictor
t/data/training_initial_features/ham/1
t/data/training_initial_features/ham/1.data
t/data/training_set_ordered/ham/2
t/data/training_set_ordered/spam/1
t/file_reader.t
t/model.dat
t/release-pod-coverage.t
t/release-pod-syntax.t
t/state.t
t/text.t

README.pod  view on Meta::CPAN

};

# Wrap the existing "load" method: call it with the remaining arguments, then
# hand the resulting object's classifier_class slot to Module::Load::load
# before returning the object.
around load => sub {
    my $orig  = shift;
    my $class = shift;

    my $self = $class->$orig(@_);
    Module::Load::load( $self->{classifier_class} );

    return $self;
};

# Classify a piece of text.
#
# $text and $features are passed to the analyzer's analyze(); whatever it
# returns is forwarded to the underlying classifier's classify(), and that
# call's result is returned unchanged.
sub classify {
    my $self = shift;
    my ( $text, $features ) = @_;

    # Resolve the classifier before running the analyzer.
    my $classifier = $self->classifier;
    my @analysis   = $self->analyzer->analyze( $text, $features );

    return $classifier->classify(@analysis);
}

# Finalize the class definition through its metaclass (presumably Moose's
# make_immutable, given the `around` modifier used in this file -- confirm).
__PACKAGE__->meta->make_immutable;

# The file ends with a true value so that loading it succeeds.
1;

__END__

# ABSTRACT: A convenient class for text classification

t/data/training_initial_features/ham/1.data  view on Meta::CPAN

{
    initial_features => { some_tag => 3 },
}

t/file_reader.t  view on Meta::CPAN

# Walk every document the FileLearner yields for the training directory and
# index each one by the file path it reports.
my $iterator = AI::Classifier::Text::FileLearner->new(
    training_dir => File::Spec->catdir( @training_dirs ) );


my %hash;
while( my $doc = $iterator->next ){
    $hash{$doc->{file}} = $doc;
}

# Expected documents keyed by path: one in the 'spam' category and one in the
# 'ham' category, each with its feature counts.
my $target = {
    File::Spec->catfile( @training_dirs, 'spam', '1' ) => {
        'features' => { ccccc => 1, NO_URLS => 2 },
        'file' => File::Spec->catfile( @training_dirs, 'spam', '1' ),
        'categories' => [ 'spam' ]
    },
    File::Spec->catfile( @training_dirs, 'ham', '2' ) => {
        'features' => { ccccc => 1, aaaa => 1, NO_URLS => 2 },
        'file' => File::Spec->catfile( @training_dirs, 'ham', '2' ),
        'categories' => [ 'ham' ]
    }
};

# Name the assertion so a failure is identifiable in the TAP output.
is_deeply( \%hash, $target, 'iterator returns the expected spam and ham documents' );

# Build a classifier from the training directory and check that its model
# holds a true prior probability for each category.
my $classifier = AI::Classifier::Text::FileLearner->new(
    training_dir => File::Spec->catdir( @training_dirs ),
)->classifier;

ok( $classifier, 'Classifier created' );
for my $category ( qw( ham spam ) ) {
    ok( $classifier->classifier->model()->{prior_probs}{$category}, "$category prior probs" );
}
{
    # Same iteration check against the training_initial_features directory.
    # The lexicals here are scoped to this block and shadow the outer ones.
    my $iterator = AI::Classifier::Text::FileLearner->new( training_dir => File::Spec->catdir( qw( t data training_initial_features ) ) );

    my %hash;
    while( my $doc = $iterator->next ){
        $hash{$doc->{file}} = $doc;
    }

    # The single expected document; some_tag => 3 presumably comes from the
    # accompanying ham/1.data file -- confirm against FileLearner.
    my $target = {
        File::Spec->catfile( qw( t data training_initial_features ham 1 ) ) => {
            'file' => File::Spec->catfile( qw( t data training_initial_features ham 1 ) ),
            'categories' => [ 'ham' ],
            features => { trala => 1, some_tag => 3, NO_URLS => 2 }
        },
    };

    # Name the assertion so a failure is identifiable in the TAP output.
    is_deeply( \%hash, $target, 'training_initial_features document has the expected features' );
}

{
    {
        package TestLearner;

        sub new { bless { examples => [] } };

t/text.t  view on Meta::CPAN

use strict;
use warnings;

use Test::More;
use AI::Classifier::Text::Analyzer;

# Exercise AI::Classifier::Text::Analyzer: analyze() on text with and without
# a URL, and analyze_urls() on text that repeats one URL.
my $analyzer = AI::Classifier::Text::Analyzer->new();

ok( $analyzer, 'Analyzer created' );

# analyze() is given a hash reference and the test inspects that same hash
# afterwards, so the features are expected to be filled in place.
my $features = {};
$analyzer->analyze( 'aaaa http://www.example.com/bbb?xx=yy&bb=cc;dd=ff', $features );
is_deeply(
    $features,
    { aaaa => 1, 'example.com' => 1, MANY_URLS => 2 },
    'analyze fills the supplied hash for text containing a URL'
);

# Without a features argument the return value of analyze() is checked instead.
$features = $analyzer->analyze( 'nothing special' );
is_deeply(
    $features,
    { nothing => 1, special => 1, NO_URLS => 2 },
    'analyze returns features for text without URLs'
);

# The same URL five times; analyze_urls() takes a reference to the text.
my $text = 'http://www.hungry.birds! http://www.hungry.birds! http://www.hungry.birds! '
      . 'http://www.hungry.birds! http://www.hungry.birds!';
$features = {};
$analyzer->analyze_urls( \$text, $features );
is_deeply(
    $features,
    {
        'hungry.birds!' => 5,
        REPEATED_URLS => 2,
        MANY_URLS => 2,
    },
    'analyze_urls counts a repeated URL'
);

done_testing;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 2.365 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-72ae3ad1e6da )