AI-Classifier

 view release on metacpan or  search on metacpan

lib/AI/Classifier/Text/FileLearner.pm  view on Meta::CPAN

    my $num_docs   = +@documents;

    my %frequency;
    for my $doc (@documents) {
        for my $k (keys %{$doc->{attributes}}) {
            $frequency{$k}++;
        }
    }

    foreach my $doc (@documents) {
        my $f = $doc->{attributes};
        for (keys %$f) {
            $f->{$_} *= log($num_docs / ($frequency{$_} // 0) - $subtrahend);
        }
    }
}

# Compute the Euclidean (L2) length of a sparse vector.
#
# Takes a hash reference whose values are the vector components and
# returns the square root of the sum of their squares.  An empty hash
# yields 0.
sub euclidean_length {
    my ($vector) = @_;

    my $sum_of_squares = 0;
    $sum_of_squares += $_ ** 2 for values %{$vector};

    return sqrt $sum_of_squares;
}

# Multiply every component of a sparse vector by a scalar, in place.
#
# Takes a hash reference and a numeric factor.  The hash passed in is
# itself modified, and the same reference is returned.
sub scale {
    my ($vector, $factor) = @_;

    for my $key (keys %{$vector}) {
        $vector->{$key} *= $factor;
    }

    return $vector;
}

# Rescale a sparse vector to unit Euclidean length.
#
# Takes a hash reference of components.  When its length is non-zero,
# every component is multiplied by 1/length via scale(), which modifies
# the hash in place.  A zero-length vector is returned untouched, which
# avoids dividing by zero.  In both cases the reference passed in is the
# one returned.
sub normalize {
    my ($attrs) = @_;

    my $norm = euclidean_length($attrs);

    # Nothing to rescale for a zero-length vector.
    return $attrs unless $norm;

    return scale($attrs, 1 / $norm);
}

1;

=pod

=head1 NAME

AI::Classifier::Text::FileLearner - Training data reader for AI::NaiveBayes

=head1 VERSION

version 0.03

=head1 SYNOPSIS

    use AI::Classifier::Text::FileLearner;

    my $learner = AI::Classifier::Text::FileLearner->new( training_dir => 't/data/training_set_ordered/' );

    my $classifier = $learner->classifier;

=head1 DESCRIPTION

This is a trainer of text classifiers.  It traverses a training data directory,
interprets the subdirectories in it as category names, reads all the files in them,
and adds them as examples for the classifier being trained.

=head1 METHODS

=over 4

=item next

Internal method for traversing the training data directory.

=item classifier

Returns a trained classifier.

=back

=head1 AUTHOR

Zbigniew Lukasiak <zlukasiak@opera.com>, Tadeusz Sośnierz <tsosnierz@opera.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2012 by Opera Software ASA.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__END__

# ABSTRACT: Training data reader for AI::NaiveBayes



( run in 0.783 second using v1.01-cache-2.11-cpan-39bf76dae61 )