AI-Classifier
view release on metacpan or search on metacpan
lib/AI/Classifier/Text/Analyzer.pm view on Meta::CPAN
my @urls;
my $p = URI::Find->new(
sub {
my ($uri, $t) = @_;
push @urls, $uri;
eval{
my $host = $uri->host;
$host =~ s/^www\.//;
$features->{ lc $host }++;
for (split /\//, $uri->path) {
if (length $_ > 3 ) {
$features->{ lc $_}++;
}
}
}
}
);
$p->find($text);
my $weight = $self->global_feature_weight;
if (!@urls) {
$features->{NO_URLS} = $weight;
}
if (scalar @urls > length( $text ) / 120 ) {
$features->{MANY_URLS} = $weight;
}
{
my %urls;
for my $url ( @urls ) {
if( $urls{$url}++ > 3 ){
$features->{REPEATED_URLS} = $weight;
last;
}
}
lib/AI/Classifier/Text/FileLearner.pm view on Meta::CPAN
}
foreach my $doc (@documents) {
my $f = $doc->{attributes};
for (keys %$f) {
$f->{$_} *= log($num_docs / ($frequency{$_} // 0) - $subtrahend);
}
}
}
# Compute the Euclidean (L2) length of a feature vector.
#
# Takes a hash reference and returns the square root of the sum of the
# squares of its values. The keys are ignored. An empty hash yields 0.
sub euclidean_length {
    my ($f) = @_;

    my $sum_of_squares = 0;
    for my $value (values %$f) {
        $sum_of_squares += $value ** 2;
    }

    return sqrt $sum_of_squares;
}
lib/AI/Classifier/Text/FileLearner.pm view on Meta::CPAN
my ($f, $scalar) = @_;
$_ *= $scalar foreach values %$f;
return $f;
}
# Rescale a feature vector by the reciprocal of its Euclidean length.
#
# Takes a hash reference. When euclidean_length() reports a false (zero)
# length, the reference is returned as-is, which avoids dividing by zero.
# Otherwise the result of scale($attrs, 1/length) is returned.
sub normalize {
    my ($attrs) = @_;

    my $length = euclidean_length($attrs);
    return $attrs unless $length;

    return scale($attrs, 1 / $length);
}
1;
=pod
=head1 NAME
AI::Classifier::Text::FileLearner - Training data reader for AI::NaiveBayes
( run in 0.731 second using v1.01-cache-2.11-cpan-65fba6d93b7 )