AI-Classifier

 view release on metacpan or  search on metacpan

lib/AI/Classifier/Text/Analyzer.pm  view on Meta::CPAN

    my @urls;
    my $p = URI::Find->new(
        sub {
            my ($uri, $t) = @_;
            push @urls, $uri;
            eval{
                my $host = $uri->host;
                $host =~ s/^www\.//;
                $features->{ lc $host }++;
                for (split /\//, $uri->path) {
                    if (length $_ > 3 ) {
                        $features->{ lc $_}++;
                    }
                }
            }
        }
    );
    $p->find($text);
    my $weight = $self->global_feature_weight;
    if (!@urls) {
        $features->{NO_URLS} = $weight;
    }
    if (scalar @urls > length( $text ) / 120 ) {
        $features->{MANY_URLS} = $weight;
    }
    {
        my %urls;
        for my $url ( @urls ) {
            if( $urls{$url}++ > 3 ){
                $features->{REPEATED_URLS} = $weight;
                last;
            }
        }

lib/AI/Classifier/Text/FileLearner.pm  view on Meta::CPAN

    }

    foreach my $doc (@documents) {
        my $f = $doc->{attributes};
        for (keys %$f) {
            $f->{$_} *= log($num_docs / ($frequency{$_} // 0) - $subtrahend);
        }
    }
}

# Compute the Euclidean (L2) norm of a feature vector.
#
# Takes a hash reference whose values are numeric weights and returns the
# square root of the sum of their squares.  An empty hash yields 0.
sub euclidean_length {
    my ($vector) = @_;

    my $sum_of_squares = 0;
    $sum_of_squares += $_ ** 2 for values %$vector;

    return sqrt $sum_of_squares;
}

lib/AI/Classifier/Text/FileLearner.pm  view on Meta::CPAN

    my ($f, $scalar) = @_;

    $_ *= $scalar foreach values %$f;

    return $f;
}

# Rescale a feature vector to unit Euclidean length.
#
# Takes a hash reference of numeric weights.  When its Euclidean length is
# non-zero, returns whatever scale() returns for the vector and the
# reciprocal of that length; when the length is zero (e.g. an empty or
# all-zero vector) the hash reference is returned untouched, which avoids
# dividing by zero.
sub normalize {
    my ($attrs) = @_;

    my $norm = euclidean_length($attrs);

    # Nothing to rescale for a zero-length vector.
    return $attrs unless $norm;

    return scale($attrs, 1 / $norm);
}

1;

=pod

=head1 NAME

AI::Classifier::Text::FileLearner - Training data reader for AI::NaiveBayes



( run in 0.259 second using v1.01-cache-2.11-cpan-65fba6d93b7 )