AI-Categorizer
view release on metacpan or search on metacpan
lib/AI/Categorizer/FeatureSelector/DocFrequency.pm view on Meta::CPAN
package AI::Categorizer::FeatureSelector::DocFrequency;
use strict;
use AI::Categorizer::FeatureSelector;
use base qw(AI::Categorizer::FeatureSelector);
use Params::Validate qw(:types);
use Carp qw(croak);
# Register a contained object named 'features', whose class is
# AI::Categorizer::FeatureVector.  It is flagged 'delayed', and the methods
# in this file obtain instances on demand by calling
# $self->create_delayed_object('features', ...).
# NOTE(review): contained_objects() is inherited, presumably from
# Class::Container by way of the base class -- confirm.
__PACKAGE__->contained_objects
(
features => { class => 'AI::Categorizer::FeatureVector',
delayed => 1 },
);
# rank_features(knowledge_set => $ks)
#
# Document frequencies are already tracked by the knowledge set, so this
# simply asks it for the frequency of every feature name it reports and
# hands the resulting name => frequency table to a new 'features' object.
#
# Dies if no (true) 'knowledge_set' argument is supplied.
sub rank_features {
  my ($self, %args) = @_;

  my $knowledge_set = $args{knowledge_set};
  die "No knowledge_set parameter provided to rank_features()"
    unless $knowledge_set;

  my @feature_names = $knowledge_set->features->names;

  # 'scalar' keeps document_frequency() in scalar context, exactly as a
  # plain assignment to a hash element would.
  my %frequency_of = map {
    ( $_ => scalar $knowledge_set->document_frequency($_) )
  } @feature_names;

  return $self->create_delayed_object('features', features => \%frequency_of);
}
# scan_features(collection => $c, [prog_bar => $coderef])
#
# Walks every document the collection yields and adds each document's
# boolean feature hash to a freshly created 'features' object.  The
# accumulated object is passed to reduce_features(), whose result is
# returned.
#
# If 'prog_bar' is supplied it is invoked (with no arguments) once per
# document.  A newline is printed after the scan when the object is verbose.
#
# Dies if no (true) 'collection' argument is supplied.
sub scan_features {
  my ($self, %args) = @_;

  my $collection = $args{collection};
  die "No 'collection' parameter provided to scan_features()"
    unless $collection;

  my $tally    = $self->create_delayed_object('features');
  my $progress = $args{prog_bar};

  while (1) {
    # Stop as soon as the collection hands back a false value.
    my $document = $collection->next or last;

    $progress->() if $progress;
    $tally->add( $document->features->as_boolean_hash );
  }

  if ($self->verbose) {
    print "\n";
  }

  return $self->reduce_features($tally);
}
1;
__END__
=head1 NAME
AI::Categorizer::FeatureSelector::DocFrequency - Feature selection based on document frequency
=head1 SYNOPSIS
...
=head1 DESCRIPTION
The KnowledgeSet class provides an interface to a set of
documents, a set of categories, and a mapping between the two. Many
parameters for controlling the processing of documents are managed by
the KnowledgeSet class.
=head1 METHODS
=over 4
=item new()
Creates a new KnowledgeSet and returns it. Accepts the following
parameters:
( run in 0.520 second using v1.01-cache-2.11-cpan-4d50c553e7e )