AI-Categorizer
view release on metacpan or search on metacpan
lib/AI/Categorizer/FeatureVector.pm view on Meta::CPAN
package AI::Categorizer::FeatureVector;
# Constructor.  Takes named arguments; only 'features' is used, and it is
# expected to be a hash reference mapping feature names to values.  When it
# is missing or false, a fresh empty hash is used instead.  The supplied
# hash reference is stored as-is (it is not copied).
sub new {
    my $package = shift;
    my %args    = @_;

    my $features = $args{features} || {};
    return bless { features => $features }, $package;
}
# Returns the feature names, i.e. the keys of the feature hash, in whatever
# order keys() yields them.  In scalar context this is the number of keys.
sub names {
    my ($self) = @_;
    my $features = $self->{features};
    return keys %$features;
}
# Replaces the whole feature hash.  If the first argument is a reference it
# is stored directly (no copy is made); otherwise the argument list is
# treated as name => value pairs and packed into a new hash.
# Returns the feature hash that was stored.
sub set {
    my ($self, @args) = @_;
    my $features = ref $args[0] ? $args[0] : { @args };
    return $self->{features} = $features;
}
# Returns the underlying feature hash reference itself (not a copy), so
# changes made through the returned reference affect this vector.
sub as_hash {
    my ($self) = @_;
    return $self->{features};
}
# Returns the Euclidean (L2) norm of the vector: the square root of the sum
# of the squared feature values.  An empty vector has length 0.
sub euclidean_length {
    my ($self) = @_;

    my $sum_of_squares = 0;
    for my $value (values %{ $self->{features} }) {
        $sum_of_squares += $value ** 2;
    }
    return sqrt $sum_of_squares;
}
# Scales the vector in place so that its Euclidean length becomes 1.
# A vector whose length is zero is left untouched, which also avoids a
# division by zero.  Returns the vector itself in both cases.
sub normalize {
    my ($self) = @_;

    my $length = $self->euclidean_length;
    return $self unless $length;
    return $self->scale(1 / $length);
}
# Multiplies every feature value by $scalar, modifying the vector in place.
# Returns the vector itself so calls can be chained.
sub scale {
    my $self   = shift;
    my $scalar = shift;

    my $features = $self->{features};
    for my $name (keys %$features) {
        $features->{$name} *= $scalar;
    }
    return $self;
}
# Returns a new hash reference with the same keys as the feature hash and
# every value set to 1.  The vector itself is not modified.
sub as_boolean_hash {
    my ($self) = @_;

    my %present;
    $present{$_} = 1 for keys %{ $self->{features} };
    return \%present;
}
# Returns the number of features stored in the vector (the count of keys in
# the feature hash), not its Euclidean length.
sub length {
    my ($self) = @_;
    my $count = keys %{ $self->{features} };
    return $count;
}
# Returns a new vector of the same class holding a shallow copy of the
# feature hash, so changing the clone's features leaves this vector alone.
sub clone {
    my ($self) = @_;
    my %copy = %{ $self->{features} };
    return ref($self)->new(features => \%copy);
}
# Returns a new vector of the same class containing only those features of
# this vector whose names also appear in $other.  The values are taken from
# this vector.  $other may be another feature vector, an array reference of
# feature names, or a hash reference whose keys are feature names.
# For any other kind of $other no feature hash is passed to the constructor.
sub intersection {
    my ($self, $other) = @_;
    $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);

    # Reduce both accepted shapes to a plain list of candidate names.
    my $names;
    if (UNIVERSAL::isa($other, 'ARRAY')) {
        $names = $other;
    }
    elsif (UNIVERSAL::isa($other, 'HASH')) {
        $names = [ keys %$other ];
    }

    my $common;
    if (defined $names) {
        $common = {};
        for my $name (@$names) {
            next unless exists $self->{features}{$name};
            $common->{$name} = $self->{features}{$name};
        }
    }
    return ref($self)->new(features => $common);
}
# Adds $other to this vector in place: for every feature in $other its value
# is added to the value of the same feature here, creating the feature when
# this vector does not have it yet.  $other may be another feature vector or
# a plain hash reference of name => value pairs.
# Returns the vector itself so calls can be chained, like scale().
sub add {
    my ($self, $other) = @_;
    $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);

    # Walk the hash with keys() rather than each(): each() continues from
    # wherever a previous, unfinished iteration over the same hash stopped,
    # which would silently skip features.  keys() always starts afresh.
    for my $name (keys %$other) {
        $self->{features}{$name} += $other->{$name};
    }
    return $self;
}
# Returns the dot product of this vector with $other: the sum, over all
# features present in both, of the product of the two values.  $other may be
# another feature vector or a plain hash reference of name => value pairs.
# Returns 0 when the two have no features in common.
sub dot {
    my ($self, $other) = @_;
    $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);

    my $mine = $self->{features};
    my $sum  = 0;

    # Walk the hash with keys() rather than each(): the feature hash is
    # exposed through as_hash(), so a caller may have left its each()
    # iterator part-way through, and each() would then resume from there
    # and silently drop terms.  keys() always starts afresh.
    for my $name (keys %$mine) {
        next unless exists $other->{$name};
        $sum += $other->{$name} * $mine->{$name};
    }
    return $sum;
}
# Returns the total of all feature values in this vector (0 when empty).
sub sum {
    my $self = shift;

    my $total = 0;
    for my $value (values %{ $self->{features} }) {
        $total += $value;
    }
    return $total;
}
# Returns true when the named feature is present in the vector.  Only the
# existence of the key is checked, so a feature with value 0 still counts.
sub includes {
    my ($self, $name) = @_;
    return exists $self->{features}{$name};
}
# Returns the value stored for the named feature, or undef when the vector
# has no such feature.
sub value {
    my ($self, $name) = @_;
    return $self->{features}{$name};
}
# Returns the values of the named features, in the order the names were
# given.  Names that are not present in the vector yield undef.
sub values {
    my ($self, @names) = @_;
    my $features = $self->{features};
    # Kept as a hash slice so the result matches one slot per requested name.
    return @{$features}{@names};
}
1;
__END__
=head1 NAME

AI::Categorizer::FeatureVector - Features vs. Values

=head1 SYNOPSIS

  my $f1 = new AI::Categorizer::FeatureVector
    (features => {howdy => 2, doody => 3});
  my $f2 = new AI::Categorizer::FeatureVector
    (features => {doody => 1, whopper => 2});
( run in 0.956 second using v1.01-cache-2.11-cpan-140bd7fdf52 )