AI-Categorizer

 view release on metacpan or  search on metacpan

lib/AI/Categorizer/FeatureVector.pm  view on Meta::CPAN

package AI::Categorizer::FeatureVector;

sub new {
  my ($package, %args) = @_;
  $args{features} ||= {};
  return bless {features => $args{features}}, $package;
}

sub names {
  my $self = shift;
  return keys %{$self->{features}};
}

sub set {
  my $self = shift;
  $self->{features} = (ref $_[0] ? $_[0] : {@_});
}

sub as_hash {
  my $self = shift;
  return $self->{features};
}

sub euclidean_length {
  my $self = shift;
  my $f = $self->{features};

  my $total = 0;
  foreach (values %$f) {
    $total += $_**2;
  }
  return sqrt($total);
}

sub normalize {
  my $self = shift;

  my $length = $self->euclidean_length;
  return $length ? $self->scale(1/$length) : $self;
}

sub scale {
  my ($self, $scalar) = @_;
  $_ *= $scalar foreach values %{$self->{features}};
  return $self;
}

sub as_boolean_hash {
  my $self = shift;
  return { map {($_ => 1)} keys %{$self->{features}} };
}

sub length {
  my $self = shift;
  return scalar keys %{$self->{features}};
}

sub clone {
  my $self = shift;
  return ref($self)->new( features => { %{$self->{features}} } );
}

sub intersection {
  my ($self, $other) = @_;
  $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);

  my $common;
  if (UNIVERSAL::isa($other, 'ARRAY')) {
    $common = {map {exists $self->{features}{$_} ? ($_ => $self->{features}{$_}) : ()} @$other};
  } elsif (UNIVERSAL::isa($other, 'HASH')) {
    $common = {map {exists $self->{features}{$_} ? ($_ => $self->{features}{$_}) : ()} keys %$other};
  }
  return ref($self)->new( features => $common );
}

sub add {
  my ($self, $other) = @_;

  $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);
  while (my ($k,$v) = each %$other) {
    $self->{features}{$k} += $v;
  }
}

sub dot {
  my ($self, $other) = @_;
  $other = $other->as_hash if UNIVERSAL::isa($other, __PACKAGE__);

  my $sum = 0;
  my $f = $self->{features};
  while (my ($k, $v) = each %$f) {
    $sum += $other->{$k} * $v if exists $other->{$k};
  }
  return $sum;
}

sub sum {
  my ($self) = @_;

  # Return total of values in this vector
  my $total = 0;
  $total += $_ foreach values %{ $self->{features} };
  return $total;
}

sub includes {
  return exists $_[0]->{features}{$_[1]};
}

sub value {
  return $_[0]->{features}{$_[1]};
}

sub values {
  my $self = shift;
  return @{ $self->{features} }{ @_ };
}

1;
__END__

=head1 NAME

AI::Categorizer::FeatureVector - Features vs. Values

=head1 SYNOPSIS

  my $f1 = new AI::Categorizer::FeatureVector
    (features => {howdy => 2, doody => 3});
  my $f2 = new AI::Categorizer::FeatureVector
    (features => {doody => 1, whopper => 2});



( run in 0.956 second using v1.01-cache-2.11-cpan-140bd7fdf52 )