Algorithm-ContextVector

 view release on metacpan or  search on metacpan

lib/Algorithm/ContextVector.pm  view on Meta::CPAN


    my $attr = $args{attributes} or die 'attributes required';
    return unless keys %$attr;

    my $labels = $args{label} or die 'label required';
    $labels = [ $labels ] unless ref $labels;

    for $_ (@$labels) {
        $self->{labels}{$_}{features} ||= {};
        $self->{labels}{$_}{features} = $self->_add_hashrefs(
            $self->{labels}{$_}{features}, $attr
        );
    }
}

sub _norm_features {
    my $self = shift;
    my ($features) = @_;
    my $norm = 0;
    $norm += $_**2 for values %$features;
    $norm = sqrt($norm);
   
    $_ = $_ / $norm for values %$features;

    return $features;
}

sub _cut_features {
    my $self = shift;
    my ($features) = @_;
    my $top = $self->{top};
    return $features unless defined $top;

    my @sorted =
        sort { $b->[1] <=> $a->[1] } 
        map { [ $_, $features->{$_} ] }
        keys %$features;

    my @keep = splice @sorted, 0, $top;

    my $r = { map { $_->[0] => $_->[1] } @keep };

    return $r;
}

# IDEA dead code for now
sub _cut_features_avg {
    my $features = shift; 
    my $sum = 0;
    $sum += $_ for values %$features;
    my $count = scalar keys %$features;
    my $cut = $sum / $count; # hum cut at the avg
    for (keys %$features) {
        delete $features->{$_} if $features->{$_} < $cut;
    }
    return $features;
}

=head2 $self->train

Keeps the best features (top N) and norms the vectors.

=cut

sub train {
    my $self = shift;
    for $_ (keys %{ $self->{labels} }) {
        $self->{labels}{$_}{features} = $self->_cut_features( $self->{labels}{$_}{features} );
        $self->{labels}{$_}{features} = $self->_norm_features( $self->{labels}{$_}{features} );
    }
}

=head2 $self->predict( attributes => { ... } )

Returns a hashref with the labels as the keys and the cosines as the values.

=cut

sub predict {
    my $self = shift;
    my %args = @_;

    my $attr = $args{attributes} or die 'attributes required';
   
    my $cs = Data::CosineSimilarity->new( normed => 1 );

    for my $label (keys %{ $self->{labels} }) {
        $cs->add( $label => $self->{labels}{$label}{features} );
    }
    
    $cs->add( __my_test => $self->_norm_features( $self->_cut_features( $attr ) ) );

    my @all = $cs->all_for_label('__my_test');
    my %r;
    for (@all) {
        my (undef, $label) = $_->labels;
        $r{$label} = $_->cosine;
    }
    return \%r;
}

=head1 AUTHOR

Antoine Imbert, C<< <antoine.imbert at gmail.com> >>

=head1 LICENSE AND COPYRIGHT

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut

1;



( run in 2.338 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )