Algorithm-FeatureSelection
view release on metacpan or search on metacpan
lib/Algorithm/FeatureSelection.pm view on Meta::CPAN
return $IGR;
}
sub entropy {
my $self = shift;
my $data = shift;
my @ratio;
if ( ref $data eq 'HASH' ) {
@ratio = _ratio( [ values %$data ] );
}
elsif ( ref $data eq 'ARRAY' ) {
my $s = sum(@$data) || 0;
if ( $s == 1 ) {
@ratio = @$data;
}
else {
@ratio = _ratio($data);
}
}
my $entropy;
for my $p (@ratio) {
if ( $p <= 0 ) {
$p = 0.000000000000000000000001;
}
$entropy += -$p * _log2($p);
}
return $entropy;
}
sub split_information {
my $self = shift;
my $data = shift;
my $all = int keys %$data;
my $s;
while ( my ( $w, $ref ) = each %$data ) {
for my $category ( keys %$ref ) {
$s->{$category}++;
}
}
my @array;
while ( my ( $category, $num ) = each %$s ) {
push @array, $num / $all;
}
my $SI = $self->entropy( \@array );
return $SI;
}
sub _ratio {
my $arrayref = shift;
my @ratio;
my $sum = sum(@$arrayref);
for (@$arrayref) {
next if $_ <= 0;
eval { push @ratio, $_ / $sum; };
if ($@) {
use Data::Dumper;
print Dumper $arrayref;
die($@);
}
}
return @ratio;
}
sub _log2 {
my $n = shift;
log($n) / log(2);
}
1;
__END__
=head1 NAME
Algorithm::FeatureSelection -
=head1 SYNOPSIS
use Algorithm::FeatureSelection;
my $fs = Algorithm::FeatureSelection->new();
# feature-class data structure ...
my $features = {
feature_1 => {
class_a => 10,
class_b => 2,
},
feature_2 => {
class_b => 11,
class_d => 32
},
.
.
.
};
# get pairwise-mutula-information
my $pmi = $fs->pairwise_mutual_information($features);
my $pmi = $fs->pmi($features); # same above
# get information-gain
my $ig = $fs->information_gain($features);
my $ig = $fs->ig($features); # same above
=head1 DESCRIPTION
This library is an perl implementation of 'Pairwaise Mutual Information' and 'Information Gain'
that are used as well-known method of feature selection on text mining fields.
=head1 METHOD
=head2 new()
=head2 information_gain( $features )
( run in 1.546 second using v1.01-cache-2.11-cpan-524268b4103 )