Algorithm-FeatureSelection

 view release on metacpan or  search on metacpan

lib/Algorithm/FeatureSelection.pm  view on Meta::CPAN

    my $arrayref = shift;
    my @ratio;
    my $sum = sum(@$arrayref);
    for (@$arrayref) {
        next if $_ <= 0;
        eval { push @ratio, $_ / $sum; };
        if ($@) {
            use Data::Dumper;
            print Dumper $arrayref;
            die($@);
        }
    }
    return @ratio;
}

# Base-2 logarithm of a number, derived from the natural logarithm
# via the change-of-base identity: log2(x) = ln(x) / ln(2).
# Perl's built-in log() raises an exception for non-positive input,
# and that exception propagates to the caller unchanged.
sub _log2 {
    my ($value) = @_;
    my $ln_of_two = log 2;
    return log($value) / $ln_of_two;
}

1;
__END__

=head1 NAME

Algorithm::FeatureSelection - feature selection using pairwise mutual information and information gain

=head1 SYNOPSIS

  use Algorithm::FeatureSelection;
  my $fs = Algorithm::FeatureSelection->new();

  # feature-class data structure ...
  my $features = {
    feature_1 => {
        class_a => 10,
        class_b => 2,
    },
    feature_2 => {
        class_b => 11,
        class_d => 32
    },
          .
          .
          .
  };

  # get pairwise-mutual-information
  my $pmi = $fs->pairwise_mutual_information($features);
  my $pmi = $fs->pmi($features); # same as above

  # get information-gain 
  my $ig = $fs->information_gain($features);
  my $ig = $fs->ig($features); # same as above



=head1 DESCRIPTION

This library is a Perl implementation of 'Pairwise Mutual Information' and 'Information Gain',
which are well-known methods of feature selection in the text mining field.

=head1 METHOD

=head2 new()

=head2 information_gain( $features )

  my $features = {
    feature_1 => {
        class_a => 10,
        class_b => 2,
    },
    feature_2 => {
        class_b => 11,
        class_d => 32
    },
          .
          .
          .
  };
  my $fs = Algorithm::FeatureSelection->new();
  my $ig = $fs->information_gain($features);

=head2 ig( $features )

Short name for information_gain().

=head2 information_gain_ratio( $features )

  my $features = {
    feature_1 => {
        class_a => 10,
        class_b => 2,
    },
    feature_2 => {
        class_b => 11,
        class_d => 32
    },
          .
          .
          .
  };
  my $fs = Algorithm::FeatureSelection->new();
  my $igr = $fs->information_gain_ratio($features);

=head2 igr( $features )

Short name for information_gain_ratio().

=head2 pairwise_mutual_information( $features )

  my $features = {
    feature_1 => {
        class_a => 10,
        class_b => 2,
    },
    feature_2 => {
        class_b => 11,
        class_d => 32
    },



( run in 1.783 second using v1.01-cache-2.11-cpan-796a6f069b2 )