Algorithm-FeatureSelection

 view release on metacpan or  search on metacpan

lib/Algorithm/FeatureSelection.pm  view on Meta::CPAN

    return $IGR;
}

sub entropy {
    my $self = shift;
    my $data = shift;

    my @ratio;
    if ( ref $data eq 'HASH' ) {
        @ratio = _ratio( [ values %$data ] );
    }
    elsif ( ref $data eq 'ARRAY' ) {
        my $s = sum(@$data) || 0;
        if ( $s == 1 ) {
            @ratio = @$data;
        }
        else {
            @ratio = _ratio($data);
        }
    }

    my $entropy;
    for my $p (@ratio) {
        if ( $p <= 0 ) {
            $p = 0.000000000000000000000001;
        }

        $entropy += -$p * _log2($p);
    }
    return $entropy;

}

sub split_information {
    my $self = shift;
    my $data = shift;

    my $all = int keys %$data;
    my $s;
    while ( my ( $w, $ref ) = each %$data ) {
        for my $category ( keys %$ref ) {
            $s->{$category}++;
        }
    }
    my @array;
    while ( my ( $category, $num ) = each %$s ) {
        push @array, $num / $all;
    }
    my $SI = $self->entropy( \@array );
    return $SI;
}

sub _ratio {
    my $arrayref = shift;
    my @ratio;
    my $sum = sum(@$arrayref);
    for (@$arrayref) {
        next if $_ <= 0;
        eval { push @ratio, $_ / $sum; };
        if ($@) {
            use Data::Dumper;
            print Dumper $arrayref;
            die($@);
        }
    }
    return @ratio;
}

sub _log2 {
    my $n = shift;
    log($n) / log(2);
}

1;
__END__

=head1 NAME

Algorithm::FeatureSelection -

=head1 SYNOPSIS

  use Algorithm::FeatureSelection;
  my $fs = Algorithm::FeatureSelection->new();

  # feature-class data structure ...
  my $features = {
    feature_1 => {
        class_a => 10,
        class_b => 2,
    },
    feature_2 => {
        class_b => 11,
        class_d => 32
    },
          .
          .
          .
  };

  # get pairwise-mutula-information
  my $pmi = $fs->pairwise_mutual_information($features);
  my $pmi = $fs->pmi($features); # same above

  # get information-gain 
  my $ig = $fs->information_gain($features);
  my $ig = $fs->ig($features); # same above



=head1 DESCRIPTION

This library is an perl implementation of 'Pairwaise Mutual Information' and 'Information Gain' 
that are used as well-known method of feature selection on text mining fields.

=head1 METHOD

=head2 new()

=head2 information_gain( $features )



( run in 1.546 second using v1.01-cache-2.11-cpan-524268b4103 )