AI-Perceptron-Simple
view release on metacpan or search on metacpan
lib/AI/Perceptron/Simple.pm view on Meta::CPAN
=head2 tame ( ... )
=head2 exercise ( ... )
=head2 train ( $stimuli_train_csv, $expected_output_header, $save_nerve_to_file )
=head2 train ( $stimuli_train_csv, $expected_output_header, $save_nerve_to_file, $display_stats, $identifier )
Trains the perceptron.
C<$stimuli_train_csv> is the set of data / input (in CSV format) to train the perceptron while C<$save_nerve_to_file> is
the filename that will be generated each time the perceptron finishes the training process. This data file holds the data of the C<AI::Perceptron::Simple>
object and it is used in the C<validate> method.
C<$expected_output_header> is the header name of the column in the csv file with the actual category or the expected values. This is used to determine whether to tune the nerve up or down. This value should only be 0 or 1 for the sake of simplicity.
C<$display_stats> is B<optional> and the default is 0. It will display more output about the tuning process. It will show the following:
=over 4
=item tuning status
Indicates the nerve was tuned up, down or no tuning needed
=item old sum
The original sum of all C<weightage * input> or C<dendrite_size * binary_input>
=item threshold
The threshold of the nerve
=item new sum
The new sum of all C<weightage * input> after fine-tuning the nerve
=back
If C<$display_stats> is specified i.e. set to C<1>, then you B<MUST> specify the C<$identifier>. C<$identifier> is the column / header name that is used to identify a specific row of data in C<$stimuli_train_csv>.
=cut
sub tame {
# Synonym of train(); hands the complete argument list through unchanged.
return train( @_ );
}
sub exercise {
# Synonym of train(); hands the complete argument list through unchanged.
return train( @_ );
}
# Trains the perceptron against a CSV of stimuli, tuning the nerve's weights
# row by row until each row's output matches its expected value.
# NOTE(review): this sub is truncated in the visible source; the loop exits for
# the remaining output/expected combinations and the saving of the nerve to
# $save_nerve_to_file are presumably below — confirm against the full file.
sub train {
my $self = shift;
my( $stimuli_train_csv, $expected_output_header, $save_nerve_to_file, $display_stats, $identifier ) = @_;
# stats display is off unless the caller explicitly turns it on
$display_stats = 0 if not defined $display_stats;
# the stats output labels each row by $identifier, so it must be given when stats are on
if ( $display_stats and not defined $identifier ) {
croak "Please specifiy a string for \$identifier if you are trying to display stats";
}
# CSV processing is all according to the documentation of Text::CSV
open my $data_fh, "<:encoding(UTF-8)", $stimuli_train_csv
or croak "Can't open $stimuli_train_csv: $!";
my $csv = Text::CSV->new( {auto_diag => 1, binary => 1} );
# first row is the header; it names the columns for getline_hr below
my $attrib = $csv->getline($data_fh);
$csv->column_names( $attrib );
# individual row
ROW: while ( my $row = $csv->getline_hr($data_fh) ) {
# debugging aid (disabled); original labels were mojibake and have been removed
# print $row->{book_name}, " -> ";
# print $row->{$expected_output_header} ? "<label 1>\n" : "<label 0>\n";
# calculate the output and fine tune parameters if necessary
# re-evaluate the same row until its output agrees with the expected value
while (1) {
my $output = _calculate_output( $self, $row );
#print "Sum = ", $output, "\n";
# $expected_output_header to be checked together over here
# if output >= threshold
# then category/result aka output is considered 1
# else output considered 0
# output expected/actual tuning
# 0 0 -
# 1 0 down
# 0 1 up
# 1 1 -
if ( ($output >= $self->threshold) and ( $row->{$expected_output_header} eq 0 ) ) {
# nerve fired but should not have: weaken the weights
_tune( $self, $row, TUNE_DOWN );
if ( $display_stats ) {
print $row->{$identifier}, "\n";
print " -> TUNED DOWN";
print " Old sum = ", $output;
print " Threshold = ", $self->threshold;
print " New Sum = ", _calculate_output( $self, $row ), "\n";
}
} elsif ( ($output < $self->threshold) and ( $row->{$expected_output_header} eq 1 ) ) {
# nerve stayed quiet but should have fired: strengthen the weights
_tune( $self, $row, TUNE_UP );
if ( $display_stats ) {
print $row->{$identifier}, "\n";
print " -> TUNED UP";
print " Old sum = ", $output;
print " Threshold = ", $self->threshold;
print " New Sum = ", _calculate_output( $self, $row ), "\n";
}
} elsif ( ($output < $self->threshold) and ( $row->{$expected_output_header} eq 0 ) ) {
# correct prediction (0/0): nothing to tune, move to the next row
if ( $display_stats ) {
print $row->{$identifier}, "\n";
print " -> NO TUNING NEEDED";
print " Sum = ", _calculate_output( $self, $row );
print " Threshold = ", $self->threshold, "\n";
}
next ROW;
lib/AI/Perceptron/Simple.pm view on Meta::CPAN
=head2 _real_validate_or_test ( $data_hash_ref )
This is where the actual validation or testing takes place.
C<$data_hash_ref> is the list of parameters passed into the C<validate> or C<test> methods.
This is a B<method>, so use the OO way. This is one of the exceptions to the rules where private subroutines are treated as methods :)
=cut
# Carries out the actual validation/testing: fills the predicted column of the
# stimuli CSV and writes the result back out. Called by validate() and test()
# with their parameter hash ref.
sub _real_validate_or_test {
my $self = shift;
my $data_hash_ref = shift;

#####
# both of these keys are mandatory, complain about all the absent ones at once
my @missing_keys = grep { not exists $data_hash_ref->{ $_ } }
qw( stimuli_validate predicted_column_index );
croak "Missing keys: @missing_keys" if @missing_keys;
#####

my $stimuli_validate = $data_hash_ref->{ stimuli_validate };
my $predicted_index = $data_hash_ref->{ predicted_column_index };

# actual processing starts here
# with no results_write_to given, overwrite the validation file itself
my $output_file = $stimuli_validate;
$output_file = $data_hash_ref->{ results_write_to }
if defined $data_hash_ref->{ results_write_to };

# open for writing results
my $aoa = csv( in => $stimuli_validate, encoding => ":encoding(utf-8)" );
my $header_row = shift @$aoa; # 'remove' the header, it's annoying :)

$aoa = _fill_predicted_values( $self, $stimuli_validate, $predicted_index, $aoa );

# put back the array of headers before saving file
unshift @$aoa, $header_row;

print "Saving data to $output_file\n";
csv( in => $aoa, out => $output_file, encoding => ":encoding(utf-8)" );
print "Done saving!\n";
}
=head2 &_fill_predicted_values ( $self, $stimuli_validate, $predicted_index, $aoa )
This is where the filling in of the predicted values takes place. Take note that the parameter names are the same as the ones used in the C<validate> and C<test> methods.
This subroutine should be called in the procedural way.
=cut
# Writes the nerve's prediction (1 if the computed sum reaches the threshold,
# otherwise 0) into column $predicted_index of every row of $aoa, reading the
# stimuli back from $stimuli_validate. Returns the modified $aoa.
# Called procedurally, not as a method.
sub _fill_predicted_values {
my ( $self, $stimuli_validate, $predicted_index, $aoa ) = @_;

# CSV processing is all according to the documentation of Text::CSV
open my $data_fh, "<:encoding(UTF-8)", $stimuli_validate
or croak "Can't open $stimuli_validate: $!";

my $csv = Text::CSV->new( {auto_diag => 1, binary => 1} );
# header row supplies the column names for getline_hr
$csv->column_names( $csv->getline($data_fh) );

# walk the data rows in step with the rows of $aoa
my $row_index = 0;
while ( my $data = $csv->getline_hr($data_fh) ) {
# 1 when the nerve fires (sum >= threshold), 0 otherwise
$aoa->[ $row_index ][ $predicted_index ]
= ( _calculate_output( $self, $data ) >= $self->threshold ) ? 1 : 0;
$row_index++;
}

close $data_fh;
$aoa;
}
=head1 RESULTS RELATED SUBROUTINES/METHODS
This part is related to generating the confusion matrix.
=head2 get_exam_results ( ... )
The parameters and usage are the same as C<get_confusion_matrix>. See the next method.
=head2 get_confusion_matrix ( \%options )
Returns the confusion matrix in the form of a hash. The hash will contain these keys: C<true_positive>, C<true_negative>, C<false_positive>, C<false_negative>, C<accuracy>, C<sensitivity>. More stats like C<precision>, C<specificity> and C<F1_Score> ...
If you are trying to manipulate the confusion matrix hash or something, take note that all the stats are in percentage (%) in decimal (if any) except the total entries.
For C<%options>, the followings are needed unless mentioned:
=over 4
=item full_data_file => $filled_test_file
This is the CSV file filled with the predicted values.
Make sure that you don't do anything to the actual and predicted output in this file after testing the nerve. These two columns must contain binary values only!
=item actual_output_header => $actual_column_name
=item predicted_output_header => $predicted_column_name
The binary values are treated as follows:
=over 4
=item C<0> is negative
=item C<1> is positive
=back
=item more_stats => 1
Optional.
Setting it to C<1> will process more stats that are usually not so important eg. C<precision>, C<specificity> and C<F1_Score>
=back
=cut
sub get_exam_results {
# Synonym of get_confusion_matrix(); same parameters, same return value.
my ( $self, $info ) = @_;
return $self->get_confusion_matrix( $info );
}
sub get_confusion_matrix {
my ( $self, $info ) = @_;
# _collect_stats does the real work: totals, accuracy, sensitivity etc.
my %matrix = _collect_stats( $info );
return %matrix;
}
=head2 &_collect_stats ( \%options )
Generates a hash of confusion matrix based on C<%options> given in the C<get_confusion_matrix> method.
=cut
# Builds the confusion-matrix hash from the filled test/validation CSV.
# NOTE(review): this sub is truncated in the visible source; the closing of the
# more_stats block and the return of %c_matrix are presumably below — confirm
# against the full file.
sub _collect_stats {
my $info = shift;
my $file = $info->{ full_data_file };
my $actual_header = $info->{ actual_output_header };
my $predicted_header = $info->{ predicted_output_header };
# any defined value switches the extra stats on, not only the documented 1
my $more_stats = defined ( $info->{ more_stats } ) ? 1 : 0;
# start every counter at 0 so empty files still produce a complete hash
my %c_matrix = (
true_positive => 0, true_negative => 0, false_positive => 0, false_negative => 0,
accuracy => 0, sensitivity => 0
);
# CSV processing is all according to the documentation of Text::CSV
open my $data_fh, "<:encoding(UTF-8)", $file
or croak "Can't open $file: $!";
my $csv = Text::CSV->new( {auto_diag => 1, binary => 1} );
my $attrib = $csv->getline($data_fh); # get the row of headers, can't specify any column
# shouldn't be a problem, since we're reading line by line :)
$csv->column_names( $attrib );
# individual row
while ( my $row = $csv->getline_hr($data_fh) ) {
# don't pack this part into another subroutine, number of rows can be very big
# tally each actual/predicted pair into one of the four matrix cells
if ( $row->{ $actual_header } == 1 and $row->{ $predicted_header } == 1 ) {
# true positive
$c_matrix{ true_positive }++;
} elsif ( $row->{ $actual_header } == 0 and $row->{ $predicted_header } == 0 ) {
# true negative
$c_matrix{ true_negative }++;
} elsif ( $row->{ $actual_header } == 1 and $row->{ $predicted_header } == 0 ) {
# false negative
$c_matrix{ false_negative }++;
} elsif ( $row->{ $actual_header } == 0 and $row->{ $predicted_header } == 1 ) {
# false positive
$c_matrix{ false_positive }++;
} else {
# non-binary values mean the columns were altered after testing
croak "Something's wrong!\n".
"Make sure that the actual and predicted values in your file are binary ie 0 or 1" ;
}
}
close $data_fh;
# derive the percentage stats from the raw counts, writing into %c_matrix
_calculate_total_entries( \%c_matrix );
_calculate_sensitivity( \%c_matrix );
_calculate_accuracy( \%c_matrix );
if ( $more_stats == 1 ) {
_calculate_precision( \%c_matrix );
_calculate_specificity( \%c_matrix );
_calculate_f1_score( \%c_matrix );
# unimplemented, some more left
_calculate_negative_predicted_value( \%c_matrix ); #
_calculate_false_negative_rate( \%c_matrix ); #
_calculate_false_positive_rate( \%c_matrix ); #
( run in 1.706 second using v1.01-cache-2.11-cpan-39bf76dae61 )