AI-Perceptron-Simple

 view release on metacpan or  search on metacpan

docs/specifications.t  view on Meta::CPAN

#!/usr/bin/perl

use AI::Perceptron::Simple;
use Test::More;

# This file only records the specification, so the whole test is skipped.
# Per the Test::More documentation, a skip_all plan ends the script right
# away, so the statements further down are reference material only.
plan skip_all => 'This is just the specification';
done_testing;

######### specifications ##############
#
# This specification is based on My::Perceptron (see my github repo) and packed into AI::Perceptron::Simple v1.00
#
# Version 0.01 - completed on 8 August 2021
#   [v] able to create perceptron
#   [v] able to process data: &train method
#       [v] read csv - for training stage
#   [v] able to save the actual perceptron object and load it back
#
# Version 0.02 - completed on 17 August 2021
#   [v] implement output algorithm for train and finalize it
#   [v] read and calculate data line by line, not bulk, so no shuffling method
#   [v] implement validate method
#       [v] read csv bulk - for validating and testing stages
#       [v] write into a new csv file - validation and testing stages
#   [v] implement testing method
#       [v] read csv bulk - for validating and testing stages
#       [v] write into a new csv file - validation and testing stages
#
# Version 0.03 - completed on 19 August 2021
#   [v] implement confusion matrix
#       [v] read only expected and predicted columns, line by line
#       [v] return a hash of data
#           [v] TP, TN, FP, FN
#           [v] accuracy
#           [v] sensitivity
#   [v] remove the return value for "train" method
#   [v] display confusion matrix data to console
#       [v] use Text::Matrix
#
# Version 0.04 / Version 1.0 - completed on 23 AUGUST 2021
#   [v] add synonyms
#       [v] synonyms MUST call the actual subroutines, not be copy-pasted duplicates!
#       train: [v] tame  [v] exercise
#       validate: [v] take_mock_exam  [v] take_lab_test
#       test: [v] take_real_exam  [v] work_in_real_world
#       generate_confusion_matrix: [v] get_exam_results
#       display_confusion_matrix: [v] display_exam_results
#       save_perceptron: [v] preserve
#       load_perceptron: [v] revive
#
# Version 1.01
#   [v] fixed currently known issues as much as possible (see 'Changes')
#       - "long size integer" === "byte order not compatible"
#
# Version 1.02
#   [v] minimum perl version changed to 5.8 due to Test::Output
#   [v] YAML (nerve file) for portability
#       [v] make subroutines exportable, the names are too long
#           [v] :local_data
#           [v] :portable_data
#   [v] fix test for display_confusion_matrix
#       [v] regex modifier "n" (perl 5.22 and above) replaced with plain '(?:...)' groups; requiring 5.22 is too high
#       [v] fixed inaccurate test for output part
#   [v] clean & refactor code
#       [v] refactored &display_confusion_matrix
#   [v] improve the documentation
#
# Version 1.03
#   [v] data processing: shuffle data + import tag
#   [v] add more useful data to the confusion matrix
#       [v] sum of column and rows to make it look more classic :)
#   [v] optional option to show more stats
#       [v] precision    [v] specificity    [v] F1 score
#       [v] negative_predicted_value    [v] false_negative_rate    [v] false_positive_rate
#       [v] false_discovery_rate    [v] false_omission_rate    [v] balanced_accuracy
#
# Version 1.04
#   [v] fix docs
#   [v] change die to croak for file opening
#   [v] fixed yaml nerve not loading back as an AI::Perceptron::Simple object
#
# Version 1.05
#   -colorise the confusion matrix if option is enabled
#       - sum: green
#       - etc
#   -add public function:
#       -predict result from non-csv input (single row), might be useful when gui is involved
#
# Version 1.06
#   -add a simple tutorial
#   -smart tuning feature: automatically increase/decrease learning_rate in multiples in training stage
#
# Version ?.??
#   ? implement shuffling system into training stage, bulk data processing   
#   ? Data processing: splitting data, k-fold
#   -...
#
#
############ "flow" of the code ############

# Illustrative call sequence for the train -> validate -> test workflow.
# The variables used below are not declared anywhere in this file; the
# statements are a sketch of the intended API, not a runnable program.
#
# these three steps could be done in separate scripts if necessary
# &train and &validate could be put inside a loop or something
# the parameters make more sense when they are taken from @ARGV
    # so when it's the first time training, it will create the nerve_file,
    # the second time and up it will directly override that file since everything is read from it
    # ... anyway :) after all, the training stage wasn't meant to be a fully working program, so it shouldn't be a problem
# just assume that the calls look like this:
$perceptron->train( $stimuli_train, $save_nerve_to_file ); 
    # reads training stimuli from csv
    # tunes attributes based on csv data
        # calls the same subroutine to do the calculation
    # shouldn't give any output upon completion
    # should save a copy of itself into a new file
    # returns the nerve's data filename to be used in validate()
        # NOTE(review): the Version 0.03 entry in the list above says the return
        # value of "train" was removed -- confirm whether this still holds
        # these two can go into a loop with condition checking
        # which means that we can actually write this
            # $perceptron->validate( $stimuli_validate, 
            #                        $perceptron->train( $stimuli_train, $save_nerve_to_file ) 
            #                       );
            # and then check the confusion matrix, if not satisfied, run the loop again :)
$perceptron->validate( $stimuli_validate, $nerve_data_to_read );
$perceptron->test( $stimuli_test ); # loads nerve data from the data file, turns it into an object, then does the following:
    # reads from csv :
        # validation stimuli
        # testing stimuli
    # both will call the same subroutine to do the calculation
    # both will write predicted data into the original data file

# show results, i.e. the confusion matrix (TP-true positive, TN-true negative, FP-false positive, FN-false negative)
# this should only be done during validation and testing
$perceptron->generate_confusion_matrix( { 1 => $csv_header_true, 0 => $csv_header_false } );
    # calculates the four values (TP, TN, FP, FN) based on the current data on hand (RAM); it doesn't read from the file again, which shouldn't be a problem
        # returns a hash
    # i.e. it must be used together with validate() and test() to avoid problems
        # i.e. validate() and test() must be in different scripts, which makes sense
        # unless 3 similar objects are created to do the work in one go

# save data of the trained perceptron
# NOTE(review): the synonym list above names this save_perceptron; save_data
# looks like the earlier My::Perceptron name -- confirm
$perceptron->save_data( $data_file );
    # see train() on saving a copy of the perceptron

# load data of the perceptron for use in the actual program
# NOTE(review): the synonym list above names this load_perceptron, and the
# package is now AI::Perceptron::Simple -- confirm
My::Perceptron::load_data( $data_file );
    # loads the perceptron and returns the actual My::Perceptron object
        # should work though as Storable claims it can do that


# besiyata d'shmaya






( run in 0.447 second using v1.01-cache-2.11-cpan-39bf76dae61 )