AI-ConfusionMatrix

 view release on metacpan or  search on metacpan

lib/AI/ConfusionMatrix.pm  view on Meta::CPAN

package AI::ConfusionMatrix;
$AI::ConfusionMatrix::VERSION = '0.010';
use strict;
use warnings;
use Carp;
use Exporter 'import';
our @EXPORT= qw (getConfusionMatrix makeConfusionMatrix);
use strict;
use Tie::File;

# ABSTRACT: Make a confusion matrix

sub makeConfusionMatrix {
    my ($matrix, $file, $delem) = @_;
    unless(defined $delem) {
        $delem = ',';
    }

    carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';

    my %cmData = genConfusionMatrixData($matrix);
    # This ties @output_array to the output file. Each output_array item represents a line in the output file
    tie my @output_array, 'Tie::File', $file or carp "$!";
    # Empty the file
    @output_array = ();

    my @columns = @{$cmData{columns}};
    map {$output_array[0] .= $delem . $_} join $delem, (@columns, 'TOTAL', 'TP', 'FP', 'FN', 'SENS', 'ACC');
    my $line = 1;
    my @expected = sort keys %{$matrix};
    for my $expected (@expected) {
        $output_array[$line] = $expected;
        my $lastIndex = 0;
        my $index;
        for my $predicted (sort keys %{$matrix->{$expected}}) {
            # Calculate the index of the label in the output_array of columns
            $index = _findIndex($predicted, \@columns);
            # Print some of the delimiter to get to the column of the next value predicted
            $output_array[$line] .= $delem x ($index - $lastIndex) . $matrix->{$expected}{$predicted};
            $lastIndex = $index;
        }

        # Get to the columns of the stats
        $output_array[$line] .= $delem x (scalar(@columns) - $lastIndex + 1);
        $output_array[$line] .= join $delem, (
                                    $cmData{stats}{$expected}{'total'},
                                    $cmData{stats}{$expected}{'tp'},
                                    $cmData{stats}{$expected}{'fp'},
                                    $cmData{stats}{$expected}{'fn'},
                                    sprintf('%.2f%%', $cmData{stats}{$expected}{'sensitivity'}),
                                    sprintf('%.2f%%', $cmData{stats}{$expected}{'acc'})
                                   );
        ++$line;
    }
    # Print the TOTAL row to the csv file
    $output_array[$line] = 'TOTAL' . $delem;
    map {$output_array[$line] .= $cmData{totals}{$_} . $delem} (@columns);
    $output_array[$line] .= join $delem, (
                                $cmData{totals}{'total'},
                                $cmData{totals}{'tp'},
                                $cmData{totals}{'fp'},
                                $cmData{totals}{'fn'},
                                sprintf('%.2f%%', $cmData{totals}{'sensitivity'}),
                                sprintf('%.2f%%', $cmData{totals}{'acc'})
                            );

    untie @output_array;
}

sub getConfusionMatrix {
    my ($matrix) = @_;

    carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
    return genConfusionMatrixData($matrix);
}

sub genConfusionMatrixData {
    my $matrix = shift;
    my @expected = sort keys %{$matrix};
    my %stats;
    my %totals;
    my @columns;
    for my $expected (@expected) {
        $stats{$expected}{'fn'} = 0;
        $stats{$expected}{'tp'} = 0;
        # Ensure that the False Positive counter is defined to be able to compute the total later
        unless(defined $stats{$expected}{'fp'}) {
            $stats{$expected}{'fp'} = 0;
        }
        for my $predicted (keys %{$matrix->{$expected}}) {
            $stats{$expected}{'total'} += $matrix->{$expected}->{$predicted};
            $stats{$expected}{'tp'} += $matrix->{$expected}->{$predicted} if $expected eq $predicted;
            if ($expected ne $predicted) {
                $stats{$expected}{'fn'} += $matrix->{$expected}->{$predicted};
                $stats{$predicted}{'fp'} += $matrix->{$expected}->{$predicted};
            }
            $totals{$predicted} += $matrix->{$expected}->{$predicted};
            # Add the label to the array of columns if it does not contain it already
            push @columns, $predicted unless _findIndex($predicted, \@columns);
        }

        $stats{$expected}{'acc'} = ($stats{$expected}{'tp'} * 100) / $stats{$expected}{'total'};
    }

    for my $expected (@expected) {
        $totals{'total'} += $stats{$expected}{'total'};
        $totals{'tp'}    += $stats{$expected}{'tp'};
        $totals{'fn'}    += $stats{$expected}{'fn'};
        $totals{'fp'}    += $stats{$expected}{'fp'};
        $stats{$expected}{'sensitivity'} = ($stats{$expected}{'tp'} * 100) / ($stats{$expected}{'tp'} + $stats{$expected}{'fp'});
    }

    $totals{'acc'} = ($totals{'tp'} * 100) / $totals{'total'};
    $totals{'sensitivity'} = ($totals{'tp'} * 100) / ($totals{'tp'} + $totals{'fp'});

    return (
        columns => [sort @columns],



( run in 1.550 second using v1.01-cache-2.11-cpan-140bd7fdf52 )