AI-Calibrate
Revision history for Perl extension AI::Calibrate.
1.5 Fri Aug 3 2012
- Changes to ./t/AI-Calibrate-1.t to let it pass with almost-equal
numbers.
1.4 Thu Aug 2 2012
- Revised calibration algorithm based on a bug report
- Updated tests in ./t
- Added ./t/AI-Calibrate-KL.t using Kun Liu's dataset.
- Added ./t/AI-Calibrate-pathologies.t to test for pathological cases.
1.3 Fri Nov 4
- Removed dependency on Test::Deep, added explicit declaration of
dependency on Test::More to Makefile.PL
1.2 Thu Nov 3
- Fixed test ./t/AI-Calibrate-NB.t so that it no longer fails spuriously.
It used to call is_deeply, which failed on slight differences between
floating point numbers. It now compares with a small tolerance.
1.1 Thu Feb 28 19:00:06 2008
- Added new function print_mapping
- Added new test file AI-Calibrate-NB.t which, if AI::NaiveBayes1 is
present, trains a classifier and calibrates it.
1.0 Thu Feb 05 11:37:31 2008
- First public release to CPAN.
0.01 Thu Jan 24 11:37:31 2008
- original version; created by h2xs 1.23 with options
-XA -n AI::Calibrate
META.json
{
   "abstract" : "Perl module for producing probabilities from classifier scores",
   "author" : [
      "Tom Fawcett <tfawcett@acm.org>"
   ],
   "dynamic_config" : 1,
   "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112150",
   "license" : [
      "unknown"
   ],
   "meta-spec" : {
      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
      "version" : "2"
   },
   "name" : "AI-Calibrate",
   "no_index" : {
      "directory" : [
         "t",
         "inc"
      ]
   },
   "prereqs" : {
      "build" : {
         "requires" : {
            "ExtUtils::MakeMaker" : 0
         }
      },
      "configure" : {
         "requires" : {
            "ExtUtils::MakeMaker" : 0
         }
      },
      "runtime" : {
         "requires" : {
            "Test::More" : 0
         }
      }
   },
   "release_status" : "stable",
   "version" : "1.5"
}
META.yml
---
abstract: 'Perl module for producing probabilities from classifier scores'
author:
  - 'Tom Fawcett <tfawcett@acm.org>'
build_requires:
  ExtUtils::MakeMaker: 0
configure_requires:
  ExtUtils::MakeMaker: 0
dynamic_config: 1
generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112150'
license: unknown
meta-spec:
  url: http://module-build.sourceforge.net/META-spec-v1.4.html
  version: 1.4
name: AI-Calibrate
no_index:
  directory:
    - t
    - inc
requires:
  Test::More: 0
version: 1.5
Makefile.PL
use 5.008008;
use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
WriteMakefile(
    NAME          => 'AI::Calibrate',
    VERSION_FROM  => 'lib/AI/Calibrate.pm',  # finds $VERSION
    PREREQ_PM     => { 'Test::More' => 0 },  # e.g., Module::Name => 1.1
    ($] >= 5.005 ?    ## Add these new keywords supported since 5.005
      (ABSTRACT_FROM => 'lib/AI/Calibrate.pm', # retrieve abstract from module
       AUTHOR        => 'Tom Fawcett <tfawcett@acm.org>') : ()),
);
Naive Bayes, for example, is a very useful classifier, but the scores it
produces are usually "bunched" around 0 and 1, making these scores poor
probability estimates. Support vector machines have a similar problem. Both
classifier types should be calibrated before their scores are used as
probability estimates. This module calibrates a classifier using the Pool
Adjacent Violators (PAV) algorithm.
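
For illustration only, here is a sketch of the pooling step at the heart of
PAV; it is not this module's actual implementation, and its output differs in
one detail (AI::Calibrate's mapping omits the trailing zero-probability
segment that this sketch returns explicitly). Points are [score, class] pairs
sorted by descending score, and adjacent groups are merged ("pooled")
whenever their mean class values fail to decrease:

    # Hypothetical PAV sketch -- not AI::Calibrate's internals.
    # Input: [score, class] pairs, pre-sorted by descending score.
    sub pav_sketch {
        my @points = @_;
        my @pools;    # each pool: [lowest_score, class_sum, count]
        for my $p (@points) {
            push @pools, [ $p->[0], $p->[1], 1 ];
            # Pooled means must not increase as scores decrease;
            # merge adjacent pools while that order is violated.
            while (@pools >= 2
                   and $pools[-1][1] / $pools[-1][2]
                       >= $pools[-2][1] / $pools[-2][2]) {
                my $top = pop @pools;
                $pools[-1][0]  = $top->[0];   # keep the lower score bound
                $pools[-1][1] += $top->[1];
                $pools[-1][2] += $top->[2];
            }
        }
        return [ map { [ $_->[0], $_->[1] / $_->[2] ] } @pools ];
    }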
INSTALLATION

To install this module type the following:

   perl Makefile.PL
   make
   make test
   make install

DEPENDENCIES

No other modules are required.

COPYRIGHT AND LICENCE

Copyright (C) 2008 by Tom Fawcett

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
lib/AI/Calibrate.pm
require Exporter;
our @ISA = qw(Exporter);
# This allows declaration:
# use AI::Calibrate ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = (
'all' => [
qw(
calibrate
score_prob
print_mapping
)
]
);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw( );
use constant DEBUG => 0;
# Structure slot names
use constant SCORE => 0;
use constant PROB => 1;
=head1 NAME

AI::Calibrate - Perl module for producing probabilities from classifier scores

=head1 SYNOPSIS

  use AI::Calibrate ':all';
  ... train a classifier ...
  ... test classifier on $points ...
  $calibrated = calibrate($points);

=head1 DESCRIPTION

Classifiers usually return some sort of an instance score with their
classifications. These scores can be used as probabilities in various
calculations, but first they need to be I<calibrated>. Naive Bayes, for
example, is a very useful classifier, but the scores it produces are usually
"bunched" around 0 and 1, making these scores poor probability estimates.
Support vector machines have a similar problem. Both classifier types should
be calibrated before their scores are used as probability estimates.
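
Putting the exported functions together, a complete session looks like the
sketch below. The point values and the 0.65 query score are made-up
illustration data; the calls mirror those exercised in the distribution's
test suite.

  use AI::Calibrate ':all';

  # Each point pairs a classifier score with the true class (1 or 0),
  # listed in descending score order.
  my $points = [
      [0.9, 1], [0.8, 1], [0.7, 0], [0.6, 1],
      [0.45, 0], [0.3, 1], [0.2, 0], [0.1, 0],
  ];

  # The second argument of 1 says the points are already sorted by
  # descending score; pass 0 and calibrate() will sort them itself.
  my $calibrated = calibrate($points, 1);

  # Show the score-range => probability mapping.
  print_mapping($calibrated);

  # Translate a new classifier score into a calibrated probability.
  my $prob = score_prob($calibrated, 0.65);
  print "P(class=1 | score=0.65) = $prob\n";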
t/AI-Calibrate-1.t
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl AI-Calibrate.t'
#########################
use Test::More tests => 34;
BEGIN { use_ok('AI::Calibrate', ':all') };
srand;
my $EPSILON = 1.0e-6;

sub max { $_[0] > $_[1] ? $_[0] : $_[1] }

sub approx {
    # True if $x and $y are equal to within a relative tolerance of $EPSILON.
    my($x, $y) = @_;
    if ($x == 0 and $y == 0) {
        return 1;
    } else {
        return abs($x - $y) / max($x, $y) < $EPSILON;
    }
}

sub deeply_approx {
    # Like Test::More::is_deeply, but compares the numbers in each
    # [score, probability] pair with approx() instead of exact equality.
    my( $got, $expected ) = @_;
    return 0 if @$got != @$expected;
    for my $i (0 .. $#{$got}) {
        return 0 if !approx($got->[$i][0], $expected->[$i][0]);
        return 0 if !approx($got->[$i][1], $expected->[$i][1]);
    }
    return 1;
}
# Given an array reference, shuffle the array. This is the Fisher-Yates code
# from The Perl Cookbook.
sub shuffle_array {
my($array) = shift;
my($i);
for ($i = @$array ; --$i; ) {
my $j = int rand ($i+1);
next if $i == $j;
@$array[$i,$j] = @$array[$j,$i]
}
}
# These points are from the ROCCH-PAV paper, Table 1
# Format of each point is [Threshold, Class].
my $points = [
[.9, 1],
[.8, 1],
[.7, 0],
[.6, 1],
[.55, 1],
[.5, 1],
[.45, 0],
[.4, 1],
[.35, 1],
[.3, 0 ],
[.27, 1],
[.2, 0 ],
[.18, 0],
[.1, 1 ],
[.02, 0]
];
my $calibrated_expected =
[
[0.8, 1],
[0.5, 0.75],
[0.35, 0.666666666666667],
[0.27, 0.5],
[0.1, 0.333333333333333]
];
my $calibrated_got = calibrate( $points, 1 );
pass("ran_ok");
ok(deeply_approx($calibrated_got, $calibrated_expected),
"pre-sorted calibration");
# Shuffle the arrays a bit and try calibrating again
for (1 .. 10) {
shuffle_array($points);
my $calibrated_got = calibrate($points, 0);
ok(deeply_approx($calibrated_got, $calibrated_expected),
"unsorted cal $_");
}
# Tweak the thresholds
for (1 .. 10) {
my $delta = rand;
my @delta_points;
for my $point (@$points) {
my($thresh, $class) = @$point;
push(@delta_points, [ $thresh+$delta, $class]);
}
my @delta_expected;
for my $point (@$calibrated_expected) {
my($thresh, $class) = @$point;
push(@delta_expected, [ $thresh+$delta, $class]);
}
my $delta_got = calibrate(\@delta_points, 0);
ok(deeply_approx($delta_got, \@delta_expected), "unsorted cal $_");
}
my @test_estimates =
( [100, 1],
[.9, 1 ],
[.8, 1],
[.7, 3/4 ],
[.5, 3/4 ],
[.45, 2/3 ],
[.35, 2/3 ],
[.3, 1/2 ],
[.2, 1/3 ],
[.02, 0 ],
[.00001, 0]
);
print "Using this mapping:\n";
print_mapping($calibrated_got);
print "\n";
for my $pair (@test_estimates) {
my($score, $prob_expected) = @$pair;
my $prob_got = score_prob($calibrated_got, $score);
is($prob_got, $prob_expected, "score_prob test @$pair");
}
t/AI-Calibrate-KL.t
use strict;
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl AI-Calibrate.t'
#########################
use Test::More tests => 4;
BEGIN { use_ok('AI::Calibrate', ':all') };
sub trim($) {
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
# These points are from Kun Liu
# Format of each point is [Threshold, Class].
my $points = [
[0.999, 1],
[0.998, 1],
[0.742, 0],
[0.737, 1],
[0.685, 1],
[0.636, 1],
[0.613, 1],
[0.598, 1],
[0.559, 1],
[0.542, 1],
[0.541, 1],
[0.505, 1],
[0.490, 0],
[0.477, 1],
[0.475, 1],
[0.442, 0],
[0.442, 0],
[0.439, 1],
[0.425, 1],
[0.413, 0],
[0.411, 0],
[0.409, 0],
[0.401, 1],
[0.399, 0],
[0.386, 0],
[0.385, 0],
[0.375, 1],
[0.374, 0],
[0.369, 0],
[0.367, 1],
[0.362, 1],
[0.359, 1],
[0.359, 0],
];
my $calibrated_expected =
[[0.998, 1],
[0.505, 0.9],
[0.475, 0.666666666666667],
[0.425, 0.5],
[0.359, 0.384615384615384]
];
my $calibrated_got = calibrate( $points, 1 );
pass("ran_ok");
is_deeply($calibrated_got, $calibrated_expected, "calibration");
my $expected_mapping = "
t/AI-Calibrate-NB.t
# -*- Mode: CPerl -*-
use English;
use strict;
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl AI-Calibrate.t'
use Test::More;
eval("use AI::NaiveBayes1");
if ($EVAL_ERROR) {
plan skip_all => 'AI::NaiveBayes1 does not seem to be present';
} else {
plan tests => 2;
}
use_ok('AI::Calibrate', ':all');
my @instances =
( [ { outlook=>'sunny',temperature=>85,humidity=>85,windy=>'FALSE'},
'no'],
[ {outlook=>'sunny',temperature=>80,humidity=>90,windy=>'TRUE'},
'no'],
[ {outlook=>'overcast',temperature=>83,humidity=>86,windy=>'FALSE'},
'yes'],
[ {outlook=>'rainy',temperature=>70,humidity=>96,windy=>'FALSE'},
'yes'],
[ {outlook=>'rainy',temperature=>68,humidity=>80,windy=>'FALSE'},
'yes'],
[ {outlook=>'rainy',temperature=>65,humidity=>70,windy=>'TRUE'},
'no'],
[ {outlook=>'overcast',temperature=>64,humidity=>65,windy=>'TRUE'},
'yes'],
[ {outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},
'no'],
[ {outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},
'yes'],
[ {outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},
'yes'],
[ {outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},
'yes'],
[ {outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},
'yes'],
[ {outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},
'yes'],
[ {outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},
'no']
);
my $nb = AI::NaiveBayes1->new;
$nb->set_real('temperature', 'humidity');
for my $inst (@instances) {
my($attrs, $play) = @$inst;
$nb->add_instance(attributes=>$attrs, label=>"play=$play");
}
$nb->train;
my @points;
for my $inst (@instances) {
my($attrs, $play) = @$inst;
my $ph = $nb->predict(attributes=>$attrs);
my $play_score = $ph->{"play=yes"};
push(@points, [$play_score, ($play eq "yes" ? 1 : 0)]);
}
my $calibrated = calibrate(\@points, 0); # not sorted
print "Mapping:\n";
print_mapping($calibrated);
my(@expected) =
(
[0.779495793582905, 1],
[0.535425255450615, 0.666666666666667]
);
for my $i (0 .. $#expected) {
print "$i = @{$expected[$i]}\n";
}
# This fails because two numbers differ at the 15th digit:
# is_deeply($calibrated, \@expected, "Naive Bayes calibration test");
sub close_enough {
my($x, $y) = @_;
return(abs($x - $y) < 1.0e-5);
}
sub lists_close_enough {
my($got, $expected) = @_;
if (@$got != @$expected) {
return 0;
}
for my $i (0 .. $#{$got}) {
for my $elem (0, 1) {
if (! close_enough($got->[$i][$elem], $expected->[$i][$elem])) {
diag(sprintf( "Got: %f\n", $got->[$i]));
diag(sprintf( "Expected: %f\n", $expected->[$i]));
return 0;
}
}
}
return 1;
}
ok(lists_close_enough($calibrated, \@expected),
'Calibration of NB1 results');
t/AI-Calibrate-pathologies.t
use Test::More tests => 6;
BEGIN { use_ok('AI::Calibrate', ':all') };
my $points0 = [ ];
use Data::Dumper;
is_deeply( calibrate($points0), [], "empty point set");
my $points1 = [
[.9, 1]
];
is_deeply(calibrate($points1), [[0.9,1]], "Singleton point set");
my $points2 = [
[.8, 1],
[.7, 0],
];
is_deeply(calibrate($points2), [[0.8, 1]], "two-point perfect");
my $points3 = [
[.8, 0],
[.7, 1],
];
is_deeply(calibrate($points3), [[0.7, 0.5]], "two-point anti-perfect");
my $points4 = [
[.8, 0],
[.8, 1],
];
is_deeply(calibrate($points4), [[0.8, 0.5]], "two-point conflicting");