AI-Calibrate
view release on metacpan or search on metacpan
lib/AI/Calibrate.pm view on Meta::CPAN
PAV($new_data);
if (DEBUG) {
print("After PAV, vector is:\n");
print_vector($new_data);
}
my(@result);
my( $last_prob, $last_score);
push(@$new_data, [-1e10, 0]);
for my $pair (@$new_data) {
print "Seeing @$pair\n" if DEBUG;
my($score, $prob) = @$pair;
if (defined($last_prob) and $prob < $last_prob) {
print("Pushing [$last_score, $last_prob]\n") if DEBUG;
push(@result, [$last_score, $last_prob] );
}
$last_prob = $prob;
$last_score = $score;
}
return \@result;
}
# PAV (presumably "Pool Adjacent Violators" -- confirm against module docs).
#
# Works in place on $result, a reference to an array of array references;
# only the PROB slot of each element is read or written.
#
# Walking forward, whenever an element's PROB is smaller than that of its
# successor, both are set to the mean of the two.  The pooled region is then
# grown backwards with flatten(), one element at a time, for as long as the
# element just before it still has a PROB below the pooled value.
sub PAV {
    my ( $result ) = @_;

    for my $i ( 0 .. $#{$result} - 1 ) {
        my $next = $i + 1;

        # Skip pairs that do not violate the ordering.
        next unless $result->[$i][PROB] < $result->[$next][PROB];

        my $mean = ( $result->[$i][PROB] + $result->[$next][PROB] ) / 2;
        $result->[$i][PROB]    = $mean;
        $result->[$next][PROB] = $mean;
        print "Averaging elements $i and ", $i + 1, "\n" if DEBUG;

        # Extend the pool towards the front while earlier elements are
        # still below the (re-averaged) pooled value.
        my $j = $i - 1;
        while ( $j >= 0 and $result->[$j][PROB] < $result->[$next][PROB] ) {
            flatten( $result, $j, $next - $j + 1 );
            $j--;
        }
    }
}
# Debugging aid: print each element of $vec (a reference to an array of
# array references) on its own line, with the element's values joined
# by ", ".
sub print_vector {
    my($vec) = @_;
    print join(", ", @$_), "\n" foreach @$vec;
}
# Overwrite the PROB slot of $len consecutive elements of $vec, starting at
# index $start, with the arithmetic mean of those slots.  $vec is modified
# in place.  When DEBUG is true, the arguments, the running sum/average and
# the vector before and after are printed.
sub flatten {
    my ( $vec, $start, $len ) = @_;

    if (DEBUG) {
        print "Flatten called on vec, $start, $len\n";
        print "Vector before: \n";
        print_vector($vec);
    }

    # Index of the final element in the span being averaged.
    my $last = $start + $len - 1;

    my $sum = 0;
    $sum += $vec->[$_][PROB] for $start .. $last;

    my $avg = $sum / $len;
    print "Sum = $sum, avg = $avg\n" if DEBUG;

    $vec->[$_][PROB] = $avg for $start .. $last;

    if (DEBUG) {
        print "Vector after: \n";
        print_vector($vec);
    }
}
=item B<score_prob>
This is a simple utility function that takes the structure returned by
B<calibrate>, along with a new score, and returns the probability estimate.
Example calling form:
$p = score_prob($calibrated, $score);
Once you have a trained, calibrated classifier, you could imagine using it
like this:
$calibrated = calibrate( $calibration_set );
print "Input instances, one per line:\n";
while (<>) {
chomp;
my(@fields) = split;
my $score = classifier(@fields);
my $prob = score_prob($calibrated, $score);
print "Estimated probability: $prob\n";
}
=cut
# score_prob($calibrated, $score)
#
# Map a raw classifier score to a probability estimate.
#
#   $calibrated - reference to an array of [bound, prob] pairs.  The pairs
#                 are scanned in the order given (presumably highest bound
#                 first, as produced by calibrate -- confirm against caller).
#   $score      - the score to look up.
#
# Returns the prob of the first pair whose bound is less than or equal to
# $score, or 0 when no pair qualifies (including an empty mapping).
sub score_prob {
    my($calibrated, $score) = @_;

    for my $tuple (@$calibrated) {
        my($bound, $prob) = @$tuple;
        return $prob if $score >= $bound;
    }

    # If we drop off the end, probability estimate is zero
    return 0;
}
=item B<print_mapping>
This is a simple utility function that takes the structure returned by
B<calibrate> and prints out a simple list of lines describing the mapping
created.
Example calling form:
print_mapping($calibrated);
Sample output:
1.00 > SCORE >= 1.00 prob = 1.000
1.00 > SCORE >= 0.71 prob = 0.667
( run in 2.978 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )