AI-Calibrate

 view release on metacpan or  search on metacpan

lib/AI/Calibrate.pm  view on Meta::CPAN


    PAV($new_data);

    if (DEBUG) {
        print("After PAV, vector is:\n");
        print_vector($new_data);
    }

    my(@result);
    my( $last_prob, $last_score);

    push(@$new_data, [-1e10, 0]);

    for my $pair (@$new_data) {
        print "Seeing @$pair\n" if DEBUG;
        my($score, $prob) = @$pair;
        if (defined($last_prob) and $prob < $last_prob) {
            print("Pushing [$last_score, $last_prob]\n") if DEBUG;
            push(@result, [$last_score, $last_prob] );
        }
        $last_prob = $prob;
        $last_score = $score;
    }

    return \@result;
}


sub PAV {
    my ( $result ) = @_;

    for ( my $i = 0; $i < @$result - 1; $i++ ) {
        if ( $result->[$i][PROB] < $result->[ $i + 1 ][PROB] ) {
            $result->[$i][PROB] =
                ( $result->[$i][PROB] + $result->[ $i + 1 ][PROB] ) / 2;
            $result->[ $i + 1 ][PROB] = $result->[$i][PROB];
            print "Averaging elements $i and ", $i + 1, "\n" if DEBUG;

            for ( my $j = $i - 1; $j >= 0; $j-- ) {
                if ( $result->[$j][PROB] < $result->[ $i + 1 ][PROB] ) {
                    my $d = ( $i + 1 ) - $j + 1;
                    flatten( $result, $j, $d );
                }
                else {
                    last;
                }
            }
        }
    }
}

sub print_vector {
    my($vec) = @_;
    for my $pair (@$vec) {
        print join(", ", @$pair), "\n";
    }
}


sub flatten {
    my ( $vec, $start, $len ) = @_;
    if (DEBUG) {
        print "Flatten called on vec, $start, $len\n";
        print "Vector before: \n";
        print_vector($vec);
    }

    my $sum = 0;
    for my $i ( $start .. $start + $len-1 ) {
        $sum += $vec->[$i][PROB];
    }
    my $avg = $sum / $len;
    print "Sum = $sum, avg = $avg\n" if DEBUG;
    for my $i ( $start .. $start + $len -1) {
        $vec->[$i][PROB] = $avg;
    }
    if (DEBUG) {
        print "Vector after: \n";
        print_vector($vec);
    }
}

=item B<score_prob>

This is a simple utility function that takes the structure returned by
B<calibrate>, along with a new score, and returns the probability estimate.
Example calling form:

  $p = score_prob($calibrated, $score);

Once you have a trained, calibrated classifier, you could imagine using it
like this:

 $calibrated = calibrate( $calibration_set );
 print "Input instances, one per line:\n";
 while (<>) {
    chomp;
    my(@fields) = split;
    my $score = classifier(@fields);
    my $prob = score_prob($score);
    print "Estimated probability: $prob\n";
 }

=cut

sub score_prob {
    my($calibrated, $score) = @_;

    my $last_prob = 1.0;

    for my $tuple (@$calibrated) {
        my($bound, $prob) = @$tuple;
        return $prob if $score >= $bound;
        $last_prob = $prob;
    }
    #  If we drop off the end, probability estimate is zero
    return 0;
}


=item B<print_mapping>

This is a simple utility function that takes the structure returned by
B<calibrate> and prints out a simple list of lines describing the mapping
created.

Example calling form:

  print_mapping($calibrated);

Sample output:

  1.00 > SCORE >= 1.00     prob = 1.000
  1.00 > SCORE >= 0.71     prob = 0.667



( run in 2.978 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )