Algorithm-CurveFit-Simple
view release on metacpan or search on metacpan
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
package Algorithm::CurveFit::Simple;
# ABSTRACT: Convenience wrapper around Algorithm::CurveFit.
our $VERSION = '1.03'; # VERSION 1.03
use strict;
use warnings;
use Algorithm::CurveFit;
use Time::HiRes;
use JSON::PP;
our %STATS_H; # side-products of fit() stored here for profiling purposes
# Compile-time package setup: load Exporter, inherit from it, and declare the
# symbols callers may import on request (nothing is exported by default).
BEGIN {
    require Exporter;
    our @ISA       = ('Exporter');
    our @EXPORT_OK = ('fit', '%STATS_H');   # the public function and the stats hash
    our $VERSION   = '1.03';
}
# fit() - only public function for this distribution
# Given at least parameter "xy", generate a best-fit curve within a time limit
# (or within a fixed iteration budget when "iterations" is given).
# Output: max deviation, avg deviation, implementation source string (perl or C, for now),
# or the implementation coderef itself when impl_lang is 'coderef'.
# Optional parameters and their defaults:
# terms => 3 # number of terms in formula, max is 10
# time_limit => 3 # number of seconds to try for better fit; values below 0.01 are raised to 0.01
# iterations => N # if defined, use this iteration count instead of a time limit (a false value means 10000)
# inv => 1 # invert sense of curve-fit, from x->y to y->x
# impl_lang => 'perl' # programming language used for output implementation: perl, c (or 'coderef')
# impl_name => 'x2y' # name given to output implementation function ('y2x' when inv is true)
# fitter_class => 'Algorithm::CurveFit' # class whose curve_fit() method does the fitting
# Side effects: records iter_mode, fit_calib_* (time mode only) and fit_* in %STATS_H.
sub fit {
    my %p = @_;

    my $formula = _init_formula(%p);
    my ($xdata, $ydata) = _init_data(%p);
    my $parameters = _init_parameters($xdata, $ydata, %p);

    # An explicit iteration count takes precedence over any time limit.
    my $iter_mode = defined($p{iterations}) ? 'iter' : 'time';

    # Record the mode unconditionally so that it is accurate in both modes and
    # never left stale from an earlier call.
    $STATS_H{iter_mode} = $iter_mode;

    my $time_limit = 3;  # sane default?
    my $n_iter;
    if ($iter_mode eq 'iter') {
        $n_iter = $p{iterations} || 10000;
    }
    else {
        $time_limit = $p{time_limit} // $time_limit;
        # Clamp only after the caller's value has been read; a zero or negative
        # limit would otherwise produce a zero or negative iteration count.
        $time_limit = 0.01 if ($time_limit < 0.01);
        $n_iter = 10000 * $time_limit;  # will use this to figure out how long it -really- takes.
    }

    if ($iter_mode eq 'time') {
        # Calibration run: time a short fit, then scale the iteration count so
        # that the real run takes roughly $time_limit seconds.
        my ($calib_sec, $calib_parar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class});
        $STATS_H{fit_calib_iter}  = $n_iter;
        $STATS_H{fit_calib_time}  = $calib_sec;
        $STATS_H{fit_calib_parar} = $calib_parar;

        # If the clock reported no elapsed time there is nothing to scale by;
        # keep the calibration count rather than dividing by zero.
        if ($calib_sec && $calib_sec > 0) {
            $n_iter = int(($time_limit / $calib_sec) * $n_iter + 1);
        }
    }

    # The real fitting run.
    my ($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class});
    $STATS_H{fit_iter}  = $n_iter;
    $STATS_H{fit_time}  = $n_sec;
    $STATS_H{fit_parar} = $params_ar_ar;

    # A coderef implementation is always built, since it is used to measure the
    # deviation of the fitted curve from the data.
    my $coderef = _implement_formula($params_ar_ar, "coderef", "", $xdata, \%p);
    my ($max_dev, $avg_dev) = _calculate_deviation($coderef, $xdata, $ydata);

    my $impl_lang = lc($p{impl_lang} // 'perl');
    my $impl_name = $p{impl_name} // ($p{inv} ? "y2x" : "x2y");

    # Return the coderef itself when asked for it; otherwise render the
    # implementation in the requested language.
    my $impl = $coderef;
    $impl = _implement_formula($params_ar_ar, $impl_lang, $impl_name, $xdata, \%p) unless ($impl_lang eq 'coderef');

    return ($max_dev, $avg_dev, $impl);
}
# ($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class});
sub _try_fit {
my ($formula, $parameters, $xdata, $ydata, $n_iter, $fitter_class) = @_;
$fitter_class //= "Algorithm::CurveFit";
my $params_ar_ar = [map {[@$_]} @$parameters]; # making a copy because curve_fit() is destructive
my $tm0 = Time::HiRes::time();
my $res = $fitter_class->curve_fit(
formula => $formula,
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
=back
=head1 VARIABLES
The class variable C<%STATS_H> contains various intermediate values which might be helpful. For instance, C<$STATS_H{deviation_max_offset_datum}> contains the x data point which corresponds to the maximum deviation returned.
The contents of C<%STATS_H> are subject to change and might not be fully documented in future versions. The current fields are:
=over 4
=item C<deviation_max_offset_datum>: The x data point corresponding with returned maximum deviation.
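=item C<fit_calib_iter>: The iterations parameter passed to L<Algorithm::CurveFit> for the short fitting attempt used for timing calibration.
=item C<fit_calib_parar>: Arrayref of formula parameters as returned by L<Algorithm::CurveFit> after a short fitting attempt used for timing calibration.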
=item C<fit_calib_time>: The number of seconds L<Algorithm::CurveFit> spent in the calibration run.
=item C<fit_iter>: The iterations parameter passed to L<Algorithm::CurveFit>.
=item C<fit_parar>: Arrayref of formula parameters as returned by L<Algorithm::CurveFit>.
=item C<fit_time>: The number of seconds L<Algorithm::CurveFit> actually spent fitting the formula.
=item C<impl_exception>: The exception thrown when the implementation was used to calculate the deviations, or the empty string if none.
=item C<impl_formula>: The formula part of the implementation.
=item C<impl_source>: The implementation source string.
=item C<iter_mode>: One of C<"time"> or C<"iter">, indicating whether a time limit was used or an iteration count.
=item C<xdata>: Arrayref of x data points as passed to L<Algorithm::CurveFit>.
=item C<ydata>: Arrayref of y data points as passed to L<Algorithm::CurveFit>.
=back
=head1 CAVEATS
=over 4
=item * Only simple polynomial functions are supported. Sometimes you need something else. Use L<Algorithm::CurveFit> for such cases.
=item * If C<xydata> is very large, iterating over it to calculate deviations can take more time than permitted by C<time_limit>.
=item * The dangers of overfitting are real! L<https://en.wikipedia.org/wiki/Overfitting>
=item * Using too many terms can dramatically reduce the accuracy of the fitted formula.
=item * Sometimes calling L<Algorithm::CurveFit> with a ten-term polynomial causes it to hang.
=back
=head1 TO DO
=over 4
=item * Support more programming languages for formula implementation: R, MATLAB, Python
=item * Calculate the actual term sigfigs and set precision appropriately in the formula implementation instead of just "%.11f".
=item * Support trying a range of terms and returning whatever gives the best fit.
=item * Support piecewise output formulas.
=item * Work around L<Algorithm::CurveFit>'s occasional hang problem when using ten-term polynomials.
=back
=head1 SEE ALSO
L<Algorithm::CurveFit>
L<curvefit>
=cut
( run in 0.913 second using v1.01-cache-2.11-cpan-5623c5533a1 )