Math-LiveStats

 view release on metacpan or  search on metacpan

lib/Math/LiveStats.pm  view on Meta::CPAN

package Math::LiveStats;

use strict;
use warnings;

# perl -MPod::Markdown -e 'Pod::Markdown->new->filter(@ARGV)' lib/Math/LiveStats.pm  > README.md

=head1 NAME

Math::LiveStats - Pure perl module to make mean, standard deviation, vwap, and p-values available for one or more window sizes in streaming data

=head1 SYNOPSIS


    #!/usr/bin/perl -w
  
    use Math::LiveStats;
  
    # Create a new Math::LiveStats object with window sizes of 60 and 300 seconds
    my $stats = Math::LiveStats->new(60, 300); # doesn't have to be "time" or "seconds" - could be any series base you want
  
    # Add time-series data points (timestamp, value, volume) # use volume=0 if you don't use/need vwap
    $stats->add(1000, 50, 5);
    $stats->add(1060, 55, 10);
    $stats->add(1120, 53, 5);
  
    # Get mean, standard deviation, vwap and vwap standard deviation for a window size
    my $mean_60 = $stats->mean(60);
    my $stddev_60 = $stats->stddev(60); # of the mean
    my $vwap_60 = $stats->vwap(60);
    my $vwapdev_60 = $stats->vwapdev(60); # stddev of the vwap
  
    # Get the p-value for a window size
    my $pvalue_60 = $stats->pvalue(60);
  
    # Get the number of entries in a window
    my $n_60 = $stats->n(60);
  
    # Recalculate statistics to reduce accumulated errors
    $stats->recalc(60);

=head1 CLI one-liner example

    cat data | perl -MMath::LiveStats -ne 'BEGIN{$s=Math::LiveStats->new(20);} chomp;($t,$p,$v)=split(/,/); $s->add($t,$p,$v); print "$t,$p,$v,",$s->n(20),",",$s->mean(20),",",$s->stddev(20),",",$s->vwap(20),",",$s->vwapdev(20),"\n"'

=head1 DESCRIPTION

Math::LiveStats provides live statistical calculations (mean, standard deviation, p-value,
volume-weighted average price (VWAP), and the standard deviation of the VWAP) over one or
more window sizes for streaming data. It uses West's algorithm for efficient updates and
supports synthetic boundary entries to maintain consistent results.

Stats are computed from the data that falls inside the given window, plus at most one
synthetic entry: when old data drops out of the window and there is no data point exactly
on the oldest boundary of the window, one synthetic value is assumed to be there. That
value is linearly interpolated from the entries on either side of the boundary.

=head1 METHODS


=cut

# Exporter plumbing.  The module is used through its object interface
# (Math::LiveStats->new), so every export list below is intentionally empty.
require Exporter;

our @ISA        = ('Exporter');
our $VERSION    = '1.02';
our $UntarError = '';    # package global, initialised to the empty string

our %EXPORT_TAGS = ( all => [] );             # the ':all' tag exports nothing
our @EXPORT_OK   = @{ $EXPORT_TAGS{all} };    # optional exports: none
our @EXPORT      = ();                        # default exports: none



=head2 new(@window_sizes)

Creates a new Math::LiveStats object with the specified window sizes.

=cut

sub new {
  my ($class, @window_sizes) = @_;
  die "At least one window size must be provided" unless @window_sizes;

  # Keep only window sizes greater than zero and sort them in ascending numeric order
  @window_sizes = sort { $a <=> $b } grep { $_ > 0 } @window_sizes;

  my $self = {
    window_sizes => \@window_sizes,
    data         => [],
    stats        => {},
  };

  # Initialize stats for each window size
  foreach my $window (@window_sizes) {
    $self->{stats}{$window} = {
      n         => 0,
      mean      => 0,
      M2        => 0,
      cpv       => 0, # Cumulative_Price_Volume
      cv        => 0, # Cumulative_Volume
      vM2       => 0, # M2 of the vwap
      vmean	=> 0, # for vwapdev
      synthetic => undef,  # To store synthetic entry if needed
      start_index => 0,
    };
  }



( run in 2.849 seconds using v1.01-cache-2.11-cpan-df04353d9ac )