App-St

 view release on metacpan or  search on metacpan

Changelog  view on Meta::CPAN

Revision history for st

1.1.4   Mon Jun 26 13:57:00 2017 +0200
        Percentile between 0 and 100

1.1.3   Mon Jun 26 12:58:00 2017 +0200
        Fixed --percentile and --quartile options

1.1.2   Wed Apr 1 18:43:12 2015 +0200
        Bugfix: sorted data was not cached

1.1.1   Thu Oct 10 19:37:41 2013 +0200
        Makefile.PL allows script renaming

1.1.0   Mon Sep 23 18:38:35 2013 +0200
        Adopt "%g" as default output format
        s/transverse/transpose/g

MANIFEST  view on Meta::CPAN

Makefile.PL
MANIFEST			This list of files
README.md
script/st
t/01-use.t
t/02-new.t
t/03-validate.t
t/04-process.t
t/05-basic-stats.t
t/05-format.t
t/06-percentile.t
t/06-quantiles.t
t/07-result.t
META.yml                                 Module YAML meta-data (added by MakeMaker)
META.json                                Module JSON meta-data (added by MakeMaker)

README.md  view on Meta::CPAN

    --stderr|sem|se
    --sum|s
    --var|variance

    --min
    --q1
    --median
    --q3
    --max

    --percentile=<0..100>
    --quartile=<0..4>

If no functions are selected, "st" will print the default output:

    N     min  max  sum  mean  stddev

You can also use the following predefined sets of functions:

    --summary   # five-number summary (min q1 median q3 max)
    --complete  # everything

lib/App/St.pm  view on Meta::CPAN


  return $opt{formatted} ? $self->_format($mean)
                         : $mean;
}

# Return quartile number $q of the stored data.
#
#   $q    - a single digit 0..4; anything else dies with "Invalid quartile".
#   %opt  - passed through unchanged to percentile().
#
# The quartile number is mapped onto the percentile scale (q * 25) and
# the actual work is delegated to percentile().
sub quartile {
    my ($self,$q,%opt) = @_;

    die "Invalid quartile '$q'\n" unless $q =~ /^[01234]$/;

    my $rank = $q / 4 * 100;
    return $self->percentile($rank, %opt);
}

# Return the median of the stored data, i.e. its 50th percentile.
# Any options in %opt are passed straight through to percentile().
sub median {
    my $self = shift;
    my %opt  = @_;

    return $self->percentile(50, %opt);
}

sub variance {
  my ($self,%opt) = @_;

  my $N  = $self->{N};
  my $M2 = $self->{M2};

  my $variance = $N > 1 ? $M2 / ($N - 1) : undef;

lib/App/St.pm  view on Meta::CPAN


  my $stddev = $self->stddev();
  my $N      = $self->N();

  my $stderr  = defined $stddev ? $stddev/sqrt($N) : undef;

  return $opt{formatted} ? $self->_format($stderr)
                         : $stderr;
}

# Return the $p-th percentile of the stored data.
#
#   $p    - percentile rank, a number from 0 to 100 inclusive.
#   %opt  - formatted => 1 returns the value run through $self->_format().
#
# Dies with "Can't get percentile from empty dataset" when the object was
# not told to keep its data or holds no values, and with
# "Invalid percentile" when $p is outside 0..100.
#
# The data is sorted numerically on first use; the sorted copy replaces
# $self->{data} and $self->{_is_sorted_} is set so later calls skip the sort.
sub percentile {
    my ($self, $p, %opt) = @_;

    my $data = $self->{data};

    # Also guard against a missing data ref so the caller gets the
    # documented message instead of a dereference error.
    if (!$self->{keep_data} or !$data or scalar @{$data} == 0) {
        die "Can't get percentile from empty dataset\n";
    }

    if ($p < 0 or $p > 100) {
        die "Invalid percentile '$p'\n";
    }

    if (!$self->{_is_sorted_}) {
        $data = [ sort {$a <=> $b} @{ $data } ];
        $self->{data} = $data;
        $self->{_is_sorted_} = 1;
    }

    my $N = $self->N();

    # Fractional position of the requested rank within the sorted data.
    my $idx   = ($N - 1) * $p / 100;
    my $lower = int($idx);

    # An exact position yields that element; otherwise take the mean of
    # the two neighbouring elements.
    my $percentile =
        $lower == $idx ? $data->[$lower]
                       : ($data->[$lower] + $data->[$lower + 1]) / 2;

    # _format is a method: it must be invoked on $self, as mean() and
    # stderr() do, otherwise the value is mistaken for the invocant.
    return $opt{formatted} ? $self->_format($percentile)
                           : $percentile;
}

sub result {
    my $self = shift;

    my %result = (
        N          => $self->N(),
        sum        => $self->sum(),
        mean       => $self->mean(),
        stddev     => $self->stddev(),

lib/App/St.pm  view on Meta::CPAN

    if ($self->{keep_data}) {
        %result = (%result,
            (
                q1      => $self->quartile(1),
                median  => $self->median(),
                q3      => $self->quartile(3),
            )
        );
    }

    # the following is a hack to accept multiple percentiles/quartiles
    if ( exists $self->{percentile} ) {
        my $percentile = ref $self->{percentile} eq 'ARRAY'
          ? [ map { $self->percentile($_) } @{ $self->{percentile} } ]
          : $self->percentile( $self->{percentile} );

        %result = (
            %result,
            percentile => $percentile
        );
    }

    if (exists $self->{quartile}) {
        my $quartile = ref $self->{quartile} eq 'ARRAY'
            ? [ map { $self->quartile($_) } @{ $self->{quartile} } ]
            : $self->quartile( $self->{quartile} );

        %result = (
            %result,

lib/App/St.pm  view on Meta::CPAN

=head2 N

=head2 sum

=head2 mean

=head2 stddev

=head2 stderr

=head2 percentile=<0..100>

=head2 quartile=<0..4>

=head2 min

=head2 q1

=head2 median

=head2 q3

script/st  view on Meta::CPAN

  'stddev|sd',
  'stderr|sem|se',
  'sum|s',
  'variance|var',

  'min|q0',
  'q1',
  'median|q2',
  'q3',
  'max|q4',
  'percentile=f@',
  'quartile=i@',

  # predefined output sets
  'summary',
  'complete|everything|all',
  'default',

  # output control
  'delimiter|d=s',
  'format|fmt|f=s',

script/st  view on Meta::CPAN

# Print the usage message and exit when --help was given.
pod2usage(1) if $opt{help};

# Build the constructor configuration and the list of statistics to
# report from the parsed command-line options.
my %config = get_config(%opt);
my @stats  = statistical_options(%opt);

# Quartile- and percentile-based statistics need the whole dataset, so
# ask App::St to keep every value when any of them was requested.
# percentile and quartile take values rather than being flags, hence
# the "defined" tests for those two.
if (   $opt{summary}
    or $opt{complete}
    or $opt{q1}
    or $opt{median}
    or $opt{q3}
    or defined $opt{percentile}
    or defined $opt{quartile} )
{
    $config{keep_data} = 1;
}

# special cases: percentile and quartile are not booleans
# Their requested values are handed to the constructor as-is, and only
# when the option is actually present in %opt.
my %special_parameters = map { $_ => $opt{$_} } grep { exists $opt{$_} } qw/percentile quartile/;

my $st = App::St->new(%config, %special_parameters);

my $n = 0;
while (my $num = <>) {
  chomp $num;

  $n++;
  if (!$st->validate($num)) {
      my $err = "Invalid value '$num' on input line $.\n";

script/st  view on Meta::CPAN


  return (%config, delimiter => $delimiter, format => $format);

}

sub statistical_options {
  my %opt = @_;

  # predefined sets
  my %predefined = (
    complete => [ qw/N min q1 median q3 max sum mean stddev stderr variance percentile quartile/ ],
    summary  => [ qw/min q1 median q3 max/ ],
    default  => [ qw/N min max sum mean stddev/ ],
  );

  # selected options
  my %selected = map { $_ => 1 } grep { exists $opt{$_} } @{ $predefined{complete} };

  # expand with predefined sets
  for my $set (keys %predefined) {
    if ($opt{$set}) {

script/st  view on Meta::CPAN

  --stderr|sem|se       # standard error of mean
  --sum|s               # sum of elements of the sample
  --variance|var        # variance

The following options require that the whole dataset is stored in
memory, which can be problematic for huge datasets:

  --q1                  # first quartile
  --median|q2           # second quartile, or median
  --q3                  # third quartile
  --percentile=f        # percentile=<0..100>
  --quartile=i          # quartile=<0..4>

If no functions are selected, C<st> will print the default output:

    N     min  max  sum  mean  stddev

You can also use the following predefined sets of functions:

  --summary   # five-number summary (min q1 median q3 max)
  --complete  # everything

t/06-percentile.t  view on Meta::CPAN

use Test::More;

use App::St;

# Percentile checks against the sample 1..10.
my $st = App::St->new( keep_data => 1 );

# Feed the values in descending order so percentile() has to sort them.
for my $num (reverse 1..10) {
  $st->process($num);
}

# Expected result for each percentile rank.
my %percentiles = (
    0   => 1,
    50  => 5.5,
    90  => 9.5,
    100 => 10,
);

plan tests => scalar keys %percentiles;

# Walk the ranks in numeric order so the TAP output is the same on every
# run, and name each assertion so a failure identifies the rank.
for my $p (sort { $a <=> $b } keys %percentiles) {
    is($st->percentile($p), $percentiles{$p}, "percentile($p) of 1..10");
}



( run in 0.407 second using v1.01-cache-2.11-cpan-05162d3a2b1 )