App-St
view release on metacpan or search on metacpan
Revision history for st
1.1.4 Mon Jun 26 13:57:00 2017 +0200
Percentile between 0 and 100
1.1.3 Mon Jun 26 12:58:00 2017 +0200
Fixed --percentile and --quartile options
1.1.2 Wed Apr 1 18:43:12 2015 +0200
Bugfix: sorted data was not cached
1.1.1 Thu Oct 10 19:37:41 2013 +0200
Makefile.PL allows script renaming
1.1.0 Mon Sep 23 18:38:35 2013 +0200
Adopt "%g" as default output format
s/transverse/transpose/g
Makefile.PL
MANIFEST This list of files
README.md
script/st
t/01-use.t
t/02-new.t
t/03-validate.t
t/04-process.t
t/05-basic-stats.t
t/05-format.t
t/06-percentile.t
t/06-quantiles.t
t/07-result.t
META.yml Module YAML meta-data (added by MakeMaker)
META.json Module JSON meta-data (added by MakeMaker)
--stderr|sem|se
--sum|s
--var|variance
--min
--q1
--median
--q3
--max
--percentile=<0..100>
--quartile=<0..4>
If no functions are selected, "st" will print the default output:
N min max sum mean stddev
You can also use the following predefined sets of functions:
--summary # five-number summary (min q1 median q3 max)
--complete # everything
lib/App/St.pm view on Meta::CPAN
return $opt{formatted} ? $self->_format($mean)
: $mean;
}
# Return the requested quartile of the dataset.
#
#   $q    - quartile number; must be a single digit 0, 1, 2, 3 or 4
#   %opt  - passed through unchanged to percentile()
#
# Dies with "Invalid quartile '$q'" for any other value. The quartile is
# converted to the matching percentile (q * 25) and delegated to
# percentile().
sub quartile {
    my ($self, $q, %opt) = @_;

    die "Invalid quartile '$q'\n"
        unless $q =~ /^[01234]$/;

    my $as_percentile = $q / 4 * 100;
    return $self->percentile($as_percentile, %opt);
}
# Return the median of the dataset, i.e. the 50th percentile.
# Any options (%opt) are handed on unchanged to percentile().
sub median {
    my $self = shift;
    my %opt  = @_;

    return $self->percentile(50, %opt);
}
sub variance {
my ($self,%opt) = @_;
my $N = $self->{N};
my $M2 = $self->{M2};
my $variance = $N > 1 ? $M2 / ($N - 1) : undef;
lib/App/St.pm view on Meta::CPAN
my $stddev = $self->stddev();
my $N = $self->N();
my $stderr = defined $stddev ? $stddev/sqrt($N) : undef;
return $opt{formatted} ? $self->_format($stderr)
: $stderr;
}
# Return the $p-th percentile of the stored data.
#
#   $p    - percentile to compute, a number from 0 to 100 inclusive
#   %opt  - formatted => true: return the value run through
#           $self->_format() instead of the raw number
#
# Dies with "Can't get percentile from empty dataset" when the object was
# not told to keep its data or holds no values, and with
# "Invalid percentile '$p'" when $p lies outside 0..100.
#
# The data is sorted numerically the first time it is needed; the sorted
# copy replaces $self->{data} and $self->{_is_sorted_} is set so later
# calls skip the sort.
#
# When the position ($N - 1) * $p / 100 falls between two elements the
# result is the plain mean of the two neighbours (not a weighted
# interpolation).
sub percentile {
    my ($self, $p, %opt) = @_;

    my $data = $self->{data};
    if (!$self->{keep_data} or scalar @{$data} == 0) {
        die "Can't get percentile from empty dataset\n";
    }

    if ($p < 0 or $p > 100) {
        die "Invalid percentile '$p'\n";
    }

    if (!$self->{_is_sorted_}) {
        $data = [ sort { $a <=> $b } @{$data} ];
        $self->{data}        = $data;
        $self->{_is_sorted_} = 1;
    }

    my $N     = $self->N();
    my $idx   = ($N - 1) * $p / 100;
    my $lower = int($idx);    # array subscripts truncate; make that explicit

    my $percentile =
        $lower == $idx ? $data->[$lower]
                       : ($data->[$lower] + $data->[ $lower + 1 ]) / 2;

    # _format is a method (see mean/stderr); calling it as a plain function
    # would pass the value where the invocant belongs.
    return $opt{formatted} ? $self->_format($percentile)
                           : $percentile;
}
sub result {
my $self = shift;
my %result = (
N => $self->N(),
sum => $self->sum(),
mean => $self->mean(),
stddev => $self->stddev(),
lib/App/St.pm view on Meta::CPAN
if ($self->{keep_data}) {
%result = (%result,
(
q1 => $self->quartile(1),
median => $self->median(),
q3 => $self->quartile(3),
)
);
}
# the following is a hack to accept multiple percentiles/quartiles
if ( exists $self->{percentile} ) {
my $percentile = ref $self->{percentile} eq 'ARRAY'
? [ map { $self->percentile($_) } @{ $self->{percentile} } ]
: $self->percentile( $self->{percentile} );
%result = (
%result,
percentile => $percentile
);
}
if (exists $self->{quartile}) {
my $quartile = ref $self->{quartile} eq 'ARRAY'
? [ map { $self->quartile($_) } @{ $self->{quartile} } ]
: $self->quartile( $self->{quartile} );
%result = (
%result,
lib/App/St.pm view on Meta::CPAN
=head2 N
=head2 sum
=head2 mean
=head2 stddev
=head2 stderr
=head2 percentile=<0..100>
=head2 quartile=<0..4>
=head2 min
=head2 q1
=head2 median
=head2 q3
'stddev|sd',
'stderr|sem|se',
'sum|s',
'variance|var',
'min|q0',
'q1',
'median|q2',
'q3',
'max|q4',
'percentile=f@',
'quartile=i@',
# predefined output sets
'summary',
'complete|everything|all',
'default',
# output control
'delimiter|d=s',
'format|fmt|f=s',
pod2usage(1) if $opt{help};
my %config = get_config(%opt);
my @stats = statistical_options(%opt);
if ( $opt{summary}
or $opt{complete}
or $opt{q1}
or $opt{median}
or $opt{q3}
or defined $opt{percentile}
or defined $opt{quartile} )
{
$config{keep_data} = 1;
}
# special cases: percentile and quartile are not booleans
my %special_parameters = map { $_ => $opt{$_} } grep { exists $opt{$_} } qw/percentile quartile/;
my $st = App::St->new(%config, %special_parameters);
my $n = 0;
while (my $num = <>) {
chomp $num;
$n++;
if (!$st->validate($num)) {
my $err = "Invalid value '$num' on input line $.\n";
return (%config, delimiter => $delimiter, format => $format);
}
sub statistical_options {
my %opt = @_;
# predefined sets
my %predefined = (
complete => [ qw/N min q1 median q3 max sum mean stddev stderr variance percentile quartile/ ],
summary => [ qw/min q1 median q3 max/ ],
default => [ qw/N min max sum mean stddev/ ],
);
# selected options
my %selected = map { $_ => 1 } grep { exists $opt{$_} } @{ $predefined{complete} };
# expand with predefined sets
for my $set (keys %predefined) {
if ($opt{$set}) {
--stderr|sem|se # standard error of mean
--sum|s # sum of elements of the sample
--variance|var # variance
The following options require that the whole dataset is stored in
memory, which can be problematic for huge datasets:
--q1 # first quartile
--median|q2 # second quartile, or median
--q3 # third quartile
--percentile=f # percentile=<0..100>
--quartile=i # quartile=<0..4>
If no functions are selected, C<st> will print the default output:
N min max sum mean stddev
You can also use the following predefined sets of functions:
--summary # five-number summary (min q1 median q3 max)
--complete # everything
t/06-percentile.t view on Meta::CPAN
use Test::More;
use App::St;

# Build a data-keeping object and feed it 10 down to 1, so percentile()
# has to sort the values before it can answer.
my $st = App::St->new( keep_data => 1 );
foreach my $value (reverse 1 .. 10) {
    $st->process($value);
}

# percentile => expected value for the dataset 1..10
my %expected_for = (
    0   => 1,
    50  => 5.5,
    90  => 9.5,
    100 => 10,
);

plan tests => scalar keys %expected_for;

while (my ($pct, $want) = each %expected_for) {
    is($st->percentile($pct), $want);
}
( run in 0.356 second using v1.01-cache-2.11-cpan-709fd43a63f )