Algorithm-BaumWelch
view release on metacpan or search on metacpan
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
obs2 => [0.3, 0.4],
obs3 => [0.4, 0.3],
};
# The transition matrixi - each row and column correspond to a particular state e.g. P(state1_x|state1_x-1) = 0.9...
my $trans = [
[0.9, 0.1],
[0.1, 0.9],
];
# The probabilities of each state at the start of the series.
my $start = [0.5, 0.5];
# Create an Algorithm::BaumWelch object.
my $ba = Algorithm::BaumWelch->new;
# Feed in the observation series.
$ba->feed_obs($obs_series);
# Feed in the transition and emission matrices and the starting probabilities.
$ba->feed_values($trans, $emis, $start);
# Alternatively you can randomly initialise the values - pass it the number of hidden states -
# i.e. to determine the parameters we need to make a first guess).
# $ba->random_initialise(2);
# Perform the algorithm.
$ba->baum_welch;
# Use results to pass data.
# In VOID-context prints formated results to STDOUT.
# In LIST-context returns references to the predicted transition & emission matrices and the starting parameters.
$ba->results;
=cut
=head1 DESCRIPTION
The Baum-Welch algorithm is used to compute the parameters (transition and emission probabilities) of an Hidden Markov
Model (HMM). The algorithm calculates the forward and backwards probabilities for each HMM state in a series and then re-estimates the parameters of
the model.
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
my @obs = (keys %uniq);
@obs = sort { $a cmp $b } @obs;
$self->[0][0] = $series;
$self->[0][1] = [@obs];
$self->[0][2] = scalar @obs;
return;
}
sub feed_values {
croak qq{\nThis method expects 3 arguments.} if @_ != 4;
my ($self, $trans, $emis, $start) = @_;
croak qq{\nThis method expects 3 arguments.} if (ref $trans ne q{ARRAY} || ref $emis ne q{HASH} || ref $start ne q{ARRAY});
my $obs_tipos = $self->[0][1];
my $obs_numero = $self->[0][2];
my $t_length = &_check_trans($trans);
&_check_emis($emis, $obs_tipos, $obs_numero, $t_length);
&_check_start($start, $t_length);
$self->[1][0] = $trans;
$self->[1][1] = $emis;
$self->[1][2] = $start;
my @stop; # 0.1/1 nao faz diferenca e para|comeca (stop|start) sempre iguala = 0
for (0..$#{$trans}) { push @stop, 1 };
$self->[1][3] = [@stop];
return;
}
sub _check_start {
my ($start, $t_length) = @_;
croak qq{\nThere must be an initial probablity for each state in the start ARRAY.} if scalar @{$start} != $t_length;
for (@{$start}) { croak qq{\nThe start ARRAY values must be numeric.} if !(/^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$/) };
my $sum =0;
for (@{$start}) { $sum += $_ }
croak qq{\nThe starting probabilities must sum to 1.} if ($sum <= 0.95 || $sum >= 1.05);
return;
}
sub _check_emis {
my ($emis, $obs_tipos, $obs_numero, $t_length) = @_;
my @emis_keys = (keys %{$emis});
@emis_keys = sort {$a cmp $b} @emis_keys;
croak qq{\nThere must be an entry in the emission matrix for each type of observation in the observation series.} if $obs_numero != scalar @emis_keys;
for (0..$#emis_keys) { croak qq{\nThe observations in the emission matrix do not match those in the observation series.} if $emis_keys[$_] ne $obs_tipos->[$_]; }
for (values %{$emis}) {
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
croak qq{\nThe transition matrix row must sum to 1.} if ($sum <= 0.95 || $sum >= 1.05);
}
return $t_length;
}
sub random_initialise {
my ($self, $states) = @_;
my $obs_names = $self->[0][1];
my $trans = &_gera_trans($states);
my $emis = &_gera_emis($states, $obs_names);
my $start = &_gera_init($states);
$self->[1][0] = $trans;
$self->[1][1] = $emis;
$self->[1][2] = $start;
my @stop; # 0.1/1 nao faz diferenca e para|comeca (stop|start) sempre iguala = 0
for (0..$states-1) { push @stop, 1 };
$self->[1][3] = [@stop];
return;
}
sub _gera_init {
my $length = shift;
my $sum = 0;
my $init = [];
srand;
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
#print qq{\n\nauto-gera emis de numeros aleatorios que sumam 1 para cada estado}; draw($e);
return $e;
}
sub _forwardbackward_reestimacao {
my $self = shift;
my $obs_series = $self->[0][0];
my $obs_types = $self->[0][1];
my $trans = $self->[1][0];
my $emis = $self->[1][1];
my $start = $self->[1][2];
my $stop = $self->[1][3];
my $alpha = [];
#y initialise
for (0..$#{$trans}) { $alpha->[$_][0] = $start->[$_] * $emis->{$obs_series->[0]}[$_]; }
#y not sure if i´ve extrapolated to higher-state number BW algorithm equations correctly?!?
for my $n (1..$#{$obs_series}) {
for my $s (0..$#{$trans}) {
#push @{$alpha->[$s]}, ( ( ($alpha->[0][$n-1]*$trans->[$s][0]) + ($alpha->[1][$n-1]*$trans->[$s][1]) ) * $emis->{$obs_series->[$n]}[$s] ) ;
my $sum = 0;
for my $s_other (0..$#{$trans}) { $sum += $alpha->[$s_other][$n-1]*$trans->[$s][$s_other]; }
push @{$alpha->[$s]}, ( ($sum) * $emis->{$obs_series->[$n]}[$s] ) ;
}
}
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
my $trans_new = [];
for my $s_1st (0..$#{$trans}) {
for my $s_2nd (0..$#{$trans}) {
$trans_new->[$s_2nd][$s_1st] = (sum @{$p_state_too_state_trans->[$s_1st][$s_2nd]} ) / (sum @{$p_too_state_trans->[$s_1st]} );
}
}
my $stop_new = [];
for my $s (0..$#{$trans}) { $stop_new->[$s] = ( $p_too_state_trans->[$s][$#{$obs_series}] ) / (sum @{$p_too_state_trans->[$s]} ); }
my $start_new = [];
for my $s (0..$#{$trans}) { $start_new->[$s] = $p_too_state_trans->[$s][0]; }
$self->[1][0] = $trans_new;
$self->[1][1] = $emis_new;
$self->[1][2] = $start_new;
$self->[1][3] = $stop_new;
return;
}
sub baum_welch {
#/ i´m being lazy this is an acceptable cut-off mechanism for now
my ($self, $max) = @_;
$max ||= 100;
my $val;
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
sub _baum_welch_length {
my $self = shift;
for (0..$#{$self->[0][0]}) { $self->_forwardbackward_reestimacao; }
return;
}
sub results {
my $self = shift;
my $trans = $self->[1][0];
my $emis = $self->[1][1];
my $start = $self->[1][2];
if (wantarray) {
return ($trans, $emis, $start);
}
else {
my $keys = $self->[0][1];
my @config = ( [15, q{}] );
push @config, (map { [ 15, q{P(...|State_}.$_.q{)} ] } (1..$#{$trans->[0]}+1));
my $tbl = Text::SimpleTable->new(@config);
for my $row (0..$#{$trans}) {
my @data;
# quem liga qual serie
for my $col (0..$#{$trans->[0]}) { push @data, sprintf(q{%.8e},$trans->[$row][$col]); }
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
}
print qq{\nTransition matrix.\n};
print $tbl->draw;
undef @config;
@config = ( [15, q{}] );
push @config, (map { [ 15, q{P(...|State_}.$_.q{)} ] } (1..$#{$trans->[0]}+1));
my $tbl1 = Text::SimpleTable->new(@config);
my $count = 0;
for my $row (@{$keys}) {
#$tbl1->row( $row, ( map { my $v = $emis->{$row}[$_]; if ($v > 1e-4 || $v < 1e4 ) { $v = sprintf(q{%.12f},$start->[$_]) } else { $v = sprintf(q{%.8e},$start->[$_]) }; $v } (0..$#{$trans->[0]}) ) );
my @data;
for my $col (0..$#{$trans->[0]}) { push @data, sprintf(q{%.8e},$emis->{$row}[$col]); }
$tbl1->row( qq{P($row|...)}, @data );
$tbl1->hr if $count != $#{$keys};
$count++;
}
print qq{\nEmission matrix.\n};
print $tbl1->draw;
undef @config;
push @config, (map { [ 15, q{State_}.$_ ] } (1..$#{$start}+1));
my $tbl2 = Text::SimpleTable->new(@config);
#my @data;
#for my $i (0..$#{$trans->[0]}) { push @data, sprintf(q{%.8e},$start->[$i]); }
#$tbl2->row(@data);
$tbl2->row( ( map { my $v = $start->[$_]; if ($v > 1e-4 && $v < 1e4 || $v == 0 ) {
$v = sprintf(q{%.12f},$start->[$_])
}
else {
$v = sprintf(q{%.8e},$start->[$_]) }; $v
} (0..$#{$trans->[0]}) ) );
print qq{\nStart probabilities.\n};
print $tbl2->draw;
}
return;
}
1; # Magic true value required at end of module
__END__
lib/Algorithm/BaumWelch.pm view on Meta::CPAN
# | | |__ARRAY REFERENCE (3) ---LONG_LIST_OF_SCALARS--- [ length = 2 ]: 0.0662208150521236, 0.864944219467616 [ '->[1][0][1]' ]
# | |
# | |__HASH REFERENCE (2) [ '->[1][1]' ] # emission matrix
# | | |
# | | |__'obs3'=>ARRAY REFERENCE (3) ---LONG_LIST_OF_SCALARS--- [ length = 2 ]: 0.211448366743702, 0.465609305295478 [ '->[1][1]{obs3}' ]
# | | |
# | | |__'obs1'=>ARRAY REFERENCE (3) ---LONG_LIST_OF_SCALARS--- [ length = 2 ]: 0.640481492730478, 7.18630557481621e-09 [ '->[1][1]{obs1}' ]
# | | |
# | | |__'obs2'=>ARRAY REFERENCE (3) ---LONG_LIST_OF_SCALARS--- [ length = 2 ]: 0.14807014052582, 0.534390687518216 [ '->[1][1]{obs2}' ]
# | |
# | |__ARRAY REFERENCE (2) ---LONG_LIST_OF_SCALARS--- [ length = 2 ]: 4.52394236439737e-30, 1 [ '->[1][2]' ] # start conditions
# |
# |__ ARRAY REFERENCE (1) [ '->[2]' ] # perp
#
=head1 SEE ALSO
Algorithm::Viterbi
=cut
( run in 0.403 second using v1.01-cache-2.11-cpan-0d8aa00de5b )