Algorithm-AM
view release on metacpan or search on metacpan
lib/Algorithm/AM.pm view on Meta::CPAN
# recalculate the lattice sizes with new number of active features
my $lattice_sizes = _compute_lattice_sizes($num_feats);
## $activeContexts = 1 << $activeVar;
my $nullcontext = pack "b64", '0' x 64;
my $given_excluded = 0;
my $test_in_training = 0;
# initialize classification-related variables
# it is important to empty the existing hashes and arrays in
# place rather than assigning new ones with [] or {}. This is
# because the XS code has access to the existing references,
# and would be accessing the wrong variables if we replaced
# them.
%{$self->{context_size}} = ();
%{$self->{itemcontextchainhead}} = ();
%{$self->{context_to_class}} = ();
%{$self->{pointers}} = ();
%{$self->{raw_gang}} = ();
@{$self->{itemcontextchain}} = ();
# big ints are used in AM.xs; these consist of an
# array of 8 unsigned longs
foreach (@{$self->{sum}}) {
$_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
}
# calculate context labels and associated structures for
# the entire data set
for my $index ( 0 .. $training_set->size - 1 ) {
my $context = _context_label(
# Note: this must be copied to prevent infinite loop;
# see todo note for _context_label
[@{$lattice_sizes}],
$training_set->get_item($index)->features,
$test_item->features,
$self->exclude_nulls
);
$self->{context_size}->{$context}++;
# TODO: explain itemcontextchain and itemcontextchainhead
$self->{itemcontextchain}->[$index] =
$self->{itemcontextchainhead}->{$context};
$self->{itemcontextchainhead}->{$context} = $index;
# store the class for the subcontext; if there
# is already a different class for this subcontext,
# then store 0, signifying heterogeneity.
my $class = $training_set->_index_for_class(
$training_set->get_item($index)->class);
if ( defined $self->{context_to_class}->{$context} ) {
if($self->{context_to_class}->{$context} != $class){
$self->{context_to_class}->{$context} = 0;
}
}
else {
$self->{context_to_class}->{$context} = $class;
}
}
# $nullcontext is all 0's, which is a context label for
# a training item that exactly matches the test item. Exclude
# the item if required, and set a flag that the test item was
# found in the training set.
if ( exists $self->{context_to_class}->{$nullcontext} ) {
$test_in_training = 1;
if($self->exclude_given){
delete $self->{context_to_class}->{$nullcontext};
$given_excluded = 1;
}
}
# initialize the results object to hold all of the configuration
# info.
my $result = Algorithm::AM::Result->new(
given_excluded => $given_excluded,
cardinality => $num_feats,
exclude_nulls => $self->exclude_nulls,
count_method => $self->linear ? 'linear' : 'squared',
training_set => $training_set,
test_item => $test_item,
test_in_train => $test_in_training,
);
$log->debug(${$result->config_info})
if($log->is_debug);
$result->start_time([ (localtime)[0..2] ]);
$self->_fillandcount(
$lattice_sizes, $self->linear ? 1 : 0);
$result->end_time([ (localtime)[0..2] ]);
unless ($self->{pointers}->{'grand_total'}) {
#TODO: is this tested yet?
if($log->is_warn){
$log->warn('No training items considered. ' .
'No prediction possible.');
}
return;
}
$result->_process_stats(
# TODO: after refactoring to a "guts" object,
# just pass that in
$self->{sum},
$self->{pointers},
$self->{itemcontextchainhead},
$self->{itemcontextchain},
$self->{context_to_class},
$self->{raw_gang},
$lattice_sizes,
$self->{context_size}
);
return $result;
}
# Since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the four
# lattices.
sub _compute_lattice_sizes {
my ($num_feats) = @_;
use integer;
( run in 1.668 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )