Algorithm-AM
view release on metacpan or search on metacpan
lib/Algorithm/AM.pm view on Meta::CPAN
$self->{context_size}->{$context}++;
# TODO: explain itemcontextchain and itemcontextchainhead
$self->{itemcontextchain}->[$index] =
$self->{itemcontextchainhead}->{$context};
$self->{itemcontextchainhead}->{$context} = $index;
# store the class for the subcontext; if there
# is already a different class for this subcontext,
# then store 0, signifying heterogeneity.
my $class = $training_set->_index_for_class(
$training_set->get_item($index)->class);
if ( defined $self->{context_to_class}->{$context} ) {
if($self->{context_to_class}->{$context} != $class){
$self->{context_to_class}->{$context} = 0;
}
}
else {
$self->{context_to_class}->{$context} = $class;
}
}
# $nullcontext is all 0's, which is a context label for
# a training item that exactly matches the test item. Exclude
# the item if required, and set a flag that the test item was
# found in the training set.
if ( exists $self->{context_to_class}->{$nullcontext} ) {
$test_in_training = 1;
if($self->exclude_given){
delete $self->{context_to_class}->{$nullcontext};
$given_excluded = 1;
}
}
# initialize the results object to hold all of the configuration
# info.
my $result = Algorithm::AM::Result->new(
given_excluded => $given_excluded,
cardinality => $num_feats,
exclude_nulls => $self->exclude_nulls,
count_method => $self->linear ? 'linear' : 'squared',
training_set => $training_set,
test_item => $test_item,
test_in_train => $test_in_training,
);
$log->debug(${$result->config_info})
if($log->is_debug);
$result->start_time([ (localtime)[0..2] ]);
$self->_fillandcount(
$lattice_sizes, $self->linear ? 1 : 0);
$result->end_time([ (localtime)[0..2] ]);
unless ($self->{pointers}->{'grand_total'}) {
#TODO: is this tested yet?
if($log->is_warn){
$log->warn('No training items considered. ' .
'No prediction possible.');
}
return;
}
$result->_process_stats(
# TODO: after refactoring to a "guts" object,
# just pass that in
$self->{sum},
$self->{pointers},
$self->{itemcontextchainhead},
$self->{itemcontextchain},
$self->{context_to_class},
$self->{raw_gang},
$lattice_sizes,
$self->{context_size}
);
return $result;
}
# since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the the four
# lattices.
sub _compute_lattice_sizes {
my ($num_feats) = @_;
use integer;
my @lattice_sizes;
my $half = $num_feats / 2;
$lattice_sizes[0] = $half / 2;
$lattice_sizes[1] = $half - $lattice_sizes[0];
$half = $num_feats - $half;
$lattice_sizes[2] = $half / 2;
$lattice_sizes[3] = $half - $lattice_sizes[2];
return \@lattice_sizes;
}
# Create binary context labels for a training item
# by comparing it with a test item. Each training item
# needs one binary label for each sublattice (of which
# there are currently four), but this is packed into a
# single scalar representing an array of 4 shorts (this
# format is used in the XS side).
# TODO: we have to copy lattice_sizes out of $self in order to
# iterate it. Otherwise it goes on forever. Why?
sub _context_label {
# inputs:
# number of active features in each lattice,
# training item features, test item features,
# and boolean indicating if nulls should be excluded
my ($lattice_sizes, $train_feats, $test_feats, $skip_nulls) = @_;
# feature index
my $index = 0;
# the binary context labels for each separate lattice
my @context_list = ();
for my $a (@$lattice_sizes) {
# binary context label for a single sublattice
my $context = 0;
# loop through all features in the sublattice
# assign 0 if features match, 1 if they do not
for ( ; $a ; --$a ) {
( run in 0.708 second using v1.01-cache-2.11-cpan-f0fbb3f571b )