Algorithm-AM
view release on metacpan or search on metacpan
lib/Algorithm/AM.pm view on Meta::CPAN
# big ints are used in AM.xs; these consist of an
# array of 8 unsigned longs
foreach (@{$self->{sum}}) {
$_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
}
# calculate context labels and associated structures for
# the entire data set
for my $index ( 0 .. $training_set->size - 1 ) {
my $context = _context_label(
# Note: this must be copied to prevent infinite loop;
# see todo note for _context_label
[@{$lattice_sizes}],
$training_set->get_item($index)->features,
$test_item->features,
$self->exclude_nulls
);
$self->{context_size}->{$context}++;
# TODO: explain itemcontextchain and itemcontextchainhead
$self->{itemcontextchain}->[$index] =
$self->{itemcontextchainhead}->{$context};
$self->{itemcontextchainhead}->{$context} = $index;
# store the class for the subcontext; if there
# is already a different class for this subcontext,
# then store 0, signifying heterogeneity.
my $class = $training_set->_index_for_class(
$training_set->get_item($index)->class);
if ( defined $self->{context_to_class}->{$context} ) {
if($self->{context_to_class}->{$context} != $class){
$self->{context_to_class}->{$context} = 0;
}
}
else {
$self->{context_to_class}->{$context} = $class;
}
}
# $nullcontext is all 0's, which is a context label for
# a training item that exactly matches the test item. Exclude
# the item if required, and set a flag that the test item was
# found in the training set.
if ( exists $self->{context_to_class}->{$nullcontext} ) {
$test_in_training = 1;
if($self->exclude_given){
delete $self->{context_to_class}->{$nullcontext};
$given_excluded = 1;
}
}
# initialize the results object to hold all of the configuration
# info.
my $result = Algorithm::AM::Result->new(
given_excluded => $given_excluded,
cardinality => $num_feats,
exclude_nulls => $self->exclude_nulls,
count_method => $self->linear ? 'linear' : 'squared',
training_set => $training_set,
test_item => $test_item,
test_in_train => $test_in_training,
);
$log->debug(${$result->config_info})
if($log->is_debug);
$result->start_time([ (localtime)[0..2] ]);
$self->_fillandcount(
$lattice_sizes, $self->linear ? 1 : 0);
$result->end_time([ (localtime)[0..2] ]);
unless ($self->{pointers}->{'grand_total'}) {
#TODO: is this tested yet?
if($log->is_warn){
$log->warn('No training items considered. ' .
'No prediction possible.');
}
return;
}
$result->_process_stats(
# TODO: after refactoring to a "guts" object,
# just pass that in
$self->{sum},
$self->{pointers},
$self->{itemcontextchainhead},
$self->{itemcontextchain},
$self->{context_to_class},
$self->{raw_gang},
$lattice_sizes,
$self->{context_size}
);
return $result;
}
# since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the four
# lattices.
sub _compute_lattice_sizes {
    my ($num_feats) = @_;

    # All arithmetic below is integer arithmetic, so every division
    # truncates; any odd remainder lands in the second part of a split.
    use integer;

    # First split the features into a lower and an upper half...
    my $lower = $num_feats / 2;
    my $upper = $num_feats - $lower;

    # ...then split each half in two again, giving the four lattice
    # sizes in order: (lower-first, lower-second, upper-first,
    # upper-second).
    my @sizes = map {
        my $first = $_ / 2;
        ( $first, $_ - $first );
    } ( $lower, $upper );

    # Input:  the number of features in use.
    # Output: an arrayref of four counts which sum to that number.
    return \@sizes;
}
# Create binary context labels for a training item
# by comparing it with a test item. Each training item
# needs one binary label for each sublattice (of which
# there are currently four), but this is packed into a
# single scalar representing an array of 4 shorts (this
# format is used in the XS side).
# TODO: we have to copy lattice_sizes out of $self in order to
# iterate it. Otherwise it goes on forever. Why?
sub _context_label {
# inputs:
# number of active features in each lattice,
( run in 0.546 second using v1.01-cache-2.11-cpan-119454b85a5 )