Algorithm-AM

 view release on metacpan or  search on metacpan

lib/Algorithm/AM.pm  view on Meta::CPAN

        $self->{context_size}->{$context}++;
        # TODO: explain itemcontextchain and itemcontextchainhead
        $self->{itemcontextchain}->[$index] =
            $self->{itemcontextchainhead}->{$context};
        $self->{itemcontextchainhead}->{$context} = $index;

        # store the class for the subcontext; if there
        # is already a different class for this subcontext,
        # then store 0, signifying heterogeneity.
        my $class = $training_set->_index_for_class(
            $training_set->get_item($index)->class);
        if ( defined $self->{context_to_class}->{$context} ) {
            if($self->{context_to_class}->{$context} != $class){
                $self->{context_to_class}->{$context} = 0;
            }
        }
        else {
            $self->{context_to_class}->{$context} = $class;
        }
    }
    # $nullcontext is all 0's, which is a context label for
    # a training item that exactly matches the test item. Exclude
    # the item if required, and set a flag that the test item was
    # found in the training set.
    if ( exists $self->{context_to_class}->{$nullcontext} ) {
        $test_in_training = 1;
        if($self->exclude_given){
           delete $self->{context_to_class}->{$nullcontext};
           $given_excluded = 1;
        }
    }
    # initialize the results object to hold all of the configuration
    # info.
    my $result = Algorithm::AM::Result->new(
        given_excluded => $given_excluded,
        cardinality => $num_feats,
        exclude_nulls => $self->exclude_nulls,
        count_method => $self->linear ? 'linear' : 'squared',
        training_set => $training_set,
        test_item => $test_item,
        test_in_train => $test_in_training,
    );

    $log->debug(${$result->config_info})
        if($log->is_debug);

    $result->start_time([ (localtime)[0..2] ]);
    $self->_fillandcount(
        $lattice_sizes, $self->linear ? 1 : 0);
    $result->end_time([ (localtime)[0..2] ]);

    unless ($self->{pointers}->{'grand_total'}) {
        #TODO: is this tested yet?
        if($log->is_warn){
            $log->warn('No training items considered. ' .
                'No prediction possible.');
        }
        return;
    }

    $result->_process_stats(
        # TODO: after refactoring to a "guts" object,
        # just pass that in
        $self->{sum},
        $self->{pointers},
        $self->{itemcontextchainhead},
        $self->{itemcontextchain},
        $self->{context_to_class},
        $self->{raw_gang},
        $lattice_sizes,
        $self->{context_size}
    );
    return $result;
}

# since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the the four
# lattices.
sub _compute_lattice_sizes {
    my ($num_feats) = @_;

    use integer;
    my @lattice_sizes;
    my $half = $num_feats / 2;
    $lattice_sizes[0] = $half / 2;
    $lattice_sizes[1] = $half - $lattice_sizes[0];
    $half         = $num_feats - $half;
    $lattice_sizes[2] = $half / 2;
    $lattice_sizes[3] = $half - $lattice_sizes[2];
    return \@lattice_sizes;
}

# Create binary context labels for a training item
# by comparing it with a test item. Each training item
# needs one binary label for each sublattice (of which
# there are currently four), but this is packed into a
# single scalar representing an array of 4 shorts (this
# format is used in the XS side).

# TODO: we have to copy lattice_sizes out of $self in order to
# iterate it. Otherwise it goes on forever. Why?
sub _context_label {
    # inputs:
    # number of active features in each lattice,
    # training item features, test item features,
    # and boolean indicating if nulls should be excluded
    my ($lattice_sizes, $train_feats, $test_feats, $skip_nulls) = @_;

    # feature index
    my $index        = 0;
    # the binary context labels for each separate lattice
    my @context_list    = ();

    for my $a (@$lattice_sizes) {
        # binary context label for a single sublattice
        my $context = 0;
        # loop through all features in the sublattice
        # assign 0 if features match, 1 if they do not
        for ( ; $a ; --$a ) {



( run in 0.708 second using v1.01-cache-2.11-cpan-f0fbb3f571b )