Algorithm-AM

 view release on metacpan or  search on metacpan

lib/Algorithm/AM.pm  view on Meta::CPAN

    # big ints are used in AM.xs; these consist of an
    # array of 8 unsigned longs
    foreach (@{$self->{sum}}) {
        $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
    }

    # calculate context labels and associated structures for
    # the entire data set
    for my $index ( 0 .. $training_set->size - 1 ) {
        my $context = _context_label(
            # Note: this must be copied to prevent infinite loop;
            # see todo note for _context_label
            [@{$lattice_sizes}],
            $training_set->get_item($index)->features,
            $test_item->features,
            $self->exclude_nulls
        );
        $self->{context_size}->{$context}++;
        # TODO: explain itemcontextchain and itemcontextchainhead
        $self->{itemcontextchain}->[$index] =
            $self->{itemcontextchainhead}->{$context};
        $self->{itemcontextchainhead}->{$context} = $index;

        # store the class for the subcontext; if there
        # is already a different class for this subcontext,
        # then store 0, signifying heterogeneity.
        my $class = $training_set->_index_for_class(
            $training_set->get_item($index)->class);
        if ( defined $self->{context_to_class}->{$context} ) {
            if($self->{context_to_class}->{$context} != $class){
                $self->{context_to_class}->{$context} = 0;
            }
        }
        else {
            $self->{context_to_class}->{$context} = $class;
        }
    }
    # $nullcontext is all 0's, which is a context label for
    # a training item that exactly matches the test item. Exclude
    # the item if required, and set a flag that the test item was
    # found in the training set.
    if ( exists $self->{context_to_class}->{$nullcontext} ) {
        $test_in_training = 1;
        if($self->exclude_given){
           delete $self->{context_to_class}->{$nullcontext};
           $given_excluded = 1;
        }
    }
    # initialize the results object to hold all of the configuration
    # info.
    my $result = Algorithm::AM::Result->new(
        given_excluded => $given_excluded,
        cardinality => $num_feats,
        exclude_nulls => $self->exclude_nulls,
        count_method => $self->linear ? 'linear' : 'squared',
        training_set => $training_set,
        test_item => $test_item,
        test_in_train => $test_in_training,
    );

    $log->debug(${$result->config_info})
        if($log->is_debug);

    $result->start_time([ (localtime)[0..2] ]);
    $self->_fillandcount(
        $lattice_sizes, $self->linear ? 1 : 0);
    $result->end_time([ (localtime)[0..2] ]);

    unless ($self->{pointers}->{'grand_total'}) {
        #TODO: is this tested yet?
        if($log->is_warn){
            $log->warn('No training items considered. ' .
                'No prediction possible.');
        }
        return;
    }

    $result->_process_stats(
        # TODO: after refactoring to a "guts" object,
        # just pass that in
        $self->{sum},
        $self->{pointers},
        $self->{itemcontextchainhead},
        $self->{itemcontextchain},
        $self->{context_to_class},
        $self->{raw_gang},
        $lattice_sizes,
        $self->{context_size}
    );
    return $result;
}

# Since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the four
# lattices.
sub _compute_lattice_sizes {
    my ($num_feats) = @_;

    # Every division below must truncate, so keep integer arithmetic
    # in force for the whole sub.
    use integer;

    # First divide the features into two portions; when the count is
    # odd, the second portion receives the extra feature.
    my $front_portion = $num_feats / 2;
    my $back_portion  = $num_feats - $front_portion;

    # Then divide each portion in two the same way, yielding the four
    # sublattice sizes in order. Within a portion, the second sublattice
    # receives the extra feature when the portion is odd.
    my @sizes;
    for my $portion ( $front_portion, $back_portion ) {
        my $smaller = $portion / 2;
        push @sizes, $smaller, $portion - $smaller;
    }

    # Returns a reference to a four-element array of sizes.
    return \@sizes;
}

# Create binary context labels for a training item
# by comparing it with a test item. Each training item
# needs one binary label for each sublattice (of which
# there are currently four), but this is packed into a
# single scalar representing an array of 4 shorts (this
# format is used in the XS side).

# TODO: we have to copy lattice_sizes out of $self in order to
# iterate it. Otherwise it goes on forever. Why?
sub _context_label {
    # inputs:
    # number of active features in each lattice,



( run in 0.546 second using v1.01-cache-2.11-cpan-119454b85a5 )