Algorithm-AM

 view release on metacpan or  search on metacpan

lib/Algorithm/AM.pm  view on Meta::CPAN

    # recalculate the lattice sizes with new number of active features
    my $lattice_sizes = _compute_lattice_sizes($num_feats);
##  $activeContexts = 1 << $activeVar;

    my $nullcontext = pack "b64", '0' x 64;

    my $given_excluded = 0;
    my $test_in_training   = 0;

    # initialize classification-related variables
    # it is important to dereference rather than just
    # assigning a new one with [] or {}. This is because
    # the XS code has access to the existing reference,
    # but will be accessing the wrong variable if we
    # change it.
    %{$self->{context_size}}             = ();
    %{$self->{itemcontextchainhead}}    = ();
    %{$self->{context_to_class}}      = ();
    %{$self->{pointers}}                = ();
    %{$self->{raw_gang}}                    = ();
    @{$self->{itemcontextchain}}        = ();
    # big ints are used in AM.xs; these consist of an
    # array of 8 unsigned longs
    foreach (@{$self->{sum}}) {
        $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
    }

    # calculate context labels and associated structures for
    # the entire data set
    for my $index ( 0 .. $training_set->size - 1 ) {
        my $context = _context_label(
            # Note: this must be copied to prevent infinite loop;
            # see todo note for _context_label
            [@{$lattice_sizes}],
            $training_set->get_item($index)->features,
            $test_item->features,
            $self->exclude_nulls
        );
        $self->{context_size}->{$context}++;
        # TODO: explain itemcontextchain and itemcontextchainhead
        $self->{itemcontextchain}->[$index] =
            $self->{itemcontextchainhead}->{$context};
        $self->{itemcontextchainhead}->{$context} = $index;

        # store the class for the subcontext; if there
        # is already a different class for this subcontext,
        # then store 0, signifying heterogeneity.
        my $class = $training_set->_index_for_class(
            $training_set->get_item($index)->class);
        if ( defined $self->{context_to_class}->{$context} ) {
            if($self->{context_to_class}->{$context} != $class){
                $self->{context_to_class}->{$context} = 0;
            }
        }
        else {
            $self->{context_to_class}->{$context} = $class;
        }
    }
    # $nullcontext is all 0's, which is a context label for
    # a training item that exactly matches the test item. Exclude
    # the item if required, and set a flag that the test item was
    # found in the training set.
    if ( exists $self->{context_to_class}->{$nullcontext} ) {
        $test_in_training = 1;
        if($self->exclude_given){
           delete $self->{context_to_class}->{$nullcontext};
           $given_excluded = 1;
        }
    }
    # initialize the results object to hold all of the configuration
    # info.
    my $result = Algorithm::AM::Result->new(
        given_excluded => $given_excluded,
        cardinality => $num_feats,
        exclude_nulls => $self->exclude_nulls,
        count_method => $self->linear ? 'linear' : 'squared',
        training_set => $training_set,
        test_item => $test_item,
        test_in_train => $test_in_training,
    );

    $log->debug(${$result->config_info})
        if($log->is_debug);

    $result->start_time([ (localtime)[0..2] ]);
    $self->_fillandcount(
        $lattice_sizes, $self->linear ? 1 : 0);
    $result->end_time([ (localtime)[0..2] ]);

    unless ($self->{pointers}->{'grand_total'}) {
        #TODO: is this tested yet?
        if($log->is_warn){
            $log->warn('No training items considered. ' .
                'No prediction possible.');
        }
        return;
    }

    $result->_process_stats(
        # TODO: after refactoring to a "guts" object,
        # just pass that in
        $self->{sum},
        $self->{pointers},
        $self->{itemcontextchainhead},
        $self->{itemcontextchain},
        $self->{context_to_class},
        $self->{raw_gang},
        $lattice_sizes,
        $self->{context_size}
    );
    return $result;
}

# since we split the lattice in four, we have to decide which features
# go where. Given the number of features being used, return an arrayref
# containing the number of features to be used in each of the the four
# lattices.
sub _compute_lattice_sizes {
    my ($num_feats) = @_;

    use integer;



( run in 1.668 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )