Algorithm-AM
view release on metacpan or search on metacpan
- grand_total/total_pointers/total_score
- datacap
- Where the documentation says to see 'the red book' or 'the green book' for details, remove the reference and put the details in the text!
- Change the POD to use Pod::Weaver properly (=method, etc.).
## Parallel Algorithm thoughts
pseudocode for a distributed lattice
decide which features go in each lattice;
fill individual lattices without throwing any out because of heterogeneity
(list of supracontexts) lattice = lattices[0]
for 1..$#$lattices - 1
combine(lattice, lattices[$_])
combine_final(lattice, lattices[$#lattices])
list<Supracontext> combine(lat1, lat2){
list<Supra> output;
for (Supra s1 : lat1) {
for (Supra s2 : lat2) {
Item[] data = intersection(s1.data, s2.data);
lib/Algorithm/AM.pm view on Meta::CPAN
# do all of the classification data structure initialization here,
# as well as calling the XS initialization method.
sub _initialize {
my ($self) = @_;
my $train = $self->training_set;
# compute sub-lattice sizes here so that lattice space can be
# allocated in the _xs_initialize method. If certain features are
# thrown out later, each sub-lattice can only get smaller, so
# this is safe to do once here.
my $lattice_sizes = _compute_lattice_sizes($train->cardinality);
# sum is initialized to a list of zeros
@{$self->{sum}} = (0.0) x ($train->num_classes + 1);
# preemptively allocate memory
# TODO: not sure what this does
@{$self->{itemcontextchain}} = (0) x $train->size;
lib/Algorithm/AM/algorithm.pod view on Meta::CPAN
Exemplars that seem less similar to the test item than those that seem
more similar can still have a magnified effect if there are many of
them. This is known as the I<gang effect>.
=item *
AM accounts for I<leakage>.
For instance, it is possible for someone to accidentally say "snew"
instead of "snowed", in analogy with "know/knew", "grow/grew",
"throw/threw", "blow/blew", etc. (I've never done this myself, though
I know someone who has.) In rule-based modeling, this could never
occur; in AM, this is predicted to occur, though with very low
frequency.
=back
=head1 AUTHOR
Theron Stanford <shixilun@yahoo.com>, Nathan Glenn <garfieldnate@gmail.com>
t/01-Item.t view on Meta::CPAN
plan tests => 13;
use Test::NoWarnings;
use Test::Exception;
use Algorithm::AM::DataSet::Item 'new_item';
test_constructor();
test_accessors();
# test that the constructor lives/dies when given valid/invalid parameters
sub test_constructor {
# The error should be thrown from Tiny.pm, the caller of DataSet,
# not from DataSet (tests that @CARP_NOT is working properly).
throws_ok {
Algorithm::AM::DataSet::Item->new();
} qr/Must provide 'features' parameter of type array ref.*Item.t/,
'constructor dies with missing features parameter';
throws_ok {
Algorithm::AM::DataSet::Item->new(features => 'hello');
} qr/Must provide 'features' parameter of type array ref.*Item.t/,
'constructor dies with incorrect features parameter';
throws_ok {
Algorithm::AM::DataSet::Item->new(
features => ['a'],
foo => 'baz',
bar => 'qux'
);
} qr/Unknown parameters: bar,foo.*Item.t/,
'constructor dies with unknown parameters';
my $item = Algorithm::AM::DataSet::Item->new(features => ['a','b']);
isa_ok($item, 'Algorithm::AM::DataSet::Item');
t/02-DataSet.t view on Meta::CPAN
my $data_dir = path($Bin, 'data');
test_constructor();
test_data();
test_dataset_from_file();
test_private_data();
# test that the constructor lives/dies when given valid/invalid
# parameters, and that state is set correctly
sub test_constructor {
throws_ok {
Algorithm::AM::DataSet->new();
} qr/Failed to provide 'cardinality' parameter/,
q<dies without 'cardinality' parameter>;
throws_ok {
Algorithm::AM::DataSet->new(
cardinality => 3,
foo => 'bar',
baz => 'buff'
);
} qr/Unknown parameters in DataSet constructor: baz, foo/,
'dies with unknown parameters';
lives_ok {
Algorithm::AM::DataSet->new(
t/02-DataSet.t view on Meta::CPAN
$dataset->add_item(
features => ['a','b','d'],
class => 'c',
comment => 'stuff'
);
is($dataset->num_classes, 2, 'data set has 2 classes');
is($dataset->get_item(1)->comment, 'stuff', 'get_item');
throws_ok {
$dataset->add_item(
features => ['3','1'],
class => 'c',
comment => 'comment'
);
} qr/Expected 3 features, but found 2 in 3 1 \(comment\)/,
'add_item fails with wrong number of features';
# The error should be thrown from Tiny.pm, the caller of DataSet,
# not from DataSet (tests that @CARP_NOT is working properly).
throws_ok {
$dataset->add_item();
} qr/Must provide 'features' parameter of type array ref.*DataSet.t/,
'add_item fails with missing features parameter';
return;
}
# test the dataset_from_file function
sub test_dataset_from_file {
subtest 'read nocommas data set' => sub {
plan tests => 5;
t/02-DataSet.t view on Meta::CPAN
);
is($dataset->cardinality, 3, 'cardinality');
is($dataset->size, 5, 'size');
my $item = $dataset->get_item(0);
is($item->class, 'e', 'item class');
is_deeply($item->features, ['3', '1', '0'],
'item features');
is($item->comment, 'myFirstCommentHere');
};
throws_ok {
my $dataset = dataset_from_file(
path => path($data_dir, 'chapter_3_commas.txt'),
);
} qr/Failed to provide 'format' parameter/,
'fail with missing format parameter';
throws_ok {
my $dataset = dataset_from_file(
path => path($data_dir, 'chapter_3_commas.txt'),
format => 'buh'
);
} qr/Unknown value buh for format parameter \(should be 'commas' or 'nocommas'\)/,
'fail with incorrect format parameter';
throws_ok {
my $dataset = dataset_from_file(
format => 'commas'
);
} qr/Failed to provide 'path' parameter/,
'fail with missing path parameter';
throws_ok {
my $dataset = dataset_from_file(
path => path($data_dir, 'nonexistent'),
format => 'commas'
);
} qr/Could not find file .*nonexistent/,
'fail with non-existent Path';
throws_ok {
my $dataset = dataset_from_file(
path => path($data_dir, 'bad_data_line.txt'),
format => 'nocommas'
);
} qr/Couldn't read data at line 2 in .*bad_data_line/,
'fail with malformed data file';
subtest 'data set with default unknown/null labels' => sub {
plan tests => 3;
my $dataset = dataset_from_file(
test_quadratic_classification($result);
test_analogical_set($result);
test_gang_effects($result);
test_linear_classification();
test_nulls();
test_given();
# test that methods die with bad input
sub test_input_checking {
throws_ok {
Algorithm::AM->new();
} qr/Missing required parameter 'training_set'/,
'dies when no training set provided';
throws_ok {
Algorithm::AM->new(
training_set => 'stuff',
);
} qr/Parameter training_set should be an Algorithm::AM::DataSet/,
'dies with bad training set';
throws_ok {
Algorithm::AM->new(
training_set => Algorithm::AM::DataSet->new(
cardinality => 3),
foo => 'bar'
);
} qr/Invalid attributes for Algorithm::AM: foo/,
'dies with bad argument';
throws_ok {
my $am = Algorithm::AM->new(
training_set => Algorithm::AM::DataSet->new(cardinality => 3),
);
$am->classify(
Algorithm::AM::DataSet::Item->new(
features => ['a']
)
);
} qr/Training set and test item do not have the same cardinality \(3 and 1\)/,
'dies with mismatched train/test cardinalities';
t/06-Batch.t view on Meta::CPAN
use Test::NoWarnings;
use Test::LongString;
plan tests => 12;
use t::TestAM qw(chapter_3_train chapter_3_test);
test_input_checking();
test_accessors();
test_classify();
# Check that Algorithm::AM::Batch->new and classify_all die with the
# expected error messages when given missing or invalid input.
sub test_input_checking {
    # Builds an empty data set of the requested cardinality. This is only
    # ever invoked from inside a throws_ok block, so the data set is still
    # constructed within the guarded code.
    my $dataset_with = sub {
        my ($cardinality) = @_;
        return Algorithm::AM::DataSet->new(cardinality => $cardinality);
    };

    # Builds a Batch whose training set has cardinality 3; also only ever
    # invoked from inside a throws_ok block.
    my $new_batch = sub {
        return Algorithm::AM::Batch->new(
            training_set => $dataset_with->(3),
        );
    };

    # classify_all reports the same error for a missing argument and for
    # an argument that is not a DataSet.
    my $need_dataset = qr/Must provide a DataSet to classify_all/;

    # --- constructor checks ---

    throws_ok { Algorithm::AM::Batch->new() }
        qr/Missing required parameter 'training_set'/,
        'dies when no training set provided';

    throws_ok { Algorithm::AM::Batch->new(training_set => 'stuff') }
        qr/Parameter training_set should be an Algorithm::AM::DataSet/,
        'dies with bad training set';

    throws_ok {
        Algorithm::AM::Batch->new(
            training_set => $dataset_with->(3),
            test_set     => $dataset_with->(3),
            foo          => 'bar',
        );
    }
    qr/Invalid attributes for Algorithm::AM::Batch/,
        'dies with bad argument';

    # --- classify_all checks ---

    throws_ok { $new_batch->()->classify_all($dataset_with->(4)) }
        qr/Training and test sets do not have the same cardinality \(3 and 4\)/,
        'dies with mismatched dataset cardinalities';

    throws_ok { $new_batch->()->classify_all() }
        $need_dataset,
        'dies with no input to classify';

    throws_ok { $new_batch->()->classify_all('foo') }
        $need_dataset,
        'dies with bad test set';

    return;
}
( run in 0.466 second using v1.01-cache-2.11-cpan-496ff517765 )