Algorithm-AM
view release on metacpan or search on metacpan
lib/Algorithm/AM/DataSet.pm view on Meta::CPAN
package Algorithm::AM::DataSet;
use strict;
use warnings;
our $VERSION = '3.13';
# ABSTRACT: Manage data used by Algorithm::AM
use Carp;
use Algorithm::AM::DataSet::Item;
use Path::Tiny;
use Exporter::Easy (
OK => ['dataset_from_file']
);
#pod =head1 SYNOPSIS
#pod
#pod use Algorithm::AM::DataSet 'dataset_from_file';
#pod use Algorithm::AM::DataSet::Item 'new_item';
#pod my $dataset = Algorithm::AM::DataSet->new(cardinality => 10);
#pod # or
#pod $dataset = dataset_from_file(path => 'finnverb', format => 'nocommas');
#pod $dataset->add_item(
#pod new_item(features => [qw(a b c d e f g h i j)]));
#pod my $item = $dataset->get_item(2);
#pod
#pod =head1 DESCRIPTION
#pod
#pod This package contains a list of items that can be used by
#pod L<Algorithm::AM> or L<Algorithm::AM::Batch> for classification.
#pod DataSets can be made one item at a time via the L</add_item> method,
#pod or they can be read from files via the L</dataset_from_file> function.
#pod
#pod =head2 C<new>
#pod
#pod Creates a new DataSet object. You must provide a C<cardinality> argument
#pod indicating the number of features to be contained in each data vector.
#pod You can then add items via the add_item method. Each item will contain
#pod a feature vector, and also optionally a class label and a comment
#pod (also called a "spec").
#pod
#pod =cut
sub new {
    my ($class, %opts) = @_;

    # _check_opts croaks on invalid input; on success it hands back the
    # hash reference that becomes the object itself
    my $validated = _check_opts(%opts);
    my $self      = bless $validated, $class;

    # set up the (empty) item list and class bookkeeping
    $self->_init();

    return $self;
}
# Validate the named options passed to the constructor.
# Returns a reference to a fresh hash used to initialize $self.
# For now 'cardinality' is the only option; it is required, and any
# other option is rejected. Croaks on either kind of failure.
sub _check_opts {
    my (%opts) = @_;

    # remove 'cardinality' so that anything left over is unrecognized
    my $cardinality = delete $opts{cardinality};
    croak q{Failed to provide 'cardinality' parameter}
        unless defined $cardinality;

    # sorted so that the error message is deterministic and testable
    my @unknown = sort keys %opts;
    if (@unknown) {
        croak 'Unknown parameters in DataSet constructor: '
            . join(', ', @unknown);
    }

    return { cardinality => $cardinality };
}
# Reset the internal bookkeeping of the data set to its empty state.
# Other keys already stored in $self (such as 'cardinality') are left
# untouched. Returns nothing.
sub _init {
    my ($self) = @_;

    my %empty_state = (
        # every item held by the dataset
        items => [],

        # maps each unique class label to a unique integer, which is
        # the index of that label in class_list below; the indices
        # must start at 1 for AM to work, as 0 is reserved for
        # heterogeneity.
        class_num_index => {},

        # the class label strings, in an order that matches the
        # indices stored in class_num_index
        class_list => [],

        # how many different classes the data set contains
        num_classes => 0,
    );

    # keys and values iterate an unmodified hash in the same order, so
    # the slice pairs each key with its own value
    @{$self}{ keys %empty_state } = values %empty_state;

    return;
}
#pod =head2 C<cardinality>
#pod
#pod Returns the number of features contained in the feature vector of a
#pod single item.
#pod
#pod =cut
sub cardinality {
    # read-only accessor for the value stored under 'cardinality'
    my $self = shift;
    return $self->{cardinality};
}
#pod =head2 C<size>
( run in 1.209 second using v1.01-cache-2.11-cpan-39bf76dae61 )