Algorithm-AM

 view release on metacpan or  search on metacpan

lib/Algorithm/AM/DataSet.pm  view on Meta::CPAN

#pod
#pod =head1 DESCRIPTION
#pod
#pod This package contains a list of items that can be used by
#pod L<Algorithm::AM> or L<Algorithm::AM::Batch> for classification.
#pod DataSets can be made one item at a time via the L</add_item> method,
#pod or they can be read from files via the L</dataset_from_file> function.
#pod
#pod =head2 C<new>
#pod
#pod Creates a new DataSet object. You must provide a C<cardinality> argument
#pod indicating the number of features to be contained in each data vector.
#pod You can then add items via the add_item method. Each item will contain
#pod a feature vector, and also optionally a class label and a comment
#pod (also called a "spec").
#pod
#pod =cut
sub new {
    my $class = shift;
    my %opts  = @_;

    # _check_opts croaks on missing/unknown options, so an object is only
    # ever blessed from a validated option hash
    my $self = bless _check_opts(%opts), $class;

    # set up the empty item list and class bookkeeping
    $self->_init;

    return $self;
}

# Validate the constructor options.
# Returns a hash ref suitable for initializing $self.
# 'cardinality' is currently the only option, and it is required;
# croaks if it is missing or if any other option is given.
sub _check_opts {
    my (%opts) = @_;

    # pull the one known option out; whatever remains is unrecognized
    my $cardinality = delete $opts{cardinality};
    croak q{Failed to provide 'cardinality' parameter}
        unless defined $cardinality;

    # the names are sorted so that the error message is predictable
    # (this makes testing possible)
    if(my @unknown = sort keys %opts){
        croak 'Unknown parameters in DataSet constructor: ' .
            (join ', ', @unknown);
    }

    return { cardinality => $cardinality };
}

# Set the object's internal bookkeeping to its empty starting state.
# Only the four keys below are written; other entries in $self
# (such as 'cardinality') are left alone.
sub _init {
    my ($self) = @_;

    my %initial_state = (
        # contains all of the items in the dataset
        items           => [],

        # maps unique class labels to unique integers, which are the
        # indices of those labels in class_list; the indices must start
        # at 1 for AM to work, as 0 is reserved for heterogeneity
        class_num_index => {},

        # the class strings, in an order that matches the indices
        # stored in class_num_index
        class_list      => [],

        # the total number of different classes contained in the data set
        num_classes     => 0,
    );

    # keys and values of an unmodified hash come back in matching order,
    # so this slice assignment pairs each key with its own value
    @{$self}{ keys %initial_state } = values %initial_state;

    return;
}

#pod =head2 C<cardinality>
#pod
#pod Returns the number of features contained in the feature vector of a
#pod single item.
#pod
#pod =cut
sub cardinality {
    my $self = shift;

    # read-only accessor for the value stored under 'cardinality'
    my $num_features = $self->{cardinality};
    return $num_features;
}

#pod =head2 C<size>
#pod
#pod Returns the number of items in the data set.
#pod
#pod =cut
sub size {
    my $self = shift;

    # the element count of the 'items' array is the data set size
    my $items = $self->{items};
    return scalar @{$items};
}

#pod =head2 C<classes>
#pod
#pod Returns the list of all unique class labels in the data set.
#pod
#pod =cut
sub classes {
    my $self = shift;

    # hand back a flat copy of the labels stored in 'class_list'
    my $class_list = $self->{class_list};
    return @{$class_list};
}

#pod =head2 C<add_item>
#pod
#pod Adds a new item to the data set. The input may be either an
#pod L<Algorithm::AM::DataSet::Item> object, or the arguments to create
#pod one via its constructor (features, class, comment). This method will
#pod croak if the cardinality of the item does not match L</cardinality>.
#pod
#pod =cut
sub add_item {
    my ($self, @args) = @_;
    my $item;
    if('Algorithm::AM::DataSet::Item' eq ref $args[0]){
        $item = $args[0];
    }else{
        $item = Algorithm::AM::DataSet::Item->new(@args);
    }

    if($self->cardinality != $item->cardinality){
        croak 'Expected ' . $self->cardinality .



( run in 0.454 second using v1.01-cache-2.11-cpan-97f6503c9c8 )