Algorithm-LibLinear

 view release on metacpan or  search on metacpan

lib/Algorithm/LibLinear/DataSet.pm  view on Meta::CPAN


# Serializes the data set as text: one line per entry, consisting of the
# entry's label followed by space-separated "index:value" pairs, with the
# pairs ordered by ascending numeric index. Every line is terminated by a
# newline; an empty data set yields the empty string.
sub as_string {
    args
        my $self => $InstanceOfPackage;

    my @lines = map {
        my $instance = $_;
        my $vector = $instance->{feature};
        # Sort numerically so that index 10 comes after index 9, not after 1.
        my @ordered_indices = sort { $a <=> $b } keys %$vector;
        my @pairs = map { "$_:$vector->{$_}" } @ordered_indices;
        join(' ', $instance->{label}, @pairs) . "\n";
    } @{ $self->as_arrayref };

    return join '', @lines;
}

# Parses LIBSVM/LIBLINEAR format text read line by line from a filehandle.
#
# Positional arguments (declared through args_pos):
#   $class  - the class name this method is invoked on.
#   $source - the filehandle to read from; it is consumed until EOF.
#
# Returns an ArrayRef of HashRefs, one per non-blank input line. Each HashRef
# has a numeric `label` and a `feature` HashRef mapping a numeric index to a
# numeric value.
#
# Blank or whitespace-only lines are skipped instead of being turned into a
# phantom instance with label 0 and no features.
sub parse_input_file {
    args_pos
        my $class => ClassName,
        my $source => FileHandle;

    my @data_set;
    while (defined(my $line = <$source>)) {
        # split ' ' (awk mode) discards leading whitespace as well as the
        # trailing newline, so an indented line still yields its label as the
        # first field rather than an empty string.
        my ($label, @feature) = split ' ', $line;
        next unless defined $label;  # Nothing but whitespace on this line.

        $label += 0;  # Numify, so that e.g. "+1" is stored as 1.
        my %feature = map {
            my ($index, $value) = split /:/;
            $index += 0;
            $value += 0;
            ($index => $value);
        } @feature;
        push @data_set, +{ feature => \%feature, label => $label, };
    }
    return \@data_set;
}

# Returns the number of entries in the array reference returned by
# as_arrayref.
sub size {
    my ($self) = @_;
    return scalar @{ $self->as_arrayref };
}

1;

__DATA__

=head1 NAME

Algorithm::LibLinear::DataSet

=head1 SYNOPSIS

  use Algorithm::LibLinear::DataSet;
  
  my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [
    +{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, },
    +{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, },
    +{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, },
    ...
  ]);
  my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
  my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file');
  my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ...");
  
  say $data_set->size;
  say $data_set->as_string;  # '1 1:0.70833 2:1 3:1 ...' (labels are numified on load, so "+1" is dumped as "1")
  
  __DATA__
  +1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 
  -1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1 
  +1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1 
  ...

=head1 DESCRIPTION

This class represents a set of labeled feature vectors used for learning.

=head1 METHODS

=head2 new(data_set => \@data_set)

Constructor.

C<data_set> is an ArrayRef of HashRef that has 2 keys: C<feature> and C<label>.
The value of C<feature> is a HashRef which represents a (sparse) feature vector. Its key is an index and corresponding value is a real number. The indices must be >= 1.
The value of C<label> is an integer: the class label to which the feature vector belongs.

=head2 load(fh => \*FH | filename => $path | string => $string)

Class method. Loads a data set from LIBSVM/LIBLINEAR format data, given as a filehandle, a file name, or a string.

=head2 as_string

Dumps the data set as a string in LIBSVM/LIBLINEAR format.

=head2 size

The number of instances (labeled feature vectors) in the data set.

=cut



( run in 1.035 second using v1.01-cache-2.11-cpan-483215c6ad5 )