Algorithm-LibLinear
view release on metacpan or search on metacpan
lib/Algorithm/LibLinear/DataSet.pm view on Meta::CPAN
# Serialize the data set as text, one line per entry.
#
# Each line is the entry's label followed by its "index:value" feature
# pairs, all separated by single spaces and terminated by a newline.
# Feature pairs are emitted in ascending numeric index order.
#
# Returns the concatenation of all lines (the empty string when the data
# set has no entries).
sub as_string {
    args
        my $self => $InstanceOfPackage;

    my @lines = map {
        my $entry  = $_;
        my $vector = $entry->{feature};
        # Numeric sort: indices are numbers, so a string sort would misorder them.
        my @ordered_indices = sort { $a <=> $b } keys %$vector;
        my @pairs = map { "$_:$vector->{$_}" } @ordered_indices;
        join(' ', $entry->{label}, @pairs) . "\n";
    } @{ $self->as_arrayref };

    return join '', @lines;
}
# Parse LIBSVM/LIBLINEAR formatted text read from a file handle.
#
# Positional arguments (validated by args_pos):
#   $class  - the invoking class name.
#   $source - file handle to read lines from.
#
# Each non-blank line is expected to look like
#   <label> <index>:<value> <index>:<value> ...
# Labels, indices and values are all coerced to numbers.
#
# Returns an ArrayRef of HashRefs, each of the form
#   { feature => { index => value, ... }, label => $label }
# in the order the lines were read.
sub parse_input_file {
    args_pos
        my $class => ClassName,
        my $source => FileHandle;

    my @data_set;
    while (defined(my $line = <$source>)) {
        chomp $line;
        # split ' ' (awk mode) ignores leading whitespace, so an indented
        # line does not produce an empty first field that would be mistaken
        # for the label.
        my ($label, @feature) = split ' ', $line;
        # Blank or whitespace-only lines (e.g. a trailing empty line) carry
        # no instance; skip them instead of recording a bogus entry with
        # label 0 and no features.
        next unless defined $label;
        $label += 0;
        my %feature = map {
            my ($index, $value) = split /:/;
            $index += 0;
            $value += 0;
            ($index => $value);
        } @feature;
        push @data_set, +{ feature => \%feature, label => $label, };
    }
    return \@data_set;
}
# Number of entries in the data set, i.e. the element count of the array
# reference returned by as_arrayref.
sub size {
    my ($self) = @_;
    my $entries = $self->as_arrayref;
    return scalar @$entries;
}
1;
__DATA__
=head1 NAME
Algorithm::LibLinear::DataSet
=head1 SYNOPSIS
use Algorithm::LibLinear::DataSet;
my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [
+{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, },
+{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, },
+{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, },
...
]);
my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file');
my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ...");
say $data_set->size;
say $data_set->as_string; # '+1 1:0.70833 2:1 3:1 ...'
__DATA__
+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
...
=head1 DESCRIPTION
This class represents a set of labeled feature vectors used for learning.
=head1 METHODS
=head2 new(data_set => \@data_set)
Constructor.
C<data_set> is an ArrayRef of HashRefs, each of which has 2 keys: C<feature> and C<label>.
The value of C<feature> is a HashRef which represents a (sparse) feature vector. Each key is an index and the corresponding value is a real number. The indices must be >= 1.
The value of C<label> is an integer: the class label to which the feature vector belongs.
=head2 load(fh => \*FH | filename => $path | string => $string)
Class method. Loads a data set in LIBSVM/LIBLINEAR format from a file handle, a file, or a string.
=head2 as_string
Dumps the data set as a string in LIBSVM/LIBLINEAR format.
=head2 size
The number of instances (labeled feature vectors) in the data set.
=cut
( run in 1.035 second using v1.01-cache-2.11-cpan-483215c6ad5 )