AI-FANN-Evolving
view release on metacpan or search on metacpan
lib/AI/FANN/Evolving/TrainData.pm view on Meta::CPAN
if ( defined $i ) {
return [ map { $self->{'table'}->[$i]->[$_] } @dc ];
}
else {
my @dep;
for my $j ( 0 .. $self->size - 1 ) {
push @dep, $self->dependent_data($j);
}
return @dep;
}
}
=item read_data
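Reads the provided tab-delimited input file. The first non-blank line is used
as the column header; every other non-blank line becomes a data row.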
=cut
# Loads a tab-delimited text file into the object.
#
# The first non-blank line is taken as the header: each column name is mapped
# to its zero-based column index and stored in $self->{'header'}. Every later
# non-blank line is split on tabs and stored, in file order, as an array
# reference in $self->{'table'}.
#
# Args:    $file - path of the file to read
# Returns: the invocant
# Dies:    if $file cannot be opened
sub read_data {
    my ( $self, $file ) = @_; # file is tab-delimited
    $log->debug("reading data from file $file");
    open my $fh, '<', $file or die "Can't open $file: $!";
    my ( %header, @table );

    # A named lexical is used rather than the implicit $_ because
    # while(<$fh>) assigns to the global $_ without localizing it, which
    # would clobber the caller's $_.
    LINE: while ( my $line = <$fh> ) {

        # Strip the line terminator. Unlike a bare chomp this also removes
        # the "\r" of CRLF files, which would otherwise stick to the last
        # column name / last field.
        $line =~ s/\r?\n\z//;
        next LINE if $line =~ /^\s*$/;

        my @fields = split /\t/, $line;
        if ( not %header ) {

            # first non-blank line: remember the position of every column
            my $i = 0;
            %header = map { $_ => $i++ } @fields;
        }
        else {
            push @table, \@fields;
        }
    }
    close $fh;

    $self->{'header'} = \%header;
    $self->{'table'}  = \@table;
    return $self;
}
=item write_data
Writes the header and the data rows as tab-delimited text to the provided
output file, or to STDOUT if no file is given.
=cut
# Writes the header and all rows as tab-delimited text.
#
# Columns of the header line are ordered by the column index stored in
# $self->{'header'}; rows are written in the order they appear in
# $self->{'table'}.
#
# Args:    $file - optional output path; when false, output goes to STDOUT
# Returns: the invocant
# Dies:    if $file cannot be opened for writing, or if closing it fails
sub write_data {
    my ( $self, $file ) = @_;

    # use file or STDOUT
    my $fh;
    if ( $file ) {
        open $fh, '>', $file or die "Can't write to $file: $!";
        $log->info("writing data to $file");
    }
    else {
        $fh = \*STDOUT;
        $log->info("writing data to STDOUT");
    }

    # print header, columns sorted by their stored index
    my $h = $self->{'header'};
    my @columns = sort { $h->{$a} <=> $h->{$b} } keys %{ $h };
    print {$fh} join( "\t", @columns ) . "\n";

    # print rows
    for my $row ( @{ $self->{'table'} } ) {
        print {$fh} join( "\t", @{ $row } ) . "\n";
    }

    # Only close a handle we opened ourselves (never STDOUT). Buffered write
    # errors surface at close time, so the result must be checked.
    if ( $file ) {
        close $fh or die "Can't close $file: $!";
    }
    return $self;
}
=item trim_data
Removes incomplete rows, i.e. rows that contain undefined (missing) values.
=cut
# Drops every row of $self->{'table'} that contains an undefined value and
# logs how many rows were removed.
#
# Returns: the array reference now stored in $self->{'table'}
sub trim_data {
    my $self  = shift;
    my $table = $self->{'table'} || [];

    # Row count before filtering. This is taken from the table itself; the
    # object's size is exposed through a method elsewhere in this file, not
    # through a 'size' hash slot, so reading $self->{'size'} gave a wrong count.
    my $before = scalar @{ $table };

    my @trimmed;
    ROW: for my $row ( @{ $table } ) {
        next ROW if grep { not defined $_ } @{ $row };
        push @trimmed, $row;
    }

    my $num = $before - scalar @trimmed;
    $log->info("removed $num incomplete rows");
    $self->{'table'} = \@trimmed;
    return $self->{'table'};
}
=item sample_data
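Samples a fraction of the data (default 0.5). Returns two clones: the first
holds the randomly sampled rows, the second holds the remaining rows.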
=cut
# Randomly splits the rows over two clones of the invocant.
#
# Args:    $sample - fraction of the rows to draw; a false value (including
#                    0) selects the default of 0.5
# Returns: a two-element list: the clone holding the sampled rows, followed
#          by the clone holding the rows that were not drawn
sub sample_data {
    my $self   = shift;
    my $sample = shift || 0.5;
    my $clone1 = $self->clone;
    my $clone2 = $self->clone;
    my $size   = $self->size;

    my @sample;
    $clone2->{'table'} = \@sample;

    my $want = int( $size * $sample );
    if ( $want > 0 ) {

        # One shuffle followed by taking the first $want rows draws the same
        # kind of random sample without replacement as reshuffling the
        # remaining pool for every single row, but in linear time.
        my @shuffled = shuffle( @{ $clone1->{'table'} } );

        # Never ask for more rows than exist (fraction > 1); otherwise the
        # sample would be padded with undef entries.
        $want = scalar @shuffled if $want > @shuffled;

        @sample = splice @shuffled, 0, $want;
        $clone1->{'table'} = \@shuffled;
    }
    return $clone2, $clone1;
}
=item partition_data
Creates two clones that partition the data according to the provided ratio.
=cut
sub partition_data {
my $self = shift;
my $sample = shift || 0.5;
my $clone1 = $self->clone;
my $clone2 = $self->clone;
my $remain = 1 - $sample;
$log->info("going to partition into $sample : $remain");
# compute number of different dependent patterns and ratios of each
my @dependents = $self->dependent_data;
my %seen;
( run in 1.300 second using v1.01-cache-2.11-cpan-f56aa216473 )