AI-FANN-Evolving
view release on metacpan or search on metacpan
lib/AI/FANN/Evolving/TrainData.pm view on Meta::CPAN
ROW: for my $row ( @{ $self->{'table'} } ) {
next ROW if grep { not defined $_ } @{ $row };
push @trimmed, $row;
}
my $num = $self->{'size'} - scalar @trimmed;
$log->info("removed $num incomplete rows");
$self->{'table'} = \@trimmed;
}
=item sample_data
Sample a fraction of the data
=cut
sub sample_data {
my $self = shift;
my $sample = shift || 0.5;
my $clone1 = $self->clone;
my $clone2 = $self->clone;
my $size = $self->size;
my @sample;
$clone2->{'table'} = \@sample;
while( scalar(@sample) < int( $size * $sample ) ) {
my @shuffled = shuffle( @{ $clone1->{'table'} } );
push @sample, shift @shuffled;
$clone1->{'table'} = \@shuffled;
}
return $clone2, $clone1;
}
=item partition_data
Creates two clones that partition the data according to the provided ratio.
=cut
sub partition_data {
my $self = shift;
my $sample = shift || 0.5;
my $clone1 = $self->clone;
my $clone2 = $self->clone;
my $remain = 1 - $sample;
$log->info("going to partition into $sample : $remain");
# compute number of different dependent patterns and ratios of each
my @dependents = $self->dependent_data;
my %seen;
for my $dep ( @dependents ) {
my $key = join '/', @{ $dep };
$seen{$key}++;
}
# adjust counts to sample size
for my $key ( keys %seen ) {
$log->debug("counts: $key => $seen{$key}");
$seen{$key} = int( $seen{$key} * $sample );
$log->debug("rescaled: $key => $seen{$key}");
}
# start the sampling
my @dc = map { $self->{'header'}->{$_} } $self->dependent_columns;
my @new_table; # we will populate this
my @table = @{ $clone1->{'table'} }; # work on cloned instance
# as long as there is still sampling to do
SAMPLE: while( grep { !!$_ } values %seen ) {
for my $i ( 0 .. $#table ) {
my @r = @{ $table[$i] };
my $key = join '/', @r[@dc];
if ( $seen{$key} ) {
my $rand = rand(1);
if ( $rand < $sample ) {
push @new_table, \@r;
splice @table, $i, 1;
$seen{$key}--;
$log->debug("still to go for $key: $seen{$key}");
next SAMPLE;
}
}
}
}
$clone2->{'table'} = \@new_table;
$clone1->{'table'} = \@table;
return $clone2, $clone1;
}
=item size
Returns the number of data records
=cut
sub size { scalar @{ shift->{'table'} } }
=item to_fann
Packs data into an L<AI::FANN> TrainData structure
=cut
sub to_fann {
$log->debug("encoding data as FANN struct");
my $self = shift;
my @cols = @_ ? @_ : $self->predictor_columns;
my @deps = $self->dependent_data;
my @pred = $self->predictor_data( 'cols' => \@cols );
my @interdigitated;
for my $i ( 0 .. $#deps ) {
push @interdigitated, $pred[$i], $deps[$i];
}
return AI::FANN::TrainData->new(@interdigitated);
}
=back
=cut
1;
( run in 0.743 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )