AI-FANN-Evolving

 view release on metacpan or  search on metacpan

lib/AI/FANN/Evolving/TrainData.pm  view on Meta::CPAN

	if ( defined $i ) {
		return [ map { $self->{'table'}->[$i]->[$_] } @dc ];
	}
	else {
		my @dep;
		for my $j ( 0 .. $self->size - 1 ) {
			push @dep, $self->dependent_data($j);
		}
		return @dep;
	}
}

=item read_data

Reads provided input file

=cut

sub read_data {
	my ( $self, $file ) = @_; # file is tab-delimited
	$log->debug("reading data from file $file");
	open my $fh, '<', $file or die "Can't open $file: $!";
	my ( %header, @table );
	while(<$fh>) {
		chomp;
		next if /^\s*$/;
		my @fields = split /\t/, $_;
		if ( not %header ) {
			my $i = 0;
			%header = map { $_ => $i++ } @fields;
		}
		else {
			push @table, \@fields;
		}
	}
	$self->{'header'} = \%header;
	$self->{'table'}  = \@table;
	return $self;
}

=item write_data

Writes to provided output file

=cut

sub write_data {
	my ( $self, $file ) = @_;
	
	# use file or STDOUT
	my $fh;
	if ( $file ) {
		open $fh, '>', $file or die "Can't write to $file: $!";
		$log->info("writing data to $file");
	}
	else {
		$fh = \*STDOUT;
		$log->info("writing data to STDOUT");
	}
	
	# print header
	my $h = $self->{'header'};
	print $fh join "\t", sort { $h->{$a} <=> $h->{$b} } keys %{ $h };
	print $fh "\n";
	
	# print rows
	for my $row ( @{ $self->{'table'} } ) {
		print $fh join "\t", @{ $row };
		print $fh "\n";
	}
}

=item trim_data

Trims sparse rows with missing values

=cut

sub trim_data {
	my $self = shift;
	my @trimmed;
	ROW: for my $row ( @{ $self->{'table'} } ) {
		next ROW if grep { not defined $_ } @{ $row };
		push @trimmed, $row;
	}
	my $num = $self->{'size'} - scalar @trimmed;
	$log->info("removed $num incomplete rows");
	$self->{'table'} = \@trimmed;
}

=item sample_data

Sample a fraction of the data

=cut

sub sample_data {
	my $self   = shift;
	my $sample = shift || 0.5;
	my $clone1 = $self->clone;
	my $clone2 = $self->clone;
	my $size   = $self->size;
	my @sample;
	$clone2->{'table'} = \@sample;
	while( scalar(@sample) < int( $size * $sample ) ) {
		my @shuffled = shuffle( @{ $clone1->{'table'} } );
		push @sample, shift @shuffled;
		$clone1->{'table'} = \@shuffled;
	}
	return $clone2, $clone1;
}

=item partition_data

Creates two clones that partition the data according to the provided ratio.

=cut

sub partition_data {
	my $self   = shift;
	my $sample = shift || 0.5;
	my $clone1 = $self->clone;
	my $clone2 = $self->clone;
	my $remain = 1 - $sample;
	$log->info("going to partition into $sample : $remain");
		
	# compute number of different dependent patterns and ratios of each
	my @dependents = $self->dependent_data;
	my %seen;



( run in 1.300 second using v1.01-cache-2.11-cpan-f56aa216473 )