AI-FANN-Evolving
view release on metacpan or search on metacpan
lib/AI/FANN/Evolving/Experiment.pm view on Meta::CPAN
# modify workdir
my $wd = $self->{'workdir'};
$wd =~ s/\d+$/$i/;
$self->{'workdir'} = $wd;
mkdir $wd;
my $optimum = $self->optimum($i);
$log->debug("optimum at generation $i is $optimum");
my ( $fittest, $fitness ) = $self->population->turnover($i,$self->env,$optimum);
push @results, [ $fittest, $fitness ];
}
my ( $fittest, $fitness ) = map { @{ $_ } } sort { $a->[1] <=> $b->[1] } @results;
return $fittest, $fitness;
}
=item optimum
The optimal fitness is zero error in the ANN's classification. This method returns
that value: 0.
=cut
sub optimum {
    # Zero classification error is the target fitness; any arguments
    # passed by the caller are ignored.
    return 0;
}
lib/AI/FANN/Evolving/Experiment.pm view on Meta::CPAN
# process the argument
if ( @_ ) {
my $arg = shift;
if ( ref $arg eq 'CODE' ) {
$self->{'error_func'} = $arg;
$log->info("using custom error function");
}
elsif ( $arg eq 'sign' ) {
$self->{'error_func'} = \&_sign;
$log->info("using sign test error function");
}
elsif ( $arg eq 'mse' ) {
$self->{'error_func'} = \&_mse;
$log->info("using MSE error function");
}
else {
$log->warn("don't understand error func '$arg'");
}
}
script/aivolver view on Meta::CPAN
use YAML::Any 'LoadFile';
use File::Path 'make_path';
use AI::FANN::Evolving;
use AI::FANN::Evolving::TrainData;
use Algorithm::Genetic::Diploid::Logger ':levels';
# initialize config variables
# NOTE(review): WARN is presumably one of the level constants exported by
# Algorithm::Genetic::Diploid::Logger via the ':levels' tag - confirm.
my $verbosity = WARN; # log level
my $formatter = 'simple'; # log formatter
my %initialize; # settings to start the population
my %data; # train and test data files
my %experiment; # experiment settings
my %ann; # ANN settings
my $outfile; # file the fittest ANN is saved to at the end of the run
# there are no arguments
# NOTE(review): pod2usage is presumably Pod::Usage's function; its import is
# not visible in this excerpt. '-verbose' => 0 should request the briefest
# usage message - confirm against the Pod::Usage documentation.
if ( not @ARGV ) {
pod2usage( '-verbose' => 0 );
}
# first argument is a config file
script/aivolver view on Meta::CPAN
# read input data
# Build human-readable column lists for the log message below. Either list
# may be absent from %data; dereferencing undef as an array in rvalue context
# dies under strict refs, so fall back to an empty list instead of aborting
# before the data object is even constructed.
my $deps = join ', ', @{ $data{'dependent'} || [] };
my $ignore = join ', ', @{ $data{'ignore'} || [] };
$log->info("going to read train data $data{file}, ignoring '$ignore', dependent columns are '$deps'");
# Wrap the input file in a TrainData object, forwarding the file name and the
# dependent/ignore column lists exactly as they appear in %data.
my $inputdata = AI::FANN::Evolving::TrainData->new(
'file' => $data{'file'},
'dependent' => $data{'dependent'},
'ignore' => $data{'ignore'},
);
# Split the input into a training and a test set. When the configured type is
# 'continuous' (case-insensitive) sample_data is used, otherwise
# partition_data; both receive the configured fraction and both results are
# unpacked as ( train, test ).
my ( $traindata, $testdata );
if ( $data{'type'} and lc $data{'type'} eq 'continuous' ) {
( $traindata, $testdata ) = $inputdata->sample_data( $data{'fraction'} );
}
else {
( $traindata, $testdata ) = $inputdata->partition_data( $data{'fraction'} );
}
$log->info("number of training data records: ".$traindata->size);
$log->info("number of test data records: ".$testdata->size);
# create first work dir
# 'workdir' is removed from %experiment here, so the hash no longer carries a
# 'workdir' entry when it is expanded into the constructor arguments below.
# NOTE(review): $wd is undef if no workdir was configured - confirm that the
# option handling guarantees a value.
my $wd = delete $experiment{'workdir'};
make_path($wd);
# The path handed to the experiment ends in '/0'.
# NOTE(review): presumably the generation-0 subdirectory; the experiment's run
# loop appears to rewrite the trailing digits per generation - confirm.
$wd .= '/0';
# create the experiment
# The test set is passed as the 'env' argument; the remaining experiment
# settings are appended after the explicit arguments.
my $exp = AI::FANN::Evolving::Experiment->new(
'traindata' => $traindata->to_fann,
'env' => $testdata->to_fann,
'workdir' => $wd,
%experiment,
);
# initialize the experiment
$exp->initialize(%initialize);
# run!
my ( $fittest, $fitness ) = $exp->run();
$log->info("*** overall best fitness: $fitness");
# Collect every gene of every chromosome of the fittest individual, sort them
# by ascending fitness value and keep the first one (the lowest value).
my ($gene) = sort { $a->fitness <=> $b->fitness } map { $_->genes } $fittest->chromosomes;
# Save that gene's ANN to the configured output file.
# NOTE(review): $outfile is declared without a default; confirm it is always
# set before this point.
$gene->ann->save($outfile);
__END__
=pod
=head1 NAME
aivolver - Evolves optimal artificial neural networks
script/aivolver view on Meta::CPAN
=item B<-m/--manual>
Prints manual page and exits.
=item B<-v/--verbose>
Increments verbosity of the process. Can be used multiple times.
=item B<-o/--outfile <file.annE<gt>>
File name for the fittest ANN file over all generations.
=item B<-d/--data <key=valueE<gt>>
The C<data> argument is used multiple times, each time followed by a key/value pair
that defines the location of one of the data files. The key/value pairs are as follows:
=over
=item B<file=<data.tsvE<gt>>
Defines the location of a file of input data.
=item B<fraction=<numberE<gt>>
Fraction of input data to use for training (versus testing).
=back
=item B<-i/--initialize <key=valueE<gt>>
The C<initialize> argument is used multiple times, each time followed by a key/value
pair that defines one of the initialization settings for the (genetic) structure of the
evolving population. The key/value pairs are as follows:
=over
script/aivolver view on Meta::CPAN
hard to predict I<a priori>, but can be optimized in a variety of ways.
C<aivolver> is a program that does this by evolving parameter settings using a genetic
algorithm that runs for a number of generations determined by C<ngens>. During this
process it writes the intermediate ANNs into the C<workdir> until the best result is
written to the C<outfile>.
The genetic algorithm proceeds by simulating a population of C<individual_count> diploid
individuals that each have C<chromosome_count> chromosomes whose C<gene_count> genes
encode the parameters of the ANN. During each generation, each individual is trained
on a sample data set, and the individual's fitness is then calculated by testing its
predictive abilities on an out-of-sample data set. The fittest individuals (whose
fraction of the total is determined by C<reproduction_rate>) are selected for breeding
in proportion to their fitness.
Before breeding, each individual undergoes a process of mutation, where a fraction of
the ANN parameters is randomly perturbed. Both the size of the fraction and the
maximum extent of the perturbation are determined by C<mutation_rate>. Subsequently, the
homologous chromosomes recombine (i.e. exchange parameters) at a rate determined by
C<crossover_rate>, which then results in (haploid) gametes. These gametes are fused with
those of other individuals to give rise to the next generation.
=head1 TRAINING AND TEST DATA
The data that are used for training the ANNs and for subsequently testing their predictive
abilities are provided as tab-separated tables. An example of an input data set is here:
L<https://github.com/naturalis/ai-fann-evolving/blob/master/examples/butterbeetles.tsv>
The tables have a header row, with at least the following columns:
=over
=item B<ID>
t/00-load.t view on Meta::CPAN
# Smoke test: the distribution's main module must load.
use Test::More tests => 1;
# use_ok runs inside BEGIN so the module is loaded at compile time, before
# the diag line below reads its $VERSION.
BEGIN {
use_ok('AI::FANN::Evolving');
}
# Report the module version and the running Perl version in the test output.
diag("Testing AI::FANN::Evolving $AI::FANN::Evolving::VERSION, Perl $]");
use_ok('AI::FANN::Evolving::TrainData');
use_ok('AI::FANN::Evolving');
use_ok('Algorithm::Genetic::Diploid::Logger');
}
# create and configure logger
my $log = new_ok('Algorithm::Genetic::Diploid::Logger');
$log->level( 'level' => 4 );
# Install a formatter that always returns the empty string and, for every
# message of the form "fittest at generation N: F", emits one test assertion.
# NOTE(review): ok() checks the captured text for truth, so a fitness logged
# as exactly "0" would fail the assertion - confirm that is intended.
$log->formatter(sub{
my %args = @_;
if ( $args{'msg'} =~ /fittest at generation (\d+): (.+)/ ) {
my ( $gen, $fitness ) = ( $1, $2 );
# the "/2" in the description matches the 'ngens' => 2 setting below
ok( $fitness, "generation $gen/2, fitness: $fitness" );
}
return '';
});
# set quieter and quicker to give up
AI::FANN::Evolving->defaults( 'epoch_printfreq' => 0, 'epochs' => 200 );
# instantiate factory
my $fac = new_ok('AI::FANN::Evolving::Factory');
# prepare data
my $data = AI::FANN::Evolving::TrainData->new(
'file' => "$Bin/../examples/Cochlopetalum.tsv",
'ignore' => [ 'image' ],
'dependent' => [ 'C1', 'C2', 'C3', 'C4', 'C5' ],
);
# NOTE(review): script/aivolver unpacks partition_data as ( train, test ); the
# names are the other way round here. With a 0.5 fraction the two halves are
# presumably interchangeable - confirm the intended order.
my ( $test, $train ) = $data->partition_data( 0.5 );
# create the experiment
# Uses a temporary work directory created with CLEANUP => 1, and runs for two
# generations.
my $exp = $fac->create_experiment(
'workdir' => tempdir( 'CLEANUP' => 1 ),
'traindata' => $train->to_fann,
'factory' => $fac,
'env' => $test->to_fann,
'mutation_rate' => 0.1,
'ngens' => 2,
);
isa_ok( $exp, 'Algorithm::Genetic::Diploid::Experiment' );
# initialize the experiment
ok( $exp->initialize( 'individual_count' => 2 ), "initialized" );
# run!
# run() is unpacked as ( fittest individual, its fitness ); only the class of
# the individual is asserted here.
my ( $fittest, $fitness ) = $exp->run();
isa_ok( $fittest, 'Algorithm::Genetic::Diploid::Individual' );
t/pod-coverage.t view on Meta::CPAN
# Author-only test: checks POD coverage.
use Test::More;
# Skip everything unless the TEST_AUTHOR environment variable is set.
plan skip_all => 'env var TEST_AUTHOR not set' if not $ENV{'TEST_AUTHOR'};
# Load Test::Pod::Coverage at run time (string eval) so that a missing or too
# old installation sets $@ and leads to a skip instead of a compile failure.
eval "use Test::Pod::Coverage 1.04";
plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage"
if $@;
all_pod_coverage_ok();
# Author-only test: checks the distribution's POD files.
use Test::More;
# Skip everything unless the TEST_AUTHOR environment variable is set.
plan skip_all => 'env var TEST_AUTHOR not set' if not $ENV{'TEST_AUTHOR'};
# Load Test::Pod at run time (string eval) so that a missing or too old
# installation sets $@ and leads to a skip instead of a compile failure.
eval "use Test::Pod 1.14";
plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
all_pod_files_ok();
( run in 0.305 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )