AI-FANN-Evolving

 view release on metacpan or  search on metacpan

script/aivolver  view on Meta::CPAN

#!/usr/bin/perl
use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use YAML::Any 'LoadFile';
use File::Path 'make_path';
use AI::FANN::Evolving;
use AI::FANN::Evolving::TrainData;
use Algorithm::Genetic::Diploid::Logger ':levels';

# initialize config variables
my $verbosity = WARN; # log level
my $formatter = 'simple'; # log formatter
my %initialize;       # settings to start the population
my %data;             # train and test data files
my %experiment;       # experiment settings
my %ann;              # ANN settings
my $outfile;

# there are no arguments
if ( not @ARGV ) {
	pod2usage( '-verbose' => 0 );
}

# first argument is a config file
if ( -e $ARGV[0] ) {
	my $conf = shift;
	my $yaml = LoadFile($conf);
	$outfile    = $yaml->{'outfile'}         if defined $yaml->{'outfile'};
	$verbosity  = $yaml->{'verbosity'}       if defined $yaml->{'verbosity'};
	$formatter  = $yaml->{'formatter'}       if defined $yaml->{'formatter'};
	%initialize = %{ $yaml->{'initialize'} } if defined $yaml->{'initialize'};
	%data       = %{ $yaml->{'data'} }       if defined $yaml->{'data'};
	%experiment = %{ $yaml->{'experiment'} } if defined $yaml->{'experiment'};
	%ann        = %{ $yaml->{'ann'} }        if defined $yaml->{'ann'};
}

# process command line arguments
GetOptions(
	'verbose+'     => \$verbosity,
	'formatter=s'  => \$formatter,
	'outfile=s'    => \$outfile,
	'initialize=s' => \%initialize,
	'data=s'       => \%data,
	'experiment=s' => \%experiment,
	'ann=s'        => \%ann,
	'help|?'       => sub { pod2usage( '-verbose' => 1 ) },
	'manual'       => sub { pod2usage( '-verbose' => 2 ) },
);

# configure ANN
AI::FANN::Evolving->defaults(%ann);

# configure logger
my $log = Algorithm::Genetic::Diploid::Logger->new;
$log->level( 'level' => $verbosity );
$log->formatter( $formatter );

# read input data
my $deps   = join ', ', @{ $data{'dependent'} };
my $ignore = join ', ', @{ $data{'ignore'} };
$log->info("going to read train data $data{file}, ignoring '$ignore', dependent columns are '$deps'");
my $inputdata = AI::FANN::Evolving::TrainData->new(
	'file'      => $data{'file'},
	'dependent' => $data{'dependent'},
	'ignore'    => $data{'ignore'},
);
my ( $traindata, $testdata );
if ( $data{'type'} and lc $data{'type'} eq 'continuous' ) {
	( $traindata, $testdata ) = $inputdata->sample_data( $data{'fraction'} );
}
else {
	( $traindata, $testdata ) = $inputdata->partition_data( $data{'fraction'} );
}

$log->info("number of training data records: ".$traindata->size);
$log->info("number of test data records: ".$testdata->size);

# create first work dir
my $wd  = delete $experiment{'workdir'};
make_path($wd);
$wd .= '/0';

# create the experiment
my $exp = AI::FANN::Evolving::Experiment->new(
	'traindata' => $traindata->to_fann,
	'env'       => $testdata->to_fann,
	'workdir'   => $wd,
	%experiment,
);

# initialize the experiment
$exp->initialize(%initialize);

# run!
my ( $fittest, $fitness ) = $exp->run();
$log->info("*** overall best fitness: $fitness");
my ($gene) = sort { $a->fitness <=> $b->fitness } map { $_->genes } $fittest->chromosomes;
$gene->ann->save($outfile);

__END__

=pod

=head1 NAME

aivolver - Evolves optimal artificial neural networks

=head1 SYNOPSIS

 aivolver [<config.yml>] [OPTION]...
	 try `aivolver --help' or `aivolver --manual' for more information

=head1 OPTIONS AND ARGUMENTS

B<***NO LONGER ACCURATE, CONSULT THE YAML CONFIG FILES***>

=over

=item B<<config.ymlE<gt>>

If the first command line argument is a file location, this will be interpreted as the
location of a configuration file in YAML syntax structured as in this
example: L<https://raw.github.com/naturalis/ai-fann-evolving/master/examples/conf.yml>.

Subsequent command line arguments can then be provided that override the defaults in this
configuration file.

=item B<-h/--help/-?>

Prints help message and exits.

=item B<-m/--manual>

Prints manual page and exits.

=item B<-v/--verbose>

Increments verbosity of the process. Can be used multiple times.

=item B<-o/--outfile <file.annE<gt>>

File name for the fittest ANN file over all generations.

=item B<-d/--data <key=valueE<gt>>

The C<data> argument is used multiple times, each time followed by a key/value pair
that defines the location of one of the data files. The key/value pairs are as follows:

=over

=item B<file=<data.tsvE<gt>>

Defines the location of a file of input data.

=item B<fraction=<numberE<gt>>

Fraction of input data to use for training (versus testing).

=back

=item B<-i/--initialize <key=valueE<gt>>

The C<initialize> argument is used multiple times, each time followed by a key/value
pair that defines one of the initialization settings for the (genetic) structure of the
evolving population. The key/value pairs are as follows:

=over

=item B<individual_count=<countE<gt>>

Defines the number of individuals in the population.

=item B<chromosome_count=<countE<gt>>

Defines the number of non-homologous chromosomes (i.e. n for diploid org). Normally
1 chromosome suffices.

=item B<gene_count=<countE<gt>>

Defines the number of genes per chromosome. Normally 1 gene (i.e. 1 ANN) suffices.

=back

=item B<-e/--experiment <key=valueE<gt>>



( run in 1.301 second using v1.01-cache-2.11-cpan-d8267643d1d )