Let's grep the CPAN together: search a pattern among all perl distributions

Result:

found more than 81 distributions - search limited to the first 2001 files matching your query ( run in 0.398 )

AI-FuzzyInference

view release on metacpan or search on metacpan


# A module to implement a fuzzy inference system.
#
# Copyright Ala Qumsieh (aqumsieh@cpan.org) 2002.
# This program is distributed under the same terms as Perl itself.

package AI::FuzzyInference;
use strict;

use vars qw/$VERSION/;
$VERSION = 0.05;

use AI::FuzzyInference::Set;

############################################
#
# First some global vars.
#
############################################

# this hash defines the possible interpretations of the
# standard fuzzy logic operations.
my %_operations = (
		   '&' => {
		       min     => sub { (sort {$a <=> $b} @_)[0] },
		       product => sub { my $p = 1; $p *= $_ for @_; $p },
		       default => 'min',
		   },
		   '|'  => {
		       max     => sub { (sort {$a <=> $b} @_)[-1] },
		       sum     => sub { my $s = 0; $s += $_ for @_; $s > 1 ? 1 : $s },
		       default => 'max',
		   },
		   '!' => {
		       complement => sub { 1 - $_[0] },
		       custom  => sub {},
		       default    => 'complement',
		   },
		   );

# this hash defines the currently implemented implication methods.
my %_implication     = qw(
			  clip    1
			  scale   1
			  default clip
			  );

# this hash defines the currently implemented aggregation methods.
my %_aggregation     = qw(
			  max     1
			  default max
			  );

# this hash defines the currently implemented defuzzification methods.
my %_defuzzification = qw(
			  centroid 1
			  default  centroid
			  );

# sub new() - constructor.
# 
# doesn't take any arguments. Returns an initialized AI::FuzzyInference object.

sub new {
    my $self  = shift;
    my $class = ref($self) || $self;

    my $obj = bless {} => $class;

    $obj->_init;

    return $obj;
}

# sub _init() - private method.
#
# no arguments. Initializes the data structures we will need.
# It also defines the default logic operations we might need.

sub _init {
    my $self = shift;

    $self->{SET}     = new AI::FuzzyInference::Set;
    $self->{INVARS}  = {};
    $self->{OUTVARS} = {};
    $self->{RULES}   = [];
    $self->{RESULTS} = {};

    $self->{IMPLICATION}     = $_implication{default};
    $self->{AGGREGATION}     = $_aggregation{default};
    $self->{DEFUZZIFICATION} = $_defuzzification{default};

    for my $op (qw/& | !/) {
	$self->{OPERATIONS}{$op} = $_operations{$op}{default};
    }
}

# sub implication() - public method.
#
# one optional argument: has to match one of the keys of the %_implication hash.
# used to query/set the implication method.

sub implication {
    my ($self,
	$new,
	) = @_;

    if (defined $new and exists $_implication{$new}) {
	$self->{IMPLICATION} = $new;
    }

    return $self->{IMPLICATION};
}

# sub aggregation() - public method.
#
# one optional argument: has to match one of the keys of the %_aggregation hash.
# used to query/set the aggregation method.

sub aggregation {
    my ($self,
	$new,
	) = @_;

    if (defined $new and exists $_aggregation{$new}) {
	$self->{AGGREGATION} = $new;
    }

    return $self->{AGGREGATION};
}

# sub defuzzification() - public method.
#
# one optional argument: has to match one of the keys of the %_defuzzification hash.
# used to query/set the defuzzification method.

sub defuzzification {
    my ($self,
	$new,
	) = @_;

    if (defined $new and exists $_defuzzification{$new}) {
	$self->{DEFUZZIFICATION} = $new;
    }

    return $self->{DEFUZZIFICATION};
}

# sub operation() - public method.
#
# two arguments: first one mandatory and specifies the logic operation
# in question. Second one is optional and has to match one of the keys
# of the %{$_operations{$first_arg}} hash.
# Used to query/set the logic operations method.

sub operation {
    my ($self,
	$op,
	$new,
	) = @_;

    return unless defined $op && exists $_operations{$op};

    if (defined $new and exists $_operations{$op}{$new}) {
	$self->{OPERATIONS}{$op} = $new;
    }

    return $self->{OPERATIONS}{$op};
}

# sub inVar() - public method.
#
# 4 arguments or more : First is a name of a new input variable.
# Second and third are the min and max values of that variable.
# These define the universe of discourse for that variable.
# Additional argumets constitute a hash. The keys of the hash
# are term set names defined for the given variable. The values
# are the coordinates of the vertices of the term sets.
#
# ex. $obj->inVar('height',
#                 5, 8,   # xmin, xmax (in feet, say)
#                 'tall' => [0, 0,
#                            5, 1,
#                            10,0],
#                  ....);

sub inVar {
    my ($self,
	$var,
	$xmin,
	$xmax,
	@sets,
	) = @_;

    $self->{INVARS}{$var} = [$xmin, $xmax];

    while (@sets) {
	my $s = shift @sets;
	my $c = shift @sets;

	$self->{SET}->add("$var:$s", $xmin, $xmax, @$c);
    }
}

# sub outVar() - public method.
#
# 4 arguments or more : First is a name of a new output variable.
# Second and third are the min and max values of that variable.
# These define the universe of discourse for that variable.
# Additional argumets constitute a hash. The keys of the hash
# are term set names defined for the given variable. The values
# are the coordinates of the vertices of the term sets.

sub outVar {
    my ($self,
	$var,
	$xmin,
	$xmax,
	@sets,
	) = @_;

    $self->{OUTVARS}{$var} = [$xmin, $xmax];

    while (@sets) {
	my $s = shift @sets;
	my $c = shift @sets;

	$self->{SET}->add("$var:$s", $xmin, $xmax, @$c);
    }
}

# sub addRule() - public method.
#
# Adds fuzzy if-then inference rules.
#
# $obj->addRule('x=medium'         => 'z = slow',
#               'x=low  & y=small' => 'z = fast',
#               'x=high & y=tiny'  => 'z=veryfast');
# spaces are optional. The characters [&=|] are special.

sub addRule {
    my ($self, %rules) = @_;

    for my $k (keys %rules) {
	my $v = $rules{$k};
	s/\s+//g for $v, $k;

	push @{$self->{RULES}} => [$k, $v];
    }

    return 1;
}

# sub show() - public method.
#
# This method displays the computed values of all
# output variables.
# It is ugly, and will be removed. Here for debugging.

sub show {
    my $self = shift;

    for my $var (keys %{$self->{RESULTS}}) {
	print "Var $var = $self->{RESULTS}{$var}.\n";
    }
}

# sub value() - public method.
#
# one argument: the name of an output variable.
# This method returns the computed value of a given output var.

sub value {
    my ($self,
	$var,
	) = @_;

    return undef unless exists $self->{RESULTS}{$var};
    return $self->{RESULTS}{$var};
}

# sub reset() - public method
# 
# cleans the data structures used.

sub reset {
    my $self = shift;

    my @list   =  $self->{SET}->listMatching(q|:implicated$|);
    push @list => $self->{SET}->listMatching(q|:aggregated$|);

    $self->{SET}->delete($_) for @list;

    $self->{RESULTS} = {};
}

# sub compute() - public method
#
# This method takes as input crisp values for each
# of the input vars, and produces a crisp output value
# based on the application of the fuzzy if-then rules.
# ex.
# $z = $obj->compute(x => 5,
#                    y => 24);

sub compute {
    my ($self,
	%vars,
	) = @_;

    $self->reset();

    # First thing we do is to fuzzify the inputs.
    $self->_fuzzify(%vars);

    # Now, apply the rules to see which ones fire.
    $self->_infer;

    # implicate
    $self->_implicate;

    # aggregate
    $self->_aggregate;

    # defuzzify .. final step.
    $self->_defuzzify;

    return 1;
}

# sub _defuzzify() - private method.
#
# no arguments. This method applies the defuzzification technique
# to get a crisp value out of the aggregated set of each output
# var.

sub _defuzzify {
    my $self = shift;

    my $_defuzzification = $self->{DEFUZZIFICATION};

    # iterate through all output vars.
    for my $var (keys %{$self->{OUTVARS}}) {

	my $result = 0;
	if ($self->{SET}->exists("$var:aggregated")) {
	    $result = $self->{SET}->$_defuzzification("$var:aggregated");
	}

	$self->{RESULTS}{$var} = $result;
    }
}

# sub _aggregate() - private method.
#
# no arguments. This method applies the aggregation technique to get
# one fuzzy set out of the implicated sets of each output var.

sub _aggregate {
    my $self = shift;

    my $_aggregation = $self->{AGGREGATION};

    # iterate through all output vars.
    for my $var (keys %{$self->{OUTVARS}}) {

	# get implicated sets.
	my @list = $self->{SET}->listMatching("\Q$var\E:.*:implicated\$");

	next unless @list;

	my $i = 0;
	my $current = shift @list;

	# aggregate everything together.
	while (@list) {
	    my $new  = shift @list;
	    my $name = "temp" . $i++;

	    my @c = $self->{SET}->$_aggregation($current, $new);
	    $self->{SET}->add($name, @{$self->{OUTVARS}{$var}}, @c);
	    $current = $name;
	}

	# rename the final aggregated set.
	my @c = $self->{SET}->coords($current);
	$self->{SET}->add("$var:aggregated", @{$self->{OUTVARS}{$var}}, @c);

	# delete the temporary sets.
	for my $j (0 .. $i - 1) {
	    $self->{SET}->delete("temp$j");
	}
    }
}

# sub _implicate() - private method.
#
# no arguments. This method applies the implication technique
# to all the fired rules to find a support value for each
# output variable.

sub _implicate {
  my $self = shift;

  my $_implication = $self->{IMPLICATION};

  my %ind;

  for my $ref (@{$self->{FIRED}}) {
    my ($i, $val) = @$ref;
    my ($var, $ts) = split /=/, $self->{RULES}[$i][1];

    if ($val > 0) {
      $ind{$var}{$ts}++;
      my @c = $self->{SET}->$_implication("$var:$ts", $val);
      my @u = @{$self->{OUTVARS}{$var}}; # the universe
      $self->{SET}->add("$var:$ts:$ind{$var}{$ts}:implicated", @u, @c);
    }
  }
}

# sub _fuzzify() - private method.
#
# one argument: a hash. The keys are input variables. The
# values are the crisp values of the input variables (same arguments
# as compute()). It finds the degree of membership of each input
# variable in each of its term sets.

sub _fuzzify {
    my ($self, %vars) = @_;

    my %terms;

    for my $var (keys %vars) {
	my $val = $vars{$var};

	for my $ts ($self->{SET}->listMatching("\Q$var\E")) {
	    my $deg = $self->{SET}->membership($ts, $val);

	    $terms{$var}{$ts} = $deg;
	}
    }

    $self->{FUZZIFY} = \%terms;
}

# sub _infer() - private method.
#
# no arguments. This method applies the logic operations to combine
# multiple parts of the antecedent of a rule to get one crisp value 
# that is the degree of support of that rule.
# Rules with positive support "fire".

sub _infer {
    my $self = shift;

    my @fired; # keep list of fired rules.

    for my $i (0 .. $#{$self->{RULES}}) {
	my $r = $self->{RULES}[$i][0];   # precedent

	my $val = 0;
	while ($r =~ /([&|]?)([^&|]+)/g) {
	    my ($op, $ant) = ($1, $2);
	    my ($var, $ts) = split /=/ => $ant;

	    $ts = "$var:$ts";

	    if ($op) {
		#$val = $self->{LOGIC}{$op}{SUB}->($val, $self->{FUZZIFY}{$var}{$ts});
		$val = $_operations{$op}{$self->{OPERATIONS}{$op}}->($val, $self->{FUZZIFY}{$var}{$ts});
	    } else {
		$val = $self->{FUZZIFY}{$var}{$ts};
	    }
	}

	# We only care about positive values.
	push @fired => [$i, $val];
    }

    $self->{FIRED} = \@fired;
}

__END__

=pod

=head1 NAME

AI::FuzzyInference - A module to implement a Fuzzy Inference System.

=head1 SYNOPSYS

    use AI::FuzzyInference;

    my $s = new AI::FuzzyInference;

    $s->inVar('service', 0, 10,
	  poor      => [0, 0,
			2, 1,
			4, 0],
	  good      => [2, 0,
			4, 1,
			6, 0],
	  excellent => [4, 0,
			6, 1,
			8, 0],
	  amazing   => [6, 0,
			8, 1,
			10, 0],
	  );

    $s->inVar('food', 0, 10,
	  poor      => [0, 0,
			2, 1,
			4, 0],
	  good      => [2, 0,
			4, 1,
			6, 0],
	  excellent => [4, 0,
			6, 1,
			8, 0],
	  amazing   => [6, 0,
			8, 1,
			10, 0],
	  );

    $s->outVar('tip', 5, 30,
	   poor      => [5, 0,
			 10, 1,
			 15, 0],
	   good      => [10, 0,
			 15, 1,
			 20, 0],
	   excellent => [15, 0,
			 20, 1,
			 25, 0],
	   amazing   => [20, 0,
			 25, 1,
			 30, 0],
	   );

    $s->addRule(
	    'service=poor      & food=poor'      => 'tip=poor',
	    'service=good      & food=poor'      => 'tip=poor',
	    'service=excellent & food=poor'      => 'tip=good',
	    'service=amazing   & food=poor'      => 'tip=good',

	    'service=poor      & food=good'      => 'tip=poor',
	    'service=good      & food=good'      => 'tip=good',
	    'service=excellent & food=good'      => 'tip=good',
	    'service=amazing   & food=good'      => 'tip=excellent',

	    'service=poor      & food=excellent' => 'tip=good',
	    'service=good      & food=excellent' => 'tip=excellent',
	    'service=excellent & food=excellent' => 'tip=excellent',
	    'service=amazing   & food=excellent' => 'tip=amazing',

	    'service=poor      & food=amazing'   => 'tip=good',
	    'service=good      & food=amazing'   => 'tip=excellent',
	    'service=excellent & food=amazing'   => 'tip=amazing',
	    'service=amazing   & food=amazing'   => 'tip=amazing',

	    );

    $s->compute(service => 2,
	    food    => 7);

=head1 DESCRIPTION

This module implements a fuzzy inference system. Very briefly, an FIS
is a system defined by a set of input and output variables, and a set
of fuzzy rules relating the input variables to the output variables.
Given crisp values for the input variables, the FIS uses the fuzzy rules
to compute a crisp value for each of the output variables.

The operation of an FIS is split into 4 distinct parts: I<fuzzification>,
I<inference>, I<aggregation> and I<defuzzification>.

=head2 Fuzzification

In this step, the crisp values of the input variables are used to
compute a degree of membership of each of the input variables in each
of its term sets. This produces a set of fuzzy sets.

=head2 Inference

In this step, all the defined rules are examined. Each rule has two parts:
the I<precedent> and the I<consequent>. The degree of support for each
rule is computed by applying fuzzy operators (I<and>, I<or>) to combine
all parts of its precendent, and generate a single crisp value. This value
indicates the "strength of firing" of the rule, and is used to reshape
(I<implicate>) the consequent part of the rule, generating modified
fuzzy sets.

=head2 Aggregation

Here, all implicated fuzzy sets of the fired rules are combined using
fuzzy operators to generate a single fuzzy set for each of the
output variables.

=head2 Defuzzification

Finally, a defuzzification operator is applied to the aggregated fuzzy
set to generate a single crisp value for each of the output variables.

For a more detailed explanation of fuzzy inference, you can check out
the tutorial by Jerry Mendel at
S<http://sipi.usc.edu/~mendel/publications/FLS_Engr_Tutorial_Errata.pdf>.

Note: The terminology used in this module might differ from that used
in the above tutorial.

=head1 PUBLIC METHODS

The module has the following public methods:

=over 4

=item new()

This is the constructor. It takes no arguments, and returns an
initialized AI::FuzzyInference object.

=item operation()

This method is used to set/query the fuzzy operations. It takes at least
one argument, and at most 2. The first argument specifies the logic
operation in question, and can be either C<&> for logical I<AND>,
C<|> for logical I<OR>, or C<!> for logical I<NOT>. The second
argument is used to set what method to use for the given operator.
The following values are possible:

=item &

=over 8

=item min

The result of C<A and B> is C<min(A, B)>. This is the default.

=item product

The result of C<A and B> is C<A * B>.

=back

=item |

=over 8

=item max

The result of C<A or B> is C<max(A, B)>. This is the default.

=item sum

The result of C<A or B> is C<min(A + B, 1)>.

=back

=item !

=over 8

=item complement

The result of C<not A> is C<1 - A>. This is the default.

=back

The method returns the name of the method to be used for the given
operation.

=item implication()

This method is used to set/query the implication method used to alter
the shape of the implicated output fuzzy sets. It takes one optional
argument which specifies the name of the implication method used.
This can be one of the following:

=over 8

=item clip

This causes the output fuzzy set to be clipped at its support value.
This is the default.

=item scale

This scales the output fuzzy set by multiplying it by its support value.

=back

=item aggregation()

This method is used to set/query the aggregation method used to combine
the output fuzzy sets. It takes one optional
argument which specifies the name of the aggregation method used.
This can be one of the following:

=over 8

=item max

The fuzzy sets are combined by taking at each point the maximum value of
all the fuzzy sets at that point.
This is the default.

=back

=item defuzzification()

This method is used to set/query the defuzzification method used to
extract a single crisp value from the aggregated fuzzy set.
It takes one optional
argument which specifies the name of the defuzzification method used.
This can be one of the following:

=over 8

=item centroid

The centroid (aka I<center of mass> and I<center of gravity>) of the
aggregated fuzzy set is computed and returned.
This is the default.

=back

=item inVar()

This method defines an input variable, along with its universe of
discourse, and its term sets. Here's an example:

      $obj->inVar('height',
                  5, 8,   # xmin, xmax (in feet, say)
                  'tall' => [5,   0,
                             5.5, 1,
                             6,   0],
                  'medium' => [5.5, 0,
                             6.5, 1,
                             7, 0],
                  'short' => [6.5, 0,
                             7, 1]
		  );

This example defines an input variable called I<height>. The minimum
possible value for height is 5, and the maximum is 8. It also defines
3 term sets associated with height: I<tall>, I<medium> and I<short>.
The shape of each of these triangular term sets is completely
specified by the supplied anonymous array of indices.

=item outVar()

This method defines an output variable, along with its universe of
discourse, and its term sets. The arguments are identical to those for
the C<inVar()> method.

=item addRule()

This method is used to add the fuzzy rules. Its arguments are hash-value
pairs; the keys are the precedents and the values are the consequents.
Each antecedent has to be a combination of 1 or more strings. The
strings have to be separated by C<&> or C<|> indicating the fuzzy
I<AND> and I<OR> operations respectively. Each consequent must be a
single string. Each string has the form: C<var = term_set>. Spaces
are completely optional. Example:

    $obj->addRule('height=short & weight=big' => 'diet = necessary',
		  'height=tall & weight=tiny' => 'diet = are_you_kidding_me');

The first rule basically says I<If the height is short, and the weight is
big, then diet is necessary>.

=item compute()

This method takes as input a set of hash-value pairs; the keys are names
of input variables, and the values are the values of the variables. It
runs those values through the FIS, generating corresponding values for
the output variables. It always returns a true value. To get the actual
values of the output variables, look at the C<value()> method below.
Example:

    $obj->compute(x => 5,
		  y => 24);

Note that any subsequent call to C<compute()> will implicitly clear out
the old computed values before recomputing the new ones. This is done
through a call to the C<reset()> method below.

=item value()

This method returns the value of the supplied output variable. It only
works for output variables (defined using the C<outVar()> method),
and only returns useful results after a call to C<compute()> has been
made.

=item reset()

This method resets all the data structures used to compute crisp values
of the output variables. It is implicitly called by the C<compute()>
method above.

=back

=head1 INSTALLATION

It's all in pure Perl. Just place it somewhere and point your @INC to it.

But, if you insist, here's the traditional way:

To install this module type the following:

   perl Makefile.PL
   make
   make test
   make install


=head1 AUTHOR

Copyright 2002, Ala Qumsieh. All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

Address bug reports and comments to: aqumsieh@cpan.org

view all matches for this distribution

AI-Gene-Sequence

13 results

view release on metacpan or search on metacpan

AI/Gene/Sequence.pm view on Meta::CPAN

package AI::Gene::Sequence;
require 5.6.0;
use strict;
use warnings;

BEGIN {
  use Exporter   ();
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  $VERSION     = 0.22;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw();
}
our @EXPORT_OK;

my ($probs,$mut_keys) = _normalise( { map {$_ => 1} 
				      qw(insert remove overwrite 
					 duplicate minor major 
					 switch shuffle reverse) } );

##
# calls mutation method at random
# 0: number of mutations to perform
# 1: ref to hash of probs to use (otherwise uses default mutations and probs)

sub mutate {
  my $self = shift;
  my $num_mutates = +$_[0] || 1;
  my $rt = 0;
  my ($hr_probs, $muts);
  if (ref $_[1] eq 'HASH') { # use non standard mutations or probs
    ($hr_probs, $muts) = _normalise($_[1]);
  }
  else {                     # use standard mutations and probs
    $hr_probs = $probs;
    $muts = $mut_keys;
  }

 MUT_CYCLE: for (1..$num_mutates) {
    my $rand = rand;
    foreach my $mutation (@{$muts}) {
      next unless $rand < $hr_probs->{$mutation};
      my $mut = 'mutate_' . $mutation;
      $rt += $self->$mut(1);
      next MUT_CYCLE;
    }
  }
  return $rt;
}

##
# creates a normalised and cumulative prob distribution for the
# keys of the referenced hash

sub _normalise {
  my $hr = $_[0];
  my $h2 = {};
  my $muts = [keys %{$hr}];
  my $sum = 0;
  foreach (values %{$hr}) {
    $sum += $_;
  }
  if ($sum <= 0) {
    die "Cannot randomly mutate with bad probability distribution";
  }
  else {
    my $cum;
    @{$h2}{ @{$muts} } = map {$cum +=$_; $cum / $sum} @{$hr}{ @{$muts} };
    return ($h2, $muts);
  }
}

##
# inserts one element into the sequence
# 0: number to perform ( or 1)
# 1: position to mutate (undef for random)

sub mutate_insert {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    next if $pos > $length; # further than 1 place after gene
    my @token = $self->generate_token;
    my $new = $self->[0];
    substr($new, $pos, 0) = $token[0];
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos, 0, $token[1];
    $rt++;
  }
  return $rt;
}

##
# removes element(s) from sequence
# 0: number of times to perform
# 1: position to affect (undef for rand)
# 2: length to affect, undef => 1, 0 => random length

sub mutate_remove {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);
    next if ($length - $len) <= 0;
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    next if $pos >= $length; # outside of gene
    my $new = $self->[0];
    substr($new, $pos, $len) = '';
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos, $len;
    $rt++;
  }
  return $rt;
}

##
# copies an element or run of elements into a random place in the gene
# 0: number to perform (or 1)
# 1: posn to copy from (undef for rand)
# 2: posn to splice in (undef for rand)
# 3: length            (undef for 1, 0 for random)

sub mutate_duplicate {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    my $new = $self->[0];
    next if ($pos1 + $len) > $length;
    next if $pos2 > $length;
    my $seq = substr($new, $pos1, $len);
    substr($new, $pos2,0) = $seq;
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos2, 0, @{$self->[1]}[$pos1..($pos1+$len-1)];
    $rt++;
  }
  return $rt;
}

##
# Duplicates a sequence and writes it on top of some other position
# 0: num to perform  (or 1)
# 1: pos to get from          (undef for rand)
# 2: pos to start replacement (undef for rand)
# 3: length to operate on     (undef => 1, 0 => rand)

sub mutate_overwrite {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $new = $self->[0];
    my $length = length $self->[0];
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    next if ( ($pos1 + $len) >= $length
	      or $pos2 > $length);
    substr($new, $pos2, $len) = substr($new, $pos1, $len);
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice (@{$self->[1]}, $pos2, $len,
	    @{$self->[1]}[$pos1..($pos1+$len-1)] );
    $rt++;
  }

  return $rt;
}

##
# Takes a run of tokens and reverses their order, is a noop with 1 item
# 0: number to perform
# 1: posn to start from (undef for rand)
# 2: length             (undef=>1, 0=>rand)

sub mutate_reverse {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $length = length $self->[0];
    my $new = $self->[0];
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);

    next if ($pos >= $length
	    or $pos + $len > $length);

    my $chunk = reverse split('', substr($new, $pos, $len));
    substr($new, $pos, $len) = join('', $chunk);
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice (@{$self->[1]}, $pos, $len,
	    reverse( @{$self->[1]}[$pos..($pos+$len-1)] ));
    $rt++;
  }
  return $rt;
}

##
# Changes token into one of same type (ie. passes type to generate..)
# 0: number to perform
# 1: position to affect (undef for rand)

sub mutate_minor {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
    next if $pos >= length($self->[0]); # pos lies outside of gene
    my $type = substr($self->[0], $pos, 1);
    my @token = $self->generate_token($type, $self->[1][$pos]);
    # still need to check for niceness, just in case
    if ($token[0] eq $type) {
      $self->[1][$pos] = $token[1];
    }
    else {
      my $new = $self->[0];
      substr($new, $pos, 1) = $token[0];
      next unless $self->valid_gene($new, $pos);
      $self->[0] = $new;
      $self->[1][$pos] = $token[1];
    }
    $rt++;
  }
  return $rt;
}

##
# Changes one token into some other token
# 0: number to perform
# 1: position to affect (undef for random)

sub mutate_major {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
    next if $pos >= length($self->[0]); # outside of gene
    my @token = $self->generate_token();
    my $new = $self->[0];
    substr($new, $pos, 1) = $token[0];
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    $self->[1][$pos] = $token[1];
    $rt++;
  }
  return $rt;
}

##
# swaps over two sequences within the gene
# any sort of oddness can occur if regions overlap
# 0: number to perform
# 1: start of first sequence   (undef for rand)
# 2: start of second sequence  (undef for rand)
# 3: length of first sequence  (undef for 1, 0 for rand)
# 4: length of second sequence (undef for 1, 0 for rand)

sub mutate_switch {
  my $self = shift;
  my $num = $_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $pos1 = defined $_[1] ? $_[1] : int rand $length;
    my $pos2 = defined $_[2] ? $_[2] : int rand $length;
    my $len1 = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $len2 = !defined($_[4]) ? 1 : ($_[4] || int rand $length);

    my $new = $self->[0];
    next if $pos1 == $pos2;
    if ($pos1 > $pos2) { # ensure $pos1 comes first
      ($pos1, $pos2) = ($pos2, $pos1);
      ($len1, $len2) = ($len2, $len1);
    }
    if ( ($pos1 + $len1) > $pos2 # ensure no overlaps
	 or ($pos2 + $len2) > $length
	 or $pos1 >= $length ) {
      next;
    }
    my $chunk1 = substr($new, $pos1, $len1, substr($new, $pos2, $len2,''));
    substr($new,$pos2 -$len1 + $len2,0) = $chunk1;
    next unless $self->valid_gene($new);
    $self->[0]= $new;
    my @chunk1 = splice(@{$self->[1]}, $pos1, $len1,
			splice(@{$self->[1]}, $pos2, $len2) );
    splice @{$self->[1]}, $pos2 + $len2 - $len1,0, @chunk1;
    $rt++;
  }
  return $rt;
}

##
# takes a sequence, removes it, then inserts it at another position
# odd things might occur if posn to replace to lies within area taken from
# 0: number to perform
# 1: posn to get from   (undef for rand)
# 2: posn to put        (undef for rand)
# 3: length of sequence (undef for 1, 0 for rand)

sub mutate_shuffle {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $length = length $self->[0];
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);

    my $new = $self->[0];
    if ($pos1 +$len > $length   # outside gene
	or $pos2 >= $length      # outside gene
	or ($pos2 < ($pos1 + $len) and $pos2 > $pos1)) { # overlap
      next;
    }
    if ($pos1 < $pos2) {
      substr($new, $pos2-$len,0) = substr($new, $pos1, $len, '');
    }
    else {
      substr($new, $pos2, 0) = substr($new, $pos1, $len, '');
    }
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    if ($pos1 < $pos2) {
      splice (@{$self->[1]}, $pos2-$len, 0, 
	      splice(@{$self->[1]}, $pos1, $len) );
    }
    else {
      splice(@{$self->[1]}, $pos2, 0,
	     splice(@{$self->[1]}, $pos1, $len) );
    }
    $rt++;
  }
  return $rt;
}

# These are intended to be overriden, simple versions are
# provided for the sake of testing.

# Generates things to make up genes
# can be called with a token type to produce, or with none.
# if called with a token type, it will also be passed the original
# token as the second argument.
# should return a two element list of the token type followed by the token itself.

sub generate_token {
  my $self = shift;
  my $token_type = $_[0];
  my $letter = ('a'..'z')[rand 25];
  unless ($token_type) {
    return ($letter) x2;
  }
  return ($token_type) x2;
}

# takes sting of token types to be checked for validity.
# If a mutation affects only one place, then the position of the
# mutation can be passed as a second argument.
sub valid_gene {1}

## You might also want to have methods like the following,
# they will not be called by the 'sequence' methods.

# Default constructor
sub new {
  my $gene = ['',[]];
  return bless $gene, ref $_[0] || $_[0];
}

# remember that clone method may require deep copying depending on
# your specific needs

sub clone {
  my $self = shift;
  my $new = [$self->[0]];
  $new->[1] = [@{$self->[1]}];
  return bless $new, ref $self;
}

# You need some way to use the gene you've made and mutated, but
# this will let you have a look, if it starts being odd.

sub render_gene {
  my $self = shift;
  my $return =  "$self\n";
  $return .= $self->[0] . "\n";
  $return .= (join ',', @{$self->[1]}). "\n";
  return $return;
}

# used for testing

sub _test_dump {
  my $self = shift;
  my @rt = ($self->[0], join('',@{$self->[1]}));
  return @rt;
}
1;

__END__;

=pod

=head1 NAME

 AI::Gene::Sequence

=head1 SYNOPSIS

A base class for storing and mutating genetic sequences.

 package Somegene;
 use AI::Gene::Sequence;
 our @ISA = qw(AI::Gene::Sequence);

 my %things = ( a => [qw(a1 a2 a3 a4 a5)],
	       b => [qw(b1 b2 b3 b4 b5)],);

 sub generate_token {
  my $self = shift;
  my ($type, $prev) = @_;
  if ($type) {
    $prev = ${ $things{$type} }[rand @{ $things{$type} }];
  } 
  else {
    $type = ('a','b')[rand 2];
    $prev = ${$things{$type}}[rand @{$things{$type}}];
  }
  return ($type, $prev); 
 }

 sub valid_gene {
   my $self = shift;
   return 0 if $_[0] =~ /(.)\1/;
   return 1;
 }

 sub seed {
   my $self = shift;
   $self->[0] = 'ababab';
   @{$self->[1]} = qw(A1 B1 A2 B2 A3 B3);
 }

 sub render {
   my $self = shift;
   return join(' ', @{$self->[1]});
 } 

 # elsewhere
 package main;

 my $gene = Somegene->new;
 $gene->seed;
 print $gene->render, "\n";
 $gene->mutate(5);
 print $gene->render, "\n";
 $gene->mutate(5);
 print $gene->render, "\n";

=head1 DESCRIPTION

This is a class which provides generic methods for the
creation and mutation of genetic sequences.  Various mutations
are provided as is a way to ensure that genes created by
mutations remain useful (for instance, if a gene gives rise to
code, it can be tested for correct syntax).

If you do not need to keep check on what sort of thing is
currently occupying a slot in the gene, you would be better
off using the AI::Gene::Simple class instead as this
will be somewhat faster.  The interface to the mutations is
the same though, so if you need to change in future, then
it will not be too painful.

This module should not be confused with the I<bioperl> modules
which are used to analyse DNA sequences.

It is intended that the methods in this code are inherited
by other modules.

=head2 Anatomy of a gene

A gene is a sequence of tokens, each a member of some group
of simillar tokens (they can of course all be members of a
single group).  This module encodes genes as a string
representing token types, and an array containing the
tokens themselves, this allows for arbitary data to be
stored as a token in a gene.

For instance, a regular expression could be encoded as:

 $self = ['ccartm',['a', 'b', '|', '[A-Z]', '\W', '*?'] ]

Using a string to indicate the sort of thing held at the
corresponding part of the gene allows for a simple test
of the validity of a proposed gene by using a regular
expression.

=head2 Using the module

To use the genetic sequences, you must write your own
implementations of the following methods:

=over 4

=item generate_token

=item valid_gene

=back

You may also want to override the following methods:

=over 4

=item new

=item clone

=item render_gene

=back

=head2 Mutation methods

Mutation methods are all named C<mutate_*>.  In general, the
first argument will be the number of mutations required, followed
by the positions in the genes which should be affected, followed
by the lengths of sequences within the gene which should be affected.
If positions are not defined, then random ones are chosen.  If
lengths are not defined, a length of 1 is assumed (ie. working on
single tokens only), if a length of 0 is requested, then a random
length is chosen.

Also, if a mutation is suggested but would result in an invalid
sequence, then the mutation will not be carried out.
If a mutation is attempted which could corrupt your gene (copying
from a region beyond the end of the gene for instance) then it
will be silently skipped.  Mutation methods all return the number
of mutations carried out (not the number of tokens affected).

These methods all expect to be passed positive integers, undef or zero,
other values could (and likely will) do something unpredictable.

=over 4

=item C<mutate([num, ref to hash of probs & methods])>

This will call at random one of the other mutation methods.
It will repeat itself I<num> times.  If passed a reference
to a hash as its second argument, it will use that to
decide which mutation to attempt.

This hash should contain keys which fit $1 in C<mutate_(.*)>
and values indicating the weight to be given to that method.
The module will normalise this nicely, so you do not have to.
This lets you define your own mutation methods in addition to
overriding any you do not like in the module.

=item C<mutate_insert([num, pos])>

Inserts a single token into the string at position I<pos>.
The token will be randomly generated by the calling object's 
C<generate_token> method.

=item C<mutate_overwrite([num, pos1, pos2, len])>

Copies a section of the gene (starting at I<pos1>, length I<len>)
and writes it back into the gene, overwriting current elements,
starting at I<pos2>.

=item C<mutate_reverse([num, pos, len])>

Takes a sequence within the gene and reverses the ordering of the
elements within that sequence.  Starts at position I<pos> for
length I<len>.

=item C<mutate_shuffle([num, pos1, pos2, len])>

This takes a sequence (starting at I<pos1> length I<len>)
 from within a gene and moves
it to another position (starting at I<pos2>).  Odd things might occur if the
position to move the sequence into lies within the
section to be moved, but the module will try its hardest
to cause a mutation.

=item C<mutate_duplicate([num, pos1, pos2, length])>

This copies a portion of the gene starting at I<pos1> of length
I<length> and then splices it into the gene before I<pos2>.

=item C<mutate_remove([num, pos, length]))>

Deletes I<length> tokens from the gene, starting at I<pos>. Repeats
I<num> times.

=item C<mutate_minor([num, pos])>

This will mutate a single token at position I<pos> in the gene 
into one of the same type (as decided by the object's C<generate_token>
method).

=item C<mutate_major([num, pos])>

This changes a single token into a token of any token type.
Token at postition I<pos>.  The token is produced by the object's
C<generate_token> method.

=item C<mutate_switch([num, pos1, pos2, len1, len2])>

This takes two sequences within the gene and swaps them
into each other's position.  The first starts at I<pos1>
with length I<len1> and the second at I<pos2> with length
I<len2>.  If the two sequences overlap, then no mutation will
be attempted.

=back

The following methods are also provided, but you will probably
want to overide them for your own genetic sequences.

=over 4

=item C<generate_token([token type, current token])>

This is used by the mutation methods when changing tokens or 
creating new ones.  It is expected to return a list consisting
of a single character to indicate the token type being produced
and the token itself.  Where it makes sense to do so the token
which is about to be modifed is passed along with the token type.
If the calling methods require a token of any type, then no
arguments will be passed to this method.

The provided version of this method returns a random character
from 'a'..'z' as both the token type and token.

=item C<valid_gene(string [, posn])>

This is used to determine if a proposed mutation is allowed.  This
method is passed a string of the whole gene's token types, it will
also be passed a position in the gene where this makes sense (for
instance, if only one token is to change).  It is expected to
return a true value if a change is acceptable and a false one
if it is not.

The provided version of this method always returns true.

=item C<clone()>

This returns a copy of the gene as a new object.  If you are using
nested genes, or other references as your tokens, then you may need
to produce your own version which will deep copy your structure.

=item C<new>

This returns an empty gene, into which you can put things.  If you
want to initialise your gene, or anything useful like that, then
you will need another one of these.

=item C<render_gene>

This is useful for debugging, returns a serialised summary of the
gene.

=back

=head1 AUTHOR

This module was written by Alex Gough (F<alex@rcon.org>).

=head1 SEE ALSO

For an illustration of the use of this module, see Regexgene.pm,
Musicgene.pm, spamscan.pl and music.pl from the gziped distribution.

=head1 COPYRIGHT

Copyright (c) 2000 Alex Gough <F<alex@rcon.org>>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 BUGS

This is very slow if you do not need to check that your mutations
create valid genes, but fast if you do, thems the breaks.  There
is a AI::Gene::Simple class instead if this bothers you.

Some methods will do odd things if you pass them weird values,
so try not to do that.  So long as you stick to passing
positive integers or C<undef> to the methods then they should
recover gracefully.

While it is easy and fun to write genetic and evolutionary
algorithms in perl, for most purposes, it will be much slower
than if they were implemented in another more suitable language.
There are some problems which do lend themselves to an approach
in perl and these are the ones where the time between mutations
will be large, for instance, when composing music where the
selection process is driven by human whims.

=cut

view all matches for this distribution

AI-General

6 results

view release on metacpan or search on metacpan

General.pm view on Meta::CPAN

package AI::General;

use 5.008;
use strict;
use warnings;


our $VERSION = '0.01';


sub new {
	my ( $class ) = @_;
	my $self = bless [], $class;
	$self->dwim( "Implement self" );
	return $self;
}


sub dwim {
	my ( $self, $args ) = @_;

	#... TO DO
}
	

1;


__END__

=head1 NAME

AI::General - A general-purpose artificial intelligence

=head1 SYNOPSIS

  use AI::General;
  
  AI::General->dwim( "Prove NP != P" ) or
  	die $AI::General::excuse;
  	
  
  
=head1 DESCRIPTION

This module is a general purpose artificial intelligence.  It consists
of one method, dwim ('Do what I mean'), which can take any number of 
arguments.  

=head1 TO DO

Implement dwim()

=head1 CREDITS

Cheers to Santiago Dala for suggesting the constructor implementation

=head1 AUTHOR

Maciej Ceglowski, E<lt>maciej@ceglowski.comE<gt>

=head1 SEE ALSO

L<perl>.

=cut

view all matches for this distribution

AI-Genetic-Pro

45 results

view release on metacpan or search on metacpan

lib/AI/Genetic/Pro.pm view on Meta::CPAN

package AI::Genetic::Pro;
$AI::Genetic::Pro::VERSION = '1.009';
#---------------

use warnings;
use strict;
use base 							qw( Class::Accessor::Fast::XS );
#-----------------------------------------------------------------------
use Carp;
use Clone 							qw( clone );
use Struct::Compare;
use Digest::MD5 					qw( md5_hex );
use List::Util 						qw( sum );
use List::MoreUtils 				qw( minmax first_index apply );
#use Data::Dumper; 					$Data::Dumper::Sortkeys = 1;
use Tie::Array::Packed;
use UNIVERSAL::require;
#-----------------------------------------------------------------------
use AI::Genetic::Pro::Array::Type 	qw( get_package_by_element_size );
use AI::Genetic::Pro::Chromosome;
#-----------------------------------------------------------------------
__PACKAGE__->mk_accessors(qw(
	mce
	type
	population
	terminate
	chromosomes 
	crossover 
	native
	parents 		_parents 
	history 		_history
	fitness 		_fitness 		_fitness_real
	cache
	mutation 		_mutator
	strategy 		_strategist
	selection 		_selector 
	_translations
	generation
	preserve		
	variable_length
	_fix_range
	_package
	_length
	strict			_strict
	workers
	size
	_init
));
#=======================================================================
# Additional modules
use constant STORABLE	=> 'Storable';
use constant GD 		=> 'GD::Graph::linespoints'; 
#=======================================================================
my $_Cache = { };
my $_temp_chromosome;
#=======================================================================
sub new {
	my ( $class, %args ) = ( shift, @_ );
	
	#-------------------------------------------------------------------
	my %opts = map { if(ref $_){$_}else{ /^-?(.*)$/o; $1 }} @_;
	my $self = bless \%opts, $class;
	
	#-------------------------------------------------------------------
	$AI::Genetic::Pro::Array::Type::Native = 1 if $self->native;
	
	#-------------------------------------------------------------------
	croak(q/Type of chromosomes cannot be "combination" if "variable length" feature is active!/)
		if $self->type eq q/combination/ and $self->variable_length;
	croak(q/You must specify a crossover strategy with -strategy!/)
		unless defined ($self->strategy);
	croak(q/Type of chromosomes cannot be "combination" if strategy is not one of: OX, PMX!/)
		if $self->type eq q/combination/ and ($self->strategy->[0] ne q/OX/ and $self->strategy->[0] ne q/PMX/);
	croak(q/Strategy cannot be "/,$self->strategy->[0],q/" if "variable length" feature is active!/ )
		if ($self->strategy->[0] eq 'PMX' or $self->strategy->[0] eq 'OX') and $self->variable_length;
	
	#-------------------------------------------------------------------
	$self->_set_strict if $self->strict;

	#-------------------------------------------------------------------
	return $self unless $self->mce;

	#-------------------------------------------------------------------
	delete $self->{ mce };
	'AI::Genetic::Pro::MCE'->use or die q[Cannot raise multicore support: ] . $@;
	
	return AI::Genetic::Pro::MCE->new( $self, \%args );
}
#=======================================================================
sub _Cache { $_Cache; }
#=======================================================================
# INIT #################################################################
#=======================================================================
sub _set_strict {
	my ($self) = @_;
	
	# fitness
	my $fitness = $self->fitness();
	my $replacement = sub {
		my @tmp = @{$_[1]};
		my $ret = $fitness->(@_);
		my @cmp = @{$_[1]};
		die qq/Chromosome was modified in a fitness function from "@tmp" to "@{$_[1]}"!\n/ unless compare(\@tmp, \@cmp);
		return $ret;
	};
	$self->fitness($replacement);
}
#=======================================================================
sub _fitness_cached {
	my ($self, $chromosome) = @_;
	
	#my $key = md5_hex(${tied(@$chromosome)});
	my $key = md5_hex( $self->_package ? md5_hex( ${ tied( @$chromosome ) } ) : join( q[:], @$chromosome ) );
	return $_Cache->{$key} if exists $_Cache->{$key};
	
	$_Cache->{$key} = $self->_fitness_real->($self, $chromosome);
	return $_Cache->{$key};
}
#=======================================================================
sub _init_cache {
	my ($self) = @_;
		
	$self->_fitness_real($self->fitness);
	$self->fitness(\&_fitness_cached);
	return;
}
#=======================================================================
sub _check_data_ref {
	my ($self, $data_org) = @_;
	my $data = clone($data_org);
	my $ars;
	for(0..$#$data){
		next if $ars->{$data->[$_]};
		$ars->{$data->[$_]} = 1;
		unshift @{$data->[$_]}, undef;
	}
	return $data;
}
#=======================================================================
# we have to find C to (in some cases) incrase value of range
# due to design model
sub _find_fix_range {
	my ($self, $data) = @_;

	for my $idx (0..$#$data){
		if($data->[$idx]->[1] < 1){ 
			my $const = 1 - $data->[$idx]->[1];
			push @{$self->_fix_range}, $const; 
			$data->[$idx]->[1] += $const;
			$data->[$idx]->[2] += $const;
		}else{ push @{$self->_fix_range}, 0; }
	}

	return $data;
}
#=======================================================================
sub init { 
	my ( $self, $data ) = @_;
	
	croak q/You have to pass some data to "init"!/ unless $data;
	#-------------------------------------------------------------------
	$self->generation(0);
	$self->_init( $data );
	$self->_fitness( { } );
	$self->_fix_range( [ ] );
	$self->_history( [  [ ], [ ], [ ] ] );
	$self->_init_cache if $self->cache;
	#-------------------------------------------------------------------
	
	if($self->type eq q/listvector/){
		croak(q/You have to pass array reference if "type" is set to "listvector"/) unless ref $data eq 'ARRAY';
		$self->_translations( $self->_check_data_ref($data) );
	}elsif($self->type eq q/bitvector/){
		croak(q/You have to pass integer if "type" is set to "bitvector"/) if $data !~ /^\d+$/o;
		$self->_translations( [ [ 0, 1 ] ] );
		$self->_translations->[$_] = $self->_translations->[0] for 1..$data-1;
	}elsif($self->type eq q/combination/){
		croak(q/You have to pass array reference if "type" is set to "combination"/) unless ref $data eq 'ARRAY';
		$self->_translations( [ clone($data) ] );
		$self->_translations->[$_] = $self->_translations->[0] for 1..$#$data;
	}elsif($self->type eq q/rangevector/){
		croak(q/You have to pass array reference if "type" is set to "rangevector"/) unless ref $data eq 'ARRAY';
		$self->_translations( $self->_find_fix_range( $self->_check_data_ref($data) ));
	}else{
		croak(q/You have to specify first "type" of vector!/);
	}
	
	my $size = 0;

	if($self->type ne q/rangevector/){ for(@{$self->_translations}){ $size = $#$_ if $#$_ > $size; } }
#	else{ for(@{$self->_translations}){ $size = $_->[1] if $_->[1] > $size; } }
	else{ for(@{$self->_translations}){ $size = $_->[2] if $_->[2] > $size; } }		# Provisional patch for rangevector values truncated to signed  8-bit quantities. Thx to Tod Hagan

	my $package = get_package_by_element_size($size);
	$self->_package($package);

	my $length = ref $data ? sub { $#$data; } : sub { $data - 1 };
	if($self->variable_length){
		$length = ref $data ? sub { 1 + int( rand( $#{ $self->_init } ) ); } : sub { 1 + int( rand( $self->_init - 1) ); };
	}

	$self->_length( $length );

	$self->chromosomes( [ ] );
	push @{$self->chromosomes}, 
		AI::Genetic::Pro::Chromosome->new($self->_translations, $self->type, $package, $length->())
			for 1..$self->population;
	
	$self->_calculate_fitness_all();
}
#=======================================================================
# SAVE / LOAD ##########################################################
#=======================================================================
sub spew {
	#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	STORABLE->use( qw( store retrieve freeze thaw ) ) or croak(q/You need "/.STORABLE.q/" module to save a state of "/.__PACKAGE__.q/"!/);
	$Storable::Deparse = 1;
	$Storable::Eval = 1;
	#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	my ( $self ) = @_;
 	
	my $clone = { 
		_selector	=> undef,
		_strategist	=> undef,
		_mutator	=> undef,
	};
	
	$clone->{ chromosomes } = [ map { ${ tied( @$_ ) } } @{ $self->chromosomes } ] 
		if $self->_package;
	
	foreach my $key(keys %$self){
		next if exists $clone->{$key};
		$clone->{$key} = $self->{$key};
	}
	
	return $clone;
}
#=======================================================================
sub slurp {
	my ( $self, $dump ) = @_;

	if( my $typ = $self->_package ){ 
		@{ $dump->{ chromosomes } } = map {
			my $arr = $typ->make_with_packed( $_ );
			bless $arr, q[AI::Genetic::Pro::Chromosome];
		} @{ $dump->{ chromosomes } };
	}
    
    %$self = %$dump;
    
	return 1;
}
#=======================================================================
sub save { 
	my ( $self, $file ) = @_;
	
	croak(q/You have to specify file!/) unless defined $file;
	
	store( $self->spew, $file );
}
#=======================================================================
sub load { 
	#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	STORABLE->use( qw( store retrieve freeze thaw ) ) or croak(q/You need "/.STORABLE.q/" module to load a state of "/.__PACKAGE__.q/"!/);	
	$Storable::Deparse = 1;
	$Storable::Eval = 1;
	#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	my ($self, $file) = @_;
	croak(q/You have to specify file!/) unless defined $file;

	my $clone = retrieve($file);
	return carp('Incorrect file!') unless $clone;
	
	return $self->slurp( $clone );
}
#=======================================================================
# CHARTS ###############################################################
#=======================================================================
sub chart { 
	GD->require or croak(q/You need "/.GD.q/" module to draw chart of evolution!/);	
	my ($self, %params) = (shift, @_);

	my $graph = GD()->new(($params{-width} || 640), ($params{-height} || 480));

	my $data = $self->getHistory;

	if(defined $params{-font}){
    	$graph->set_title_font  ($params{-font}, 12);
    	$graph->set_x_label_font($params{-font}, 10);
    	$graph->set_y_label_font($params{-font}, 10);
    	$graph->set_legend_font ($params{-font},  8);
	}
	
    $graph->set_legend(
    	$params{legend1} || q/Max value/,
    	$params{legend2} || q/Mean value/,
    	$params{legend3} || q/Min value/,
    );

    $graph->set(
        x_label_skip        => int(($data->[0]->[-1]*4)/100),
        x_labels_vertical   => 1,
        x_label_position    => .5,
        y_label_position    => .5,
        y_long_ticks        => 1,   # poziome linie
        x_ticks             => 1,   # poziome linie

        l_margin            => 10,
        b_margin            => 10,
        r_margin            => 10,
        t_margin            => 10,

        show_values         => (defined $params{-show_values} ? 1 : 0),
        values_vertical     => 1,
        values_format       => ($params{-format} || '%.2f'),

        zero_axis           => 1,
        #interlaced          => 1,
        logo_position       => 'BR',
        legend_placement    => 'RT',

        bgclr               => 'white',
        boxclr              => '#FFFFAA',
        transparent         => 0,

        title       		=> ($params{'-title'}   || q/Evolution/ ),
        x_label     		=> ($params{'-x_label'} || q/Generation/),
        y_label     		=> ($params{'-y_label'} || q/Value/     ),
        
        ( $params{-logo} && -f $params{-logo} ? ( logo => $params{-logo} ) : ( ) )
    );
	
	
    my $gd = $graph->plot( [ [ 0..$#{$data->[0]} ], @$data ] ) or croak($@);
    open(my $fh, '>', $params{-filename}) or croak($@);
    binmode $fh;
    print $fh $gd->png;
    close $fh;
    
    return 1;
}
#=======================================================================
# TRANSLATIONS #########################################################
#=======================================================================
sub as_array_def_only {
	my ($self, $chromosome) = @_;
	
	return $self->as_array($chromosome) 
		if not $self->variable_length or $self->variable_length < 2;
	
	if( $self->type eq q/bitvector/ ){
		return $self->as_array($chromosome);
	}else{
		my $ar = $self->as_array($chromosome);
		my $idx = first_index { $_ } @$ar;
		my @array = @$ar[$idx..$#$chromosome];
		return @array if wantarray;
		return \@array;
	}
}
#=======================================================================
sub as_array {
	my ($self, $chromosome) = @_;
	
	if($self->type eq q/bitvector/){
		# This could lead to internal error, bacause of underlaying Tie::Array::Packed
		#return @$chromosome if wantarray;
		#return $chromosome;
		
		my @chr = @$chromosome;
		return @chr if wantarray;
		return \@chr;
		
	}elsif($self->type eq q/rangevector/){
		my $fix_range = $self->_fix_range;
		my $c = -1;
		#my @array = map { $c++; warn "WARN: $c | ",scalar @$chromosome,"\n" if not defined $fix_range->[$c]; $_ ? $_ - $fix_range->[$c] : undef } @$chromosome;
		my @array = map { $c++; $_ ? $_ - $fix_range->[$c] : undef } @$chromosome;
		
		return @array if wantarray;
		return \@array;
	}else{
		my $cnt = 0;
		my @array = map { $self->_translations->[$cnt++]->[$_] } @$chromosome;
		return @array if wantarray;
		return \@array;
	}
}
#=======================================================================
sub as_string_def_only {	
	my ($self, $chromosome) = @_;
	
	return $self->as_string($chromosome) 
		if not $self->variable_length or $self->variable_length < 2;

	my $array = $self->as_array_def_only($chromosome);
	
	return join(q//, @$array) if $self->type eq q/bitvector/;
	return join(q/___/, @$array);
}
#=======================================================================
sub as_string {	
	return join(q//, @{$_[1]}) if $_[0]->type eq q/bitvector/;
	return 	join(q/___/, map { defined $_ ? $_ : q/ / } $_[0]->as_array($_[1]));
}
#=======================================================================
sub as_value { 
	my ($self, $chromosome) = @_;
	croak(q/You MUST call 'as_value' as method of 'AI::Genetic::Pro' object./)
		unless defined $_[0] and ref $_[0] and ( ref $_[0] eq 'AI::Genetic::Pro' or ref $_[0] eq 'AI::Genetic::Pro::MCE');
	croak(q/You MUST pass 'AI::Genetic::Pro::Chromosome' object to 'as_value' method./) 
		unless defined $_[1] and ref $_[1] and ref $_[1] eq 'AI::Genetic::Pro::Chromosome';
	return $self->fitness->($self, $chromosome);  
}
#=======================================================================
# ALGORITHM ############################################################
#=======================================================================
sub _calculate_fitness_all {
	my ($self) = @_;
	
	$self->_fitness( { } );
	$self->_fitness->{$_} = $self->fitness()->($self, $self->chromosomes->[$_]) 
		for 0..$#{$self->chromosomes};

# sorting the population is not necessary	
#	my (@chromosomes, %fitness);
#	for my $idx (sort { $self->_fitness->{$a} <=> $self->_fitness->{$b} } keys %{$self->_fitness}){
#		push @chromosomes, $self->chromosomes->[$idx];
#		$fitness{$#chromosomes} = $self->_fitness->{$idx};
#		delete $self->_fitness->{$idx};
#		delete $self->chromosomes->[$idx];
#	}
#	
#	$self->_fitness(\%fitness);
#	$self->chromosomes(\@chromosomes);

	return;
}
#=======================================================================
sub _select_parents {
	my ($self) = @_;
	unless($self->_selector){
		croak "You must specify a selection strategy!"
			unless defined $self->selection;
		my @tmp = @{$self->selection};
		my $selector = q/AI::Genetic::Pro::Selection::/ . shift @tmp;
		$selector->require or die $!;
		$self->_selector($selector->new(@tmp));
	}
	
	$self->_parents($self->_selector->run($self));
	
	return;
}
#=======================================================================
sub _crossover {
	my ($self) = @_;
	
	unless($self->_strategist){
		my @tmp = @{$self->strategy};
		my $strategist = q/AI::Genetic::Pro::Crossover::/ . shift @tmp;
		$strategist->require or die $!;
		$self->_strategist($strategist->new(@tmp));
	}

	my $a = $self->_strategist->run($self);
	$self->chromosomes( $a );
	
	return;
}
#=======================================================================
sub _mutation {
	my ($self) = @_;
	
	unless($self->_mutator){
		my $mutator = q/AI::Genetic::Pro::Mutation::/ . ucfirst(lc($self->type));
		unless($mutator->require){
			$mutator = q/AI::Genetic::Pro::Mutation::Listvector/;
			$mutator->require;
		}
		$self->_mutator($mutator->new);
	}
	
	return $self->_mutator->run($self);
}
#=======================================================================
sub _save_history {
	my @tmp;
	if($_[0]->history){ @tmp = $_[0]->getAvgFitness; }
	else { @tmp = (undef, undef, undef); }
	
	push @{$_[0]->_history->[0]}, $tmp[0]; 
	push @{$_[0]->_history->[1]}, $tmp[1];
	push @{$_[0]->_history->[2]}, $tmp[2];
	return 1;
}
#=======================================================================
sub inject {
	my ($self, $candidates) = @_;
	
	for(@$candidates){
		push @{$self->chromosomes}, 
			AI::Genetic::Pro::Chromosome->new_from_data($self->_translations, $self->type, $self->_package, $_, $self->_fix_range);
		$self->_fitness->{$#{$self->chromosomes}} = $self->fitness()->($self, $self->chromosomes->[-1]);

	}			
	$self->_strict( [ ] );
	$self->population( $self->population + scalar( @$candidates ) );

	return 1;
}
#=======================================================================
sub _state {
	my ( $self ) = @_;
	
	my @res;
	
	if( $self->_package ){
		@res = map { 
			[
				${ tied( @{ $self->chromosomes->[ $_ ] } ) },
				$self->_fitness->{ $_ },
			]
		} 0 .. $self->population - 1
	}else{
		@res = map { 
			[
				$self->chromosomes->[ $_ ],
				$self->_fitness->{ $_ },
			]
		} 0 .. $self->population - 1
	}
	
	return \@res;
}
#=======================================================================
sub evolve {
	my ($self, $generations) = @_;

	# generations must be defined
	$generations ||= -1; 	 
	
	if($self->strict and $self->_strict){
		for my $idx (0..$#{$self->chromosomes}){
			croak(q/Chromosomes was modified outside the 'evolve' function!/) unless $self->chromosomes->[$idx] and $self->_strict->[$idx];
			my @tmp0 = @{$self->chromosomes->[$idx]};
			my @tmp1 = @{$self->_strict->[$idx]};
			croak(qq/Chromosome was modified outside the 'evolve' function from "@tmp0" to "@tmp1"!/) unless compare(\@tmp0, \@tmp1);
		}
	}
	
	# split into two loops just for speed
	unless($self->preserve){
		for(my $i = 0; $i != $generations; $i++){
			# terminate ----------------------------------------------------
			last if $self->terminate and $self->terminate->($self);
			# update generation --------------------------------------------
			$self->generation($self->generation + 1);
			# update history -----------------------------------------------
			$self->_save_history;
			# selection ----------------------------------------------------
			$self->_select_parents();
			# crossover ----------------------------------------------------
			$self->_crossover();
			# mutation -----------------------------------------------------
			$self->_mutation();
		}
	}else{
		croak('You cannot preserve more chromosomes than is in population!') if $self->preserve > $self->population;
		my @preserved;
		for(my $i = 0; $i != $generations; $i++){
			# terminate ----------------------------------------------------
			last if $self->terminate and $self->terminate->($self);
			# update generation --------------------------------------------
			$self->generation($self->generation + 1);
			# update history -----------------------------------------------
			$self->_save_history;
			#---------------------------------------------------------------
			# preservation of N unique chromosomes
			@preserved = map { clone($_) } @{ $self->getFittest_as_arrayref($self->preserve - 1, 1) };
			# selection ----------------------------------------------------
			$self->_select_parents();
			# crossover ----------------------------------------------------
			$self->_crossover();
			# mutation -----------------------------------------------------
			$self->_mutation();
			#---------------------------------------------------------------
			for(@preserved){
				my $idx = int rand @{$self->chromosomes};
				$self->chromosomes->[$idx] = $_;
				$self->_fitness->{$idx} = $self->fitness()->($self, $_);
			}
		}
	}
	
	if($self->strict){
		$self->_strict( [ ] );
		push @{$self->_strict}, $_->clone for @{$self->chromosomes};
	}
}
#=======================================================================
# ALIASES ##############################################################
#=======================================================================
sub people { $_[0]->chromosomes() }
#=======================================================================
sub getHistory { $_[0]->_history()  }
#=======================================================================
sub mutProb { shift->mutation(@_) }
#=======================================================================
sub crossProb { shift->crossover(@_) }
#=======================================================================
sub intType { shift->type() }
#=======================================================================
# STATS ################################################################
#=======================================================================
sub getFittest_as_arrayref { 
	my ($self, $n, $uniq) = @_;
	$n ||= 1;
	
	$self->_calculate_fitness_all() unless scalar %{ $self->_fitness };
	my @keys = sort { $self->_fitness->{$a} <=> $self->_fitness->{$b} } 0..$#{$self->chromosomes};
	
	if($uniq){
		my %grep;
		my $chromosomes = $self->chromosomes;
		if( my $pkg = $self->_package ){
			my %tmp;
			@keys = grep { 
				my $key = ${ tied( @{ $chromosomes->[ $_ ] } ) };
				#my $key = md5_hex( ${ tied( @{ $chromosomes->[ $_ ] } ) } ); # ?
				$tmp{ $key } && 0 or $tmp{ $key } = 1;
			} @keys;
			#@keys = grep { 
			#		my $add_to_list = 0;
			#		my $key = md5_hex(${tied(@{$chromosomes->[$_]})});
			#		unless($grep{$key}) { 
			#			$grep{$key} = 1; 
			#			$add_to_list = 1;
			#		}
			#		$add_to_list;
			#	} @keys;
		}else{
			my %tmp;
			@keys = grep { 
				my $key = md5_hex( join( q[:], @{ $chromosomes->[ $_ ] } ) );
				$tmp{ $key } && 0 or $tmp{ $key } = 1;
			} @keys;
		}
	}
	
	$n = scalar @keys if $n > scalar @keys;
	return [ reverse @{$self->chromosomes}[ splice @keys, $#keys - $n + 1, $n ] ];
}
#=======================================================================
sub getFittest { return wantarray ? @{ shift->getFittest_as_arrayref(@_) } : shift @{ shift->getFittest_as_arrayref(@_) }; }
#=======================================================================
sub getAvgFitness {
	my ($self) = @_;
	
	my @minmax = minmax values %{$self->_fitness};
	my $mean = sum(values %{$self->_fitness}) / scalar values %{$self->_fitness};
	return $minmax[1], int($mean), $minmax[0];
}
#=======================================================================
1;


__END__

=head1 NAME

AI::Genetic::Pro - Efficient genetic algorithms for professional purpose with support for multiprocessing.

=head1 SYNOPSIS

    use AI::Genetic::Pro;
    
    sub fitness {
        my ($ga, $chromosome) = @_;
        return oct('0b' . $ga->as_string($chromosome)); 
    }
    
    sub terminate {
        my ($ga) = @_;
        my $result = oct('0b' . $ga->as_string($ga->getFittest));
        return $result == 4294967295 ? 1 : 0;
    }
    
    my $ga = AI::Genetic::Pro->new(        
        -fitness         => \&fitness,        # fitness function
        -terminate       => \&terminate,      # terminate function
        -type            => 'bitvector',      # type of chromosomes
        -population      => 1000,             # population
        -crossover       => 0.9,              # probab. of crossover
        -mutation        => 0.01,             # probab. of mutation
        -parents         => 2,                # number  of parents
        -selection       => [ 'Roulette' ],   # selection strategy
        -strategy        => [ 'Points', 2 ],  # crossover strategy
        -cache           => 0,                # cache results
        -history         => 1,                # remember best results
        -preserve        => 3,                # remember the bests
        -variable_length => 1,                # turn variable length ON
        -mce             => 1,                # optional MCE support
        -workers         => 3,                # number of workers (MCE)
    );
	
    # init population of 32-bit vectors
    $ga->init(32);
	
    # evolve 10 generations
    $ga->evolve(10);
    
    # best score
    print "SCORE: ", $ga->as_value($ga->getFittest), ".\n";
    
    # save evolution path as a chart
    $ga->chart(-filename => 'evolution.png');
     
    # save state of GA
    $ga->save('genetic.sga');
    
    # load state of GA
    $ga->load('genetic.sga');

=head1 DESCRIPTION

This module provides efficient implementation of a genetic algorithm for
professional purpose with support for multiprocessing. It was designed to operate as fast as possible
even on very large populations and big individuals/chromosomes. C<AI::Genetic::Pro> 
was inspired by C<AI::Genetic>, so it is in most cases compatible 
(there are some changes). Additionally C<AI::Genetic::Pro> isn't a pure Perl solution, so it 
doesn't have limitations of its ancestor (such as slow-down in the
case of big populations ( >10000 ) or vectors with more than 33 fields).

If You are looking for a pure Perl solution, consider L<AI::Genetic>.

=over 4

=item Speed

To increase speed XS code is used, however with portability in 
mind. This distribution was tested on Windows and Linux platforms 
(and should work on any other).

Multicore support is available through Many-Core Engine (C<MCE>). 
You can gain the most speed up for big populations or time/CPU consuming 
fitness functions, however for small populations and/or simple fitness 
function better choice will be single-process version.

You can get even more speed up if you turn on use of native arrays 
(parameter: C<native>) instead of packing chromosomes into single scalar. 
However you have to remember about expensive memory use in that case.

=item Memory

This module was designed to use as little memory as possible. A population
of size 10000 consisting of 92-bit vectors uses only ~24MB (C<AI::Genetic> 
would use about 78MB). However - if you use MCE - there will be bigger 
memory consumption. This is consequence of necessity of synchronization 
between many processes.

=item Advanced options

To provide more flexibility C<AI::Genetic::Pro> supports many 
statistical distributions, such as C<uniform>, C<natural>, C<chi_square>
and others. This feature can be used in selection and/or crossover. See
the documentation below.

=back

=head1 METHODS

=over 4

=item I<$ga>-E<gt>B<new>( %options )

Constructor. It accepts options in hash-value style. See options and 
an example below.

=over 8

=item -fitness

This defines a I<fitness> function. It expects a reference to a subroutine.

=item -terminate 

This defines a I<terminate> function. It expects a reference to a subroutine.

=item -type

This defines the type of chromosomes. Currently, C<AI::Genetic::Pro> supports four types:

=over 12

=item bitvector

Individuals/chromosomes of this type have genes that are bits. Each gene can be in one of two possible states, on or off.

=item listvector

Each gene of a "listvector" individual/chromosome can assume one string value from a specified list of possible string values.

=item rangevector

Each gene of a "rangevector" individual/chromosome can assume one integer 
value from a range of possible integer values. Note that only integers 
are supported. The user can always transform any desired fractional values 
by multiplying and dividing by an appropriate power of 10.

=item combination

Each gene of a "combination" individual/chromosome can assume one string value from a specified list of possible string values. B<All genes are unique.>

=back

=item -population

This defines the size of the population, i.e. how many chromosomes
simultaneously exist at each generation.

=item -crossover 

This defines the crossover rate. The fairest results are achieved with
crossover rate ~0.95.

=item -mutation 

This defines the mutation rate. The fairest results are achieved with mutation
rate ~0.01.

=item -preserve

This defines injection of the bests chromosomes into a next generation. It causes a little slow down, however (very often) much better results are achieved. You can specify, how many chromosomes will be preserved, i.e.

    -preserve => 1, # only one chromosome will be preserved
    # or
    -preserve => 9, # 9 chromosomes will be preserved
    # and so on...

Attention! You cannot preserve more chromosomes than exist in your population.

=item -variable_length

This defines whether variable-length chromosomes are turned on (default off)
and a which types of mutation are allowed. See below.

=over 8

=item level 0

Feature is inactive (default). Example:

	-variable_length => 0
	
    # chromosomes (i.e. bitvectors)
    0 1 0 0 1 1 0 1 1 1 0 1 0 1
    0 0 1 1 0 1 1 1 1 0 0 1 1 0
    0 1 1 1 0 1 0 0 1 1 0 1 1 1
    0 1 0 0 1 1 0 1 1 1 1 0 1 0
    # ...and so on

=item level 1 

Feature is active, but chromosomes can varies B<only on the right side>, Example:

	-variable_length => 1
	
    # chromosomes (i.e. bitvectors)
    0 1 0 0 1 1 0 1 1 1 
    0 0 1 1 0 1 1 1 1
    0 1 1 1 0 1 0 0 1 1 0 1 1 1
    0 1 0 0 1 1 0 1 1 1
    # ...and so on
	
=item level 2 

Feature is active and chromosomes can varies B<on the left side and on 
the right side>; unwanted values/genes on the left side are replaced with C<undef>, ie.
 
	-variable_length => 2
 
    # chromosomes (i.e. bitvectors)
    x x x 0 1 1 0 1 1 1 
    x x x x 0 1 1 1 1
    x 1 1 1 0 1 0 0 1 1 0 1 1 1
    0 1 0 0 1 1 0 1 1 1
    # where 'x' means 'undef'
    # ...and so on

In this situation returned chromosomes in an array context ($ga-E<gt>as_array($chromosome)) 
can have B<undef> values on the left side (only). In a scalar context each 
undefined value is replaced with a single space. If You don't want to see
any C<undef> or space, just use C<as_array_def_only> and C<as_string_def_only> 
instead of C<as_array> and C<as_string>.

=back

=item -parents  

This defines how many parents should be used in a crossover.

=item -selection

This defines how individuals/chromosomes are selected to crossover. It expects an array reference listed below:

    -selection => [ $type, @params ]

where type is one of:

=over 8

=item B<RouletteBasic>

Each individual/chromosome can be selected with probability proportional to its fitness.

=item B<Roulette>

First the best individuals/chromosomes are selected. From this collection
parents are selected with probability poportional to their fitness.

=item B<RouletteDistribution>

Each individual/chromosome has a portion of roulette wheel proportional to its
fitness. Selection is done with the specified distribution. Supported
distributions and parameters are listed below.

=over 12

=item C<-selection =E<gt> [ 'RouletteDistribution', 'uniform' ]>

Standard uniform distribution. No additional parameters are needed.

=item C<-selection =E<gt> [ 'RouletteDistribution', 'normal', $av, $sd ]>

Normal distribution, where C<$av> is average (default: size of population /2) and $C<$sd> is standard deviation (default: size of population).


=item C<-selection =E<gt> [ 'RouletteDistribution', 'beta', $aa, $bb ]>

I<Beta> distribution.  The density of the beta is:

    X^($aa - 1) * (1 - X)^($bb - 1) / B($aa , $bb) for 0 < X < 1.

C<$aa> and C<$bb> are set by default to number of parents.

B<Argument restrictions:> Both $aa and $bb must not be less than 1.0E-37.

=item C<-selection =E<gt> [ 'RouletteDistribution', 'binomial' ]>

Binomial distribution. No additional parameters are needed.

=item C<-selection =E<gt> [ 'RouletteDistribution', 'chi_square', $df ]>

Chi-square distribution with C<$df> degrees of freedom. C<$df> by default is set to size of population.

=item C<-selection =E<gt> [ 'RouletteDistribution', 'exponential', $av ]>

Exponential distribution, where C<$av> is average . C<$av> by default is set to size of population.

=item C<-selection =E<gt> [ 'RouletteDistribution', 'poisson', $mu ]>

Poisson distribution, where C<$mu> is mean. C<$mu> by default is set to size of population.

=back

=item B<Distribution>

Chromosomes/individuals are selected with specified distribution. See below.

=over 12

=item C<-selection =E<gt> [ 'Distribution', 'uniform' ]>

Standard uniform distribution. No additional parameters are needed.

=item C<-selection =E<gt> [ 'Distribution', 'normal', $av, $sd ]>

Normal distribution, where C<$av> is average (default: size of population /2) and $C<$sd> is standard deviation (default: size of population).

=item C<-selection =E<gt> [ 'Distribution', 'beta', $aa, $bb ]>

I<Beta> distribution.  The density of the beta is:

    X^($aa - 1) * (1 - X)^($bb - 1) / B($aa , $bb) for 0 < X < 1.

C<$aa> and C<$bb> are set by default to number of parents.

B<Argument restrictions:> Both $aa and $bb must not be less than 1.0E-37.

=item C<-selection =E<gt> [ 'Distribution', 'binomial' ]>

Binomial distribution. No additional parameters are needed.

=item C<-selection =E<gt> [ 'Distribution', 'chi_square', $df ]>

Chi-square distribution with C<$df> degrees of freedom. C<$df> by default is set to size of population.

=item C<-selection =E<gt> [ 'Distribution', 'exponential', $av ]>

Exponential distribution, where C<$av> is average . C<$av> by default is set to size of population.

=item C<-selection =E<gt> [ 'Distribution', 'poisson', $mu ]>

Poisson distribution, where C<$mu> is mean. C<$mu> by default is set to size of population.

=back

=back

=item -strategy 

This defines the astrategy of crossover operation. It expects an array
reference listed below:

    -strategy => [ $type, @params ]

where type is one of:

=over 4

=item PointsSimple

Simple crossover in one or many points. The best chromosomes/individuals are
selected for the new generation. For example:

    -strategy => [ 'PointsSimple', $n ]

where C<$n> is the number of points for crossing.

=item PointsBasic

Crossover in one or many points. In basic crossover selected parents are
crossed and one (randomly-chosen) child is moved to the new generation. For
example:

    -strategy => [ 'PointsBasic', $n ]

where C<$n> is the number of points for crossing.

=item Points

Crossover in one or many points. In normal crossover selected parents are crossed and the best child is moved to the new generation. For example:

    -strategy => [ 'Points', $n ]

where C<$n> is number of points for crossing.

=item PointsAdvenced

Crossover in one or many points. After crossover the best
chromosomes/individuals from all parents and chidren are selected for the  new
generation. For example:

    -strategy => [ 'PointsAdvanced', $n ]

where C<$n> is the number of points for crossing.

=item Distribution

In I<distribution> crossover parents are crossed in points selected with the
specified distribution. See below.

=over 8

=item C<-strategy =E<gt> [ 'Distribution', 'uniform' ]>

Standard uniform distribution. No additional parameters are needed.

=item C<-strategy =E<gt> [ 'Distribution', 'normal', $av, $sd ]>

Normal distribution, where C<$av> is average (default: number of parents/2) and C<$sd> is standard deviation (default: number of parents).

=item C<-strategy =E<gt> [ 'Distribution', 'beta', $aa, $bb ]>

I<Beta> distribution.  The density of the beta is:

    X^($aa - 1) * (1 - X)^($bb - 1) / B($aa , $bb) for 0 < X < 1.

C<$aa> and C<$bb> are set by default to the number of parents.

B<Argument restrictions:> Both $aa and $bb must not be less than 1.0E-37.

=item C<-strategy =E<gt> [ 'Distribution', 'binomial' ]>

Binomial distribution. No additional parameters are needed.

=item C<-strategy =E<gt> [ 'Distribution', 'chi_square', $df ]>

Chi-squared distribution with C<$df> degrees of freedom. C<$df> by default is set to the number of parents.

=item C<-strategy =E<gt> [ 'Distribution', 'exponential', $av ]>

Exponential distribution, where C<$av> is average . C<$av> by default is set to the number of parents.

=item C<-strategy =E<gt> [ 'Distribution', 'poisson', $mu ]>

Poisson distribution, where C<$mu> is mean. C<$mu> by default is set to the number of parents.

=back

=item PMX

PMX method defined by Goldberg and Lingle in 1985. Parameters: I<none>.

=item OX

OX method defined by Davis (?) in 1985. Parameters: I<none>.

=back

=item -cache    

This defines whether a cache should be used. Allowed values are 1 or 0
(default: I<0>).

=item -history 

This defines whether history should be collected. Allowed values are 1 or 0 (default: I<0>).

=item -native 

This defines whether native arrays should be used instead of packing each chromosome into signle scalar. 
Turning this option can give you speed up, but much more memory will be used. Allowed values are 1 or 0 (default: I<0>).

=item -mce

This defines whether Many-Core Engine (MCE) should be used during processing. 
This can give you significant speed up on many-core/CPU systems, but it'll 
increase memory consumption. Allowed values are 1 or 0 (default: I<0>).

=item -workers

This option has any meaning only if MCE is turned on. This defines how 
many process will be used during processing. Default will be used one proces per core (most efficient).

=item -strict

This defines if the check for modifying chromosomes in a user-defined fitness
function is active. Directly modifying chromosomes is not allowed and it is 
a highway to big trouble. This mode should be used only for testing, because it is B<slow>.

=back

=item I<$ga>-E<gt>B<inject>($chromosomes)

Inject new, user defined, chromosomes into the current population. See example below:

    # example for bitvector
    my $chromosomes = [
        [ 1, 1, 0, 1, 0, 1 ],
        [ 0, 0, 0, 1, 0, 1 ],
        [ 0, 1, 0, 1, 0, 0 ],
        ...
    ];
    
    # inject
    $ga->inject($chromosomes);

If You want to delete some chromosomes from population, just C<splice> them:

    my @remove = qw(1 2 3 9 12);
	for my $idx (sort { $b <=> $a }  @remove){
        splice @{$ga->chromosomes}, $idx, 1;
    }

=item I<$ga>-E<gt>B<population>($population)

Set/get size of the population. This defines the size of the population, i.e. how many chromosomes to simultaneously exist at each generation.

=item I<$ga>-E<gt>B<indType>()

Get type of individuals/chromosomes. Currently supported types are:

=over 4

=item C<bitvector>

Chromosomes will be just bitvectors. See documentation of C<new> method.

=item C<listvector>

Chromosomes will be lists of specified values. See documentation of C<new> method.

=item C<rangevector>

Chromosomes will be lists of values from specified range. See documentation of C<new> method.

=item C<combination>

Chromosomes will be unique lists of specified values. This is used for example
in the I<Traveling Salesman Problem>. See the documentation of the C<new>
method.

=back

In example:

    my $type = $ga->type();

=item I<$ga>-E<gt>B<type>()

Alias for C<indType>.

=item I<$ga>-E<gt>B<crossProb>()

This method is used to query and set the crossover rate.

=item I<$ga>-E<gt>B<crossover>()

Alias for C<crossProb>.

=item I<$ga>-E<gt>B<mutProb>()

This method is used to query and set the mutation rate.

=item I<$ga>-E<gt>B<mutation>()

Alias for C<mutProb>.

=item I<$ga>-E<gt>B<parents>($parents)

Set/get number of parents in a crossover.

=item I<$ga>-E<gt>B<init>($args)

This method initializes the population with random individuals/chromosomes. It MUST be called before any call to C<evolve()>. It expects one argument, which depends on the type of individuals/chromosomes:

=over 4

=item B<bitvector>

For bitvectors, the argument is simply the length of the bitvector.

    $ga->init(10);

This initializes a population where each individual/chromosome has 10 genes.

=item B<listvector>

For listvectors, the argument is an anonymous list of lists. The number of sub-lists is equal to the number of genes of each individual/chromosome. Each sub-list defines the possible string values that the corresponding gene can assume.

    $ga->init([
               [qw/red blue green/],
               [qw/big medium small/],
               [qw/very_fat fat fit thin very_thin/],
              ]);

This initializes a population where each individual/chromosome has 3 genes and each gene can assume one of the given values.

=item B<rangevector>

For rangevectors, the argument is an anonymous list of lists. The number of sub-lists is equal to the number of genes of each individual/chromosome. Each sub-list defines the minimum and maximum integer values that the corresponding gene can assume.

    $ga->init([
               [1, 5],
               [0, 20],
               [4, 9],
              ]);

This initializes a population where each individual/chromosome has 3 genes and each gene can assume an integer within the corresponding range.

=item B<combination>

For combination, the argument is an anonymous list of possible values of gene.

    $ga->init( [ 'a', 'b', 'c' ] );

This initializes a population where each chromosome has 3 genes and each gene
is a unique combination of 'a', 'b' and 'c'. For example genes looks something
like that:

    [ 'a', 'b', 'c' ]    # gene 1
    [ 'c', 'a', 'b' ]    # gene 2
    [ 'b', 'c', 'a' ]    # gene 3
    # ...and so on...

=back

=item I<$ga>-E<gt>B<evolve>($n)

This method causes the GA to evolve the population for the specified number of
generations. If its argument is 0 or C<undef> GA will evolve the population to
infinity unless a C<terminate> function is specified.

=item I<$ga>-E<gt>B<getHistory>()

Get history of the evolution. It is in a format listed below:

	[
		# gen0   gen1   gen2   ...          # generations
		[ max0,  max1,  max2,  ... ],       # max values
		[ mean,  mean1, mean2, ... ],       # mean values
		[ min0,  min1,  min2,  ... ],       # min values
	]

=item I<$ga>-E<gt>B<getAvgFitness>()

Get I<max>, I<mean> and I<min> score of the current generation. In example:

    my ($max, $mean, $min) = $ga->getAvgFitness();

=item I<$ga>-E<gt>B<getFittest>($n, $unique)

This function returns a list of the fittest chromosomes from the current
population.  You can specify how many chromosomes should be returned and if
the returned chromosomes should be unique. See example below.

    # only one - the best
    my ($best) = $ga->getFittest;

    # or 5 bests chromosomes, NOT unique
    my @bests = $ga->getFittest(5);

    # or 7 bests and UNIQUE chromosomes
    my @bests = $ga->getFittest(7, 1);

If you want to get a large number of chromosomes, try to use the
C<getFittest_as_arrayref> function instead (for efficiency).

=item I<$ga>-E<gt>B<getFittest_as_arrayref>($n, $unique)

This function is very similar to C<getFittest>, but it returns a reference 
to an array instead of a list. 

=item I<$ga>-E<gt>B<generation>()

Get the number of the current generation.

=item I<$ga>-E<gt>B<people>()

Returns an anonymous list of individuals/chromosomes of the current population. 

B<IMPORTANT:> the actual array reference used by the C<AI::Genetic::Pro> 
object is returned, so any changes to it will be reflected in I<$ga>.

=item I<$ga>-E<gt>B<chromosomes>()

Alias for C<people>.

=item I<$ga>-E<gt>B<chart>(%options)

Generate a chart describing changes of min, mean, and max scores in your
population. To satisfy your needs, you can pass the following options:

=over 4

=item -filename

File to save a chart in (B<obligatory>).

=item -title

Title of a chart (default: I<Evolution>).

=item -x_label

X label (default: I<Generations>).

=item -y_label

Y label (default: I<Value>).

=item -format

Format of values, like C<sprintf> (default: I<'%.2f'>).

=item -legend1

Description of min line (default: I<Min value>).

=item -legend2

Description of min line (default: I<Mean value>).

=item -legend3

Description of min line (default: I<Max value>).

=item -width

Width of a chart (default: I<640>).

=item -height

Height of a chart (default: I<480>).

=item -font

Path to font (in *.ttf format) to be used (default: none).

=item -logo

Path to logo (png/jpg image) to embed in a chart (default: none).

=item For example:

	$ga->chart(-width => 480, height => 320, -filename => 'chart.png');

=back

=item I<$ga>-E<gt>B<save>($file)

Save the current state of the genetic algorithm to the specified file.

=item I<$ga>-E<gt>B<load>($file)

Load a state of the genetic algorithm from the specified file. 

=item I<$ga>-E<gt>B<as_array>($chromosome)

In list context return an array representing the specified chromosome. 
In scalar context return an reference to an array representing the specified 
chromosome. If I<variable_length> is turned on and is set to level 2, an array 
can have some C<undef> values. To get only C<not undef> values use 
C<as_array_def_only> instead of C<as_array>.

=item I<$ga>-E<gt>B<as_array_def_only>($chromosome)

In list context return an array representing the specified chromosome. 
In scalar context return an reference to an array representing the specified 
chromosome. If I<variable_length> is turned off, this function is just an
alias for C<as_array>. If I<variable_length> is turned on and is set to 
level 2, this function will return only C<not undef> values from chromosome. 
See example below:

    # -variable_length => 2, -type => 'bitvector'
	
    my @chromosome = $ga->as_array($chromosome)
    # @chromosome looks something like that
    # ( undef, undef, undef, 1, 0, 1, 1, 1, 0 )
	
    @chromosome = $ga->as_array_def_only($chromosome)
    # @chromosome looks something like that
    # ( 1, 0, 1, 1, 1, 0 )

=item I<$ga>-E<gt>B<as_string>($chromosome)

Return a string representation of the specified chromosome. See example below:

	# -type => 'bitvector'
	
	my $string = $ga->as_string($chromosome);
	# $string looks something like that
	# 1___0___1___1___1___0 
	
	# or 
	
	# -type => 'listvector'
	
	$string = $ga->as_string($chromosome);
	# $string looks something like that
	# element0___element1___element2___element3...

Attention! If I<variable_length> is turned on and is set to level 2, it is 
possible to get C<undef> values on the left side of the vector. In the returned
string C<undef> values will be replaced with B<spaces>. If you don't want
to see any I<spaces>, use C<as_string_def_only> instead of C<as_string>.

=item I<$ga>-E<gt>B<as_string_def_only>($chromosome)

Return a string representation of specified chromosome. If I<variable_length> 
is turned off, this function is just alias for C<as_string>. If I<variable_length> 
is turned on and is set to level 2, this function will return a string without
C<undef> values. See example below:

	# -variable_length => 2, -type => 'bitvector'
	
	my $string = $ga->as_string($chromosome);
	# $string looks something like that
	#  ___ ___ ___1___1___0 
	
	$string = $ga->as_string_def_only($chromosome);
	# $string looks something like that
	# 1___1___0 

=item I<$ga>-E<gt>B<as_value>($chromosome)

Return the score of the specified chromosome. The value of I<chromosome> is 
calculated by the fitness function.

=back

=head1 SUPPORT

C<AI::Genetic::Pro> is still under development; however, it is used in many
production environments.

=head1 TODO

=over 4

=item Examples.

=item More tests.

=item More warnings about incorrect parameters.

=back

=head1 REPORTING BUGS

When reporting bugs/problems please include as much information as possible.
It may be difficult for me to reproduce the problem as almost every setup
is different.

A small script which yields the problem will probably be of help. 

=head1 THANKS

Mario Roy for suggestions about efficiency.

Miles Gould for suggestions and some fixes (even in this documentation! :-).

Alun Jones for fixing memory leaks.

Tod Hagan for reporting a bug (rangevector values truncated to signed  8-bit quantities) and supplying a patch.

Randal L. Schwartz for reporting a bug in this documentation.

Maciej Misiak for reporting problems with C<combination> (and a bug in a PMX strategy).

LEONID ZAMDBORG for recommending the addition of variable-length chromosomes as well as supplying relevant code samples, for testing and at the end reporting some bugs.

Christoph Meissner for reporting a bug.

Alec Chen for reporting some bugs.

=head1 AUTHOR

Strzelecki Lukasz <lukasz@strzeleccy.eu>

=head1 SEE ALSO

L<AI::Genetic>
L<Algorithm::Evolutionary>

=head1 COPYRIGHT

Copyright (c) Strzelecki Lukasz. All rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

view all matches for this distribution

AI-Genetic

14 results

view release on metacpan or search on metacpan

Genetic.pm view on Meta::CPAN


package AI::Genetic;

use strict;
use Carp;

use vars qw/$VERSION/;

$VERSION = 0.05;

use AI::Genetic::Defaults;

# new AI::Genetic. More modular.
# Not too many checks are done still.

##### Shared private vars
# this hash predefines some strategies

my %_strategy = (
		 rouletteSinglePoint => \&AI::Genetic::Defaults::rouletteSinglePoint,
		 rouletteTwoPoint    => \&AI::Genetic::Defaults::rouletteTwoPoint,
		 rouletteUniform     => \&AI::Genetic::Defaults::rouletteUniform,

		 tournamentSinglePoint => \&AI::Genetic::Defaults::tournamentSinglePoint,
		 tournamentTwoPoint    => \&AI::Genetic::Defaults::tournamentTwoPoint,
		 tournamentUniform     => \&AI::Genetic::Defaults::tournamentUniform,

		 randomSinglePoint => \&AI::Genetic::Defaults::randomSinglePoint,
		 randomTwoPoint    => \&AI::Genetic::Defaults::randomTwoPoint,
		 randomUniform     => \&AI::Genetic::Defaults::randomUniform,
		);

# this hash maps the genome types to the
# classes they're defined in.

my %_genome2class = (
		     bitvector   => 'AI::Genetic::IndBitVector',
		     rangevector => 'AI::Genetic::IndRangeVector',
		     listvector  => 'AI::Genetic::IndListVector',
		    );

##################

# sub new():
# This is the constructor. It creates a new AI::Genetic
# object. Options are:
# -population: set the population size
# -crossover:  set the crossover probability
# -mutation:   set the mutation probability
# -fitness:    set the fitness function
# -type:       set the genome type. See docs.
# -terminate:  set termination sub.

sub new {
  my ($class, %args) = @_;

  my $self = bless {
		    ADDSEL => {},   # user-defined selections
		    ADDCRS => {},   # user-defined crossovers
		    ADDMUT => {},   # user-defined mutations
		    ADDSTR => {},   # user-defined strategies
		   } => $class;

  $self->{FITFUNC}    = $args{-fitness}    || sub { 1 };
  $self->{CROSSRATE}  = $args{-crossover}  || 0.95;
  $self->{MUTPROB}    = $args{-mutation}   || 0.05;
  $self->{POPSIZE}    = $args{-population} || 100;
  $self->{TYPE}       = $args{-type}       || 'bitvector';
  $self->{TERM}       = $args{-terminate}  || sub { 0 };

  $self->{PEOPLE}     = [];   # list of individuals
  $self->{GENERATION} = 0;    # current gen.

  $self->{INIT}       = 0;    # whether pop is initialized or not.
  $self->{SORTED}     = 0;    # whether the population is sorted by score or not.
  $self->{INDIVIDUAL} = '';   # name of individual class to use().

  return $self;
}

# sub createStrategy():
# This method creates a new strategy.
# It takes two arguments: name of strategy, and
# anon sub that implements it.

sub createStrategy {
  my ($self, $name, $sub) = @_;

  if (ref($sub) eq 'CODE') {
    $self->{ADDSTR}{$name} = $sub;
  } else {
    # we don't know what this operation is.
    carp <<EOC;
ERROR: Must specify anonymous subroutine for strategy.
       Strategy '$name' will be deleted.
EOC
    ;
    delete $self->{ADDSTR}{$name};
    return undef;
  }

  return $name;
}

# sub evolve():
# This method evolves the population using a specific strategy
# for a specific number of generations.

sub evolve {
  my ($self, $strategy, $gens) = @_;

  unless ($self->{INIT}) {
    carp "can't evolve() before init()";
    return undef;
  }

  my $strSub;
  if      (exists $self->{ADDSTR}{$strategy}) {
    $strSub = $self->{ADDSTR}{$strategy};
  } elsif (exists $_strategy{$strategy}) {
    $strSub = $_strategy{$strategy};
  } else {
    carp "ERROR: Do not know what strategy '$strategy' is,";
    return undef;
  }

  $gens ||= 1;

  for my $i (1 .. $gens) {
    $self->sortPopulation;
    $strSub->($self);

    $self->{GENERATION}++;
    $self->{SORTED} = 0;

    last if $self->{TERM}->($self);

#    my @f = $self->getFittest(10);
#    for my $f (@f) {
#      print STDERR "    Fitness = ", $f->score, "..\n";
#      print STDERR "    Genes are: @{$f->genes}.\n";
#    }
  }
}

# sub sortIndividuals():
# This method takes as input an anon list of individuals, and returns
# another anon list of the same individuals but sorted in decreasing
# score.

sub sortIndividuals {
  my ($self, $list) = @_;

  # make sure all score's are calculated.
  # This is to avoid a bug in Perl where a sort is called from whithin another
  # sort, and they are in different packages, then you get a use of uninit value
  # warning. See http://rt.perl.org/rt3/Ticket/Display.html?id=7063
  $_->score for @$list;

  return [sort {$b->score <=> $a->score} @$list];
}

# sub sortPopulation():
# This method sorts the population of individuals.

sub sortPopulation {
  my $self = shift;

  return if $self->{SORTED};

  $self->{PEOPLE} = $self->sortIndividuals($self->{PEOPLE});
  $self->{SORTED} = 1;
}

# sub getFittest():
# This method returns the fittest individuals.

sub getFittest {
  my ($self, $N) = @_;

  $N ||= 1;
  $N = 1 if $N < 1;

  $N = @{$self->{PEOPLE}} if $N > @{$self->{PEOPLE}};

  $self->sortPopulation;

  my @r = @{$self->{PEOPLE}}[0 .. $N-1];

  return $r[0] if $N == 1 && not wantarray;

  return @r;
}

# sub init():
# This method initializes the population to completely
# random individuals. It deletes all current individuals!!!
# It also examines the type of individuals we want, and
# require()s the proper class. Throws an error if it can't.
# Must pass to it an anon list that will be passed to the
# newRandom method of the individual.

# In case of bitvector, $newArgs is length of bitvector.
# In case of rangevector, $newArgs is anon list of anon lists.
# each sub-anon list has two elements, min number and max number.
# In case of listvector, $newArgs is anon list of anon lists.
# Each sub-anon list contains possible values of gene.

sub init {
  my ($self, $newArgs) = @_;

  $self->{INIT} = 0;

  my $ind;
  if (exists $_genome2class{$self->{TYPE}}) {
    $ind = $_genome2class{$self->{TYPE}};
  } else {
    $ind = $self->{TYPE};
  }

  eval "use $ind";  # does this work if package is in same file?
  if ($@) {
    carp "ERROR: Init failed. Can't require '$ind': $@,";
    return undef;
  }

  $self->{INDIVIDUAL} = $ind;
  $self->{PEOPLE}     = [];
  $self->{SORTED}     = 0;
  $self->{GENERATION} = 0;
  $self->{INITARGS}   = $newArgs;

  push @{$self->{PEOPLE}} =>
    $ind->newRandom($newArgs) for 1 .. $self->{POPSIZE};

  $_->fitness($self->{FITFUNC}) for @{$self->{PEOPLE}};

  $self->{INIT} = 1;
}

# sub people():
# returns the current list of individuals in the population.
# note: this returns the actual array ref, so any changes
# made to it (ex, shift/pop/etc) will be reflected in the
# population.

sub people {
  my $self = shift;

  if (@_) {
    $self->{PEOPLE} = shift;
    $self->{SORTED} = 0;
  }

  $self->{PEOPLE};
}

# useful little methods to set/query parameters.
sub size       { $_[0]{POPSIZE}    = $_[1] if defined $_[1]; $_[0]{POPSIZE}   }
sub crossProb  { $_[0]{CROSSRATE}  = $_[1] if defined $_[1]; $_[0]{CROSSRATE} }
sub mutProb    { $_[0]{MUTPROB}    = $_[1] if defined $_[1]; $_[0]{MUTPROB}   }
sub indType    { $_[0]{INDIVIDUAL} }
sub generation { $_[0]{GENERATION} }

# sub inject():
# This method is used to add individuals to the current population.
# The point of it is that sometimes the population gets stagnant,
# so it could be useful add "fresh blood".
# Takes a variable number of arguments. The first argument is the
# total number, N, of new individuals to add. The remaining arguments
# are genomes to inject. There must be at most N genomes to inject.
# If the number, n, of genomes to inject is less than N, N - n random
# genomes are added. Perhaps an example will help?
# returns 1 on success and undef on error.

sub inject {
  my ($self, $count, @genomes) = @_;

  unless ($self->{INIT}) {
    carp "can't inject() before init()";
    return undef;
  }

  my $ind = $self->{INDIVIDUAL};

  my @newInds;
  for my $i (1 .. $count) {
    my $genes = shift @genomes;

    if ($genes) {
      push @newInds => $ind->newSpecific($genes, $self->{INITARGS});
    } else {
      push @newInds => $ind->newRandom  ($self->{INITARGS});      
    }
  }

  $_->fitness($self->{FITFUNC}) for @newInds;

  push @{$self->{PEOPLE}} => @newInds;

  return 1;
}

__END__

=head1 NAME

AI::Genetic - A pure Perl genetic algorithm implementation.

=head1 SYNOPSIS

    use AI::Genetic;
    my $ga = new AI::Genetic(
        -fitness    => \&fitnessFunc,
        -type       => 'bitvector',
        -population => 500,
        -crossover  => 0.9,
        -mutation   => 0.01,
	-terminate  => \&terminateFunc,
       );

     $ga->init(10);
     $ga->evolve('rouletteTwoPoint', 100);
     print "Best score = ", $ga->getFittest->score, ".\n";

     sub fitnessFunc {
         my $genes = shift;

         my $fitness;
         # assign a number to $fitness based on the @$genes
         # ...

         return $fitness;
      }

      sub terminateFunc {
         my $ga = shift;

         # terminate if reached some threshold.
         return 1 if $ga->getFittest->score > $THRESHOLD;
         return 0;
      }

=head1 DESCRIPTION

This module implements a Genetic Algorithm (GA) in pure Perl.
Other Perl modules that achieve the same thing (perhaps better,
perhaps worse) do exist. Please check CPAN. I mainly wrote this
module to satisfy my own needs, and to learn something about GAs
along the way.

B<PLEASE NOTE:> As of v0.02, AI::Genetic has been re-written from
scratch to be more modular and expandable. To achieve this, I had
to modify the API, so it is not backward-compatible with v0.01.
As a result, I do not plan on supporting v0.01.

I will not go into the details of GAs here, but here are the
bare basics. Plenty of information can be found on the web.

In a GA, a population of individuals compete for survival. Each
individual is designated by a set of genes that define its
behaviour. Individuals that perform better (as defined by the
fitness function) have a higher chance of mating with other
individuals. When two individuals mate, they swap some of
their genes, resulting in an individual that has properties
from both of its "parents". Every now and then, a mutation
occurs where some gene randomly changes value, resulting in
a different individual. If all is well defined, after a few
generations, the population should converge on a "good-enough"
solution to the problem being tackled.

A GA implementation runs for a discrete number of time steps
called I<generations>. What happens during each generation can
vary greatly depending on the strategy being used (See 
L</"STRATEGIES"> for more info).
Typically, a variation of the following happens at
each generation:

=over 4

=item B<1. Selection>

Here the performance of all the individuals is evaluated
based on the fitness function, and each is given a specific
fitness value. The higher the value, the bigger the chance
of an individual passing its genes on in future generations
through mating (crossover).

=item B<2. Crossover>

Here, individuals selected are randomly paired up for
crossover (aka I<sexual reproduction>). This is further
controlled by the crossover rate specified and may result in
a new offspring individual that contains genes common to
both parents. New individuals are injected into the current
population.

=item B<3. Mutation>

In this step, each individual is given the chance to mutate
based on the mutation probability specified. If an individual
is to mutate, each of its genes is given the chance to randomly
switch its value to some other state.

=back

=head1 CLASS METHODS

Here are the public methods.

=over 4

=item I<$ga>-E<gt>B<new>(I<options>)

This is the constructor. It accepts options in the form of
hash-value pairs. These are:

=over 8

=item B<-population>

This defines the size of the population, i.e. how many individuals
to simultaneously exist at each generation. Defaults to 100.

=item B<-crossover>

This defines the crossover rate. Defaults to 0.95.

=item B<-mutation>

This defines the mutation rate. Defaults to 0.05.

=item I<-fitness>

This defines a fitness function. It expects a reference to a subroutine.
More details are given in L</"FITNESS FUNCTION">.

=item I<-type>

This defines the type of the genome. Currently, AI::Genetic
supports only three types:

=over

=item I<bitvector>

Individuals of this type have genes that are bits. Each gene
can be in one of two possible states, on or off.

=item I<listvector>

Each gene of a listvector individual can assume one string value from
a specified list of possible string values.

=item I<rangevector>

Each gene of a rangevector individual can assume one integer value
from a range of possible integer values. Note that only integers are
supported. The user can always transform any desired fractional values
by multiplying and dividing by an appropriate power of 10.

=back

Defaults to I<bitvector>.

=item I<-terminate>

This option allows the definition of a termination subroutine.
It expects a subroutine reference. This sub will be called at
the end of each generation with one argument: the AI::Genetic
object. Evolution terminates if the sub returns a true value.

=back

=item I<$ga>-E<gt>B<createStrategy>(I<strategy_name>, I<sub_ref>)

This method allows the creation of a custom-made strategy to be used
during evolution. It expects a unique strategy name, and a subroutine
reference as arguments. The subroutine will be called with one argument:
the AI::Genetic object. It is expected to alter the population at each
generation. See L</"STRATEGIES"> for more information.

=item I<$ga>-E<gt>B<init>(I<initArgs>)

This method initializes the population with random individuals. It B<MUST>
be called before any call to I<evolve()> or I<inject()>. As a side effect,
any already existing individuals in the population are deleted. It expects
one argument, which depends on the type of individuals:

=over

=item o

For bitvectors, the argument is simply the length of the bitvector.

    $ga->init(10);

this initializes a population where each individual has 10 genes.

=item o

For listvectors, the argument is an anonymous list of lists. The
number of sub-lists is equal to the number of genes of each individual.
Each sub-list defines the possible string values that the corresponding gene
can assume.

    $ga->init([
               [qw/red blue green/],
               [qw/big medium small/],
               [qw/very_fat fat fit thin very_thin/],
              ]);

this initializes a population where each individual has 3 genes, and each gene
can assume one of the given values.

=item o

For rangevectors, the argument is an anonymous list of lists. The
number of sub-lists is equal to the number of genes of each individual.
Each sub-list defines the minimum and maximum integer values that the
corresponding gene can assume.

    $ga->init([
               [1, 5],
               [0, 20],
               [4, 9],
              ]);

this initializes a population where each individual has 3 genes, and each gene
can assume an integer within the corresponding range.

=back

=item I<$ga>-E<gt>B<inject>(I<N>, ?I<args>?)

This method can be used to add more individuals to the population. New individuals
can be randomly generated, or be explicitly specified. The first argument specifies
the number, I<N>, of new individuals to add. This can be followed by at most I<N>
arguments, each of which is an anonymous list that specifies the genome of a
single individual to add. If the number of genomes given, I<n>, is less than I<N>, then
I<N> - I<n> random individuals are added for a total of I<N> new individuals. Random
individuals are generated using the same arguments passed to the I<init()> method.
For example:

  $ga->inject(5,
              [qw/red big thin/],
              [qw/blue small fat/],
             );

this adds 5 new individuals, 2 with the specified genetic coding, and 3 randomly
generated.

=item I<$ga>-E<gt>B<evolve>(I<strategy>, ?I<num_generations>?)

This method causes the GA to evolve the population using the specified strategy.
A strategy name has to be specified as the first argument. The second argument
is optional and specifies the number of generations to evolve. It defaults to
1. See L</"STRATEGIES"> for more information on the default strategies.

Each generation consists of the following steps:

=over

=item o

The population is sorted according to the individuals' fitnesses.

=item o

The subroutine corresponding to the named strategy is called with one argument,
the AI::Genetic object. This subroutine is expected to alter the object itself.

=item o

If a termination subroutine is given, it is executed and the return value is
checked. Evolution terminates if this sub returns a true value.

=back

=item I<$ga>-E<gt>B<getFittest>(?I<N>?)

This returns the I<N> fittest individuals. If not specified,
I<N> defaults to 1. As a side effect, it sorts the population by
fitness score. The actual AI::Genetic::Individual objects are returned.
You can use the C<genes()> and C<score()> methods to get the genes and the
scores of the individuals. Please check L<AI::Genetic::Individual> for details.

=item I<$ga>-E<gt>B<sortPopulation>

This method sorts the population according to fitness function. The results
are cached for speed.

=item I<$ga>-E<gt>B<sortIndividuals>(?[I<ListOfIndividuals>]?)

Given an anonymous list of individuals, this method sorts them according
to fitness, returning an anonymous list of the sorted individuals.

=item I<$ga>-E<gt>B<people>()

Returns an anonymous list of individuals of the current population.
B<IMPORTANT>: the actual array reference used by the AI::Genetic object
is returned, so any changes to it will be reflected in I<$ga>.

=item I<$ga>-E<gt>B<size>(?I<newSize>?)

This method is used to query and set the population size.

=item I<$ga>-E<gt>B<crossProb>(?I<newProb>?)

This method is used to query and set the crossover rate.

=item I<$ga>-E<gt>B<mutProb>(?I<newProb>?)

This method is used to query and set the mutation rate.

=item I<$ga>-E<gt>B<indType>()

This method returns the type of individual: I<bitvector>, I<listvector>,
or I<rangevector>.

=item I<$ga>-E<gt>B<generation>()

This method returns the current generation.

=back

=head1 FITNESS FUNCTION

Very quickly you will realize that properly defining the fitness function
is the most important aspect of a GA. Most of the time that a genetic
algorithm takes to run is spent in running the fitness function for each
separate individual to get its fitness. AI::Genetic tries to minimize this
time by caching the fitness result for each individual. But, B<you should
spend a lot of time optimizing your fitness function to achieve decent run
times.>

The fitness function should expect only one argument, an anonymous list of
genes, corresponding to the individual being analyzed. It is expected
to return a number which defines the fitness score of the said individual.
The higher the score, the more fit the individual, the more the chance it
has to be chosen for crossover.

=head1 STRATEGIES

AI::Genetic comes with 9 predefined strategies. These are:

=over

=item rouletteSinglePoint

This strategy implements roulette-wheel selection and single-point crossover.

=item rouletteTwoPoint

This strategy implements roulette-wheel selection and two-point crossover.

=item rouletteUniform

This strategy implements roulette-wheel selection and uniform crossover.

=item tournamentSinglePoint

This strategy implements tournament selection and single-point crossover.

=item tournamentTwoPoint

This strategy implements tournament selection and two-point crossover.

=item tournamentUniform

This strategy implements tournament selection and uniform crossover.

=item randomSinglePoint

This strategy implements random selection and single-point crossover.

=item randomTwoPoint

This strategy implements random selection and two-point crossover.

=item randomUniform

This strategy implements random selection and uniform crossover.

=back

More detail on these strategies and how to call them in your own
custom strategies can be found in L<AI::Genetic::OpSelection>,
L<AI::Genetic::OpCrossover> and L<AI::Genetic::OpMutation>.

You can use the functions defined in the above modules in your
own custom-made strategy. Consult their manpages for more info.
A custom-made strategy can be defined using the I<strategy()>
method and is called at the beginning of each generation. The only
argument to it is the AI::Genetic object itself. Note that the
population at this point is sorted accoring to each individual's
fitness score. It is expected that the strategy sub will modify
the population stored in the AI::Genetic object. Here's the
pseudo-code of events:

    for (1 .. num_generations) {
      sort population;
      call strategy_sub;
      if (termination_sub exists) {
        call termination_sub;
        last if returned true value;
      }
    }

=head1 A NOTE ON SPEED/EFFICIENCY

Genetic algorithms are inherently slow.
Perl can be pretty fast, but will never reach the speed of optimized
C code (at least my Perl coding will not). I wrote AI::Genetic mainly
for my own learning experience, but still tried to optimize it as
much as I can while trying to keep it as flexible as possible.

To do that, I resorted to some well-known tricks like passing a
reference of a long list instead of the list itself (for example,
when calling the fitness function, a reference of the gene list
is passed), and caching fitness scores (if you try to evaluate
the fitness of the same individual more than once, then the fitness
function will not be called, and the cached result is returned).

To help speed up your run times, you should pay special attention
to the design of your fitness function since this will be called once
for each unique individual in each generation. If you can shave off a
few clock cycles here and there, then it will be greatly magnified in
the total run time.

=head1 BUGS

I have tested this module quite a bit, and even used it to solve a
work-related problem successfully. But, if you think you found a bug
then please let me know, and I promise to look at it.

Also, if you have any requests, comments or suggestions, then feel
free to email me.

=head1 INSTALLATION

Either the usual:

    perl Makefile.PL
    make
    make install

or just stick it somewhere in @INC where perl can find it. It is in pure Perl.

=head1 AUTHOR & CREDITS

Written by Ala Qumsieh I<aqumsieh@cpan.org>.

Special thanks go to John D. Porter and Oliver Smith for stimulating
discussions and great suggestions. Daniel Martin and Ivan Tubert-Brohman
uncovered various bugs and for this I'm grateful.

=head1 COPYRIGHTS

(c) 2003-2005 Ala Qumsieh. All rights reserved.
This module is distributed under the same terms as Perl itself.

=cut

view all matches for this distribution

AI-Image

13 results

view release on metacpan or search on metacpan

lib/AI/Image.pm view on Meta::CPAN

package AI::Image;

use strict;
use warnings;

use strict;
use warnings;

use Carp;
use HTTP::Tiny;
use JSON::PP;

our $VERSION = '0.1';
$VERSION = eval $VERSION;

my $http = HTTP::Tiny->new;

# Create Image object
sub new {
    my $class = shift;
    my %attr  = @_;

    $attr{'error'}      = '';

    $attr{'api'}        = 'OpenAI' unless $attr{'api'};
    $attr{'error'}      = 'Invalid API' unless $attr{'api'} eq 'OpenAI';
    $attr{'error'}      = 'API Key missing' unless $attr{'key'};

    $attr{'model'}      = 'dall-e-2' unless $attr{'model'};
    $attr{'size'}       = '512x512'  unless $attr{'size'};

    return bless \%attr, $class;
}

# Define endpoints for APIs
my %url    = (
    'OpenAI' => 'https://api.openai.com/v1/images/generations',
);

# Define HTTP Headers for APIs
my %header = (
    'OpenAI' => &_get_header_openai,
);

# Returns true if last operation was success
sub success {
    my $self = shift;
    return !$self->{'error'};
}

# Returns error if last operation failed
sub error {
    my $self = shift;
    return $self->{'error'};
}

# Header for calling OpenAI
sub _get_header_openai {
    my $self = shift;
    $self->{'key'} = '' unless defined $self->{'key'};
    return {
         'Authorization' => 'Bearer ' . $self->{'key'},
         'Content-type'  => 'application/json'
     };
 }

# Get URL from image prompt
sub image {
    my ($self, $prompt) = @_;

    my $response = $http->post($url{$self->{'api'}}, {
         'headers' => {
             'Authorization' => 'Bearer ' . $self->{'key'},
             'Content-type'  => 'application/json'
         },
         content => encode_json {
             model          => $self->{'model'},
             size           => $self->{'size'},
             prompt         => $prompt,
         }
     });
     if ($response->{'content'} =~ 'invalid_api_key') {
         croak 'Incorrect API Key - check your API Key is correct';
     }
     
     if ($self->{'debug'} and !$response->{'success'}) {
         croak $response if $self->{'debug'} eq 'verbose';
         croak $response->{'content'};
     }

     my $reply = decode_json($response->{'content'});

     return $reply->{'data'}[0]->{'url'};
}

__END__

=head1 NAME

AI::Image - Generate images using OpenAI's DALL-E

=head1 VERSION

Version 0.1

=head1 SYNOPSIS

    use AI::Image;

    my $ai = AI::Image->new(
        'key'   => 'sk-......',
    );
    
    my $image_url = $ai->image("A photorealistic image of a cat wearing a top hat and monocle.");

    print $image_url;

=head1 DESCRIPTION

This module provides a simple interface to generate images using OpenAI's DALL-E API.

=head1 API KEYS

A free OpenAI API can be obtained from L<https://platform.openai.com/account/api-keys>

=head1 MODELS

Although the API Key is free, each use incurs a cost.  This is dependent on the
model chosen and the size.  The 'dall-e-3' model produces better images but at a
higher cost.  Likewise, bigger images cost more.
The default model C<dall-e-2> with the default size of C<512x512> produces resonable
results at a low cost and is a good place to start using this module.

See also L<https://platform.openai.com/docs/models/overview>

=head1 METHODS

=head2 new

  my $ai = AI::Image->new(%params);

Creates a new AI::Image object.

=head3 Parameters

=over 4

=item key

C<required> Your OpenAI API key.

=item api

The API to use (currently only 'OpenAI' is supported).

=item model

The language model to use (default: 'dall-e-2').

See L<https://platform.openai.com/docs/models/overview>

=item size

The size for the generated image (default: '512x512').

=item debug

Used for testing.  If set to any true value, the image method
will return details of the error encountered instead of C<undef>

=back

=head2 image

  my $url = $ai->image($prompt);

Generates an image based on the provided prompt and returns the URL of the generated image.  The URL is valid for 1 hour.

=head3 Parameters

=over 4

=item prompt

The textual description of the desired image.

=back

=head2 success

  my $success = $ai->success();

Returns true if the last operation was successful.

=head2 error

  my $error = $ai->error();

Returns the error message if the last operation failed.

=head1 EXAMPLE

It is common that the generated image will want to be saved as a file.  This can be easily acheived
using the C<getstore> method of L<LWP::Simple>.

    use strict;
    use warnings;
    
    use LWP::Simple;
    use AI::Image;
    
    my $ai = AI::Image->new(
        'key'   => 'sk-......',
    );
    
    my $image_url = $ai->image("A dog reading a newspaper");
    
    getstore( $image_url, 'my_ai_image.png' );


=head1 SEE ALSO

L<https://openai.com> - OpenAI official website

=head1 AUTHOR

Ian Boddison <ian at boddison.com>

=head1 BUGS

Please report any bugs or feature requests to C<bug-ai-image at rt.cpan.org>, or through
the web interface at L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=bug-ai-image>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc AI::Image

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=AI-Image>

=item * Search CPAN

L<https://metacpan.org/release/AI::Image>

=back

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2024 by Ian Boddison

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

view all matches for this distribution

AI-LibNeural

10 results

view release on metacpan or search on metacpan

LibNeural.pm view on Meta::CPAN

#
# $Header$
#

package AI::LibNeural;

use 5.006;
use strict;
use warnings;
use Carp;

require Exporter;
require DynaLoader;
use AutoLoader;

our @ISA = qw(Exporter DynaLoader);

# This allows declaration	use AI::LibNeural ':all';
our %EXPORT_TAGS = ( 'all' => [ qw(
	ALL
	HIDDEN
	INPUT
	OUTPUT
) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(
);

our $VERSION = '0.02';

sub AUTOLOAD {
    # This AUTOLOAD is used to 'autoload' constants from the constant()
    # XS function.  If a constant is not found then control is passed
    # to the AUTOLOAD in AutoLoader.
    my $constname;
    our $AUTOLOAD;
    ($constname = $AUTOLOAD) =~ s/.*:://;
    croak "& not defined" if $constname eq 'constant';
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
	if ($! =~ /Invalid/ || $!{EINVAL}) {
	    $AutoLoader::AUTOLOAD = $AUTOLOAD;
	    goto &AutoLoader::AUTOLOAD;
	}
	else {
	    croak "Your vendor has not defined AI::LibNeural macro $constname";
	}
    }
    {
	no strict 'refs';
	# Fixed between 5.005_53 and 5.005_61
#	if ($] >= 5.00561) {
#	    *$AUTOLOAD = sub () { $val };
#	}
#	else {
	    *$AUTOLOAD = sub { $val };
#	}
    }
    goto &$AUTOLOAD;
}

bootstrap AI::LibNeural $VERSION;

# Preloaded methods go here.

# Autoload methods go after =cut, and are processed by the autosplit program.

1;
__END__

=head1 NAME

AI::LibNeural - Perl extension libneural

=head1 SYNOPSIS

  use AI::LibNeural;

  my $nn = AI::LibNeural->new( 2, 4, 1 );

  # teach it the logical AND
  $nn->train( [ 0, 0 ], [ 0.05 ], 0.0000000005, 0.2 );
  $nn->train( [ 0, 1 ], [ 0.05 ], 0.0000000005, 0.2 );
  $nn->train( [ 1, 0 ], [ 0.05 ], 0.0000000005, 0.2 );
  $nn->train( [ 1, 1 ], [ 0.95 ], 0.0000000005, 0.2 );

  my $result = $nn->run( [ 1, 1 ] );
  # result should be ~ 0.95
  $result = $nn->run( [ 0, 1 ] );
  # result should be ~ 0.05

  $nn->save('and.mem');

=head1 ABSTRACT

  Perl bindings for the libneural c++ neural netowrk library.

=head1 DESCRIPTION

Provides accessors for the libneural library as a perl object. libneural is a
C++ library that impelements a feed-forward back-proprogation neural network.
The interface is extremely simple and should take no more than a few minutes to
master given a reasonable knowledge of back proprogation neural networks.

=head2 FUNCTIONS

=over

=item $nn = AI:LibNeural->new()

Creates an empty AI::LibNeural object, should only be used when the load method
will be called soon after.

=item $nn = AI::LibNeural->new(FILENAME)

Creates a new AI::LibNeural object from the supplied memory file.

=item $nn = AI::LibNeural->new(INTPUTS,HIDDENS,OUTPUTS)

Creates a new AI::LibNeural object with INPUTS input nodes, HIDDENS hidden
nodes, and OUTPUTS output nodes.

=item $nn->train([I1,I2,...],[O1,O2,...],MINERR,TRAINRATE)

Completes a training cycle for the given inputs I1-IN, with the expected
results of O1-OM, where N is the number of inputs and M is the number of
outputs. MINERR is the mean squared error at the output that you wish to be achieved. TRAINRATE is the learning rate to be used.

=item (O1,O2) = $nn->run([I1,I2,...])

Calculate the corresponding outputs (O1-OM) for the given inputs (I1-ON) based
on the previous training. Should only be called after the network has been
suitably trained.

=item NUM = $nn->get_layersize(WHICH)

Retrieves the number of nodes at the specified layer, WHICH. WHICH should be
one of ALL, INPUT, HIDDEN, OUTPUT. Usefully mainly with a network is loaded
from a file.

=item status = $nn->load(FILENAME)

=item status = $nn->save(FILENAME)

Loads and saves respectively the 'memory,' node configuration and weights,
of the network.  FILENAME should be the location of the file in which the
memory is stored/retrieved.

=back

=head2 EXPORT

None by default

=head2 EXPORT TAGS

=over

=item all

=over

=item ALL

  The total number of nodes on all three layers

=item INPUT

  The number of nodes on the input layer

=item HIDDEN

  The number of nodes on the hidden layer

=item OUTPUT

  The number of nodes on the output layer

=back

=back

=head1 AUTHOR

Ross McFarland E<lt>rmcfarla at neces dot comE<gt>

=head1 SEE ALSO

L<perl>. libneural documentation

=head1 LICENSE

this is based off of code that i based off of other modules i've found in the
distant past. if you are the original author and you recognize this code let
me know and you'll be credited

Copyright (C) 2003 by Ross McFarland

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the 
Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
Boston, MA  02111-1307  USA.

view all matches for this distribution

AI-Logic-AnswerSet

7 results

view release on metacpan or search on metacpan

lib/AI/Logic/AnswerSet.pm view on Meta::CPAN

package AI::Logic::AnswerSet;

use 5.010001;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);

our %EXPORT_TAGS = ( 'all' => [ qw(
	
) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(
	
);

our $VERSION = '0.02';

sub executeFromFileAndSave {		#Executes DLV with a file as input and saves the output in another file

	open DLVW, ">>", "$_[1]";
	print DLVW $_[2];
	close DLVW;

	open(SAVESTDOUT, ">&STDOUT") or die "Can't save STDOUT: $!\n";
	open(STDOUT, ">$_[0]") or die "Can't open STDOUT to $_[0]", "$!\n";


	my @args = ("./dlv", "$_[1]");
	system(@args) == 0
		or die "system @args failed: $?";

	open(STDOUT,">&SAVESTDOUT"); #close file and restore STDOUT
	close OUTPUT;

}

sub executeAndSave {	#Executes DLV and saves the output of the program written by the user in a file

	open(SAVESTDOUT, ">&STDOUT") or die "Can't save STDOUT: $!\n";
	open(STDOUT, ">$_[0]") or die "Can't open STDOUT to $_[0]", "$!\n";

	my @args = ("./dlv --");
	system(@args) == 0 or die "system @args failed: $?";

	open(STDOUT,">&SAVESTDOUT"); #close file and restore STDOUT
	close OUTPUT;


}


sub iterativeExec {	# Executes an input program with several instances and stores them in a bidimensional array

	my @input = @_;

	my @returned_value;

	if(@input) {
		
		my $option = $input[$#input];

		if($option =~ /^-/) {
			pop(@input);
		}
		else {
			$option = "";
		}

		my $dir = pop(@input);
		my @files = qx(ls $dir);
			
		my $size = @files;

		for(my $i = 0; $i < $size; $i++) {

			my $elem = $files[$i];
			chomp $elem;
			my @args = ("./dlv", "@input", "$dir$elem", "$option");
			my (@out) = `@args`;
			push @{$returned_value[$i]}, @out;
		}
		
	}

	else {
		print "INPUT ERROR\n";
	}

	return @returned_value;

}

sub singleExec {	 # Executes a single input program or opens the DLV terminal and stores it in an array

	my @input = @_;
	my @returned_value;

	if(@input) {


		my @args = ("./dlv", "@input");
		(@returned_value) = `@args`;
		
	}

	else {
		my $command = "./dlv --";
		(@returned_value) = `$command`;		
	}

	return @returned_value;
}

sub selectOutput {	# Select one of the outputs returned by the iterative execution of more input programs 

	my @stdoutput = @{$_[0]};
	my $n = $_[1];

	return @{$stdoutput[$n]};
	
}

sub getFacts {	# Return the facts of the input program

	my $input = shift;

	my @isAFile = stat($input);

	my @facts;

	if(@isAFile) {

		open INPUT, "<", "$input";
		my @rows = <INPUT>;
		foreach my $row (@rows) {
			if($row =~ /^(\w+)(\(((\w|\d|\.)+,?)*\))?\./) {
				push @facts, $row;
			}
		}
		close INPUT;

	}
	else {
		my @str = split /\. /,$input;
		foreach my $elem (@str) {

			if($elem =~ /^(\w+)(\(((\w|\d|\.)+,?)*\))?\.?$/) {
				push @facts, $elem;
			}
		}
	}
	return @facts;
	
}

sub addCode {	#Adds code to input

	my $program = $_[0];
	my $code = $_[1];
	my @isAFile = stat($program);

	if(@isAFile) {
		open PROGRAM, ">>", $program;
		print PROGRAM "$code\n";
		close PROGRAM;
	}

	else {
		$program = \($_[0]);
		$$program = "$$program $code";
	}
		
}

sub getASFromFile {	#Gets the Answer Set from the file where the output was saved

	open RESULT, "<", "$_[0]" or die $!;
	my @result = <RESULT>;
	my @arr;
	foreach my $line (@result) {

		if($line =~ /\{\w*/) {
			$line =~ s/(\{|\})//g;
			#$line =~ s/\n//g;  # delete \n from $line
		        my @tmp = split(', ', $line);
			push @arr, @tmp;
		}

	}

	close RESULT;
	return @arr;
}

sub getAS { #Returns the Answer Sets from the array where the output was saved

	my @result = @_;
	my @arr;

	foreach my $line (@result) {


		if($line =~ /\{\w*/) {
			$line =~ s/(\{|\})//g;
			$line =~ s/(Best model:)//g;
		        my @tmp = split(', ', $line);
			push @arr, @tmp;
		}

	}

	return @arr;
}

sub statistics {	# Return an array of hashes in which the statistics of every predicate of every answerSets are stored
			# If a condition of comparison is specified(number of predicates) it returns the answer sets that satisfy
			# that condition 

	my @as = @{$_[0]};
	my @pred = @{$_[1]};
	my @num = @{$_[2]};
	my @operators = @{$_[3]};

	my @sets;
	my @ans;
	
	my $countAS = 0;
	my @stat;

	my $countPred;

	foreach my $elem (@as) {

		if($elem =~ /(\w+).*\n/) {
			push @{$sets[$countAS]}, $elem;
			if(_existsPred($1,\@pred)) {
				$stat[$countAS]{$1} += 1;
				$countAS += 1;
			}
		}

		elsif($elem =~ /(\w+).*/) {
			push @{$sets[$countAS]}, $elem;
			if(_existsPred($1,\@pred)) {
				$stat[$countAS]{$1} += 1;
			}
		}
	}

	my $comparison = 0;
	if(@num and @operators) {
		$comparison = 1;
	}
	elsif(@num and !@operators) {
		print "Error: comparison element missing";
		return @ans;
	}
	
	

	if($comparison) {
		my $size = @pred;
		my $statSize = @stat;

		for(my $j = 0; $j < $statSize; $j++) {
			for(my $i = 0; $i < $size; $i++) {

				my $t = $stat[$j]{$pred[$i]};

				if(_evaluate($t,$num[$i],$operators[$i])) {
					$countPred++;
				}
				else {
					$countPred = 0;
					break;
				}
			}

			if($countPred == $size) {
				push @ans , $sets[$j];
			}
			$countPred = 0;
		}
		return @ans;

	}

	return @stat;
}

sub _evaluate {		#private use only

	my $value = shift;
	my $num = shift;
	my $operator = shift;

	if($operator eq "==") {
		if($value == $num) {
			return 1;
		}
		return 0;
	}
	elsif($operator eq "!=") {
		if($value != $num) {
			return 1;
		}
		return 0;		
	}
	elsif($operator eq ">") {
		if($value > $num) {
			return 1;
		}
		return 0;
	}
	elsif($operator eq ">=") {
		if($value >= $num) {
			return 1;
		}
		return 0;
	}
	elsif($operator eq "<") {
		if($value < $num) {
			return 1;
		}
		return 0;
	}
	elsif($operator eq "<=") {
		if($value <= $num) {
			return 1;
		}
		return 0;
	}
	return 0;
}

sub mapAS {	#Mapping of the Answer Sets in an array of hashes

	my $countAS = 0;

	my @answerSets = @{$_[0]};

	my @second;
	if($_[1]) {
		@second = @{$_[1]};
	}

	my @third;
	if($_[2]) {
		@third = @{$_[2]};
	}

	my @selectedAS;
	
	my @predList;

	my @pred;

	if(@second) {
		if($second[0] =~ /\d+/) {

			@selectedAS = @second;
			if(@third) {
				@predList = @third;
			}

		}

		else {
			@predList = @second;
			if(@third) {
				@selectedAS = @third;
			}
		}
	}


	foreach my $elem (@answerSets) {


		if($elem =~ /(\w+).*\n/){
			if(@predList) {
				if(_existsPred($1,\@predList)) {
					push @{$pred[$countAS]{$1}}, $elem;
				}
			}
			else {
				push @{$pred[$countAS]{$1}}, $elem;
			}
			$countAS = $countAS + 1;
			
		}

		elsif($elem =~ /(\w+).*/) {
			if(@predList) {
				if(_existsPred($1,\@predList)) {
					push @{$pred[$countAS]{$1}}, $elem;
				}
			}
			else {
				push @{$pred[$countAS]{$1}}, $elem;
			}
		}
		
	}

	if(@selectedAS) {
		
		my $size = @selectedAS;

		my @selectedPred;


		for(my $i = 0; $i < $size; $i++) {
			my $as = $selectedAS[$i];
			push @selectedPred, $pred[$as];
		}

		return @selectedPred;
	}
	return @pred;

}

sub _existsPred {	#Verifies the existence of a predicate (private use only)

	my $pred = $_[0];
	my @predList = @{$_[1]};

	my $size = @predList;

	for(my $i = 0; $i < $size; $i++) {
		if($pred eq $predList[$i]) {
			return 1;
		}
	}
	return 0;
		
}

sub getPred {	#Returns the predicates from the array of hashes

	my @pr = @{$_[0]};
	return @{$pr[$_[1]]{$_[2]}};
}

sub getProjection {	#Returns the values selected by the user

	my @pr = @{$_[0]};
	my @projection;

	my @res = @{$pr[$_[1]]{$_[2]}};
	
	my $size = @res;
	my $fieldsStr;

	for(my $i = 0; $i < $size; $i++) {
		my $pred = @{$pr[$_[1]]{$_[2]}}[$i];
		if($pred =~ /(\w+)\((.+)\)/) {
			$fieldsStr = $2;
		}
		my @fields = split(',',$fieldsStr);
		push @projection , $fields[$_[3]-1];		
			
	}

	return @projection;
}

sub createNewFile {

	my $file = $_[0];
	my $code = $_[1];

	open FILE, ">", $file;
	print FILE "$code\n";
	close FILE;

}

sub addFacts {

	my $name = $_[0];
	my @facts = @{$_[1]};
	my $append = $_[2];
	my $filename = $_[3];
	
	open FILE, $append, $filename;

	foreach my $f (@facts) {
		print FILE "$name($f).\n";
	}
	close FILE;
}


1;
__END__

# 

=head1 NAME

AI::Logic::AnswerSet - Perl extension for embedding ASP (Answer Set Programming) programs in Perl.


=head1 SYNOPSIS

  use AI::Logic::AnswerSet;
  
  # invoke DLV( AnwerSetProgramming-based system) and save the stdoutput
  my @stdoutput = AI::Logic::AnswerSet::singleExec("3-colorability.txt");

  # parse the output
  my @res = AI::Logic::AnswerSet::getAS(@stdoutput);

  # map the results
  my @mappedAS = AI::Logic::AnswerSet::mapAS(\@res);

  # get a predicate from the results
  my @col = AI::Logic::AnswerSet::getPred(\@mappedAS,1,"col");

  # get a term of a predicate
  my @term = AI::Logic::AnswerSet::getProjection(\@mappedAS,1,"col",2);


=head1 DESCRIPTION

This extension allows to interact with DLV, an Artificial Intelligence system
for Answer Set Programming (ASP).
Please note that the DLV system must appear in the same folder of the perl program
and it must be renamed as "dlv";
DLV can be freely obtained at www.dlvsystem.com.
For further info about DLV and Answer Set Programming please start from www.dlvsystem.com.

The module was originally published as "ASPerl", but suffered from
some problems with the namespace, now changed. The module has been
also significantly rearranged according to the advices coming from the
community. Thank you all!
If you are using this module, please let us know: we are always
interested in end-users desires, and we wish to improve our library:
comments are truly welcome!

=head2 Methods

=head3 executeFromFileAndSave

This method allows to execute DLV with and input file and save the output in another file.

	AI::Logic::AnswerSet::executeFromFileAndSave("outprog.txt","dlvprog.txt","");

In this case the file "outprog.txt" consists of the result of the DLV invocation 
with the file "dlvprog.txt".
No code is specified in the third value of the method. It can be used to add code 
to an existing file or to a new one.

	AI::Logic::AnswerSet::executeFromFileAndSave("outprog.txt","dlvprog.txt",
	"b(X):-a(X). a(1).");
  
=head3 executeAndSave

To call DLV without an input file, directly writing the ASP code from the terminal, 
use this method, passing only the name of the output file.

	AI::Logic::AnswerSet::executeAndSave("outprog.txt");

Press Ctrl+D to stop using the DLV terminal and execute the program.

=head3 singleExec

Use this method to execute DLV whit several input files, including also
DLV options like "-nofacts".
The output will be stored inside an array.

	my @out = AI::Logic::AnswerSet::singleExec("3col.txt","nodes.txt","edges.txt","-nofacts");

Another way to use this method:

	my @out = AI::Logic::AnswerSet::singleExec();

In this way it will work like C<executeAndSave()> without saving the output to a file.

=head3 iterativeExec

This method allows to call multiples DLV executions for several instances of the same problem.
Suppose you have a program that calculates the 3-colorability of a graph; in this case
one might have more than a graph, and each graph instance can be stored in a different file.
A Perl programmer might want to work with the results of all the graphs she has in her files,
so this function will be useful for this purpose.
Use it like in the following:

	my @outputs = AI::Logic::AnswerSet::iterativeExec("3col.txt","nodes.txt","./instances");

In this case the nodes of each graph are the same, but not the edges.
Notice that in order to correctly use this method, the user must specify the path 
to the instances (the edges, in this case).

The output of this function is a two-dimensional array; each element corresponds to the result
of a single DLV execution, exactly as in the case of the function C<singleExec()>.

=head3 selectOutput

This method allows to get one of the results of C<iterativeExec>.

	my @outputs = AI::Logic::AnswerSet::iterativeExec("3col.txt","nodes.txt","./instances");
	my @out = AI::Logic::AnswerSet::selectOutput(\@outputs,0);

In this case the first output is selected.

=head3 getASFromFile

Parses the output of a DLV execution saved in a file and gather the answer sets.

	AI::Logic::AnswerSet::executeFromFileAndSave("outprog.txt","dlvprog.txt","");
	my @result = AI::Logic::AnswerSet::getASFromFile("outprog.txt");

=head3 getAS

Parses the output of a DLV execution and gather the answer sets.

	my @out = AI::Logic::AnswerSet::singleExec("3col.txt","nodes.txt","edges.txt","-nofacts");
	my @result = AI::Logic::AnswerSet::getAS(@out);

=head3 mapAS

Parses the new output in order to save and organize the results into a hashmap.

	my @out = AI::Logic::AnswerSet::singleExec("3col.txt","nodes.txt","edges.txt","-nofacts");
	my @result = AI::Logic::AnswerSet::getAS(@out);
	my @mappedAS = AI::Logic::AnswerSet::mapAS(@result);

The user can set some constraints on the data to be saved in the hashmap, such as predicates, or answer sets, or both.

	my @mappedAS = AI::Logic::AnswerSet::mapAS(@result,@predicates,@answerSets);

For instance, think about the 3-colorability problem: imagine to 
have the edges in the hashmap, and to print the edges contained in the third answer set
returned by DLV; this is an example of the print instruction, useful to understand how
the hashmap works:

	print "Edges: @{$mappedAS[2]{edge}}\n";

In this case, we are printing the array containing the predicate "edge".

=head3 getPred

Easily manage the hashmap and get the desired predicate(see the print example
described in the method above):

	my @edges = AI::Logic::AnswerSet::getPred(\@mappedAS,3,"edge");

=head3 getProjection

Returns the projection of the n-th term of a specified predicate.
Suppose that we have the predicate "person" C<person(Name,Surename);> and
that we just want the surenames of all the instances of "person":

	my @surenames = AI::Logic::AnswerSet::getProjection(\@mappedAS,3,"person",2);

The parameters are, respectively: hashmap, number of the answer set, name of the predicate,
position of the term.

=head3 statistics

This method returns an array of hashes with some stats of every predicate of every answer set,
namely the number of occurrences of the specified predicates of each answer set.
If a condition is specified(number of predicates), only the answer sets that satisfy
the condition are returned.

	my @res = AI::Logic::AnswerSet::getAS(@output);
	my @predicates = ("node","edge");
	my @stats = AI::Logic::AnswerSet::statistics(\@res,\@predicates);

In this case the data structure returned is the same as the one returned by C<mapAS()>.
Hence, for each answer set (each element of the array of hashes), the hashmap will appear 
like this:

	{
		node => 6
		edge => 9
	}

This means that for a particular answer set we have 6 nodes and 9 edges.
In addition, this method can be used with some constraints:

	my @res = AI::Logic::AnswerSet::getAS(@output);
	my @predicates = ("node,"edge");
	my @numbers = (4,15);
	my @operators = (">","<");
	my @stats = AI::Logic::AnswerSet::statistics(\@res,\@predicates,\@numbers,\@operators);

Now the functions returns the answer sets that satisfy the condition, i.e., an answer set
is returned only if the number of occurrences of the predicate "node" is higher than 4, and the number of occurrences of the predicate "edge" less than 15.

=head3 getFacts

Get the logic program facts from a file or a string.

	my @facts = AI::Logic::AnswerSet::getFacts($inputFile);

or

	my $code = "a(X):-b(X). b(1). b(2).";
	my @facts = AI::Logic::AnswerSet::getFacts($code);

DLV code can be freely exploited, with the only constraint of putting a space between rules
or facts.
This is an example of wrong input code:

	my $code = "a(X):-b(X).b(1).b(2).";

=head3 addCode

Use this method to quiclky add new code to a string or a file.

	my $code = "a(X):-b(X). b(1). b(2).";
	AI::Logic::AnswerSet::addCode($code,"b(3). b(4).");

or

	my $file = "myfile.txt";
	AI::Logic::AnswerSet::addCode($file,"b(3). b(4).");

=head3 createNewFile

Creates a new file with some code.

	AI::Logic::AnswerSet::createNewFile($file,"b(3). b(4).");

=head3 addFacts

Quiclky adds facts to a file. Imagine to have some data(representing facts) 
stored inside an array; just use this method to put them in a file and give it a name.

	AI::Logic::AnswerSet::addFacts("villagers",\@villagers,">","villagersFile.txt");

In the example above, "villagers" will be the name of the facts; C<@villagers> is the array 
containing the data; ">" is the file operator(will create a new file, in this case); 
"villagersFile.txt" is the filename. The file will contain facts of the form "villagers(X)",
for each "X", appearing in the array C<@villagers>.


=head1 SEE ALSO

www.dlvsystem.com

=head1 AUTHOR

Ferdinando Primerano, E<lt>levia@cpan.orgE<gt>
Francesco Calimeri, E<lt>calimeri@mat.unical.itE<gt>

This work started within the bachelor degree thesis program of the
Computer Science course at Department of Mathematics of the University
of Calabria.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2012 by Ferdinando Primerano , Francesco Calimeri

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.


=cut

view all matches for this distribution

AI-ML

54 results

view release on metacpan or search on metacpan

inc/MyBuilder.pm view on Meta::CPAN

package MyBuilder;
use base 'Module::Build';

use warnings;
use strict;

use Config;
use ExtUtils::ParseXS;
use ExtUtils::Mkbootstrap;

use Path::Tiny;

my $EXTRA_O_FLAGS = "";
my $EXTRA_FLAGS = "-lblas -llapack";

sub ACTION_code {
    my $self = shift;

    $EXTRA_O_FLAGS .= " -DUSE_REAL" unless exists $self->args->{'with-float'};

    $self->update_XS("XS/ML.xs.inc");

    $self->dispatch("create_objects");
    $self->dispatch("compile_xs");

    $self->SUPER::ACTION_code;
}

sub update_XS {
    my ($self, $file) = @_;
    my $output = $file;
    $output =~ s/\.inc$//;

    open my $i_fh, "<", $file   or die "$!";
    open my $o_fh, ">", $output or die "$!";
    while (<$i_fh>) {
        s/REAL/float/g;
        print {$o_fh} $_;
    }
    close $o_fh;
    close $i_fh;
}

sub ACTION_create_objects {
    my $self = shift;
    my $cbuilder = $self->cbuilder;

    my $c_progs = $self->rscan_dir("C", qr/\.c$/);
    for my $file (@$c_progs) {
        my $object = $file;
        $object =~ s/\.c$/.o/;
        next if $self->up_to_date($file, $object);
        $cbuilder->compile(
            object_file => $object,
            extra_compiler_flags => $EXTRA_O_FLAGS,
            source => $file,
            include_dirs => ["."]
        );
    }
}

sub ACTION_compile_xs {
    my $self = shift;
    my $cbuilder = $self->cbuilder;

    my $archdir = path($self->blib, "arch", "auto", "AI", "ML");
    $archdir->mkpath unless -d $archdir;

    my $xs = path("XS", "ML.xs");
    my $xs_c = path("XS", "ML.c");

    if (!$self->up_to_date($xs, $xs_c)) {
        ExtUtils::ParseXS::process_file(
            filename => $xs->stringify, 
            prototypes => 0,
            output => $xs_c->stringify
        );
    }

    my $xs_o = path("XS", "ML.o");
    if (!$self->up_to_date($xs_c, $xs_o)) {
        $cbuilder->compile(
            source => $xs_c,
            extra_compiler_flags => $EXTRA_O_FLAGS,
            include_dirs => ["."], 
            object_file => $xs_o
        );
    }
    my $bs_file = path( $archdir, "ML.bs");
    if (!$self->up_to_date($xs_o, $bs_file) ) {
        ExtUtils::Mkbootstrap::Mkbootstrap($bs_file);
        if (!-f $bs_file) {
            $bs_file->touch;
        }
    }

    my $objects = $self->rscan_dir("C", qr/\.o$/);
    push @$objects, $xs_o;
    my $lib_file = path($archdir, "ML.$Config{dlext}");
    if (!$self->up_to_date( $objects, $lib_file )) {
        $cbuilder->link(
            module_name => 'AI::ML',
            extra_linker_flags => $EXTRA_FLAGS,
            objects => $objects,
            lib_file => $lib_file,
        );
    }
}

1;

view all matches for this distribution

AI-MXNet-Gluon-Contrib

9 results

view release on metacpan or search on metacpan

lib/AI/MXNet/Gluon/Contrib.pm view on Meta::CPAN

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

package AI::MXNet::Gluon::Contrib;
use strict;
use warnings;
use AI::MXNet;
use AI::MXNet::Gluon::Contrib::NN::BasicLayers;
our $VERSION = '1.33';
=head1 NAME 

    AI::MXNet::Gluon::Contrib - A collection of supplemental Gluon blocks.
=cut

1;

=head1 AUTHOR

    Sergey Kolychev, <sergeykolychev.github@gmail.com>

=head1 COPYRIGHT & LICENSE

    This library is licensed under Apache 2.0 license L<https://www.apache.org/licenses/LICENSE-2.0>

=cut

view all matches for this distribution

AI-MXNet-Gluon-ModelZoo

19 results

view release on metacpan or search on metacpan

examples/image_classification.pl view on Meta::CPAN

#!/usr/bin/env perl
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

use strict;
use warnings;
use AI::MXNet::Gluon::ModelZoo 'get_model';
use AI::MXNet::Gluon::Utils 'download';
use Getopt::Long qw(HelpMessage);

GetOptions(
    ## my Pembroke Welsh Corgi Kyuubi, enjoing Solar eclipse of August 21, 2017
    'image=s' => \(my $image = 'http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'.
                               'gluon/dataset/kyuubi.jpg'),
    'model=s' => \(my $model = 'resnet152_v2'),
    'help'    => sub { HelpMessage(0) },
) or HelpMessage(1);

## get a pretrained model (download parameters file if necessary)
my $net = get_model($model, pretrained => 1);

## ImageNet classes
my $fname = download('http://data.mxnet.io/models/imagenet/synset.txt');
my @text_labels = map { chomp; s/^\S+\s+//; $_ } IO::File->new($fname)->getlines;

## get the image from the disk or net
if($image =~ /^https/)
{
    eval { require IO::Socket::SSL; };
    die "Need to have IO::Socket::SSL installed for https images" if $@;
}
$image = $image =~ /^https?/ ? download($image) : $image;

# Following the conventional way of preprocessing ImageNet data:
# Resize the short edge into 256 pixes,
# And then perform a center crop to obtain a 224-by-224 image.
# The following code uses the image processing functions provided 
# in the AI::MXNet::Image module.

$image = mx->image->imread($image);
$image = mx->image->resize_short($image, $model =~ /inception/ ? 330 : 256);
($image) = mx->image->center_crop($image, [($model =~ /inception/ ? 299 : 224)x2]);

## CV that is used to read image is column major (as PDL)
$image = $image->transpose([2,0,1])->expand_dims(axis=>0);

## normalizing the image
my $rgb_mean = nd->array([0.485, 0.456, 0.406])->reshape([1,3,1,1]);
my $rgb_std = nd->array([0.229, 0.224, 0.225])->reshape([1,3,1,1]);
$image = ($image->astype('float32') / 255 - $rgb_mean) / $rgb_std;

# Now we can recognize the object in the image.
# We perform an additional softmax on the output to obtain probability scores.
# And then print the top-5 recognized objects.
my $prob = $net->($image)->softmax;
for my $idx (@{ $prob->topk(k=>5)->at(0) })
{
    my $i = $idx->asscalar;
    printf(
        "With prob = %.5f, it contains %s\n",
        $prob->at(0)->at($i)->asscalar, $text_labels[$i]
    );
}

view all matches for this distribution

AI-MXNet

77 results

view release on metacpan or search on metacpan

examples/calculator.pl view on Meta::CPAN

#!/usr/bin/perl
use strict;
use warnings;
use AI::MXNet ('mx');

## preparing the samples
## to train our network
sub samples {
    my($batch_size, $func) = @_;
    # get samples
    my $n = 16384;
    ## creates a pdl with $n rows and two columns with random
    ## floats in the range between 0 and 1
    my $data = PDL->random(2, $n);
    ## creates the pdl with $n rows and one column with labels
    ## labels are floats that either sum or product, etc of
    ## two random values in each corresponding row of the data pdl
    my $label = $func->($data->slice('0,:'), $data->slice('1,:'));
    # partition into train/eval sets
    my $edge = int($n / 8);
    my $validation_data = $data->slice(":,0:@{[ $edge - 1 ]}");
    my $validation_label = $label->slice(":,0:@{[ $edge - 1 ]}");
    my $train_data = $data->slice(":,$edge:");
    my $train_label = $label->slice(":,$edge:");
    # build iterators around the sets
    return(mx->io->NDArrayIter(
        batch_size => $batch_size,
        data => $train_data,
        label => $train_label,
    ), mx->io->NDArrayIter(
        batch_size => $batch_size,
        data => $validation_data,
        label => $validation_label,
    ));
}

## the network model
sub nn_fc {
    my $data = mx->sym->Variable('data');
    my $ln = mx->sym->exp(mx->sym->FullyConnected(
        data => mx->sym->log($data),
        num_hidden => 1,
    ));
    my $wide = mx->sym->Concat($data, $ln);
    my $fc = mx->sym->FullyConnected(
	$wide,
	num_hidden => 1
    );
    return mx->sym->MAERegressionOutput(data => $fc, name => 'softmax');
}

sub learn_function {
    my(%args) = @_;
    my $func = $args{func};
    my $batch_size = $args{batch_size}//128;
    my($train_iter, $eval_iter) = samples($batch_size, $func);
    my $sym = nn_fc();

    ## call as ./calculator.pl 1 to just print model and exit
    if($ARGV[0]) {
        my @dsz = @{$train_iter->data->[0][1]->shape};
        my @lsz = @{$train_iter->label->[0][1]->shape};
        my $shape = {
            data          => [ $batch_size, splice @dsz,  1 ],
            softmax_label => [ $batch_size, splice @lsz, 1 ],
        };
        print mx->viz->plot_network($sym, shape => $shape)->graph->as_png;
        exit;
    }

    my $model = mx->mod->Module(
        symbol => $sym,
        context => mx->cpu(),
    );
    $model->fit($train_iter,
        eval_data => $eval_iter,
        optimizer => 'adam',
        optimizer_params => {
            learning_rate => $args{lr}//0.01,
            rescale_grad => 1/$batch_size,
            lr_scheduler  => AI::MXNet::FactorScheduler->new(
        	step => 100,
        	factor => 0.99
            )
        },
        eval_metric => 'mse',
        num_epoch => $args{epoch}//25,
    );

    # refit the model for calling on 1 sample at a time
    my $iter = mx->io->NDArrayIter(
        batch_size => 1,
        data => PDL->pdl([[ 0, 0 ]]),
        label => PDL->pdl([[ 0 ]]),
    );
    $model->reshape(
        data_shapes => $iter->provide_data,
        label_shapes => $iter->provide_label,
    );

    # wrap a helper around making predictions
    my ($arg_params) = $model->get_params;
    for my $k (sort keys %$arg_params)
    {
	print "$k -> ". $arg_params->{$k}->aspdl."\n";
    }
    return sub {
        my($n, $m) = @_;
        return $model->predict(mx->io->NDArrayIter(
            batch_size => 1,
            data => PDL->new([[ $n, $m ]]),
        ))->aspdl->list;
    };
}

my $add = learn_function(func => sub {
    my($n, $m) = @_;
    return $n + $m;
});
my $sub = learn_function(func => sub {
    my($n, $m) = @_;
    return $n - $m;
}, batch_size => 50, epoch => 40);
my $mul = learn_function(func => sub {
    my($n, $m) = @_;
    return $n * $m;
}, batch_size => 50, epoch => 40);
my $div = learn_function(func => sub {
    my($n, $m) = @_;
    return $n / $m;
}, batch_size => 10, epoch => 80);


print "12345 + 54321 â‰ˆ ", $add->(12345, 54321), "\n";
print "188 - 88 â‰ˆ ", $sub->(188, 88), "\n";
print "250 * 2 â‰ˆ ", $mul->(250, 2), "\n";
print "250 / 2 â‰ˆ ", $div->(250, 2), "\n";

view all matches for this distribution

AI-MXNetCAPI

10 results

view release on metacpan or search on metacpan

lib/AI/MXNetCAPI.pm view on Meta::CPAN

package AI::MXNetCAPI;
use base qw(DynaLoader);
bootstrap AI::MXNetCAPI;
our $VERSION = '1.0102';
1;
__END__

=head1 NAME

AI::MXNetCAPI - Swig interface to mxnet c api

=head1 SYNOPSIS

 use AI::MXNetCAPI;

=head1 DESCRIPTION

This module provides interface to mxnet
via its api.

=head1 SEE ALSO

L<AI::MXNet>

=head1 AUTHOR

Sergey Kolychev, <sergeykolychev.github@gmail.com>

=head1 COPYRIGHT & LICENSE

Copyright 2017 Sergey Kolychev.

This library is licensed under Apache 2.0 license.

See https://www.apache.org/licenses/LICENSE-2.0 for more information.

=cut

view all matches for this distribution

AI-MaxEntropy

29 results

view release on metacpan or search on metacpan

inc/Module/AutoInstall.pm view on Meta::CPAN

#line 1
package Module::AutoInstall;

use strict;
use Cwd                 ();
use ExtUtils::MakeMaker ();

use vars qw{$VERSION};
BEGIN {
	$VERSION = '1.03';
}

# special map on pre-defined feature sets
my %FeatureMap = (
    ''      => 'Core Features',    # XXX: deprecated
    '-core' => 'Core Features',
);

# various lexical flags
my ( @Missing, @Existing,  %DisabledTests, $UnderCPAN,     $HasCPANPLUS );
my ( $Config,  $CheckOnly, $SkipInstall,   $AcceptDefault, $TestOnly );
my ( $PostambleActions, $PostambleUsed );

# See if it's a testing or non-interactive session
_accept_default( $ENV{AUTOMATED_TESTING} or ! -t STDIN ); 
_init();

sub _accept_default {
    $AcceptDefault = shift;
}

sub missing_modules {
    return @Missing;
}

sub do_install {
    __PACKAGE__->install(
        [
            $Config
            ? ( UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} )
            : ()
        ],
        @Missing,
    );
}

# initialize various flags, and/or perform install
sub _init {
    foreach my $arg (
        @ARGV,
        split(
            /[\s\t]+/,
            $ENV{PERL_AUTOINSTALL} || $ENV{PERL_EXTUTILS_AUTOINSTALL} || ''
        )
      )
    {
        if ( $arg =~ /^--config=(.*)$/ ) {
            $Config = [ split( ',', $1 ) ];
        }
        elsif ( $arg =~ /^--installdeps=(.*)$/ ) {
            __PACKAGE__->install( $Config, @Missing = split( /,/, $1 ) );
            exit 0;
        }
        elsif ( $arg =~ /^--default(?:deps)?$/ ) {
            $AcceptDefault = 1;
        }
        elsif ( $arg =~ /^--check(?:deps)?$/ ) {
            $CheckOnly = 1;
        }
        elsif ( $arg =~ /^--skip(?:deps)?$/ ) {
            $SkipInstall = 1;
        }
        elsif ( $arg =~ /^--test(?:only)?$/ ) {
            $TestOnly = 1;
        }
    }
}

# overrides MakeMaker's prompt() to automatically accept the default choice
sub _prompt {
    goto &ExtUtils::MakeMaker::prompt unless $AcceptDefault;

    my ( $prompt, $default ) = @_;
    my $y = ( $default =~ /^[Yy]/ );

    print $prompt, ' [', ( $y ? 'Y' : 'y' ), '/', ( $y ? 'n' : 'N' ), '] ';
    print "$default\n";
    return $default;
}

# the workhorse
sub import {
    my $class = shift;
    my @args  = @_ or return;
    my $core_all;

    print "*** $class version " . $class->VERSION . "\n";
    print "*** Checking for Perl dependencies...\n";

    my $cwd = Cwd::cwd();

    $Config = [];

    my $maxlen = length(
        (
            sort   { length($b) <=> length($a) }
              grep { /^[^\-]/ }
              map  {
                ref($_)
                  ? ( ( ref($_) eq 'HASH' ) ? keys(%$_) : @{$_} )
                  : ''
              }
              map { +{@args}->{$_} }
              grep { /^[^\-]/ or /^-core$/i } keys %{ +{@args} }
        )[0]
    );

    while ( my ( $feature, $modules ) = splice( @args, 0, 2 ) ) {
        my ( @required, @tests, @skiptests );
        my $default  = 1;
        my $conflict = 0;

        if ( $feature =~ m/^-(\w+)$/ ) {
            my $option = lc($1);

            # check for a newer version of myself
            _update_to( $modules, @_ ) and return if $option eq 'version';

            # sets CPAN configuration options
            $Config = $modules if $option eq 'config';

            # promote every features to core status
            $core_all = ( $modules =~ /^all$/i ) and next
              if $option eq 'core';

            next unless $option eq 'core';
        }

        print "[" . ( $FeatureMap{ lc($feature) } || $feature ) . "]\n";

        $modules = [ %{$modules} ] if UNIVERSAL::isa( $modules, 'HASH' );

        unshift @$modules, -default => &{ shift(@$modules) }
          if ( ref( $modules->[0] ) eq 'CODE' );    # XXX: bugward combatability

        while ( my ( $mod, $arg ) = splice( @$modules, 0, 2 ) ) {
            if ( $mod =~ m/^-(\w+)$/ ) {
                my $option = lc($1);

                $default   = $arg    if ( $option eq 'default' );
                $conflict  = $arg    if ( $option eq 'conflict' );
                @tests     = @{$arg} if ( $option eq 'tests' );
                @skiptests = @{$arg} if ( $option eq 'skiptests' );

                next;
            }

            printf( "- %-${maxlen}s ...", $mod );

            if ( $arg and $arg =~ /^\D/ ) {
                unshift @$modules, $arg;
                $arg = 0;
            }

            # XXX: check for conflicts and uninstalls(!) them.
            if (
                defined( my $cur = _version_check( _load($mod), $arg ||= 0 ) ) )
            {
                print "loaded. ($cur" . ( $arg ? " >= $arg" : '' ) . ")\n";
                push @Existing, $mod => $arg;
                $DisabledTests{$_} = 1 for map { glob($_) } @skiptests;
            }
            else {
                print "missing." . ( $arg ? " (would need $arg)" : '' ) . "\n";
                push @required, $mod => $arg;
            }
        }

        next unless @required;

        my $mandatory = ( $feature eq '-core' or $core_all );

        if (
            !$SkipInstall
            and (
                $CheckOnly
                or _prompt(
                    qq{==> Auto-install the }
                      . ( @required / 2 )
                      . ( $mandatory ? ' mandatory' : ' optional' )
                      . qq{ module(s) from CPAN?},
                    $default ? 'y' : 'n',
                ) =~ /^[Yy]/
            )
          )
        {
            push( @Missing, @required );
            $DisabledTests{$_} = 1 for map { glob($_) } @skiptests;
        }

        elsif ( !$SkipInstall
            and $default
            and $mandatory
            and
            _prompt( qq{==> The module(s) are mandatory! Really skip?}, 'n', )
            =~ /^[Nn]/ )
        {
            push( @Missing, @required );
            $DisabledTests{$_} = 1 for map { glob($_) } @skiptests;
        }

        else {
            $DisabledTests{$_} = 1 for map { glob($_) } @tests;
        }
    }

    $UnderCPAN = _check_lock();    # check for $UnderCPAN

    if ( @Missing and not( $CheckOnly or $UnderCPAN ) ) {
        require Config;
        print
"*** Dependencies will be installed the next time you type '$Config::Config{make}'.\n";

        # make an educated guess of whether we'll need root permission.
        print "    (You may need to do that as the 'root' user.)\n"
          if eval '$>';
    }
    print "*** $class configuration finished.\n";

    chdir $cwd;

    # import to main::
    no strict 'refs';
    *{'main::WriteMakefile'} = \&Write if caller(0) eq 'main';
}

# Check to see if we are currently running under CPAN.pm and/or CPANPLUS;
# if we are, then we simply let it taking care of our dependencies
sub _check_lock {
    return unless @Missing;

    if ($ENV{PERL5_CPANPLUS_IS_RUNNING}) {
        print <<'END_MESSAGE';

*** Since we're running under CPANPLUS, I'll just let it take care
    of the dependency's installation later.
END_MESSAGE
        return 1;
    }

    _load_cpan();

    # Find the CPAN lock-file
    my $lock = MM->catfile( $CPAN::Config->{cpan_home}, ".lock" );
    return unless -f $lock;

    # Check the lock
    local *LOCK;
    return unless open(LOCK, $lock);

    if (
            ( $^O eq 'MSWin32' ? _under_cpan() : <LOCK> == getppid() )
        and ( $CPAN::Config->{prerequisites_policy} || '' ) ne 'ignore'
    ) {
        print <<'END_MESSAGE';

*** Since we're running under CPAN, I'll just let it take care
    of the dependency's installation later.
END_MESSAGE
        return 1;
    }

    close LOCK;
    return;
}

sub install {
    my $class = shift;

    my $i;    # used below to strip leading '-' from config keys
    my @config = ( map { s/^-// if ++$i; $_ } @{ +shift } );

    my ( @modules, @installed );
    while ( my ( $pkg, $ver ) = splice( @_, 0, 2 ) ) {

        # grep out those already installed
        if ( defined( _version_check( _load($pkg), $ver ) ) ) {
            push @installed, $pkg;
        }
        else {
            push @modules, $pkg, $ver;
        }
    }

    return @installed unless @modules;  # nothing to do
    return @installed if _check_lock(); # defer to the CPAN shell

    print "*** Installing dependencies...\n";

    return unless _connected_to('cpan.org');

    my %args = @config;
    my %failed;
    local *FAILED;
    if ( $args{do_once} and open( FAILED, '.#autoinstall.failed' ) ) {
        while (<FAILED>) { chomp; $failed{$_}++ }
        close FAILED;

        my @newmod;
        while ( my ( $k, $v ) = splice( @modules, 0, 2 ) ) {
            push @newmod, ( $k => $v ) unless $failed{$k};
        }
        @modules = @newmod;
    }

    if ( _has_cpanplus() ) {
        _install_cpanplus( \@modules, \@config );
    } else {
        _install_cpan( \@modules, \@config );
    }

    print "*** $class installation finished.\n";

    # see if we have successfully installed them
    while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) {
        if ( defined( _version_check( _load($pkg), $ver ) ) ) {
            push @installed, $pkg;
        }
        elsif ( $args{do_once} and open( FAILED, '>> .#autoinstall.failed' ) ) {
            print FAILED "$pkg\n";
        }
    }

    close FAILED if $args{do_once};

    return @installed;
}

sub _install_cpanplus {
    my @modules   = @{ +shift };
    my @config    = _cpanplus_config( @{ +shift } );
    my $installed = 0;

    require CPANPLUS::Backend;
    my $cp   = CPANPLUS::Backend->new;
    my $conf = $cp->configure_object;

    return unless $conf->can('conf') # 0.05x+ with "sudo" support
               or _can_write($conf->_get_build('base'));  # 0.04x

    # if we're root, set UNINST=1 to avoid trouble unless user asked for it.
    my $makeflags = $conf->get_conf('makeflags') || '';
    if ( UNIVERSAL::isa( $makeflags, 'HASH' ) ) {
        # 0.03+ uses a hashref here
        $makeflags->{UNINST} = 1 unless exists $makeflags->{UNINST};

    } else {
        # 0.02 and below uses a scalar
        $makeflags = join( ' ', split( ' ', $makeflags ), 'UNINST=1' )
          if ( $makeflags !~ /\bUNINST\b/ and eval qq{ $> eq '0' } );

    }
    $conf->set_conf( makeflags => $makeflags );
    $conf->set_conf( prereqs   => 1 );

    

    while ( my ( $key, $val ) = splice( @config, 0, 2 ) ) {
        $conf->set_conf( $key, $val );
    }

    my $modtree = $cp->module_tree;
    while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) {
        print "*** Installing $pkg...\n";

        MY::preinstall( $pkg, $ver ) or next if defined &MY::preinstall;

        my $success;
        my $obj = $modtree->{$pkg};

        if ( $obj and defined( _version_check( $obj->{version}, $ver ) ) ) {
            my $pathname = $pkg;
            $pathname =~ s/::/\\W/;

            foreach my $inc ( grep { m/$pathname.pm/i } keys(%INC) ) {
                delete $INC{$inc};
            }

            my $rv = $cp->install( modules => [ $obj->{module} ] );

            if ( $rv and ( $rv->{ $obj->{module} } or $rv->{ok} ) ) {
                print "*** $pkg successfully installed.\n";
                $success = 1;
            } else {
                print "*** $pkg installation cancelled.\n";
                $success = 0;
            }

            $installed += $success;
        } else {
            print << ".";
*** Could not find a version $ver or above for $pkg; skipping.
.
        }

        MY::postinstall( $pkg, $ver, $success ) if defined &MY::postinstall;
    }

    return $installed;
}

sub _cpanplus_config {
	my @config = ();
	while ( @_ ) {
		my ($key, $value) = (shift(), shift());
		if ( $key eq 'prerequisites_policy' ) {
			if ( $value eq 'follow' ) {
				$value = CPANPLUS::Internals::Constants::PREREQ_INSTALL();
			} elsif ( $value eq 'ask' ) {
				$value = CPANPLUS::Internals::Constants::PREREQ_ASK();
			} elsif ( $value eq 'ignore' ) {
				$value = CPANPLUS::Internals::Constants::PREREQ_IGNORE();
			} else {
				die "*** Cannot convert option $key = '$value' to CPANPLUS version.\n";
			}
		} else {
			die "*** Cannot convert option $key to CPANPLUS version.\n";
		}
	}
	return @config;
}

sub _install_cpan {
    my @modules   = @{ +shift };
    my @config    = @{ +shift };
    my $installed = 0;
    my %args;

    _load_cpan();
    require Config;

    if (CPAN->VERSION < 1.80) {
        # no "sudo" support, probe for writableness
        return unless _can_write( MM->catfile( $CPAN::Config->{cpan_home}, 'sources' ) )
                  and _can_write( $Config::Config{sitelib} );
    }

    # if we're root, set UNINST=1 to avoid trouble unless user asked for it.
    my $makeflags = $CPAN::Config->{make_install_arg} || '';
    $CPAN::Config->{make_install_arg} =
      join( ' ', split( ' ', $makeflags ), 'UNINST=1' )
      if ( $makeflags !~ /\bUNINST\b/ and eval qq{ $> eq '0' } );

    # don't show start-up info
    $CPAN::Config->{inhibit_startup_message} = 1;

    # set additional options
    while ( my ( $opt, $arg ) = splice( @config, 0, 2 ) ) {
        ( $args{$opt} = $arg, next )
          if $opt =~ /^force$/;    # pseudo-option
        $CPAN::Config->{$opt} = $arg;
    }

    local $CPAN::Config->{prerequisites_policy} = 'follow';

    while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) {
        MY::preinstall( $pkg, $ver ) or next if defined &MY::preinstall;

        print "*** Installing $pkg...\n";

        my $obj     = CPAN::Shell->expand( Module => $pkg );
        my $success = 0;

        if ( $obj and defined( _version_check( $obj->cpan_version, $ver ) ) ) {
            my $pathname = $pkg;
            $pathname =~ s/::/\\W/;

            foreach my $inc ( grep { m/$pathname.pm/i } keys(%INC) ) {
                delete $INC{$inc};
            }

            my $rv = $args{force} ? CPAN::Shell->force( install => $pkg )
                                  : CPAN::Shell->install($pkg);
            $rv ||= eval {
                $CPAN::META->instance( 'CPAN::Distribution', $obj->cpan_file, )
                  ->{install}
                  if $CPAN::META;
            };

            if ( $rv eq 'YES' ) {
                print "*** $pkg successfully installed.\n";
                $success = 1;
            }
            else {
                print "*** $pkg installation failed.\n";
                $success = 0;
            }

            $installed += $success;
        }
        else {
            print << ".";
*** Could not find a version $ver or above for $pkg; skipping.
.
        }

        MY::postinstall( $pkg, $ver, $success ) if defined &MY::postinstall;
    }

    return $installed;
}

sub _has_cpanplus {
    return (
        $HasCPANPLUS = (
            $INC{'CPANPLUS/Config.pm'}
              or _load('CPANPLUS::Shell::Default')
        )
    );
}

# make guesses on whether we're under the CPAN installation directory
sub _under_cpan {
    require Cwd;
    require File::Spec;

    my $cwd  = File::Spec->canonpath( Cwd::cwd() );
    my $cpan = File::Spec->canonpath( $CPAN::Config->{cpan_home} );

    return ( index( $cwd, $cpan ) > -1 );
}

sub _update_to {
    my $class = __PACKAGE__;
    my $ver   = shift;

    return
      if defined( _version_check( _load($class), $ver ) );  # no need to upgrade

    if (
        _prompt( "==> A newer version of $class ($ver) is required. Install?",
            'y' ) =~ /^[Nn]/
      )
    {
        die "*** Please install $class $ver manually.\n";
    }

    print << ".";
*** Trying to fetch it from CPAN...
.

    # install ourselves
    _load($class) and return $class->import(@_)
      if $class->install( [], $class, $ver );

    print << '.'; exit 1;

*** Cannot bootstrap myself. :-( Installation terminated.
.
}

# check if we're connected to some host, using inet_aton
sub _connected_to {
    my $site = shift;

    return (
        ( _load('Socket') and Socket::inet_aton($site) ) or _prompt(
            qq(
*** Your host cannot resolve the domain name '$site', which
    probably means the Internet connections are unavailable.
==> Should we try to install the required module(s) anyway?), 'n'
          ) =~ /^[Yy]/
    );
}

# check if a directory is writable; may create it on demand
sub _can_write {
    my $path = shift;
    mkdir( $path, 0755 ) unless -e $path;

    return 1 if -w $path;

    print << ".";
*** You are not allowed to write to the directory '$path';
    the installation may fail due to insufficient permissions.
.

    if (
        eval '$>' and lc(`sudo -V`) =~ /version/ and _prompt(
            qq(
==> Should we try to re-execute the autoinstall process with 'sudo'?),
            ((-t STDIN) ? 'y' : 'n')
        ) =~ /^[Yy]/
      )
    {

        # try to bootstrap ourselves from sudo
        print << ".";
*** Trying to re-execute the autoinstall process with 'sudo'...
.
        my $missing = join( ',', @Missing );
        my $config = join( ',',
            UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} )
          if $Config;

        return
          unless system( 'sudo', $^X, $0, "--config=$config",
            "--installdeps=$missing" );

        print << ".";
*** The 'sudo' command exited with error!  Resuming...
.
    }

    return _prompt(
        qq(
==> Should we try to install the required module(s) anyway?), 'n'
    ) =~ /^[Yy]/;
}

# load a module and return the version it reports
sub _load {
    my $mod  = pop;    # class/instance doesn't matter
    my $file = $mod;

    $file =~ s|::|/|g;
    $file .= '.pm';

    local $@;
    return eval { require $file; $mod->VERSION } || ( $@ ? undef: 0 );
}

# Load CPAN.pm and it's configuration
sub _load_cpan {
    return if $CPAN::VERSION;
    require CPAN;
    if ( $CPAN::HandleConfig::VERSION ) {
        # Newer versions of CPAN have a HandleConfig module
        CPAN::HandleConfig->load;
    } else {
    	# Older versions had the load method in Config directly
        CPAN::Config->load;
    }
}

# compare two versions, either use Sort::Versions or plain comparison
sub _version_check {
    my ( $cur, $min ) = @_;
    return unless defined $cur;

    $cur =~ s/\s+$//;

    # check for version numbers that are not in decimal format
    if ( ref($cur) or ref($min) or $cur =~ /v|\..*\./ or $min =~ /v|\..*\./ ) {
        if ( ( $version::VERSION or defined( _load('version') )) and
             version->can('new') 
            ) {

            # use version.pm if it is installed.
            return (
                ( version->new($cur) >= version->new($min) ) ? $cur : undef );
        }
        elsif ( $Sort::Versions::VERSION or defined( _load('Sort::Versions') ) )
        {

            # use Sort::Versions as the sorting algorithm for a.b.c versions
            return ( ( Sort::Versions::versioncmp( $cur, $min ) != -1 )
                ? $cur
                : undef );
        }

        warn "Cannot reliably compare non-decimal formatted versions.\n"
          . "Please install version.pm or Sort::Versions.\n";
    }

    # plain comparison
    local $^W = 0;    # shuts off 'not numeric' bugs
    return ( $cur >= $min ? $cur : undef );
}

# nothing; this usage is deprecated.
sub main::PREREQ_PM { return {}; }

sub _make_args {
    my %args = @_;

    $args{PREREQ_PM} = { %{ $args{PREREQ_PM} || {} }, @Existing, @Missing }
      if $UnderCPAN or $TestOnly;

    if ( $args{EXE_FILES} and -e 'MANIFEST' ) {
        require ExtUtils::Manifest;
        my $manifest = ExtUtils::Manifest::maniread('MANIFEST');

        $args{EXE_FILES} =
          [ grep { exists $manifest->{$_} } @{ $args{EXE_FILES} } ];
    }

    $args{test}{TESTS} ||= 't/*.t';
    $args{test}{TESTS} = join( ' ',
        grep { !exists( $DisabledTests{$_} ) }
          map { glob($_) } split( /\s+/, $args{test}{TESTS} ) );

    my $missing = join( ',', @Missing );
    my $config =
      join( ',', UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config} )
      if $Config;

    $PostambleActions = (
        $missing
        ? "\$(PERL) $0 --config=$config --installdeps=$missing"
        : "\$(NOECHO) \$(NOOP)"
    );

    return %args;
}

# a wrapper to ExtUtils::MakeMaker::WriteMakefile
sub Write {
    require Carp;
    Carp::croak "WriteMakefile: Need even number of args" if @_ % 2;

    if ($CheckOnly) {
        print << ".";
*** Makefile not written in check-only mode.
.
        return;
    }

    my %args = _make_args(@_);

    no strict 'refs';

    $PostambleUsed = 0;
    local *MY::postamble = \&postamble unless defined &MY::postamble;
    ExtUtils::MakeMaker::WriteMakefile(%args);

    print << "." unless $PostambleUsed;
*** WARNING: Makefile written with customized MY::postamble() without
    including contents from Module::AutoInstall::postamble() --
    auto installation features disabled.  Please contact the author.
.

    return 1;
}

sub postamble {
    $PostambleUsed = 1;

    return << ".";

config :: installdeps
\t\$(NOECHO) \$(NOOP)

checkdeps ::
\t\$(PERL) $0 --checkdeps

installdeps ::
\t$PostambleActions

.

}

1;

__END__

#line 1003

view all matches for this distribution

AI-MegaHAL

13 results

view release on metacpan or search on metacpan

lib/AI/MegaHAL.pm view on Meta::CPAN

package AI::MegaHAL;

require DynaLoader;
require Exporter;

use AutoLoader;
use Carp;

use strict;

use vars qw(@EXPORT @ISA $VERSION $AUTOLOAD);

@EXPORT = qw(megahal_setnoprompt
	     megahal_setnowrap
	     megahal_setnobanner
	     megahal_seterrorfile
	     megahal_setstatusfile
	     megahal_initialize
	     megahal_initial_greeting
	     megahal_command
	     megahal_do_reply
	     megahal_learn
	     megahal_output
	     megahal_input
	     megahal_cleanup);

@ISA = qw(Exporter DynaLoader);
$VERSION = '0.08';

sub AUTOLOAD {
    # This AUTOLOAD is used to 'autoload' constants from the constant()
    # XS function.  If a constant is not found then control is passed
    # to the AUTOLOAD in AutoLoader.

    my $constname;
    ($constname = $AUTOLOAD) =~ s/.*:://;
    croak "& not defined" if $constname eq 'constant';
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
	if ($! =~ /Invalid/ || $!{EINVAL}) {
	    $AutoLoader::AUTOLOAD = $AUTOLOAD;
	    goto &AutoLoader::AUTOLOAD;
	}
	else {
	    croak "Your vendor has not defined AI::MegaHAL macro $constname";
	}
    }
    {
	no strict 'refs';
	# Fixed between 5.005_53 and 5.005_61
	if ($] >= 5.00561) {
	    *$AUTOLOAD = sub () { $val };
	}
	else {
	    *$AUTOLOAD = sub { $val };
	}
    }
    goto &$AUTOLOAD;
}

sub new {
    my ($class,%args) = @_;
    my $self;

    # Bless ourselves into the AI::MegaHAL class.
    $self = bless({ },$class);

    # Make sure that we can find a brain or a training file somewhere
    # else die with an error.
    my $path = $args{'Path'} || ".";
    if(-e "$path/megahal.brn" || -e "$path/megahal.trn") {
	chdir($path) || die("Error: chdir: $!\n");
    } else {
	die("Error: unable to locate megahal.brn or megahal.trn\n");
    }

    # Set some of the options that may have been passed to us.
    megahal_setnobanner() if(! $args{'Banner'});
    megahal_setnowrap()   if(! $args{'Wrap'});
    megahal_setnoprompt() if(! $args{'Prompt'});

    # This flag indicates whether or not we should automatically save
    # our brain when the object goes out of scope.
    $self->{'AutoSave'} = $args{'AutoSave'};

    # Initialize ourselves.
    $self->_initialize();

    return $self;
}

sub initial_greeting {
    my $self = shift;

    return megahal_initial_greeting();
}

sub do_reply {
    my ($self,$text) = @_;

    return megahal_do_reply($text,0);
}

sub learn {
    my ($self,$text) = @_;

    return megahal_learn($text,0);
}

sub _initialize {
    my $self = shift;

    megahal_initialize();
    return;
}

sub _cleanup {
    my $self = shift;

    megahal_cleanup();
    return;
}

sub DESTROY {
    my $self = shift;

    $self->_cleanup() if($self->{'AutoSave'});
    return;
}

bootstrap AI::MegaHAL $VERSION;
1;

__END__

=head1 NAME

AI::MegaHAL - Perl interface to the MegaHAL natural language conversation simulator.

=head1 SYNOPSIS

use AI::MegaHAL;

my $megahal = AI::MegaHAL->new('Path'     => './',
                           'Banner'   => 0,
                           'Prompt'   => 0,
                           'Wrap'     => 0,
                           'AutoSave' => 0);

my $text = $megahal->initial_greeting();

$text = $megahal->do_reply($message);

$megahal->learn($message);

=head1 DESCRIPTION

Conversation simulators are computer programs which give the appearance of conversing with a user in natural language.  Such programs are effective because they exploit the fact that human beings tend to read much more meaning into what is said than ...

This package provides a Perl interface to the MegaHAL conversation simulator written by Jason Hutchens.

=head1 CONSTRUCTOR

$megahal = AI::MegaHAL->new('Path'     => './',
                           'Banner'   => 0,
                           'Prompt'   => 0,
                           'Wrap'     => 0,
                           'AutoSave' => 0);

Creates a new AI::MegaHAL object.  The object constructor can optionaly receive the following named parameters:

=over 4

=item B<Path> - The path to MegaHALs brain or training file (megahal.brn and megahal.trn respectively).  If 'Path' is not specified the current working directory is assumed.

=item B<Banner> - A flag which enables/disables the banner which is displayed when MegaHAL starts up.  The default is to disable the banner.

=item B<Prompt> - A flag which enables/disables the prompt. This flag is only useful when MegaHAL is run interactively and is disabled by default.

=item B<Wrap> - A flag which enables/disables word wrapping of MegaHALs responses when the lines exceed 80 characters in length.  The default is to disable word wrapping.

=back

=head1 METHODS

=head2 initial_greeting

$text = $megahal->initial_greeting();

Returns a string containing the initial greeting which is created by MegaHAL at startup.

=head2 do_reply

$text = $megahal->do_reply($message);

Generates reply $text to a given message $message.

=head2 learn

$megahal->learn($message);

Learns from $message without generating a response

=head1 BUGS

None known at this time.

=head1 SEE ALSO

POE::Component::AI::MegaHAL - IRC bot,

L<http://teaandbiscuits.org.uk/drupal/node/65> - Irssi IRC bot,

L<Hailo> - A pluggable Markov engine analogous to MegaHAL

=head1 AUTHOR

The Perl AI::MegaHAL interface was written by Cory Spencer <cspencer[at]sprocket.org>.

Now maintained by Alexandr Ciornii <alexchorny[at]gmail.com>.

MegaHAL was originally written by and is copyright Jason Hutchens <hutch[at]ciips.ee.uwa.edu.au>

=cut

view all matches for this distribution

AI-MicroStructure

103 results

view release on metacpan or search on metacpan

bin/from-folder.pl view on Meta::CPAN

#!/usr/bin/perl -w
use strict;

use File::Find;
use Storable qw(lock_store lock_retrieve);
use Getopt::Long;
use Digest::MD5 qw(md5_hex);
use Data::Dumper;
use Data::Printer;
use Env qw(PWD);
my ($TOP,$storage) ;
       $TOP = $ARGV[0] unless(!@ARGV);
       $storage = $ARGV[1] unless(!@ARGV);
        $storage = "/tmp" unless($storage);
our $cache = {};
our $files={};
our $curSysDate = `date +"%F"`;
        $curSysDate=~ s/\n//g;

our %opts = (cache_file =>
              sprintf("%s/%s_.cache",
              $storage,$curSysDate));

GetOptions (\%opts, "cache_file=s");



sub translate
{
  return unless -f;
  (my $rel_name = $File::Find::name) =~ s{.*/}{}xs;
   my $name = md5_hex($rel_name);
    my $go = 0;

     foreach(@ARGV){
    my $t = $_;

    if( ! -d $t &&  $rel_name !~ m/($t)/i){
        $go ++;
        print $rel_name."\n";
       }
    }
        

        if (/\.(html|htm)$/) {
          $files->{html}->{$name}=$rel_name;
        }elsif (/\.txt$/) {
          $files->{latex}->{$name}=$rel_name;
        }elsif (/\.json$/) {
          $files->{text}->{$name}=$rel_name;
        }
        
  }


find(\&translate, "$TOP");


p @{[keys %$files,reverse @ARGV,$storage]};

__DATA__
our $c = AI::MicroStructure::Context->new(@ARGV);
    $c->retrieveIndex($PWD."/t/docs"); #"/home/santex/data-hub/data-hub" structures=0 text=1 json=1



   my $style = {};
      $style->{explicit}  = 1;
ok($c->simpleMixedSearch($style,$_)) && ok($c->play($style,$_))   for
 qw(atom antimatter planet);



ok(print Dumper $c->intersect($style,$_)) for
 qw(atom antimatter planet);

ok(print Dumper $c->similar($style,$_)) for
 qw(atom antimatter planet);

#p @out;

1;

package main;

$|++;
use strict;

use Fi  le::Find;
use Data::Dumper;
use Storable qw(lock_store lock_retrieve);
use Getopt::Long;
our $curSysDate = `date +"%F"`;
    $curSysDate=~ s/\n//g;

our %opts = (cache_file =>
              sprintf("/tmp/%s.cache",
              $curSysDate));

GetOptions (\%opts, "cache_file=s");

our $cache = {};
our @target = split("\/",$opts{cache_file});
my $set = AI::MicroStructure::ObjectSet->new();

eval {
    local $^W = 0;  # because otherwhise doesn't pass errors
#`rm $opts{cache_file}`;
    $cache = lock_retrieve($opts{cache_file});

    $cache = {} unless $cache;

    warn "New cache!\n" unless defined $cache;
};


END{

  lock_store($cache,$opts{cache_file});

  print Dumper [$set->size,$set->members];


  }




find(\&translate, "$TOP/./");

sub translate {
  return unless -f;
  (my $rel_name = $File::Find::name) =~ s{.*/}{}xs;

  $set->insert(AI::MicroStructure::Object->new($rel_name));

}
#print Dumper join "-", soundex(("rock'n'roll", 'rock and roll', 'rocknroll'));

view all matches for this distribution

AI-NNEasy

21 results

view release on metacpan or search on metacpan

lib/AI/NNEasy.pm view on Meta::CPAN

#############################################################################
## This file was generated automatically by Class::HPLOO/0.21
##
## Original file:    ./lib/AI/NNEasy.hploo
## Generation date:  2005-01-16 22:07:24
##
## ** Do not change this file, use the original HPLOO source! **
#############################################################################

#############################################################################
## Name:        NNEasy.pm
## Purpose:     AI::NNEasy
## Author:      Graciliano M. P. 
## Modified by:
## Created:     2005-01-14
## RCS-ID:      
## Copyright:   (c) 2005 Graciliano M. P. 
## Licence:     This program is free software; you can redistribute it and/or
##              modify it under the same terms as Perl itself
#############################################################################


{ package AI::NNEasy ;

  
use strict qw(vars) ; no warnings ;

  
use vars qw(%CLASS_HPLOO @ISA $VERSION) ;

  
$VERSION = '0.06' ;

  
@ISA = qw(Class::HPLOO::Base UNIVERSAL) ;

  
my $CLASS = 'AI::NNEasy' ; sub __CLASS__ { 'AI::NNEasy' } ;

  
use Class::HPLOO::Base ;

  use AI::NNEasy::NN ;
  use Storable qw(freeze thaw) ;
  use Data::Dumper ;
  


  sub NNEasy { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $file = shift(@_) ;
    my @out_types = ref($_[0]) eq 'ARRAY' ? @{ shift(@_) } : ( ref($_[0]) eq 'HASH' ? %{ shift(@_) } : shift(@_) ) ;
    my $error_ok = shift(@_) ;
    my $in = shift(@_) ;
    my $out = shift(@_) ;
    my @layers = ref($_[0]) eq 'ARRAY' ? @{ shift(@_) } : ( ref($_[0]) eq 'HASH' ? %{ shift(@_) } : shift(@_) ) ;
    my $conf = shift(@_) ;
    
    $file ||= 'nneasy.nne' ;
  
    if ( $this->load($file) ) {
      return $this ;
    }
  
    my $in_sz  = ref $in  ? $in->{nodes}  : $in ;
    my $out_sz = ref $out ? $out->{nodes} : $out ;
  
    @layers = ($in_sz+$out_sz) if !@layers ;
    
    foreach my $layers_i ( @layers ) {
      $layers_i = $in_sz+$out_sz if $layers_i <= 0 ;
    }
    
    $conf ||= {} ;
    
    my $decay = $$conf{decay} || 0 ; 
    
    my $nn_in  = $this->_layer_conf( { decay=>$decay } , $in ) ;
    my $nn_out  = $this->_layer_conf( { decay=>$decay , activation_function=>'linear' } , $out ) ;
    
    foreach my $layers_i ( @layers ) {
      $layers_i = $this->_layer_conf( { decay=>$decay } , $layers_i ) ;
    }
    
    my $nn_conf = {random_connections=>0 , networktype=>'feedforward' , random_weights=>1 , learning_algorithm=>'backprop' , learning_rate=>0.1 , bias=>1} ;
    foreach my $Key ( keys %$nn_conf ) { $$nn_conf{$Key} = $$conf{$Key} if exists $$conf{$Key} ;}
    
    $this->{NN_ARGS} = [[ $nn_in , @layers , $nn_out ] , $nn_conf] ;

    $this->{NN} = AI::NNEasy::NN->new( @{$this->{NN_ARGS}} ) ;
    
    $this->{FILE} = $file ;
    
    @out_types = (0,1) if !@out_types ;
    
    @out_types = sort {$a <=> $b} @out_types ;
    
    $this->{OUT_TYPES} = \@out_types ;
    
    if ( $error_ok <= 0 ) {
      my ($min_dif , $last) ;
      my $i = -1 ;
      foreach my $out_types_i ( @out_types ) {
        ++$i ;
        if ($i > 0) {
          my $dif = $out_types_i - $last ;
          $min_dif = $dif if !defined $min_dif || $dif < $min_dif ;
        }
        $last = $out_types_i ;
      }
      $error_ok = $min_dif / 2 ;
      $error_ok -= $error_ok*0.1 ;
    }
    
    $this->{ERROR_OK} = $error_ok ;
    
    return $this ;
  }
  
  sub _layer_conf { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $def = shift(@_) ;
    my $conf = shift(@_) ;
    
    $def ||= {} ;
    $conf = { nodes=>$conf } if !ref($conf) ;
    
    foreach my $Key ( keys %$def ) { $$conf{$Key} = $$def{$Key} if !exists $$conf{$Key} ;}
  
    my $layer_conf  = {nodes=>1  , persistent_activation=>0 , decay=>0 , random_activation=>0 , threshold=>0 , activation_function=>'tanh' , random_weights=>1} ;
    foreach my $Key ( keys %$layer_conf ) { $$layer_conf{$Key} = $$conf{$Key} if exists $$conf{$Key} ;}

    return $layer_conf ;
  }
  
  sub reset_nn { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    
    $this->{NN} = AI::NNEasy::NN->new( @{ $this->{NN_ARGS} } ) ;
  }
  
  sub load { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $file = shift(@_) ;
    
    $file ||= $this->{FILE} ;
    if ( -s $file ) {
      open (my $fh, $file) ;
      my $dump = join '' , <$fh> ;
      close ($fh) ;
      
      my $restored = thaw($dump) ;
      
      if ($restored) {
        my $fl = $this->{FILE} ;
        %$this = %$restored ;
        $this->{FILE} = $fl if $fl ;
        return 1 ;
      }
    }
    return ;
  }
  
  sub save { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $file = shift(@_) ;
    
    $file ||= $this->{FILE} ;
        
    my $dump = freeze( {%$this} ) ;
    open (my $fh,">$this->{FILE}") ;
    print $fh $dump ;
    close ($fh) ;
  }
  
  sub learn { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $in = shift(@_) ;
    my $out = shift(@_) ;
    my $n = shift(@_) ;
    
    $n ||= 100 ;
    
    my $err ;
    for (1..100) {
      $this->{NN}->run($in) ;
      $err = $this->{NN}->learn($out) ;
    }
    
    $err *= -1 if $err < 0 ;
    return $err ;
  }
  
  *_learn_set_get_output_error = \&_learn_set_get_output_error_c ;
  
  sub _learn_set_get_output_error_pl { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $set = shift(@_) ;
    my $error_ok = shift(@_) ;
    my $ins_ok = shift(@_) ;
    my $verbose = shift(@_) ;
    
    for (my $i = 0 ; $i < @$set ; $i+=2) {
      $this->{NN}->run($$set[$i]) ;
      $this->{NN}->learn($$set[$i+1]) ;
    }

    my ($err,$learn_ok,$print) ;
    for (my $i = 0 ; $i < @$set ; $i+=2) {
      $this->{NN}->run($$set[$i]) ;
      my $er = $this->{NN}->RMSErr($$set[$i+1]) ;
      $er *= -1 if $er < 0 ;
      ++$learn_ok if $er < $error_ok ;
      $err += $er ;
      $print .= join(' ',@{$$set[$i]}) ." => ". join(' ',@{$$set[$i+1]}) ." > $er\n" if $verbose ;
    }
    
    $err /= $ins_ok ;
    
    return ( $err , $learn_ok , $print ) ;
  }
  
  
  
  
    
  sub learn_set { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my @set = ref($_[0]) eq 'ARRAY' ? @{ shift(@_) } : ( ref($_[0]) eq 'HASH' ? %{ shift(@_) } : shift(@_) ) ;
    my $ins_ok = shift(@_) ;
    my $limit = shift(@_) ;
    my $verbose = shift(@_) ;
    
    my $ins_sz = @set / 2 ;

    $ins_ok ||= $ins_sz ;
    
    my $err_static_limit = 15 ;
    my $err_static_limit_positive ;

    if ( ref($limit) eq 'ARRAY' ) {
      ($limit,$err_static_limit,$err_static_limit_positive) = @$limit ;
    }
    
    $limit ||= 30000 ;
    $err_static_limit_positive ||= $err_static_limit/2 ;
  
    my $error_ok = $this->{ERROR_OK} ;
    
    my $check_diff_count = 1000 ;
    
    my ($learn_ok,$counter,$err,$err_last,$err_count,$err_static, $reset_count1 , $reset_count2 ,$print) ;
    
    $err_static = 0 ;
    
    while ( ($learn_ok < $ins_ok) && ($counter < $limit) ) {
      ($err , $learn_ok , $print) = $this->_learn_set_get_output_error(\@set , $error_ok , $ins_ok , $verbose) ;
      
      ++$counter ;
      
      if ( !($counter % 100) || $learn_ok == $ins_ok ) {
        my $err_diff = $err_last - $err ;
        $err_diff *= -1 if $err_diff < 0 ;
        
        $err_count += $err_diff ;
        
        ++$err_static if $err_diff <= 0.00001 || $err > 1 ;
        
        print "err_static = $err_static\n" if $verbose && $err_static ;

        $err_last = $err ;
        
        my $reseted ;
        if ( $err_static >= $err_static_limit || ($err > 1 && $err_static >= $err_static_limit_positive) ) {
          $err_static = 0 ;
          $counter -= 2000 ;
          $reseted = 1 ;
          ++$reset_count1 ;
          
          if ( ( $reset_count1 + $reset_count2 ) > 2 ) {
            $reset_count1 = $reset_count2 = 0 ;
            print "** Reseting NN...\n" if $verbose ;
            $this->reset_nn ;
          }
          else {
            print "** Reseting weights due NULL diff...\n" if $verbose ;
            $this->{NN}->init ;
          }
        }
        
        if ( !($counter % $check_diff_count) ) {
          $err_count /= ($check_diff_count/100) ;
          
          print "ERR COUNT> $err_count\n" if $verbose ;
          
          if ( !$reseted && $err_count < 0.001 ) {
            $err_static = 0 ;
            $counter -= 1000 ;
            ++$reset_count2 ;
            
            if ( ($reset_count1 + $reset_count2) > 2 ) {
              $reset_count1 = $reset_count2 = 0 ;
              print "** Reseting NN...\n" if $verbose ;
              $this->reset_nn ;
            }
            else {
              print "** Reseting weights due LOW diff...\n" if $verbose ;
              $this->{NN}->init ;
            }
          }

          $err_count = 0 ;
        }
        
        if ( $verbose ) {
          print "\nepoch $counter : error_ok = $error_ok : error = $err : err_diff = $err_diff : err_static = $err_static : ok = $learn_ok\n" ;
          print $print ;
        }
      }

      print "epoch $counter : error = $err : ok = $learn_ok\n" if $verbose > 1 ;
      
    }
    
  }
  
  sub get_set_error { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my @set = ref($_[0]) eq 'ARRAY' ? @{ shift(@_) } : ( ref($_[0]) eq 'HASH' ? %{ shift(@_) } : shift(@_) ) ;
    my $ins_ok = shift(@_) ;
    
    my $ins_sz = @set / 2 ;

    $ins_ok ||= $ins_sz ;
  
    my $err ;
    for (my $i = 0 ; $i < @set ; $i+=2) {
      $this->{NN}->run($set[$i]) ;
      my $er = $this->{NN}->RMSErr($set[$i+1]) ;
      $er *= -1 if $er < 0 ;
      $err += $er ;
    }
    
    $err /= $ins_ok ;
    return $err ;
  }
  
  sub run { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $in = shift(@_) ;
    
    $this->{NN}->run($in) ;
    my $out = $this->{NN}->output() ;
    return $out ;
  }
  
  sub run_get_winner { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    
    my $out = $this->run(@_) ;
    
    foreach my $out_i ( @$out ) {
      $out_i = $this->out_type_winner($out_i) ;
    }
    
    return $out ;
  }
  
  sub out_type_winner { 
    my $this = ref($_[0]) ? shift : undef ;
    my $CLASS = ref($this) || __PACKAGE__ ;
    my $val = shift(@_) ;
    
    my ($out_type , %err) ;
    
    foreach my $types_i ( @{ $this->{OUT_TYPES} } ) {
      my $er = $types_i - $val ;
      $er *= -1 if $er < 0 ;
      $err{$types_i} = $er ;
    }
    
    my $min_type_err = (sort { $err{$a} <=> $err{$b} } keys %err)[0] ;
    $out_type = $min_type_err ;

    return $out_type ;
  }


my $INLINE_INSTALL ; BEGIN { use Config ; my @installs = ($Config{installarchlib} , $Config{installprivlib} , $Config{installsitelib}) ; foreach my $i ( @installs ) { $i =~ s/[\\\/]/\//gs ;} $INLINE_INSTALL = 1 if ( __FILE__ =~ /\.pm$/ && ( join(" ",...

use Inline C => <<'__INLINE_C_SRC__' , ( $INLINE_INSTALL ? (NAME => 'AI::NNEasy' , VERSION => '0.06') : () ) ;


#define OBJ_SV(self)		SvRV( self )
#define OBJ_HV(self)		(HV*) SvRV( self )
#define OBJ_AV(self)		(AV*) SvRV( self )

#define FETCH_ATTR(hv,k)	*hv_fetch(hv, k , strlen(k) , 0)
#define FETCH_ATTR_PV(hv,k)	SvPV( FETCH_ATTR(hv,k) , len)
#define FETCH_ATTR_NV(hv,k)	SvNV( FETCH_ATTR(hv,k) )
#define FETCH_ATTR_IV(hv,k)	SvIV( FETCH_ATTR(hv,k) )  
#define FETCH_ATTR_HV(hv,k)	(HV*) FETCH_ATTR(hv,k)
#define FETCH_ATTR_AV(hv,k)	(AV*) FETCH_ATTR(hv,k)
#define FETCH_ATTR_SV_REF(hv,k)	SvRV( FETCH_ATTR(hv,k) )
#define FETCH_ATTR_HV_REF(hv,k)	(HV*) SvRV( FETCH_ATTR(hv,k) )
#define FETCH_ATTR_AV_REF(hv,k)	(AV*) SvRV( FETCH_ATTR(hv,k) )

#define FETCH_ELEM(av,i)		*av_fetch(av,i,0)
#define FETCH_ELEM_HV_REF(av,i)	(HV*) SvRV( FETCH_ELEM(av,i) )
#define FETCH_ELEM_AV_REF(av,i)	(AV*) SvRV( FETCH_ELEM(av,i) )

SV* _av_join( AV* av ) {
    SV* ret = sv_2mortal(newSVpv("",0)) ;
    int i ;
    for (i = 0 ; i <= av_len(av) ; ++i) {
      SV* elem = *av_fetch(av, i ,0) ;
      if (i > 0) sv_catpv(ret , " ") ;
      sv_catsv(ret , elem) ;
    }
    return ret ;
}

void _learn_set_get_output_error_c( SV* self , SV* set , double error_ok , int ins_ok , bool verbose ) {
    dXSARGS;
    
    STRLEN len;
    int i ;
    HV* self_hv = OBJ_HV( self );
    AV* set_av = OBJ_AV( set ) ;
    SV* nn = FETCH_ATTR(self_hv , "NN") ;
    SV* print_verbose = verbose ? sv_2mortal(newSVpv("",0)) : NULL ;
    SV* ret ;
    double err = 0 ;
    double er = 0 ;
    int learn_ok = 0 ;
        
    for (i = 0 ; i <= av_len(set_av) ; i+=2) {
      SV* set_in = *av_fetch(set_av, i ,0) ;
      SV* set_out = *av_fetch(set_av, i+1 ,0) ;

      PUSHMARK(SP) ;
        XPUSHs( nn );
        XPUSHs( set_in );
      PUTBACK ;
      call_method("run", G_DISCARD) ;
      
      PUSHMARK(SP) ;
        XPUSHs( nn );
        XPUSHs( set_out );
      PUTBACK ;
      call_method("learn", G_SCALAR) ;
    }
    
    for (i = 0 ; i <= av_len(set_av) ; i+=2) {
      SV* set_in = *av_fetch(set_av, i ,0) ;
      SV* set_out = *av_fetch(set_av, i+1 ,0) ;

      PUSHMARK(SP) ;
        XPUSHs( nn );
        XPUSHs( set_in );
      PUTBACK ;
      call_method("run", G_DISCARD) ;
      
      PUSHMARK(SP) ;
        XPUSHs( nn );
        XPUSHs( set_out );
      PUTBACK ;
      call_method("RMSErr", G_SCALAR) ;
      
      SPAGAIN ;
      ret = POPs ;
      er = SvNV(ret) ;
      if (er < 0) er *= -1 ;
      if (er < error_ok) ++learn_ok ;
      err += er ;
      
      if ( verbose ) sv_catpvf(print_verbose , "%s => %s > %f\n" ,
                       SvPV( _av_join( OBJ_AV(set_in) ) , len) ,
                       SvPV( _av_join( OBJ_AV(set_out) ) , len) ,
                       er
                     ) ;

    }
    
    err /= ins_ok ;

    if (verbose) {
      EXTEND(SP , 3) ;
        ST(0) = sv_2mortal(newSVnv(err)) ;
        ST(1) = sv_2mortal(newSViv(learn_ok)) ;
        ST(2) = print_verbose ;
      XSRETURN(3) ;
    }
    else {
      EXTEND(SP , 2) ;
        ST(0) = sv_2mortal(newSVnv(err)) ;
        ST(1) = sv_2mortal(newSViv(learn_ok)) ;
      XSRETURN(2) ;
    }
}

__INLINE_C_SRC__


}


1;

__END__

=head1 NAME

AI::NNEasy - Define, learn and use easy Neural Networks of different types using a portable code in Perl and XS.

=head1 DESCRIPTION

The main purpose of this module is to create easy Neural Networks with Perl.

The module was designed to can be extended to multiple network types, learning algorithms and activation functions.
This architecture was 1st based in the module L<AI::NNFlex>, than I have rewrited it to fix some
serialization bugs, and have otimized the code and added some XS functions to get speed
in the learning process. Finally I have added an intuitive inteface to create and use the NN,
and added a winner algorithm to the output.

I have writed this module because after test different NN module on Perl I can't find
one that is portable through Linux and Windows, easy to use and the most important,
one that really works in a reall problem.

With this module you don't need to learn much about NN to be able to construct one, you just
define the construction of the NN, learn your set of inputs, and use it.

=head1 USAGE

Here's an example of a NN to compute XOR:

  use AI::NNEasy ;
  
  ## Our maximal error for the output calculation.
  my $ERR_OK = 0.1 ;

  ## Create the NN:
  my $nn = AI::NNEasy->new(
  'xor.nne' , ## file to save the NN.
  [0,1] ,     ## Output types of the NN.
  $ERR_OK ,   ## Maximal error for output.
  2 ,         ## Number of inputs.
  1 ,         ## Number of outputs.
  [3] ,       ## Hidden layers. (this is setting 1 hidden layer with 3 nodes).
  ) ;
  
  
  ## Our set of inputs and outputs to learn:
  my @set = (
  [0,0] => [0],
  [0,1] => [1],
  [1,0] => [1],
  [1,1] => [0],
  );
  
  ## Calculate the actual error for the set:
  my $set_err = $nn->get_set_error(\@set) ;
  
  ## If set error is bigger than maximal error lest's learn this set:
  if ( $set_err > $ERR_OK ) {
    $nn->learn_set( \@set ) ;
    ## Save the NN:
    $nn->save ;
  }
  
  ## Use the NN:
  
  my $out = $nn->run_get_winner([0,0]) ;
  print "0 0 => @$out\n" ; ## 0 0 => 0
  
  my $out = $nn->run_get_winner([0,1]) ;
  print "0 1 => @$out\n" ; ## 0 1 => 1
  
  my $out = $nn->run_get_winner([1,0]) ;
  print "1 0 => @$out\n" ; ## 1 0 => 1
  
  my $out = $nn->run_get_winner([1,1]) ;
  print "1 1 => @$out\n" ; ## 1 1 => 0
  
  ## or just interate through the @set:
  for (my $i = 0 ; $i < @set ; $i+=2) {
    my $out = $nn->run_get_winner($set[$i]) ;
    print "@{$set[$i]}) => @$out\n" ;
  }

=head1 METHODS

=head2 new ( FILE , @OUTPUT_TYPES , ERROR_OK , IN_SIZE , OUT_SIZE , @HIDDEN_LAYERS , %CONF )

=over 4

=item FILE

The file path to save the NN. Default: 'nneasy.nne'.

=item @OUTPUT_TYPES

An array of outputs that the NN can have, so the NN can find the nearest number in this
list to give your the right output.

=item ERROR_OK

The maximal error of the calculated output.

If not defined ERROR_OK will be calculated by the minimal difference between 2 types at
@OUTPUT_TYPES dived by 2:

  @OUTPUT_TYPES = [0 , 0.5 , 1] ;
  
  ERROR_OK = (1 - 0.5) / 2 = 0.25 ;

=item IN_SIZE

The input size (number of nodes in the inpute layer).

=item OUT_SIZE

The output size (number of nodes in the output layer).

=item @HIDDEN_LAYERS

A list of size of hidden layers. By default we have 1 hidden layer, and
the size is calculated by I<(IN_SIZE + OUT_SIZE)>. So, for a NN of
2 inputs and 1 output the hidden layer have 3 nodes.

=item %CONF

Conf can be used to define special parameters of the NN:

Default:

 {networktype=>'feedforward' , random_weights=>1 , learning_algorithm=>'backprop' , learning_rate=>0.1 , bias=>1}
 
Options:

=over 4

=item networktype

The type of the NN. For now only accepts I<'feedforward'>.

=item random_weights

Maximum value for initial weight.

=item learning_algorithm

Algorithm to train the NN. Accepts I<'backprop'> and I<'reinforce'>.

=item learning_rate

Rate used in the learning_algorithm.

=item bias

If true will create a BIAS node. Usefull when you have NULL inputs, like [0,0].

=back

=back

Here's a completly example of use:

  my $nn = AI::NNEasy->new(
  'xor.nne' , ## file to save the NN.
  [0,1] ,     ## Output types of the NN.
  0.1 ,       ## Maximal error for output.
  2 ,         ## Number of inputs.
  1 ,         ## Number of outputs.
  [3] ,       ## Hidden layers. (this is setting 1 hidden layer with 3 nodes).
  {random_connections=>0 , networktype=>'feedforward' , random_weights=>1 , learning_algorithm=>'backprop' , learning_rate=>0.1 , bias=>1} ,
  ) ;

And a simple example that will create a NN equal of the above:

  my $nn = AI::NNEasy->new('xor.nne' , [0,1] , 0.1 , 2 , 1 ) ;

=head2 load

Load the NN if it was previously saved.

=head2 save

Save the NN to a file using L<Storable>.

=head2 learn (@IN , @OUT , N)

Learn the input.

=over 4

=item @IN

The values of one input.

=item @OUT

The values of the output for the input above.

=item N

Number of times that this input should be learned. Default: 100

Example:

  $nn->learn( [0,1] , [1] , 10 ) ;

=back

=head2 learn_set (@SET , OK_OUTPUTS , LIMIT , VERBOSE)

Learn a set of inputs until get the right error for the outputs.

=over 4

=item @SET

A list of inputs and outputs.

=item OK_OUTPUTS

Minimal number of outputs that should be OK when calculating the erros.

By default I<OK_OUTPUTS> should have the same size of number of different
inouts in the @SET.

=item LIMIT

Limit of interations when learning. Default: 30000

=item VERBOSE

If TRUE turn verbose method ON when learning.

=back

=head2 get_set_error (@SET , OK_OUTPUTS)

Get the actual error of a set in the NN. If the returned error is bigger than
I<ERROR_OK> defined on I<new()> you should learn or relearn the set.

=head2 run (@INPUT)

Run a input and return the output calculated by the NN based in what the NN already have learned.

=head2 run_get_winner (@INPUT)

Same of I<run()>, but the output will return the nearest output value based in the
I<@OUTPUT_TYPES> defined at I<new()>.

For example an input I<[0,1]> learned that have
the output I<[1]>, actually will return something like 0.98324 as output and
not 1, since the error never should be 0. So, with I<run_get_winner()>
we get the output of I<run()>, let's say that is 0.98324, and find what output
is near of this number, that in this case should be 1. An output [0], will return
by I<run()> something like 0.078964, and I<run_get_winner()> return 0.

=head1 Samples

Inside the release sources you can find the directory ./samples where you have some
examples of code using this module.

=head1 INLINE C

Some functions of this module have I<Inline> functions writed in C.

I have made a C version only for the functions that are wild called, like:

  AI::NNEasy::_learn_set_get_output_error

  AI::NNEasy::NN::tanh

  AI::NNEasy::NN::feedforward::run
  
  AI::NNEasy::NN::backprop::hiddenToOutput
  AI::NNEasy::NN::backprop::hiddenOrInputToHidden
  AI::NNEasy::NN::backprop::RMSErr

What give to us the speed that we need to learn fast the inputs, but at the same time
be able to create flexible NN.

=head1 Class::HPLOO

I have used L<Class::HPLOO> to write fast the module, specially the XS support.

L<Class::HPLOO> enables this kind of syntax for Perl classes:

  class Foo {
    
    sub bar($x , $y) {
      $this->add($x , $y) ;
    }
    
    sub[C] int add( int x , int y ) {
      int res = x + y ;
      return res ;
    }
    
  }

What make possible to write the module in 2 days! ;-P

=head1 Basics of a Neural Network

I<- This is just a simple text for lay pleople,
to try to make them to understand what is a Neural Network and how it works
without need to read a lot of books -.>

A NN is based in nodes/neurons and layers, where we have the input layer, the hidden layers and the output layer.

For example, here we have a NN with 2 inputs, 1 hidden layer, and 2 outputs:

         Input  Hidden  Output
 input1  ---->n1\    /---->n4---> output1
                 \  /
                  n3
                 /  \
 input2  ---->n2/    \---->n5---> output2


Basically, when we have an input, let's say [0,1], it will active I<n2>, that will
active I<n3> and I<n3> will active I<n4> and I<n5>, but the link between I<n3> and I<n4> has a I<weight>, and
between I<n3> and I<n5> another I<weight>. The idea is to find the I<weights> between the
nodes that can give to us an output near the real output. So, if the output of [0,1]
is [1,1], the nodes I<output1> and I<output2> should give to us a number near 1,
let's say 0.98654. And if the output for [0,0] is [0,0], I<output1> and I<output2> should give to us a number near 0,
let's say 0.078875.

What is hard in a NN is to find this I<weights>. By default L<AI::NNEasy> uses
I<backprop> as learning algorithm. With I<backprop> it pastes the inputs through
the Neural Network and adjust the I<weights> using random numbers until we find
a set of I<weights> that give to us the right output.

The secret of a NN is the number of hidden layers and nodes/neurons for each layer.
Basically the best way to define the hidden layers is 1 layer of (INPUT_NODES+OUTPUT_NODES).
So, a layer of 2 input nodes and 1 output node, should have 3 nodes in the hidden layer.
This definition exists because the number of inputs define the maximal variability of
the inputs (N**2 for bollean inputs), and the output defines if the variability is reduced by some logic restriction, like
int the XOR example, where we have 2 inputs and 1 output, so, hidden is 3. And as we can see in the
logic we have 3 groups of inputs:

  0 0 => 0 # false
  0 1 => 1 # or
  1 0 => 1 # or
  1 1 => 1 # true

Actually this is not the real explanation, but is the easiest way to understand that
you need to have a number of nodes/neuros in the hidden layer that can give the
right output for your problem.

Other inportant step of a NN is the learning fase. Where we get a set of inputs
and paste them through the NN until we have the right output. This process basically
will adjust the nodes I<weights> until we have an output near the real output that we want.

Other important concept is that the inputs and outputs in the NN should be from 0 to 1.
So, you can define sets like:

  0 0      => 0
  0 0.5    => 0.5
  0.5 0.5  => 1
  1 0.5    => 0
  1 1      => 1

But what is really recomended is to always use bollean values, just 0 or 1, for inputs and outputs,
since the learning fase will be faster and works better for complex problems.

=head1 SEE ALSO

L<AI::NNFlex>, L<AI::NeuralNet::Simple>, L<Class::HPLOO>, L<Inline>.

=head1 AUTHOR

Graciliano M. P. <gmpassos@cpan.org>

I will appreciate any type of feedback (include your opinions and/or suggestions). ;-P

Thanks a lot to I<Charles Colbourn <charlesc at nnflex.g0n.net>>, that is the
author of L<AI::NNFlex>, that 1st wrote it, since NNFlex was my starting point to
do this NN work, and 2nd to be in touch with the development of L<AI::NNEasy>.

=head1 COPYRIGHT

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

view all matches for this distribution

AI-NNFlex

30 results

view release on metacpan or search on metacpan

examples/add.pl view on Meta::CPAN

use AI::NNFlex::Backprop;
use AI::NNFlex::Dataset;


# train the network to do addition. Adapted from code posted to perlmonks
# by tlpriest on 13/05/05




my $network = AI::NNFlex::Backprop->new(
                learningrate=>.00001,
		fahlmanconstant=>0,
		fixedweights=>1,
		momentum=>0.3,
		bias=>0);


$network->add_layer(    nodes=>2,
            activationfunction=>"linear");


$network->add_layer(    nodes=>2,
            activationfunction=>"linear");

$network->add_layer(    nodes=>1,
            activationfunction=>"linear");


$network->init();

# Taken from Mesh ex_add.pl
my $dataset = AI::NNFlex::Dataset->new([
[ 1,   1   ], [ 2    ],
[ 1,   2   ], [ 3    ],
[ 2,   2   ], [ 4    ],
[ 20,  20  ], [ 40   ],
[ 10,  10  ], [ 20   ],
[ 15,  15  ], [ 30   ],
[ 12,  8   ], [ 20   ],

]);

my $err = 10;
# Stop after 4096 epochs -- don't want to wait more than that
for ( my $i = 0; ($err > 0.0001) && ($i < 4096); $i++ ) {
    $err = $dataset->learn($network);
    print "Epoch = $i error = $err\n";
}

foreach (@{$dataset->run($network)})
{
    foreach (@$_){print $_}
    print "\n";    
}

print "this should be 4000 - ";
$network->run([2000,2000]);
foreach ( @{$network->output}){print $_."\n";}

 foreach my $a ( 1..10 ) {
     foreach my $b ( 1..10 ) {
     my($ans) = $a+$b;
     my($nnans) = @{$network->run([$a,$b])};
     print "[$a] [$b] ans=$ans but nnans=$nnans\n" unless $ans == $nnans;
     }
 }

view all matches for this distribution

AI-NNVMCAPI

10 results

view release on metacpan or search on metacpan

lib/AI/NNVMCAPI.pm view on Meta::CPAN

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

package AI::NNVMCAPI;
use strict;
use base qw(DynaLoader);
bootstrap AI::NNVMCAPI;
our $VERSION = '1.4';
1;
__END__

=head1 NAME

AI::NNVMCAPI - Swig interface to nnvm c api

=head1 SYNOPSIS

 use AI::NNVMCAPI;

=head1 DESCRIPTION

This module provides interface to nnvm
via its api.

=head1 SEE ALSO

L<AI::MXNet>

=head1 AUTHOR

Sergey Kolychev, <sergeykolychev.github@gmail.com>

=head1 COPYRIGHT & LICENSE

This library is licensed under Apache 2.0 license.

See https://www.apache.org/licenses/LICENSE-2.0 for more information.

=cut

view all matches for this distribution

AI-NaiveBayes

19 results

view release on metacpan or search on metacpan

lib/AI/NaiveBayes.pm view on Meta::CPAN

package AI::NaiveBayes;
$AI::NaiveBayes::VERSION = '0.04';
use strict;
use warnings;
use 5.010;
use AI::NaiveBayes::Classification;
use AI::NaiveBayes::Learner;
use Moose;
use MooseX::Storage;

use List::Util qw(max);

with Storage(format => 'Storable', io => 'File');

has model   => (is => 'ro', isa => 'HashRef[HashRef]', required => 1);

sub train {
    my $self = shift;
    my $learner = AI::NaiveBayes::Learner->new();
    for my $example ( @_ ){
        $learner->add_example( %$example );
    }
    return $learner->classifier;
}


sub classify {
    my ($self, $newattrs) = @_;
    $newattrs or die "Missing parameter for classify()";

    my $m = $self->model;

    # Note that we're using the log(prob) here.  That's why we add instead of multiply.

    my %scores = %{$m->{prior_probs}};
    my %features;
    while (my ($feature, $value) = each %$newattrs) {
        next unless exists $m->{attributes}{$feature};  # Ignore totally unseen features
        while (my ($label, $attributes) = each %{$m->{probs}}) {
            my $score = ($attributes->{$feature} || $m->{smoother}{$label})*$value;  # P($feature|$label)**$value
            $scores{$label} += $score;
            $features{$feature}{$label} = $score;
        }
    }

    rescale(\%scores);

    return AI::NaiveBayes::Classification->new( label_sums => \%scores, features => \%features );
}

sub rescale {
    my ($scores) = @_;

    # Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
    my $total = 0;
    my $max = max(values %$scores);
    foreach (values %$scores) {
        $_ = exp($_ - $max);
        $total += $_**2;
    }
    $total = sqrt($total);
    foreach (values %$scores) {
        $_ /= $total;
    }
}


__PACKAGE__->meta->make_immutable;

1;

=pod

=encoding UTF-8

=head1 NAME

AI::NaiveBayes - A Bayesian classifier

=head1 VERSION

version 0.04

=head1 SYNOPSIS

    # AI::NaiveBayes objects are created by AI::NaiveBayes::Learner
    # but for quick start you can use the 'train' class method
    # that is a shortcut using default AI::NaiveBayes::Learner settings

    my $classifier = AI::NaiveBayes->train( 
        {
            attributes => {
                sheep => 1, very => 1,  valuable => 1, farming => 1
            },
            labels => ['farming']
        },
        {
            attributes => {
                vampires => 1, cannot => 1, see => 1, their => 1,
                images => 1, mirrors => 1
            },
            labels => ['vampire']
        },
    );

    # Classify a feature vector
    my $result = $classifier->classify({bar => 3, blurp => 2});
    
    # $result is now a AI::NaiveBayes::Classification object
    
    my $best_category = $result->best_category;

=head1 DESCRIPTION

This module implements the classic "Naive Bayes" machine learning
algorithm.  This is a low level class that accepts only pre-computed feature-vectors
as input, see L<AI::Classifier::Text> for a text classifier that uses
this class.  

Creation of C<AI::NaiveBayes> classifier object out of training
data is done by L<AI::NaiveBayes::Learner>. For quick start 
you can use the limited C<train> class method that trains the 
classifier in a default way.

The classifier object is immutable.

It is a well-studied probabilistic algorithm often used in
automatic text categorization.  Compared to other algorithms (kNN,
SVM, Decision Trees), it's pretty fast and reasonably competitive in
the quality of its results.

A paper by Fabrizio Sebastiani provides a really good introduction to
text categorization:
L<http://faure.iei.pi.cnr.it/~fabrizio/Publications/ACMCS02.pdf>

=head1 METHODS

=over 4

=item new( model => $model )

Internal. See L<AI::NaiveBayes::Learner> to learn how to create a C<AI::NaiveBayes>
classifier from training data.

=item train( LIST of HASHREFS )

Shortcut for creating a trained classifier using L<AI::NaiveBayes::Learner> default
settings. 
Arguments are passed to the C<add_example> method of the L<AI::NaiveBayes::Learner>
object one by one.

=item classify( HASHREF )

Classifies a feature-vector of the form:

    { feature1 => weight1, feature2 => weight2, ... }

The result is a C<AI::NaiveBayes::Classification> object.

=item rescale

Internal

=back

=head1 ATTRIBUTES 

=over 4

=item model

Internal

=back

=head1 THEORY

Bayes' Theorem is a way of inverting a conditional probability. It
states:

    P(y|x) P(x)
        P(x|y) = -------------
    P(y)

The notation C<P(x|y)> means "the probability of C<x> given C<y>."  See also
L<"http://mathforum.org/dr.math/problems/battisfore.03.22.99.html">
for a simple but complete example of Bayes' Theorem.

In this case, we want to know the probability of a given category given a
certain string of words in a document, so we have:

    P(words | cat) P(cat)
        P(cat | words) = --------------------
    P(words)

We have applied Bayes' Theorem because C<P(cat | words)> is a difficult
quantity to compute directly, but C<P(words | cat)> and C<P(cat)> are accessible
(see below).

The greater the expression above, the greater the probability that the given
document belongs to the given category.  So we want to find the maximum
value.  We write this as

    P(words | cat) P(cat)
        Best category =   ArgMax      -----------------------
    cat in cats          P(words)

Since C<P(words)> doesn't change over the range of categories, we can get rid
of it.  That's good, because we didn't want to have to compute these values
anyway.  So our new formula is:

    Best category =   ArgMax      P(words | cat) P(cat)
        cat in cats

Finally, we note that if C<w1, w2, ... wn> are the words in the document,
then this expression is equivalent to:

    Best category =   ArgMax      P(w1|cat)*P(w2|cat)*...*P(wn|cat)*P(cat)
        cat in cats

That's the formula I use in my document categorization code.  The last
step is the only non-rigorous one in the derivation, and this is the
"naive" part of the Naive Bayes technique.  It assumes that the
probability of each word appearing in a document is unaffected by the
presence or absence of each other word in the document.  We assume
this even though we know this isn't true: for example, the word
"iodized" is far more likely to appear in a document that contains the
word "salt" than it is to appear in a document that contains the word
"subroutine".  Luckily, as it turns out, making this assumption even
when it isn't true may have little effect on our results, as the
following paper by Pedro Domingos argues:
L<"http://www.cs.washington.edu/homes/pedrod/mlj97.ps.gz">

=head1 SEE ALSO

Algorithm::NaiveBayes (3), AI::Classifier::Text(3) 

=head1 BASED ON

Much of the code and description is from L<Algorithm::NaiveBayes>.

=head1 AUTHORS

=over 4

=item *

Zbigniew Lukasiak <zlukasiak@opera.com>

=item *

Tadeusz SoÅ›nierz <tsosnierz@opera.com>

=item *

Ken Williams <ken@mathforum.org>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2012 by Opera Software ASA.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__END__


# ABSTRACT: A Bayesian classifier

view all matches for this distribution

AI-NaiveBayes1

41 results

view release on metacpan or search on metacpan

NaiveBayes1.pm view on Meta::CPAN

# (c) 2003-21 Vlado Keselj https://web.cs.dal.ca/~vlado

package AI::NaiveBayes1;
use strict;
require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT = qw(new);
use vars qw($Version);
$Version = $VERSION = '2.012';

use vars @EXPORT_OK;

# non-exported package globals go here
use vars qw();

sub new {
  my $package = shift;
  return bless {
                attributes => [ ],
		labels     => [ ],
		attvals    => {},
		real_stat  => {},
		numof_instances => 0,
		stat_labels => {},
		stat_attributes => {},
		smoothing => {},
		attribute_type => {},
	       }, $package;
}

sub set_real {
    my ($self, @attr) = @_;
    foreach my $a (@attr) { $self->{attribute_type}{$a} = 'real' }
}

sub import_from_YAML {
    my $package = shift;
    my $yaml = shift;
    my $self = YAML::Load($yaml);
    return bless $self, $package;
}

sub import_from_YAML_file {
    my $package = shift;
    my $yamlf = shift;
    my $self = YAML::LoadFile($yamlf);
    return bless $self, $package;
}

# assume that the last header count means counts
# after optionally removing counts, the last header is label
sub add_table {
    my $self = shift;
    my @atts = (); my $lbl=''; my $cnt = '';
    while (@_) {
	my $table = shift;
	if ($table =~ /^(.*)\n[ \t]*-+\n/) {
	    my $a = $1; $table = $';
	    $a =~ s/^\s+//; $a =~ s/\s+$//;
	    if ($a =~ /\s*\bcount\s*$/) {
		$a=$`; $cnt=1; } else { $cnt='' }
	    @atts = split(/\s+/, $a);
	    $lbl = pop @atts;
	}
	while ($table ne '') {
	    $table =~ /^(.*)\n?/ or die;
	    my $r=$1; $table = $';
	    $r =~ s/^\s+//; $r=~ s/\s+$//;
	    if ($r =~ /^-+$/) { next }
	    my @v = split(/\s+/, $r);
	    die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
                 "count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
	    my %av=(); my @a = @atts;
	    while (@a) { $av{shift @a} = shift(@v) }
	    $self->add_instances(attributes=>\%av,
				 label=>"$lbl=$v[0]",
				 cases=>($cnt?$v[1]:1) );
	}
    }
} # end of add_table

# Simplified; not generally compatible.
# Assume that the last header is label.  The first row contains
# attribute names.
sub add_csv_file {
    my $self = shift; my $fn = shift; local *F;
    open(F,$fn) or die "Cannot open CSV file `$fn': $!";
    local $_ = <F>; my @atts = (); my $lbl=''; my $cnt = '';
    chomp; @atts = split(/\s*,\s*/, $_); $lbl = pop @atts;
    while (<F>) {
	chomp; my @v = split(/\s*,\s*/, $_);
	die "values (#=$#v): {@v}\natts (#=$#atts): @atts, lbl=$lbl,\n".
	    "count: $cnt\n" unless $#v-($cnt?2:1) == $#atts;
	my %av=(); my @a = @atts;
	while (@a) { $av{shift @a} = shift(@v) }
	$self->add_instances(attributes=>\%av,
			     label=>"$lbl=$v[0]",
			     cases=>($cnt?$v[1]:1) );
    }
    close(F);
} # end of add_csv_file

sub drop_attributes {
    my $self = shift;
    foreach my $a (@_) {
	my @tmp = grep { $a ne $_ } @{ $self->{attributes} };
	$self->{attributes} = \@tmp;
	delete($self->{attvals}{$a});
	delete($self->{stat_attributes}{$a});
	delete($self->{attribute_type}{$a});
	delete($self->{real_stat}{$a});
	delete($self->{smoothing}{$a});
    }
} # end of drop_attributes

sub add_instances {
  my ($self, %params) = @_;
  for ('attributes', 'label', 'cases') {
      die "Missing required '$_' parameter" unless exists $params{$_};
  }

  if (scalar(keys(%{ $self->{stat_attributes} })) == 0) {
      foreach my $a (keys(%{$params{attributes}})) {
	  $self->{stat_attributes}{$a} = {};
	  push @{ $self->{attributes} }, $a;
	  $self->{attvals}{$a} = [ ];
	  $self->{attribute_type}{$a} = 'nominal' unless defined($self->{attribute_type}{$a});
      }
  } else {
      foreach my $a (keys(%{$self->{stat_attributes}}))
      { die "attribute not given in instance: $a"
	    unless exists($params{attributes}{$a}) }
  }

  $self->{numof_instances} += $params{cases};

  push @{ $self->{labels} }, $params{label} unless
      exists $self->{stat_labels}->{$params{label}};

  $self->{stat_labels}{$params{label}} += $params{cases};

  foreach my $a (keys(%{$self->{stat_attributes}})) {
      if ( not exists($params{attributes}{$a}) )
      { die "attribute $a not given" }
      my $attval = $params{attributes}{$a};
      if (not exists($self->{stat_attributes}{$a}{$attval})) {
	  push @{ $self->{attvals}{$a} }, $attval;
	  $self->{stat_attributes}{$a}{$attval} = {};
      }
      $self->{stat_attributes}{$a}{$attval}{$params{label}} += $params{cases};
  }
}

sub add_instance {
    my ($self, %params) = @_; $params{cases} = 1;
    $self->add_instances(%params);
}

sub train {
    my $self = shift;
    my $m = $self->{model} = {};
    
    $m->{labelprob} = {};
    foreach my $label (keys(%{$self->{stat_labels}}))
    { $m->{labelprob}{$label} = $self->{stat_labels}{$label} /
                                $self->{numof_instances} } 

    $m->{condprob} = {};
    $m->{condprobe} = {};
    foreach my $att (keys(%{$self->{stat_attributes}})) {
        next if $self->{attribute_type}{$att} eq 'real';
	$m->{condprob}{$att} = {};
	$m->{condprobe}{$att} = {};
	foreach my $label (keys(%{$self->{stat_labels}})) {
	    my $total = 0; my @attvals = ();
	    foreach my $attval (keys(%{$self->{stat_attributes}{$att}})) {
		next unless
		    exists($self->{stat_attributes}{$att}{$attval}{$label}) and
		    $self->{stat_attributes}{$att}{$attval}{$label} > 0;
		push @attvals, $attval;
		$m->{condprob}{$att}{$attval} = {} unless
		    exists( $m->{condprob}{$att}{$attval} );
		$m->{condprob}{$att}{$attval}{$label} = 
		    $self->{stat_attributes}{$att}{$attval}{$label};
		$m->{condprobe}{$att}{$attval} = {} unless
		    exists( $m->{condprob}{$att}{$attval} );
		$m->{condprobe}{$att}{$attval}{$label} = 
		    $self->{stat_attributes}{$att}{$attval}{$label};
		$total += $m->{condprob}{$att}{$attval}{$label};
	    }
	    if (exists($self->{smoothing}{$att}) and
		$self->{smoothing}{$att} =~ /^unseen count=/) {
		my $uc = $'; $uc = 0.5 if $uc <= 0;
		if(! exists($m->{condprob}{$att}{'*'}) ) {
		    $m->{condprob}{$att}{'*'} = {};
		    $m->{condprobe}{$att}{'*'} = {};
		}
		$m->{condprob}{$att}{'*'}{$label} = $uc;
		$total += $uc;
		if (grep {$_ eq '*'} @attvals) { die }
		push @attvals, '*';
	    }
	    foreach my $attval (@attvals) {
		$m->{condprobe}{$att}{$attval}{$label} =
		    "(= $m->{condprob}{$att}{$attval}{$label} / $total)";
		$m->{condprob}{$att}{$attval}{$label} /= $total;
	    }
	}
    }

    # For real-valued attributes, we use Gaussian distribution
    # let us collect statistics
    foreach my $att (keys(%{$self->{stat_attributes}})) {
        next unless $self->{attribute_type}{$att} eq 'real';
	print STDERR "Smoothing ignored for real attribute $att!\n" if
	    defined($self->{smoothing}{att}) and $self->{smoothing}{att};
        $m->{real_stat}->{$att} = {};
        foreach my $attval (keys %{$self->{stat_attributes}{$att}}){
            foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
                $m->{real_stat}{$att}{$label}{sum}
                += $attval * $self->{stat_attributes}{$att}{$attval}{$label};

                $m->{real_stat}{$att}{$label}{count}
                += $self->{stat_attributes}{$att}{$attval}{$label};
            }
            foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
		next if
                !defined($m->{real_stat}{$att}{$label}{count}) ||
		$m->{real_stat}{$att}{$label}{count} == 0;

                $m->{real_stat}{$att}{$label}{mean} =
                    $m->{real_stat}{$att}{$label}{sum} /
                        $m->{real_stat}{$att}{$label}{count};
            }
        }

        # calculate stddev
        foreach my $attval (keys %{$self->{stat_attributes}{$att}}) {
            foreach my $label (keys %{$self->{stat_attributes}{$att}{$attval}}){
                $m->{real_stat}{$att}{$label}{stddev} +=
		    ($attval - $m->{real_stat}{$att}{$label}{mean})**2 *
		    $self->{stat_attributes}{$att}{$attval}{$label};
            }
        }
	foreach my $label (keys %{$m->{real_stat}{$att}}) {
	    $m->{real_stat}{$att}{$label}{stddev} =
		sqrt($m->{real_stat}{$att}{$label}{stddev} /
		     ($m->{real_stat}{$att}{$label}{count}-1)
		     );
	}
    }				# foreach real attribute
}				# end of sub train

sub predict {
  my ($self, %params) = @_;
  my $newattrs = $params{attributes} or die "Missing 'attributes' parameter for predict()";
  my $m = $self->{model};  # For convenience
  
  my %scores;
  my @labels = @{ $self->{labels} };
  $scores{$_} = $m->{labelprob}{$_} foreach (@labels);
  foreach my $att (keys(%{ $newattrs })) {
      if (!defined($self->{attribute_type}{$att})) { die "Unknown attribute: `$att'" }
      next if $self->{attribute_type}{$att} eq 'real';
      die unless exists($self->{stat_attributes}{$att});
      my $attval = $newattrs->{$att};
      die "Unknown value `$attval' for attribute `$att'."
      unless exists($self->{stat_attributes}{$att}{$attval}) or
	  exists($self->{smoothing}{$att});
      foreach my $label (@labels) {
	  if (exists($m->{condprob}{$att}{$attval}) and
	      exists($m->{condprob}{$att}{$attval}{$label}) and
	      $m->{condprob}{$att}{$attval}{$label} > 0 ) {
	      $scores{$label} *=
		  $m->{condprob}{$att}{$attval}{$label};
	  } elsif (exists($self->{smoothing}{$att})) {
	      $scores{$label} *=
                  $m->{condprob}{$att}{'*'}{$label};
	  } else { $scores{$label} = 0 }

      }
  }

  foreach my $att (keys %{$newattrs}){
      next unless $self->{attribute_type}{$att} eq 'real';
      my $sum=0; my %nscores;
      foreach my $label (@labels) {
	  die unless exists $m->{real_stat}{$att}{$label}{mean};
	  $nscores{$label} =
              0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
              exp( -0.5 *
                  ( ( $newattrs->{$att} -
                      $m->{real_stat}{$att}{$label}{mean})
                    / $m->{real_stat}{$att}{$label}{stddev}
                  ) ** 2
		 );
	  $sum += $nscores{$label};
      }
      if ($sum==0) { print STDERR "Ignoring all Gaussian probabilities: all=0!\n" }
      else {
	  foreach my $label (@labels) { $scores{$label} *= $nscores{$label} }
      }
  }

  my $sumPx = 0.0;
  $sumPx += $scores{$_} foreach (keys(%scores));
  $scores{$_} /= $sumPx foreach (keys(%scores));
  return \%scores;
}

sub print_model {
    my $self = shift;
    my $withcounts = '';
    if ($#_>-1 && $_[0] eq 'with counts')
    { shift @_; $withcounts = 1; }
    my $m = $self->{model};
    my @labels = $self->labels;
    my $r;

    # prepare table category P(category)
    my @lines;
    push @lines, 'category ', '-';
    push @lines, "$_ " foreach @labels;
    @lines = _append_lines(@lines);
    @lines = map { $_.='| ' } @lines;
    $lines[1] = substr($lines[1],0,length($lines[1])-2).'+-';
    $lines[0] .= "P(category) ";
    foreach my $i (2..$#lines) {
	my $label = $labels[$i-2];
	$lines[$i] .= $m->{labelprob}{$label} .' ';
	if ($withcounts) {
	    $lines[$i] .= "(= $self->{stat_labels}{$label} / ".
		"$self->{numof_instances} ) ";
	}
    }
    @lines = _append_lines(@lines);

    $r .= join("\n", @lines) . "\n". $lines[1]. "\n\n";

    # prepare conditional tables
    my @attributes = sort $self->attributes;
    foreach my $att (@attributes) {
	@lines = ( "category ", '-' );
	my @lines1 = ( "$att ", '-' );
	my @lines2 = ( "P( $att | category ) ", '-' );
	my @attvals = sort keys(%{ $m->{condprob}{$att} });
	foreach my $label (@labels) {
	    if ( $self->{attribute_type}{$att} ne 'real' ) {
		foreach my $attval (@attvals) {
		    next unless exists($m->{condprob}{$att}{$attval}{$label});
		    push @lines, "$label ";
		    push @lines1, "$attval ";

		    my $line = $m->{condprob}{$att}{$attval}{$label};
		    if ($withcounts)
		    { $line.= ' '.$m->{condprobe}{$att}{$attval}{$label} }
		    $line .= ' ';
		    push @lines2, $line;
		}
	    } else {
		push @lines, "$label ";
		push @lines1, "real ";
		push @lines2, "Gaussian(mean=".
		    $m->{real_stat}{$att}{$label}{mean}.",stddev=".
		    $m->{real_stat}{$att}{$label}{stddev}.") ";
	    }
	    push @lines, '-'; push @lines1, '-'; push @lines2, '-';
	}
	@lines = _append_lines(@lines);
	foreach my $i (0 .. $#lines)
	{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines1[$i] }
	@lines = _append_lines(@lines);
	foreach my $i (0 .. $#lines)
	{ $lines[$i] .= ($lines[$i]=~/-$/?'+-':'| ') . $lines2[$i] }
	@lines = _append_lines(@lines);

	$r .= join("\n", @lines). "\n\n";
    }

    return $r;
}

sub _append_lines {
    my @l = @_;
    my $m = 0;
    foreach (@l) { $m = length($_) if length($_) > $m }
    @l = map 
    { while (length($_) < $m) { $_.=substr($_,length($_)-1) }; $_ }
    @l;
    return @l;
}

sub labels {
  my $self = shift;
  return @{ $self->{labels} };
}

sub attributes {
  my $self = shift;
  return keys %{ $self->{stat_attributes} };
}

sub export_to_YAML {
    my $self = shift;
    require YAML;
    return YAML::Dump($self);
}

sub export_to_YAML_file {
    my $self = shift;
    my $file = shift;
    require YAML;
    YAML::DumpFile($file, $self);
}

1;
__END__

=head1 NAME

AI::NaiveBayes1 - Naive Bayes Classification

=head1 SYNOPSIS

  use AI::NaiveBayes1;
  my $nb = AI::NaiveBayes1->new;
  $nb->add_table(
  "Html  Caps  Free  Spam  count
  -------------------------------
     Y     Y     Y     Y    42   
     Y     Y     Y     N    32   
     Y     Y     N     Y    17   
     Y     Y     N     N     7   
     Y     N     Y     Y    32   
     Y     N     Y     N    12   
     Y     N     N     Y    20   
     Y     N     N     N    16   
     N     Y     Y     Y    38   
     N     Y     Y     N    18   
     N     Y     N     Y    16   
     N     Y     N     N    16   
     N     N     Y     Y     2   
     N     N     Y     N     9   
     N     N     N     Y    11   
     N     N     N     N    91   
  -------------------------------
  ");
  $nb->train;
  print "Model:\n" . $nb->print_model;
  print "Model (with counts):\n" . $nb->print_model('with counts');

  $nb = AI::NaiveBayes1->new;
  $nb->add_instances(attributes=>{model=>'H',place=>'B'},
		     label=>'repairs=Y',cases=>30);
  $nb->add_instances(attributes=>{model=>'H',place=>'B'},
		     label=>'repairs=N',cases=>10);
  $nb->add_instances(attributes=>{model=>'H',place=>'N'},
		     label=>'repairs=Y',cases=>18);
  $nb->add_instances(attributes=>{model=>'H',place=>'N'},
		     label=>'repairs=N',cases=>16);
  $nb->add_instances(attributes=>{model=>'T',place=>'B'},
		     label=>'repairs=Y',cases=>22);
  $nb->add_instances(attributes=>{model=>'T',place=>'B'},
		     label=>'repairs=N',cases=>14);
  $nb->add_instances(attributes=>{model=>'T',place=>'N'},
		     label=>'repairs=Y',cases=> 6);
  $nb->add_instances(attributes=>{model=>'T',place=>'N'},
		     label=>'repairs=N',cases=>84);

  $nb->train;

  print "Model:\n" . $nb->print_model;
  
  # Find results for unseen instances
  my $result = $nb->predict
     (attributes => {model=>'T', place=>'N'});

  foreach my $k (keys(%{ $result })) {
      print "for label $k P = " . $result->{$k} . "\n";
  }

  # export the model into a string
  my $string = $nb->export_to_YAML();

  # create the same model from the string
  my $nb1 = AI::NaiveBayes1->import_from_YAML($string);

  # write the model to a file (shorter than model->string->file)
  $nb->export_to_YAML_file('t/tmp1');

  # read the model from a file (shorter than file->string->model)
  my $nb2 = AI::NaiveBayes1->import_from_YAML_file('t/tmp1');

See Examples for more examples.

=head1 DESCRIPTION

This module implements the classic "Naive Bayes" machine learning
algorithm.

=head2 Data Structure

An object contains the following fields:

=over 4

=item C<{attributes}>

List of attribute names.

=item C<{attribute_type}{$a}>

Attribute types - 'real', or not (e.g., 'nominal')

=item C<{labels}>

List of labels.

=item C<{attvals}{$a}>

List of attribute values

=item C<{real_stat}{$a}{$v}{$l}{sum}>

Statistics for real valued attributes; besides 'sum' also: count, mean, stddev

=item C<{numof_instances}>

Number of training instances.

=item C<{stat_labels}{$l}>

Label count in training data.

=item C<{stat_attributes}{$a}>

Statistics for an attribute: C<...{$value}{$label}> = count of
instances.

=item C<{smoothing}{$attribute}>

Attribute smoothing.  No smoothing if does not exist.  Implemented smoothing:

      - /^unseen count=/ followed by number, e.g., 0.5

=back

=head2 Attribute Smoothing

For an attribute A one can specify:

    $nb->{smoothing}{A} = 'unseen count=0.5';

to provide a count for unseen data.  The count is taken into
consideration in training and prediction, when any unseen attribute
values are observed.  Zero probabilities can be prevented in this way.
A count other than 0.5 can be provided, but if it is <=0 it will be
set to 0.5.  The method is similar to add-one smoothing.  A special
attribute value '*' is used for all unseen data. 

=head1 METHODS

=head2 Constructor Methods

=over 4

=item new()

Constructor. Creates a new C<AI::NaiveBayes1> object and returns it.

=item import_from_YAML($string)

Constructor. Creates a new C<AI::NaiveBayes1> object from a string where it is
represented in C<YAML>.  Requires YAML module.

=item import_from_YAML_file($file_name)

Constructor. Creates a new C<AI::NaiveBayes1> object from a file where it is
represented in C<YAML>.  Requires YAML module.

=back

=head2 Non-Constructor Methods

=over 4

=item add_table()

Add instances from a table.  The first row are attributes, followed by
values.  If the name of the last attribute is `count', it is
interpreted as a repetition count and used appropriatelly.  The last
attribute (after optionally removing `count') is the class attribute.
The attributes and values are separated by white space.

=item add_csv_file($filename)

Add instances from a CSV file.  Primitive format implementation (e.g.,
no commas allowed in attribute names or values).

=item drop_attributes(@attributes)

Delete attributes after adding instances.

=item set_real(list_of_attributes)

Delares a list of attributes to be real-valued.  During training,
their conditional probabilities will be modeled with Gaussian (normal)
distributions. 

=item C<add_instance(attributes=E<gt>HASH,label=E<gt>STRING|ARRAY)>

Adds a training instance to the categorizer.

=item C<add_instances(attributes=E<gt>HASH,label=E<gt>STRING|ARRAY,cases=E<gt>NUMBER)>

Adds a number of identical instances to the categorizer.

=item export_to_YAML()

Returns a C<YAML> string representation of an C<AI::NaiveBayes1>
object.  Requires YAML module.

=item C<export_to_YAML_file( $file_name )>

Writes a C<YAML> string representation of an C<AI::NaiveBayes1>
object to a file.  Requires YAML module.

=item C<print_model( OPTIONAL 'with counts' )>

Returns a string, human-friendly representation of the model.
The model is supposed to be trained before calling this method.
One argument 'with counts' can be supplied, in which case explanatory
expressions with counts are printed as well.

=item train()

Calculates the probabilities that will be necessary for categorization
using the C<predict()> method.

=item C<predict( attributes =E<gt> HASH )>

Use this method to predict the label of an unknown instance.  The
attributes should be of the same format as you passed to
C<add_instance()>.  C<predict()> returns a hash reference whose keys
are the names of labels, and whose values are corresponding
probabilities.

=item C<labels>

Returns a list of all the labels the object knows about (in no
particular order), or the number of labels if called in a scalar
context.

=back

=head1 THEORY

Bayes' Theorem is a way of inverting a conditional probability. It
states:

                P(y|x) P(x)
      P(x|y) = -------------
                   P(y)

and so on...

This is a pretty standard algorithm explained in many machine learning
textbooks (e.g., "Data Mining" by Witten and Eibe).

The algorithm relies on estimating P(A|C), where A is an arbitrary
attribute, and C is the class attribute.  If A is not real-valued,
then this conditional probability is estimated using a table of all
possible values for A and C.

If A is real-valued, then the distribution P(A|C) is modeled as a
Gaussian (normal) distribution for each possible value of C=c,  Hence,
for each C=c we collect the mean value (m) and standard deviation (s)
for A during training.  During classification, P(A=a|C=c) is estimated
using Gaussian distribution, i.e., in the following way:

                    1               (a-m)^2
 P(A=a|C=c) = ------------ * exp( - ------- )
              sqrt(2*Pi)*s           2*s^2

this boils down to the following lines of code:

    $scores{$label} *=
    0.398942280401433 / $m->{real_stat}{$att}{$label}{stddev}*
      exp( -0.5 *
           ( ( $newattrs->{$att} -
               $m->{real_stat}{$att}{$label}{mean})
             / $m->{real_stat}{$att}{$label}{stddev}
           ) ** 2
	   );

i.e.,

  P(A=a|C=c) = 0.398942280401433 / s *
    exp( -0.5 * ( ( a-m ) / s ) ** 2 );


=head1 EXAMPLES

Example with a real-valued attribute modeled by a Gaussian
distribution (from Witten I. and Frank E. book "Data Mining" (the WEKA
book), page 86):

 # @relation weather
 # 
 # @attribute outlook {sunny, overcast, rainy}
 # @attribute temperature real
 # @attribute humidity real
 # @attribute windy {TRUE, FALSE}
 # @attribute play {yes, no}
 # 
 # @data
 # sunny,85,85,FALSE,no
 # sunny,80,90,TRUE,no
 # overcast,83,86,FALSE,yes
 # rainy,70,96,FALSE,yes
 # rainy,68,80,FALSE,yes
 # rainy,65,70,TRUE,no
 # overcast,64,65,TRUE,yes
 # sunny,72,95,FALSE,no
 # sunny,69,70,FALSE,yes
 # rainy,75,80,FALSE,yes
 # sunny,75,70,TRUE,yes
 # overcast,72,90,TRUE,yes
 # overcast,81,75,FALSE,yes
 # rainy,71,91,TRUE,no
 
 $nb->set_real('temperature', 'humidity');
 
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>85,humidity=>85,windy=>'FALSE'},label=>'play=no');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>80,humidity=>90,windy=>'TRUE'},label=>'play=no');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>83,humidity=>86,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>70,humidity=>96,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>68,humidity=>80,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>65,humidity=>70,windy=>'TRUE'},label=>'play=no');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>64,humidity=>65,windy=>'TRUE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>72,humidity=>95,windy=>'FALSE'},label=>'play=no');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>69,humidity=>70,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>75,humidity=>80,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'sunny',temperature=>75,humidity=>70,windy=>'TRUE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>72,humidity=>90,windy=>'TRUE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'overcast',temperature=>81,humidity=>75,windy=>'FALSE'},label=>'play=yes');
 $nb->add_instance(attributes=>{outlook=>'rainy',temperature=>71,humidity=>91,windy=>'TRUE'},label=>'play=no');
 
 $nb->train;
 
 my $printedmodel =  "Model:\n" . $nb->print_model;
 my $p = $nb->predict(attributes=>{outlook=>'sunny',temperature=>66,humidity=>90,windy=>'TRUE'});

 YAML::DumpFile('file', $p);
 die unless (abs($p->{'play=no'}  - 0.792) < 0.001);
 die unless(abs($p->{'play=yes'} - 0.208) < 0.001);

=head1 HISTORY

L<Algorithm::NaiveBayes> by Ken Williams was not what I needed so I
wrote this one.  L<Algorithm::NaiveBayes> is oriented towards text
categorization, it includes smoothing, and log probabilities.  This
module is a generic, basic Naive Bayes algorithm.

=head1 THANKS

I would like to thank Daniel Bohmer for documentation corrections,
Yung-chung Lin (cpan:xern) for the implementation of the Gaussian model
for continuous variables, and the following people for bug reports, support,
and comments (in no particular order):

Michael Stevens, Tom Dyson, Dan Von Kohorn, Craig Talbert,
Andrew Brian Clegg,

and CPAN-testers, including: Andreas Koenig, Alexandr Ciornii, jlatour,
Jost.Krieger, tvmaly, Matthew Musgrove, Michael Stevens, Nigel Horne,
Graham Crookham, David Cantrell (dcantrell).

=head1 AUTHOR

Copyright 2003-21 Vlado Keselj L<https://web.cs.dal.ca/~vlado>.
In 2004 Yung-chung Lin provided implementation of the Gaussian model for
continous variables.

This script is provided "as is" without expressed or implied warranty.
This is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

The module is available on CPAN (L<https://metacpan.org/author/VLADO>), and
L<https://web.cs.dal.ca/~vlado/srcperl/>.  The latter site is
updated more frequently.

=head1 SEE ALSO

L<Algorithm::NaiveBayes>, L<perl>.

=cut

view all matches for this distribution

( run in 0.398 second using v1.01-cache-2.11-cpan-bf8d7bb2d05 )