AI-Gene-Sequence

 view release on metacpan or  search on metacpan

AI/Gene/Sequence.pm  view on Meta::CPAN

package AI::Gene::Sequence;
require 5.6.0;
use strict;
use warnings;

BEGIN {
  use Exporter   ();
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  $VERSION     = 0.22;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw();
}
our @EXPORT_OK;

my ($probs,$mut_keys) = _normalise( { map {$_ => 1} 
				      qw(insert remove overwrite 
					 duplicate minor major 
					 switch shuffle reverse) } );

##
# calls mutation method at random
# 0: number of mutations to perform
# 1: ref to hash of probs to use (otherwise uses default mutations and probs)

sub mutate {
  my $self = shift;
  my $num_mutates = +$_[0] || 1;
  my $rt = 0;
  my ($hr_probs, $muts);
  if (ref $_[1] eq 'HASH') { # use non standard mutations or probs
    ($hr_probs, $muts) = _normalise($_[1]);
  }
  else {                     # use standard mutations and probs
    $hr_probs = $probs;
    $muts = $mut_keys;
  }

 MUT_CYCLE: for (1..$num_mutates) {
    my $rand = rand;
    foreach my $mutation (@{$muts}) {
      next unless $rand < $hr_probs->{$mutation};
      my $mut = 'mutate_' . $mutation;
      $rt += $self->$mut(1);
      next MUT_CYCLE;
    }
  }
  return $rt;
}

##
# creates a normalised and cumulative prob distribution for the
# keys of the referenced hash

sub _normalise {
  my $hr = $_[0];
  my $h2 = {};
  my $muts = [keys %{$hr}];
  my $sum = 0;
  foreach (values %{$hr}) {
    $sum += $_;
  }
  if ($sum <= 0) {
    die "Cannot randomly mutate with bad probability distribution";
  }
  else {
    my $cum;
    @{$h2}{ @{$muts} } = map {$cum +=$_; $cum / $sum} @{$hr}{ @{$muts} };
    return ($h2, $muts);
  }
}

##
# inserts one element into the sequence
# 0: number to perform ( or 1)
# 1: position to mutate (undef for random)

sub mutate_insert {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    next if $pos > $length; # further than 1 place after gene
    my @token = $self->generate_token;
    my $new = $self->[0];
    substr($new, $pos, 0) = $token[0];
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos, 0, $token[1];
    $rt++;
  }
  return $rt;
}

##
# removes element(s) from sequence
# 0: number of times to perform
# 1: position to affect (undef for rand)
# 2: length to affect, undef => 1, 0 => random length

sub mutate_remove {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);
    next if ($length - $len) <= 0;
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    next if $pos >= $length; # outside of gene
    my $new = $self->[0];
    substr($new, $pos, $len) = '';
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos, $len;
    $rt++;
  }
  return $rt;
}

##
# copies an element or run of elements into a random place in the gene
# 0: number to perform (or 1)
# 1: posn to copy from (undef for rand)
# 2: posn to splice in (undef for rand)
# 3: length            (undef for 1, 0 for random)

sub mutate_duplicate {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    my $new = $self->[0];
    next if ($pos1 + $len) > $length;
    next if $pos2 > $length;
    my $seq = substr($new, $pos1, $len);
    substr($new, $pos2,0) = $seq;
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice @{$self->[1]}, $pos2, 0, @{$self->[1]}[$pos1..($pos1+$len-1)];
    $rt++;
  }
  return $rt;
}

##
# Duplicates a sequence and writes it on top of some other position
# 0: num to perform  (or 1)
# 1: pos to get from          (undef for rand)
# 2: pos to start replacement (undef for rand)
# 3: length to operate on     (undef => 1, 0 => rand)

sub mutate_overwrite {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $new = $self->[0];
    my $length = length $self->[0];
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    next if ( ($pos1 + $len) >= $length
	      or $pos2 > $length);
    substr($new, $pos2, $len) = substr($new, $pos1, $len);
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice (@{$self->[1]}, $pos2, $len,
	    @{$self->[1]}[$pos1..($pos1+$len-1)] );
    $rt++;
  }

  return $rt;
}

##
# Takes a run of tokens and reverses their order, is a noop with 1 item
# 0: number to perform
# 1: posn to start from (undef for rand)
# 2: length             (undef=>1, 0=>rand)

sub mutate_reverse {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $length = length $self->[0];
    my $new = $self->[0];
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);

    next if ($pos >= $length
	    or $pos + $len > $length);

    my $chunk = reverse split('', substr($new, $pos, $len));
    substr($new, $pos, $len) = join('', $chunk);
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    splice (@{$self->[1]}, $pos, $len,
	    reverse( @{$self->[1]}[$pos..($pos+$len-1)] ));
    $rt++;
  }
  return $rt;
}

##
# Changes token into one of same type (ie. passes type to generate..)
# 0: number to perform
# 1: position to affect (undef for rand)

sub mutate_minor {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
    next if $pos >= length($self->[0]); # pos lies outside of gene
    my $type = substr($self->[0], $pos, 1);
    my @token = $self->generate_token($type, $self->[1][$pos]);
    # still need to check for niceness, just in case
    if ($token[0] eq $type) {
      $self->[1][$pos] = $token[1];
    }
    else {
      my $new = $self->[0];
      substr($new, $pos, 1) = $token[0];
      next unless $self->valid_gene($new, $pos);
      $self->[0] = $new;
      $self->[1][$pos] = $token[1];
    }
    $rt++;
  }
  return $rt;
}

##
# Changes one token into some other token
# 0: number to perform
# 1: position to affect (undef for random)

sub mutate_major {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
    next if $pos >= length($self->[0]); # outside of gene
    my @token = $self->generate_token();
    my $new = $self->[0];
    substr($new, $pos, 1) = $token[0];
    next unless $self->valid_gene($new, $pos);
    $self->[0] = $new;
    $self->[1][$pos] = $token[1];
    $rt++;
  }
  return $rt;
}

##
# swaps over two sequences within the gene
# any sort of oddness can occur if regions overlap
# 0: number to perform
# 1: start of first sequence   (undef for rand)
# 2: start of second sequence  (undef for rand)
# 3: length of first sequence  (undef for 1, 0 for rand)
# 4: length of second sequence (undef for 1, 0 for rand)

sub mutate_switch {
  my $self = shift;
  my $num = $_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $length = length $self->[0];
    my $pos1 = defined $_[1] ? $_[1] : int rand $length;
    my $pos2 = defined $_[2] ? $_[2] : int rand $length;
    my $len1 = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
    my $len2 = !defined($_[4]) ? 1 : ($_[4] || int rand $length);

    my $new = $self->[0];
    next if $pos1 == $pos2;
    if ($pos1 > $pos2) { # ensure $pos1 comes first
      ($pos1, $pos2) = ($pos2, $pos1);
      ($len1, $len2) = ($len2, $len1);
    }
    if ( ($pos1 + $len1) > $pos2 # ensure no overlaps
	 or ($pos2 + $len2) > $length
	 or $pos1 >= $length ) {
      next;
    }
    my $chunk1 = substr($new, $pos1, $len1, substr($new, $pos2, $len2,''));
    substr($new,$pos2 -$len1 + $len2,0) = $chunk1;
    next unless $self->valid_gene($new);
    $self->[0]= $new;
    my @chunk1 = splice(@{$self->[1]}, $pos1, $len1,
			splice(@{$self->[1]}, $pos2, $len2) );
    splice @{$self->[1]}, $pos2 + $len2 - $len1,0, @chunk1;
    $rt++;
  }
  return $rt;
}

##
# takes a sequence, removes it, then inserts it at another position
# odd things might occur if posn to replace to lies within area taken from
# 0: number to perform
# 1: posn to get from   (undef for rand)
# 2: posn to put        (undef for rand)
# 3: length of sequence (undef for 1, 0 for rand)

sub mutate_shuffle {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $length = length $self->[0];
    my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);

    my $new = $self->[0];
    if ($pos1 +$len > $length   # outside gene
	or $pos2 >= $length      # outside gene
	or ($pos2 < ($pos1 + $len) and $pos2 > $pos1)) { # overlap
      next;
    }
    if ($pos1 < $pos2) {
      substr($new, $pos2-$len,0) = substr($new, $pos1, $len, '');
    }
    else {
      substr($new, $pos2, 0) = substr($new, $pos1, $len, '');
    }
    next unless $self->valid_gene($new);
    $self->[0] = $new;
    if ($pos1 < $pos2) {
      splice (@{$self->[1]}, $pos2-$len, 0, 
	      splice(@{$self->[1]}, $pos1, $len) );
    }
    else {
      splice(@{$self->[1]}, $pos2, 0,
	     splice(@{$self->[1]}, $pos1, $len) );
    }
    $rt++;
  }
  return $rt;
}

# These are intended to be overriden, simple versions are
# provided for the sake of testing.

# Generates things to make up genes
# can be called with a token type to produce, or with none.
# if called with a token type, it will also be passed the original
# token as the second argument.
# should return a two element list of the token type followed by the token itself.

sub generate_token {
  my $self = shift;
  my $token_type = $_[0];
  my $letter = ('a'..'z')[rand 25];
  unless ($token_type) {
    return ($letter) x2;
  }
  return ($token_type) x2;
}

# takes sting of token types to be checked for validity.
# If a mutation affects only one place, then the position of the
# mutation can be passed as a second argument.
sub valid_gene {1}

## You might also want to have methods like the following,
# they will not be called by the 'sequence' methods.

# Default constructor
sub new {
  my $gene = ['',[]];
  return bless $gene, ref $_[0] || $_[0];
}

# remember that clone method may require deep copying depending on
# your specific needs

sub clone {
  my $self = shift;
  my $new = [$self->[0]];
  $new->[1] = [@{$self->[1]}];
  return bless $new, ref $self;
}

# You need some way to use the gene you've made and mutated, but
# this will let you have a look, if it starts being odd.

sub render_gene {
  my $self = shift;
  my $return =  "$self\n";
  $return .= $self->[0] . "\n";
  $return .= (join ',', @{$self->[1]}). "\n";
  return $return;
}

# used for testing

sub _test_dump {
  my $self = shift;
  my @rt = ($self->[0], join('',@{$self->[1]}));
  return @rt;
}
1;

__END__;

=pod

=head1 NAME

 AI::Gene::Sequence

=head1 SYNOPSIS

A base class for storing and mutating genetic sequences.

 package Somegene;
 use AI::Gene::Sequence;
 our @ISA = qw(AI::Gene::Sequence);

 my %things = ( a => [qw(a1 a2 a3 a4 a5)],
	       b => [qw(b1 b2 b3 b4 b5)],);

 sub generate_token {
  my $self = shift;
  my ($type, $prev) = @_;
  if ($type) {
    $prev = ${ $things{$type} }[rand @{ $things{$type} }];
  } 
  else {
    $type = ('a','b')[rand 2];
    $prev = ${$things{$type}}[rand @{$things{$type}}];
  }
  return ($type, $prev); 
 }

 sub valid_gene {
   my $self = shift;
   return 0 if $_[0] =~ /(.)\1/;
   return 1;
 }

 sub seed {
   my $self = shift;
   $self->[0] = 'ababab';
   @{$self->[1]} = qw(A1 B1 A2 B2 A3 B3);
 }

 sub render {
   my $self = shift;
   return join(' ', @{$self->[1]});
 } 

 # elsewhere
 package main;

 my $gene = Somegene->new;
 $gene->seed;
 print $gene->render, "\n";
 $gene->mutate(5);
 print $gene->render, "\n";
 $gene->mutate(5);
 print $gene->render, "\n";

=head1 DESCRIPTION

This is a class which provides generic methods for the
creation and mutation of genetic sequences.  Various mutations
are provided as is a way to ensure that genes created by
mutations remain useful (for instance, if a gene gives rise to
code, it can be tested for correct syntax).

If you do not need to keep check on what sort of thing is
currently occupying a slot in the gene, you would be better
off using the AI::Gene::Simple class instead as this
will be somewhat faster.  The interface to the mutations is
the same though, so if you need to change in future, then
it will not be too painful.

This module should not be confused with the I<bioperl> modules
which are used to analyse DNA sequences.

It is intended that the methods in this code are inherited
by other modules.

=head2 Anatomy of a gene

A gene is a sequence of tokens, each a member of some group
of simillar tokens (they can of course all be members of a
single group).  This module encodes genes as a string
representing token types, and an array containing the
tokens themselves, this allows for arbitary data to be
stored as a token in a gene.

For instance, a regular expression could be encoded as:

 $self = ['ccartm',['a', 'b', '|', '[A-Z]', '\W', '*?'] ]

Using a string to indicate the sort of thing held at the
corresponding part of the gene allows for a simple test
of the validity of a proposed gene by using a regular
expression.

=head2 Using the module

To use the genetic sequences, you must write your own
implementations of the following methods:

=over 4

=item generate_token

=item valid_gene

=back

You may also want to override the following methods:

=over 4

=item new

=item clone

=item render_gene

=back

=head2 Mutation methods

Mutation methods are all named C<mutate_*>.  In general, the
first argument will be the number of mutations required, followed
by the positions in the genes which should be affected, followed
by the lengths of sequences within the gene which should be affected.
If positions are not defined, then random ones are chosen.  If
lengths are not defined, a length of 1 is assumed (ie. working on
single tokens only), if a length of 0 is requested, then a random
length is chosen.

Also, if a mutation is suggested but would result in an invalid
sequence, then the mutation will not be carried out.
If a mutation is attempted which could corrupt your gene (copying
from a region beyond the end of the gene for instance) then it
will be silently skipped.  Mutation methods all return the number
of mutations carried out (not the number of tokens affected).

These methods all expect to be passed positive integers, undef or zero,
other values could (and likely will) do something unpredictable.

=over 4

=item C<mutate([num, ref to hash of probs & methods])>

This will call at random one of the other mutation methods.
It will repeat itself I<num> times.  If passed a reference
to a hash as its second argument, it will use that to
decide which mutation to attempt.

This hash should contain keys which fit $1 in C<mutate_(.*)>
and values indicating the weight to be given to that method.
The module will normalise this nicely, so you do not have to.
This lets you define your own mutation methods in addition to
overriding any you do not like in the module.

=item C<mutate_insert([num, pos])>

Inserts a single token into the string at position I<pos>.
The token will be randomly generated by the calling object's 
C<generate_token> method.

=item C<mutate_overwrite([num, pos1, pos2, len])>

Copies a section of the gene (starting at I<pos1>, length I<len>)
and writes it back into the gene, overwriting current elements,
starting at I<pos2>.

=item C<mutate_reverse([num, pos, len])>

Takes a sequence within the gene and reverses the ordering of the
elements within that sequence.  Starts at position I<pos> for
length I<len>.

=item C<mutate_shuffle([num, pos1, pos2, len])>

This takes a sequence (starting at I<pos1> length I<len>)
 from within a gene and moves
it to another position (starting at I<pos2>).  Odd things might occur if the
position to move the sequence into lies within the
section to be moved, but the module will try its hardest
to cause a mutation.

=item C<mutate_duplicate([num, pos1, pos2, length])>

This copies a portion of the gene starting at I<pos1> of length
I<length> and then splices it into the gene before I<pos2>.

=item C<mutate_remove([num, pos, length]))>

Deletes I<length> tokens from the gene, starting at I<pos>. Repeats
I<num> times.

=item C<mutate_minor([num, pos])>

This will mutate a single token at position I<pos> in the gene 
into one of the same type (as decided by the object's C<generate_token>
method).

=item C<mutate_major([num, pos])>

This changes a single token into a token of any token type.
Token at postition I<pos>.  The token is produced by the object's
C<generate_token> method.

=item C<mutate_switch([num, pos1, pos2, len1, len2])>

This takes two sequences within the gene and swaps them
into each other's position.  The first starts at I<pos1>
with length I<len1> and the second at I<pos2> with length
I<len2>.  If the two sequences overlap, then no mutation will
be attempted.

=back

The following methods are also provided, but you will probably
want to overide them for your own genetic sequences.

=over 4

=item C<generate_token([token type, current token])>

This is used by the mutation methods when changing tokens or 
creating new ones.  It is expected to return a list consisting
of a single character to indicate the token type being produced
and the token itself.  Where it makes sense to do so the token
which is about to be modifed is passed along with the token type.
If the calling methods require a token of any type, then no
arguments will be passed to this method.

The provided version of this method returns a random character
from 'a'..'z' as both the token type and token.

=item C<valid_gene(string [, posn])>

This is used to determine if a proposed mutation is allowed.  This
method is passed a string of the whole gene's token types, it will
also be passed a position in the gene where this makes sense (for
instance, if only one token is to change).  It is expected to
return a true value if a change is acceptable and a false one
if it is not.

The provided version of this method always returns true.

=item C<clone()>

This returns a copy of the gene as a new object.  If you are using
nested genes, or other references as your tokens, then you may need
to produce your own version which will deep copy your structure.

=item C<new>

This returns an empty gene, into which you can put things.  If you
want to initialise your gene, or anything useful like that, then
you will need another one of these.

=item C<render_gene>

This is useful for debugging, returns a serialised summary of the
gene.

=back

=head1 AUTHOR

This module was written by Alex Gough (F<alex@rcon.org>).

=head1 SEE ALSO

For an illustration of the use of this module, see Regexgene.pm,
Musicgene.pm, spamscan.pl and music.pl from the gziped distribution.

=head1 COPYRIGHT

Copyright (c) 2000 Alex Gough <F<alex@rcon.org>>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 BUGS

This is very slow if you do not need to check that your mutations
create valid genes, but fast if you do, thems the breaks.  There
is a AI::Gene::Simple class instead if this bothers you.

Some methods will do odd things if you pass them weird values,
so try not to do that.  So long as you stick to passing
positive integers or C<undef> to the methods then they should
recover gracefully.

While it is easy and fun to write genetic and evolutionary
algorithms in perl, for most purposes, it will be much slower
than if they were implemented in another more suitable language.
There are some problems which do lend themselves to an approach
in perl and these are the ones where the time between mutations
will be large, for instance, when composing music where the
selection process is driven by human whims.

=cut

AI/Gene/Simple.pm  view on Meta::CPAN

package AI::Gene::Simple;
require 5.6.0;
use strict;
use warnings;

BEGIN {
  use Exporter   ();
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  $VERSION     = 0.20;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw();
}
our @EXPORT_OK;

my ($probs,$mut_keys) = _normalise( { map {$_ => 1} 
				      qw(insert remove overwrite 
					 duplicate minor major 
					 switch shuffle reverse) } );
##
# calls mutation method at random
# 0: number of mutations to perform
# 1: ref to hash of probs to use (otherwise uses default mutations and probs)

sub mutate {
  my $self = shift;
  my $num_mutates = +$_[0] || 1;
  my $rt = 0;
  my ($hr_probs, $muts);
  if (ref $_[1] eq 'HASH') { # use non standard mutations or probs
    ($hr_probs, $muts) = _normalise($_[1]);
  }
  else {                     # use standard mutations and probs
    $hr_probs = $probs;
    $muts = $mut_keys;
  }

 MUT_CYCLE: for (1..$num_mutates) {
    my $rand = rand;
    foreach my $mutation (@{$muts}) {
      next unless $rand < $hr_probs->{$mutation};
      my $mut = 'mutate_' . $mutation;
      $rt += $self->$mut(1);
      next MUT_CYCLE;
    }
  }
  return $rt;
}

##
# creates a normalised and cumulative prob distribution for the
# keys of the referenced hash

sub _normalise {
  my $hr = $_[0];
  my $h2 = {};
  my $muts = [keys %{$hr}];
  my $sum = 0;
  foreach (values %{$hr}) {
    $sum += $_;
  }
  if ($sum <= 0) {
    die "Cannot randomly mutate with bad probability distribution";
  }
  else {
    my $cum;
    @{$h2}{ @{$muts} } = map {$cum +=$_; $cum / $sum} @{$hr}{ @{$muts} };
    return ($h2, $muts);
  }
}

##
# inserts one element into the sequence
# 0: number to perform ( or 1)
# 1: position to mutate (undef for random)

sub mutate_insert {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $pos = defined($_[1]) ? $_[1] : int rand $glen;
    next if $pos > $glen; # further than 1 place after gene
    my $token = $self->generate_token;
    splice @{$self->[0]}, $pos, 0, $token;
    $rt++;
  }
  return $rt;
}

##
# removes element(s) from sequence
# 0: number of times to perform
# 1: position to affect (undef for rand)
# 2: length to affect, undef => 1, 0 => random length

sub mutate_remove {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $length = !defined($_[2]) ? 1 : ($_[2] || int rand $glen);
    return $rt if ($glen - $length) <= 0;
    my $pos = defined($_[1]) ? $_[1] : int rand $glen;
    next if $pos >= $glen; # outside of gene
    splice @{$self->[0]}, $pos, $length;
    $rt++;
  }
  return $rt;
}

##
# copies an element or run of elements into a random place in the gene
# 0: number to perform (or 1)
# 1: posn to copy from (undef for rand)
# 2: posn to splice in (undef for rand)
# 3: length            (undef for 1, 0 for random)

sub mutate_duplicate {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $length = !defined($_[3]) ? 1 : ($_[3] || int rand $glen);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $glen;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $glen;
    next if ($pos1 + $length) > $glen;
    next if $pos2 > $glen;
    splice @{$self->[0]}, $pos2, 0, @{$self->[0]}[$pos1..($pos1+$length-1)];
    $rt++;
  }
  return $rt;
}

##
# Duplicates a sequence and writes it on top of some other position
# 0: num to perform  (or 1)
# 1: pos to get from          (undef for rand)
# 2: pos to start replacement (undef for rand)
# 3: length to operate on     (undef => 1, 0 => rand)

sub mutate_overwrite {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $length = !defined($_[3]) ? 1 : ($_[3] || int rand $glen);
    my $pos1 = defined($_[1]) ? $_[1] : int rand $glen;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $glen;
    next if ( ($pos1 + $length) >= $glen
	      or $pos2 > $glen);
    splice (@{$self->[0]}, $pos2, $length,
	    @{$self->[0]}[$pos1..($pos1+$length-1)] );
    $rt++;
  }

  return $rt;
}

##
# Takes a run of tokens and reverses their order, is a noop with 1 item
# 0: number to perform
# 1: posn to start from (undef for rand)
# 2: length             (undef=>1, 0=>rand)

sub mutate_reverse {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $length = scalar @{$self->[0]};
    my $pos = defined($_[1]) ? $_[1] : int rand $length;
    my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);

    next if ($pos >= $length
	    or $pos + $len > $length);

    splice (@{$self->[0]}, $pos, $len,
	    reverse( @{$self->[0]}[$pos..($pos+$len-1)] ));
    $rt++;
  }
  return $rt;
}

##
# Changes token into one of same type (ie. passes type to generate..)
# 0: number to perform
# 1: position to affect (undef for rand)

sub mutate_minor {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $pos = defined $_[1] ? $_[1] : int rand $glen;
    next if $pos >= $glen;  # pos lies outside of gene
    my $type = $self->[0][$pos];
    my $token = $self->generate_token($type);
    $self->[0][$pos] = $token;
    $rt++;
  }
  return $rt;
}

##
# Changes one token into some other token
# 0: number to perform
# 1: position to affect (undef for random)

sub mutate_major {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $pos = defined $_[1] ? $_[1] : int rand $glen;
    next if $pos >= $glen ; # outside of gene
    my $token = $self->generate_token();
    $self->[0][$pos] = $token;
    $rt++;
  }
  return $rt;
}

##
# swaps over two sequences within the gene
# any sort of oddness can occur if regions overlap
# 0: number to perform
# 1: start of first sequence   (undef for rand)
# 2: start of second sequence  (undef for rand)
# 3: length of first sequence  (undef for 1, 0 for rand)
# 4: length of second sequence (undef for 1, 0 for rand)

sub mutate_switch {
  my $self = shift;
  my $num = $_[0] || 1;
  my $rt = 0;
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $pos1 = defined $_[1] ? $_[1] : int rand $glen;
    my $pos2 = defined $_[2] ? $_[2] : int rand length $glen;
    next if $pos1 == $pos2;
    my $len1 = !defined($_[3]) ? 1 : ($_[3] || int rand $glen);
    my $len2 = !defined($_[4]) ? 1 : ($_[4] || int rand $glen);

    if ($pos1 > $pos2) { # ensure $pos1 comes first
      ($pos1, $pos2) = ($pos2, $pos1);
      ($len1, $len2) = ($len2, $len1);
    }

    if ( ($pos1 + $len1) > $pos2 # ensure no overlaps
	 or ($pos2 + $len2) > $glen
	 or $pos1 >= $glen ) {
      next;
    }

    my @chunk1 = splice(@{$self->[0]}, $pos1, $len1,
			splice(@{$self->[0]}, $pos2, $len2) );
    splice @{$self->[0]}, $pos2 + $len2 - $len1,0, @chunk1;
    $rt++;
  }
  return $rt;
}

##
# takes a sequence, removes it, then inserts it at another position
# odd things might occur if posn to replace to lies within area taken from
# 0: number to perform
# 1: posn to get from   (undef for rand)
# 2: posn to put        (undef for rand)
# 3: length of sequence (undef for 1, 0 for rand)

sub mutate_shuffle {
  my $self = shift;
  my $num = +$_[0] || 1;
  my $rt = 0;
  
  for (1..$num) {
    my $glen = scalar @{$self->[0]};
    my $pos1 = defined($_[1]) ? $_[1] : int rand $glen;
    my $pos2 = defined($_[2]) ? $_[2] : int rand $glen;
    my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $glen);

    next if ($pos1 +$len > $glen                      # outside gene
	     or $pos2 >= $glen                        # outside gene
	     or ($pos2 < ($pos1 + $len) and $pos2 > $pos1)); # overlap

    if ($pos1 < $pos2) {
      splice (@{$self->[0]}, $pos2-$len, 0, 
	      splice(@{$self->[0]}, $pos1, $len) );
    }
    else {
      splice(@{$self->[0]}, $pos2, 0,
	     splice(@{$self->[0]}, $pos1, $len) );
    }
    $rt++;
  }
  return $rt;
}

# These are intended to be overriden, simple versions are
# provided for the sake of testing.

# Generates things to make up genes
# can be called with a token type to produce, or with none.
# if called with a token type, it will also be passed the original
# token as the second argument.
# should return a two element list of the token type followed by the token itself.

sub generate_token {
  my $self = shift;
  my $token_type = $_[0];
  my $letter = ('a'..'z')[rand 25];
  unless ($token_type) {
    return $letter;
  }
  return $token_type;
}

## You might also want to have methods like the following,
# they will not be called by the 'sequence' methods.

# Default constructor
sub new {
  my $gene = [[]]; # leave space for other info
  return bless $gene, ref $_[0] || $_[0];
}

# remember that clone method may require deep copying depending on
# your specific needs

sub clone {
  my $self = shift;
  my $new = [];
  $new->[0] = [@{$self->[0]}];
  return bless $new, ref $self;
}

# You need some way to use the gene you've made and mutated, but
# this will let you have a look, if it starts being odd.

sub render_gene {
  my $self = shift;
  my $return =  "$self\n";
  $return .= (join ',', @{$self->[0]}). "\n";
  return $return;
}

# used for testing

sub _test_dump {
  my $self = shift;
  my $rt = (join('',@{$self->[0]}));
  return $rt;
}
1;

__END__;

=pod

=head1 NAME

 AI::Gene::Simple

=head1 SYNOPSIS

A base class for storing and mutating genetic sequences.

 package Somegene;
 use AI::Gene::Simple;
 our @ISA = qw (AI::Gene::Simple);

 sub generate_token {
   my $self = shift;
   my $prev = $_[0] ? $_[0] + (1-rand(1)) : rand(1)*10;
   return $prev;
 }

 sub calculate {
   my $self = shift;
   my $x = $_[0];
   my $rt=0;
   for (0..(scalar(@{$self->[0]}) -1)) {
     $rt += $self->[0][$_] * ($x ** $_);
   }
   return $rt;
 }

 sub seed {
   my $self = shift;
   $self->[0][$_] = rand(1) * 10 for (0..$_[0]);
   return $self;
 }

 # ... then elsewhere

 package main;

 my $gene = Somegene->new;
 $gene->seed(5);
 print $gene->calculate(2), "\n";
 $gene->mutate_minor;
 print $gene->calculate(2), "\n";
 $gene->mutate_major;
 print $gene->calculate(2), "\n";

=head1 DESCRIPTION

This is a class which provides generic methods for the
creation and mutation of genetic sequences.  Various mutations
are provided but the resulting mutations are not checked
for a correct syntax.  These classes are suitable for genes
where it is only necessary to know what lies at a given
position in a gene.  If you need to ensure a gene maintains
a sensible grammar, then you should use the AI::Gene::Sequence
class instead, the interfaces are the same though so you
will only need to modify your overiding classes if you need to
switch from one to the other.

A suitable use for this module might be a series of coefficients
in a polynomial expansion or notes to be played in a musical
score.

This module should not be confused with the I<bioperl> modules
which are used to analyse DNA sequences.

It is intended that the methods in this code are inherited
by other modules.

=head2 Anatomy of a gene

A gene is a linear sequence of tokens which tell some unknown
system how to behave.  These methods all expect that a gene
is of the form:

 [ [ 'token0', 'token1', ...  ], .. other elements ignored ]

=head2 Using the module

To use the genetic sequences, you must write your own
implementations of the following methods along with some
way of turning your encoded sequence into something useful.

=over 4

=item generate_token

=back

You may also want to override the following methods:

=over 4

=item new

=item clone

=item render_gene

=back

The calling conventions for these methods are outlined below.

=head2 Mutation methods

Mutation methods are all named C<mutate_*>.  In general, the
first argument will be the number of mutations required, followed
by the positions in the genes which should be affected, followed
by the lengths of sequences within the gene which should be affected.
If positions are not defined, then random ones are chosen.  If
lengths are not defined, a length of 1 is assumed (ie. working on
single tokens only), if a length of 0 is requested, then a random
length is chosen.

If a mutation is attempted which could corrupt your gene (copying
from a region beyond the end of the gene for instance) then it
will be silently skipped.  Mutation methods all return the number
of mutations carried out (not the number of tokens affected).

=over 4

=item C<mutate([num, ref to hash of probs & methods])>

This will call at random one of the other mutation methods.
It will repeat itself I<num> times.  If passed a reference
to a hash as its second argument, it will use that to
decide which mutation to attempt.

This hash should contain keys which fit $1 in C<mutate_(.*)>
and values indicating the weight to be given to that method.
The module will normalise this nicely, so you do not have to.
This lets you define your own mutation methods in addition to
overriding any you do not like in the module.

=item C<mutate_insert([num, pos])>

Inserts a single token into the string at position I<pos>.
The token will be randomly generated by the calling object's 
C<generate_token> method.

=item C<mutate_overwrite([num, pos1, pos2, len])>

Copies a section of the gene (starting at I<pos1>, length I<len>)
and writes it back into the gene, overwriting current elements,
starting at I<pos2>.

=item C<mutate_reverse([num, pos, len])>

Takes a sequence within the gene and reverses the ordering of the
elements within that sequence.  Starts at position I<pos> for
length I<len>.

=item C<mutate_shuffle([num, pos1, pos2, len])>

This takes a sequence (starting at I<pos1> length I<len>)
 from within a gene and moves
it to another position (starting at I<pos2>).  Odd things might occur if the
position to move the sequence into lies within the
section to be moved, but the module will try its hardest
to cause a mutation.

=item C<mutate_duplicate([num, pos1, pos2, length])>

This copies a portion of the gene starting at I<pos1> of length
I<length> and then splices it into the gene before I<pos2>.

=item C<mutate_remove([num, pos, length]))>

Deletes I<length> tokens from the gene, starting at I<pos>. Repeats
I<num> times.

=item C<mutate_minor([num, pos])>

This will mutate a single token at position I<pos> in the gene 
into one of the same type (as decided by the object's C<generate_token>
method).

=item C<mutate_major([num, pos])>

This changes a single token into a token of any token type.
Token at postition I<pos>.  The token is produced by the object's
C<generate_token> method.

=item C<mutate_switch([num, pos1, pos2, len1, len2])>

This takes two sequences within the gene and swaps them
into each other's position.  The first starts at I<pos1>
with length I<len1> and the second at I<pos2> with length
I<len2>.  If the two sequences overlap, then no mutation will
be attempted.

=back

The following methods are also provided, but you will probably
want to overide them for your own genetic sequences.

=over 4

=item C<generate_token([current token])>

This is used by the mutation methods when changing tokens or 
creating new ones.  It is expected to return a single token.
If a minor mutation is being attempted, then the method will
also be passed the current token.

The provided version of this method returns a random character
from 'a'..'z' as both the token type and token.

=item C<clone()>

This returns a copy of the gene as a new object.  If you are using
nested genes, or other references as your tokens, then you may need
to produce your own version which will deep copy your structure.

=item C<new>

This returns an empty gene, into which you can put things.  If you
want to initialise your gene, or anything useful like that, then
you will need another one of these.

=item C<render_gene>

This is useful for debugging, returns a serialised summary of the
gene.

=back

=head1 AUTHOR

This module was written by Alex Gough (F<alex@rcon.org>).

=head1 SEE ALSO

If you are encoding something which must maintain a correct
syntax (executable code, regular expressions, formal poems)
then you might be better off using AI::Gene::Sequence .

=head1 COPYRIGHT

Copyright (c) 2000 Alex Gough <F<alex@rcon.org>>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 BUGS

Some methods will do odd things if you pass them weird values,
so try not to do that.  So long as you stick to passing
positive integers or C<undef> to the methods then they should
recover gracefully.

While it is easy and fun to write genetic and evolutionary
algorithms in perl, for most purposes, it will be much slower
than if they were implemented in another more suitable language.
There are some problems which do lend themselves to an approach
in perl and these are the ones where the time between mutations
will be large, for instance, when composing music where the
selection process is driven by human whims.

=cut

Changes  view on Meta::CPAN

Revision history for Perl extension AI::Gene::Sequence.

0.21 Wed Jan 24 13:00:00 2001

BUGFIXES: problem with Makefile.PL & nmake fixed

0.20 Tue Jan 02 23:20:00 2001

Changes   name changed to AI::Gene::*
          speed warnings added to pod

0.13 Sat Dec 30 23:00:00 2000

Added:    mutate_reverse method added to both Sequence and Simple

BUGFIXES: modified makefile to ensure sensible version of perl
          removed eval in mutate method and _normalise

0.12 Fri Dec 29 19:00:00 2000

Added:    Genetics::Gene::Simple package added, with tests (tsimp.t)

BUGFIXES: require 5.6.0 lines.
          documentation made clearer.

0.11 Thu Dec 28 21:00:00 2000

Added:    Extensive test suite (tgene.t)
          mutate_overwrite added

BUGFIXES: Most methods have more gene length related sanity checking.
          So long as positive integers are used as args, then there should
          be no fatal errors through missing the end of substrings.
          mutate when called with ref of probs only worked with keys
          of generic probs hash, this is now fixed.

0.10 Wed Dec 27 21:00:00 2000

Initial public (but buggy) release.

MANIFEST  view on Meta::CPAN

Changes
Makefile.PL
MANIFEST
AI/Gene/Sequence.pm
AI/Gene/Simple.pm
demo/Regexgene.pm
demo/Musicgene.pm
demo/spamscan.pl
demo/music.pl
t/tgene.t
t/tsimp.t
test.pl
META.yml                                 Module meta-data (added by MakeMaker)

META.yml  view on Meta::CPAN

--- #YAML:1.0
name:               AI-Gene-Sequence
version:            0.22
abstract:           ~
author:  []
license:            unknown
distribution_type:  module
configure_requires:
    ExtUtils::MakeMaker:  0
build_requires:
    ExtUtils::MakeMaker:  0
requires:  {}
no_index:
    directory:
        - t
        - inc
generated_by:       ExtUtils::MakeMaker version 6.55_02
meta-spec:
    url:      http://module-build.sourceforge.net/META-spec-v1.4.html
    version:  1.4

Makefile.PL  view on Meta::CPAN

# Perl version checking
eval {require 5.6.0} or die <<'EOD';
* This module uses functions which are only available in perls
* greater than 5.6.0 which you do not seem to have yet.
EOD

use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
WriteMakefile(
    'NAME'		=> 'AI::Gene::Sequence',
    'VERSION_FROM'	=> 'AI/Gene/Sequence.pm', # finds $VERSION
    'PREREQ_PM'		=> {}, # e.g., Module::Name => 1.1
    'PL_FILES'          => {}, # some demos included, do not need to be run
    'PM'                => { 'AI/Gene/Sequence.pm'
			     => '$(INST_LIBDIR)/Sequence.pm',
			     'AI/Gene/Simple.pm'
			     => '$(INST_LIBDIR)/Simple.pm'},
);

demo/Musicgene.pm  view on Meta::CPAN


package Musicgene;
use strict;
use warnings;
use MIDI::Simple;

BEGIN {
  use Exporter   ();
  use AI::Gene::Sequence;
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  $VERSION     = 0.01;
  @ISA         = qw(Exporter AI::Gene::Sequence);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw();
}
our @EXPORT_OK;

our @chords = ([qw(A C G)], [qw(A C E)]);       # type c
our @octaves = (3..10);                         # type o
our @notes = ('A'..'G', 'rest');                # type n
our @lengths = (qw(hn qn), '');                 # type l

sub new {
  my $class = shift;
  my $self = ['',[]];
  bless $self, ref($class) || $class;
  $self->mutate_insert($_[0]) if $_[0];
  return $self;
}

sub generate_token {
  my $self = shift;
  my ($type, $prev) = @_[0,1];
  my @rt;
  unless ($type) {
    my $rand = rand;
    if    ($rand < .7) {$type = 'n'}
    elsif ($rand < .8) {$type = 'l'}
    elsif ($rand < .9) {$type = 'o'}
    elsif ($rand < 1 ) {$type = 'c'}
    else {die "$0: bad probability: $rand"}
  }
  $rt[0] = $type;
 SWITCH: for ($type) {
    /n/ && do {$rt[1] = $notes[rand@notes]; last SWITCH};
    /c/ && do {$rt[1] = $chords[rand@chords]; last SWITCH};
    /l/ && do {$rt[1] = $lengths[rand@lengths]; last SWITCH};
    /o/ && do {$rt[1] = $octaves[rand@octaves]; last SWITCH};
    die "$0: unknown type: $type";
  }
  return @rt[0,1];
}

sub valid_gene {length($_[1]) < 50 ? 1 : 0};

sub write_file {
  my $self = shift;
  my $file_name = $_[0] or die "$0: No file passed to write_file";
  my $opus = MIDI::Simple->new_score();
  my $note_length = '';
  foreach my $pos (0..(length $self->[0])) {
  SWITCH: for (substr($self->[0], $pos, 1)) {
      /l/ && do {$note_length = $self->[1][$pos]             ;last SWITCH};
      /n/ && do {$opus->n($note_length, $self->[1][$pos])    ;last SWITCH};
      /o/ && do {$opus->noop('o'.$self->[1][$pos])           ;last SWITCH};
      /c/ && do {$opus->n($note_length, @{$self->[1][$pos]}) ;last SWITCH};
    }
  }

  $opus->write_score($file_name);
  return;
}

## Also override mutation method
# calls mutation method at random
# 0: number of mutations to perform
# 1: ref to hash of probs to use (otherwise uses default mutations and probs)
my %probs = (
	     insert    =>.1,
	     remove    =>.2,
	     duplicate =>.4,
	     minor     =>.5,
	     major     =>.6,
	     overwrite =>.7,
	     reverse   =>.75,
	     switch    =>.8,
	     shuffle   =>1,
	    );

sub mutate {
  my $self = shift;
  my $num_mutates = +$_[0] || 1;
  my $rt = 0;
  my ($hr_probs, $muts);
  if (ref $_[1] eq 'HASH') { # use non standard mutations or probs
    $hr_probs = $self->_normalise($_[1]);
    $muts = [keys %{$hr_probs}];
  MUT_CYCLE: for (1..$num_mutates) {
      my $rand = rand;
      foreach my $mutation (@{$muts}) {
	next unless $rand < $hr_probs->{$mutation};
	$rt += eval "\$self->mutate_$mutation(1)";
	next MUT_CYCLE;
      }
    } 
  }
  else {                     # use standard mutations and probs
    foreach (1..$num_mutates) {
      my $rand = rand;
      if ($rand < $probs{insert}) {
	$rt += $self->mutate_insert(1);
      }      
      elsif ($rand < $probs{remove}) {
	$rt += $self->mutate_remove(1);
      }
      elsif ($rand < $probs{duplicate}) {
	$rt += $self->mutate_duplicate(1,undef, undef,0); # random length
      }
      elsif ($rand < $probs{minor}) {
	$rt += $self->mutate_minor(1);
      }
      elsif ($rand < $probs{major}) {
	$rt += $self->mutate_major(1);
      }
      elsif ($rand < $probs{overwrite}) {
	$rt += $self->mutate_overwrite(1,undef,undef,0);
      }
      elsif ($rand < $probs{switch}) {
	$rt += $self->mutate_switch(1,undef,undef,0,0);
      }
      elsif ($rand < $probs{shuffle} ) {
	$rt += $self->mutate_shuffle(1,undef,undef,0);
      }
    }
  }


  return $rt;
}

1;

demo/Regexgene.pm  view on Meta::CPAN

package Regexgene;
use strict;
use warnings;

=head1 NAME

  Regexgene - An example of a AI::Gene::Sequence

=head1 SYNOPSIS

This is a short module which illustrates the way to use the
AI::Gene::Sequence module.

 use Regexgene;
 $regex = Regexgene->new(5);
 print $regex->regex, "\n";
 $regex->mutate;
 print $regex->regex, "\n";
 $copy = $regex->clone;
 $copy->mutate;
 print $regex->regex, "\n", $copy->regex, "\n";

=head1 DESCRIPTION

The following is a code / pod mix, use the source.  A programme
using this module is available as C<spamscan.pl>.

=head1 The module code

=cut

=head2

First we need to be nice, do our exporting and versions, we
also need to tell perl that we want objects in this class
to inherit from AI::Gene::Sequence by placing it in
our @ISA array.

=cut

BEGIN {
  use Exporter   ();
  use AI::Gene::Sequence;
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  $VERSION     = 0.01;
  @ISA         = qw(Exporter AI::Gene::Sequence);
  @EXPORT      = ();
  %EXPORT_TAGS = ();
  @EXPORT_OK   = qw();
}
our @EXPORT_OK;

=head2 Globals

We have a load of globals, these form the basis of our token types, anything
from the same array, is the same type eg.

 @modifiers = qw( * *? + +? ?? ); # are of type 'm' for modifier

=cut

our @modifiers  = qw( * *? + +? ?? );
our @char_types = qw( \w \W \d \D \s \S .);
our @ranges     = qw( [A-Z] [a-z] [0-9] );
our @chars      = ((0..9,'a'..'z','A'..'Z','_'),
                  (map '\\'.chr, 32..47, 58..64, 91..94, 96, 123..126));


=head2 Constructor

As we want to be able to fill our regular expression at the same
time as we create it and because we will want to nest sequences
we will need some way to know how deep we are, then a different
B<new> method is needed.

If called as an object method (C<$obj->new>), this decreases the depth
count by one from the invoking object.  It also adds tokens to the regular
expression and uses the B<valid_gene> method to ensure we stay sane 
from the start.

As can be seen, we use array offsets above $self->[1] to store information
which is specific to our implementation.

=cut

sub new {
  my $gene = ['',[], ref($_[0]) ? $_[0]->[2]-1 : 3 ]; # limit recursion
  bless $gene, ref($_[0]) || $_[0];
  my $length = $_[1] || 5;
  for (1..$length) {
    my @token = $gene->generate_token();
    my $new = $gene->[0] . $token[0];
    redo unless $gene->valid_gene($new); # hmmmm, enter turing from the wings
    $gene->[0] = $new;
    push @{$gene->[1]}, $token[1];
  }
  return $gene;
}

=head2 clone

As we are going to allow nested sequences, then we need to make sure that
when we copy an object we create new versions of everthing, rather than
reusing pointers to data used by other objects.

=cut

sub clone {
  my $self = shift;
  my $new = bless [$self->[0], [], $self->[2]], ref($self);
  @{$new->[1]} = map {ref($_) ? $_->clone : $_} @{$self->[1]}; # woohoo, recursive objects
  return $new;
}

=head2 generate_token

This is where we really start needing to have our own implementation.
This method is used by AI::Gene::Sequence when it needs a new
token, we also use it ourselves when we create a new object, but we
did not have to.

If we are provided with a token type, we use that, otherwise we chose
one at random.  We make sure that we return a two element list.
If we had wanted, when passed a type of 'g' along with a second
argument, we could have caused this method to mutate the nested
regex, instead, we just create a different one.

=cut

sub generate_token {
  my $self = shift;
  my $type = $_[0] || (qw(m t c r a g))[rand 6];
  my @rt;
  $rt[0] = $type;
 SWITCH: for ($type) {
    /^m/ && do {$rt[1] = $modifiers[rand@modifiers]  ;last SWITCH}; # modifier
    /^t/ && do {$rt[1] = $char_types[rand@char_types];last SWITCH}; # type
    /^c/ && do {$rt[1] = $chars[rand@chars]          ;last SWITCH}; # lone char
    /^r/ && do {$rt[1] = $ranges[rand@ranges]        ;last SWITCH}; # range
    /^a/ && do {$rt[1] = '|'                         ;last SWITCH}; # altern
    /^g/ && do {
      if ($self->[2] > 0) {  # recursion avoidance...
	$rt[1] = $self->new;
      }
      else {
	$rt[1] = $chars[rand@chars];
      }
      ;last SWITCH}; # grouping
    die "Unknown type of regex token ($type)";
  }
  return @rt[0,1];
}

# returns true if a valid regex, otherwise false, ignores optional posn arg

=head2 valid_gene

Because we have restricted ourselves to simple regular
expressions we only need to make sure that modifers and alternation
do not follow or precede things they should not.

Note that we do not use the current version of the gene in $self->[0].
This is because it is the un-mutated version, if we do not accept the
mutation then $self will be left alone by our calling methods.

That said, if you want to use $self->[0] then you can, but it would
be unwise to modify it here.

=cut

sub valid_gene {
  my $self = shift;
  my $gene = $_[0];
  if ($gene =~ /mm|am|aa|^a|^m|a$/) {
    return undef;
  }
  else {
    return 1;
  }
}

=head2

Having created a way to create, modify and verify our genetically
encoded regular expressions, we could do with some way to actually
use them.  This method retuns a non compiled regular expression and
calls itself recursively when it finds nested genes.

=cut

sub regex {
  my $self = shift;
  my $rt;
  warn "$0: empty gene turned into empty regex" unless scalar @{$self->[1]};
  foreach (@{$self->[1]}) {
    $rt .= ref($_) ? '(?:'. $_->regex .')' : $_;
  }
  return $rt;
}

=head1 AUTHOR

Alex Gough (F<alex@rcon.org>).

=head1 COPYRIGHT

Copyright (c) 2001 Alex Gough <F<alex@rcon.org>>. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

1;
__END__;

demo/music.pl  view on Meta::CPAN

use strict;
use warnings;
use Musicgene;

# make something to start from
my @seeds;
for (0..9) {
  $seeds[$_] = Musicgene->new(20);
  print "$_ : ", ($seeds[$_]->_test_dump)[0], "\n";
  $seeds[$_]->write_file('music'.$_.'.mid');
}

print "Enter number to retain (0-9):";
while (<>) {
  chomp;
  last if /\D/;
  $seeds[0] = $seeds[$_];
  $seeds[0]->write_file('music0.mid');
  print "\n0: ", ($seeds[0]->_test_dump)[0], "\n";
  for (1..9) {
    $seeds[$_] = $seeds[0]->clone; # make some children
    $seeds[$_]->mutate(5);         # modify them a bit
    $seeds[$_]->write_file('music'.$_.'.mid');
    print "$_: ", ($seeds[$_]->_test_dump)[0], "\n";
  }
  print "Enter number to retain (0-9):";
}

demo/spamscan.pl  view on Meta::CPAN

#!/usr/bin/perl -w
# spamscan.pl  by Alex Gough, 2001, (alex@rcon.org)
# This is a quick illustration of the Regexgene pseudo- module which
# is itself an illustration of the AI::Gene::Sequence module.
#
# It will run for ever, printing out dots or regular expressions
# which are quite good at spotting spam.

use strict;
use warnings;
use Regexgene;

my $num_mutates = 3;

# read in our passes and failures.
my (@wanted, @spam);
while (<DATA>) {
  if (1../^$/) { push @wanted, $_;}
  else         { push @spam, $_}
}
print "Best score possible is: ", scalar(@spam), "\n";

my $regex = seed_match(); # start off with something quite good
my $best_yet = 0;
my $temp = 1;
while (1) {
  my $child = $regex->clone;           # copy the parent
  $child->mutate($num_mutates);        # change it slightly
  my $rex = $child->regex;
  $rex = qr/$rex/;
  my $score = 0;                       # see if the kid is better
  $score += grep {$_ =~ $rex} @spam;   # we don't want spam
  $score -= grep {$_ =~ $rex} @wanted; # but we do want our mail
  if ($score > $best_yet) {
    $regex = $child;                   # and so progress is made
    $best_yet = $score;
    print "\n* $best_yet ", $regex->regex, "\n";
  }
  print '.' unless ($temp++ % 80);
}

sub seed_match {
  my $regex;
 TWIDDLE: while (1) {
    $regex = Regexgene->new(5);
    my $rg = $regex->regex;
    last TWIDDLE if $spam[rand@spam] =~ $rg;
  }
  return $regex;
}

# Stuff from my mailbox (Don't ask) and my spam trap
__DATA__;
Stats since whenever
Hello
Bit of Fun
Money 
oxford
The sound of one hand clapping
Silly Americans
Saturday
Mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm!
I've just written 4000 words with twenty diagrams in six hours.
Mmmmmm
Guiness
Who the Man!
The owls are not what they seem
It's that rich b*stard again
Irish Virus
What Alex Did Next
Stuff
Go Jerry, Go Jerry!!
Change of e-mail
Petrol
Groovy.
You know you aren't working when:
The saga continues...
At last, the cycle is complete
Phone works again
Warm, glowy feeling.
Mmmmm Free Time
Hmmm.
broken
Pedantry and the feminine perspective since 1840...
E-mail addresses
The domain
Disarming Baptists
Stats for, er, a while...
End-of-year party...
Nasty
The Joy of Work
Sild in tomato sauce
Someone I admire and deeply love
I am Mike, he is Bob
Windows 95 CD
Lather, Rinse, Repeat
Resistance is futile.

cum!!
Your complimentary market consultation.                         6861
Domain Registration
Finance Available on Attractive Terms
Free Dish Network Satellite & Free Install - Limited time offer! _____________________
An Internet Opportunity that really works!
Urgent message for Help!!!
RE: i need reload the scripts
Ink Jet Cartridges and Paper!  Lowest Prices w/ Guarantees
MORE$$$ 
Information IS Power!
Lenders COMPETE for mortgage LOANS!                         7080
Improve Your Sex Life With VIAGRA!!                         32358
BEST DEAL ON NEW CARS AND TRUCKS!!
E-Mail Services
The Net Detective. Snoop on Anyone....
Re:  Your Business
Re:  Your Business
Lenders COMPETE for your MORTAGE Loan!!! -rnqyxoyj
A LITTLE MONEY CAN GO A LONG WAYS
Detective software
Click Here and get a Brand New Free Satellite ...
Eliminate BAD c r e d i t! 
FWD:FWD:Target your market with search engine traffic for $0.25 -hoxrgck
** It is fun, it is legal and it works***
|||- - Professional Direct Email Marketers Club - -|||				kljh	
"A dream come true offering major bank credit cards at 5.9% interest!!
 ..//..pres./ We have foreigners who want to buy or finance your business/speak to them right now... 
Make Money For The Holidays Now!!                         30231
Make Money While You Sleep!!!!
Make Money While You Sleep!!!!
Untold Real Estate Info.
Pirate SOFTWARE
You Decide:  Is Age-Reversal Possible?                         29348
Open Letter Matthias Rath
90 % of the people in your city and state need this service,,become a credit card....
Get Out of Line...                         23048
=?big5?B?d29ya6FJoUk=?=
=?big5?B?d29ya6FJoUk=?=
 sixdegrees does entertainment
>I made my cool color business card online for FREE!
Lenders COMPETE for your MORTAGE Loan! -ihjvxhrmxxw
"you will be a dream come true,90% of the people in your city need this service!!
Your Long Distance Bill Is Incorrect.....
Are you looking for a "better way" to make money?                         17269
FRE

t/tgene.t  view on Meta::CPAN

use strict;
use warnings;

# Gtest is a small package used to test the AI::Gene::Sequence
# package.  It provides a generate_token method and a seed_gene
# method, the first is highly deterministic (so tests of a module
# which hinge on randomness can work) and the second sets up a gene
# ready for a test.

# Also is a new method, which creates the gene and seeds it and
# 'd' and 'g' methods, which return (stringified) versions of the
# sequence ($self->[0]) and gene (@{$self->[1]}) respectively.

package GTest;
our (@ISA);
use AI::Gene::Sequence;
@ISA = qw(AI::Gene::Sequence);

sub new {
  my $class = shift;
  my $self = ['',[]];
  bless $self, $class;
  $self->seed_gene;
  return $self;
}

sub seed_gene {
  my $self = shift;
  $self->[0] = join('', 'a'..'j');
  @{$self->[1]} = ('a'..'j');
  return 1;
}

sub generate_token {
  my $self = shift;
  my ($type, $prev) = @_;
  $type ||= 'n';
  $prev = uc $type;
  return ($type, $prev);
}

sub d {
  my $self = shift;
  return $self->[0];
}

sub g {
  my $self = shift;
  return join('', @{$self->[1]});
}

package main;
use Test;
# see above for a small package ( GTest ) used to test G::G::S
BEGIN {plan tests => 111, todo =>[]}
my $hammer = 30; # set big to bash at methods with randomness

{ # test1
  # first of all, does our testing package behave
  my $gene = GTest->new;
  die "$0: Broken render" unless $gene->d eq 'abcdefghij'
                            and $gene->g eq 'abcdefghij';
  die "$0: Broken generate" unless ($gene->generate_token('a'))[1] eq 'A'
    and ($gene->generate_token())[0] eq 'n';
  ok(1);
}
my $main = GTest->new;
{ print "# clone\n";
  my $gene = $main->clone;
  ok($gene->g, $main->g);
}

{ print "# mutate_minor\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_minor(1);
  ok ($rt, 1); # return value
  ok ($gene->g ne $main->g); # changed
  $gene = $main->clone;
  $gene->mutate_minor(1,0);
  ok ($gene->g, 'Abcdefghij');
  $rt = $gene->mutate_minor(1,10); # outside of gene
  ok ($rt,0);
  ok ($gene->g, 'Abcdefghij');
  # hammer randomness, check for errors
  $rt = 0;
  for (1..$hammer) {
    eval '$gene->mutate_minor()';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_major\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_major(1,0);
  ok($rt, 1);
  ok($gene->g, 'Nbcdefghij');
  $gene = $main->clone;
  $gene->mutate_major;
  ok($gene->g ne $main->g, 1);
  $gene = $main->clone;
  $rt = $gene->mutate_major(1,10); # outside of gene
  ok($rt,0);
  ok($gene->g eq $main->g);
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    eval '$gene->mutate_major()';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_remove\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_remove(1,0);
  ok($rt,1);
  ok($gene->g eq 'bcdefghij' and $gene->d eq 'bcdefghij');
  $rt = $gene->mutate_remove(1,0,2);
  ok($rt,1);
  ok($gene->g eq 'defghij' and $gene->d eq 'defghij');
  $rt = $gene->mutate_remove(1,7); # outside of gene
  ok($rt,0);
  ok($gene->g eq 'defghij');
  $rt = $gene->mutate_remove(1,5,5); # extends beyond gene
  ok($rt,1);
  ok($gene->g eq 'defgh');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_remove(1,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_insert\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_insert(1,0);
  ok($rt,1);
  ok($gene->g eq 'Nabcdefghij' and $gene->d eq 'nabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_insert(1,10); # last possible pos
  ok($rt,1);
  ok($gene->d eq 'abcdefghijn' and $gene->g eq 'abcdefghijN');
  $gene = $main->clone;
  $rt = $gene->mutate_insert;
  ok($rt,1);
  ok($gene->d ne 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_insert(1,11); # outside of gene
  ok($rt,0);
  ok($gene->g eq 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_insert';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_overwrite\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_overwrite(1,0,1); # first to second
  ok($rt,1);
  ok($gene->g, 'aacdefghij');
  ok($gene->d, 'aacdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,4,3); # has length
  ok($rt,1);
  ok($gene->g, 'abcdabchij');
  ok($gene->d, 'abcdabchij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,3,4,3); # overlap
  ok($rt,1);
  ok($gene->g, 'abcddefhij');
  ok($gene->d, 'abcddefhij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,10,3); # dump lies at end of gene
  ok($rt,1);
  ok($gene->g, 'abcdefghijabc');
  ok($gene->d, 'abcdefghijabc');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,11); # dump lies beyond end of gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,11,4); # area to copy lies outside gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  ok($gene->d, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_overwrite(1,undef,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_reverse\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_reverse(1,0,2);
  ok($rt,1);
  ok($gene->d, 'bacdefghij');
  ok($gene->g, 'bacdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,0,10); # whole gene
  ok($rt,1);
  ok($gene->d, 'jihgfedcba');
  ok($gene->g, 'jihgfedcba');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,8,4); # extends beyond gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,10,1); # starts outside gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  ok($gene->g, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_reverse(1,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_duplicate\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_duplicate(1,0,0);
  ok($rt,1);
  ok($gene->g, 'aabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,9,0); # from end of gene to front
  ok($rt,1);
  ok($gene->g, 'jabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,10,0); # from outside of gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,11); # to posn beyond end of gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,10); # to posn at very end of gene
  ok($rt,1);
  ok($gene->g, 'abcdefghija');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,10,10); # double the gene
  ok($rt,1);
  ok($gene->g, 'abcdefghijabcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_duplicate(1,undef,undef,0)';
  }
  ok($rt,0);
}

{ print "# mutate_switch\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_switch(1,0,9); # first and last
  ok($rt,1);
  ok($gene->g, 'jbcdefghia');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,8,2,2); # 1st 2 and last 2
  ok($rt,1);
  ok($gene->g, 'ijcdefghab');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,5,2,4); # different lengths
  ok($rt,1);
  ok($gene->g, 'fghicdeabj');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,10); # pos2 outside gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,10,0); # pos1 outside gene (silently same as)
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,9,1,2); # second section extends beyond
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,2,5,3); # overlap of sections
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_switch(1,undef,undef,0,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}


{ print "# mutate_shuffle\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_shuffle(1,5,0); # from after to
  ok($rt,1);
  ok($gene->g, 'fabcdeghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,5,0,2); # extended sequence
  ok($rt,1);
  ok($gene->g, 'fgabcdehij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,5,2); # to after from
  ok($rt,1);
  ok($gene->g, 'cdeabfghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,9,1); # 1st to last
  ok($rt,1);
  ok($gene->g, 'bcdefghiaj');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,3,8); # overlap
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,10,1); # to posn outside gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,8,5); # should suceed
  ok($rt,1);
  ok($gene->g, 'fghabcdeij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,8,5,5); # extends beyond gene
  ok($rt,0);
  ok($gene->g, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_shuffle(1,undef,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate\n";
  my $rt = 0;
  # hammer with defaults
  for (1..$hammer) {
    my $gene = $main->clone;
    eval '$gene->mutate';
    $rt = 1 if $@;
  }
  ok($rt,0);
  # hammer with custom probs
  my %probs = (
               insert    =>1,
	       remove    =>1,
	       duplicate =>1,
	       overwrite =>1,
	       minor     =>1,
	       major     =>1,
	       switch    =>1,
	       shuffle   =>1,
	       );
  $rt = 0;
  for (1..$hammer) {
    my $gene= $main->clone;
    eval '$gene->mutate(1, \\%probs)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}
1;

t/tsimp.t  view on Meta::CPAN

use strict;
use warnings;

# GtestS is a small package used to test the AI::Gene::Simple
# package.  It provides a generate_token method and a seed_gene
# method, the first is highly deterministic (so tests of a module
# which hinge on randomness can work) and the second sets up a gene
# ready for a test.

# Also is a new method, which creates the gene and seeds it and
# a 'd' method, which returns a stringified version of $self->[0]

package GTestS;
our (@ISA);
use AI::Gene::Simple;
@ISA = qw(AI::Gene::Simple);

sub new {
  my $class = shift;
  my $self = [[]];
  bless $self, $class;
  $self->seed_gene;
  return $self;
}

sub seed_gene {
  my $self = shift;
  @{$self->[0]} = ('a'..'j');
  return 1;
}

sub generate_token {
  my $self = shift;
  my ($prev) = @_;
  if ($prev) {
    $prev = uc $prev;
  }
  else {
    $prev = 'N';
  }
  return $prev;
}

sub d {
  my $self = shift;
  return join('',@{$self->[0]});
}

package main;
use Test;
# see above for a small package ( GTest ) used to test G::G::S
BEGIN {plan tests => 101, todo =>[]}
my $hammer = 30; # set big to bash at methods with randomness

{ # test1
  # first of all, does our testing package behave
  my $gene = GTestS->new;
  die "$0: Broken render" unless $gene->d eq 'abcdefghij';
  die "$0: Broken generate" unless $gene->generate_token('a') eq 'A'
    and $gene->generate_token eq 'N';
  ok(1);
}
my $main = GTestS->new;
{ print "# clone\n";
  my $gene = $main->clone;
  ok($gene->d, $main->d);
}

{ print "# mutate_minor\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_minor(1);
  ok ($rt, 1); # return value
  ok ($gene->d ne $main->d); # changed
  $gene = $main->clone;
  $gene->mutate_minor(1,0);
  ok ($gene->d, 'Abcdefghij');
  $rt = $gene->mutate_minor(1,10); # outside of gene
  ok ($rt,0);
  ok ($gene->d, 'Abcdefghij');
  # hammer randomness, check for errors
  $rt = 0;
  for (1..$hammer) {
    eval '$gene->mutate_minor()';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_major\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_major(1,0);
  ok($rt, 1);
  ok($gene->d, 'Nbcdefghij');
  $gene = $main->clone;
  $gene->mutate_major;
  ok($gene->d ne $main->d);
  $gene = $main->clone;
  $rt = $gene->mutate_major(1,10); # outside of gene
  ok($rt,0);
  ok($gene->d eq $main->d);
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    eval '$gene->mutate_major()';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_remove\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_remove(1,0);
  ok($rt,1);
  ok($gene->d eq 'bcdefghij');
  $rt = $gene->mutate_remove(1,0,2);
  ok($rt,1);
  ok($gene->d eq 'defghij');
  $rt = $gene->mutate_remove(1,7); # outside of gene
  ok($rt,0);
  ok($gene->d eq 'defghij');
  $rt = $gene->mutate_remove(1,5,5); # extends beyond gene
  ok($rt,1);
  ok($gene->d eq 'defgh');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_remove(1,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_insert\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_insert(1,0);
  ok($rt,1);
  ok($gene->d eq 'Nabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_insert(1,10); # last possible pos
  ok($rt,1);
  ok($gene->d eq 'abcdefghijN');
  $gene = $main->clone;
  $rt = $gene->mutate_insert;
  ok($rt,1);
  ok($gene->d ne 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_insert(1,11); # outside of gene
  ok($rt,0);
  ok($gene->d eq 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_insert';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_overwrite\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_overwrite(1,0,1); # first to second
  ok($rt,1);
  ok($gene->d, 'aacdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,4,3); # has length
  ok($rt,1);
  ok($gene->d, 'abcdabchij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,3,4,3); # overlap
  ok($rt,1);
  ok($gene->d, 'abcddefhij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,10,3); # dump lies at end of gene
  ok($rt,1);
  ok($gene->d, 'abcdefghijabc');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,0,11); # dump lies beyond end of gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_overwrite(1,11,4); # area to copy lies outside gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_overwrite(1,undef,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_reverse\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_reverse(1,0,2);
  ok($rt,1);
  ok($gene->d, 'bacdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,0,10); # whole gene
  ok($rt,1);
  ok($gene->d, 'jihgfedcba');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,8,4); # extends beyond gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_reverse(1,10,1); # starts outside gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_reverse(1,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate_duplicate\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_duplicate(1,0,0);
  ok($rt,1);
  ok($gene->d, 'aabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,9,0); # from end of gene to front
  ok($rt,1);
  ok($gene->d, 'jabcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,10,0); # from outside of gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,11); # to posn beyond end of gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,10); # to posn at very end of gene
  ok($rt,1);
  ok($gene->d, 'abcdefghija');
  $gene = $main->clone;
  $rt = $gene->mutate_duplicate(1,0,10,10); # double the gene
  ok($rt,1);
  ok($gene->d, 'abcdefghijabcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_duplicate(1,undef,undef,0)';
  }
  ok($rt,0);
}

{ print "# mutate_switch\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_switch(1,0,9); # first and last
  ok($rt,1);
  ok($gene->d, 'jbcdefghia');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,8,2,2); # 1st 2 and last 2
  ok($rt,1);
  ok($gene->d, 'ijcdefghab');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,5,2,4); # different lengths
  ok($rt,1);
  ok($gene->d, 'fghicdeabj');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,10); # pos2 outside gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,10,0); # pos1 outside gene (silently same as)
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,9,1,2); # second section extends beyond
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_switch(1,0,2,5,3); # overlap of sections
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_switch(1,undef,undef,0,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}


{ print "# mutate_shuffle\n";
  my $gene = $main->clone;
  my $rt = $gene->mutate_shuffle(1,5,0); # from after to
  ok($rt,1);
  ok($gene->d, 'fabcdeghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,5,0,2); # extended sequence
  ok($rt,1);
  ok($gene->d, 'fgabcdehij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,5,2); # to after from
  ok($rt,1);
  ok($gene->d, 'cdeabfghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,9,1); # 1st to last
  ok($rt,1);
  ok($gene->d, 'bcdefghiaj');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,3,8); # overlap
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,10,1); # to posn outside gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,0,8,5); # should suceed
  ok($rt,1);
  ok($gene->d, 'fghabcdeij');
  $gene = $main->clone;
  $rt = $gene->mutate_shuffle(1,8,5,5); # extends beyond gene
  ok($rt,0);
  ok($gene->d, 'abcdefghij');
  # hammer randomness
  $rt = 0;
  for (1..$hammer) {
    $gene = $main->clone;
    eval '$gene->mutate_shuffle(1,undef,undef,0)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}

{ print "# mutate\n";
  my $rt = 0;
  # hammer with defaults
  for (1..$hammer) {
    my $gene = $main->clone;
    eval '$gene->mutate';
    $rt = 1 if $@;
  }
  ok($rt,0);
  # hammer with custom probs
  my %probs = (
               insert    =>1,
	       remove    =>1,
	       duplicate =>1,
	       overwrite =>1,
	       minor     =>1,
	       major     =>1,
	       switch    =>1,
	       shuffle   =>1,
	       );
  $rt = 0;
  for (1..$hammer) {
    my $gene= $main->clone;
    eval '$gene->mutate(1, \\%probs)';
    $rt = 1 if $@;
  }
  ok($rt,0);
}
1;

test.pl  view on Meta::CPAN

# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

######################### We start with some black magic to print on failure.

# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)

BEGIN { $| = 1; print "1..1\n"; }
END {print "not ok 1\n" unless $loaded;}
use AI::Gene::Sequence;
use AI::Gene::Simple;
$loaded = 1;
print "ok 1\n";

######################### End of black magic.

# Insert your test code below (better if it prints "ok 13"
# (correspondingly "not ok 13") depending on the success of chunk 13
# of the test code):

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 3.425 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-cec75d87357c )