AI-NeuralNet-Kohonen

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

0.132  Web Apr 17 11:40:00 2003
	Some minor interface changes.
	Created *::Input class

0.0131 Thu Mar 20 11:55:00 2003
	Added epoch_start callback.

0.013 Mon Mar 17 14:49:00 2003
	Added 'neighbour_factor' field
	Method 'load_file' becomes 'load_input', as we may allow
	the loading of map fiels too.

0.012 Fri Mar 14 11:49:00 2003
	Added some support for loading and saving in SOM_PAK file
	format; gaussian smoothing of the map.

0.011 Thu Mar 13 14:00:01 2003
	Had forgotten to make the find_bmu method public.
	Allowed for rectangular maps rather than just square.

0.01  Thu Mar 13 12:32:00 2003
	- original version; created by h2xs 1.21 with options
		-X -n AI::NeuralNet::Kohonen

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

use strict;
use warnings;
use Carp qw/croak cluck confess/;

use AI::NeuralNet::Kohonen::Node;
use AI::NeuralNet::Kohonen::Input;

=head1 SYNOPSIS

	$_ = AI::NeuralNet::Kohonen->new(
		map_dim_x => 39,
		map_dim_y => 19,
		epochs    => 100,
		table     =>
	"3
	1 0 0 red
	0 1 0 yellow
	0 0 1 blue
	0 1 1 cyan
	1 1 0 yellow
	1 .5 0 orange
	1 .5 1 pink"

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

	$_->train;
	$_->save_file('mydata.txt');
	exit;

=head1 DESCRIPTION

An illustrative implimentation of Kohonen's Self-organising Feature Maps (SOMs)
in Perl. It's not fast - it's illustrative. In fact, it's slow: but it is illustrative....

Have a look at L<AI::NeuralNet::Kohonen::Demo::RGB> for an example of
visualisation of the map.

I'll maybe add some more text here later.

=head1 DEPENDENCIES

	AI::NeuralNet::Kohonen::Node
	AI::NeuralNet::Kohonen::Input

=head1 EXPORTS

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN


=item table

The contents of a file of the format that could be supplied to
the C<input_file> field.

=item input_names

A name for each dimension of the input vectors.

=item map_dim_x

=item map_dim_y

The dimensions of the feature map to create - defaults to a toy 19.
(note: this is Perl indexing, starting at zero).

=item epochs

Number of epochs to run for (see L<METHOD train>).
Minimum number is C<1>.

=item learning_rate

The initial learning rate.

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

they're iterated over. Just for experimental purposes.

=item smoothing

The amount of smoothing to apply by default when C<smooth>
is applied (see L</METHOD smooth>).

=item neighbour_factor

When working out the size of the neighbourhood of influence,
the average of the dimensions of the map are divided by this variable,
before the exponential function is applied: the default value is 2.5,
but you may with to use 2 or 4.

=item missing_mask

Used to signify data is missing in an input vector. Defaults
to C<x>.

=back

Private fields:

=over 4

=item time_constant

The number of iterations (epochs) to be completed, over the log of the map radius.

=item t

The current epoch, or moment in time.

=item l

The current learning rate.

=item map_dim_a

Average of the map dimensions.

=back

=cut

sub new {
	my $class					= shift;
	my %args					= @_;
	my $self 					= bless \%args,$class;

	$self->{missing_mask}		= 'x' unless defined $self->{missing_mask};
	$self->_process_table if defined $self->{table};	# Creates {input}
	$self->load_input($self->{input_file}) if defined $self->{input_file};	# Creates {input}
	if (not defined $self->{input}){
		cluck "No {input} supplied!";
		return undef;
	}

	$self->{map_dim_x}			= 19 unless defined $self->{map_dim_x};
	$self->{map_dim_y}			= 19 unless defined $self->{map_dim_y};
	# Legacy from...yesterday
	if ($self->{map_dim}){
		$self->{map_dim_x} 		= $self->{map_dim_y} = $self->{map_dim}
	}
	if (not defined $self->{map_dim_x} or $self->{map_dim_x}==0
	 or not defined $self->{map_dim_y} or $self->{map_dim_y}==0){
		 confess "No map dimensions in the input!";
	 }
	if ($self->{map_dim_x}>$self->{map_dim_y}){
		$self->{map_dim_a} 		= $self->{map_dim_y} + (($self->{map_dim_x}-$self->{map_dim_y})/2)
	} else {
		$self->{map_dim_a} 		= $self->{map_dim_x} + (($self->{map_dim_y}-$self->{map_dim_x})/2)
	}
	$self->{neighbour_factor}	= 2.5 unless $self->{neighbour_factor};
	$self->{epochs}				= 99 unless defined $self->{epochs};
	$self->{epochs}				= 1 if $self->{epochs}<1;
	$self->{time_constant}		= $self->{epochs} / log($self->{map_dim_a}) unless $self->{time_constant};	# to base 10?
	$self->{learning_rate}		= 0.5 unless $self->{learning_rate};
	$self->{l}					= $self->{learning_rate};
	if (not $self->{weight_dim}){
		cluck "{weight_dim} not set";
		return undef;
	}
	$self->randomise_map;
	return $self;
}




=head1 METHOD randomise_map

Populates the C<map> with nodes that contain random real nubmers.

See L<AI::NerualNet::Kohonen::Node/CONSTRUCTOR new>.

=cut

sub randomise_map { my $self=shift;
	confess "{weight_dim} not set" unless $self->{weight_dim};
	confess "{map_dim_x} not set" unless $self->{map_dim_x};
	confess "{map_dim_y} not set" unless $self->{map_dim_y};
	for my $x (0..$self->{map_dim_x}){
		$self->{map}->[$x] = [];
		for my $y (0..$self->{map_dim_y}){
			$self->{map}->[$x]->[$y] = new AI::NeuralNet::Kohonen::Node(
				dim => $self->{weight_dim},
				missing_mask => $self->{missing_mask},
			);
		}
	}
}


=head1 METHOD clear_map

As L<METHOD randomise_map> but sets all C<map> nodes to
either the value supplied as the only paramter, or C<undef>.

=cut

sub clear_map { my $self=shift;
	confess "{weight_dim} not set" unless $self->{weight_dim};
	confess "{map_dim_x} not set" unless $self->{map_dim_x};
	confess "{map_dim_y} not set" unless $self->{map_dim_y};
	my $val = shift || $self->{missing_mask};
	my $w = [];
	foreach (0..$self->{weight_dim}){
		push @$w, $val;
	}
	for my $x (0..$self->{map_dim_x}){
		$self->{map}->[$x] = [];
		for my $y (0..$self->{map_dim_y}){
			$self->{map}->[$x]->[$y] = new AI::NeuralNet::Kohonen::Node(
				weight		 => $w,
				dim 		 => $self->{weight_dim},
				missing_mask => $self->{missing_mask},
			);
		}
	}
}



lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

		$self->_decay_learning_rate;
		&{$self->{epoch_end}} if exists $self->{epoch_end};
	}
	&{$self->{train_end}} if $self->{train_end};
	return 1;
}


=head1 METHOD find_bmu

For a specific taraget, finds the Best Matching Unit in the map
and return the x/y index.

Accepts: a reference to an array that is the target.

Returns: a reference to an array that is the BMU (and should
perhaps be abstracted as an object in its own right), indexed as follows:

=over 4

=item 0

euclidean distance from the supplied target

=item 1, 2

I<x> and I<y> co-ordinate in the map

=back

See L</METHOD get_weight_at>,
and L<AI::NeuralNet::Kohonen::Node/distance_from>,

=cut


sub find_bmu { my ($self,$target) = (shift,shift);
	my $closest = [];	# [value, x,y] value and co-ords of closest match
	for my $x (0..$self->{map_dim_x}){
		for my $y (0..$self->{map_dim_y}){
			my $distance = $self->{map}->[$x]->[$y]->distance_from( $target );
			$closest = [$distance,0,0] if $x==0 and $y==0;
			$closest = [$distance,$x,$y] if $distance < $closest->[0];
		}
	}
	return $closest;
}

=head1 METHOD get_weight_at

Returns a reference to the weight array at the supplied I<x>,I<y>
co-ordinates.

Accepts: I<x>,I<y> co-ordinates, each a scalar.

Returns: reference to an array that is the weight of the node, or
C<undef> on failure.

=cut

sub get_weight_at { my ($self,$x,$y) = (shift,shift,shift);
	return undef if $x<0 or $y<0 or $x>$self->{map_dim_x} or $y>$self->{map_dim_y};
	return $self->{map}->[$x]->[$y]->{weight};
}



=head1 METHOD get_results

Finds and returns the results for all input vectors in the supplied
reference to an array of arrays,
placing the values in the C<results> field (array reference),
and, returning it either as an array or as it is, depending on

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

		push @{$self->{results}}, $_;
	}
	# Make it a scalar if it's a scalar
#	if ($#{$self->{results}} == 0){
#		$self->{results} = @{$self->{results}}[0];
#	}
	return wantarray? @{$self->{results}} : $self->{results};
}


=head1 METHOD map_results

Clears the C<map> and fills it with the results.

The sole paramter is passed to the L<METHOD clear_map>.
L<METHOD get_results> is then called, and the results
returned fed into the object field C<map>.

This may change, as it seems misleading to re-use that field.

=cut

sub map_results { my $self=shift;

}


=head1 METHOD dump

Print the current weight values to the screen.

=cut

sub dump { my $self=shift;
	print "    ";
	for my $x (0..$self->{map_dim_x}){
		printf ("  %02d ",$x);
	}
	print"\n","-"x107,"\n";
	for my $x (0..$self->{map_dim_x}){
		for my $w (0..$self->{weight_dim}){
			printf ("%02d | ",$x);
			for my $y (0..$self->{map_dim_y}){
				printf("%.2f ", $self->{map}->[$x]->[$y]->{weight}->[$w]);
			}
			print "\n";
		}
		print "\n";
	}
}

=head1 METHOD smooth

Perform gaussian smoothing upon the map.

Accepts: the length of the side of the square gaussian mask to apply.
If not supplied, uses the value in the field C<smoothing>; if that is
empty, uses the square root of the average of the map dimensions
(C<map_dim_a>).

Returns: a true value.

=cut

sub smooth { my ($self,$smooth) = (shift,shift);
	$smooth = $self->{smoothing} if not $smooth and defined $self->{smoothing};
	return unless $smooth;
	$smooth = int( sqrt $self->{map_dim_a} );
	my $mask = _make_gaussian_mask($smooth);

	# For every weight at every point
	for my $x (0..$self->{map_dim_x}){
		for my $y (0..$self->{map_dim_y}){
			for my $w (0..$self->{weight_dim}){
				# Apply the mask
				for my $mx (0..$smooth){
					for my $my (0..$smooth){
						$self->{map}->[$x]->[$y]->{weight}->[$w] *= $mask->[$mx]->[$my];
					}
				}
			}
		}
	}
	return 1;
}



lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

	}
	@_ = <IN>;
	close IN;
	$self->_process_input_text(\@_);
	return 1;
}


=head1 METHOD save_file

Saves the map file in I<SOM_PAK> format (see L<METHOD load_input>)
at the path specified in the first argument.

Return C<undef> on failure, a true value on success.

=cut

sub save_file { my ($self,$path) = (shift,shift);
	local *OUT;
	if (not open OUT,">$path"){
		warn "Could not open file for writing <$path>: $!";

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

	}
	#- Dimensionality of the vectors (integer, compulsory).
	print OUT ($self->{weight_dim}+1)," ";	# Perl indexing
	#- Topology type, either hexa or rect (string, optional, case-sensitive).
	if (not defined $self->{display}){
		print OUT "rect ";
	} else { # $self->{display} eq 'hex'
		print OUT "hexa ";
	}
	#- Map dimension in x-direction (integer, optional).
	print OUT $self->{map_dim_x}." ";
	#- Map dimension in y-direction (integer, optional).
	print OUT $self->{map_dim_y}." ";
	#- Neighborhood type, either bubble or gaussian (string, optional, case-sen- sitive).
	print OUT "gaussian ";
	# End of header
	print OUT "\n";

	# Format input data
	foreach (@{$self->{input}}){
		print OUT join("\t",@{$_->{values}});
		if ($_->{class}){
			print OUT " $_->{class} " ;

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

		# Intentionally blank
	} elsif (not defined $display){
		$self->{display} = undef;
	} elsif ($display eq 'hexa'){
		$self->{display} = 'hex'
	} elsif ($display eq 'rect'){
		$self->{display} = undef;
	}
	#- Map dimension in x-direction (integer, optional).
	$_				      = shift @specs;
	$self->{map_dim_x}    = $_ if defined $_;
	#- Map dimension in y-direction (integer, optional).
	$_				      = shift @specs;
	$self->{map_dim_y}    = $_ if defined $_;
	#- Neighborhood type, either bubble or gaussian (string, optional, case-sen- sitive).
	# not implimented

	# Format input data
	foreach (@_){
		$self->_add_input_from_str($_);
	}
	return 1;
}

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

		}
		return $self->{input}->[$self->{tar}];
	}
}


=head1 PRIVATE METHOD _adjust_neighbours_of

Accepts: a reference to an array containing
the distance of the BMU from the target, as well
as the x and y co-ordinates of the BMU in the map;
a reference to the target, which is an
C<AI::NeuralNet::Kohonen::Input> object.

Returns: true.

=head2 FINDING THE NEIGHBOURS OF THE BMU

	                        (      t   )
	sigma(t) = sigma(0) exp ( - ------ )
	                        (   lambda )

Where C<sigma> is the width of the map at any stage
in time (C<t>), and C<lambda> is a time constant.

Lambda is our field C<time_constant>.

The map radius is naturally just half the map width.

=head2 ADJUSTING THE NEIGHBOURS OF THE BMU

	W(t+1) = W(t) + THETA(t) L(t)( V(t)-W(t) )

Where C<L> is the learning rate, C<V> the target vector,
and C<W> the weight. THETA(t) represents the influence
of distance from the BMU upon a node's learning, and
is calculated by the C<Node> class - see
L<AI::NeuralNet::Kohonen::Node/distance_effect>.

=cut

sub _adjust_neighbours_of { my ($self,$bmu,$target) = (shift,shift,shift);
	my $neighbour_radius = int (
		($self->{map_dim_a}/$self->{neighbour_factor}) * exp(- $self->{t} / $self->{time_constant})
	);

	# Distance from co-ord vector (0,0) as integer
	# Basically map_width * y  +  x
	my $centre = ($self->{map_dim_a}*$bmu->[2])+$bmu->[1];
	# Set the class of the BMU
	$self->{map}->[ $bmu->[1] ]->[ $bmu->[2] ]->{class} = $target->{class};

	for my $x ($bmu->[1]-$neighbour_radius .. $bmu->[1]+$neighbour_radius){
		next if $x<0 or $x>$self->{map_dim_x};		# Ignore those not mappable
		for my $y ($bmu->[2]-$neighbour_radius .. $bmu->[2]+$neighbour_radius){
			next if $y<0 or $y>$self->{map_dim_y};	# Ignore those not mappable
			# Skip node if it is out of the circle of influence
			next if (
				(($bmu->[1] - $x) * ($bmu->[1] - $x)) + (($bmu->[2] - $y) * ($bmu->[2] - $y))
			) > ($neighbour_radius * $neighbour_radius);

			# Adjust the weight
			for my $w (0..$self->{weight_dim}){
				next if $target->{values}->[$w] eq $self->{map}->[$x]->[$y]->{missing_mask};
				my $weight = \$self->{map}->[$x]->[$y]->{weight}->[$w];
				$$weight = $$weight + (
					$self->{map}->[$x]->[$y]->distance_effect($bmu->[0], $neighbour_radius)
					* ( $self->{l} * ($target->{values}->[$w] - $$weight) )
				);
			}
		}
	}
}


=head1 PRIVATE METHOD _decay_learning_rate

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

			}
			$_ = new AI::NeuralNet::Kohonen::Input(values=>$_);
		}
	}

	# Recieves an array of ONE element,
	# should be an array of an array of elements
	my @bmu = $self->get_results($targets);

	# Check input and output dims are the same
	if ($#{$self->{map}->[0]->[1]->{weight}} != $targets->[0]->{dim}){
		confess "target input and map dimensions differ";
	}

	for my $i (0..$#bmu){
		foreach my $w (0..$self->{weight_dim}){
			$qerror += $targets->[$i]->{values}->[$w]
			- $self->{map}->[$bmu[$i]->[1]]->[$bmu[$i]->[2]]->{weight}->[$w];
		}
	}
	$qerror /= scalar @$targets;
	return $qerror;
}


=head1 PRIVATE METHOD _add_input_from_str

Adds to the C<input> field an input vector in SOM_PAK-format

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN


=cut

sub _add_input_from_str { my ($self) = (shift);
	$_ = shift;
	s/#.*$//g;
	return undef if /^$/ or not defined $self->{weight_dim};
	my @i = split /\s+/,$_;
	return undef if $#i < $self->{weight_dim}; # catch bad lines
	# 'x' in files signifies unknown: we prefer undef?
#	@i[0..$self->{weight_dim}] = map{
#		$_ eq 'x'? undef:$_
#	} @i[0..$self->{weight_dim}];
	my %args = (
		dim 	=> $self->{weight_dim},
		values	=> [ @i[0..$self->{weight_dim}] ],
	);
	$args{class} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+1];
	$args{enhance} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+2];
	$args{fixed} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+3];
	push @{$self->{input}}, new AI::NeuralNet::Kohonen::Input(%args);

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

=item -

Enhancement factor: e.g. weight=3.  The training rate for the
corresponding input pattern vector is multiplied by this
parameter so that the reference vectors are updated as if this
input vector were repeated 3 times during training (i.e., as if
the same vector had been stored 2 extra times in the data file).

=item -

Fixed-point qualifier: e.g. fixed=2,5.  The map unit defined by
the fixed-point coordinates (x = 2; y = 5) is selected instead of
the best-matching unit for training. (See below for the definition
of coordinates over the map.) If several inputs are forced to
known locations, a wanted orientation results in the map.

=back

=back

Not (yet) implimented in file format:

=over 4

=item *

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

=back

=cut

=head1 SEE ALSO

See L<AI::NeuralNet::Kohonen::Node/distance_from>;
L<AI::NeuralNet::Kohonen::Demo::RGB>.

L<The documentation for C<SOM_PAK>|ftp://cochlea.hut.fi/pub/som_pak>,
which has lots of advice on map building that may or may not be applicable yet.

A very nice explanation of Kohonen's algorithm:
L<AI-Junkie SOM tutorial part 1|http://www.fup.btinternet.co.uk/aijunkie/som1.html>

=head1 AUTHOR AND COYRIGHT

This implimentation Copyright (C) Lee Goddard, 2003-2006.
All Rights Reserved.

Available under the same terms as Perl itself.

t/AI-NeuralNet-Kohonen.t  view on Meta::CPAN

	weight_dim => 2,
	input => [
		[1,2,3]
	],
);
isa_ok( $net->{input}, 'ARRAY');

is( $net->{input}->[0]->[0],1);
is( $net->{input}->[0]->[1],2);
is( $net->{input}->[0]->[2],3);
is( $net->{map_dim_a},19);

$net = new AI::NeuralNet::Kohonen(
	weight_dim => 2,
	input => [
		[1,2,3]
	],
	map_dim_x => 10,
	map_dim_y => 20,
);
is($net->{map_dim_a},15);


# Node test
my $node = new AI::NeuralNet::Kohonen::Node;
is($node,undef) or BAIL_OUT();
$node = new AI::NeuralNet::Kohonen::Node(
	weight => [0.1, 0.6, 0.5],
);
isa_ok( $node, 'AI::NeuralNet::Kohonen::Node');
is( $node->{dim}, 2);
my $input = new AI::NeuralNet::Kohonen::Input(
	dim		=> 2,
	values	=> [1,0,0],
);

is( sprintf("%.2f",$node->distance_from($input)), 1.19);

$net = AI::NeuralNet::Kohonen->new(
	map_dim_x	=> 14,
	map_dim_y	=> 10,
	epoch_end	=> sub {print"."},
	train_end	=> sub {print"\n"},
	epochs		=> 2,
	table		=>
"3
1 0 0 red
0 1 0 green
0 0 1 blue
",
);
isa_ok( $net->{input}, 'ARRAY');
isa_ok( $net->{input}->[0],'AI::NeuralNet::Kohonen::Input');
is( $net->{input}->[0]->{values}->[0],1);
is( $net->{input}->[0]->{values}->[1],0);
is( $net->{input}->[0]->{values}->[2],0);
is( $net->{weight_dim}, 2);
isa_ok( $net->{map}, 'ARRAY');
$net->train;
isa_ok( $net->{map}, 'ARRAY');
my @bmu = $net->get_results();
isa_ok( $bmu[0], 'ARRAY');
isa_ok( $net->{map}->[ 0 ]->[ 0 ], "AI::NeuralNet::Kohonen::Node" );

@bmu = $net->get_results([[0.5,0,0]]);
isa_ok($net->{map}->[ $bmu[0]->[1] ]->[ $bmu[0]->[2] ],
	"AI::NeuralNet::Kohonen::Node"
);
# warn $net->{map}->[ $bmu[1] ]->[ $bmu[2] ];#->get_class;
# Get the nearest class?

{
	my $i=0;
	my $targets = [[1, 0, 0],[0,1,0],[0,0,1]];
	my @bmu = $net->get_results($targets);
	# qerror
	my $qerror=0;
	foreach my $j (0..$net->{weight_dim}){ # loop over weights
		$qerror += $targets->[0]->{values}->[$j]
		- $net->{map}->[$bmu[$i]->[1]]->[$bmu[$i]->[2]]->{weight}->[$j];
	}
	is( $qerror, $net->quantise_error([ [1,0,0] ]));
}


SKIP: {
	skip 'Lost the input file',9;

	# Input file tests\n";
	$net = AI::NeuralNet::Kohonen->new(
		epochs	=> 0,
		input_file => $dir.'ex.dat',
		epoch_end	=> sub {print"."},
		train_end	=> sub {print"\n"},
	);
	isa_ok( $net,'AI::NeuralNet::Kohonen');
	isa_ok( $net->{input}, 'ARRAY');
	is( scalar @{$net->{input}}, 3840);
	is( $net->{map_dim_x}, 19);
	is ($net->{input}->[$#{$net->{input}}]->{values}->[4], 406.918518);
	is( ref $net->{input}->[$#{$net->{input}}]->{values}, 'ARRAY');
	diag "Training on a big file: this is SLOW, sorry\n";
	is($net->train,1);
	my $filename = substr(time,0,8);
	ok($net->save_file($filename),"Saved file as ".$filename);
	ok(unlink($filename),'Unlinked test file '.$filename);
}

sub BAIL_OUT {



( run in 0.956 second using v1.01-cache-2.11-cpan-49f99fa48dc )