AI-NeuralNet-Kohonen

 view release on metacpan or  search on metacpan

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN


Private fields:

=over 4

=item time_constant

The number of iterations (epochs) to be completed, over the log of the map radius.

=item t

The current epoch, or moment in time.

=item l

The current learning rate.

=item map_dim_a

Average of the map dimensions.

=back

=cut

sub new {
	my $class					= shift;
	my %args					= @_;
	my $self 					= bless \%args,$class;

	$self->{missing_mask}		= 'x' unless defined $self->{missing_mask};
	$self->_process_table if defined $self->{table};	# Creates {input}
	$self->load_input($self->{input_file}) if defined $self->{input_file};	# Creates {input}
	if (not defined $self->{input}){
		cluck "No {input} supplied!";
		return undef;
	}

	$self->{map_dim_x}			= 19 unless defined $self->{map_dim_x};
	$self->{map_dim_y}			= 19 unless defined $self->{map_dim_y};
	# Legacy from...yesterday
	if ($self->{map_dim}){
		$self->{map_dim_x} 		= $self->{map_dim_y} = $self->{map_dim}
	}
	if (not defined $self->{map_dim_x} or $self->{map_dim_x}==0
	 or not defined $self->{map_dim_y} or $self->{map_dim_y}==0){
		 confess "No map dimensions in the input!";
	 }
	if ($self->{map_dim_x}>$self->{map_dim_y}){
		$self->{map_dim_a} 		= $self->{map_dim_y} + (($self->{map_dim_x}-$self->{map_dim_y})/2)
	} else {
		$self->{map_dim_a} 		= $self->{map_dim_x} + (($self->{map_dim_y}-$self->{map_dim_x})/2)
	}
	$self->{neighbour_factor}	= 2.5 unless $self->{neighbour_factor};
	$self->{epochs}				= 99 unless defined $self->{epochs};
	$self->{epochs}				= 1 if $self->{epochs}<1;
	$self->{time_constant}		= $self->{epochs} / log($self->{map_dim_a}) unless $self->{time_constant};	# to base 10?
	$self->{learning_rate}		= 0.5 unless $self->{learning_rate};
	$self->{l}					= $self->{learning_rate};
	if (not $self->{weight_dim}){
		cluck "{weight_dim} not set";
		return undef;
	}
	$self->randomise_map;
	return $self;
}




=head1 METHOD randomise_map

Populates the C<map> with nodes that contain random real nubmers.

See L<AI::NerualNet::Kohonen::Node/CONSTRUCTOR new>.

=cut

sub randomise_map { my $self=shift;
	confess "{weight_dim} not set" unless $self->{weight_dim};
	confess "{map_dim_x} not set" unless $self->{map_dim_x};
	confess "{map_dim_y} not set" unless $self->{map_dim_y};
	for my $x (0..$self->{map_dim_x}){
		$self->{map}->[$x] = [];
		for my $y (0..$self->{map_dim_y}){
			$self->{map}->[$x]->[$y] = new AI::NeuralNet::Kohonen::Node(
				dim => $self->{weight_dim},
				missing_mask => $self->{missing_mask},
			);
		}
	}
}


=head1 METHOD clear_map

As L<METHOD randomise_map> but sets all C<map> nodes to
either the value supplied as the only paramter, or C<undef>.

=cut

sub clear_map { my $self=shift;
	confess "{weight_dim} not set" unless $self->{weight_dim};
	confess "{map_dim_x} not set" unless $self->{map_dim_x};
	confess "{map_dim_y} not set" unless $self->{map_dim_y};
	my $val = shift || $self->{missing_mask};
	my $w = [];
	foreach (0..$self->{weight_dim}){
		push @$w, $val;
	}
	for my $x (0..$self->{map_dim_x}){
		$self->{map}->[$x] = [];
		for my $y (0..$self->{map_dim_y}){
			$self->{map}->[$x]->[$y] = new AI::NeuralNet::Kohonen::Node(
				weight		 => $w,
				dim 		 => $self->{weight_dim},
				missing_mask => $self->{missing_mask},
			);
		}
	}
}




=head1 METHOD train

Optionally accepts a parameter that is the number of epochs
for which to train: the default is the value in the C<epochs> field.

An epoch is composed of A number of generations, the number being
the total number of input vectors.

For every generation, iterates:

=over 4

=item 1

selects a target from the input array (see L</PRIVATE METHOD _select_target>);

=item 2

finds the best-matching unit (see L</METHOD find_bmu>);

=item 3

adjusts the neighbours of the BMU (see L</PRIVATE METHOD _adjust_neighbours_of>);

=back

At the end of every generation, the learning rate is decayed
(see L</PRIVATE METHOD _decay_learning_rate>).

See C<CONSTRUCTOR new> for details of applicable callbacks.

Returns a true value.

=cut

sub train { my ($self,$epochs) = (shift,shift);
	$epochs = $self->{epochs} unless defined $epochs;
	&{$self->{train_start}} if exists $self->{train_start};
	for my $epoch (1..$epochs){
		$self->{t} = $epoch;

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN

		print OUT join("\t",@{$_->{values}});
		if ($_->{class}){
			print OUT " $_->{class} " ;
		}
		print OUT "\n";
	}
	# EOF
	print OUT chr 26;
	close OUT;
	return 1;
}


#
# Process ASCII from table field or input file
# Accepts: ASCII as array or array ref
#
sub _process_input_text { my ($self) = (shift);
	if (not defined $_[1]){
		if (ref $_[0] eq 'ARRAY'){
			@_ = @{$_[0]};
		} else {
			@_ = split/[\n\r\f]+/,$_[0];
		}
	}
	chomp @_;
	my @specs = split/\s+/,(shift @_);
	#- Dimensionality of the vectors (integer, compulsory).
	$self->{weight_dim} = shift @specs;
	$self->{weight_dim}--; # Perl indexing
	#- Topology type, either hexa or rect (string, optional, case-sensitive).
	my $display		    = shift @specs;
	if (not defined $display and exists $self->{display}){
		# Intentionally blank
	} elsif (not defined $display){
		$self->{display} = undef;
	} elsif ($display eq 'hexa'){
		$self->{display} = 'hex'
	} elsif ($display eq 'rect'){
		$self->{display} = undef;
	}
	#- Map dimension in x-direction (integer, optional).
	$_				      = shift @specs;
	$self->{map_dim_x}    = $_ if defined $_;
	#- Map dimension in y-direction (integer, optional).
	$_				      = shift @specs;
	$self->{map_dim_y}    = $_ if defined $_;
	#- Neighborhood type, either bubble or gaussian (string, optional, case-sen- sitive).
	# not implimented

	# Format input data
	foreach (@_){
		$self->_add_input_from_str($_);
	}
	return 1;
}


=head1 PRIVATE METHOD _select_target

Return a random target from the training set in the C<input> field,
unless the C<targeting> field is defined, when the targets are
iterated over.

=cut

sub _select_target { my $self=shift;
	if (not $self->{targeting}){
		return $self->{input}->[
			(int rand(scalar @{$self->{input}}))
		];
	}
	else {
		$self->{tar}++;
		if ($self->{tar}>$#{ $self->{input} }){
			$self->{tar} = 0;
		}
		return $self->{input}->[$self->{tar}];
	}
}


=head1 PRIVATE METHOD _adjust_neighbours_of

Accepts: a reference to an array containing
the distance of the BMU from the target, as well
as the x and y co-ordinates of the BMU in the map;
a reference to the target, which is an
C<AI::NeuralNet::Kohonen::Input> object.

Returns: true.

=head2 FINDING THE NEIGHBOURS OF THE BMU

	                        (      t   )
	sigma(t) = sigma(0) exp ( - ------ )
	                        (   lambda )

Where C<sigma> is the width of the map at any stage
in time (C<t>), and C<lambda> is a time constant.

Lambda is our field C<time_constant>.

The map radius is naturally just half the map width.

=head2 ADJUSTING THE NEIGHBOURS OF THE BMU

	W(t+1) = W(t) + THETA(t) L(t)( V(t)-W(t) )

Where C<L> is the learning rate, C<V> the target vector,
and C<W> the weight. THETA(t) represents the influence
of distance from the BMU upon a node's learning, and
is calculated by the C<Node> class - see
L<AI::NeuralNet::Kohonen::Node/distance_effect>.

=cut

sub _adjust_neighbours_of { my ($self,$bmu,$target) = (shift,shift,shift);
	my $neighbour_radius = int (
		($self->{map_dim_a}/$self->{neighbour_factor}) * exp(- $self->{t} / $self->{time_constant})
	);

lib/AI/NeuralNet/Kohonen.pm  view on Meta::CPAN


Returns the gaussian weight.

See also L<_decay_learning_rate>.

=cut

sub _gauss_weight { my ($r, $sigma) = (shift,shift);
	return exp( -($r**2) / (2 * $sigma**2) );
}


=head1 PUBLIC METHOD quantise_error

Returns the quantise error for either the supplied points,
or those in the C<input> field.

=cut


sub quantise_error { my ($self,$targets) = (shift,shift);
	my $qerror=0;
	if (not defined $targets){
		$targets = $self->{input};
	} else {
		foreach (@$targets){
			if (not ref $_ or ref $_ ne 'ARRAY'){
				croak "Supplied target parameter should be an array of arrays!"
			}
			$_ = new AI::NeuralNet::Kohonen::Input(values=>$_);
		}
	}

	# Recieves an array of ONE element,
	# should be an array of an array of elements
	my @bmu = $self->get_results($targets);

	# Check input and output dims are the same
	if ($#{$self->{map}->[0]->[1]->{weight}} != $targets->[0]->{dim}){
		confess "target input and map dimensions differ";
	}

	for my $i (0..$#bmu){
		foreach my $w (0..$self->{weight_dim}){
			$qerror += $targets->[$i]->{values}->[$w]
			- $self->{map}->[$bmu[$i]->[1]]->[$bmu[$i]->[2]]->{weight}->[$w];
		}
	}
	$qerror /= scalar @$targets;
	return $qerror;
}


=head1 PRIVATE METHOD _add_input_from_str

Adds to the C<input> field an input vector in SOM_PAK-format
whitespace-delimited ASCII.

Returns C<undef> on failure to add an item (perhaps because
the data passed was a comment, or the C<weight_dim> flag was
not set); a true value on success.

=cut

sub _add_input_from_str { my ($self) = (shift);
	$_ = shift;
	s/#.*$//g;
	return undef if /^$/ or not defined $self->{weight_dim};
	my @i = split /\s+/,$_;
	return undef if $#i < $self->{weight_dim}; # catch bad lines
	# 'x' in files signifies unknown: we prefer undef?
#	@i[0..$self->{weight_dim}] = map{
#		$_ eq 'x'? undef:$_
#	} @i[0..$self->{weight_dim}];
	my %args = (
		dim 	=> $self->{weight_dim},
		values	=> [ @i[0..$self->{weight_dim}] ],
	);
	$args{class} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+1];
	$args{enhance} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+2];
	$args{fixed} = $i[$self->{weight_dim}+1] if $i[$self->{weight_dim}+3];
	push @{$self->{input}}, new AI::NeuralNet::Kohonen::Input(%args);

	return 1;
}


#
# Processes the 'table' paramter to the constructor
#
sub _process_table { my $self = shift;
	$_ = $self->_process_input_text( $self->{table} );
	undef $self->{table};
	return $_;
}


__END__
1;

=head1 FILE FORMAT

This module has begun to attempt the I<SOM_PAK> format:
I<SOM_PAK> file format version 3.1 (April 7, 1995),
Helsinki University of Technology, Espoo:

=over 4

The input data is stored in ASCII-form as a list of entries, one line
...for each vectorial sample.

The first line of the file is reserved for status knowledge of the
entries; in the present version it is used to define the following
items (these items MUST occur in the indicated order):

   - Dimensionality of the vectors (integer, compulsory).
   - Topology type, either hexa or rect (string, optional, case-sensitive).
   - Map dimension in x-direction (integer, optional).
   - Map dimension in y-direction (integer, optional).
   - Neighborhood type, either bubble or gaussian (string, optional, case-sen-
      sitive).



( run in 0.437 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )