AI-MXNet
view release on metacpan or search on metacpan
t/test_optimizers.t view on Meta::CPAN
package PerlAdam;
# Pure-Perl reference implementation of the Adam optimizer, used by these
# tests to cross-check AI::MXNet's built-in Adam updater.
use strict;
use warnings;
use AI::MXNet qw(mx);
use Mouse;
use AI::MXNet::Function::Parameters;
extends 'AI::MXNet::Optimizer';
has 'beta1' => (is => 'rw', default => 0.9);     # decay rate for the first-moment (mean) estimate
has 'beta2' => (is => 'rw', default => 0.999);   # decay rate for the second-moment (variance) estimate
has 'epsilon' => (is => 'rw', default => 1e-8);  # small constant added for numerical stability
has 'rescale_grad' => (is => 'rw', default => 1);       # multiplier applied to every incoming gradient
has 'decay_factor' => (is => 'rw', default => (1-1e-8));  # NOTE(review): not referenced by update() below
around BUILDARGS => \&init;
# BUILDARGS wrapper installed via 'around' above: seeds the constructor with
# the defaults these tests use (learning_rate 0.001, wd 0.9). Caller-supplied
# %kwargs come last in the list, so they override the defaults.
func init($code, $class, %kwargs)
{
    return $class->$code(learning_rate => 0.001, wd => 0.9, %kwargs);
}
=pod
Create additional optimizer state: mean, variance
Parameters
----------
weight : NDArray
The weight data
=cut
# Build the per-parameter Adam state: two zero NDArrays (mean and variance)
# that mirror the weight's shape, context and dtype.
method create_state($index, $weight)
{
    my @moments = map {
        mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype)
    } 0 .. 1;    # [0] => mean, [1] => variance
    return \@moments;
}
=pod
Update the parameters.
Parameters
----------
index : int
An unique integer key used to index the parameters
weight : NDArray
weight ndarray
grad : NDArray
grad ndarray
state : NDArray or other objects returned by init_state
The auxiliary state used in optimization.
=cut
# Perform one Adam update step on a single parameter.
#
# Parameters:
#   $index  - integer key identifying the parameter
#   $weight - NDArray, updated in place
#   $grad   - NDArray gradient (left untouched; a rescaled copy is used)
#   $state  - arrayref [mean, variance] as produced by create_state()
method update($index, $weight, $grad, $state)
{
    my $lr = $self->_get_lr($index);
    $self->_update_count($index);
    # step count $t drives the bias correction below
    my $t = $self->_index_update_count->{$index};
    my ($mean, $variance) = @$state;
    my $wd = $self->_get_wd($index);
    # rescale and fold in L2 weight decay; this allocates a fresh NDArray,
    # so the in-place ops below never touch the caller's gradient
    $grad = $grad * $self->rescale_grad + $wd * $weight;
    # fix: use 'defined' rather than plain truthiness, matching how
    # clip_gradient is tested in the other optimizers in this file
    if(defined $self->clip_gradient)
    {
        mx->nd->clip($grad, -$self->clip_gradient, $self->clip_gradient, { out => $grad });
    }
    # running first and second moments, updated in place; note square()
    # clobbers $grad via out=>, which is safe since $grad is local here
    $mean *= $self->beta1;
    $mean += $grad * (1 - $self->beta1);
    $variance *= $self->beta2;
    $variance += (1 - $self->beta2) * mx->nd->square($grad, { out => $grad });
    # bias-corrected effective learning rate
    my $coef1 = 1 - $self->beta1**$t;
    my $coef2 = 1 - $self->beta2**$t;
    $lr *= sqrt($coef2)/$coef1;
    $weight -= $lr*$mean/(mx->nd->sqrt($variance) + $self->epsilon);
}
=pod
RMSProp optimizer of Tieleman & Hinton, 2012,
For centered=False, the code follows the version in
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf by
Tieleman & Hinton, 2012
For centered=True, the code follows the version in
http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
Parameters
----------
learning_rate : float, optional
Step size.
Default value is set to 0.001.
gamma1: float, optional
decay factor of moving average for gradient, gradient^2.
Default value is set to 0.9.
gamma2: float, optional
"momentum" factor.
Default value if set to 0.9.
Only used if centered=True
epsilon : float, optional
Default value is set to 1e-8.
centered : boolean, optional
Use Graves or Tielemans & Hintons version of RMSProp
wd : float, optional
L2 regularization coefficient add to all the weights
rescale_grad : float, optional
rescaling factor of gradient.
clip_gradient : float, optional
clip gradient in range [-clip_gradient, clip_gradient]
clip_weights : float, optional
clip weights in range [-clip_weights, clip_weights]
=cut
package PerlRMSProp;
# Pure-Perl reference implementation of RMSProp (both the plain
# Tieleman & Hinton form and the centered Graves form), used to
# cross-check AI::MXNet's built-in RMSProp updater.
use Mouse;
extends 'AI::MXNet::Optimizer';
has '+learning_rate' => (default => 0.001);
has 'gamma1' => (is => "ro", isa => "Num", default => 0.9);   # decay factor for the squared-gradient average
has 'gamma2' => (is => "ro", isa => "Num", default => 0.9);   # momentum factor, only used when centered
has 'epsilon' => (is => "ro", isa => "Num", default => 1e-8); # numerical-stability constant
has 'centered' => (is => "ro", isa => "Bool", default => 0);  # selects the Graves (centered) variant
has 'clip_weights' => (is => "ro", isa => "Num");             # optional clamp applied to the weights after the step
# For centered=False: n
# For centered=True: n, g, delta
# Allocate the RMSProp state for one parameter: [n] for the plain variant,
# [n, g, delta] for the centered (Graves) variant. Every buffer is a zero
# NDArray matching the weight's shape and context.
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    my $make_zeros = sub {
        AI::MXNet::NDArray->zeros(
            $weight->shape,
            ctx => $weight->context
        );
    };
    return $self->centered
        ? [ $make_zeros->(), $make_zeros->(), $make_zeros->() ]  # n, g, delta
        : [ $make_zeros->() ];                                   # n
}
# Perform one RMSProp update step on a single parameter.
#
# Parameters:
#   $index  - integer key identifying the parameter
#   $weight - NDArray, updated in place
#   $grad   - NDArray gradient (left untouched; a rescaled copy is used)
#   $state  - arrayref [n] (plain) or [n, g, delta] (centered)
method update($index, $weight, $grad, $state)
{
    my $lr = $self->_get_lr($index);
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);
    # rescale and fold in L2 weight decay (allocates a fresh NDArray)
    $grad = $grad * $self->rescale_grad + $wd * $weight;
    # fix: the clip-gradient code was duplicated verbatim in both branches;
    # it precedes any use of the state, so hoist it and do it once
    if(defined $self->clip_gradient)
    {
        $grad = mx->nd->clip($grad, -$self->clip_gradient, $self->clip_gradient);
    }
    if(not $self->centered)
    {
        # Tieleman & Hinton variant: one running average of squared gradients
        my ($n) = @$state;
        $n .= (1 - $self->gamma1) * ($grad * $grad) + $self->gamma1 * $n;
        $weight -= $lr * $grad/(mx->nd->sqrt($n + $self->epsilon));
    }
    else
    {
        # Graves variant: also track the mean gradient g and a momentum
        # buffer delta; the variance estimate is centered as n - g^2
        my ($n, $g, $delta) = @$state;
        $n .= (1 - $self->gamma1) * ($grad * $grad) + $self->gamma1 * $n;
        $g .= (1 - $self->gamma1) * $grad + $self->gamma1 * $g;
        $delta .= ($self->gamma2) * $delta - $lr * $grad/(mx->nd->sqrt($n - $g*$g + $self->epsilon));
        $weight += $delta;
    }
    # fix: test 'defined' rather than truthiness, consistent with the
    # clip_gradient handling above; clamps the weights in place
    if(defined $self->clip_weights)
    {
        mx->nd->clip($weight, -$self->clip_weights, $self->clip_weights, { out => $weight });
    }
}
package PerlSGD;
# Pure-Perl reference implementation of SGD (optional momentum and
# float16 multi-precision support), used to cross-check AI::MXNet's SGD.
use Mouse;
extends 'AI::MXNet::Optimizer';
has '+learning_rate' => (default => 0.01);
has 'momentum' => (is => "ro", isa => "Num", default => 0);          # 0 disables the momentum buffer
has 'multi_precision' => (is => 'ro', isa => 'Bool', default => 0);  # keep a float32 master copy of float16 weights
# Create additional optimizer state: momentum
# Allocate SGD state for one parameter.
# Returns [momentum, float32 master weight] when multi-precision applies
# to a float16 weight; otherwise a single momentum NDArray, or undef when
# momentum is disabled. update() distinguishes the cases via ref eq 'ARRAY'.
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    my $wants_multi_precision = ($self->multi_precision and $weight->dtype eq 'float16');
    if($wants_multi_precision)
    {
        my $mom;
        if($self->momentum != 0)
        {
            $mom = mx->nd->zeros($weight->shape, ctx => $weight->context, dtype=>'float32');
        }
        # master copy keeps full float32 precision for the accumulation
        my $master = mx->nd->array($weight, ctx=>$weight->context, dtype=>'float32');
        return [$mom, $master];
    }
    # plain path: momentum buffer only when momentum is enabled
    return undef if $self->momentum == 0;
    return mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype);
}
method update($index, $weight, $grad, $state)
{
my $lr = $self->_get_lr($index);
my $wd = $self->_get_wd($index);
$self->_update_count($index);
my $use_multi_precision = ref($state) eq 'ARRAY';
if(not $use_multi_precision)
{
if($self->momentum == 0)
{
if(defined $self->clip_gradient)
{
$weight .= ((1 - $lr*$wd)*$weight -
$lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
);
}
else
{
$weight .= (1 - $lr*$wd)*$weight - $lr*$self->rescale_grad*$grad;
}
}
else
{
my $mom = $state;
if(defined $self->clip_gradient)
{
$mom .= ($self->momentum*$mom - $lr*$wd*$weight -
$lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
);
$weight += $mom;
}
else
{
( run in 0.446 second using v1.01-cache-2.11-cpan-39bf76dae61 )