AI-MXNet
lib/AI/MXNet/Optimizer.pm
# create_state for Adam: allocate the per-weight [mean, variance]
# accumulators, both zero-initialized (update() below unpacks the
# state as ($mean, $var)).
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    return [
        AI::MXNet::NDArray->zeros(
            $weight->shape,
            ctx   => $weight->context,
            dtype => $weight->dtype
        ),  # mean
        AI::MXNet::NDArray->zeros(
            $weight->shape,
            ctx   => $weight->context,
            dtype => $weight->dtype
        )   # variance
    ];
}
method update(
    Index $index,
    AI::MXNet::NDArray $weight,
    AI::MXNet::NDArray $grad,
    ArrayRef[AI::MXNet::NDArray] $state
)
{
    my $lr = $self->_get_lr($index);
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);
    my $t = $self->_index_update_count->{$index};
    # Fold Adam's bias correction for the zero-initialized accumulators
    # into the step size: lr_t = lr * sqrt(1 - beta2**t) / (1 - beta1**t).
    my $coef1 = 1 - $self->beta1**$t;
    my $coef2 = 1 - $self->beta2**$t;
    $lr *= sqrt($coef2)/$coef1;
    my ($mean, $var) = @{ $state };
    # Delegate to the fused in-place kernel; it updates $mean and $var
    # and writes the new weights back into $weight via out => $weight.
    AI::MXNet::NDArray->adam_update(
        $weight, $grad, $mean, $var,
        {
            out => $weight,
            lr  => $lr,
            wd  => $wd,
            %{ $self->kwargs }
        }
    );
}
__PACKAGE__->register;
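=pod

    The bias correction above folds Adam's 1/(1 - beta1**t) and
    sqrt(1 - beta2**t) terms into the learning rate before the fused
    adam_update kernel runs. A standalone sketch of that arithmetic
    (plain Perl, no MXNet calls; the base learning rate and beta values
    are example numbers, not defaults taken from this file):

        my ($beta1, $beta2) = (0.9, 0.999);
        my $base_lr = 0.001;
        for my $t (1, 10, 100)
        {
            my $coef1 = 1 - $beta1 ** $t;
            my $coef2 = 1 - $beta2 ** $t;
            my $lr_t  = $base_lr * sqrt($coef2) / $coef1;
            printf "t=%-3d effective lr = %.6f\n", $t, $lr_t;
        }

    Without the correction, the zero-initialized mean and variance
    accumulators would leave the raw mean/sqrt(var) ratio misscaled early
    in training; the folded factor cancels that bias and approaches 1 as
    $t grows.

=cut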
=head1 NAME

    AI::MXNet::AdaGrad - AdaGrad optimizer of Duchi et al., 2011

=cut

=head1 DESCRIPTION

    AdaGrad optimizer of Duchi et al., 2011.

    This code follows Eq(5) of http://arxiv.org/pdf/1212.5701v1.pdf
    (Matthew D. Zeiler, 2012). AdaGrad adapts each parameter's step size
    to its gradient history and can help the network converge faster in
    some cases.

    Parameters
    ----------
    learning_rate : float, optional
        Step size. Default value is set to 0.05.

    wd : float, optional
        L2 regularization coefficient added to all the weights.

    rescale_grad : float, optional
        Rescaling factor of the gradient. Normally should be 1/batch_size.

    eps : float, optional
        A small float number added to keep the update numerically stable.
        Default value is set to 1e-7.

    clip_gradient : float, optional
        Clip the gradient to the range [-clip_gradient, clip_gradient].
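    A minimal construction sketch (assuming only the Mouse attributes
    this class and its AI::MXNet::Optimizer parent expose for the
    parameters listed above; the numbers are example values):

        use AI::MXNet;
        my $opt = AI::MXNet::AdaGrad->new(
            learning_rate => 0.05,
            wd            => 1e-5,
            rescale_grad  => 1/128,    # e.g. batch size of 128
            clip_gradient => 5
        );
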
=cut
package AI::MXNet::AdaGrad;
use Mouse;
extends 'AI::MXNet::Optimizer';
has 'float_stable_eps' => (is => "rw", isa => "Num", default => 1e-7);
has '+learning_rate' => (default => 0.05);
# create_state for AdaGrad: a single zero-initialized accumulator that
# stores the running sum of squared gradients.
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    return AI::MXNet::NDArray->zeros(
        $weight->shape,
        ctx => $weight->context
    );  # history
}
method update(
    Index $index,
    AI::MXNet::NDArray $weight,
    AI::MXNet::NDArray $grad,
    AI::MXNet::NDArray $state
)
{
    my $lr = $self->_get_lr($index);
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);
    $grad *= $self->rescale_grad;
    if($self->clip_gradient)
    {
        $grad = AI::MXNet::NDArray->clip(
            $grad,
            -$self->clip_gradient,
            $self->clip_gradient
        );
    }
    # Accumulate the squared gradient, then scale the step by the inverse
    # square root of the history (Eq(5)); weight decay enters as an extra
    # wd * weight term inside the update.
    my $history = $state;
    $history += $grad * $grad;
    $weight += -$lr * (
        $grad / AI::MXNet::NDArray->sqrt($history + $self->float_stable_eps)
        + $wd * $weight
    );
}
__PACKAGE__->register;
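=pod

    A scalar sketch of the update rule implemented above (example values
    only, no MXNet calls): because the squared-gradient history grows
    monotonically, the per-coordinate step size only ever shrinks.

        my ($w, $history)   = (1.0, 0.0);
        my ($lr, $eps, $wd) = (0.05, 1e-7, 0);
        for my $g (0.5, 0.5, 0.5)
        {
            $history += $g * $g;
            my $step = $lr * ($g / sqrt($history + $eps) + $wd * $w);
            $w -= $step;
            printf "w = %.5f, step = %.5f\n", $w, $step;
        }

=cut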