AI-MXNet
view release on metacpan or search on metacpan
lib/AI/MXNet/Optimizer.pm view on Meta::CPAN
L2 regularization coefficient added to all the weights
rescale_grad : float, optional
rescaling factor of gradient. Normally should be 1/batch_size.
clip_gradient : float, optional
clip gradient in range [-clip_gradient, clip_gradient]
=cut
package AI::MXNet::AdaDelta;
use Mouse;
extends 'AI::MXNet::Optimizer';

# Hyper-parameters read by update():
#   rho     - weight given to the previous value of each running average
#   epsilon - constant added to both accumulators before the square root
has 'rho'     => (is => "rw", isa => "Num", default => 0.9);
has 'epsilon' => (is => "rw", isa => "Num", default => 1e-5);

# Builds the optimizer state for one weight.
#
# Returns an array reference holding two zero-filled NDArrays, each created
# with the shape and context of $weight:
#   [0] accumulated g (squared gradients)
#   [1] accumulated delta (squared updates)
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    return [
        map {
            AI::MXNet::NDArray->zeros($weight->shape, ctx => $weight->context)
        } 1 .. 2
    ];
}

# Applies one AdaDelta step to $weight.
#
#   $index  - key used to look up the weight decay and bump the update count
#   $weight - parameter array being optimized
#   $grad   - gradient for $weight; it is rescaled (and optionally clipped)
#             before use
#   $state  - the two accumulators produced by create_state
#
# NOTE(review): '.=', '*=' and '-=' on NDArrays are presumably overloaded to
# write into the existing array rather than rebind the variable; confirm
# against AI::MXNet::NDArray.
method update(
    Index                        $index,
    AI::MXNet::NDArray           $weight,
    AI::MXNet::NDArray           $grad,
    ArrayRef[AI::MXNet::NDArray] $state
)
{
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);

    # Rescale first, then clip to [-clip_gradient, clip_gradient] when a
    # true (non-zero) clip value is configured.
    $grad *= $self->rescale_grad;
    my $clip = $self->clip_gradient;
    if($clip)
    {
        $grad = AI::MXNet::NDArray->clip($grad, -$clip, $clip);
    }

    my $rho          = $self->rho;
    my $epsilon      = $self->epsilon;
    my $fresh_weight = 1 - $rho;
    my ($acc_g, $acc_delta) = @{ $state };

    # Running average of the squared gradient.
    $acc_g .= $rho * $acc_g + $fresh_weight * $grad * $grad;

    # Step = sqrt(acc_delta + eps) / sqrt(acc_g + eps) * grad.
    my $rms_delta     = ($acc_delta + $epsilon)->sqrt;
    my $rms_g         = ($acc_g + $epsilon)->sqrt;
    my $current_delta = $rms_delta / $rms_g * $grad;

    # Running average of the squared step.
    $acc_delta .= $rho * $acc_delta + $fresh_weight * $current_delta * $current_delta;

    # Apply the step together with the weight-decay term.
    $weight -= $current_delta + $wd * $weight;
}

# NOTE(review): register is inherited; presumably it adds this class to the
# optimizer registry - confirm in AI::MXNet::Optimizer.
__PACKAGE__->register;
# Minimal optimizer intended for test use only.
package AI::MXNet::Test;
use Mouse;
extends 'AI::MXNet::Optimizer';

# Creates the state for one weight: a zero-filled NDArray with the shape and
# context of $weight. update() later copies the weight into it.
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    my @zeros_args = ($weight->shape, ctx => $weight->context);
    return AI::MXNet::NDArray->zeros(@zeros_args);
}

# Performs w += rescale_grad * grad, then stores the updated weight in $state.
#
#   $index  - weight index (not used by this optimizer)
#   $weight - parameter array to update
#   $grad   - gradient for $weight
#   $state  - array produced by create_state
#
# NOTE(review): '+=' and '.=' on NDArrays are presumably overloaded to write
# into the existing array; confirm against AI::MXNet::NDArray.
method update(
    Index              $index,
    AI::MXNet::NDArray $weight,
    AI::MXNet::NDArray $grad,
    AI::MXNet::NDArray $state
)
{
    my $scaled_grad = $grad * $self->rescale_grad;
    $weight += $scaled_grad;
    $state .= $weight;
}

# NOTE(review): register is inherited; presumably it adds this class to the
# optimizer registry - confirm in AI::MXNet::Optimizer.
__PACKAGE__->register;
package AI::MXNet::Ftrl;
=head1 NAME
AI::MXNet::Ftrl
=cut
=head1 DESCRIPTION
Reference: Ad Click Prediction: a View from the Trenches
Parameters
----------
lambda1 : float, optional
L1 regularization coefficient.
learning_rate : float, optional
The initial learning rate.
beta : float, optional
Per-coordinate learning rate correlation parameter.
\eta_{t,i} = \frac{learning\_rate}{\beta + \sqrt{\sum_{s=1}^{t} g_{s,i}^2}}
=cut
use Mouse;
extends 'AI::MXNet::Optimizer';
has '+learning_rate' => (default => 0.1);
has 'beta' => (is => "ro", isa => "Num", default => 1);
has 'lambda1' => (is => "ro", isa => "Num", default => 0.9);
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
return [
( run in 1.229 second using v1.01-cache-2.11-cpan-39bf76dae61 )