AI-MXNet


t/test_optimizers.t

package PerlAdam;
use strict;
use warnings;
use AI::MXNet qw(mx);
use Mouse;
use AI::MXNet::Function::Parameters;
extends 'AI::MXNet::Optimizer';
has 'beta1' => (is => 'rw', default => 0.9);
has 'beta2' => (is => 'rw', default => 0.999);
has 'epsilon' => (is => 'rw', default => 1e-8);
has 'rescale_grad' => (is => 'rw', default => 1);
has 'decay_factor' => (is => 'rw', default => (1-1e-8));
around BUILDARGS => \&init;

func init($code, $class, %kwargs)
{
    return $class->$code(learning_rate => 0.001, wd => 0.9, %kwargs);
}

=pod
        Create additional optimizer state: mean, variance

        Parameters
        ----------
        weight : NDArray
        The weight data
=cut

method create_state($index, $weight)
{
    return [
            mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype),  # mean
            mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype)   # variance
    ]; 
}

=pod
        Update the parameters.

        Parameters
        ----------
        index : int
        A unique integer key used to index the parameters

        weight : NDArray
        weight ndarray

        grad : NDArray
        grad ndarray

        state : NDArray or other objects returned by init_state
        The auxiliary state used in optimization.
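
        For reference, the update implemented below is the standard Adam
        rule (a sketch restating the code; g_t is the rescaled,
        weight-decayed gradient and t the per-index update count):

            m_t  = \beta_1 m_{t-1} + (1 - \beta_1) g_t
            v_t  = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
            lr_t = lr \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)
            w_t  = w_{t-1} - lr_t \, m_t / (\sqrt{v_t} + \epsilon)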
=cut

method update($index, $weight, $grad, $state)
{
    my $lr = $self->_get_lr($index);
    $self->_update_count($index);
    my $t = $self->_index_update_count->{$index};
    my ($mean, $variance) = @$state;
    my $wd = $self->_get_wd($index);
    $grad = $grad * $self->rescale_grad + $wd * $weight;
    if($self->clip_gradient)
    {
        mx->nd->clip($grad, -$self->clip_gradient, $self->clip_gradient, { out => $grad });
    }
    $mean *= $self->beta1;
    $mean += $grad * (1 - $self->beta1);

    $variance *= $self->beta2;
    $variance += (1 - $self->beta2) * mx->nd->square($grad, { out => $grad });  # squares in place; $grad is not used after this point

    my $coef1 = 1 - $self->beta1**$t;
    my $coef2 = 1 - $self->beta2**$t;
    $lr *= sqrt($coef2)/$coef1;
    $weight -= $lr*$mean/(mx->nd->sqrt($variance) + $self->epsilon);
}
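
# A minimal usage sketch (illustrative values, not from the original test):
# drive the reference optimizer one step by hand.
#
#     my $opt    = PerlAdam->new(wd => 0);
#     my $weight = mx->nd->ones([3, 4]);
#     my $grad   = mx->nd->ones([3, 4]) * 0.1;
#     my $state  = $opt->create_state(0, $weight);   # [mean, variance]
#     $opt->update(0, $weight, $grad, $state);       # updates $weight in place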

=pod

    RMSProp optimizer of Tieleman & Hinton, 2012.

    For centered=False, the code follows the version in
    http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
    by Tieleman & Hinton, 2012.

    For centered=True, the code follows the version in
    http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.

    Parameters
    ----------
    learning_rate : float, optional
        Step size.
        Default value is set to 0.001.
    gamma1 : float, optional
        decay factor of the moving averages of the gradient and the
        squared gradient.
        Default value is set to 0.9.
    gamma2 : float, optional
        "momentum" factor.
        Default value is set to 0.9.
        Only used if centered=True.
    epsilon : float, optional
        Default value is set to 1e-8.
    centered : boolean, optional
        Use Graves's or Tieleman & Hinton's version of RMSProp.
    wd : float, optional
        L2 regularization coefficient added to all the weights.
    rescale_grad : float, optional
        rescaling factor of gradient.
    clip_gradient : float, optional
        clip gradient in range [-clip_gradient, clip_gradient]
    clip_weights : float, optional
        clip weights in range [-clip_weights, clip_weights]
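
    For centered=True, the update implemented below amounts to (a sketch
    restating the code, with n, g and delta the three per-weight state
    arrays):

        n_t      = (1 - \gamma_1) g_t^2 + \gamma_1 n_{t-1}
        \bar g_t = (1 - \gamma_1) g_t + \gamma_1 \bar g_{t-1}
        \Delta_t = \gamma_2 \Delta_{t-1} - lr \, g_t / \sqrt{n_t - \bar g_t^2 + \epsilon}
        w_t      = w_{t-1} + \Delta_t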
=cut

package PerlRMSProp;
use Mouse;
extends 'AI::MXNet::Optimizer';
has '+learning_rate' => (default => 0.001);
has 'gamma1'         => (is => "ro", isa => "Num",  default => 0.9);
has 'gamma2'         => (is => "ro", isa => "Num",  default => 0.9);
has 'epsilon'        => (is => "ro", isa => "Num",  default => 1e-8);
has 'centered'       => (is => "ro", isa => "Bool", default => 0);
has 'clip_weights'   => (is => "ro", isa => "Num");

# For centered=False: n
# For centered=True: n, g, delta
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    return [
            $self->centered
            ? (
                AI::MXNet::NDArray->zeros(
                    $weight->shape,
                    ctx => $weight->context
                ),  # n
                AI::MXNet::NDArray->zeros(
                    $weight->shape,
                    ctx => $weight->context
                ),  # g
                AI::MXNet::NDArray->zeros(
                    $weight->shape,
                    ctx => $weight->context
                )
            )   # delta
            : (
                AI::MXNet::NDArray->zeros(
                    $weight->shape,
                    ctx => $weight->context
                ),  # n
            )
    ];
}

method update($index, $weight, $grad, $state)
{
    my $lr = $self->_get_lr($index);
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);
    $grad = $grad * $self->rescale_grad + $wd * $weight;
    if(not $self->centered)
    {
        my ($n) = @$state;
        if(defined $self->clip_gradient)
        {
            $grad = mx->nd->clip($grad, -$self->clip_gradient, $self->clip_gradient);
        }
        $n .= (1 - $self->gamma1) * ($grad * $grad) + $self->gamma1 * $n;
        $weight -= $lr * $grad/(mx->nd->sqrt($n + $self->epsilon));
    }
    else
    {
        my ($n, $g, $delta) = @$state;
        if(defined $self->clip_gradient)
        {
            $grad = mx->nd->clip($grad, -$self->clip_gradient, $self->clip_gradient);
        }
        $n .= (1 - $self->gamma1) * ($grad * $grad) + $self->gamma1 * $n;
        $g .= (1 - $self->gamma1) * $grad + $self->gamma1 * $g;
        $delta .= ($self->gamma2) * $delta - $lr * $grad/(mx->nd->sqrt($n - $g*$g + $self->epsilon));
        $weight += $delta;
    }
    if($self->clip_weights)
    {
        mx->nd->clip($weight, -$self->clip_weights, $self->clip_weights, { out => $weight });
    }
}
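
# A minimal usage sketch (illustrative values): one centered RMSProp step.
#
#     my $opt    = PerlRMSProp->new(centered => 1);
#     my $weight = mx->nd->ones([3, 4]);
#     my $grad   = mx->nd->ones([3, 4]) * 0.1;
#     my $state  = $opt->create_state(0, $weight);   # [n, g, delta]
#     $opt->update(0, $weight, $grad, $state);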

package PerlSGD;
# Perl reference implementation of SGD
use Mouse;
extends 'AI::MXNet::Optimizer';
has '+learning_rate' => (default => 0.01);
has 'momentum'       => (is => "ro", isa => "Num",  default => 0);
has 'multi_precision' => (is => 'ro', isa => 'Bool', default => 0);

# Create additional optimizer state: momentum
method create_state(Index $index, AI::MXNet::NDArray $weight)
{
    my $momentum;
    my $weight_master_copy;
    my $do_multi_precision = ($self->multi_precision and $weight->dtype eq 'float16');
    if($do_multi_precision)
    {
        if($self->momentum != 0)
        {
            $momentum = mx->nd->zeros($weight->shape, ctx => $weight->context, dtype=>'float32');
        }
        $weight_master_copy = mx->nd->array($weight, ctx=>$weight->context, dtype=>'float32');
        return [$momentum, $weight_master_copy];
    }
    else
    {
        if($self->momentum != 0)
        {
            $momentum = mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype);
        }
    }
    return $momentum;
}
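
# A minimal sketch (illustrative) of the multi-precision state layout:
# for a float16 weight, create_state returns an ARRAY ref holding a
# float32 momentum buffer and a float32 master copy of the weight;
# update() below uses the ARRAY ref to detect this path.
#
#     my $sgd   = PerlSGD->new(momentum => 0.9, multi_precision => 1);
#     my $w16   = mx->nd->zeros([2, 2], dtype => 'float16');
#     my $state = $sgd->create_state(0, $w16);   # [$momentum, $weight_master_copy]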

method update($index, $weight, $grad, $state)
{
    my $lr = $self->_get_lr($index);
    my $wd = $self->_get_wd($index);
    $self->_update_count($index);
    my $use_multi_precision = ref($state) eq 'ARRAY';

    if(not $use_multi_precision)
    {
        if($self->momentum == 0)
        {
            if(defined $self->clip_gradient)
            {
                $weight .= ((1 - $lr*$wd)*$weight -
                    $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
                );
            }
            else
            {
                $weight .= (1 - $lr*$wd)*$weight - $lr*$self->rescale_grad*$grad;
            }
        }
        else
        {
            my $mom = $state;
            if(defined $self->clip_gradient)
            {
                $mom .= ($self->momentum*$mom - $lr*$wd*$weight -
                    $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
                );
                $weight += $mom;
            }
            else
            {
                $mom .= $self->momentum*$mom - $lr*$wd*$weight
                            - $lr*$self->rescale_grad*$grad;
                $weight += $mom;
            }
        }
    }
    else
    {
        # Multi-precision path: apply the same update to the float32
        # master copy created in create_state(), then cast the result
        # back into the (float16) weight.
        my ($mom, $weight32) = @$state;
        my $grad32 = mx->nd->array($grad, ctx => $grad->context, dtype => 'float32');
        $grad32 = $grad32 * $self->rescale_grad;
        if(defined $self->clip_gradient)
        {
            $grad32 = mx->nd->clip($grad32, -$self->clip_gradient, $self->clip_gradient);
        }
        if($self->momentum == 0)
        {
            $weight32 .= (1 - $lr*$wd)*$weight32 - $lr*$grad32;
        }
        else
        {
            $mom .= $self->momentum*$mom - $lr*$wd*$weight32 - $lr*$grad32;
            $weight32 += $mom;
        }
        $weight .= $weight32->astype($weight->dtype);
    }
}