AI-MXNet

 view release on metacpan or  search on metacpan

lib/AI/MXNet/IO.pm  view on Meta::CPAN

}

method getlabel()
{
    return $self->current_batch->label;
}

method getindex()
{
    return $self->current_batch->index;
}

method getpad()
{
    return $self->current_batch->pad;
}

package AI::MXNet::NDArrayIter;
use Mouse;
use AI::MXNet::Base;
use List::Util qw(shuffle);
extends 'AI::MXNet::DataIter';

=head1 NAME

    AI::MXNet::NDArrayIter - Predefined NDArray iterator.
=cut

=head1 DESCRIPTION

    Predefined NDArray iterator. Accepts PDL or AI::MXNet::NDArray object as an input.

    Parameters
    ----------
    data: Maybe[AcceptableInput|HashRef[AcceptableInput]|ArrayRef[AcceptableInput]].
        NDArrayIter supports single or multiple data and label.
    label: Maybe[AcceptableInput|HashRef[AcceptableInput]|ArrayRef[AcceptableInput]].
        Same as data, but is not given to the model during testing.
    batch_size=1: Int
        Batch Size
    shuffle=0: Bool
        Whether to shuffle the data
    last_batch_handle='pad': 'pad', 'discard' or 'roll_over'
        How to handle the last batch

    Note
    ----
    This iterator will pad, discard or roll over the last batch if
    the size of data does not match batch_size. Roll over is intended
    for training and can cause problems if used for prediction.
=cut

has 'data'                => (is => 'rw', isa => 'Maybe[AcceptableInput|HashRef[AcceptableInput]|ArrayRef[AcceptableInput]]');
has 'data_list'           => (is => 'rw', isa => 'ArrayRef[AI::MXNet::NDArray]');
has 'label'               => (is => 'rw', isa => 'Maybe[AcceptableInput|HashRef[AcceptableInput]|ArrayRef[AcceptableInput]]');
has 'batch_size'          => (is => 'rw', isa => 'Int', default => 1);
has '_shuffle'            => (is => 'rw', init_arg => 'shuffle', isa => 'Bool', default => 0);
has 'last_batch_handle'   => (is => 'rw', isa => 'Str', default => 'pad');
has 'label_name'          => (is => 'rw', isa => 'Str', default => 'softmax_label');
has 'num_source'          => (is => 'rw', isa => 'Int');
has 'cursor'              => (is => 'rw', isa => 'Int');
has 'num_data'            => (is => 'rw', isa => 'Int');

around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;
    if(@_%2)
    {
        my $data  = shift;
        return $class->$orig(data => $data, @_);
    }
    return $class->$orig(@_);
};

sub BUILD
{
    my $self  = shift;
    my $data  = AI::MXNet::IO->init_data($self->data,  allow_empty => 0, default_name => 'data');
    my $label = AI::MXNet::IO->init_data($self->label, allow_empty => 1, default_name => $self->label_name);
    my $num_data  = $data->[0][1]->shape->[0];
    confess("size of data dimension 0 $num_data < batch_size ${\ $self->batch_size }")
        unless($num_data >= $self->batch_size);
    if($self->_shuffle)
    {
        my @idx = shuffle(0..$num_data-1);
        $_->[1] = AI::MXNet::NDArray->array(pdl_shuffle($_->[1]->aspdl, \@idx)) for @$data;
        $_->[1] = AI::MXNet::NDArray->array(pdl_shuffle($_->[1]->aspdl, \@idx)) for @$label;
    }
    if($self->last_batch_handle eq 'discard')
    {
        my $new_n = $num_data - $num_data % $self->batch_size - 1;
        $_->[1] = $_->[1]->slice([0, $new_n]) for @$data;
        $_->[1] = $_->[1]->slice([0, $new_n]) for @$label;
    }
    my $data_list  = [map { $_->[1] } (@{ $data }, @{ $label })];
    my $num_source = @{ $data_list };
    my $cursor = -$self->batch_size;
    $self->data($data);
    $self->data_list($data_list);
    $self->label($label);
    $self->num_source($num_source);
    $self->cursor($cursor);
    $self->num_data($num_data);
}

# The name and shape of data provided by this iterator
method provide_data()
{
    return [map {
        my ($k, $v) = @{ $_ };
        my $shape = $v->shape;
        $shape->[0] = $self->batch_size;
        AI::MXNet::DataDesc->new(name => $k, shape => $shape, dtype => $v->dtype)
    } @{ $self->data }];
}

# The name and shape of label provided by this iterator
method provide_label()
{
    return [map {
        my ($k, $v) = @{ $_ };
        my $shape = $v->shape;
        $shape->[0] = $self->batch_size;
        AI::MXNet::DataDesc->new(name => $k, shape => $shape, dtype => $v->dtype)
    } @{ $self->label }];
}

# Ignore roll over data and set to start
method hard_reset()
{
    $self->cursor(-$self->batch_size);
}

method reset()
{
    if($self->last_batch_handle eq 'roll_over' and $self->cursor > $self->num_data)
    {
        $self->cursor(-$self->batch_size + ($self->cursor%$self->num_data)%$self->batch_size);
    }
    else
    {
        $self->cursor(-$self->batch_size);
    }
}

method iter_next()
{
    $self->cursor($self->batch_size + $self->cursor);
    return $self->cursor < $self->num_data;
}

method next()
{
    if($self->iter_next)
    {
        return AI::MXNet::DataBatch->new(
            data  => $self->getdata,
            label => $self->getlabel,
            pad   => $self->getpad,
            index => undef
        );
    }
    else
    {
        return undef;
    }
}

# Load data from underlying arrays, internal use only
method _getdata($data_source)
{
    confess("DataIter needs reset.") unless $self->cursor < $self->num_data;
    if(($self->cursor + $self->batch_size) <= $self->num_data)
    {
        return [
            map {
                $_->[1]->slice([$self->cursor,$self->cursor+$self->batch_size-1])
            } @{ $data_source }
        ];
    }
    else
    {
        my $pad = $self->batch_size - $self->num_data + $self->cursor - 1;
        return [
            map {
                AI::MXNet::NDArray->concatenate(
                    [
                        $_->[1]->slice([$self->cursor, -1]),
                        $_->[1]->slice([0, $pad])
                    ]
                )
            } @{ $data_source }
        ];
    }
}

method getdata()
{
    return $self->_getdata($self->data);
}

method getlabel()
{
    return $self->_getdata($self->label);
}

method getpad()
{
    if( $self->last_batch_handle eq 'pad'
            and
        ($self->cursor + $self->batch_size) > $self->num_data
    )
    {
        return $self->cursor + $self->batch_size - $self->num_data;
    }
    else
    {
        return 0;
    }
}

package AI::MXNet::MXDataIter;
use Mouse;
use AI::MXNet::Base;

extends 'AI::MXNet::DataIter';

=head1 NAME

    AI::MXNet::MXDataIter - A data iterator pre-built in C++ layer of MXNet.
=cut

has 'handle'           => (is => 'ro', isa => 'DataIterHandle', required => 1);
has '_debug_skip_load' => (is => 'rw', isa => 'Int', default => 0);
has '_debug_at_begin'  => (is => 'rw', isa => 'Int', default => 0);
has 'data_name'        => (is => 'ro', isa => 'Str', default => 'data');
has 'label_name'       => (is => 'ro', isa => 'Str', default => 'softmax_label');
has [qw/first_batch
        provide_data
        provide_label
        batch_size/]   => (is => 'rw', init_arg => undef);

sub BUILD
{
    my $self = shift;
    $self->first_batch($self->next);
    my $data = $self->first_batch->data->[0];
    $self->provide_data([
        AI::MXNet::DataDesc->new(
            name  => $self->data_name,
            shape => $data->shape,
            dtype => $data->dtype
        )
    ]);
    my $label = $self->first_batch->label->[0];
    $self->provide_label([
        AI::MXNet::DataDesc->new(
            name  => $self->label_name,
            shape => $label->shape,
            dtype => $label->dtype
        )
    ]);
    $self->batch_size($data->shape->[0]);
}

sub DEMOLISH
{
    check_call(AI::MXNetCAPI::DataIterFree(shift->handle));
}

=head2 debug_skip_load

    Set the iterator to simply return always first batch.
    Notes



( run in 1.631 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )