AI-MXNet

 view release on metacpan or  search on metacpan

lib/AI/MXNet/RNN/IO.pm  view on Meta::CPAN

package AI::MXNet::RNN::IO;
use strict;
use warnings;
use AI::MXNet::Base;
use AI::MXNet::Function::Parameters;

=encoding UTF-8

=head1 NAME

    AI::MXNet::RNN::IO - Functions for constructing recurrent neural networks.
=cut

=head1 DESCRIPTION

    Functions for constructing recurrent neural networks.
=cut

=head2 encode_sentences

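    Encode sentences and (optionally) build a mapping
    from string tokens to integer indices. When no $vocab
    is supplied, a new vocabulary is built and unknown
    tokens are added to it; when an existing $vocab is
    passed in, an unknown token fails an assertion.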

    Parameters
    ----------
    $sentences : array ref of array refs of str
        An array ref of sentences to encode. Each sentence
        should be an array ref of string tokens.
    :$vocab : undef or hash ref of str -> int
        Optional input Vocabulary
    :$invalid_label : int, default -1
        Index for invalid token, like <end-of-sentence>
    :$invalid_key : str, default '\n'
        Key for invalid token. Uses '\n' for end
        of sentence by default.
    :$start_label : int, default 0
        Lowest index assigned to a new token. The value of
        $invalid_label is skipped when assigning indices.

    Returns
    -------
    $result : array ref of array refs of int
        encoded sentences
    $vocab : hash ref of str -> int
        result vocabulary
=cut


method encode_sentences(
    ArrayRef[ArrayRef]  $sentences,
    Maybe[HashRef]     :$vocab=,
    Int                :$invalid_label=-1,
    Str                :$invalid_key="\n",
    Int                :$start_label=0
)
{
    my $idx = $start_label;
    my $new_vocab;
    if(not defined $vocab)
    {
        $vocab = { $invalid_key => $invalid_label };
        $new_vocab = 1;
    }
    else
    {
        $new_vocab = 0;
    }
    my @res;
    for my $sent (@{ $sentences })
    {
        my @coded;
        for my $word (@{ $sent })
        {
            if(not exists $vocab->{ $word })
            {
                assert($new_vocab, "Unknown token: $word");
                if($idx == $invalid_label)
                {
                    $idx += 1;
                }
                $vocab->{$word} = $idx;
                $idx += 1;
            }
            push @coded, $vocab->{ $word };
        }
        push @res, \@coded;
    }
    return (\@res, $vocab);
}

package AI::MXNet::BucketSentenceIter;

=encoding UTF-8

=head1 NAME

    AI::MXNet::BucketSentenceIter
=cut

=head1 DESCRIPTION

    Simple bucketing iterator for language model.
    Label for each step is constructed from data of
    next step.
=cut

=head2 new

    Parameters
    ----------
    sentences : array ref of array refs of int
        encoded sentences
    batch_size : int
        batch_size of data
    invalid_label : int, default -1
        key for invalid label, e.g. <end-of-sentence>
    dtype : str, default 'float32'



( run in 0.593 second using v1.01-cache-2.11-cpan-39bf76dae61 )