AI-MicroStructure

 view release on metacpan or  search on metacpan

lib/AI/MicroStructure/RemoteList.pm  view on Meta::CPAN

# transformation subroutines
#
sub tr_nonword {
    my $str = shift;
    $str =~ tr/a-zA-Z0-9_/_/c;
    $str;
}

sub tr_accent {
    my $str = shift;
    $str =~ tr{ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ}
              {AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy};
    return $str;
}

my %utf2asc = (
    "æ"        => 'ae',
    "Æ"        => 'AE',
    "\xc5\xa0" => 'S',
    "\x{0160}" => 'S',
    # for pokemons
    "\x{0101}"     => 'a',
    "\x{012b}"     => 'i',
    "\x{014d}"     => 'o',
    "\x{016b}"     => 'u',
    "\xe2\x99\x80" => 'female',
    "\xe2\x99\x82" => 'male',
    "\x{2640}"     => 'female',
    "\x{2642}"     => 'male',
);
my $utf_re = qr/(@{[join( '|', sort keys %utf2asc )]})/;

sub tr_utf8_basic {
    my $str = shift;
    $str =~ s/$utf_re/$utf2asc{$1}/go;
    return $str;
}

1;

__END__

=head1 NAME

AI::MicroStructure::RemoteList - Retrieval of a remote source for a structure

=head1 SYNOPSIS

    package AI::MicroStructure::contributors;
    use strict;
    use AI::MicroStructure::List;
    our @ISA = qw( AI::MicroStructure::List );

    # data regarding the remote source
    our %Remote = (
        source =>
            'http://search.cpan.org/dist/AI-MicroStructure/CONTRIBUTORS',
        extract => sub {
            my $content = shift;
            my @items   =
                map { AI::MicroStructure::RemoteList::tr_nonword($_) }
                map { AI::MicroStructure::RemoteList::tr_accent($_) }
                $content =~ /^\* (.*?)\s*$/gm;
            return @items;
        },
    );

    __PACKAGE__->init();

    1;

    # and the usual documentation and list definition

=head1 DESCRIPTION

This base class adds the capability to fetch a fresh list of items from a
remote source to any structure that requires it.

To be able to fetch remote items, an C<AI::MicroStructure> structure must
define the package hash variable C<%Remote> with the appropriate keys.

The keys are:

=over 4

=item C<source>

The URL where the data is available. The content will be passed to the
C<extract> subroutine.

Because of the various way the data can be made available on the web
and can be used in L<AI::MicroStructure>, this scheme has evolved to
support several cases:

Single source URL:

    source => $url

Multiple source URL:

    source => [ $url1, $url2, ... ]

For structures with categories, it's possible to attach a URL for each
category:

    source => {
        category1 => $url1,
        category2 => $url2,
        ...
    }

In the case where the C<source> is an array or a hash reference, an
extra case is supported, in case the source data can only be obtained
via a C<POST> request. In that case, the source should be provided as
either:

    source => [
        [ $url1 => $data1 ],
        [ $url2 => $data2 ],
        ...
    ]



( run in 1.096 second using v1.01-cache-2.11-cpan-140bd7fdf52 )