Lingua-RU-OpenCorpora-Tokenizer
view release on metacpan or search on metacpan
lib/Lingua/RU/OpenCorpora/Tokenizer/Updater.pm view on Meta::CPAN
package Lingua::RU::OpenCorpora::Tokenizer::Updater;
use strict;
use warnings;
use LWP::UserAgent;
use Carp qw(croak);
use Lingua::RU::OpenCorpora::Tokenizer::List;
use Lingua::RU::OpenCorpora::Tokenizer::Vectors;
our $VERSION = 0.06;
# Constructor.
#
# Takes the class name followed by a flat key/value list, which becomes the
# initial contents of the object hash. The new object is then handed to
# _init() before being returned to the caller.
sub new {
    my ($class, @fields) = @_;

    my $self = bless { @fields }, $class;
    $self->_init;

    return $self;
}
# Public "is there newer data?" checks, one per data set.
#
# Each takes an optional second argument ($force) and forwards it, together
# with the data set name, to _update_available(); the result of that call is
# returned unchanged.
sub vectors_update_available {
    my ($self, $force) = @_;
    return $self->_update_available('vectors', $force);
}

sub hyphens_update_available {
    my ($self, $force) = @_;
    return $self->_update_available('hyphens', $force);
}

sub exceptions_update_available {
    my ($self, $force) = @_;
    return $self->_update_available('exceptions', $force);
}

sub prefixes_update_available {
    my ($self, $force) = @_;
    return $self->_update_available('prefixes', $force);
}
# Public "download and install" entry points, one per data set.
#
# Each forwards the data set name to _update() and returns whatever that
# call returns. Arguments beyond the invocant are ignored.
sub update_vectors {
    my ($self) = @_;
    return $self->_update('vectors');
}

sub update_hyphens {
    my ($self) = @_;
    return $self->_update('hyphens');
}

sub update_exceptions {
    my ($self) = @_;
    return $self->_update('exceptions');
}

sub update_prefixes {
    my ($self) = @_;
    return $self->_update('prefixes');
}
# Populate a freshly blessed updater with its collaborators:
#
#   ua                           - the LWP::UserAgent used for all downloads
#   exceptions/prefixes/hyphens  - one Tokenizer::List object each
#   vectors                      - a Tokenizer::Vectors object
#
# Returns nothing.
sub _init {
    my ($self) = @_;

    # The agent string deliberately ends in ", ": LWP::UserAgent appends its
    # own "libwww-perl/#.###" token to an agent value that ends with a space.
    $self->{ua} = LWP::UserAgent->new(
        agent     => __PACKAGE__ . ' ' . $VERSION . ', ',
        env_proxy => 1,    # pick up proxy settings from *_proxy env variables
    );

    # Use a lexical loop variable rather than the global $_: inside a
    # foreach, $_ is aliased to the (read-only) qw() constants, so a
    # constructor that assigned to $_ would either die or change the hash
    # key the new object is stored under.
    for my $list_name (qw(exceptions prefixes hyphens)) {
        $self->{$list_name}
            = Lingua::RU::OpenCorpora::Tokenizer::List->new($list_name);
    }

    $self->{vectors} = Lingua::RU::OpenCorpora::Tokenizer::Vectors->new;

    return;
}
# Ask the server for the latest published version of one data set and
# compare it with the locally known version.
#
# Arguments:
#   $mode  - key of the data object in $self ('vectors', 'hyphens', ...)
#   $force - when true, report an update regardless of the versions
#
# Side effects: stores the fetched version in $self->{"${mode}_latest"} and
# the local one in $self->{"${mode}_current"}. This happens even when $force
# is set, so the request is always made.
#
# Returns a true value when an update should be performed, false otherwise.
# Croaks with "<url>: <HTTP status code>" when the request fails.
sub _update_available {
    my ($self, $mode, $force) = @_;

    my $url = $self->{$mode}->_url('version');
    my $res = $self->{ua}->get($url);
    croak "$url: " . $res->code unless $res->is_success;

    # chomp() would only strip "\n"; a CRLF-terminated body would keep its
    # "\r" and make the numeric comparison below warn. Trim all surrounding
    # whitespace instead.
    my $latest = $res->content;
    $latest = '' unless defined $latest;
    $latest =~ s/\A\s+//;
    $latest =~ s/\s+\z//;

    my $current = $self->{$mode}->{version};

    $self->{"${mode}_latest"}  = $latest;
    $self->{"${mode}_current"} = $current;

    return 1 if $force;

    # An undefined local version compares as 0, exactly as before, but
    # without the "uninitialized value" warning.
    return $latest > (defined $current ? $current : 0);
}
# Download the data file for one data set and hand its content to the
# matching data object.
#
# $mode is the key of the data object in $self ('vectors', 'hyphens', ...).
# Returns whatever the data object's _update() returns.
# Croaks with "<url>: <HTTP status code>" when the download fails.
sub _update {
    my ($self, $mode) = @_;

    my $data     = $self->{$mode};
    my $file_url = $data->_url('file');
    my $response = $self->{ua}->get($file_url);

    if (!$response->is_success) {
        croak "$file_url: " . $response->code;
    }

    return $data->_update($response->content);
}
1;
__END__
=head1 NAME
Lingua::RU::OpenCorpora::Tokenizer::Updater - download newer data for tokenizer
=head1 DESCRIPTION
This module is not meant to be used directly. Instead, use the C<opencorpora-update-tokenizer> script that comes with this distribution.
=head1 SEE ALSO
( run in 1.400 second using v1.01-cache-2.11-cpan-98e64b0badf )