PGXN-API

 view release on metacpan or  search on metacpan

lib/PGXN/API/Indexer.pm  view on Meta::CPAN

package PGXN::API::Indexer;

use v5.14;
use utf8;
use Moose;
use PGXN::API;
use File::Spec::Functions qw(catfile catdir);
use File::Path qw(make_path);
use File::Copy::Recursive qw(fcopy dircopy);
use File::Basename;
use Text::Markup;
use Text::Markup::None qr/te?xt/;
use XML::LibXML;
use List::Util qw(first);
use List::MoreUtils qw(uniq);
use Lucy::Plan::Schema;
use Lucy::Analysis::PolyAnalyzer;
use Lucy::Analysis::RegexTokenizer;
use Lucy::Index::Indexer;
use Try::Tiny;
use Archive::Zip qw(AZ_OK);
use namespace::autoclean;
our $VERSION = v0.21.0;

# Read-write integer verbosity setting; starts at 0.
# NOTE(review): presumably larger values mean chattier output -- confirm
# against the code that reads this attribute.
has verbose => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);
# Private read-write boolean flag; true by default.
# NOTE(review): presumably switches indexing on and off; the code that
# consults it is outside this view -- confirm.
has _index_it => (
    is      => 'rw',
    isa     => 'Bool',
    default => 1,
);
# Read-only hash reference holding one (initially empty) array reference
# for each of the keys docs, dists, extensions, users and tags.
has to_index => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub {
        my %queue_for;
        $queue_for{$_} = [] for qw(docs dists extensions users tags);
        return \%queue_for;
    },
);

# Private read-only hash reference; starts out empty.
# NOTE(review): presumably a cache keyed by user nickname -- confirm
# against the code that populates it.
has _user_names => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub { return +{}; },
);

# Lazily constructed XML::LibXML parser object. The options below are
# handed to the constructor unchanged; see the XML::LibXML::Parser
# documentation for their exact semantics.
has libxml => (
    is      => 'ro',
    isa     => 'XML::LibXML',
    lazy    => 1,
    default => sub {
        my %parser_opts = (
            recover    => 2,
            no_network => 1,
            no_blanks  => 1,
            no_cdata   => 1,
        );
        return XML::LibXML->new(%parser_opts);
    },
);

# Lazily computed path of the "_index" directory located under the
# document root reported by the PGXN::API singleton. The directory is
# created on first access when nothing exists at that path yet.
has index_dir => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    default => sub {
        my $path = catdir(PGXN::API->instance->doc_root, '_index');
        # Only create it when the path is absent; an existing entry of
        # any kind is left alone.
        make_path($path) unless -e $path;
        return $path;
    },
);

has schemas => ( is => 'ro', isa => 'HashRef', lazy => 1, default => sub {
    my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
        language => 'en',
    );

    my $fti = Lucy::Plan::FullTextType->new(
        analyzer      => $polyanalyzer,
        highlightable => 0,
    );

    my $ftih = Lucy::Plan::FullTextType->new(
        analyzer      => $polyanalyzer,
        highlightable => 1,
    );

    my $string = Lucy::Plan::StringType->new(
        indexed => 1,
        stored  => 1,
    );

    my $indexed = Lucy::Plan::StringType->new(
        indexed => 1,
        stored  => 0,
    );

    my $stored = Lucy::Plan::StringType->new(
        indexed => 0,
        stored  => 1,
    );

    my $list = Lucy::Plan::FullTextType->new(
        indexed       => 1,
        stored        => 1,
        highlightable => 1,
        analyzer      => Lucy::Analysis::RegexTokenizer->new(
            pattern => '[^\003]+'
        ),
    );

    my %schemas;
    for my $spec (
        [ docs => [
            [ key         => $indexed ],
            [ title       => $fti     ],
            [ abstract    => $fti     ],
            [ body        => $ftih    ],
            [ dist        => $fti     ],
            [ version     => $stored  ],
            [ docpath     => $stored  ],
            [ date        => $stored  ],
            [ user        => $string  ],
            [ user_name   => $fti     ],
        ]],
        [ dists => [
            [ key         => $indexed ],
            [ dist        => $fti     ],
            [ abstract    => $fti     ],
            [ description => $fti     ],
            [ readme      => $ftih    ],
            [ tags        => $list    ],
            [ version     => $stored  ],



( run in 1.626 second using v1.01-cache-2.11-cpan-39bf76dae61 )