Dancer-Plugin-Catmandu-OAI

 view release on metacpan or  search on metacpan

lib/Dancer/Plugin/Catmandu/OAI.pm  view on Meta::CPAN

package Dancer::Plugin::Catmandu::OAI;

=head1 NAME

Dancer::Plugin::Catmandu::OAI - OAI-PMH provider backed by a searchable Catmandu::Store

=cut

our $VERSION = '0.0508';

use Catmandu::Sane;
use Catmandu::Util qw(is_string is_array_ref hash_merge);
use Catmandu;
use Catmandu::Fix;
use Catmandu::Exporter::Template;
use Data::MessagePack;
use MIME::Base64 qw(encode_base64url decode_base64url);
use Dancer::Plugin;
use Dancer qw(:syntax);
use DateTime;
use DateTime::Format::ISO8601;
use DateTime::Format::Strptime;

my $DEFAULT_LIMIT = 100;

my $VERBS = {
    GetRecord => {
        valid    => {metadataPrefix => 1, identifier => 1},
        required => [qw(metadataPrefix identifier)],
    },
    Identify        => {valid => {}, required => [],},
    ListIdentifiers => {
        valid => {
            metadataPrefix  => 1,
            from            => 1,
            until           => 1,
            set             => 1,
            resumptionToken => 1
        },
        required => [qw(metadataPrefix)],
    },
    ListMetadataFormats =>
        {valid => {identifier => 1, resumptionToken => 1}, required => [],},
    ListRecords => {
        valid => {
            metadataPrefix  => 1,
            from            => 1,
            until           => 1,
            set             => 1,
            resumptionToken => 1
        },
        required => [qw(metadataPrefix)],
    },
    ListSets => {valid => {resumptionToken => 1}, required => [],},
};

{
    state $mp = Data::MessagePack->new->utf8;

    sub _deserialize {
        $mp->unpack(decode_base64url($_[0]));
    }

    sub _serialize {
        encode_base64url($mp->pack($_[0]));
    }

}

sub _new_token {
    my ($settings, $hits, $params, $from, $until, $old_token) = @_;

    my $n = $old_token && $old_token->{_n} ? $old_token->{_n} : 0;
    $n += $hits->size;

lib/Dancer/Plugin/Catmandu/OAI.pm  view on Meta::CPAN

TT

    my $template_list_metadata_formats = "";
    $template_list_metadata_formats .= <<TT;
$template_header
<ListMetadataFormats>
TT
    for my $format (values %$metadata_formats) {
        $template_list_metadata_formats .= <<TT;
<metadataFormat>
    <metadataPrefix>$format->{metadataPrefix}</metadataPrefix>
    <schema>$format->{schema}</schema>
    <metadataNamespace>$format->{metadataNamespace}</metadataNamespace>
</metadataFormat>
TT
    }
    $template_list_metadata_formats .= <<TT;
</ListMetadataFormats>
$template_footer
TT

    my $template_list_sets = <<TT;
$template_header
<ListSets>
TT
    for my $set (values %$sets) {
        $template_list_sets .= <<TT;
<set>
    <setSpec>$set->{setSpec}</setSpec>
    <setName>$set->{setName}</setName>
TT

        my $set_descriptions = $set->{setDescription} // [];
        $set_descriptions = [$set_descriptions]
            unless is_array_ref($set_descriptions);
        $template_list_sets .= "<setDescription>$_</setDescription>"
            for @$set_descriptions;

        $template_list_sets .= <<TT;
</set>
TT
    }
    $template_list_sets .= <<TT;
</ListSets>
$template_footer
TT

    my $fix = $setting->{fix};
    if ($fix) {
        $fix = Catmandu::Fix->new(fixes => $fix);
    }
    my $sub_deleted       = $opts{deleted}       || sub {0};
    my $sub_set_specs_for = $opts{set_specs_for} || sub {[]};

    my $template_options = $setting->{template_options} || {};

    my $render = sub {
        my ($tmpl, $data) = @_;
        content_type 'xml';
        my $out      = "";
        my $exporter = Catmandu::Exporter::Template->new(
            template => $tmpl,
            file     => \$out
        );
        $exporter->add($data);
        $exporter->commit;
        $out;
    };

    any ['get', 'post'] => $path => sub {
        my $uri_base = $setting->{uri_base}
            // request->uri_for(request->path_info);
        my $response_date = DateTime->now->iso8601 . 'Z';
        my $params = request->is_get ? params('query') : params('body');
        my $errors = [];
        my $format;
        my $set;
        my $verb = $params->{verb};
        my $vars = {
            uri_base      => $uri_base,
            request_uri   => $uri_base . $path,
            response_date => $response_date,
            errors        => $errors,
        };

        if ($verb and my $spec = $VERBS->{$verb}) {
            my $valid    = $spec->{valid};
            my $required = $spec->{required};

            if ($valid->{resumptionToken}
                and exists $params->{resumptionToken})
            {
                if (keys(%$params) > 2) {
                    push @$errors,
                        [badArgument =>
                            "resumptionToken cannot be combined with other parameters"
                        ];
                }
            }
            else {
                for my $key (keys %$params) {
                    next if $key eq 'verb';
                    unless ($valid->{$key}) {
                        push @$errors,
                            [badArgument => "parameter $key is illegal"];
                    }
                }
                for my $key (@$required) {
                    unless (exists $params->{$key}) {
                        push @$errors,
                            [badArgument => "parameter $key is missing"];
                    }
                }
            }
        }
        else {
            push @$errors, [badVerb => "illegal OAI verb"];
        }

        if (@$errors) {
            return $render->(\$template_error, $vars);

lib/Dancer/Plugin/Catmandu/OAI.pm  view on Meta::CPAN

                        if defined $token->{_m};
                    $params->{from}  = $token->{_f} if defined $token->{_f};
                    $params->{until} = $token->{_u} if defined $token->{_u};
                    $vars->{token}   = $token;
                }
                catch {
                    push @$errors,
                        [badResumptionToken =>
                            "resumptionToken is not in the correct format"
                        ];
                };

            }
        }

        if (exists $params->{set}) {
            unless ($sets) {
                push @$errors, [noSetHierarchy => "sets are not supported"];
            }
            unless ($set = $sets->{$params->{set}}) {
                push @$errors, [badArgument => "set does not exist"];
            }
        }

        if (exists $params->{metadataPrefix}) {
            unless ($format = $metadata_formats->{$params->{metadataPrefix}})
            {
                push @$errors,
                    [cannotDisseminateFormat =>
                        "metadataPrefix $params->{metadataPrefix} is not supported"
                    ];
            }
        }

        if (@$errors) {
            return $render->(\$template_error, $vars);
        }

        if ($verb eq 'GetRecord') {
            my $id = $params->{identifier};
            $id =~ s/^$ns//;

            my $rec = $bag->search(
                %{$setting->{default_search_params}},
                cql_query => sprintf($setting->{get_record_cql_pattern}, $id),
                start     => 0,
                limit     => 1,
            )->first;

            if (defined $rec) {
                if ($fix) {
                    $rec = $fix->fix($rec);
                }

                $vars->{id}        = $id;
                $vars->{datestamp} = $format_datestamp->(
                    $rec->{$setting->{datestamp_field}});
                $vars->{deleted} = $sub_deleted->($rec);
                $vars->{setSpec} = $sub_set_specs_for->($rec);
                my $metadata = "";
                my $exporter = Catmandu::Exporter::Template->new(
                    %$template_options,
                    template => $format->{template},
                    file     => \$metadata,
                );
                if ($format->{fix}) {
                    $rec = $format->{fix}->fix($rec);
                }
                $exporter->add($rec);
                $exporter->commit;
                $vars->{metadata} = $metadata;
                unless ($vars->{deleted}
                    and $setting->{deletedRecord} eq 'no')
                {
                    return $render->(\$template_get_record, $vars);
                }
            }
            push @$errors,
                [idDoesNotExist =>
                    "identifier $params->{identifier} is unknown or illegal"
                ];
            return $render->(\$template_error, $vars);

        }
        elsif ($verb eq 'Identify') {
            $vars->{earliest_datestamp}
                = $setting->{earliestDatestamp} || do {
                my $hits = $bag->search(
                    %{$setting->{default_search_params}},
                    cql_query => $setting->{cql_filter} || 'cql.allRecords',
                    limit     => 1,
                    sru_sortkeys => "$setting->{datestamp_index},,1",
                );
                if (my $rec = $hits->first) {
                    $format_datestamp->($rec->{$setting->{datestamp_field}});
                }
                else {
                    '1970-01-01T00:00:01Z';
                }
                };
            return $render->(\$template_identify, $vars);

        }
        elsif ($verb eq 'ListIdentifiers' || $verb eq 'ListRecords') {
            my $from  = $params->{from};
            my $until = $params->{until};

            for my $datestamp (($from, $until)) {
                $datestamp || next;
                if ($datestamp
                    !~ /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}Z)?$/)
                {
                    push @$errors,
                        [badArgument =>
                            "datestamps must have the format YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ"
                        ];
                    return $render->(\$template_error, $vars);
                }
            }

            if ($from && $until && length($from) != length($until)) {

lib/Dancer/Plugin/Catmandu/OAI.pm  view on Meta::CPAN

            }

            if (
                defined(
                    my $new_token = _new_token(
                        $setting, $search, $params,
                        $from,    $until,  $vars->{token}
                    )
                )
                )
            {
                $vars->{resumption_token} = _serialize($new_token);
            }

            $vars->{total} = $search->total;

            if ($verb eq 'ListIdentifiers') {
                $vars->{records} = [
                    map {
                        my $rec = $_;
                        my $id  = $rec->{$bag->id_key};

                        if ($fix) {
                            $rec = $fix->fix($rec);
                        }

                        {
                            id        => $id,
                            datestamp => $format_datestamp->(
                                $rec->{$setting->{datestamp_field}}
                            ),
                            deleted => $sub_deleted->($rec),
                            setSpec => $sub_set_specs_for->($rec),
                        };
                    } @{$search->hits}
                ];
                return $render->(\$template_list_identifiers, $vars);
            }
            else {
                $vars->{records} = [
                    map {
                        my $rec = $_;
                        my $id  = $rec->{$bag->id_key};

                        if ($fix) {
                            $rec = $fix->fix($rec);
                        }

                        my $deleted = $sub_deleted->($rec);

                        my $rec_vars = {
                            id        => $id,
                            datestamp => $format_datestamp->(
                                $rec->{$setting->{datestamp_field}}
                            ),
                            deleted => $deleted,
                            setSpec => $sub_set_specs_for->($rec),
                        };
                        unless ($deleted) {
                            my $metadata = "";
                            my $exporter = Catmandu::Exporter::Template->new(
                                %$template_options,
                                template => $format->{template},
                                file     => \$metadata,
                            );
                            if ($format->{fix}) {
                                $rec = $format->{fix}->fix($rec);
                            }
                            $exporter->add($rec);
                            $exporter->commit;
                            $rec_vars->{metadata} = $metadata;
                        }
                        $rec_vars;
                    } @{$search->hits}
                ];
                return $render->(\$template_list_records, $vars);
            }

        }
        elsif ($verb eq 'ListMetadataFormats') {
            if (my $id = $params->{identifier}) {
                $id =~ s/^$ns//;
                unless ($bag->get($id)) {
                    push @$errors,
                        [idDoesNotExist =>
                            "identifier $params->{identifier} is unknown or illegal"
                        ];
                    return $render->(\$template_error, $vars);
                }
            }
            return $render->(\$template_list_metadata_formats, $vars);

        }
        elsif ($verb eq 'ListSets') {
            return $render->(\$template_list_sets, $vars);
        }
    }
}

register oai_provider => \&oai_provider;

register_plugin;

1;

=head1 SYNOPSIS

    #!/usr/bin/env perl

    use Dancer;
    use Catmandu;
    use Dancer::Plugin::Catmandu::OAI;

    Catmandu->load;
    Catmandu->config;

    my $options = {};

    oai_provider '/oai' , %$options;

    dance;

lib/Dancer/Plugin/Catmandu/OAI.pm  view on Meta::CPAN

                    op:
                      '=': true
                      '<': true
                      '<=': true
                      '>=': true
                      '>': true
                      'exact': true
                    field: 'datestamp'
          index_mappings:
            publication:
              properties:
                datestamp: {type: date, format: date_time_no_millis}

=head1 IMPORT RECORDS

With the Catmandu configuration files in place records can be imported with the L<catmandu> command:

    # Drop the existing ElasticSearch 'oai' collection
    $ catmandu drop oai

    # Import the sample record
    $ catmandu import YAML to oai < sample.yml

    # Test if the records are available in the 'oai' collection
    $ catmandu export oai

=head1 DANCER CONFIGURATION

The Dancer configuration file 'config.yml' contains basic information for the OAI-PMH plugin to work:

    * store - In which Catmandu::Store are the metadata records stored
    * bag - In which Catmandu::Bag are the records of this 'store' (use: 'data' as default)
    * datestamp_field - Which field in the record contains a datestamp ('datestamp' in our example above)
    * datestamp_index - Which CQL index should be used to find records within a specified date range (if not specified, the value from the 'datestamp_field' setting is used)
    * repositoryName - The name of the repository
    * uri_base - The full base url of the OAI controller. To be used when behind a proxy server. When not set, this module relies on the Dancer request to provide its full url. Use middleware like 'ReverseProxy' or 'Dancer::Middleware::Rebase' in tha...
    * adminEmail - An administrative email. Can be string or array of strings. This will be included in the Identify response.
    * compression - a compression encoding supported by the repository. Can be string or array of strings. This will be included in the Identify response.
    * description - XML container that describes your repository. Can be string or array of strings. This will be included in the Identify response. Note that this module will try to validate the XML data.
    * earliestDatestamp - The earliest datestamp available in the dataset as YYYY-MM-DDTHH:MM:SSZ. This will be determined dynamically if no static value is given.
    * deletedRecord - The policy for deleted records. See also: L<https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords>
    * repositoryIdentifier - A prefix to use in OAI-PMH identifiers
    * cql_filter -  A CQL query to find all records in the database that should be made available to OAI-PMH
    * default_search_params - set default arguments that get passed to every call to the bag's search method
    * search_strategy - default is C<paginate>, set to C<es.scroll> to avoid deep paging (Elasticsearch only)
    * limit - The maximum number of records to be returned in each OAI-PMH request
    * delimiter - Delimiters used in prefixing a record identifier with a repositoryIdentifier (use: ':' as default)
    * sampleIdentifier - A sample identifier
    * metadata_formats - An array of metadataFormats that are supported
        * metadataPrefix - A short string for the name of the format
        * schema - An URL to the XSD schema of this format
        * metadataNamespace - A XML namespace for this format
        * template - The path to a Template Toolkit file to transform your records into this format
        * fix - Optionally an array of one or more L<Catmandu::Fix>-es or Fix files
    * sets - Optional an array of OAI-PMH sets and the CQL query to retrieve records in this set from the Catmandu::Store
        * setSpec - A short string for the same of the set
        * setName - A longer description of the set
        * setDescription - an optional and repeatable container that may hold community-specific XML-encoded data about the set. Should be string or array of strings.
        * cql - The CQL command to find records in this set in the L<Catmandu::Store>
    * xsl_stylesheet - Optional path to an xsl stylesheet
    * template_options - An optional hash of configuration options that will be passed to L<Catmandu::Exporter::Template> or L<Template>.

Below is a sample minimal configuration for the 'sample.yml' demo above:

    $ cat config.yml
    charset: "UTF-8"
    plugins:
      'Catmandu::OAI':
        store: oai
        bag: data
        datestamp_field: datestamp
        repositoryName: "My OAI DataProvider"
        uri_base: "http://oai.service.com/oai"
        adminEmail: me@example.com
        earliestDatestamp: "1970-01-01T00:00:01Z"
        cql_filter: "datestamp>1970-01-01T00:00:01Z"
        deletedRecord: persistent
        repositoryIdentifier: oai.service.com
        limit: 200
        delimiter: ":"
        sampleIdentifier: "oai:oai.service.com:1585315"
        metadata_formats:
          -
            metadataPrefix: oai_dc
            schema: "http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
            metadataNamespace: "http://www.openarchives.org/OAI/2.0/oai_dc/"
            template: oai_dc.tt

=head1 METADATAPREFIX TEMPLATE

For each metadataPrefix a Template Toolkit file needs to exist which translate L<Catmandu::Store> records into XML records. At least
one Template Toolkit file should be made available to transform stored records into Dublin Core. The example below contains an example file to
transform 'sample.yml' type records into Dublin Core:

    $ cat oai_dc.tt
    <oai_dc:dc xmlns="http://www.openarchives.org/OAI/2.0/oai_dc/"
               xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
               xmlns:dc="http://purl.org/dc/elements/1.1/"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
    [%- FOREACH var IN ['title' 'creator' 'subject' 'description' 'publisher' 'contributor' 'date' 'type' 'format' 'identifier' 'source' 'language' 'relation' 'coverage' 'rights'] %]
        [%- FOREACH val IN $var %]
        <dc:[% var %]>[% val | html %]</dc:[% var %]>
        [%- END %]
    [%- END %]
    </oai_dc:dc>

=head1 START DANCER

If all the required files are available, then a Dancer application can be started. See the 'demo' directory of this distribution for a complete example:

    $ ls
    app.pl  catmandu.yml  config.yml  oai_dc.tt
    $ cat app.pl
    #!/usr/bin/env perl

    use Dancer;
    use Catmandu;
    use Dancer::Plugin::Catmandu::OAI;

    Catmandu->load;



( run in 0.316 second using v1.01-cache-2.11-cpan-eab888a1d7d )