Catmandu-Pure

 view release on metacpan or  search on metacpan

README  view on Meta::CPAN

NAME

    Catmandu::Pure - A bundle of Catmandu modules for working with data
    from Pure

SYNOPSIS

      # From the command line
      $ catmandu convert Pure \
            --base https://host/ws/api/... \
            --endpoint research-outputs \
            --apiKey "..."

MODULES

      * Catmandu::Importer::Pure

DESCRIPTION

    Catmandu::Importer::Pure is a Catmandu package that seamlessly imports
    data from Elsevier's Pure system using its REST service. Currently

lib/Catmandu/Importer/Pure.pm  view on Meta::CPAN

use XML::LibXML;
use XML::LibXML::XPathContext;
use Data::Validate::URI qw(is_web_uri);
use Scalar::Util qw(blessed);

our $VERSION = '0.05';

with 'Catmandu::Importer';

# Connection settings (read-only). BUILD throws Catmandu::BadVal unless
# base, endpoint and apiKey are all set.
has base     => ( is => 'ro' );
has endpoint => ( is => 'ro' );
# Optional; when set, _url appends it to the URL after the endpoint.
has path     => ( is => 'ro' );
has apiKey   => ( is => 'ro' );
# Credentials: BUILD rejects a user that is given without a password.
has user     => ( is => 'ro' );
has password => ( is => 'ro' );
# NOTE(review): presumably an XML query document sent to the endpoint (see
# the SYNOPSIS example) -- the code that consumes it is not visible here.
has post_xml => ( is => 'ro' );

# Record handler; defaults to 'simple' and is coerced by _coerce_handler.
has handler =>
  ( is => 'ro', default => sub { 'simple' }, coerce => \&_coerce_handler );
# Request options; defaults to an empty hash ref and is coerced by
# _coerce_options.
has options =>
  ( is => 'ro', default => sub { +{} }, coerce => \&_coerce_options );

lib/Catmandu/Importer/Pure.pm  view on Meta::CPAN

# Internal read-only attributes; every one except _next_url defaults to 0.
# NOTE(review): the names suggest paging bookkeeping (records seen, start
# offset, result-set size, total size, URL of the next page) -- how they are
# updated is not visible in this excerpt; confirm before relying on it.
has _n                => ( is => 'ro', default => sub { 0 } );
has _start            => ( is => 'ro', default => sub { 0 } );
has _rs_size          => ( is => 'ro', default => sub { 0 } );
has _total_size       => ( is => 'ro', default => sub { 0 } );
has _next_url         => ( is => 'ro');


sub BUILD {
    my $self = shift;

    Catmandu::BadVal->throw("Base URL, endpoint and apiKey are required")
      unless $self->base && $self->endpoint && $self->apiKey;

    Catmandu::BadVal->throw( "Password is needed for user " . $self->user )
      if $self->user && !$self->password;

    Catmandu::BadVal->throw("Invalid filter, filter should be a CODE ref")
      if $self->filter && !is_code_ref( $self->filter );

    Catmandu::BadVal->throw(
        "Invalid value for timeout, should be non negative integer")
      if !is_natural( $self->timeout );

lib/Catmandu/Importer/Pure.pm  view on Meta::CPAN

    }
    catch {
        Catmandu::Error->throw(
            "Requested '$url'\nStatus code: " . $res->status_line );
    };
}

# Build the request URL for this importer.
#
# The URL is "<base>/<endpoint>", followed by "/<path>" when a path is set.
# If $options is a non-empty hash ref, its entries are appended as a query
# string, sorted by key, with each value percent-escaped (keys are used
# verbatim).
#
# Returns the URL as a string.
sub _url {
    my ( $self, $options ) = @_;

    my @segments = ( $self->base, $self->endpoint );
    push @segments, $self->path if $self->path;
    my $url = join '/', @segments;

    # Nothing to append: no options given, or an empty hash.
    return $url unless $options && %$options;

    my @pairs;
    for my $name ( sort keys %{$options} ) {
        # Keep unreserved characters and commas unescaped in the value.
        push @pairs,
          "$name=" . uri_escape( $options->{$name}, "^A-Za-z0-9\-\._~," );
    }

    return $url . '?' . join( '&', @pairs );
}

lib/Catmandu/Importer/Pure.pm  view on Meta::CPAN


    if ( $self->fullResponse ) {
        $out->{results} = [$root];
        return $out;
    }

    my @result_nodes;

    if ( $xc->exists('/result/items') ) {
        @result_nodes = $xc->findnodes('/result/items/*');
    } elsif ($self->endpoint eq 'changes') {
        @result_nodes = $xc->findnodes('/result/contentChange');
    } else {
        @result_nodes = $xc->findnodes('/result/*[@uuid]');
    };

    $out->{results} = \@result_nodes;

    return $out;
}

lib/Catmandu/Importer/Pure.pm  view on Meta::CPAN


=head1 NAME

Catmandu::Importer::Pure - Package that imports Pure data.

=head1 SYNOPSIS

  # From the command line
  $ catmandu convert Pure \
        --base https://host/ws/api/... \
        --endpoint research-outputs \
        --apiKey "..."

  # In Perl
  use Catmandu;

  my %attrs = (
    base     => 'https://host/path',
    endpoint => 'research-outputs',
    apiKey   => '...',
    options  => { 'fields' => 'title,type,authors.*' } 
  );

  my $importer = Catmandu->importer('Pure', %attrs);

  my $n = $importer->each(sub {
    my $hashref = $_[0];
    # ...
  });

  # get number of validated and approved publications
  my $count = Catmandu->importer(
    'Pure',
    base         => 'https://host/path',
    endpoint     => 'research-outputs',
    apiKey       => '...',
    fullResponse => 1,
    post_xml => '<?xml version="1.0" encoding="utf-8"?>'
      . '<researchOutputsQuery>'
      . '<size>0</size>'
      . '<workflowSteps>'
      . '  <workflowStep>approved</workflowStep>'
      . '  <workflowStep>validated</workflowStep>'
      . '</workflowSteps>'
      . '</researchOutputsQuery>'
  )->first->{count};

=head1 DESCRIPTION

Catmandu::Importer::Pure is a Catmandu package that seamlessly imports data from Elsevier's Pure system using its REST service.
In order to use the Pure Web Service you need an API key. A list of all available endpoints and further documentation can currently
be found under /ws on a web server that is running Pure. Note that this version of the importer is tested with Pure API version
5.18 and might not work with later versions.

=head1 CONFIGURATION

=over

=item base

Base URL for the REST service is required, for example 'http://purehost.com/ws/api/518'

=item endpoint

Valid endpoint is required, like 'research-outputs'

=item apiKey

Valid API key is required for access

=item path

Optional path appended to the URL after the endpoint, for example a date such as '2002-01-22' for the 'changes' endpoint

=item user

User name if basic authentication is used

=item password

Password if basic authentication is used

=item options

lib/Catmandu/Pure.pm  view on Meta::CPAN


=head1 NAME

Catmandu::Pure - A bundle of Catmandu modules for working with data from Pure

=head1 SYNOPSIS

  # From the command line
  $ catmandu convert Pure \
        --base https://host/ws/api/... \
        --endpoint research-outputs \
        --apiKey "..."

=head1 MODULES

=over

=item

L<Catmandu::Importer::Pure>

t/01_importer.t  view on Meta::CPAN

    # and possibly also PURE_USER and PURE_PASSWORD.";
}

my %connect_args = (
    base     => $base_url,
    apiKey   => $apiKey,
    user     => $user,
    password => $password,
);

throws_ok { $pkg->new( endpoint => 'research-outputs', apiKey => '1234' ) }
qr/Base URL.+ required/, "required argument (base) missing";

throws_ok { $pkg->new( endpoint => 'research-outputs', base => $base_url) }
qr/apiKey.+ required/, "required argument (apiKey) missing";

# Supply base and apiKey so that the missing endpoint is the only reason
# the constructor can fail.
throws_ok { $pkg->new( base => $DEFAULT_TEST_BASE, apiKey => '1234' ) }
qr/endpoint.+required/, "required argument (endpoint) missing";

lives_ok { $pkg->new( base => $DEFAULT_TEST_BASE, apiKey => '1234', endpoint => 'research-outputs' ) }
"required arguments supplied";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        apiKey   => '1234',
        endpoint => 'research-outputs',
        user     => 'user'
      )
} qr/Password is needed/, "password missing";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        user     => 'user',
        password => 'password'
      )
} "user,password provided";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        filter   => 'invalid'
      )
} qr/Invalid filter/, "invalid filter";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        filter   => sub { 1 }
      )
} "filter provided";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        apiKey => '1234',
        endpoint => 'persons',
        timeout  => 100
      )
} "timeout provided";

throws_ok {
    $pkg->new(
        base     => 'notvalid',
        endpoint => 'research-outputs',
        apiKey => '1234',
      )
} qr/Invalid base/, "invalid base";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        options  => { 'size' => 1 },
        apiKey => '1234',
      )
} "options";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        handler  => 'raw',
        apiKey => '1234',
      )
} "handler raw";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        handler  => 'simple',
        apiKey => '1234',
      )
} "handler simple";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        handler  => 'struct',
        apiKey => '1234',
      )
} "handler struct";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => 'wrong'
      )
} qr/Unable to load handler/, "missing handler";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => 12345
      )
} qr/Invalid handler/, "invalid handler - number";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => [ 0, 5 ],
      )
} qr/Invalid handler/, "invalid handler - array";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => sub { $_[0] }
      )
} "handler custom";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => Catmandu::Importer::Pure::Parser::raw->new,
      )
} "handler class invocant";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        handler  => '+Catmandu::Importer::Pure::Parser::raw',
      )
} "handler class";

lives_ok {
    $pkg->new(
        base      => $DEFAULT_TEST_BASE,
        endpoint  => 'research-outputs',
        apiKey => '1234',
        trim_text => 1,
      )
} "trim text";

my $importer =
  $pkg->new( base => $DEFAULT_TEST_BASE, apiKey => '1234', endpoint => 'research-outputs' );

isa_ok( $importer, $pkg );
can_ok( $importer, 'each' );
can_ok( $importer, 'first' );
can_ok( $importer, 'count' );
# Test invalid arguments
throws_ok {
    $pkg->new(
        base     => 'https://nothing.nowhere/x/x',
        endpoint => 'research-outputs',
        apiKey => '1234',
      )
} qr/Invalid base URL/, "invalid base URL";

throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        timeout  => 'xxx',
      )
} qr/Invalid value for timeout/, "invalid value for timeout";

#bad furl
throws_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        furl     => 'notfurl'
      )
} qr/Invalid furl/, "invalid value for furl";

lives_ok {
    $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        furl     => Furl->new
      )
} "furl passed";

my $it;
lives_ok {
    $it = $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey => '1234',
        options  => {
            'source' => {
                'name'  => 'PubMed',
                'value' => [ '19838868', '11017075' ],
            },
        }
      )
} 'setting of options 1';

lives_ok {
    $it = $pkg->new(
        base     => $DEFAULT_TEST_BASE,
        endpoint => 'research-outputs',
        apiKey   => '1234',
        options  => {
            offset => 1000,
        }
      )
} 'setting of options 2';

if ( $ENV{RELEASE_TESTING} ) {
############# everything below needs a Pure server

    #Test get results
    my $rec = eval { $pkg->new( %connect_args, endpoint => 'research-outputs' )->first };
    ok( !$@ && $rec, "get results" )
      or BAIL_OUT "Failed to get any results from base URL $connect_args{base}"
      . ( $connect_args{user} ? "(user=$connect_args{user})" : '' );
    
    my %bad_base = %connect_args;
    $bad_base{base} .= '/invalid/invalid';
    
    throws_ok { $pkg->new( %bad_base, endpoint => 'persons' )->first }
    qr/HTTP 404 Not Found/, "invalid base path";

    throws_ok {
        $pkg->new(
            %connect_args,
            apiKey => 'wrong key',
            endpoint => 'research-outputs',
          )->first
    } qr/Status code: 401/, "invalid API key";
    
    #Check REST errors
    throws_ok { $pkg->new( %connect_args, endpoint => '_nothing_' )->first }
    qr/Pure REST Error/, "invalid endpoint";
    
    throws_ok {
        $pkg->new(
            %connect_args,
            endpoint => 'research-outputs',
            options  => { 'size' => 'a10' }
          )->first
    } qr/Pure REST Error/, "invalid option";
    
    
    #Test handlers
    $rec = $pkg->new(
        %connect_args,
        handler  => 'raw',
        endpoint => 'research-outputs',
        options  => { 'size' => 1 }
    )->first;
    
    like( $rec, qr/^</, "raw handler" );
    
    $rec = $pkg->new(
        %connect_args,
        handler  => 'struct',
        endpoint => 'persons',
        options  => { 'size' => 1 }
    )->first;
    
    ok( $rec->[0] && $rec->[0] eq 'person', 'struct handler' );
    
    $rec = $pkg->new(
        %connect_args,
        handler  => sub { 'success' },
        endpoint => 'research-outputs',
        options  => { 'size' => 1 }
    )->first;
    
    is( $rec, 'success', "custom handler" );
    
    #Test empty response
    my $count = $pkg->new(
        %connect_args,
        endpoint => 'research-outputs',
        options  => { q => 'sdfkjasewrwe' }
    )->count;
    
    is( $count, 0, "empty results" );
    
    $count = $pkg->new(
        %connect_args,
        endpoint     => 'organisational-units',
        fullResponse => 1,
        options      => { 'offset' => 1, 'size' => 2 }
    )->count;
    
    is( $count, 1, 'full response with offset and size' );
    
    $count = $pkg->new(
        %connect_args,
        endpoint     => 'organisational-units',
        fullResponse => 1,
        options      => { 'size' => 0 }
    )->first->{count};
    ok( $count > 1, 'count organisations' );
    
    # Start a few records before the end. Clamp at zero so that a server
    # with fewer than five organisational units is not asked for a negative
    # offset (the check above only guarantees $count > 1).
    my $offset = $count > 5 ? $count - 5 : 0;
    my $pcount = $pkg->new(
        %connect_args,
        endpoint => 'organisational-units',
        options  => { offset => $offset }
    )->count;

    # Records fetched from the offset plus the skipped ones must add up to
    # the total reported by the server.
    is( $pcount + $offset, $count, 'get organisational-units from offset' );
    
    $rec = $pkg->new( %connect_args, endpoint => 'classification-schemes' )->first;
    ok( $rec, 'endpoint classification-schemes' );
    
    $rec = $pkg->new(
        %connect_args,
        endpoint => 'changes',
        path  => '2002-01-22',
    )->slice( 100, 1 )->first;
    ok( $rec, 'endpoint changes' );
}

done_testing;



( run in 0.414 second using v1.01-cache-2.11-cpan-b61123c0432 )