Catmandu-OAI

 view release on metacpan or  search on metacpan

lib/Catmandu/Importer/OAI.pm  view on Meta::CPAN

# Harvester client object; built on first access by _build_oai.
has oai => (
    is      => 'ro',
    lazy    => 1,
    builder => 1,
);

# NOTE(review): presumably a "dry run" switch; its use is outside this excerpt.
has dry => (
    is => 'ro',
);

# Record parser. When not supplied it is built by _build_handler; every
# value assigned goes through _coerce_handler.
has handler => (
    is      => 'rw',
    lazy    => 1,
    builder => 1,
    coerce  => \&_coerce_handler,
);

# Stylesheet specification; turned into a transformer object by _coerce_xslt.
has xslt => (
    is     => 'ro',
    coerce => \&_coerce_xslt,
);

# NOTE(review): presumably a pause between requests (unit not visible here).
has sleep => (
    is      => 'ro',
    default => sub { 0 },
);

# NOTE(review): presumably the retry limit; the retry logic is outside this excerpt.
has max_retries => (
    is      => 'ro',
    default => sub { 0 },
);

# Internal read/write counter, starts at 0.
# NOTE(review): presumably counts retries against max_retries; confirm.
has _retried => (
    is      => 'rw',
    default => sub { 0 },
);

# Per-instance cache of parser objects keyed by type name; filled by
# _xml_handler_for_node.
has _xml_handlers => (
    is      => 'ro',
    default => sub { +{} },
);

# HTTP authentication settings, read by _build_oai. Each one gets a
# has_<name> predicate.
has realm => (
    is        => 'ro',
    predicate => 1,
);

has username => (
    is        => 'ro',
    predicate => 1,
);

has password => (
    is        => 'ro',
    predicate => 1,
);

# Builder for the `handler` attribute.
#
# Returns the parser short name matching the importer's metadataPrefix:
# 'oai_dc', 'marcxml' or 'mods' when the prefix is exactly one of those,
# and 'struct' for every other prefix.
sub _build_handler {
    my ($self) = @_;

    my $prefix = $self->metadataPrefix;

    # The dedicated parsers are named after the prefix they handle.
    for my $known (qw(oai_dc marcxml mods)) {
        return $known if $prefix eq $known;
    }

    return 'struct';
}

# Coercion for the `handler` attribute.
#
# Accepted input and what comes back:
#   * an invocant or a code reference  -> returned untouched;
#   * a non-numeric string             -> a new instance of the named parser
#     class. A leading '+' marks a full class name; any other string is
#     appended to "Catmandu::Importer::OAI::Parser::";
#   * anything else                    -> a code reference that wraps
#     readXML() of its first argument under the key `_metadata`.
#
# Croaks with the underlying error when the parser class cannot be loaded
# or instantiated.
sub _coerce_handler {
    my ($spec) = @_;

    if (is_invocant($spec) or is_code_ref($spec)) {
        return $spec;
    }

    # Not a usable class name: fall back to the generic XML reader.
    unless (is_string($spec) && !is_number($spec)) {
        return sub { return { _metadata => readXML($_[0]) } };
    }

    my $class;
    if ($spec =~ /^\+(.+)/) {
        $class = $1;
    }
    else {
        $class = "Catmandu::Importer::OAI::Parser::$spec";
    }

    my $parser;
    eval {
        $parser = Catmandu::Util::require_package($class)->new;
    };
    croak $@ if $@;

    return $parser;
}

# Coercion for the `xslt` attribute.
#
# Loads Catmandu::XML::Transformer and returns a new instance constructed
# with the given value as its `stylesheet`. Croaks with the error in $@
# when loading or construction does not yield a true value.
sub _coerce_xslt {
    my ($stylesheet) = @_;

    my $transformer = eval {
        Catmandu::Util::require_package('Catmandu::XML::Transformer')
            ->new( stylesheet => $stylesheet );
    };
    croak $@ unless $transformer;

    return $transformer;
}

# Builder for the `oai` attribute.
#
# Creates an HTTP::OAI::Harvester for the importer's url (resume off,
# keep-alive on). When both a username and a password were supplied,
# credentials are registered for the url's host:port, with the realm
# passed as undef when it is unset or false. env_proxy is called on the
# harvester before it is returned.
sub _build_oai {
    my ($self) = @_;

    my $harvester = HTTP::OAI::Harvester->new(
        baseURL    => $self->url,
        resume     => 0,
        keep_alive => 1,
    );

    if ( $self->has_username && $self->has_password ) {
        my $endpoint = URI->new( $self->url );
        $harvester->credentials(
            $endpoint->host_port,
            $self->realm || undef,
            $self->username,
            $self->password,
        );
    }

    $harvester->env_proxy;

    return $harvester;
}

# Return the parser object responsible for an XML node, chosen by the
# node's namespace URI.
#
# Arguments:
#   $node - object providing a namespaceURI() method.
#
# Returns an instance of Catmandu::Importer::OAI::Parser::<type>, where
# <type> is 'oai_dc', 'marcxml' or 'mods' for the matching namespace and
# 'struct' for everything else. One parser per type is created on demand
# and kept in the _xml_handlers cache, so repeated calls reuse it.
sub _xml_handler_for_node {
    my ( $self, $node ) = @_;

    # namespaceURI() gives undef for a node without a namespace; normalise
    # it so the lookup below does not emit "uninitialized value" warnings.
    # Such nodes are handled by the generic 'struct' parser.
    my $ns = $node->namespaceURI();
    $ns = '' unless defined $ns;

    my %type_for = (
        "http://www.openarchives.org/OAI/2.0/oai_dc/" => "oai_dc",
        "http://www.loc.gov/MARC21/slim"              => "marcxml",
        "http://www.loc.gov/mods/v3"                  => "mods",
    );
    my $type = exists $type_for{$ns} ? $type_for{$ns} : "struct";

    my $handlers = $self->_xml_handlers();
    $handlers->{$type} ||= Catmandu::Util::require_package( "Catmandu::Importer::OAI::Parser::$type" )->new();

    return $handlers->{$type};
}

sub _map_set {
    my ($self, $rec) = @_;

    +{
        _id => $rec->setSpec(),
        setSpec => $rec->setSpec(),
        setName => $rec->setName(),
        setDescription => [ map {

            #root: 'setDescription'
            my @root = $_->dom()->childNodes();
            #child: oai_dc, marcxml, mods..
            my @children = $root[0]->childNodes();
            $self->_xml_handler_for_node( $children[0] )->parse( $children[0] );

        } $rec->setDescription() ]



( run in 1.386 second using v1.01-cache-2.11-cpan-5735350b133 )