Catmandu-OAI
view release on metacpan or search on metacpan
lib/Catmandu/Importer/OAI.pm view on Meta::CPAN
# Harvesting agent, built lazily on first access by _build_oai.
has oai => (is => 'ro', lazy => 1, builder => 1);
# Read-only option; its consumer is outside this excerpt
# (presumably a dry-run switch -- TODO confirm).
has dry => (is => 'ro');
# Record handler. When not supplied it is derived from metadataPrefix by
# _build_handler; any value is normalised by _coerce_handler (parser name,
# "+Full::Class::Name", object or code reference).
has handler => (is => 'rw', lazy => 1 , builder => 1, coerce => \&_coerce_handler );
# Stylesheet specification, coerced by _coerce_xslt into a
# Catmandu::XML::Transformer instance.
has xslt => (is => 'ro', coerce => \&_coerce_xslt );
# Defaults to 0; presumably a pause between requests -- unit and use are
# not visible in this excerpt, TODO confirm.
has sleep => ( is => 'ro', default => sub { 0 } );
# Defaults to 0; presumably the upper bound on request retries -- TODO confirm.
has max_retries => ( is => 'ro', default => sub { 0 } );
# Internal read-write counter starting at 0; presumably the number of
# retries performed so far -- TODO confirm.
has _retried => ( is => 'rw', default => sub { 0; } );
# Per-instance cache of parser objects keyed by type name
# (filled on demand by _xml_handler_for_node).
has _xml_handlers => ( is => 'ro', default => sub { +{} } );
# HTTP authentication settings, each with a has_* predicate. _build_oai
# registers credentials only when both username and password were given.
has realm => ( is => 'ro', predicate => 1 );
has username => ( is => 'ro', predicate => 1 );
has password => ( is => 'ro', predicate => 1 );
# Builder for the lazy "handler" attribute.
#
# Returns the metadataPrefix itself when it names one of the dedicated
# parsers (oai_dc, marcxml, mods); every other prefix maps to the generic
# 'struct' parser. The returned name is later turned into an object by the
# attribute's coercion.
sub _build_handler {
    my ($self) = @_;

    my $prefix = $self->metadataPrefix;

    for my $known (qw(oai_dc marcxml mods)) {
        return $known if $prefix eq $known;
    }

    return 'struct';
}
# Coercion for the "handler" attribute.
#
# Accepted inputs:
#   * an invocant or a code reference  -> returned unchanged
#   * a non-numeric string             -> treated as a parser name; a leading
#     "+" marks a full class name, otherwise the name is appended to
#     Catmandu::Importer::OAI::Parser::. The class is loaded and instantiated;
#     any failure is re-thrown with croak.
#   * anything else                    -> a code reference wrapping readXML.
sub _coerce_handler {
    my ($handler) = @_;

    if (is_invocant($handler) || is_code_ref($handler)) {
        return $handler;
    }

    # Not usable as a class/parser name: fall back to the generic closure.
    if (!is_string($handler) || is_number($handler)) {
        return sub { return { _metadata => readXML($_[0]) } };
    }

    my $class;
    if ($handler =~ /^\+(.+)/) {
        $class = $1;
    }
    else {
        $class = "Catmandu::Importer::OAI::Parser::$handler";
    }

    my $parser;
    eval {
        $parser = Catmandu::Util::require_package($class)->new;
    };
    croak $@ if $@;

    return $parser;
}
# Coercion for the "xslt" attribute.
#
# Loads Catmandu::XML::Transformer and instantiates it with the given value
# as its stylesheet. Croaks with the underlying error when loading or
# construction fails (or yields a false value); otherwise returns the
# transformer object.
sub _coerce_xslt {
    my ($stylesheet) = @_;

    my $transformer = eval {
        Catmandu::Util::require_package('Catmandu::XML::Transformer')
            ->new( stylesheet => $stylesheet );
    };

    croak $@ unless $transformer;

    return $transformer;
}
# Builder for the lazy "oai" attribute.
#
# Creates an HTTP::OAI::Harvester for the configured url. When both a
# username and a password were supplied, credentials are registered for the
# host:port of that url (realm is passed through, or undef when false).
# Finally env_proxy is called on the agent before it is returned.
sub _build_oai {
    my ($self) = @_;

    my $harvester = HTTP::OAI::Harvester->new(
        baseURL    => $self->url,
        resume     => 0,
        keep_alive => 1,
    );

    if ( $self->has_username && $self->has_password ) {
        $harvester->credentials(
            URI->new( $self->url )->host_port,
            $self->realm || undef,
            $self->username,
            $self->password,
        );
    }

    $harvester->env_proxy;

    return $harvester;
}
# Return the parser object to use for an XML node, chosen by the node's
# namespace URI.
#
# Parameters:
#   $self - importer instance (provides the _xml_handlers cache)
#   $node - object responding to namespaceURI()
#
# Known namespaces map to the oai_dc, marcxml and mods parsers; any other
# namespace -- including a missing one -- maps to the generic struct parser.
# Parser objects are created once per type and cached in _xml_handlers.
sub _xml_handler_for_node {
    my ( $self, $node ) = @_;

    my %type_for_ns = (
        "http://www.openarchives.org/OAI/2.0/oai_dc/" => "oai_dc",
        "http://www.loc.gov/MARC21/slim"              => "marcxml",
        "http://www.loc.gov/mods/v3"                  => "mods",
    );

    # namespaceURI() may return undef (e.g. a node without a namespace);
    # guard the lookup so that case falls through to 'struct' without
    # triggering uninitialized-value warnings.
    my $ns   = $node->namespaceURI();
    my $type = "struct";
    if ( defined $ns && exists $type_for_ns{$ns} ) {
        $type = $type_for_ns{$ns};
    }

    # Instantiate the parser lazily and reuse it on later calls.
    return $self->_xml_handlers()->{$type}
        ||= Catmandu::Util::require_package( "Catmandu::Importer::OAI::Parser::$type" )->new();
}
sub _map_set {
my ($self, $rec) = @_;
+{
_id => $rec->setSpec(),
setSpec => $rec->setSpec(),
setName => $rec->setName(),
setDescription => [ map {
#root: 'setDescription'
my @root = $_->dom()->childNodes();
#child: oai_dc, marcxml, mods..
my @children = $root[0]->childNodes();
$self->_xml_handler_for_node( $children[0] )->parse( $children[0] );
} $rec->setDescription() ]
( run in 1.386 second using v1.01-cache-2.11-cpan-5735350b133 )