Bio-Biblio
view release on metacpan or search on metacpan
lib/Bio/Biblio/IO/medlinexml.pm view on Meta::CPAN
# because it will be used by the event handler who knows nothing
# about this object
my $result = $self->{'_result'} || 'medline2ref';
$result = "\L$result"; # normalize capitalization to lower case
# a special case is 'raw' when no converting module is loaded
# and citations will be returned as a hashtable (the one which
# is created during parsing XML file/stream)
unless ($result eq 'raw') {
# load module with output converter - as defined in $result
if (defined &Bio::Biblio::IO::_load_format_module ($result)) {
$Convert = "Bio::Biblio::IO::$result"->new (@args);
}
}
# create an instance of the XML parser
# (unless it is already there...)
$self->{'_xml_parser'} = XML::Parser->new (Handlers => {Init => \&handle_doc_start,
Start => \&handle_start,
End => \&handle_end,
Char => \&handle_char,
Final => \&handle_doc_end})
unless $self->{'_xml_parser'};
# if there is an argument '-callback' then start parsing at once -
# the registered event handlers will use 'callback' to report
# back after each citation
#
# we need to remember this situation also in a global variable
# because the event handler subroutines know nothing about this
# object (unfortunately)
if ($Callback = $self->{'_callback'}) {
$self->_parse;
}
}
# Parse the XML source attached to this object.
#
# The source is chosen in this order:
#   '_file' - a file name, handed to the parser's parsefile()
#   '_fh'   - an open handle; an IO::Handle object is handed to the
#             parser as it is, any other kind of handle is read
#             completely and parsed as one string
#   '_data' - a string holding the XML document
# Throws an exception when none of them is available.
#
# The event handlers do not know this object. They either report each
# citation through the registered callback, or they collect citations
# in the global @Citations. Collected citations are copied into
# $self->{'_citations'} and the global is emptied, so another parser
# can be started without overwriting citations which were already read.
sub _parse {
    my ($self) = @_;
    my $parser = $self->{'_xml_parser'};

    if (defined $self->{'_file'}) {
        $parser->parsefile ($self->{'_file'});
    } elsif (defined $self->{'_fh'}) {
        my $fh = $self->{'_fh'};
        if (ref ($fh) and UNIVERSAL::isa ($fh, 'IO::Handle')) {
            # the XML parser reads from an IO::Handle by itself
            $parser->parse ($fh);
        } else {
            # read the whole input at once; $/ is localized and $_ is
            # not used, so the caller's $_ stays untouched
            my $data = do { local $/; <$fh> };
            $parser->parse ($data);
        }
    } elsif ($self->{'_data'}) {
        $parser->parse ($self->{'_data'});
    } else {
        $self->throw ("XML source to be parsed is unknown. Should be given in the new().");
    }

    # when parsing is done, either all citations have already been
    # delivered to the caller through her callback (and there is
    # nothing to store here), or the parser has put them into the
    # global @Citations - from where they are moved into this instance
    if (@Citations) {
        $self->{'_citations'} = [ @Citations ];
        undef @Citations;
    }
    return;
}
# ---------------------------------------------------------------------
#
# Here is an implementation of Bio::Biblio::IO methods
#
# ---------------------------------------------------------------------
# Package-level state shared between this module and its XML event
# handlers (the handlers are plain subroutines and cannot reach the
# object that started the parser).
# TBD: make them accessible at least ONLY from this module...
($Callback, $Convert) = ();   # callback and converter: both start undefined
@PCDataStack = ();            # it has String elements
@ObjectStack = ();            # it has Hash-Ref elements
@Citations   = ();            # citations collected while parsing
# Return the next citation read from the XML input, or undef when no
# citation is left. The returned citation is removed from this object.
#
# Throws an exception when the object was given a '-callback', because
# citations are then delivered through the callback instead.
sub next_bibref {
    my $self = shift;

    if ($self->{'_callback'}) {
        $self->throw ("Method 'next_bibref' should not be called when a '-callback' argument given.");
    }

    # on first use, parse the whole input; the citations end up
    # stored in this object
    if (not $self->{'_citations'}) {
        $self->_parse;
    }

    # hand over the first stored citation and forget it here
    return shift @{ $self->{'_citations'} };
}
# ---------------------------------------------------------------------
#
# Here are the event handlers (they do the real job!)
#
# Note that these methods do not know anything about the object they
# are part of - they are called as subroutines, not as methods.
# It also means that they need to use global variables to store and
# exchange intermediate results.
#
# ---------------------------------------------------------------------
#
# This is a list of #PCDATA elements.
#
%PCDATA_NAMES = (
'AbstractText' => 1,
'AccessionNumber' => 1,
'Acronym' => 1,
( run in 1.762 second using v1.01-cache-2.11-cpan-140bd7fdf52 )