Bio-Biblio

 view release on metacpan or  search on metacpan

lib/Bio/Biblio/IO/medlinexml.pm  view on Meta::CPAN

    # because it will be used by the event handler who knows nothing
    # about this object
    my $result = $self->{'_result'} || 'medline2ref';
    $result = "\L$result";      # normalize capitalization to lower case

    # a special case is 'raw' when no converting module is loaded
    # and citations will be returned as a hashtable (the one which
    # is created during parsing XML file/stream)
    unless ($result eq 'raw') {

        # load module with output converter - as defined in $result
        if (defined &Bio::Biblio::IO::_load_format_module ($result)) {
            $Convert = "Bio::Biblio::IO::$result"->new (@args);
        }
    }

    # create an instance of the XML parser
    # (unless it is already there...)
    $self->{'_xml_parser'} = XML::Parser->new (Handlers => {Init  => \&handle_doc_start,
                                                            Start => \&handle_start,
                                                            End   => \&handle_end,
                                                            Char  => \&handle_char,
                                                            Final => \&handle_doc_end})
        unless $self->{'_xml_parser'};

    # if there is an argument '-callback' then start parsing at once -
    # the registered event handlers will use 'callback' to report
    # back after each citation
    #
    # we need to remember this situation also in a global variable
    # because the event handler subroutines know nothing about this
    # object (unfortunately)
    if ($Callback = $self->{'_callback'}) {
        $self->_parse;
    }
}


# Runs the XML parser over whichever source was given to new().
#
# The source is looked up in this order:
#   '_file' - a file name, parsed with parsefile()
#   '_fh'   - a filehandle; an IO::Handle object is given to the
#             parser as it is, anything else is read into a string
#   '_data' - a string with the XML document
# Throws an exception if none of them is available.
#
# After parsing, citations found in the global @Citations are moved
# into $self->{'_citations'} and the global list is emptied.
sub _parse {
    my ($self) = shift;

    my $parser = $self->{'_xml_parser'};

    if (defined $self->{'_file'}) {
        $parser->parsefile ($self->{'_file'});
    } elsif (defined $self->{'_fh'}) {
        my $fh = $self->{'_fh'};
        if (ref ($fh) and UNIVERSAL::isa ($fh, 'IO::Handle')) {
            # a real IO::Handle object can be consumed by the parser
            # directly - no need to read the whole input here first
            $parser->parse ($fh);
        } else {
            # read everything into a string; a lexical line variable
            # is used so the caller's $_ is not clobbered
            my $data;
            while (defined (my $line = <$fh>)) {
                $data .= $line;
            }
            $parser->parse ($data);
        }
    } elsif ($self->{'_data'}) {
        $parser->parse ($self->{'_data'});
    } else {
        $self->throw ("XML source to be parsed is unknown. Should be given in the new().");
    }

    # when parsing is done either all citations have already been
    # delivered to the caller using her callback - and there is nothing
    # to be stored here - or the parser has put all citations into the
    # global @Citations, from where we move them into this instance -
    # so any caller can start parsing other XML input without
    # overwriting citations already read by the first parser
    if (@Citations) {
        $self->{'_citations'} = [ @Citations ];
        undef @Citations;
    }
}

# ---------------------------------------------------------------------
#
#   Here is an implementation of Bio::Biblio::IO methods
#
# ---------------------------------------------------------------------

# Package-level state shared with the XML event handlers.
# TBD: restrict access to them to this module only...

# lists: collected citations and the two work stacks
@Citations   = ();
@ObjectStack = ();   # elements are hash references
@PCDataStack = ();   # elements are strings

# scalars: nothing registered yet
undef $Callback;
undef $Convert;


# Returns the next citation and removes it from this object.
#
# Throws an exception when this object carries a '_callback' (the
# message says the method should not be used with '-callback').
# The first call without stored citations triggers _parse(), which
# reads the whole input; the citations are then handed out one by one.
sub next_bibref {
   my $self = shift;

   if ($self->{'_callback'}) {
       $self->throw ("Method 'next_bibref' should not be called when a '-callback' argument given.");
   }

   # nothing stored yet: parse the whole input into memory first
   # (global @Citations) and have it copied into this object
   $self->{'_citations'} or $self->_parse;

   # hand out the citation at the front of the list (and forget it here)
   my $pending = $self->{'_citations'} ||= [];
   return shift @$pending;
}

# ---------------------------------------------------------------------
#
#   Here are the event handlers (they do the real job!)
#
# Note that these handlers do not know anything about the object they
# are part of - they are called as subroutines, not as methods.
# It also means that they need to use global variables to store and
# exchange intermediate results.
#
# ---------------------------------------------------------------------

#
# This is a list of #PCDATA elements.
#
%PCDATA_NAMES = (
                 'AbstractText' => 1,
                 'AccessionNumber' => 1,
                 'Acronym' => 1,



( run in 1.762 second using v1.01-cache-2.11-cpan-140bd7fdf52 )