MOSES-MOBY
view release on metacpan or search on metacpan
lib/MOSES/MOBY/Parser.pm view on Meta::CPAN
# Author: Edward Kawas <edward.kawas@gmail.com>,
# Martin Senger <martin.senger@gmail.com>
#
# For copyright and disclaimer see below.
#
# $Id: Parser.pm,v 1.4 2008/04/29 19:45:01 kawas Exp $
#-----------------------------------------------------------------
package MOSES::MOBY::Parser;
use MOSES::MOBY::Base;
use base qw( MOSES::MOBY::Base XML::SAX::Base );
use XML::SAX::ParserFactory;
use MOSES::MOBY::Data::ProvisionInformation;
use MOSES::MOBY::Data::Xref;
use MOSES::MOBY::Data::Object;
use MOSES::MOBY::Tags;
use MOSES::MOBY::Package;
use MOSES::MOBY::ServiceException;
use MOSES::MOBY::Generators::GenTypes;
use strict;
# add versioning to this module
use vars qw /$VERSION/;
$VERSION = sprintf "%d.%02d", q$Revision: 1.4 $ =~ /: (\d+)\.(\d+)/;
#-----------------------------------------------------------------
# A list of allowed attribute names. See MOSES::MOBY::Base for details.
#-----------------------------------------------------------------
{
my %_allowed =
(
cachedir => undef,
registry => undef,
lowestKnownDataTypes => { type => 'HASH' },
generator => { type => 'MOSES::MOBY::Generators::GenTypes' }
);
sub _accessible {
my ($self, $attr) = @_;
exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
}
sub _attr_prop {
my ($self, $attr_name, $prop_name) = @_;
my $attr = $_allowed {$attr_name};
return ref ($attr) ? $attr->{$prop_name} : $attr if $attr;
return $self->SUPER::_attr_prop ($attr_name, $prop_name);
}
}
#-----------------------------------------------------------------
# init
#-----------------------------------------------------------------
sub init {
my ($self) = shift;
$self->SUPER::init();
$self->lowestKnownDataTypes ({});
}
my %pcdataNames ;
my %pcdataNamesForPrimitives;
my @pcdataNamesArray;
my @pcdataNamesArrayForPrimitives;
# special logger just for the parser
my $PLOG;
BEGIN {
@pcdataNamesArray =
( NOTES, SERVICECOMMENT, VALUE, XREF, EXCEPTIONCODE, EXCEPTIONMESSAGE );
@pcdataNamesArrayForPrimitives =
( MOBYSTRING, MOBYINTEGER, MOBYFLOAT, MOBYBOOLEAN, MOBYDATETIME );
foreach (@pcdataNamesArray) {
$pcdataNames{$_} = 1;
}
foreach (@pcdataNamesArrayForPrimitives) {
$pcdataNamesForPrimitives{$_} = 1;
}
# special logger just for the parser because debugging level from
# the parser clutters other logging too much
use Log::Log4perl qw(get_logger :levels :no_extra_logdie_message);
$PLOG = get_logger ('parser');
}
############################################################
# GLOBAL VARIABLES
############################################################
my %generated; # keep track of whether a type was generated or not
my @objectStack; # type MobyObject
my @pcdataStack; # strings
my $readingMobyObject = 0; # true if inside Simple
my $readingCollection = 0; # true if inside collection
my $readingXrefs = 0; # true if inside Crossreference
my $readingProvision = 0; # true if inside provision information
my $insubstitution = 0; # when just using 'lowestKnownDataTypes' it contains a name of substituted element
my $inServiceNotes = 0; # true if inside serviceNotes
my $inMobyException = 0;
my $ignoring = 0; # count depth of ignored (unknown) data objects
my $result; # type MobyPackage - the whole result
my @articleNames;
my $locator;
######################END OF GLOBALS########################
#-----------------------------------------------------------------
# parse
# args: method => 'string', data => direct XML
# OR
# method => 'file', data => filename
#-----------------------------------------------------------------
sub parse {
my ($self, %args) = @_;
$self->throw ("parse() needs arguments 'method' and 'data'.")
unless $args{method} and $args{data};
# I could not assign this default value in init(), because it was
# before 'cachedir' etc. were set
my @generator_args = ();
push (@generator_args, (cachedir => $self->cachedir)) if $self->cachedir;
push (@generator_args, (registry => $self->registry)) if $self->registry;
$self->generator ( new MOSES::MOBY::Generators::GenTypes (@generator_args) )
unless $self->generator;
$self->select_parser;
my $parser = XML::SAX::ParserFactory->parser(Handler => $self);
if ($args{method} eq 'string') {
$parser->parse_string ($args{data});
} elsif ($args{method} eq 'file') {
$parser->parse_file ($args{data});
} else {
$self->throw ("in parse(): 'method' is neither 'string' nor 'file'.");
}
return $result if $result;
$PLOG->error("There was a problem parsing\n$args{data}.\nIf this is a file, please make sure that the file exists, otherwise please ensure that the XML is 'valid'.");
$self->throw ("There was a problem parsing\n$args{data}.\nIf this is a file, please make sure that the file exists, otherwise please ensure that the XML is 'valid'.");
}
#-----------------------------------------------------------------
# select_parser
#
# If there is a configuration option defining what XML parser to use,
# this method selects it. Otherwise, it leaves it to the parser
# factory to find it out.
#
#-----------------------------------------------------------------
sub select_parser {
my $self = shift;
if (defined $MOBYCFG::XML_PARSER) {
$XML::SAX::ParserPackage = $MOBYCFG::XML_PARSER;
}
}
#*********************************************************************
#
# XML-SAX 2.0 handler routines.
#
#********************************************************************
sub start_element {
my ( $self, $element ) = @_;
# element is a hash reference with these properties:
# Name The element type name (including prefix).
# Attributes The attributes attached to the element, if any.
# NamespaceURI The namespace of this element.
# Prefix The namespace prefix used on this element.
# LocalName The local name of this element.
#
# Attributes is a reference to hash keyed by JClark namespace notation.
# That is, the keys are of the form "{NamespaceURI}LocalName". If the attribute
# has no NamespaceURI, then it is simply "{}LocalName". Each attribute is a hash reference with these properties:
# Name The attribute name (including prefix).
# Value The normalized value of the attribute.
# NamespaceURI The namespace of this attribute.
# Prefix The namespace prefix used on this attribute.
# LocalName The local name of this attribute.
$PLOG->debug ("Starting element $element->{Name} with local name $element->{LocalName} \n");
if ( $ignoring > 0 ) {
$ignoring++;
return;
}
if ( exists $pcdataNames{ $element->{LocalName} } ) {
my $st = "";
push @pcdataStack, \$st;
}
if ($readingMobyObject) {
if ( exists $pcdataNamesForPrimitives{ $element->{LocalName} } ) {
my $st = "";
push @pcdataStack, \$st;
}
}
if ($readingMobyObject) {
if ( $element->{LocalName} eq PROVISIONINFORMATION ) {
push @objectStack, \MOSES::MOBY::Data::ProvisionInformation->new();
$readingProvision = 1;
}
elsif ( $element->{LocalName} eq CROSSREFERENCE ) {
$readingXrefs = 1;
}
elsif ($readingXrefs) {
if ( $element->{LocalName} eq XREF ) {
my $xref = MOSES::MOBY::Data::Xref->new();
$xref->id( $self->getValue( attributes=>$element->{Attributes}, name=>OBJ_ID ) );
$xref->namespace(
$self->getValue( attributes=>$element->{Attributes}, name=>OBJ_NAMESPACE ) );
eval {
$xref->evidenceCode($self->getValue( attributes=>$element->{Attributes}, name=>EVIDENCECODE )
);
};
if ($@) {
$self->error ($@);
}
my $serviceName =
$self->getValue( attributes=>$element->{Attributes}, name=>SERVICENAME );
my $serviceAuthority =
$self->getValue( attributes=>$element->{Attributes}, name=>AUTHURI );
if ($serviceName and $serviceAuthority) {
$xref->service ($serviceName);
$xref->authority ($serviceAuthority);
}
push @objectStack, \$xref;
}
elsif ( $element->{LocalName} eq MOBYOBJECT ) {
my $xref = MOSES::MOBY::Data::Xref->new();
$xref->id( $self->getValue( attributes=>$element->{Attributes}, name=>OBJ_ID ) );
$xref->namespace(
$self->getValue( attributes=>$element->{Attributes}, name=>OBJ_NAMESPACE ) );
#$xref->isSimpleXref(1);
push @objectStack, \$xref;
}
}
elsif ($readingProvision) {
if ( $element->{LocalName} eq SERVICESOFTWARE ) {
my $info = ${ $self->vPeek('MOSES::MOBY::Data::ProvisionInformation') };
$info->softwareName(
$self->getValue( attributes=>$element->{Attributes}, name=>SOFTWARENAME ) );
my $version =
$self->getValue( attributes=>$element->{Attributes}, name=>SOFTWAREVERSION );
$version =
$self->getValue( attributes=>$element->{Attributes}, name=>PLAINVERSION )
unless $version;
$info->softwareVersion($version);
my $comment =
$self->getValue( attributes=>$element->{Attributes}, name=>SOFTWARECOMMENT );
$comment = $self->getValue( attributes=>$element->{Attributes}, name=>COMMENT )
unless $comment;
$info->softwareComment($comment);
lib/MOSES/MOBY/Parser.pm view on Meta::CPAN
$simple->name( $self->getValue( attributes=>$element->{Attributes}, name=>ARTICLENAME ) ) unless $readingCollection;
push @objectStack, \$simple;
$readingMobyObject = 1;
}
elsif ( $element->{LocalName} eq COLLECTION ) {
my $collection = MOSES::MOBY::Collection->new;
$PLOG->error("A 'Collection' element cannot have an empty articlename.") if $self->getValue( attributes=>$element->{Attributes}, name=>ARTICLENAME ) eq "" ;
$self->throw("A 'Collection' element cannot have an empty articlename.") if $self->getValue( attributes=>$element->{Attributes}, name=>ARTICLENAME ) eq "" ;
$collection->name(
$self->getValue( attributes=>$element->{Attributes}, name=>ARTICLENAME ) );
push @objectStack, \$collection;
$readingCollection = 1;
}
elsif ( $element->{LocalName} eq PARAMETER ) {
my $parameter = MOSES::MOBY::Parameter->new;
$parameter->name(
$self->getValue( attributes=>$element->{Attributes}, name=>ARTICLENAME ) );
push @objectStack, \$parameter;
}
$PLOG->debug ($self->printInfo);
}
sub end_element {
my ( $self, $element ) = @_;
$PLOG->debug ("Ending element $element->{Name}\n");
my ( $obj, $obj2 );
if ( $ignoring > 0 ) {
$ignoring--;
return;
}
if ($readingMobyObject) {
if ( $element->{LocalName} eq SIMPLE ) {
$obj = pop @objectStack;
$obj = ${$obj};
$obj2 = ${ $self->peek() };
if ( $obj2->isa('MOSES::MOBY::Job') ) {
$obj2->add_dataElements($obj);
}
elsif ( $obj2->isa('MOSES::MOBY::Collection') ) {
$obj2->add_data($obj);
}
else {
$self->error ("A simple element should not be in '"
. ref($obj2) . "'."
);
}
$readingMobyObject = 0;
}
elsif ( $element->{LocalName} eq PROVISIONINFORMATION ) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Data::Object") }->provision($obj);
$readingProvision = 0;
}
elsif ( $element->{LocalName} eq CROSSREFERENCE ) {
$readingXrefs = 0;
}
elsif ($readingXrefs) {
if ( $element->{LocalName} eq XREF ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Data::Xref") }
->description($obj);
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Data::Object") }->add_xrefs($obj);
}
elsif ( $element->{LocalName} eq MOBYOBJECT ) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Data::Object") }->add_xrefs($obj);
}
}
elsif ($readingProvision) {
if ( $element->{LocalName} eq SERVICECOMMENT ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Data::ProvisionInformation") }
->serviceComment($obj);
}
}
else {
if ($insubstitution or $generated{$element->{LocalName}}) {
my $mobyObj = pop @objectStack;
$mobyObj = ${$mobyObj};
if ( $insubstitution
and $element->{LocalName} eq $insubstitution ) {
$insubstitution = 0;
}
if ( exists $pcdataNamesForPrimitives{ $element->{LocalName} } )
{
my $value = pop @pcdataStack;
$value = ${$value};
$mobyObj->value($value);
}
$obj2 = ${ $self->peek() };
if ( $obj2->isa("MOSES::MOBY::Simple") ) {
$obj2->data($mobyObj);
}
else {
# save original article name in data object itself
# (will be needed for creating back an XML of this object)
my $methodName = pop @articleNames;
$mobyObj->original_memberName ($methodName);
#is the article name for children empty? throw error
$PLOG->error("Invalid article name given for children of '".$obj2->mobyname."'. Please make sure that these fields are not empty.") if $methodName eq "";
$self->throw("Invalid article name given for children of '".$obj2->mobyname."'. Please make sure that these fields are not empty.") if $methodName eq "";
$methodName = $self->escape_name ($methodName);
$self->callMethod(
actor => $obj2,
method => $methodName,
parameter => $mobyObj,
);
}
}
}
}
elsif ( $element->{LocalName} eq MOBY ) {
if ( scalar @objectStack == 0 ) {
$self->error( "Nothing came out from the parsed XML data.");
}
$obj = pop @objectStack;
$obj = ${$obj};
if ( not $obj->isa("MOSES::MOBY::Package") ) {
$self->error("The input XML does not start with a MOBY tag");
}
$result = $obj;
}
elsif ( $element->{LocalName} eq MOBYCONTENT ) {
# do nothing
}
elsif ( $element->{LocalName} eq SERVICENOTES ) {
$inServiceNotes = 0;
}
elsif ( $element->{LocalName} eq MOBYEXCEPTION ) {
if ($inServiceNotes) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Package") }->add_exceptions($obj);
$inMobyException = 0;
}
}
elsif ( $element->{LocalName} eq MOBYDATA ) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Package") }->add_jobs($obj);
}
elsif ( $element->{LocalName} eq COLLECTION ) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Job") }->add_dataElements($obj);
$readingCollection = 0;
}
elsif ( $element->{LocalName} eq PARAMETER ) {
$obj = pop @objectStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Job") }->add_dataElements($obj);
}
elsif ( $element->{LocalName} eq NOTES ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Package") }->serviceNotes($obj);
}
elsif ( $element->{LocalName} eq EXCEPTIONCODE ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
if ($inMobyException) {
${ $self->vPeek("MOSES::MOBY::ServiceException") }->code($obj);
}
}
elsif ( $element->{LocalName} eq EXCEPTIONMESSAGE ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
if ($inMobyException ){
${ $self->vPeek("MOSES::MOBY::ServiceException") }->message($obj);
}
}
elsif ( $element->{LocalName} eq VALUE ) {
$obj = pop @pcdataStack;
$obj = ${$obj};
${ $self->vPeek("MOSES::MOBY::Parameter") }->value($obj);
}
$PLOG->debug ($self->printInfo) if $PLOG->is_debug;
}
sub characters {
my ( $self, $characters ) = @_;
$PLOG->debug ("characters: $characters->{Data}\n") if $PLOG->is_debug;
# characters is a hash reference with this property:
# Data The characters from the XML document.
if ( @pcdataStack . "" == 0 ) {
return;
}
my $text = $characters->{Data};
$text =~ s/^\s+//;
$text =~ s/\s+$//;
${ $self->pcdataPeek() } = ${ $self->pcdataPeek() } . $text;
}
sub start_document {
my ( $self, $document ) = @_;
@objectStack = ();
@articleNames = ();
@pcdataStack = ();
$ignoring = 0;
}
sub end_document {
my ( $self, $document ) = @_;
@objectStack = undef;
@pcdataStack = undef;
$ignoring = 0;
}
sub ignorable_whitespace {
my ( $self, $characters ) = @_;
}
# returns type MobyObject or dies if shouldBeThere->isa(ref(objectStack.top))
sub vPeek {
my ( $self, $shouldBeThere ) = @_;
# my version of peek
my $obj = pop @objectStack;
push @objectStack, $obj;
if ( $$obj->isa($shouldBeThere) ) {
return $obj;
}
$PLOG->error("Wrong XML: Expected '$shouldBeThere' - but found " .ref $$obj);
$self->error("Wrong XML: Expected '$shouldBeThere' - but found " .ref $$obj);
}
# returns type MobyObject
sub peek {
my ($self) = @_;
# my version of peek
my $obj = pop @objectStack;
push @objectStack, $obj;
return $obj;
}
# returns string references
sub pcdataPeek {
my ($self) = @_;
# my version of peek
my $obj = pop @pcdataStack;
push @pcdataStack, $obj;
return $obj;
}
# returns the value of an attribute either using a namespace or not
# attributes=>, name=>
sub getValue {
my ( $self, %hash) = @_;
my (%attributes, $name);
%attributes = %{$hash{attributes}} if exists $hash{attributes};
$name = $hash{name} if exists $hash{name};
# die ("You need to provide attributes and an element name $name.") unless ($name and %attributes);
my $attr =
$attributes{ "{" . MOBY_XML_NS . "}$name" };
my $string = $attr->{Value} || "";
if ( $string eq "" ) {
$attr = $attributes{"{}$name"};
$string = $attr->{Value} || "";
}
return $string;
}
# call a method, methodName, on object actor using parameter
# actor=>, method=> parameter=>
sub callMethod {
my ( $self, %hash ) = @_;
my ( $actor, $methodName, $parameter );
$actor = $hash{actor} ||'';
$methodName = $hash{method} ||'';
$parameter = $hash{parameter};
eval {
if (ref($actor->$methodName) eq 'ARRAY' ) {
my $method = "add_$methodName";
$actor->$method($parameter);
} else {
$actor->$methodName($parameter);
}
};
if ($@ and not $insubstitution) {
$PLOG->error("Method '$methodName' was not found in the object ". $actor->mobyname);
$self->error( "Method '$methodName' was not found in the object " . $actor->mobyname);
}
}
#*********************************************************************
#
# XML-SAX 2.0 event to set document locator
# unfortunately, this event is passed along as it should be
# and as such, reporting exact errors specific to MOBY XML is hard
#
#********************************************************************
sub set_document_locator {
my ($self) = shift;
$PLOG->debug ("setting the document locator");
my( $loc) = @_;
$locator = $loc;
}
#*********************************************************************
#
# XML-SAX 2.0 error events
#
#********************************************************************
sub fatal_error {
my ($self) = shift;
my $msg = $self->_format_msg (@_);
$self->throw ("Parsing XML fatally failed: $msg");
}
sub error {
my ($self) = shift;
my $msg = $self->_format_msg (@_);
$self->throw ("Parsing XML failed: $msg");
}
sub warning {
my ($self) = shift;
my $msg = $self->_format_msg (@_);
$LOG->warning ($msg);
}
sub _format_msg {
my ($self, $message) = @_;
return $message unless ref ($message) eq 'XML::SAX::Exception::Parse';
my $pubId = $message->{PublicId} || '';
my $sysId = $message->{SystemId} || '';
my $linNo = $message->{LineNumber} || '?';
my $colNo = $message->{ColumnNumber} || '?';
my $msg = $message->{Message} || '';
return "$msg [line $linNo, column $colNo] $sysId $pubId";
}
sub printInfo {
my $self = shift;
return unless $PLOG->is_debug;
use Data::Dumper;
my $buf =
"##########################################################\n"
. "# INFO #\n"
. "##########################################################\n"
. "Object stack currently holds:\n\tBOTTOM: ";
foreach (@objectStack) {
$buf .= Dumper($$_) . "\n";
}
$buf .= "TOP\n";
$buf .= "pcdataStack currently holds:\n\tBOTTOM: ";
foreach (@pcdataStack) {
$buf .= Dumper($$_) . "\n";
}
$buf .= "TOP\n";
$buf .=
"readingMobyObject: $readingMobyObject\n"
. "readingCollection: $readingCollection\n"
. "readingXrefs : $readingXrefs\n"
. "readingProvision : $readingProvision\n"
. "insubstition : $insubstitution\n"
. "inServiceNotes : $inServiceNotes\n"
. "ignoring : $ignoring\n";
$buf .=
"##########################################################\n"
. "# END INFO #\n"
. "##########################################################\n";
return $buf;
}
1;
__END__
=head1 NAME
MOSES::MOBY::Parser - parser of XML BioMoby messages
=head1 SYNOPSIS
use MOSES::MOBY::Parser;
# create a parser
my $parser = new MOSES::MOBY::Parser ();
# parse a file $package is a MOSES::MOBY::Pacakge reference
my $package = $parser->parse ( method => 'file', data => "/home/moby/input.xml" );
# parse a string of xml
$package = $parser->parse ( method => 'string', data => $inputXML );
=head1 DESCRIPTION
The MOSES::MOBY::Parser is a SAX based parser used to parse BioMOBY service XML messages.
The Moby::Parser is able to read Biomoby service/client XML data,
parse the XML and create from them an instance of B<Moby::Package>.
The parser can be invoked by using the subroutine B<parse>.
The parser depends on generated Perl modules that define all
of the Biomoby data types. There is a generator B<MOSES::MOBY::Generators::GenTypes>
that produces such modules into a package MOSES::MOBY::Data.
There is one situation when the parser tries to substitute an
unknown data type by a known one. If the parsing encoutered an XML
tag depicting a B<top-level> data object (not a member object
of some other data object) and if there is no class available for
such object, the parser can be instructed to create a substituted
object whose name was given in the parser constructor. This is to
prevent situation when a long-time running and deployed service
suddenly gets a request from a client that uses more up-to-date
list of data types. It would be bad to let such service die (or
minimally respond unproperly) just because its modules were
generated too long ago.
( run in 0.972 second using v1.01-cache-2.11-cpan-d8267643d1d )