OWL-Simple
view release on metacpan or search on metacpan
lib/OWL/Simple/Parser.pm view on Meta::CPAN
This module wraps XML::Parser, which is a sequential event-driven XML parser that
can potentially handle very large XML documents. The whole XML structure
is never loaded into memory completely, only the bits of interest.
In the constructor specify the owlfile to be loaded and, optionally, two tags -
synonym_tag and definition_tag - that define custom annotations in the ontology for
synonyms and definitions respectively. Note both tags have to be fully
specified exactly as in the OWL XML to be loaded, e.g. FULL_SYN for NCI Thesaurus
or efo:alternative_term for EFO.
=head2 METHODS
=over
=item class_count()
Number of classes loaded by the parser.
=item synonyms_count()
Number of synonyms loaded by the parser.
=item version()
Version of the ontology extracted from the owl:versionInfo.
=item class
Hash collection of all the OWL::Simple::Class objects
=back
=head1 AUTHOR
Tomasz Adamusiak <tomasz@cpan.org>
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2010-2011 European Bioinformatics Institute. All Rights Reserved.
This module is free software; you can redistribute it and/or modify it
under the LGPLv3.
This software is provided "as is" without warranty of any kind.
=cut
package OWL::Simple::Parser;
use Moose 0.89;
use OWL::Simple::Class;
use XML::Parser 2.34;
# NOTE(review): Data::Dumper is not referenced in the part of the file
# reviewed here - confirm whether it is still needed.
use Data::Dumper;
use Log::Log4perl qw(:easy);
# Log4perl is configured as soon as this module is loaded: INFO level,
# with a "LEVEL - message" layout.
Log::Log4perl->easy_init( { level => $INFO, layout => '%-5p - %m%n' } );
our $VERSION = 1.01;
# Name of the OWL file that parse() hands to XML::Parser. Mandatory.
has 'owlfile' => ( is => 'rw', isa => 'Str', required => 1 );
# Read-only hash reference; every instance starts with its own empty hash.
# The POD describes it as the collection of all OWL::Simple::Class objects.
has 'class' => ( is => 'ro', isa => 'HashRef', default => sub { {} } );
# Running totals, starting at 0 and bumped through incr_classes() and
# incr_synonyms().
has 'class_count' => ( is => 'rw', isa => 'Int', default => 0 );
has 'synonyms_count' => ( is => 'rw', isa => 'Int', default => 0 );
# Ontology version string, empty by default. The POD states it is extracted
# from owl:versionInfo.
has 'version' => ( is => 'rw', isa => 'Str' , default => '');
# Element names of the custom synonym and definition annotations. Per the
# POD they must be written exactly as they appear in the OWL XML; the
# defaults are the EFO tags.
has 'synonym_tag' =>
( is => 'rw', isa => 'Str', default => 'efo:alternative_term' );
has 'definition_tag' =>
( is => 'rw', isa => 'Str', default => 'efo:definition' );
# File-scoped state. These lexicals are declared once for the whole file,
# not per object, so every OWL::Simple::Parser instance shares them.
# XML::Parser instance; assigned in BUILD() and used by parse().
my $parser;
# Element path maintained by the XML handlers (see the handler comments
# further down the file).
my $path = '';
# Class object that the XML handlers populate.
my $class = OWL::Simple::Class->new();
# NOTE(review): presumably scratch state for owl:Restriction handling - it
# is not used in the part of the file reviewed here; confirm in the handlers.
my %restriction;
# Moose post-construction hook; Moose calls it after new() has built the
# object. Creates the XML::Parser instance, stores it in the file-scoped
# $parser, and registers closures that forward the Start, End and Char
# events to this object's startElement(), endElement() and characterData()
# methods.
#
# NOTE: $parser is file-scoped, so constructing a second
# OWL::Simple::Parser replaces the parser (and therefore the handlers)
# that parse() uses for every instance.
sub BUILD {
    my $self = shift;

    # Explicit class-method call; the indirect-object form
    # ("new XML::Parser") is ambiguous to the Perl parser.
    $parser = XML::Parser->new;

    # Each handler closes over $self so the event is dispatched as a
    # method call on this object, with XML::Parser's arguments passed on.
    $parser->setHandlers(
        Start => sub { $self->startElement(@_) },
        End   => sub { $self->endElement(@_) },
        Char  => sub { $self->characterData(@_) },
    );
}
# Adds one to the class_count attribute and returns whatever the
# class_count setter returns.
sub incr_classes() {
    my ($self) = @_;
    my $bumped = $self->class_count + 1;
    return $self->class_count($bumped);
}
# Adds one to the synonyms_count attribute and returns whatever the
# synonyms_count setter returns.
sub incr_synonyms() {
    my ($self) = @_;
    my $bumped = $self->synonyms_count + 1;
    return $self->synonyms_count($bumped);
}
# Main entry point. Runs the file-scoped XML::Parser over the file named by
# the owlfile attribute, then logs a one-line summary of the class and
# synonym counters at INFO level. Returns 1.
sub parse() {
    my ($self) = @_;

    $parser->parsefile( $self->owlfile );

    # Assemble the summary from the same pieces, in the same order.
    my $summary = join '',
        'LOADED ',         $self->class_count,
        ' CLASSES AND ',   $self->synonyms_count,
        ' SYNONYMS from ', $self->owlfile;
    INFO($summary);

    return 1;
}
# Handler executed by XML::Parser. Adds current element to $path.
# $path is used by characterData() to determine whether node text should be
# added to class.
#
# Initializes a new OWLClass object and stores it in $class. This is later
# populated by other handlers.
( run in 0.383 second using v1.01-cache-2.11-cpan-39bf76dae61 )