OWL-Simple

 view release on metacpan or  search on metacpan

lib/OWL/Simple/Parser.pm  view on Meta::CPAN


This module wraps XML::Parser, which is a sequential event-driven XML parser that
can potentially handle very large XML documents. The whole XML structure
is never loaded into memory; only the parts of interest are kept.

In the constructor, specify the owlfile to be loaded and, optionally, two tags -
synonym_tag and definition_tag - that define custom annotations in the ontology
for synonyms and definitions respectively. Note that both tags have to be
specified in full, exactly as they appear in the OWL XML to be loaded, e.g.
FULL_SYN for NCI Thesaurus or efo:alternative_term for EFO. When omitted, they
default to efo:alternative_term and efo:definition.

=head2 METHODS

=over

=item class_count()

Number of classes loaded by the parser.

=item synonyms_count()

Number of synonyms loaded by the parser.

=item version()

Version of the ontology extracted from the owl:versionInfo.

=item class

Hash collection of all the OWL::Simple::Class objects

=back

=head1 AUTHOR

Tomasz Adamusiak <tomasz@cpan.org>

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2010-2011 European Bioinformatics Institute. All Rights Reserved.

This module is free software; you can redistribute it and/or modify it
under the LGPLv3.

This software is provided "as is" without warranty of any kind.

=cut

package OWL::Simple::Parser;

use Moose 0.89;
use OWL::Simple::Class;
use XML::Parser 2.34;
use Data::Dumper;
use Log::Log4perl qw(:easy);
# Set up Log4perl's easy mode as soon as this module is loaded: INFO level,
# with each line laid out as "<priority> - <message>".
Log::Log4perl->easy_init(
	{
		level  => $INFO,
		layout => '%-5p - %m%n',
	}
);

our $VERSION = 1.01;

# Path of the OWL file to parse; the only mandatory constructor argument.
has owlfile => (
	is       => 'rw',
	isa      => 'Str',
	required => 1,
);

# Hash reference holding the OWL::Simple::Class objects collected by the parser.
has class => (
	is      => 'ro',
	isa     => 'HashRef',
	default => sub { {} },
);

# Running total of classes, bumped through incr_classes().
has class_count => (
	is      => 'rw',
	isa     => 'Int',
	default => 0,
);

# Running total of synonyms, bumped through incr_synonyms().
has synonyms_count => (
	is      => 'rw',
	isa     => 'Int',
	default => 0,
);

# Ontology version string; empty until a value is stored.
has version => (
	is      => 'rw',
	isa     => 'Str',
	default => '',
);

# Fully qualified XML tag that marks a synonym annotation.
has synonym_tag => (
	is      => 'rw',
	isa     => 'Str',
	default => 'efo:alternative_term',
);

# Fully qualified XML tag that marks a definition annotation.
has definition_tag => (
	is      => 'rw',
	isa     => 'Str',
	default => 'efo:definition',
);

# File-scoped parser state. These lexicals exist once per process, so they are
# shared by every OWL::Simple::Parser object; BUILD reassigns $parser each time
# a new object is created.
my $parser;                            # XML::Parser instance, assigned in BUILD
my $path = '';                         # element path maintained by the XML handlers
my $class = OWL::Simple::Class->new;   # class object the handlers populate
my %restriction;                       # NOTE(review): used outside the visible code - confirm purpose

# Moose BUILD hook, run after construction. Initializes the XML::Parser and
# sets the appropriate handlers.

# Creates the XML::Parser instance, stores it in the file-scoped $parser and
# registers the Start, End and Char handlers. Each handler is a closure that
# forwards XML::Parser's event arguments to the matching method on this object
# (startElement, endElement, characterData).
#
# Declared without the former empty prototype "()": prototypes are ignored on
# method calls, and this sub does receive the invocant as its first argument.
sub BUILD {
	my ($self) = @_;

	# Direct class-method call; the indirect object form ("new XML::Parser")
	# is ambiguous to the Perl parser and discouraged by perlobj.
	$parser = XML::Parser->new;

	$parser->setHandlers(
		Start => sub { $self->startElement(@_) },
		End   => sub { $self->endElement(@_) },
		Char  => sub { $self->characterData(@_) },
	);
}

# Increment the internal counters of classes and synonyms parsed, respectively.

# Adds one to the class_count attribute.
#
# Returns whatever the class_count setter returns.
#
# Declared without the former empty prototype "()": prototypes are ignored on
# method calls, and this sub does take the invocant as its argument.
sub incr_classes {
	my ($self) = @_;
	return $self->class_count( $self->class_count + 1 );
}

# Adds one to the synonyms_count attribute.
#
# Returns whatever the synonyms_count setter returns.
#
# Declared without the former empty prototype "()": prototypes are ignored on
# method calls, and this sub does take the invocant as its argument.
sub incr_synonyms {
	my ($self) = @_;
	return $self->synonyms_count( $self->synonyms_count + 1 );
}

# Main function. Parses the owlfile using XML::Parser.

# Runs the XML::Parser stored in the file-scoped $parser over the file named by
# the owlfile attribute, then logs the class and synonym counters at INFO level.
#
# Returns 1 once parsefile() has returned. Nothing here catches exceptions, so
# any error thrown by parsefile() propagates to the caller.
#
# Declared without the former empty prototype "()": prototypes are ignored on
# method calls, and this sub does take the invocant as its argument.
sub parse {
	my ($self) = @_;

	$parser->parsefile( $self->owlfile );

	INFO( "LOADED "
		  . $self->class_count
		  . ' CLASSES AND '
		  . $self->synonyms_count
		  . ' SYNONYMS from '
		  . $self->owlfile );

	return 1;
}

# Handler executed by XML::Parser. Adds the current element to $path.
# $path is used by characterData() to determine whether node text should be
# added to the class.
#
# Initializes a new OWLClass object and stores it in $class. This is later
# populated by other handlers.



( run in 0.383 second using v1.01-cache-2.11-cpan-39bf76dae61 )