EBI-FGPT-FuzzyRecogniser

 view release on metacpan or  search on metacpan

lib/EBI/FGPT/FuzzyRecogniser.pm  view on Meta::CPAN

# Parse a MeSH ASCII file and append one ontology term per heading to
# $self->ontology_terms.
#
# Arguments:
#   $file - path handed to MeSH::Parser::ASCII as its 'meshfile' option
#
# The return value is not meaningful. The ($) prototype is kept as declared;
# Perl ignores prototypes when a sub is invoked as a method.
sub parseMeSH($) {
	my ( $self, $file ) = @_;
	INFO "Parsing MeSH file $file ...";

	my $parser = MeSH::Parser::ASCII->new( meshfile => $file );

	# parse the file
	$parser->parse();

	# loop through all the headings; the hash key is used as the accession
	while ( my ( $id, $heading ) = each %{ $parser->heading } ) {
		my $accession = $id;
		my $label     = $heading->{label};

		# Fall back to an empty list so that a heading without a synonym
		# list cannot abort the whole parse with an "undefined value as an
		# ARRAY reference" error under strict refs.
		my @synonyms = @{ $heading->{synonyms} || [] };

		my $term = createOntologyTerm( $accession, $label, @synonyms );

		# Add term to ontology_terms array
		push @{ $self->ontology_terms }, $term;
	}
}

=item parseOMIM()

Custom OMIM parser.

=cut

# Parse an OMIM text file and append one ontology term per record to
# $self->ontology_terms. Each term gets the accession 'OMIM:<MIM number>',
# the cleaned-up title as label and the alternative titles as synonyms.
#
# Arguments:
#   $file - path handed to Bio::Phenotype::OMIM::OMIMparser as '-omimtext'
#
# Logs the number of terms and synonyms loaded once all records are read.
# The return value is not meaningful. The ($) prototype is kept as declared;
# Perl ignores prototypes when a sub is invoked as a method.
sub parseOMIM($) {
	my ( $self, $file ) = @_;
	INFO "Parsing OMIM file $file ...";

	# Start at 0 so the summary below is well defined for an empty file
	my $class_count   = 0;
	my $synonym_count = 0;

	# FIXME: The external parser is suboptimal in many ways
	# if this becomes more often used consider creating
	# a custom one from scratch
	my $parser = Bio::Phenotype::OMIM::OMIMparser->new( -omimtext => $file );

	# loop through all the records
	while ( my $omim_entry = $parser->next_phenotype() ) {

		# *FIELD* NO
		my $id = 'OMIM:' . $omim_entry->MIM_number();

		# *FIELD* TI - first line
		my $title = $omim_entry->title();
		$title =~ s/^.\d+ //;       # remove id from title
		$title =~ s/INCLUDED//g;    # remove INCLUDED as it screws up scoring

		# *FIELD* TI - additional lines
		# Keep the untouched value: the leading ';;' test below must look
		# at the text as delivered by the parser, before any clean-up.
		my $raw_alt = $omim_entry->alternative_titles_and_symbols();
		$raw_alt = '' unless defined $raw_alt;

		# OMIM uses this weird delimiter ;;
		# to signal sections irrespective of actual line endings
		# this is a major headache to resolve, the parser doesn't
		# do this and we're not going to bother with it either
		my $alt = $raw_alt;
		$alt =~ s/;;//g;
		$alt =~ s/INCLUDED//g;      # remove INCLUDED as it screws up scoring
		my @synonyms = split m!\n!, $alt;

		# if alt doesn't start with ;; it's an overspill from the
		# title (go figure!)
		if ( $alt ne '' && $raw_alt !~ /^;;/ && @synonyms ) {
			my $overspill = shift @synonyms;
			$overspill =~ s/^\s+//;

			# The overspill is the continuation of a wrapped title line, so
			# join it with exactly one space; plain concatenation would glue
			# the last and first words together.
			if ( $overspill ne '' ) {
				$title =~ s/\s+$//;
				$title .= ' ' . $overspill;
			}
		}

		# Instantiate new ontology term
		my $term = createOntologyTerm( $id, $title, @synonyms );

		# Add term to ontology_terms array
		push @{ $self->ontology_terms }, $term;

		$class_count++;
		$synonym_count += scalar @synonyms;
	}

	INFO "Loaded " . $class_count . " classes and " . $synonym_count . " synonyms";
}

=item parseOBO()

Custom OBO parser.

=cut

# Parse an OBO file with GO::Parser and append one ontology term per
# non-obsolete class to $self->ontology_terms. Obsolete classes are logged
# and skipped.
#
# Arguments:
#   $file - path handed to GO::Parser's parse()
#
# Logs the number of classes and synonyms loaded once the graph is walked.
# The return value is not meaningful. The ($) prototype is kept as declared;
# Perl ignores prototypes when a sub is invoked as a method.
sub parseOBO($) {
	my ( $self, $file ) = @_;
	INFO "Parsing obo file $file ...";

	# Direct method call instead of indirect object syntax ("new Class"),
	# which is parsed ambiguously and is disabled under recent feature bundles
	my $parser = GO::Parser->new( { handler => 'obj' } );
	$parser->parse($file);
	my $graph = $parser->handler->graph();

	# Start both counters at 0 so the summary line is well defined (and
	# warning free) when the file contains no live classes
	my $class_count   = 0;
	my $synonym_count = 0;

	for my $OBOclass ( @{ $graph->get_all_terms() } ) {
		if ( $OBOclass->is_obsolete ) {
			INFO $OBOclass->public_acc() . ' obsoleted';
			next;
		}

		# Read the synonym list once and tolerate a missing list
		my @synonyms = @{ $OBOclass->synonym_list() || [] };

		$class_count++;
		$synonym_count += scalar @synonyms;

		# Instantiate new ontology term; the accessors are assigned to
		# scalars first so they cannot inject extra list elements into the
		# argument list
		my $accession = $OBOclass->public_acc();
		my $label     = $OBOclass->name();
		my $term      = createOntologyTerm( $accession, $label, @synonyms );

		# Add term to ontology_terms array
		push @{ $self->ontology_terms }, $term;
	}

	INFO "Loaded " . $class_count . " classes and " . $synonym_count . " synonyms";

}



( run in 0.580 second using v1.01-cache-2.11-cpan-71847e10f99 )