EBI-FGPT-FuzzyRecogniser
view release on metacpan or search on metacpan
lib/EBI/FGPT/FuzzyRecogniser.pm view on Meta::CPAN
# Parse a MeSH ASCII file and append one ontology term per heading to
# $self->ontology_terms.
#
# Arguments:
#   $file - path of the MeSH file, passed to MeSH::Parser::ASCII as
#           'meshfile'.
#
# No prototype is declared: prototypes are ignored for method calls, and
# the former ($) did not match the two-element argument list anyway.
sub parseMeSH {
    my ( $self, $file ) = @_;

    INFO "Parsing MeSH file $file ...";

    my $parser = MeSH::Parser::ASCII->new( meshfile => $file );

    # parse the file
    $parser->parse();

    # loop through all the headings; the hash key doubles as the accession
    while ( my ( $accession, $heading ) = each %{ $parser->heading } ) {
        my $label = $heading->{label};

        # A heading without a synonym list must not abort the whole
        # import, so fall back to an empty list instead of
        # dereferencing undef.
        my @synonyms = @{ $heading->{synonyms} || [] };

        my $term = createOntologyTerm( $accession, $label, @synonyms );

        # Add term to ontology_terms array
        push @{ $self->ontology_terms }, $term;
    }
}
=item parseOMIM()
Custom OMIM parser.
=cut
# Parse an OMIM text file and append one ontology term per record to
# $self->ontology_terms.
#
# Arguments:
#   $file - path of the OMIM text file, passed to
#           Bio::Phenotype::OMIM::OMIMparser as '-omimtext'.
#
# Each term gets the accession 'OMIM:<MIM number>', the cleaned-up title
# as its label, and the lines of the alternative-titles field as synonyms.
# A summary of the number of records and synonyms is logged at the end.
#
# No prototype is declared: prototypes are ignored for method calls, and
# the former ($) did not match the two-element argument list anyway.
sub parseOMIM {
    my ( $self, $file ) = @_;

    INFO "Parsing OMIM file $file ...";

    my $class_count   = 0;
    my $synonym_count = 0;

    # FIXME: The external parser is suboptimal in many ways
    # if this becomes more often used consider creating
    # a custom one from scratch
    my $parser = Bio::Phenotype::OMIM::OMIMparser->new( -omimtext => $file );

    # loop through all the records
    while ( my $omim_entry = $parser->next_phenotype() ) {

        # *FIELD* NO
        my $id = 'OMIM:' . $omim_entry->MIM_number();

        # *FIELD* TI - first line
        my $title = $omim_entry->title();
        $title = '' unless defined $title;
        $title =~ s/^.\d+ //;       # remove id from title
        $title =~ s/INCLUDED//g;    # remove INCLUDED as it screws up scoring

        # *FIELD* TI - additional lines
        # Keep the untouched text: it is needed below to tell whether the
        # field started with the ';;' delimiter. Records without this
        # field are treated as having an empty one.
        my $raw_alt = $omim_entry->alternative_titles_and_symbols();
        $raw_alt = '' unless defined $raw_alt;

        # OMIM uses this weird delimiter ;;
        # to signal sections irrespective of actual line endings
        # this is a major headache to resolve, the parser doesn't
        # do this and we're not going to bother with it either
        ( my $alt = $raw_alt ) =~ s/;;//g;
        $alt =~ s/INCLUDED//g;      # remove INCLUDED as it screws up scoring
        my @synonyms = split m!\n!, $alt;

        # if alt doesn't start with ;; it's an overspill from the
        # title (go figure!)
        # Testing @synonyms rather than the string avoids appending undef
        # when the field holds nothing but line breaks.
        # NOTE(review): the overspill is appended without a separator;
        # confirm whether the title keeps its trailing whitespace.
        if ( @synonyms && $raw_alt !~ /^;;/ ) {
            $title .= shift @synonyms;
        }

        # Instantiate new ontology term
        my $term = createOntologyTerm( $id, $title, @synonyms );

        # Add term to ontology_terms array
        push @{ $self->ontology_terms }, $term;

        $class_count++;
        $synonym_count += scalar @synonyms;
    }

    # Same summary line as parseOBO emits.
    INFO "Loaded " . $class_count . " classes and " . $synonym_count . " synonyms";
}
=item parseOBO()
Custom OBO parser.
=cut
# Parse an OBO file and append one ontology term per non-obsolete class
# to $self->ontology_terms.
#
# Arguments:
#   $file - path of the OBO file, passed to GO::Parser->parse.
#
# Obsolete classes are logged and skipped. A summary of the number of
# loaded classes and synonyms is logged at the end.
#
# No prototype is declared: prototypes are ignored for method calls, and
# the former ($) did not match the two-element argument list anyway.
sub parseOBO {
    my ( $self, $file ) = @_;

    INFO "Parsing obo file $file ...";

    # Direct method call instead of the ambiguous indirect object syntax
    # "new GO::Parser(...)".
    my $parser = GO::Parser->new( { handler => 'obj' } );
    $parser->parse($file);
    my $graph = $parser->handler->graph();

    # Start the counters at zero so that a file without any live class
    # still yields a well-formed summary and no "uninitialized" warnings.
    my $class_count   = 0;
    my $synonym_count = 0;

    # load terms into the ontology_terms array
    for my $OBOclass ( @{ $graph->get_all_terms() } ) {
        if ( $OBOclass->is_obsolete ) {
            INFO $OBOclass->public_acc() . ' obsoleted';
            next;
        }

        # Fetch the synonym list once and tolerate a missing list.
        my @synonyms = @{ $OBOclass->synonym_list() || [] };

        $class_count++;
        $synonym_count += scalar @synonyms;

        # Instantiate new ontology term
        my $accession = $OBOclass->public_acc();
        my $label     = $OBOclass->name();
        my $term      = createOntologyTerm( $accession, $label, @synonyms );

        # Add term to array
        push @{ $self->ontology_terms }, $term;
    }

    INFO "Loaded " . $class_count . " classes and " . $synonym_count . " synonyms";
}
( run in 0.580 second using v1.01-cache-2.11-cpan-71847e10f99 )