Bio-Homology-InterologWalk

 view release on metacpan or  search on metacpan

lib/Bio/Homology/InterologWalk.pm  view on Meta::CPAN

          #I will check the genome name
               if(!$genome_names{$binomial_species}){
                    print "get_forward_orthologies():"
                    . "\n Genome name: $sourceorg ($source_NCBI_taxon_ID)"  
                    . " not recognised in ensembl db: $ensembl_db. Skipping this db..\n\n";
                    next; 
               }
          }

          #----------OTHER ADAPTORS----------------------
          my $member_adaptor = $registry->get_adaptor($ensembl_db,      'compara', 'Member');
          my $homology_adaptor = $registry->get_adaptor($ensembl_db,    'compara', 'Homology');
          my $proteintree_adaptor = $registry->get_adaptor($ensembl_db, 'compara', "ProteinTree");  
          #----------------------------------------------

          if($destorg eq "All"){#all species available
               my $num_of_genomes = @{$all_genome_dbs};
               print("\n$num_of_genomes genomes found in database: $ensembl_db. \n\n");
          }else{    #one specific species selected
               my $dest_taxon = $NCBI_taxon_adaptor->fetch_node_by_name($destorg);
               my $dest_NCBI_taxon_ID = $dest_taxon->ncbi_taxid;
               
               unless($genome_taxon_ids{$dest_NCBI_taxon_ID}){
                    print "Genome name: $destorg ($dest_NCBI_taxon_ID) is not recognised in ensembl db: $ensembl_db. Skipping this db..\n";
                    next;
               }
               my $method_link_species_set_adaptor = $registry->get_adaptor($ensembl_db, "compara", "MethodLinkSpeciesSet");
               my $gdb1; my $gdb2;
               
               eval { local $SIG{'__DIE__'}; $gdb1 = $genome_db_adaptor->fetch_by_taxon_id($source_NCBI_taxon_ID); };    warn $@ if $@;
               eval { local $SIG{'__DIE__'}; $gdb2 = $genome_db_adaptor->fetch_by_taxon_id($dest_NCBI_taxon_ID);   };    warn $@ if $@;

               $orthologues_mlss = $method_link_species_set_adaptor->fetch_by_method_link_type_GenomeDBs("ENSEMBL_ORTHOLOGUES",[$gdb1,$gdb2]);
               if(!$orthologues_mlss){
                    print "fetch_by_method_link_type_GenomeDBs for $gdb1, $gdb2 returns undefined. Aborting..\n";
                    return;
               }
          }
          
          while (<$in_data>){
               my ($ID) = $_;
               chomp $ID;
               next if ($ID eq '');
               
               $counter = 0;
               my $gene;my @genes;
               
               #stable id and display label
               $gene = $source_species_gene_adaptor->fetch_by_stable_id($ID);
               if(!$gene){
                    $gene = $source_species_gene_adaptor->fetch_by_display_label($ID);
               }
               if($gene){
                    push(@genes,$gene);
               }else{
                    @genes = @{$source_species_gene_adaptor->fetch_all_by_external_name($ID)};
               }

               foreach my $gene (@genes){
                    my $all_homologies;
                    my $gid = $gene->stable_id;
                    my $member = $member_adaptor->fetch_by_source_stable_id("ENSEMBLGENE", $gid);
               
                    if (defined $member){
                         if($destorg eq "All"){ #all destination genomes
                              $all_homologies = $homology_adaptor->fetch_all_by_Member($member);
                         }else{ #one destination genome
                              $all_homologies = $homology_adaptor->fetch_all_by_Member_MethodLinkSpeciesSet($member, $orthologues_mlss);
                         }
                    }else{
                         print "$gid ..no member object defined in Ensembl, skipping..\n";
                         next;
                    }
                    next if (scalar(@$all_homologies) == 0);
                    
                    print $gid, " ", ($gene->external_name || '-'), "\t";
                    $counter = _process_homologies(homology_query_id    =>    $gid, 
                                                   homology_vector       =>   $all_homologies, 
                                                   protein_adaptor       =>   $proteintree_adaptor,
                                                   outfile               =>   $out_data,
                                                   hq_only               =>   $onetoone_only
                                                   );
                    print "..$counter orthologue(s).\n";
                    $counter_db += $counter;
               }
          }
          print "Found $counter_db orthologues in database: $ensembl_db\n";
          $global_count += $counter_db;
          seek($in_data,0,0);
    }
    close($in_data);
    close($out_data);   
    print "\n**Found $global_count orthologues in all databases**\n";
    if($global_count == 0){
         unlink($out_path);
         print("No orthologues found for $sourceorg. Exiting..\n");
         return;
    }
    return 1;
}



=head2 get_interactions

 Usage     : $RC = Bio::Homology::InterologWalk::get_interactions(
                                                  input_path     => $in_path,
                                                  output_path    => $out_path,
                                                  url            => $url,
                                                  no_spoke       => 1, 
                                                  exp_only       => 1, 
                                                  physical_only  => 1
                                                  );
 Purpose   : this method lets you query the IntAct database through its REST interface. 
             IntAct is the Molecular Interaction database at the European Bioinformatics 
             Institute (UK). The IntAct project offers programmatic access to its data 
             through the PSICQUIC specification 
             (see http://code.google.com/p/psicquic/wiki/PsicquicSpecification).
             This subroutine queries the IntAct PPI database via REST with a list of Ensembl
             gene ids (usually obtained from get_forward_orthologies()), obtains data in 
             the PSI-MI TAB format (see http://code.google.com/p/psimi/wiki/PsimiTabFormat), 
             processes it and appends it to the input data. 
 Returns   : success/failure code
 Argument  : -input_path : path to input file. Input file for this subroutine is the 
              output of get_forward_orthologies()
             -output_path : where you want the routine to write the data. Data is in TSV 
              format.
             -url : url of the REST service to query (currently only the EBI IntAct 
              PSICQUIC REST service)
             -(OPTIONAL) no_spoke: if set, interactions obtained from the expansion of 
              complexes through the SPOKE method 
              (see http://nar.oxfordjournals.org/cgi/content/full/38/suppl_1/D525)
              will be ignored
             -(OPTIONAL) exp_only: if set, only interactions whose MITAB25 field "Interaction 
              Detection Method" (MI:0001 in the PSI-MI controlled vocabulary) is at 
              least "experimental interaction detection" 
              (MI:0045 in the PSI-MI controlled vocabulary) will be retained. I.e. if set, 



( run in 2.330 seconds using v1.01-cache-2.11-cpan-5735350b133 )