Bio-Homology-InterologWalk

 view release on metacpan or  search on metacpan

lib/Bio/Homology/InterologWalk.pm  view on Meta::CPAN

     my $wants_chimeras  = $args{chimeric};
     
     my $ID_OUT; #the object of our search
     
     if(!$registry){
          print("get_direct_interactions(): no registry defined. Aborting..\n");
          return;
     }
     if(!$sourceorg){
          print("get_direct_interactions(): no source organism specified. Aborting..\n");
          return;
     }
     if(!$url){
          print("get_direct_interactions(): no PSICQUIC url specified. Aborting..\n");
          return;
     }
     #MANAGE FILES
     open (my $in_data,  q{<}, $in_path) or croak("Unable to open $in_path : $!");
     open (my $out_data,  q{>}, $out_path) or croak("Unable to open $out_path : $!");
     #============
     my $client = REST::Client->new();
     my $gene_adaptor = $registry->get_adaptor($sourceorg, 'core', 'Gene'); 
     
     my $atleast_one_entry;
     
     my $DF_interaction_id;
     my $DF_acc_numb_a;  
     my $DF_acc_numb_b;
     my $DF_alt_id_a;
     my $DF_alt_id_b;
     my $DF_name_a; 
     my $DF_name_b;
     my $DF_taxon_a;
     my $DF_taxon_b;
     my $DF_props_a;
     my $DF_props_b;
     my $DF_pub;
     my $DF_int_type;    
     my $DF_det_method;
     my $DF_exp_method;
     
     #interactor search string
     my $int_search_string = "search/interactor/";
     #GLOBAL search string
     my $glob_search_string = "search/query/";
     #Header
     print $out_data $HEADER_DIRECT, "\n";
     
     my $options = _build_query_options(
                       no_spoke   => $no_spokes, 
                       exp_only   => $exp_only,
                       phys_only  => $physical_only
                       );

     my $missed = 0;
     while (<$in_data>){
          my ($ID) = $_;
          chomp $ID;
          next if ($ID eq '');
          
#          my $idsignature;
          #get a "signature" to spot the kind of id we are dealing with.
          #current solution involves getting all the letters starting from the beginning, if there's at least two.
          #otherwise get the initial three characters whatever they are, and then do a fuzzy regex matching using string::approx
          #this will be needed in order to be sure to get the same kind of id back.
          #eg "IPR006259" ----> "IPR"
#          if($ID =~ /^([a-z]{2,})(.+)/i){
#               $idsignature = $1;
#          }else{
#               $idsignature = substr($ID, 0, 1) . substr($ID, 1, 1) . substr($ID, 1, 1);
#          }
          
          print "$ID: Querying IntAct WS for $ID..";
          my $request = $url . $int_search_string.  $ID . $options;
          
          $client->GET($request);
          print "(", $client->responseCode(), ")";
          
          my $responseContent = $client->responseContent();
          if(!$responseContent){
               #Let's try
               #a global search, to search for the id in non-standard data fields:
               $request = $url . $glob_search_string.  $ID . $options;
               $client->GET($request);
               $responseContent = $client->responseContent();
               if(!$responseContent){
                    print("..nothing..\n");
                    next;    
               }
          }
          $atleast_one_entry = 1;
          my @responsetoparse = split(/\n/,$responseContent);
          my $interactionsRetrieved = scalar @responsetoparse;
          print "..Interactions found: ", $interactionsRetrieved, "\n";
          
          foreach my $intactInteraction (@responsetoparse){
               my $same_taxon; my $ID_check;
               my @MITABDataRow = split("\t",$intactInteraction);
               
               #the following relies heavily on Intact's mitab implementation
               $DF_interaction_id = _get_intact_id($MITABDataRow[13]);
               next if(!$DF_interaction_id);
               
               $DF_taxon_a    = _get_interactor_taxon($MITABDataRow[9]);
               $DF_taxon_b    = _get_interactor_taxon($MITABDataRow[10]);
               if($DF_taxon_a eq $DF_taxon_b){
                    $same_taxon = 1;
                    $ID_check = $check_ids;
               }else{ #it's a chimeric interaction. In this case I continue only if specified by the user. If I continue,
                      #the check id flag must be masked for the current interaction (if flagged) 
                    next unless($wants_chimeras);
               }
               $DF_acc_numb_a = _get_interactor_uniprot_id($MITABDataRow[0]);
               $DF_acc_numb_b = _get_interactor_uniprot_id($MITABDataRow[1]);
               $DF_alt_id_a   = _get_interactor_alias_prop_list($MITABDataRow[2]);
               $DF_alt_id_b   = _get_interactor_alias_prop_list($MITABDataRow[3]);
               $DF_name_a     = _get_interactor_name($MITABDataRow[4]);
               $DF_name_b     = _get_interactor_name($MITABDataRow[5]);
               $DF_props_a    = _get_interactor_alias_prop_list($MITABDataRow[19]);
               $DF_props_b    = _get_interactor_alias_prop_list($MITABDataRow[20]);
               $DF_pub        = $MITABDataRow[8];
               $DF_int_type   = $MITABDataRow[11];
               $DF_det_method = $MITABDataRow[6];
               $DF_exp_method = $MITABDataRow[24];

               $ID_OUT = _get_ensembl_id_from_mitab_data(
                                                    init_id          => $ID,
                                                    acc_numb_a       => $DF_acc_numb_a,
                                                    acc_numb_b       => $DF_acc_numb_b,
                                                    alt_id_a         => $DF_alt_id_a,
                                                    alt_id_b         => $DF_alt_id_b,
                                                    name_a           => $DF_name_a,  
                                                    name_b           => $DF_name_b,
                                                    props_a          => $DF_props_a,
                                                    props_b          => $DF_props_b,
                                                    adaptor          => $gene_adaptor,
                                                    id_check         => $ID_check                                                   
                                                    );
               #fuzzy string matching: this ID_OUT should be of the same kind as the original id. Does it feature the same initial
               #id signature or something very close?
               #if(!_fuzzy_match($idsignature, $ID_OUT)){
               #     $ID_OUT = _get_ensembl_id_from_uniprotkb_id($gene_adaptor, $target_AccNumb, $target_Name, $target_Aliases, $target_PropsRow);
               #}
               #TODO REVIEW THIS                                    

               if($ID_OUT){
                    print("Interaction ($DF_interaction_id): $ID <--> $ID_OUT\n");
                    my $fullDataRow = join("\t",$ID,$DF_interaction_id,
                                        $DF_acc_numb_a, $DF_acc_numb_b,
                                        $DF_alt_id_a, $DF_alt_id_b,
                                        $DF_name_a, $DF_name_b,
                                        $DF_taxon_a, $DF_taxon_b,
                                        $DF_pub, $DF_int_type,$DF_det_method,
                                        $DF_exp_method,$ID_OUT);
                    print $out_data $fullDataRow, "\n";
                    next;
               }
               
               #If we're here, we weren't able to identify the target interactor id. We'll have to translate both
               print("Target ID not found through Intact Data. Translating both...\n");

               my $interactoridA = _get_ensembl_id_from_uniprotkb_id(
                                                            adaptor       => $gene_adaptor, 
                                                            ebi_id        => $DF_interaction_id,    
                                                            acc_numb      => $DF_acc_numb_a, 
                                                            protein_name  => $DF_name_a, 
                                                            aliases       => $DF_alt_id_a, 
                                                            props         => $DF_props_a
                                                            );
               my $interactoridB = _get_ensembl_id_from_uniprotkb_id(
                                                            adaptor       => $gene_adaptor, 
                                                            ebi_id        => $DF_interaction_id,    
                                                            acc_numb      => $DF_acc_numb_b, 
                                                            protein_name  => $DF_name_b, 
                                                            aliases       => $DF_alt_id_b, 
                                                            props         => $DF_props_b
                                                            );
               
                #TODO
#               print("Converting all IDs in Uniprot KB IDs..\n");       
#               my $candidate = Bio::Homology::InterologWalk::_compare_uniprotkbids($gene_adaptor, $ID, $DF_acc_numb_a, $DF_acc_numb_b);
#               if($candidate){
#                    print $candidate;  
#               }else{
#                    $missed += 1;
#                    next     
#               }
               #Let's give up and save the uniprotkb id instead of the ensembl id.
               #this is useful when the destination taxon does not exist in ensembl but
               #we still want to keep the interaction
               if(!$interactoridA){
                    $interactoridA = $DF_acc_numb_a if($DF_acc_numb_a ne '-');
               }
               if(!$interactoridB){
                    $interactoridB = $DF_acc_numb_b if($DF_acc_numb_b ne '-');
               }
               
               unless($interactoridA and $interactoridB){
                    print("At least one of the two identifiers could not be retrieved. Skipping..\n");
                    $missed += 1;
                    next;



( run in 0.542 second using v1.01-cache-2.11-cpan-71847e10f99 )