Bio-Homology-InterologWalk
view release on metacpan or search on metacpan
lib/Bio/Homology/InterologWalk.pm view on Meta::CPAN
my $wants_chimeras = $args{chimeric};
my $ID_OUT; #the object of our search
if(!$registry){
print("get_direct_interactions(): no registry defined. Aborting..\n");
return;
}
if(!$sourceorg){
print("get_direct_interactions(): no source organism specified. Aborting..\n");
return;
}
if(!$url){
print("get_direct_interactions(): no PSICQUIC url specified. Aborting..\n");
return;
}
#MANAGE FILES
open (my $in_data, q{<}, $in_path) or croak("Unable to open $in_path : $!");
open (my $out_data, q{>}, $out_path) or croak("Unable to open $out_path : $!");
#============
my $client = REST::Client->new();
my $gene_adaptor = $registry->get_adaptor($sourceorg, 'core', 'Gene');
my $atleast_one_entry;
my $DF_interaction_id;
my $DF_acc_numb_a;
my $DF_acc_numb_b;
my $DF_alt_id_a;
my $DF_alt_id_b;
my $DF_name_a;
my $DF_name_b;
my $DF_taxon_a;
my $DF_taxon_b;
my $DF_props_a;
my $DF_props_b;
my $DF_pub;
my $DF_int_type;
my $DF_det_method;
my $DF_exp_method;
#interactor search string
my $int_search_string = "search/interactor/";
#GLOBAL search string
my $glob_search_string = "search/query/";
#Header
print $out_data $HEADER_DIRECT, "\n";
my $options = _build_query_options(
no_spoke => $no_spokes,
exp_only => $exp_only,
phys_only => $physical_only
);
my $missed = 0;
while (<$in_data>){
my ($ID) = $_;
chomp $ID;
next if ($ID eq '');
# my $idsignature;
#get a "signature" to spot the kind of id we are dealing with.
#current solution involves getting all the letters starting from the beginning, if there's at least two.
#otherwise get the initial three characters whatever they are, and then do a fuzzy regex matching using string::approx
#this will be needed in order to be sure to get the same kind of id back.
#eg "IPR006259" ----> "IPR"
# if($ID =~ /^([a-z]{2,})(.+)/i){
# $idsignature = $1;
# }else{
# $idsignature = substr($ID, 0, 1) . substr($ID, 1, 1) . substr($ID, 1, 1);
# }
print "$ID: Querying IntAct WS for $ID..";
my $request = $url . $int_search_string. $ID . $options;
$client->GET($request);
print "(", $client->responseCode(), ")";
my $responseContent = $client->responseContent();
if(!$responseContent){
#Let's try
#a global search, to search for the id in non-standard data fields:
$request = $url . $glob_search_string. $ID . $options;
$client->GET($request);
$responseContent = $client->responseContent();
if(!$responseContent){
print("..nothing..\n");
next;
}
}
$atleast_one_entry = 1;
my @responsetoparse = split(/\n/,$responseContent);
my $interactionsRetrieved = scalar @responsetoparse;
print "..Interactions found: ", $interactionsRetrieved, "\n";
foreach my $intactInteraction (@responsetoparse){
my $same_taxon; my $ID_check;
my @MITABDataRow = split("\t",$intactInteraction);
#the following relies heavily on Intact's mitab implementation
$DF_interaction_id = _get_intact_id($MITABDataRow[13]);
next if(!$DF_interaction_id);
$DF_taxon_a = _get_interactor_taxon($MITABDataRow[9]);
$DF_taxon_b = _get_interactor_taxon($MITABDataRow[10]);
if($DF_taxon_a eq $DF_taxon_b){
$same_taxon = 1;
$ID_check = $check_ids;
}else{ #it's a chimeric interaction. In this case I continue only if specified by the user. If I continue,
#the check id flag must be masked for the current interaction (if flagged)
next unless($wants_chimeras);
}
$DF_acc_numb_a = _get_interactor_uniprot_id($MITABDataRow[0]);
$DF_acc_numb_b = _get_interactor_uniprot_id($MITABDataRow[1]);
$DF_alt_id_a = _get_interactor_alias_prop_list($MITABDataRow[2]);
$DF_alt_id_b = _get_interactor_alias_prop_list($MITABDataRow[3]);
$DF_name_a = _get_interactor_name($MITABDataRow[4]);
$DF_name_b = _get_interactor_name($MITABDataRow[5]);
$DF_props_a = _get_interactor_alias_prop_list($MITABDataRow[19]);
$DF_props_b = _get_interactor_alias_prop_list($MITABDataRow[20]);
$DF_pub = $MITABDataRow[8];
$DF_int_type = $MITABDataRow[11];
$DF_det_method = $MITABDataRow[6];
$DF_exp_method = $MITABDataRow[24];
$ID_OUT = _get_ensembl_id_from_mitab_data(
init_id => $ID,
acc_numb_a => $DF_acc_numb_a,
acc_numb_b => $DF_acc_numb_b,
alt_id_a => $DF_alt_id_a,
alt_id_b => $DF_alt_id_b,
name_a => $DF_name_a,
name_b => $DF_name_b,
props_a => $DF_props_a,
props_b => $DF_props_b,
adaptor => $gene_adaptor,
id_check => $ID_check
);
#fuzzy string matching: this ID_OUT should be of the same kind as the original id. Does it feature the same initial
#id signature or something very close?
#if(!_fuzzy_match($idsignature, $ID_OUT)){
# $ID_OUT = _get_ensembl_id_from_uniprotkb_id($gene_adaptor, $target_AccNumb, $target_Name, $target_Aliases, $target_PropsRow);
#}
#TODO REVIEW THIS
if($ID_OUT){
print("Interaction ($DF_interaction_id): $ID <--> $ID_OUT\n");
my $fullDataRow = join("\t",$ID,$DF_interaction_id,
$DF_acc_numb_a, $DF_acc_numb_b,
$DF_alt_id_a, $DF_alt_id_b,
$DF_name_a, $DF_name_b,
$DF_taxon_a, $DF_taxon_b,
$DF_pub, $DF_int_type,$DF_det_method,
$DF_exp_method,$ID_OUT);
print $out_data $fullDataRow, "\n";
next;
}
#If we're here, we weren't able to identify the target interactor id. We'll have to translate both
print("Target ID not found through Intact Data. Translating both...\n");
my $interactoridA = _get_ensembl_id_from_uniprotkb_id(
adaptor => $gene_adaptor,
ebi_id => $DF_interaction_id,
acc_numb => $DF_acc_numb_a,
protein_name => $DF_name_a,
aliases => $DF_alt_id_a,
props => $DF_props_a
);
my $interactoridB = _get_ensembl_id_from_uniprotkb_id(
adaptor => $gene_adaptor,
ebi_id => $DF_interaction_id,
acc_numb => $DF_acc_numb_b,
protein_name => $DF_name_b,
aliases => $DF_alt_id_b,
props => $DF_props_b
);
#TODO
# print("Converting all IDs in Uniprot KB IDs..\n");
# my $candidate = Bio::Homology::InterologWalk::_compare_uniprotkbids($gene_adaptor, $ID, $DF_acc_numb_a, $DF_acc_numb_b);
# if($candidate){
# print $candidate;
# }else{
# $missed += 1;
# next
# }
#Let's give up and save the uniprotkb id instead of the ensembl id.
#this is useful when the destination taxon does not exist in ensembl but
#we still want to keep the interaction
if(!$interactoridA){
$interactoridA = $DF_acc_numb_a if($DF_acc_numb_a ne '-');
}
if(!$interactoridB){
$interactoridB = $DF_acc_numb_b if($DF_acc_numb_b ne '-');
}
unless($interactoridA and $interactoridB){
print("At least one of the two identifiers could not be retrieved. Skipping..\n");
$missed += 1;
next;
( run in 0.542 second using v1.01-cache-2.11-cpan-71847e10f99 )