Bioinf
view release on metacpan or search on metacpan
}
if($char_opt=~/m/){
for($i=0; $i< @seqlets; $i++){
if($seqlets[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
if($1 eq $2){ next }
$leading_seq=$1; $long=$2; $long=~s/\,/ /g;
push(@Final_out, "$leading_seq $long" );
}
}
}
@Final_out=sort @Final_out;
print "\n\n\n# \@Final_out\n@Final_out \n=================\n " if $verbose;
return(\@Final_out);
}
#______________________________________________________________
# Title : get_overlapping_range
# Usage : @n1=@{&get_overlapping_range(\@ranges1, \@ranges2)};
# Function :
# Example :
# Warning : You MUST NOT delete '# options : ..' entry
# as it is read by various subroutines.
# Keywords : get_overlapping_range_in_msp, get_overlapping_range_in_msp_file,
# get_overlapping_seq_match_range, get_overlap_seq_match_range
# Options : _ for debugging.
# # for debugging.
# Returns :
# Argument :
# Category :
# Version : 1.1
#--------------------------------------------------------------
sub get_overlapping_range{
my (@new_range, $R_start1, $R_start2);
($R_start1, $R_end1)=@{$_[0]}[0..1];
($R_start2, $R_end2)=@{$_[1]}[0..1];
if(($R_start1 <= $R_start2)&& # ------------
( $R_end1 >= $R_end2) ){ # -------
@new_range= ($R_start2, $R_end2);
}elsif(($R_start1 <= $R_start2)&& # -----------
( $R_end1 <= $R_end2) && # -----------
( $R_end1 > $R_start2) ){
@new_range= ($R_start2, $R_end1);
}elsif(($R_start1 >= $R_start2)&& # -----------
( $R_end1 >= $R_end2 ) && # -----------
( $R_end2 > $R_start1) ){
@new_range= ($R_start1, $R_end2);
}elsif(($R_start1 >= $R_start2)&& # ------
( $R_end1 <= $R_end2) ){ # -----------
@new_range= ($R_start1, $R_end1);
}else{ # ----
@new_range=(0,0); # --------
}
return(\@new_range);
}
#______________________________________________________________________________
# Title : find_source_perl_library
# Usage : $source_library=${&find_source_perl_library};
# Function : gets the default perl sub source library from ENV setenv
# Example :
# Keywords :
# Options :
# Author :
# Category :
# Version : 1.1
#------------------------------------------------------------------------------
sub find_source_perl_library{
my($source_library);
print "\n# $0: You did not use \"s=\" option for \$source_library\n";
print "\n# I am trying to retrieve your default source lib. \n";
if( defined( $ENV{'MY_PERL_LIB'} ) ){
$source_library=$ENV{'MY_PERL_LIB'};
}elsif( defined( $ENV{'BIO_PERL'} ) ){
$bioperl_lib=$ENV{'BIO_PERL'};
}elsif(-e "/gn0/jong/Perl/Bio.pl"){
$source_library="/gn0/jong/Perl/Bio.pl";
}elsif(-e "/home/jong/Perl/Bio.pl"){
$source_library="/home/jong/Perl/Bio.pl";
}elsif(-e "/Perl/Bio.pl"){
$source_library="/Perl/Bio.pl";
}elsif(-e "Bio.pl"){
$source_library="Bio.pl";
}elsif(-e "/usr/Perl/Bio.pl"){
$source_library="/usr/Perl/Bio.pl";
}elsif(-e "/ss0/sat/Script/Bio.pl"){
$source_library="/ss0/sat/Script/Bio.pl";
}elsif(-e "/ss0/agb/Script/Bio.pl"){
$source_library="/ss0/agb/Script/Bio.pl";
}else{
print "\n $0 can not find source library, please set BIO_PERL env\n";
}
return(\$source_library);
}
#______________________________________________________________
# Title : find_central_seq_msp_chunk
# Usage : This finds the correct msp chunk with given seq name
# and big original or any msp chunk
# Function :
# Example :
# Warning : You MUST NOT delete '# options : ..' entry
# as it is read by various subroutines.
# Keywords :
# Options : _ for debugging.
# # for debugging.
# Returns :
# Argument :
# Category :
# Version : 1.0
#--------------------------------------------------------------
sub find_central_seq_msp_chunk{
my $central_seq=${$_[0]};
my @MSP=@{$_[1]};
my ($j, $range, @MSP_1);
for($j=0; $j<@MSP; $j++){
#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# $1 $2 $3 $4 $5 $6 $7 $8
# 171 41.18 6 73 HI1690 9 76 HI0736 sodium...
}
close TMP;
}else{
if($question > 2){
print "\n I can create the index in pwd for you run $0 and \n";
print "\n you can copy seq.idx(or $FASTA\.idx) into your swissprot dir later\n";
goto CREATE;
}
goto W;
}
#""""""""""""""" CREATION of INDEX file """""""""""""""""""""""""""""""""""""""""""""
CREATE:
if(defined($DB)){ print "\n Can I create seq.idx in pwd? (y+return or return)\n" }
if(defined($FASTA)){ print "\n Can I create $FASTA\.idx in pwd? (y+return or return)\n" }
$yes_no=getc;
if($yes_no=~/y/i){
if(defined($DB)){
print "\n seq.idx being created...\(1 min in my Linux\)\n";
open(DB, "$DB");
open(IDX, ">seq.idx");
print IDX "# swiss_index\n";
while(<DB>){
if(/^ID[\t ]+(\w+) +/){
$index{$1}=tell(DB);
print IDX "\n$1 $index{$1}";
}
}
close(DB);
close(IDX);
if(-s "seq.idx"){
print "\nGood. seq\.idx is created.";
print "\n Copy seq.idx to SWISSPROT dir or you can set\n";
print "absolute path ENV var \'SWINDEX\' to your seq.idx path\n";
print "e.g. #bash\> export SWINDEX=\/DB\/Swiss\/seq.idx\n\n";
if($create==1){ exit; }
}else{
print "\n Creation of seq.dat seems to have gone wrong";
}
}elsif(defined($FASTA)){
$F_idx="$FASTA\.idx";
print "\n $F_idx being created...\n";
open(FASTADB, "$FASTA");
open(FASTAIDX, ">$F_idx");
print FASTAIDX "# fasta_index\n";
while(<FASTADB>){
if(/^\> {0,4}(\S+) */){
$index{$1}=tell(FASTADB);
print FASTAIDX "\n$1 $index{$1}";
}
}
close(FASTADB);
close(FASTAIDX);
if(-s $F_idx){
print "\nGood! Copy $F_idx to your DB dir and set two ENV vars\n";
print "absolute path ENV var \'FASTADB\' to your fastadb path\n";
print "absolute path ENV var \'FASTAINDEX\' to your $F_idx path\n";
print "e.g. #bash\> export FASTADB =\/DB\/mySwiss\/$FASTA\n";
print "e.g. #bash\> export FASTAINDEX=\/DB\/mySwiss\/$F_idx\n";
print "e.g. #tcsh\> setenv FASTADB \/DB\/mySwiss\/$FASTA\n";
print "e.g. #tcsh\> setenv FASTAINDEX \/DB\/mySwiss\/$F_idx\n";
print "Unless, you can specify the database each time at prompt\n\n";
if($create==1){ exit; }
}else{
print "\n Creation of seq.dat or $F_idx seems to have gone wrong";
}
}
}else{
exit;
}
}
}
#""""""""""""""""""""""""""" MAIN SERACH """""""""""""""""""""""""""""""""""""""""""""""
MAIN_SEARCH:
for($i=0; $i<@in; $i++){
my (@possible, @pos, %possible); my $target=$in[$i];
if($target=~/\*/){
$target=~s/\*/\\\w\{0,6\}/; # to handle glob input
$all=1;
}
if(defined($index_file)){
open(INDEX, "$index_file");
if($species==1){
while(<INDEX>){
if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
}
}else{
while(<INDEX>){
if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
}
}
close INDEX;
goto SWISS;
}elsif(($fasta==1) && (defined($FASTA_index)) ){
open(INDEX, "$FASTA_index");
if($species==1){
while(<INDEX>){
if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
}
}else{
while(<INDEX>){
if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
}
}
close INDEX;
goto FASTA;
}
SWISS:
@poss = sort keys %possible;
if( (@poss >1)&&($all !=1)){
print "\n @poss","\n";
print chr(7);
print "\n There are more than a few seqs for $in[$i]";
print "\n be more specific! OR use -a option for all matched\n\n";
exit;
}elsif($all !=1){
print "\n";
open (DB, "$DB");
( run in 1.050 second using v1.01-cache-2.11-cpan-39bf76dae61 )