Bioinf

 view release on metacpan or  search on metacpan

Bioinf.pm  view on Meta::CPAN

   }
   if($char_opt=~/m/){
      for($i=0; $i< @seqlets; $i++){
         if($seqlets[$i]=~/^ *\d+ +\d+\.?[e\-\d]* +\d+ +\d+ +(\S+) +\d+ +\d+ +(\S+) *$/){
            if($1 eq $2){ next }
            $leading_seq=$1; $long=$2; $long=~s/\,/ /g;
            push(@Final_out, "$leading_seq $long" );
         }
      }
   }
   @Final_out=sort @Final_out;
   print "\n\n\n# \@Final_out\n@Final_out \n=================\n " if $verbose;
   return(\@Final_out);
}


#______________________________________________________________
# Title     : get_overlapping_range
# Usage     : @n1=@{&get_overlapping_range(\@ranges1, \@ranges2)};
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  : get_overlapping_range_in_msp, get_overlapping_range_in_msp_file,
#             get_overlapping_seq_match_range, get_overlap_seq_match_range
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------
sub get_overlapping_range{
	 my (@new_range, $R_start1, $R_start2);
	 ($R_start1, $R_end1)=@{$_[0]}[0..1];
	 ($R_start2, $R_end2)=@{$_[1]}[0..1];

	 if(($R_start1 <= $R_start2)&&        # ------------
	 ( $R_end1 >= $R_end2) ){           #   -------
	   @new_range= ($R_start2, $R_end2);
	 }elsif(($R_start1 <= $R_start2)&&    # -----------
	 ( $R_end1 <= $R_end2) &&           #    -----------
	 ( $R_end1 >  $R_start2) ){
	   @new_range= ($R_start2, $R_end1);
	 }elsif(($R_start1 >= $R_start2)&&    #    -----------
	 ( $R_end1 >= $R_end2  ) &&         # -----------
	 ( $R_end2 >  $R_start1) ){
	   @new_range= ($R_start1, $R_end2);
	 }elsif(($R_start1 >= $R_start2)&&    #   ------
	 ( $R_end1 <= $R_end2) ){           # -----------
	   @new_range= ($R_start1, $R_end1);
	 }else{                                #  ----
	  @new_range=(0,0);                  #        --------
	 }
	 return(\@new_range);
}

#______________________________________________________________________________
# Title     : find_source_perl_library
# Usage     : $source_library=${&find_source_perl_library};
# Function  : gets the default perl sub source library from ENV setenv
# Example   :
# Keywords  :
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#------------------------------------------------------------------------------
sub find_source_perl_library{
     my($source_library);
     print "\n# $0: You did not use \"s=\" option for \$source_library\n";
     print "\n#     I am trying to retrieve your default source lib. \n";

     if( defined( $ENV{'MY_PERL_LIB'} ) ){
            $source_library=$ENV{'MY_PERL_LIB'};
     }elsif( defined( $ENV{'BIO_PERL'} ) ){
            $bioperl_lib=$ENV{'BIO_PERL'};
     }elsif(-e "/gn0/jong/Perl/Bio.pl"){
            $source_library="/gn0/jong/Perl/Bio.pl";
     }elsif(-e "/home/jong/Perl/Bio.pl"){
            $source_library="/home/jong/Perl/Bio.pl";
     }elsif(-e "/Perl/Bio.pl"){
            $source_library="/Perl/Bio.pl";
     }elsif(-e "Bio.pl"){
            $source_library="Bio.pl";
     }elsif(-e "/usr/Perl/Bio.pl"){
            $source_library="/usr/Perl/Bio.pl";
     }elsif(-e "/ss0/sat/Script/Bio.pl"){
            $source_library="/ss0/sat/Script/Bio.pl";
     }elsif(-e "/ss0/agb/Script/Bio.pl"){
            $source_library="/ss0/agb/Script/Bio.pl";
     }else{
            print "\n $0 can not find source library, please set BIO_PERL env\n";
     }
     return(\$source_library);
}

#______________________________________________________________
# Title     : find_central_seq_msp_chunk
# Usage     : This finds the correct msp chunk with given seq name
#             and big original or any msp chunk
# Function  :
# Example   :
# Warning   : You MUST NOT delete '# options : ..' entry
#              as it is read  by various subroutines.
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#--------------------------------------------------------------
sub find_central_seq_msp_chunk{
	 my $central_seq=${$_[0]};
	 my @MSP=@{$_[1]};
	 my ($j, $range, @MSP_1);
	 for($j=0; $j<@MSP; $j++){
	  #""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  #                   $1                 $2     $3    $4      $5     $6    $7     $8
	  #                   171     41.18      6      73  HI1690    9      76  HI0736 sodium...

Bioinf.pm  view on Meta::CPAN

			}
			close TMP;
		 }else{
			if($question > 2){
			   print "\n I can create the index in pwd for you run $0 and \n";
			   print "\n you can copy seq.idx(or $FASTA\.idx) into your swissprot dir later\n";
			   goto CREATE;
			}
			goto W;
		 }

		 #""""""""""""""" CREATION of INDEX file """""""""""""""""""""""""""""""""""""""""""""
		 CREATE:
		 if(defined($DB)){ print "\n Can I create seq.idx in pwd? (y+return or return)\n" }
		 if(defined($FASTA)){ print "\n Can I create $FASTA\.idx in pwd? (y+return or return)\n" }
		 $yes_no=getc;
		 if($yes_no=~/y/i){
			if(defined($DB)){
			   print "\n seq.idx being created...\(1 min in my Linux\)\n";
			   open(DB, "$DB");
			   open(IDX, ">seq.idx");
			   print IDX "# swiss_index\n";
			   while(<DB>){
				 if(/^ID[\t ]+(\w+) +/){
					$index{$1}=tell(DB);
					print IDX "\n$1 $index{$1}";
				 }
			   }
			   close(DB);
			   close(IDX);
			   if(-s "seq.idx"){
				   print "\nGood. seq\.idx is created.";
				   print "\n Copy seq.idx to SWISSPROT dir or you can set\n";
				   print "absolute path ENV var \'SWINDEX\' to your seq.idx path\n";
				   print "e.g. #bash\> export SWINDEX=\/DB\/Swiss\/seq.idx\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat seems to have gone wrong";
			   }

			}elsif(defined($FASTA)){
			   $F_idx="$FASTA\.idx";
			   print "\n $F_idx being created...\n";
			   open(FASTADB, "$FASTA");
			   open(FASTAIDX, ">$F_idx");
			   print FASTAIDX "# fasta_index\n";
			   while(<FASTADB>){
				 if(/^\> {0,4}(\S+) */){
					$index{$1}=tell(FASTADB);
					print FASTAIDX "\n$1 $index{$1}";
				 }
			   }
			   close(FASTADB);
			   close(FASTAIDX);
			   if(-s $F_idx){
				   print "\nGood! Copy $F_idx to your DB dir and set two ENV vars\n";
				   print "absolute path ENV var \'FASTADB\' to your fastadb path\n";
				   print "absolute path ENV var \'FASTAINDEX\' to your $F_idx path\n";
				   print "e.g. #bash\> export FASTADB   =\/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #bash\> export FASTAINDEX=\/DB\/mySwiss\/$F_idx\n";
				   print "e.g. #tcsh\> setenv FASTADB    \/DB\/mySwiss\/$FASTA\n";
				   print "e.g. #tcsh\> setenv FASTAINDEX \/DB\/mySwiss\/$F_idx\n";
				   print "Unless, you can specify the database each time at prompt\n\n";
				   if($create==1){ exit;  }
			   }else{
				   print "\n Creation of seq.dat or $F_idx seems to have gone wrong";
			   }
			}
		 }else{
			exit;
		 }
	  }
	 }

	 #""""""""""""""""""""""""""" MAIN SERACH """""""""""""""""""""""""""""""""""""""""""""""
	 MAIN_SEARCH:
	 for($i=0; $i<@in; $i++){
	  my (@possible, @pos, %possible); my $target=$in[$i];
	  if($target=~/\*/){
		 $target=~s/\*/\\\w\{0,6\}/; # to handle glob input
		 $all=1;
	  }
	  if(defined($index_file)){
		 open(INDEX, "$index_file");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto SWISS;
	  }elsif(($fasta==1) && (defined($FASTA_index)) ){
		 open(INDEX, "$FASTA_index");
		 if($species==1){
		    while(<INDEX>){
		      if( /(\w*\_$target) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }else{
		    while(<INDEX>){
		      if( /(\w*$target\w*) +(\d+)/ ){ $possible{$1}=$2; }
		    }
		 }
		 close INDEX;
		 goto FASTA;
	  }

	  SWISS:
	  @poss = sort keys %possible;

	  if( (@poss >1)&&($all !=1)){
		 print "\n @poss","\n";
		 print chr(7);
		 print "\n There are more than a few seqs for $in[$i]";
		 print "\n be more specific! OR use -a option for all matched\n\n";
		 exit;
	  }elsif($all !=1){
		 print "\n";
		 open (DB, "$DB");



( run in 1.050 second using v1.01-cache-2.11-cpan-39bf76dae61 )