Bioinf

 view release on metacpan or  search on metacpan

Bioinf.pm  view on Meta::CPAN

		   }elsif($file[$i]=~/^(\S+)\.out\.gz/ or $file[$i]=~/^(\S+)\.[mfs]?sso\.gz/){
			   $big_out_msp1="\U$1".".msp";
			   $big_out_msp2="\U$1"."_all".".msp";
		   }
	   }
	 }
	 if(defined($big_out_msp)){
	   $big_out_msp1=$big_out_msp2=$big_out_msp;
	   print "\n# \$big_out_msp is defined as \'$big_out_msp\'\n";
	 }else{
	   print "\n# convert_sso_to_msp: You did not define the big MSP file out format, so $big_out_msp1 \n";
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (1) When File was given to this sub routine
	 #__________________________________________
	 if(@file == 1){   ## ONE single file input??
	  print "# one file @file is given, OUT will be: $big_out_msp1 \n";
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	          "u=$upper_expect_limit",
			  "l=$lower_expect_limit",
			  "m=$margin",
			  $new_format,
			  "s=$big_out_msp")};
	  push(@final_out, &write_msp_files(@sso, $big_out_msp1,
	        $single_out_opt, $add_range) );

	 }elsif(@file > 1){ ## MOre than 1 file input??
	  @sso=@{&open_sso_files(@file, $add_range, $add_range2,
	        "l=$lower_expect_limit",
	        "u=$upper_expect_limit",
	        "m=$margin",
	        $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp2,
			$single_out_opt, $add_range)} ); ## concatenates all the hash ref to one
	 }

	 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	 #  (2) When NO File but ARRAY is given
	 #      Here, you can have SSO files created
	 #__________________________________________
	 elsif(@array >=1){
	  print "\n# In convert_sso_to_msp, \@array is given rather than \@file";
	  @sso=@{&open_sso_files(@array, "u=$upper_expect_limit", $add_range2,
			  "l=$lower_expect_limit", $add_range, $create_sso,
			  "m=$margin", $new_format)};
	  push(@final_out, @{&write_msp_files(@sso, $big_out_msp,
						  $single_out_opt, $add_range)} );
	 }
	 return(\@final_out);
}



#________________________________________________________________________________
# Title     : bla_to_msf  (this is not used. Use convert_bla_to_msf)
# Usage     : @msf_file_made=@{&bla_to_msf(\@bla_file)};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced
# Example   :
# Keywords  : convert_bla_to_msf
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub bla_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($e_val_threshold)=0.0005;
		my(@template_query_seq, @keys, %alignment_hash, %alignment_hash_query,
			 %alignment_hash_subject);
		$choose_iteration=1;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				$file_base_name=${&get_base_names($file[$i])};
				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){
						if(/^Query=(\S+)/){
								$query_seq=$1;   last;
						}
				}
				close(BLAST_OUTPUT);

				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  Finds the query sequence, resets $start_point and next line
						#____________________________________________
						if(/^Searching\.\.\.\.\.\.\.\.\.\.\./){
                                                     $present_iteration++;
                                                     if($present_iteration > $choose_iteration){
                                                          last
                                                     }else{
                                                          %alignment_hash_subject=%alignment_hash_query=();
                                                     }
						}elsif(/^\> *(\S+)/){
                                                     $subject_seq=$1;
                                                     $start_point='';
                                                     if($alignment_hash_subject{$subject_seq}){
                                                         $seq_already_in=1;
                                                         $subject_seq='';
                                                         next;
                                                     }
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, match the line to get expectation value

Bioinf.pm  view on Meta::CPAN

						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 @splited_subject_seq=split(//, $2);
						 $evalue=$1;

						 for($t=0; $t< @template_query_seq; $t++){
								 if($template_query_seq[$t] ne '-' ){
										 next
								 }elsif($template_query_seq[$t] eq '-'){

										 $char_of_the_position=$splited_query_seq[$t];
										 if($char_of_the_position ne '-' and $char_of_the_position ne '_'){

												 #print "\n# \$t is $t";
												 #print "\n# \$evalue is $evalue\n ==>";
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n ==>";
												 splice(@splited_subject_seq, $t, 0, '-');
												 splice(@splited_query_seq, $t, 0, '-');
												 #print @splited_query_seq, "\n ==>";
												 #print @splited_subject_seq, "\n";
												 next;
										 }elsif($char_of_the_position eq '_'){
												 splice(@splited_subject_seq, 0, 0, '_');
												 splice(@splited_query_seq, 0, 0, '_');

										 }elsif($char_of_the_position eq '-'){
												 next;
										 }
								 }
						 }
						 $new_subject_seq=join('', @splited_subject_seq);
						 $new_query_seq  =join('', @splited_query_seq);
						 #$alignment_hash{$keys[$g]}="$evalue $new_subject_seq";
						 #$alignment_hash{$keys[$g-1]}="$evalue $new_query_seq";
						 $alignment_hash_subject{$subject_name}="$evalue $new_subject_seq";
						 $alignment_hash_query{$subject_name}  ="$evalue $new_query_seq";
				}


				print "\n";print @template_query_seq, "\n" if $verbose;

				for($h=0; $h< @keys; $h++){
						 $subject_name=$keys[$h];
						 $alignment_hash_subject{$subject_name}=~/^(\S+) +(\S+)/;
						 #print "\n $alignment_hash_query{$subject_name}";
						 print "\n $alignment_hash_subject{$subject_name}";
						 $final_seq_out{$subject_name}=$2;
				}
				&write_msf(\%final_seq_out, \$output_msf);
				push(@final_out, $output_msf);
		}
		return(\@final_out);
}

#________________________________________________________________________________
# Title     : convert_bla_to_msf
# Usage     : @msf_file_made=@{&convert_bla_to_msf(\@bla_file)};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced
# Example   :
# Keywords  : convert_bla_to_msf
# Options   :
# Author    :
# Category  :
# Version   : 1.1
#--------------------------------------------------------------------------------
sub convert_bla_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
		my ($e_val_threshold)=0.0005;
		my(@template_query_seq, @keys, %alignment_hash, %alignment_hash_query,
			 %alignment_hash_subject);
		$choose_iteration=1;

		#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		# Opening file
		#______________________________________________
		for($i=0; $i< @file; $i++){
				$file_base_name=${&get_base_names($file[$i])};
				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){
						if(/^Query=(\S+)/){
								$query_seq=$1;   last;
						}
				}
				close(BLAST_OUTPUT);

				open(BLAST_OUTPUT, $file[$i]);
				while(<BLAST_OUTPUT>){

						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						#  Finds the query sequence, resets $start_point and next line
						#____________________________________________
						if(/^Searching\.\.\.\.\.\.\.\.\.\.\./){
								$present_iteration++;
								if($present_iteration > $choose_iteration){
										last
								}else{
										%alignment_hash_subject=%alignment_hash_query=();
								}
						}elsif(/^\> *(\S+)/){
								$subject_seq=$1;
								$start_point='';
								if($alignment_hash_subject{$subject_seq}){
										$seq_already_in=1;
										$subject_seq='';
										next;
								}
						}
						#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
						# If $subject_seq defined, match the line to get expectation value

Bioinf.pm  view on Meta::CPAN

     my (@finale_out, $sorted_name, $msp_line, $evalue, $score, $matched, $seq_id, $query_range_start,$accumulative_hits_eval_thresh,
         $query_range_stop, $query, $match_string_start, $match_string_stop, $read_point_found, %hash_out, %accumulative_hits, $evalue_thresh);
     %hash_out=%{$_[0]};         %accumulative_hits=%{$_[1]};
     $query=$_[2];               $matched=$_[3];
     $evalue=$_[4];              $score=$_[5];
     $seq_id=$_[6];              $sorted_name=$_[7];
     $query_range_start=$_[8];   $query_range_stop =$_[9];
     $match_string_start=$_[10]; $match_string_stop=$_[11];
     $read_point_found=$_[12];   $accumulative_hits_eval_thresh=$_[13];
     $take_only_the_last_iteration=$_[14];
     $accumulative_hits_eval_thresh=$_[15];
     $evalue_thresh=$_[16];

     $query  ="$query\_$query_range_start\-$query_range_stop";

     if($matched !~/^\S+\_\d+\-\d+ *$/){         $matched="$matched\_$match_string_start\-$match_string_stop";
     }elsif($matched =~/^(\S+)\_\d+\-\d+ *$/){   $matched="$1\_$match_string_start\-$match_string_stop";     }

     if($score=~/\S/ and $evalue=~/\S/ and $match_string_start=~/\S/ and $evalue_thresh > $evalue){
         $msp_line=sprintf("%-6s %-9s %-5s %-5s %-5s %-32s %-5s %-5s %-38s %-3s\n",
                        $score, $evalue, $seq_id, $query_range_start, $query_range_stop,
                        $query, $match_string_start, $match_string_stop, $matched, $read_point_found);

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # This is where I really put the matches !!!
         #_____________________________________________________
         if($hash_out{$sorted_name}=~/^\S+ +(\S+) +/){
             if($1 >= $evalue){
                 print "                    (1) put_msp_lines_to_hash_from_bla: $1 >= $evalue WRITING to hash. 1\n" if $verbose;
                 $hash_out{$sorted_name}=$msp_line;
             }else{
                 print "                    put_msp_lines_to_hash_from_bla: $1 < $evalue_ori NO write to hash\n" if $verbose;  }
         }else{
             print "                    (2) put_msp_lines_to_hash_from_bla: NO eval >= $evalue WRITING to hash. 2\n" if $verbose;
             $hash_out{$sorted_name}=$msp_line;
         }

         #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # This part is to rescue the hits dropped by matrix migration
         #_________________________________________________________________
         if(!$take_only_the_last_iteration and $evalue <= $accumulative_hits_eval_thresh ){
            if($accumulative_hits{$sorted_name}){
               if($accumulative_hits{$sorted_name}=~/^[\t ]*\S+[\t ]+(\S+)[\t ]/){
                   if($evalue < $1){
                       $accumulative_hits{$sorted_name}=$msp_line;   }   }
            }else{ $accumulative_hits{$sorted_name}=$msp_line;     }
         }
     }else{     }
     @finale_out=(\%hash_out, \%accumulative_hits, $read_point_found, $query, $matched, $evalue, $score, $seq_id, $sorted_name,
                  $query_range_start, $query_range_stop, $match_string_start, $match_string_stop  );
     return(\@finale_out);
}


#________________________________________________________________________________
# Title     : convert_bla_multaln_to_msf
# Usage     : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file, [i=2])};
# Function  : matched each query seq name and if the E value is lower than
#             my arbitrary threshold, I put the subject and target pair
#             alignment into a hash.
#             In later iterations, the latest is replaced,
#              when you use m6 option for PSI blast
#             this adds '00x' extensions to the repeatedly occurring seq names
#
# Example   : @msf_file_made=@{&convert_bla_multaln_to_msf(\@bla_file,
#                                              $verbose, "i=$iteration")};
# Keywords  : psi_blast_to_msf, psi_blast_multaln_to_msf
# Options   :
#   i=$iteration
#   v  for verbose
# Author    :
# Category  :
# Version   : 1.6
#--------------------------------------------------------------------------------
sub convert_bla_multaln_to_msf{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
    my ($e_val_threshold)=0.0005;
    my(@template_query_seq, @keys, $present_iteration, $blank_line_counter,
       %alignment_hash_subject, $seq_order, $choose_iteration, %final_output_hash,
       $seq_name, %seq_names_in_block, $put_alphabet_to_number_only_name);
    $choose_iteration=1;
    if($vars{'i'}=~/(\d+)/){
       $choose_iteration=$1;
    }
    if($char_opt=~/v/){ $verbose='v' }
    if($char_opt=~/a/){ $put_alphabet_to_number_only_name='a' }

    print "\n# $0: bla_multaln_to_msf, \$choose_iteration is $choose_iteration\n";

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Opening file
    #______________________________________________
    for($i=0; $i< @file; $i++){
        $file_base_name=${&get_base_names($file[$i])};
        print "\n# bla_multaln_to_msf: processing $file[$i]\n";
        my($present_iteration, %seq_names_in_block, $seq_name_ori, $sequence);
        open(BLAST_OUTPUT, $file[$i]);
        while(<BLAST_OUTPUT>){
            #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
            #  Finds the query sequence, resets $start_point and next line
            #____________________________________________
            if(/^Query= *(\S+)/){
                $query_seq=$1;
                print "\n# The query sequence is: $query_seq\n";
            }elsif(/^Searching\.\.\.\.\.\.\.\./ or eof){ ### to make sure it gets the last one
                $present_iteration++;
                if($present_iteration > $choose_iteration){
                    %final_output_hash=%alignment_hash_subject;
                    last;
                }else{
                    %final_output_hash=%alignment_hash_subject;
                    %alignment_hash_subject=();



( run in 0.543 second using v1.01-cache-2.11-cpan-96521ef73a4 )