Bioinf

 view release on metacpan or  search on metacpan

Bioinf.pm  view on Meta::CPAN

               print "\n# $out_evss_file is less than 500 byte, too small??\n";
          }
     }
     return(\@final_out_files);
}



#________________________________________________________________________
# Title     : write_fasta
# Usage     : many argments:  $seq_hash_reference  and $output_file_name
#             takes a hash which has got names keys and sequences values.
# Function  : writes multiple seqs. in fasta format (takes one or more seq.!!)
#             This needs hash which have 'name' 'actual sequence as value'
#
#             To print out each fasta seq into each single file, use write_fasta_seq_by_seq
#             This can rename seq names
#
# Example   : &write_fasta(\%in1, \$out_file_name, \%in2, \%in3,..., );
#             << The order of the hash and scalar ref. doesn't matter. >>
# Warning   : The default output file name is 'default_out.fa' if you do not
#             specify output file name.
#             OUTput file should have xxxxx.fa or xxxx.any_ext NOT just 'xxxxx'
# Keywords  : write_fasta_file, print_fasta_file, write fasta file, fasta_write
#             show_fasta, write_sequence_fasta, write_fasta_files,
# Options   : v for STD out.
#             r for rename the sequences so that Clustalw would not complain with 10 char limit
#               so result wuld be:  0 ->ASDFASDF, 1->ASDFASFASF, 2->ADSFASDFA
#       $write_pure_seq_only=o by o -o  ## writing only the seq (no gap chars or space)
# Returns   :
# Argument  :
#   $sort_seq_names=s by s  ## in writing sorted sequences are written
#   $write_rv_seq_as_well=R by R  # write reverse seq as well as forward seq
# Category  :
# Version   : 3.1
#--------------------------------------------------------------------
sub write_fasta{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	my($sort_seq_names, $string, $string_leng, $na,$out_file_name_provided,
           $write_pure_seq_only, $write_rv_seq_as_well, $output_file_rv,
           @files_made_with_rv, @files_made, %hash, $seq, $sec_str_hash_form_given);
	my($output_file) ='default_out.fa'; ### when no output file name is given, this is used
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
        # Try and determin the output file name
        #________________________________________________________
        if(@file>0){
            if($file[0]=~/\S\.[n]?fa/){ ## to handle .nfa and .fa files
                $output_file = $file[0];   $out_file_name_provided=1;
                if(-s $output_file){
                    rename($output_file, "$output_file\.bak");
                    print "\n# (INFO) $output_file is present. $output_file\.bak will be created for backup\n";
                }
            }elsif($file[0]=~/(\S+)\.\S+/){
                $output_file = "$1\.fa";   $out_file_name_provided=1;
            }
	}else{ $output_file='default_out.fa'; }

	if($char_opt=~/s/){ $sort_seq_names='s';	}
        if($char_opt=~/o/){ $write_pure_seq_only='o' }
        if($char_opt=~/R/){ $write_rv_seq_as_well='R'; print "\n# (INFO) You wanted REVerse seq as well\n"; }

	for ($n=0 ; $n < @hash; $n ++){
           my($seq, %hash);
           my %hash_orig=%{$hash[$n]};
           my(@keys_orig)= sort %hash_orig;

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # (1) If sec. str. hash form is given
           #______________________________________________________
           if($keys_orig[0]=~/^\d+$/ and $hash_orig{$keys_orig[0]}->[0]=~/^\S$/){
                #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # $hash_orig{$keys_orig[0]}->[3] is seq_name, $hash_orig{$keys_orig[0]}->[0] has each residue
                #_____________________________________________________________________________________________
                for($i=0; $i< @keys_orig; $i++){
                    $seq .=$hash_orig{$keys_orig[$i]}->[0];  ## making seq string like 'ASDFADFAFA....'
                }
                %hash=($hash_orig{$keys_orig[0]}->[3], $seq);
                $sec_str_hash_form_given=1;
           }else{
                %hash=%hash_orig;
                %hash_orig=();
           }

	   if($sort_seq_names){  @keys=sort keys %hash;
	   }else{		 @keys= keys %hash;	   }

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           # (2) When only one seq is given, use the seq name as output file
           #________________________________________________________________
           if( (@hash==1 and @keys==1 and @file < 1) or $sec_str_hash_form_given){
               $output_file="$keys[0]\.fa";
               $output_file_rv="$keys[0]\_rv\.fa";
           }elsif(@file < 1){                        $output_file="default_fa_$n\.fa";
               if($write_rv_seq_as_well){ $output_file_rv="default_fa_$n\_rv\.fa";  }
           }else{
               $output_file; # $output_file is already set when @file > 0
               if($write_rv_seq_as_well and !$base){
                   ($base, $ext)=split(/\./, $output_file);
                   $output_file_rv="$base\_rv\.$ext";
               }
           }

           open (FASTAS_WRITE,">$output_file");      # $string is the seq string.
           open (FASTAS_WRITE_RV,">$output_file_rv") if $write_rv_seq_as_well;
           push(@files_made, $output_file);
           push(@files_made_with_rv, $output_file_rv) if ($write_rv_seq_as_well);

           for ($i=0; $i < @keys; $i++){
                $na= $keys[$i];              $string = "\U$hash{$na}";
                if($write_rv_seq_as_well){  $string_rv=reverse($string);  $na_rv="$keys[$i]\_rv"; }

Bioinf.pm  view on Meta::CPAN

			 ${"$TITLE"}{$1}.="\n             $2";
		 }else{
			 ${"$TITLE"}{$entry_match}.= $2; }

	 ### Following is when entry line '# $certain_var = 1 by t'
	 }elsif( ($end_found != 1) && ($title_found==1) && (/^\# *([\$\@\%]+.+)/) ){
		 $line = $1;
		 if($entry_match =~ /[Oo]ption/){  ## if last entry was '# Option :', attach the variable directly.
			 ${"$TITLE"}{$entry_match} .= "\n             $line";
		 }else{                            ## if last entry wasn't '# Option :', find Option
			 for $entry (keys %{"$TITLE"}){  ##  and attach the variable to it
				 if ($entry =~ /[Oo]ption/){
					 ${"$TITLE"}{$entry} .= "\n             $line";
				 }
			 }
		 }
	 }elsif( ($title_found==1)&&(/ *\#[\*\-]{12,}/)){  ## to match '#-----..' or '#*******..'(Astrid's)
		 $end_found = 1; $title_found=0;
		 push(@boxes, \%{"$TITLE"});

	 }elsif( (/^#{10,} option table of this program   #{10,}/)&&($end_found >=1) &&($title_found==1)){
		 $option_tb_found++; ### This is a global var.
	 }
	}
	if(@boxes > 1){ \@boxes; }
	elsif( @boxes==1){ $boxes[0]; }
}                  ##<<--- ENd of the sub read_head_box

#____________________________________________________________________
# Title    : correct_head_box
# Function : Makes headbox in right and updated format. The most
#            updated headbox format is very this headbox. So, to
#            change all other headbox format, change this first.
# Usage    : just type correct_head_box.pl with a file name.
# Example  : correct_head_box.pl Bio.pl
# Argument : a filename
# Returns  :
# Options  :
# Version  : 1.1
# Keywords :
# Warning  :
#---------------------------------------------------------------
sub correct_head_box{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

	## Reading self and the template headbox in this subroutine.
	my %correct_head_box_entry = %{&read_correct_head_box()};
	for($p=0; $p < @file; $p++){
	  $in_file = $file[$p];

	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  ##  Make backup of the input file
	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  &cp( "$in_file", "$in_file\.bak$$");
	  print "\n $in_file\.bak$$ is created as a backup \n\n";
	  print chr(7);

	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  ##  Open files
	  ##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	  open(TARGET, "$in_file\.bak$$");
	  open(RESULT, ">$in_file");

	  my(@whole_file) = <TARGET>;
	  my(@keys) = keys %correct_head_box_entry;

	  for($i=0; $i < @whole_file; $i++){   ## <= going through the whole lines
		  my(@BOX);
		  if(($whole_file[$i] =~ /^#___________+/)&&
			  ($whole_file[$i+1] =~ /^# {0,4}([\w+\s*\w+]+) {0,4}: {0,9}([\w+\.\w+]*)/ ) ){
			  my($sub_name)=$2;
			  until( ($whole_file[$i] =~ /^sub +([\w\.]+) *\{/)||($whole_file[$i] =~ /^#\-{15,}/)
			  || ($whole_file[$i] =~ /^#\*{15,}/) ){
				  push(@BOX, $whole_file[$i]);
				  $i++;
			  }

			  ### To get the sub name by reading  'sub xxxxxxx{ ' line after #---------
			  for($z=$i; $z < $i+5; $z++){
				  if($whole_file[$z]=~/^sub +([\w\.]+) *\{/){
					  $sub_name=$1;
					  last;
				  }
			  }

			  my(%Final_out)=%{&read_head_box(\@BOX)};  ## Putting into a hash
			  ### Now I have %Final_out and %correct_head_box_entry
			  my(%correct) =%{&superpose_hash(\%correct_head_box_entry, \%Final_out)};
			  print RESULT @{&write_head_box(\%correct)};

			  until($whole_file[$i]=~/^sub +([\w\.]+) *\{/){  $i++ }
			  if($whole_file[$i]=~/^sub +([\w\.]+) *\{/){
				  until( $whole_file[$i] =~ /^}/){
					  print RESULT $whole_file[$i];
					  $i++;
				  }
				  print RESULT $whole_file[$i];
			  }
		  }elsif($whole_file[$i]=~/^sub +([\w\.]+) *\{/){  ### When there is no headbox at all.
			  $correct_head_box_entry{'Title'}=$1;
			  $correct_head_box_entry{'Version'}='1.0';
			  $correct_head_box_entry{'Author'}=getlogin;
			  print RESULT @{&write_head_box(\%correct_head_box_entry)};
			  print RESULT $whole_file[$i++];
			  until( $whole_file[$i] =~ /^}/){
				  print RESULT $whole_file[$i];
				  $i++;
			  }
			  print RESULT $whole_file[$i];

		  }else{  print RESULT $whole_file[$i]; }
	  }
	}
}



( run in 1.251 second using v1.01-cache-2.11-cpan-39bf76dae61 )