Bioinf
view release on metacpan or search on metacpan
print "\n# $out_evss_file is less than 500 byte, too small??\n";
}
}
return(\@final_out_files);
}
#________________________________________________________________________
# Title : write_fasta
# Usage : many argments: $seq_hash_reference and $output_file_name
# takes a hash which has got names keys and sequences values.
# Function : writes multiple seqs. in fasta format (takes one or more seq.!!)
# This needs hash which have 'name' 'actual sequence as value'
#
# To print out each fasta seq into each single file, use write_fasta_seq_by_seq
# This can rename seq names
#
# Example : &write_fasta(\%in1, \$out_file_name, \%in2, \%in3,..., );
# << The order of the hash and scalar ref. doesn't matter. >>
# Warning : The default output file name is 'default_out.fa' if you do not
# specify output file name.
# OUTput file should have xxxxx.fa or xxxx.any_ext NOT just 'xxxxx'
# Keywords : write_fasta_file, print_fasta_file, write fasta file, fasta_write
# show_fasta, write_sequence_fasta, write_fasta_files,
# Options : v for STD out.
# r for rename the sequences so that Clustalw would not complain with 10 char limit
# so result wuld be: 0 ->ASDFASDF, 1->ASDFASFASF, 2->ADSFASDFA
# $write_pure_seq_only=o by o -o ## writing only the seq (no gap chars or space)
# Returns :
# Argument :
# $sort_seq_names=s by s ## in writing sorted sequences are written
# $write_rv_seq_as_well=R by R # write reverse seq as well as forward seq
# Category :
# Version : 3.1
#--------------------------------------------------------------------
sub write_fasta{
#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
if($debug==1){print "\n\t\@hash=\"@hash\"
\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
my($sort_seq_names, $string, $string_leng, $na,$out_file_name_provided,
$write_pure_seq_only, $write_rv_seq_as_well, $output_file_rv,
@files_made_with_rv, @files_made, %hash, $seq, $sec_str_hash_form_given);
my($output_file) ='default_out.fa'; ### when no output file name is given, this is used
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
# Try and determin the output file name
#________________________________________________________
if(@file>0){
if($file[0]=~/\S\.[n]?fa/){ ## to handle .nfa and .fa files
$output_file = $file[0]; $out_file_name_provided=1;
if(-s $output_file){
rename($output_file, "$output_file\.bak");
print "\n# (INFO) $output_file is present. $output_file\.bak will be created for backup\n";
}
}elsif($file[0]=~/(\S+)\.\S+/){
$output_file = "$1\.fa"; $out_file_name_provided=1;
}
}else{ $output_file='default_out.fa'; }
if($char_opt=~/s/){ $sort_seq_names='s'; }
if($char_opt=~/o/){ $write_pure_seq_only='o' }
if($char_opt=~/R/){ $write_rv_seq_as_well='R'; print "\n# (INFO) You wanted REVerse seq as well\n"; }
for ($n=0 ; $n < @hash; $n ++){
my($seq, %hash);
my %hash_orig=%{$hash[$n]};
my(@keys_orig)= sort %hash_orig;
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# (1) If sec. str. hash form is given
#______________________________________________________
if($keys_orig[0]=~/^\d+$/ and $hash_orig{$keys_orig[0]}->[0]=~/^\S$/){
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# $hash_orig{$keys_orig[0]}->[3] is seq_name, $hash_orig{$keys_orig[0]}->[0] has each residue
#_____________________________________________________________________________________________
for($i=0; $i< @keys_orig; $i++){
$seq .=$hash_orig{$keys_orig[$i]}->[0]; ## making seq string like 'ASDFADFAFA....'
}
%hash=($hash_orig{$keys_orig[0]}->[3], $seq);
$sec_str_hash_form_given=1;
}else{
%hash=%hash_orig;
%hash_orig=();
}
if($sort_seq_names){ @keys=sort keys %hash;
}else{ @keys= keys %hash; }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# (2) When only one seq is given, use the seq name as output file
#________________________________________________________________
if( (@hash==1 and @keys==1 and @file < 1) or $sec_str_hash_form_given){
$output_file="$keys[0]\.fa";
$output_file_rv="$keys[0]\_rv\.fa";
}elsif(@file < 1){ $output_file="default_fa_$n\.fa";
if($write_rv_seq_as_well){ $output_file_rv="default_fa_$n\_rv\.fa"; }
}else{
$output_file; # $output_file is already set when @file > 0
if($write_rv_seq_as_well and !$base){
($base, $ext)=split(/\./, $output_file);
$output_file_rv="$base\_rv\.$ext";
}
}
open (FASTAS_WRITE,">$output_file"); # $string is the seq string.
open (FASTAS_WRITE_RV,">$output_file_rv") if $write_rv_seq_as_well;
push(@files_made, $output_file);
push(@files_made_with_rv, $output_file_rv) if ($write_rv_seq_as_well);
for ($i=0; $i < @keys; $i++){
$na= $keys[$i]; $string = "\U$hash{$na}";
if($write_rv_seq_as_well){ $string_rv=reverse($string); $na_rv="$keys[$i]\_rv"; }
${"$TITLE"}{$1}.="\n $2";
}else{
${"$TITLE"}{$entry_match}.= $2; }
### Following is when entry line '# $certain_var = 1 by t'
}elsif( ($end_found != 1) && ($title_found==1) && (/^\# *([\$\@\%]+.+)/) ){
$line = $1;
if($entry_match =~ /[Oo]ption/){ ## if last entry was '# Option :', attach the variable directly.
${"$TITLE"}{$entry_match} .= "\n $line";
}else{ ## if last entry wasn't '# Option :', find Option
for $entry (keys %{"$TITLE"}){ ## and attach the variable to it
if ($entry =~ /[Oo]ption/){
${"$TITLE"}{$entry} .= "\n $line";
}
}
}
}elsif( ($title_found==1)&&(/ *\#[\*\-]{12,}/)){ ## to match '#-----..' or '#*******..'(Astrid's)
$end_found = 1; $title_found=0;
push(@boxes, \%{"$TITLE"});
}elsif( (/^#{10,} option table of this program #{10,}/)&&($end_found >=1) &&($title_found==1)){
$option_tb_found++; ### This is a global var.
}
}
if(@boxes > 1){ \@boxes; }
elsif( @boxes==1){ $boxes[0]; }
} ##<<--- ENd of the sub read_head_box
#____________________________________________________________________
# Title : correct_head_box
# Function : Makes headbox in right and updated format. The most
# updated headbox format is very this headbox. So, to
# change all other headbox format, change this first.
# Usage : just type correct_head_box.pl with a file name.
# Example : correct_head_box.pl Bio.pl
# Argument : a filename
# Returns :
# Options :
# Version : 1.1
# Keywords :
# Warning :
#---------------------------------------------------------------
sub correct_head_box{
#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
if($debug==1){print "\n\t\@hash=\"@hash\"
\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
## Reading self and the template headbox in this subroutine.
my %correct_head_box_entry = %{&read_correct_head_box()};
for($p=0; $p < @file; $p++){
$in_file = $file[$p];
##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
## Make backup of the input file
##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
&cp( "$in_file", "$in_file\.bak$$");
print "\n $in_file\.bak$$ is created as a backup \n\n";
print chr(7);
##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
## Open files
##""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
open(TARGET, "$in_file\.bak$$");
open(RESULT, ">$in_file");
my(@whole_file) = <TARGET>;
my(@keys) = keys %correct_head_box_entry;
for($i=0; $i < @whole_file; $i++){ ## <= going through the whole lines
my(@BOX);
if(($whole_file[$i] =~ /^#___________+/)&&
($whole_file[$i+1] =~ /^# {0,4}([\w+\s*\w+]+) {0,4}: {0,9}([\w+\.\w+]*)/ ) ){
my($sub_name)=$2;
until( ($whole_file[$i] =~ /^sub +([\w\.]+) *\{/)||($whole_file[$i] =~ /^#\-{15,}/)
|| ($whole_file[$i] =~ /^#\*{15,}/) ){
push(@BOX, $whole_file[$i]);
$i++;
}
### To get the sub name by reading 'sub xxxxxxx{ ' line after #---------
for($z=$i; $z < $i+5; $z++){
if($whole_file[$z]=~/^sub +([\w\.]+) *\{/){
$sub_name=$1;
last;
}
}
my(%Final_out)=%{&read_head_box(\@BOX)}; ## Putting into a hash
### Now I have %Final_out and %correct_head_box_entry
my(%correct) =%{&superpose_hash(\%correct_head_box_entry, \%Final_out)};
print RESULT @{&write_head_box(\%correct)};
until($whole_file[$i]=~/^sub +([\w\.]+) *\{/){ $i++ }
if($whole_file[$i]=~/^sub +([\w\.]+) *\{/){
until( $whole_file[$i] =~ /^}/){
print RESULT $whole_file[$i];
$i++;
}
print RESULT $whole_file[$i];
}
}elsif($whole_file[$i]=~/^sub +([\w\.]+) *\{/){ ### When there is no headbox at all.
$correct_head_box_entry{'Title'}=$1;
$correct_head_box_entry{'Version'}='1.0';
$correct_head_box_entry{'Author'}=getlogin;
print RESULT @{&write_head_box(\%correct_head_box_entry)};
print RESULT $whole_file[$i++];
until( $whole_file[$i] =~ /^}/){
print RESULT $whole_file[$i];
$i++;
}
print RESULT $whole_file[$i];
}else{ print RESULT $whole_file[$i]; }
}
}
}
( run in 1.175 second using v1.01-cache-2.11-cpan-39bf76dae61 )