Bioinf
view release on metacpan or search on metacpan
for($k=0; $k< @keys; $k++){
print "$keys[$k]: $stat2{$keys[$k]}\n";
}
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Getting statistics
#_________________________________________
$evalue=$s;
$E_mult_factor1=1;
@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
\$base, \$E_mult_factor1, $leng_thresh, \%msp_00)};
%correct=%{$output[3]};
%final_stat_big_hash=(%final_stat_big_hash, %correct);
if($verbose){
@keys=sort keys %correct;
for($k=0; $k< @keys; $k++){
print "$keys[$k] $correct{$keys[$k]}\n";
}
}
}
for($i=0; $i< @correcting_pairs; $i++){
$correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
}
return(\%correcting_pairs);
}
#__________________________________________________________________
# Title : get_isearch_result_stat
# Usage : &get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function :
# Example : Following input (hash eg: %stat2, input with the first word as key)
# will become columnar output.
#
# d1ash__ d1bam__ d1mba__ d2lhb__
# d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
# d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
# Will become:
# ....
# Check if SSO file already there
#_______________________________________________________________________
if(-s $out_sso_file){ $existing_sso=$out_file_sso_name }
elsif(-s $out_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
if(-s $out_msp_name){ $existing_msp=$out_file_msp_name }
elsif(-s $out_gz_name){ $existing_msp=$out_file_gz_name }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# If the dates of files created are long ago, overwrite to refresh
#____________________________________________________________________
if( (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
$over_write_sso_by_age='o';
}
if( (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
$over_write_msp_by_age='o';
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To check if the target seq DB is in ../
#________________________________________________
if(-s $sequence_DB){
print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
}elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }
print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
if($algorithm=~/fasta/){ $out_sso_file="$seq_name\.fsso";
}elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
$out_sso_gz_name="$out_sso_name\.gz";
if(-s $out_sso_file){ $existing_sso=$out_sso_file }
elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
if(-s $out_msp_file){ $existing_msp=$out_msp_file }
elsif(-s $out_gz_name){ $existing_msp=$out_gz_name }
if( (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
$over_write_sso_by_age='o';
}
if( (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
$over_write_msp_by_age='o';
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To check if the target seq DB is in ../
#________________________________________________
if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
}elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
}elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }
# Version : 1.2
#----------------------------------------------------------------------------
sub if_file_older_than_x_days{
    #------------------------------------------------------------------
    # Purpose : Tell whether a file was last modified more than a given
    #           number of days ago.
    # Usage   : $older=&if_file_older_than_x_days($file, $days);
    #           $older=&if_file_older_than_x_days(\$file, \$days);
    # Args    : 1) file name, as a plain scalar or a scalar reference
    #           2) age threshold in days, plain scalar or scalar reference
    #              (falls back to 1 when undefined)
    # Returns : a reference to the threshold (a true value) when the file
    #           is older than the threshold; 0 otherwise, including when
    #           the file cannot be stat'ed.
    # Exits   : when fewer than 2 arguments are given, or when the file is
    #           missing or empty (-s test fails).
    #------------------------------------------------------------------
    my($how_old_days);
    my $days=1; # default
    if(@_ < 2){ print "\n# if_file_older_than_x_days needs 2 args\n"; exit; }

    # Accept either a value or a reference to it.  Testing with ref()
    # avoids symbolically dereferencing plain strings/numbers (a plain 0
    # would otherwise resolve to $0, the script name).
    my $file    =ref($_[0]) ? ${$_[0]} : $_[0];
    my $days_arg=ref($_[1]) ? ${$_[1]} : $_[1];
    $days=$days_arg if defined $days_arg;

    unless(-s $file){ print "\n# if_file_older_than_x_days: $file does NOT exist\n"; exit; }

    # lstat is tried first so that a symbolic link is judged by its own
    # timestamp; stat is only the fallback.
    my @file_stat=lstat($file);
    if(@file_stat){ # to handle symbolic link
        print "\n# (i) if_file_older_than_x_days: running lstat\n";
    }else{
        print "\n# (i) if_file_older_than_x_days: running stat\n";
        @file_stat=stat($file);
    }
    unless(@file_stat){
        print "\n# if_file_older_than_x_days: can not stat $file, age unknown\n";
        return(0);
    }

    # Age is the elapsed time since mtime (field 9) expressed in days.
    # Feeding the difference through localtime() would yield a day of the
    # month (1..31) of a date in 1970, not an age.
    $how_old_days=(time - $file_stat[9]) / 86400;

    # The 10000 day ceiling is kept from the original code; presumably it
    # rejects bogus timestamps such as an mtime of 0 -- TODO confirm.
    if($how_old_days > $days and $how_old_days < 10000){
        print "\n# if_file_older_than_x_days: $file is older than $days\n";
        return(\$days);
    }else{
        print "\n# if_file_older_than_x_days: $file is NOT older than $days\n";
        return(0);
    }
}
$ori_seq_name=$1;
if($seq=~/\S/ and $seq_name=~/\S/){
my ($overwrite_by_age, $existing_msp_file);
$seq_file_name="$seq_name\.fa";
$seq_file_msp_name="$seq_name\.msp";
$seq_file_msp_gz_name="$seq_name\.msp\.gz";
$first_char= substr("\U$seq_name", 0, 1);
if(-s "$first_char\/$seq_file_msp_name"){ $existing_msp_file="$first_char\/$seq_file_msp_name"; }
elsif(-s "$first_char\/$seq_file_msp_gz_name"){ $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }
if( (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
$overwrite_by_age='o';
print "\n# interm_lib_search: $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
}
if( !$over_write and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
and !$overwrite_by_age ){
print "\n# interm_lib_search: $first_char\/$seq_file_msp_name already exists or newer than $age_in_days_of_out_file \n";
$seq='';
}else{
&do_sequence_search({"$seq_name", "$seq"},
$seq.=$_;
if($seq=~/\S/ and $seq_name=~/\S/){
my ($overwrite_by_age, $existing_msp_file);
$seq_file_name="$seq_name\.fa";
$seq_file_msp_name="$seq_name\.msp";
$seq_file_msp_gz_name="$seq_name\.msp\.gz";
$first_char= substr("\U$seq_name", 0, 1);
if(-s "$first_char\/$seq_file_msp_name"){ $existing_msp_file="$first_char\/$seq_file_msp_name"; }
elsif(-s "$first_char\/$seq_file_msp_gz_name"){ $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }
if( (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
$overwrite_by_age='o';
print "\n# interm_lib_search : $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
}
if( !$over_write and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
and !$overwrite_by_age ){
print "\n# $first_char\/$seq_file_msp_name already exists. (interm_lib_search) \n";
}else{
&do_sequence_search({"$seq_name", "$seq"},
"DB=$input_db_file" ,
pop(@temp);
my($up_pwd)=join('/', @temp);
$in_dir="$up_pwd\/$in_dir";
$final_dir=$in_dir if (-d $in_dir);
}
return(\$final_dir);
}#~~~~~~~ End of sub ~~~~~~~~~~~
@read_files = @{&read_file_names_only(\$in_dir, \@target_file_names)};
for($i=0; $i < @read_files; $i ++){
@stat=stat($read_files[$i]);
$size_sum+=$stat[7];
if($stat[7] > 1000000){ $big_files{$stat[7]} = $read_files[$i]; }
if( ($read_files[$i]=~/^[\W]+$/)||($read_files[$i] =~ / +/)){
splice( @read_files, $i, 1 ); $i-- }
if( ($read_files[$i]=~/\.\.+/)||($read_files[$i] =~ /\#+/)||($read_files[$i]=~/\,+/)){
splice( @read_files, $i, 1 ); $i-- }
if(-d "$in_dir\/$read_files[$i]" ){ push(@read_dirs, $read_files[$i]); next}
}
push(@final_files, @read_files);
return(\@final_files, \%big_files, \$size_sum, \@read_dirs);
for($k=0; $k< @keys; $k++){
print "$keys[$k]: $stat2{$keys[$k]}\n";
}
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Getting statistics
#_________________________________________
$evalue=$s;
$E_mult_factor1=1;
@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
\$base, \$E_mult_factor1, $leng_thresh, \%msp_00)};
%correct=%{$output[3]};
%final_stat_big_hash=(%final_stat_big_hash, %correct);
if($verbose){
@keys=sort keys %correct;
for($k=0; $k< @keys; $k++){
print "$keys[$k] $correct{$keys[$k]}\n";
}
}
}
for($i=0; $i< @correcting_pairs; $i++){
$correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
}
return(\%correcting_pairs);
}
#__________________________________________________________________
# Title : get_isearch_result_stat
# Usage : &get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function :
# Example : Following input (hash eg: %stat2, input with the first word as key)
# will become columnar output.
#
# d1ash__ d1bam__ d1mba__ d2lhb__
# d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
# d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
# Will become:
# ....
# Check if SSO file already there
#_______________________________________________________________________
if(-s $out_sso_file){ $existing_sso=$out_file_sso_name }
elsif(-s $out_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
if(-s $out_msp_name){ $existing_msp=$out_file_msp_name }
elsif(-s $out_gz_name){ $existing_msp=$out_file_gz_name }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# If the dates of files created are long ago, overwrite to refresh
#____________________________________________________________________
if( (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
$over_write_sso_by_age='o';
}
if( (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
$over_write_msp_by_age='o';
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To check if the target seq DB is in ../
#________________________________________________
if(-s $sequence_DB){
print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
}elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }
print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
if($algorithm=~/fasta/){ $out_sso_file="$seq_name\.fsso";
}elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
$out_sso_gz_name="$out_sso_name\.gz";
if(-s $out_sso_file){ $existing_sso=$out_sso_file }
elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
if(-s $out_msp_file){ $existing_msp=$out_msp_file }
elsif(-s $out_gz_name){ $existing_msp=$out_gz_name }
if( (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
$over_write_sso_by_age='o';
}
if( (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
$over_write_msp_by_age='o';
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To check if the target seq DB is in ../
#________________________________________________
if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
}elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
}elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }
# Version : 1.2
#----------------------------------------------------------------------------
sub if_file_older_than_x_days{
    #------------------------------------------------------------------
    # Purpose : Tell whether a file was last modified more than a given
    #           number of days ago.
    # Usage   : $older=&if_file_older_than_x_days($file, $days);
    #           $older=&if_file_older_than_x_days(\$file, \$days);
    # Args    : 1) file name, as a plain scalar or a scalar reference
    #           2) age threshold in days, plain scalar or scalar reference
    #              (falls back to 1 when undefined)
    # Returns : a reference to the threshold (a true value) when the file
    #           is older than the threshold; 0 otherwise, including when
    #           the file cannot be stat'ed.
    # Exits   : when fewer than 2 arguments are given, or when the file is
    #           missing or empty (-s test fails).
    #------------------------------------------------------------------
    my($how_old_days);
    my $days=1; # default
    if(@_ < 2){ print "\n# if_file_older_than_x_days needs 2 args\n"; exit; }

    # Accept either a value or a reference to it.  Testing with ref()
    # avoids symbolically dereferencing plain strings/numbers (a plain 0
    # would otherwise resolve to $0, the script name).
    my $file    =ref($_[0]) ? ${$_[0]} : $_[0];
    my $days_arg=ref($_[1]) ? ${$_[1]} : $_[1];
    $days=$days_arg if defined $days_arg;

    unless(-s $file){ print "\n# if_file_older_than_x_days: $file does NOT exist\n"; exit; }

    # lstat is tried first so that a symbolic link is judged by its own
    # timestamp; stat is only the fallback.
    my @file_stat=lstat($file);
    if(@file_stat){ # to handle symbolic link
        print "\n# (i) if_file_older_than_x_days: running lstat\n";
    }else{
        print "\n# (i) if_file_older_than_x_days: running stat\n";
        @file_stat=stat($file);
    }
    unless(@file_stat){
        print "\n# if_file_older_than_x_days: can not stat $file, age unknown\n";
        return(0);
    }

    # Age is the elapsed time since mtime (field 9) expressed in days.
    # Feeding the difference through localtime() would yield a day of the
    # month (1..31) of a date in 1970, not an age.
    $how_old_days=(time - $file_stat[9]) / 86400;

    # The 10000 day ceiling is kept from the original code; presumably it
    # rejects bogus timestamps such as an mtime of 0 -- TODO confirm.
    if($how_old_days > $days and $how_old_days < 10000){
        print "\n# if_file_older_than_x_days: $file is older than $days\n";
        return(\$days);
    }else{
        print "\n# if_file_older_than_x_days: $file is NOT older than $days\n";
        return(0);
    }
}
$ori_seq_name=$1;
if($seq=~/\S/ and $seq_name=~/\S/){
my ($overwrite_by_age, $existing_msp_file);
$seq_file_name="$seq_name\.fa";
$seq_file_msp_name="$seq_name\.msp";
$seq_file_msp_gz_name="$seq_name\.msp\.gz";
$first_char= substr("\U$seq_name", 0, 1);
if(-s "$first_char\/$seq_file_msp_name"){ $existing_msp_file="$first_char\/$seq_file_msp_name"; }
elsif(-s "$first_char\/$seq_file_msp_gz_name"){ $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }
if( (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
$overwrite_by_age='o';
print "\n# interm_lib_search: $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
}
if( !$over_write and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
and !$overwrite_by_age ){
print "\n# interm_lib_search: $first_char\/$seq_file_msp_name already exists or newer than $age_in_days_of_out_file \n";
$seq='';
}else{
&do_sequence_search({"$seq_name", "$seq"},
$seq.=$_;
if($seq=~/\S/ and $seq_name=~/\S/){
my ($overwrite_by_age, $existing_msp_file);
$seq_file_name="$seq_name\.fa";
$seq_file_msp_name="$seq_name\.msp";
$seq_file_msp_gz_name="$seq_name\.msp\.gz";
$first_char= substr("\U$seq_name", 0, 1);
if(-s "$first_char\/$seq_file_msp_name"){ $existing_msp_file="$first_char\/$seq_file_msp_name"; }
elsif(-s "$first_char\/$seq_file_msp_gz_name"){ $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }
if( (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
$overwrite_by_age='o';
print "\n# interm_lib_search : $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
}
if( !$over_write and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
and !$overwrite_by_age ){
print "\n# $first_char\/$seq_file_msp_name already exists. (interm_lib_search) \n";
}else{
&do_sequence_search({"$seq_name", "$seq"},
"DB=$input_db_file" ,
pop(@temp);
my($up_pwd)=join('/', @temp);
$in_dir="$up_pwd\/$in_dir";
$final_dir=$in_dir if (-d $in_dir);
}
return(\$final_dir);
}#~~~~~~~ End of sub ~~~~~~~~~~~
@read_files = @{&read_file_names_only(\$in_dir, \@target_file_names)};
for($i=0; $i < @read_files; $i ++){
@stat=stat($read_files[$i]);
$size_sum+=$stat[7];
if($stat[7] > 1000000){ $big_files{$stat[7]} = $read_files[$i]; }
if( ($read_files[$i]=~/^[\W]+$/)||($read_files[$i] =~ / +/)){
splice( @read_files, $i, 1 ); $i-- }
if( ($read_files[$i]=~/\.\.+/)||($read_files[$i] =~ /\#+/)||($read_files[$i]=~/\,+/)){
splice( @read_files, $i, 1 ); $i-- }
if(-d "$in_dir\/$read_files[$i]" ){ push(@read_dirs, $read_files[$i]); next}
}
push(@final_files, @read_files);
return(\@final_files, \%big_files, \$size_sum, \@read_dirs);
( run in 1.354 second using v1.01-cache-2.11-cpan-49f99fa48dc )