Bioinf

 view release on metacpan or  search on metacpan

Bioinf.pl  view on Meta::CPAN

					 for($k=0; $k< @keys; $k++){
							print "$keys[$k]: $stat2{$keys[$k]}\n";
					 }
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Getting statistics
				#_________________________________________
				$evalue=$s;
				$E_mult_factor1=1;
				@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
									\$base, \$E_mult_factor1,  $leng_thresh, \%msp_00)};
				%correct=%{$output[3]};
				%final_stat_big_hash=(%final_stat_big_hash, %correct);
				if($verbose){
						@keys=sort keys %correct;
						for($k=0; $k< @keys; $k++){
							 print "$keys[$k] $correct{$keys[$k]}\n";
						}
				}
		}

Bioinf.pl  view on Meta::CPAN



     for($i=0; $i< @correcting_pairs; $i++){
                     $correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
     }
     return(\%correcting_pairs);
}

#__________________________________________________________________
# Title     : get_isearch_result_stat
# Usage     : &get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function  :
# Example   : Following input (hash eg: %stat2, input with the first word as key)
#              will become columnar output.
#
#    d1ash__ d1bam__ d1mba__ d2lhb__
#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
#    Will become:
#      ....

Bioinf.pl  view on Meta::CPAN

          # Check if SSO file already there
          #_______________________________________________________________________
          if(-s $out_sso_file){ $existing_sso=$out_file_sso_name }
          elsif(-s $out_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
          if(-s $out_msp_name){ $existing_msp=$out_file_msp_name }
          elsif(-s $out_gz_name){ $existing_msp=$out_file_gz_name }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # If the dates of files created are long ago, overwrite to refresh
          #____________________________________________________________________
          if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_sso_by_age='o';
          }
          if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_msp_by_age='o';
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          #  To check if the target seq DB is in ../
          #________________________________________________
          if(-s $sequence_DB){
              print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
          }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }

Bioinf.pl  view on Meta::CPAN


       print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
       if($algorithm=~/fasta/){       $out_sso_file="$seq_name\.fsso";
       }elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
       $out_sso_gz_name="$out_sso_name\.gz";

       if(-s $out_sso_file){ $existing_sso=$out_sso_file }
       elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
       if(-s $out_msp_file){ $existing_msp=$out_msp_file }
       elsif(-s $out_gz_name){ $existing_msp=$out_gz_name }
       if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_sso_by_age='o';
       }
       if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_msp_by_age='o';
       }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       #  To check if the target seq DB is in ../
       #________________________________________________
       if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
       }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
       }elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }

Bioinf.pl  view on Meta::CPAN

# Version   : 1.2
#----------------------------------------------------------------------------
sub if_file_older_than_x_days{
    # Purpose : Tell whether a file was last modified more than a given
    #           number of days ago.
    # Args    : $_[0]  file name, or a reference to a scalar holding it
    #           $_[1]  age threshold in days, or a reference to a scalar
    #                  holding it (defaults to 1 when undefined)
    # Returns : a reference to the threshold (a true value) when the file is
    #           older than the threshold; 0 otherwise.
    # Exits   : prints a message and exits when fewer than 2 arguments are
    #           given, or when the file is missing or empty (-s test fails).
    my($how_old_days);
    my $days=1; # default
    if(@_ < 2){ print "\n# if_file_older_than_x_days needs 2 args\n"; exit; }

    # Accept either plain scalars or scalar references. Checking ref() avoids
    # a symbolic dereference of a plain string and handles a referenced 0.
    my $file     = ref($_[0]) ? ${$_[0]} : $_[0];
    my $days_arg = ref($_[1]) ? ${$_[1]} : $_[1];
    $days = $days_arg if defined $days_arg;
    unless(-s $file){  print "\n# if_file_older_than_x_days: $file does NOT exist\n"; exit; }

    # lstat is tried first so that a symbolic link is dated by the link itself;
    # stat is only a fallback when lstat returns nothing.
    my @file_info = lstat($file);
    if(@file_info){
       print "\n# (i) if_file_older_than_x_days: running lstat\n";
    }else{
       print "\n# (i) if_file_older_than_x_days: running stat\n";
       @file_info = stat($file);
    }
    my $mtime = $file_info[9];
    unless(defined $mtime){
       print "\n# if_file_older_than_x_days: can not read mtime of $file\n";
       return(0);
    }

    # Age in whole days is the elapsed seconds divided by 86400. Feeding the
    # elapsed seconds to localtime() and taking the day-of-month field does NOT
    # give an age: it wraps every month and shifts with the timezone.
    $how_old_days = int( (time - $mtime) / 86400 );

    # The upper bound of 10000 days is kept as a sanity check against bogus
    # (e.g. zero) modification times.
    if($how_old_days > $days and $how_old_days < 10000){
       print "\n# if_file_older_than_x_days: $file is older than $days\n";
       return(\$days);
    }else{
       print "\n# if_file_older_than_x_days: $file is NOT older than $days\n";
       return(0);
    }
}

Bioinf.pl  view on Meta::CPAN

				  $ori_seq_name=$1;
				  if($seq=~/\S/ and $seq_name=~/\S/){
                                              my ($overwrite_by_age, $existing_msp_file);
                                              $seq_file_name="$seq_name\.fa";
                                              $seq_file_msp_name="$seq_name\.msp";
                                              $seq_file_msp_gz_name="$seq_name\.msp\.gz";
                                              $first_char= substr("\U$seq_name", 0, 1);
                                              if(-s "$first_char\/$seq_file_msp_name"){  $existing_msp_file="$first_char\/$seq_file_msp_name"; }
                                              elsif(-s "$first_char\/$seq_file_msp_gz_name"){  $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }

                                              if(  (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
                                                                     $overwrite_by_age='o';
                                                                     print "\n# interm_lib_search: $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
                                              }

                                              if( !$over_write  and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
                                                                     and !$overwrite_by_age ){
						 print "\n# interm_lib_search: $first_char\/$seq_file_msp_name already exists or newer than $age_in_days_of_out_file \n";
						 $seq='';
					 }else{
						 &do_sequence_search({"$seq_name", "$seq"},

Bioinf.pl  view on Meta::CPAN

			      $seq.=$_;
				  if($seq=~/\S/ and $seq_name=~/\S/){
										 my ($overwrite_by_age, $existing_msp_file);
										 $seq_file_name="$seq_name\.fa";
					 $seq_file_msp_name="$seq_name\.msp";
										 $seq_file_msp_gz_name="$seq_name\.msp\.gz";
										 $first_char= substr("\U$seq_name", 0, 1);
										 if(-s "$first_char\/$seq_file_msp_name"){  $existing_msp_file="$first_char\/$seq_file_msp_name"; }
										 elsif(-s "$first_char\/$seq_file_msp_gz_name"){  $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }

										 if(  (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
													$overwrite_by_age='o';
													print "\n# interm_lib_search : $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
										 }

					 if( !$over_write  and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
					     and !$overwrite_by_age ){
						 print "\n# $first_char\/$seq_file_msp_name already exists. (interm_lib_search) \n";
					 }else{
						 &do_sequence_search({"$seq_name", "$seq"},
						                      "DB=$input_db_file" ,

Bioinf.pl  view on Meta::CPAN

                                             pop(@temp);
                                             my($up_pwd)=join('/', @temp);
                                             $in_dir="$up_pwd\/$in_dir";
                                             $final_dir=$in_dir if (-d $in_dir);
                             }
                             return(\$final_dir);
			 }#~~~~~~~ End of sub ~~~~~~~~~~~

		 @read_files = @{&read_file_names_only(\$in_dir, \@target_file_names)};
		 for($i=0; $i < @read_files; $i ++){
					@stat=stat($read_files[$i]);
					$size_sum+=$stat[7];
					if($stat[7] > 1000000){ $big_files{$stat[7]} = $read_files[$i]; }
					if( ($read_files[$i]=~/^[\W]+$/)||($read_files[$i] =~ / +/)){
							splice( @read_files, $i, 1 ); $i--  }
					if( ($read_files[$i]=~/\.\.+/)||($read_files[$i] =~ /\#+/)||($read_files[$i]=~/\,+/)){
							splice( @read_files, $i, 1 ); $i-- }
					if(-d "$in_dir\/$read_files[$i]" ){ push(@read_dirs, $read_files[$i]); next}
		 }
		 push(@final_files, @read_files);
		 return(\@final_files, \%big_files,  \$size_sum, \@read_dirs);

Bioinf.pm  view on Meta::CPAN

					 for($k=0; $k< @keys; $k++){
							print "$keys[$k]: $stat2{$keys[$k]}\n";
					 }
				}

				#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				# Getting statistics
				#_________________________________________
				$evalue=$s;
				$E_mult_factor1=1;
				@output=@{&get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue,
									\$base, \$E_mult_factor1,  $leng_thresh, \%msp_00)};
				%correct=%{$output[3]};
				%final_stat_big_hash=(%final_stat_big_hash, %correct);
				if($verbose){
						@keys=sort keys %correct;
						for($k=0; $k< @keys; $k++){
							 print "$keys[$k] $correct{$keys[$k]}\n";
						}
				}
		}

Bioinf.pm  view on Meta::CPAN



     for($i=0; $i< @correcting_pairs; $i++){
                     $correcting_pairs{$correcting_pairs[$i]}=$correcting_pairs[$i];
     }
     return(\%correcting_pairs);
}

#__________________________________________________________________
# Title     : get_isearch_result_stat
# Usage     : &get_isearch_result_stat(\%stat2, \@pdbg_seqs, \$evalue);
# Function  :
# Example   : Following input (hash eg: %stat2, input with the first word as key)
#              will become columnar output.
#
#    d1ash__ d1bam__ d1mba__ d2lhb__
#    d1baba_ d1flp__ d1hbg__ d1hlb__ d1mba__ d1mbd__ d2lhb__ d3aaha_ d3sdha_
#    d1cpca_ d1cpcb_ d1gof_1 d2ts1_1
#
#    Will become:
#      ....

Bioinf.pm  view on Meta::CPAN

          # Check if SSO file already there
          #_______________________________________________________________________
          if(-s $out_sso_file){ $existing_sso=$out_file_sso_name }
          elsif(-s $out_sso_gz_name){ $existing_sso=$out_file_sso_gz_name }
          if(-s $out_msp_name){ $existing_msp=$out_file_msp_name }
          elsif(-s $out_gz_name){ $existing_msp=$out_file_gz_name }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          # If the dates of files created are long ago, overwrite to refresh
          #____________________________________________________________________
          if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_sso_by_age='o';
          }
          if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
               $over_write_msp_by_age='o';
          }

          #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          #  To check if the target seq DB is in ../
          #________________________________________________
          if(-s $sequence_DB){
              print "\n# (i) Good, target \$sequence_DB $sequence_DB is in this working dir\n";
          }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB"; }

Bioinf.pm  view on Meta::CPAN


       print "\n# (i) :-) Found $each_seq_fasta is searched against $sequence_DB\n";
       if($algorithm=~/fasta/){       $out_sso_file="$seq_name\.fsso";
       }elsif($algorithm=~/ssearch/){ $out_sso_file="$seq_name\.ssso"; }
       $out_sso_gz_name="$out_sso_name\.gz";

       if(-s $out_sso_file){ $existing_sso=$out_sso_file }
       elsif(-s $out_sso_gz_name){ $existing_sso=$out_sso_gz_name }
       if(-s $out_msp_file){ $existing_msp=$out_msp_file }
       elsif(-s $out_gz_name){ $existing_msp=$out_gz_name }
       if(  (localtime(time- (stat($existing_sso))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_sso_by_age='o';
       }
       if(  (localtime(time- (stat($existing_msp))[9]))[3] > $age_in_days_of_out_file ){
            $over_write_msp_by_age='o';
       }

       #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       #  To check if the target seq DB is in ../
       #________________________________________________
       if(-s $sequence_DB){ print "\n# (i) \$sequence_DB $sequence_DB exists, Good\n";
       }elsif( -s "../$sequence_DB"){ $sequence_DB="../$sequence_DB";
       }elsif( -s "../../$sequence_DB"){ $sequence_DB="../../$sequence_DB"; }

Bioinf.pm  view on Meta::CPAN

# Version   : 1.2
#----------------------------------------------------------------------------
sub if_file_older_than_x_days{
    # Purpose : Tell whether a file was last modified more than a given
    #           number of days ago.
    # Args    : $_[0]  file name, or a reference to a scalar holding it
    #           $_[1]  age threshold in days, or a reference to a scalar
    #                  holding it (defaults to 1 when undefined)
    # Returns : a reference to the threshold (a true value) when the file is
    #           older than the threshold; 0 otherwise.
    # Exits   : prints a message and exits when fewer than 2 arguments are
    #           given, or when the file is missing or empty (-s test fails).
    my($how_old_days);
    my $days=1; # default
    if(@_ < 2){ print "\n# if_file_older_than_x_days needs 2 args\n"; exit; }

    # Accept either plain scalars or scalar references. Checking ref() avoids
    # a symbolic dereference of a plain string and handles a referenced 0.
    my $file     = ref($_[0]) ? ${$_[0]} : $_[0];
    my $days_arg = ref($_[1]) ? ${$_[1]} : $_[1];
    $days = $days_arg if defined $days_arg;
    unless(-s $file){  print "\n# if_file_older_than_x_days: $file does NOT exist\n"; exit; }

    # lstat is tried first so that a symbolic link is dated by the link itself;
    # stat is only a fallback when lstat returns nothing.
    my @file_info = lstat($file);
    if(@file_info){
       print "\n# (i) if_file_older_than_x_days: running lstat\n";
    }else{
       print "\n# (i) if_file_older_than_x_days: running stat\n";
       @file_info = stat($file);
    }
    my $mtime = $file_info[9];
    unless(defined $mtime){
       print "\n# if_file_older_than_x_days: can not read mtime of $file\n";
       return(0);
    }

    # Age in whole days is the elapsed seconds divided by 86400. Feeding the
    # elapsed seconds to localtime() and taking the day-of-month field does NOT
    # give an age: it wraps every month and shifts with the timezone.
    $how_old_days = int( (time - $mtime) / 86400 );

    # The upper bound of 10000 days is kept as a sanity check against bogus
    # (e.g. zero) modification times.
    if($how_old_days > $days and $how_old_days < 10000){
       print "\n# if_file_older_than_x_days: $file is older than $days\n";
       return(\$days);
    }else{
       print "\n# if_file_older_than_x_days: $file is NOT older than $days\n";
       return(0);
    }
}

Bioinf.pm  view on Meta::CPAN

				  $ori_seq_name=$1;
				  if($seq=~/\S/ and $seq_name=~/\S/){
                                              my ($overwrite_by_age, $existing_msp_file);
                                              $seq_file_name="$seq_name\.fa";
                                              $seq_file_msp_name="$seq_name\.msp";
                                              $seq_file_msp_gz_name="$seq_name\.msp\.gz";
                                              $first_char= substr("\U$seq_name", 0, 1);
                                              if(-s "$first_char\/$seq_file_msp_name"){  $existing_msp_file="$first_char\/$seq_file_msp_name"; }
                                              elsif(-s "$first_char\/$seq_file_msp_gz_name"){  $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }

                                              if(  (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
                                                                     $overwrite_by_age='o';
                                                                     print "\n# interm_lib_search: $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
                                              }

                                              if( !$over_write  and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
                                                                     and !$overwrite_by_age ){
						 print "\n# interm_lib_search: $first_char\/$seq_file_msp_name already exists or newer than $age_in_days_of_out_file \n";
						 $seq='';
					 }else{
						 &do_sequence_search({"$seq_name", "$seq"},

Bioinf.pm  view on Meta::CPAN

			      $seq.=$_;
				  if($seq=~/\S/ and $seq_name=~/\S/){
										 my ($overwrite_by_age, $existing_msp_file);
										 $seq_file_name="$seq_name\.fa";
					 $seq_file_msp_name="$seq_name\.msp";
										 $seq_file_msp_gz_name="$seq_name\.msp\.gz";
										 $first_char= substr("\U$seq_name", 0, 1);
										 if(-s "$first_char\/$seq_file_msp_name"){  $existing_msp_file="$first_char\/$seq_file_msp_name"; }
										 elsif(-s "$first_char\/$seq_file_msp_gz_name"){  $existing_msp_file="$first_char\/$seq_file_msp_gz_name"; }

										 if(  (localtime(time- (stat($existing_msp_file))[9]))[3] > $age_in_days_of_out_file ) {
													$overwrite_by_age='o';
													print "\n# interm_lib_search : $seq_file_msp_name is older than $age_in_days_of_out_file days, ovrwrting\n";
										 }

					 if( !$over_write  and (-s "$first_char\/$seq_file_msp_name" or -s "$first_char\/$seq_file_msp_gz_name")
					     and !$overwrite_by_age ){
						 print "\n# $first_char\/$seq_file_msp_name already exists. (interm_lib_search) \n";
					 }else{
						 &do_sequence_search({"$seq_name", "$seq"},
						                      "DB=$input_db_file" ,

Bioinf.pm  view on Meta::CPAN

                                             pop(@temp);
                                             my($up_pwd)=join('/', @temp);
                                             $in_dir="$up_pwd\/$in_dir";
                                             $final_dir=$in_dir if (-d $in_dir);
                             }
                             return(\$final_dir);
			 }#~~~~~~~ End of sub ~~~~~~~~~~~

		 @read_files = @{&read_file_names_only(\$in_dir, \@target_file_names)};
		 for($i=0; $i < @read_files; $i ++){
					@stat=stat($read_files[$i]);
					$size_sum+=$stat[7];
					if($stat[7] > 1000000){ $big_files{$stat[7]} = $read_files[$i]; }
					if( ($read_files[$i]=~/^[\W]+$/)||($read_files[$i] =~ / +/)){
							splice( @read_files, $i, 1 ); $i--  }
					if( ($read_files[$i]=~/\.\.+/)||($read_files[$i] =~ /\#+/)||($read_files[$i]=~/\,+/)){
							splice( @read_files, $i, 1 ); $i-- }
					if(-d "$in_dir\/$read_files[$i]" ){ push(@read_dirs, $read_files[$i]); next}
		 }
		 push(@final_files, @read_files);
		 return(\@final_files, \%big_files,  \$size_sum, \@read_dirs);



( run in 1.354 second using v1.01-cache-2.11-cpan-49f99fa48dc )