Bioinf

 view release on metacpan or  search on metacpan

Bioinf.pl  view on Meta::CPAN

             #-- if $msp_1{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
             if($msp_1{$enquiry_seq}=~/\S+/){
                    ($seq_name, $sw_score, $evalue)=$enquiry_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
                     $merged_msp1{$seq_name} .=$msp_1{$enquiry_seq};
             }else{
                    next;
             }
	 }
	 for($i=0; $i< @msp2_keys; $i++){
             $enquiry_seq = $msp2_keys[$i];

             #-- if $msp_2{$enquiry_seq} is not empty, assigns name, score, evalue etc to vars, or next
             if($msp_2{$enquiry_seq}=~/\S+/){
                     $merged_msp2{$enquiry_seq} .=$msp_2{$enquiry_seq};
             }else{
                    next;
             }
	 }

	 @merged_msp1_keys=sort keys  %merged_msp1;
	 @merged_msp2_keys=sort keys  %merged_msp2;

	 for($i=0; $i< @merged_msp1_keys; $i++){
	  $enquiry_seq=$merged_msp1_keys[$i];
	  @intermediate_seqs=sort split(/ +/, $merged_msp1{$enquiry_seq});
	  for($j=0; $j< @intermediate_seqs; $j++){

		 $intermediate_seq=$intermediate_seqs[$j];

		 ($inter_seq_name, $sw_score, $evalue)=$intermediate_seq=~/(\S+)\((\S+)\)\((\S+)\)/;
		 @final_matches=sort split(/ +/,  $merged_msp2{$inter_seq_name});
		 for($k=0; $k < @final_matches; $k ++){
		     $final_matched_seq = $final_matches[$k];
		     if($char_opt=~/v/){
			 printf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
		     }
		     $final_out{$final_matched_seq}=
	             sprintf ("%-18s %-40s %-38s\n", $enquiry_seq, $intermediate_seq, $final_matched_seq);
		 }
		 #print "\n";
	  }
	  #print "\n";
	 }
	 #print "\n";
	 return(\%final_out);
}

#______________________________________________________________________________
# Title     : get_perl_keywords
# Usage     :
# Function  :
# Example   :
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_perl_keywords{
    my(%perl_keywords);
    my @keywords=qw( AUTOLOAD BEGIN CORE DESTROY END abs accept alarm and atan2 bind binmode bless caller chdir chmod chop chown chr chroot
       close closedir cmp connect continue cos crypt dbmclose dbmopen defined delete die do dump each else elsif endgrent endhostent endnetent endprotoent endpwent endservent
       eof eq eval exec exit exp fcntl fileno flock for foreach fork format formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
       getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid getservbyname
       getservbyport getservent getsockname getsockopt glob gmtime goto grep gt hex if index int ioctl join keys kill last lc lcfirst le length link listen local localtime log lstat
       lt m mkdir msgctl msgget msgrcv msgsnd my ne next no not oct open opendir or ord pack package pipe pop print printf push q qq quotemeta qw qx rand read readdir readline
       readlink readpipe recv redo ref rename require reset return reverse rewinddir rindex rmdir s scalar seek seekdir select
       semctl semget semop send setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent setservent setsockopt shift
       shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split sprintf sqrt srand stat
       study sub substr symlink syscall sysread system syswrite tell telldir tie time times tr truncate uc ucfirst
       umask undef unless unlink unpack unshift untie until use utime values vec wait waitpid wantarray
       warn while write x xor y
     );
     foreach(@keywords){
        $perl_keywords{$_}=$_;
     }
     return(\%perl_keywords);
}




#______________________________________________________________________________
# Title     :  get_homology_info_of_seq_pairs
# Usage     :
# Function  :
# Example   : %seq_pair_homology_table=%{&get_homology_info_of_seq_pairs(\%pairs_excluded,
#                                         \%pdbg_hash_table)};
#
# Keywords  :
# Options   :
# Author    : jong@salt2.med.harvard.edu,
# Category  :
# Version   : 1.0
#------------------------------------------------------------------------------
sub get_homology_info_of_seq_pairs{
   my($i, %pairs_to_be_checked,%seq_pairs_homology_table, @pairs,
      $homology_info, %pdbg_hash_table);
   for($i=0; $i< @_; $i++){
      my %in_hash=%{$_[$i]};
      my @seq_names=keys %in_hash;
      if($in_hash{$seq_names[0]}=~/^\S+[\t ]+\S+$/){
          %pairs_to_be_checked=%in_hash; %in_hash=();
      }elsif($in_hash{$seq_names[0]}=~/^\S+$/){
          %pdbg_hash_table=%in_hash; %in_hash=();
      }
   }

   @pairs=keys %pairs_to_be_checked;

   for($i=0; $i< @pairs; $i++){
      if($pairs[$i]=~/^(\S+)[\t ]+(\S+)/){
          $homology_info=${&check_homology_of_seq_pair(\$pairs[$i], \%pdbg_hash_table)};
          $seq_pairs_homology_table{$pairs[$i]}=$homology_info;
          print "\n#>> $pairs[$i] $homology_info" if $verbose;
      }
   }
   return(\%seq_pairs_homology_table);
}


Bioinf.pl  view on Meta::CPAN

				  my (%out_subs, $each_sub);
				  my $title_found;
				  #"""" Taking the headbox """""""""""""
				  if( ($lib[$j]=~/^#+[_\-\*]{10,120} *$/)
					 &&($lib[$j+1]=~/^(# *title *: *([\w\-]+))[^\.pl]/i) ){
					  $each_sub=$2;
					  $title_found =1;
					  if( (-s "$each_sub\.pl") > 200 ){
						  print (-s "$each_sub\.pl"), "   ";
						  print "  $each_sub", " exists \n";
						  next FOR;
					  }elsif((-s "$each_sub\.pl") <= 200){
					     my $temp;
					     open (TEMP, "<$each_sub\.pl");
					     while(<TEMP>){
					        if(/^#[_\-\*]{10,120} *$/){ $temp++ }
					        elsif(/^# *title *: *[\w\-]+[^\.pl]/i ){
					           $temp++;
					        }elsif(/^# *\w+/){
					           $temp=$temp+0.5;
					        }
					     }
					     if($temp >2){
					        next FOR;
					     }
					  }

					  $out_subs{"$each_sub"}.="$lib[$j]$1\n";
					  $j+=2;
					  until( ($lib[$j]=~/^sub *\w+ *\{/)||($lib[$j]=~/^#---+ *$/) ||
							 ($lib[$j]=~/^#_____+ *$/) || ($lib[$j]=~/^#\*\*+ *$/) ){
							 $lib[$j]=~s/( *)$//;  #<-- removing ending space
							 $out_subs{"$each_sub"}.="$lib[$j]";
							 $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  $j++;    ## essential to remove #------------- line
				  }

				  #"""""""" Reading sub {  } """""""
				  if( ($title_found==1)&&($lib[$j]=~/^sub +([\w\-]+) *\{/) ){
				     $each_sub=$1;
					  $out_subs{"$each_sub"}.="$lib[$j]";
					  if($lib[$j]=~/^sub +([\w\-]+) *\{.+\}/){
						  goto WRITE;
					  }
					  $j++;
					  until($lib[$j]=~/^\}/){
					     $out_subs{"$each_sub"}.="$lib[$j]";  $j++;
					  }
					  $out_subs{"$each_sub"}.="$lib[$j]";  ## to fetch '}'
					  $j++;

					  WRITE:
				     open (EACH_FILE, ">$each_sub\.pl");
				     print EACH_FILE  "#\!\/perl\n";
				     print EACH_FILE  "# Made by $0 at: ", `date`, "\n";
				     print EACH_FILE $out_subs{$each_sub};
		           close EACH_FILE;
		           %out_subs=();
		           #chmod

				  }
			  }
	}#""""""""""""" end of for (@file)
	close LIB_FILE;
}



#___________________________________________________________
# Title     : is_html
# Usage     :
# Function  : Checks if it is an html file.
# Example   : $html=&is_html(\@test);
# Warning   :
# Keywords  :
# Options   : _  for debugging.
#             #  for debugging.
# Returns   :
# Argument  :
# Category  :
# Version   : 1.0
#-------------------------------------------------------
sub is_html{
	#"""""""""""""""""< handle_arguments{ head Ver 4.1 >"""""""""""""""""""
	my(@A)=&handle_arguments(@_);my($num_opt)=${$A[7]};my($char_opt)=${$A[8]};
	my(@hash)=@{$A[0]};my(@file)=@{$A[4]};my(@dir)=@{$A[3]};my(@array)=@{$A[1]};
	my(@string)=@{$A[2]};my(@num_opt)=@{$A[5]};my(@char_opt)=@{$A[6]};
	my(@raw_string)=@{$A[9]};my(%vars)=%{$A[10]};my(@range)=@{$A[11]};
	my($i,$j,$c,$d,$e,$f,$g,$h,$k,$l,$m,$n,$o,$p,$q,$r,$s,$t,$u,$v,$w,$x,$y,$z);
	if($debug==1){print "\n\t\@hash=\"@hash\"
	\@raw_string=\"@raw_string\"\n\t\@array=\"@array\"\n\t\@num_opt=\"@num_opt\"
	\@char_opt=\"@char_opt\"\n\t\@file=\"@file\"\n\t\@string=\"@string\"\n" }
	#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
	my $html=0;
	if( @string >0 ){
	 for($i=0; $i<@string; $i++){
		if($string[$i]=~/^[.\n]{0, 100}\< *HTML *\>/i){
			$html_head=1;
		}if($string[$i]=~/[.\n]+\< *\/HTML *\>[\n.]{0,100}$/i){
			$html_end=1;
		}
	 }
	 if( ($html_head eq $html_end)&&($html_end=1)){
		$html=1;
	 }
	 if($debug==1){ print "\n \@string is @string\n"; }
	}elsif(@file>0){
	 for($i=0; $i< @file; $i++){
		my $all_lines;
		open(F, "$file[$i]");
		while(<F>){
		  $all_lines.=$_;
		}
		print "\n All the lines of $file[$i] is $all_lines\n" if $debug==1;
		if($all_lines =~/\< *HTML *\>/i){
		  if($all_lines=~/\< *\/ *HTML *\>/i){
			  $html=1;
			  print "\n html matched $html\n" if $debug ==1;
		  }

Bioinf.pl  view on Meta::CPAN

# Example   :
# Warning   :
# Keywords  :
# Options   :
# Returns   : one or more scalar references according to the input numbers.
# Argument  : (343) or (\$length)
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------
sub rand_RNA_seq_generate { my($seq_length,@out_seq_ref,$rand_RNA_seq, $i,$residue);
	for($i=0; $i<@_; $i++){
	 if( ref($_[$i]) && (ref($_[$i]) eq 'SCALAR') ){
		if(${$_[$i]} =~/\d+/){ $seq_length = ${$_[$i]}; }   }
	 elsif( !ref($_[$i]) ){ if($_[$i] =~/\d+/){ $seq_length = $_[$i]; } }
	 else{ print "\n rand_RNA_generate in $0 gets number\(s\) of them\n"; exit; }
	 srand(time()|$$);
	 for (1..$seq_length) {
		 $residue = pack("c", rand(128));  redo unless $residue =~ /[ACGU]/;
		 $rand_RNA_seq .= $residue;    }
	 push(@out_seq_ref, \$rand_RNA_seq);  }
	if(@out_seq_ref == 1){ return($out_seq_ref[0]); }
	elsif(@out_seq_ref > 1){ return(@out_seq_ref); }
}

#____________________________________________________________________________
# Title     : replace_text
# Usage     : &replace_text(\@input_array_of_filenames);
# Function  : finds patterns of text and replaces them in multiple input files
# Example   :
# Warning   : This produces a temporary file and rename it...
# Keywords  : replace_txt, change_text,
# Options   :
# Returns   : nothing
# Argument  : reference of one array of file names in pwd
# Category  :
# Version   : 1.4
#--------------------------------------------------------------------
sub replace_text{
	my ($file, @input_files );
	$|=1;
	my $old=shift ;
	my $new=shift;
	print "\n# $0: OLD pattern-> $old, NEW pattern-> $new, \n";
	@input_files=@_;

	for $file(@input_files){
	 open (IN, "$file");
	 my @lines=<IN>;
	 close(IN);
		 open (OUT, ">$file") or warn "\n# $0: ERROR opening $file, check permission!\n";
	 for (@lines){
		if(/^(.*)($old)(.*)$/){
			$temp="$1${new}$3\n";
						print "\nmatched $_ in $file, becomes: $temp\n";
			print OUT $temp;
		}else{
			print OUT $_;
		}
	 }
	 close OUT;
	 if($file =~/\.pl$/){ chmod 0755, $file; } # this makes it execu..
	}
	return(\@input_files); # returning the changed files
}




#________________________________________________________________________
# Title     : get_av_seq_length
# Usage     : $std_devi_of_lengths = &get_av_seq_length(\%hash_ref);
# Function  : gets hash of sequence, compares lengths, and outs av.
# Example   :
# Warning   : uses a sub  &array_average(\@lengths);
# Keywords  :
# Options   :
# Returns   : one ref. for scaler digit.
# Argument  : one hash reference for sequences.
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_av_seq_length{
	my(%hash1)= %{$_[0]};
	my(@names)=keys %hash1;
	my($sequence, $len, @lengths, $av_seq_length);
	for $name (@names){  $sequence = $hash1{$name}; $sequence =~s/\W//g;
	 $len =length($sequence);  push(@lengths,$len); }
	$av_seq_length=&array_average(\@lengths);
	return(\$av_seq_length);
}

#________________________________________________________________________
# Title     : get_sd_of_length_diff
# Usage     : $result = &get_sd_of_length_diff(\%input);
# Function  :
# Example   :
# Warning   : removes all non-char(.-, space....) in the input string
# Keywords  :
# Options   :
# Returns   : one scaler digit
# Argument  : gets one hash reference,
# Category  :
# Version   : 1.0
#--------------------------------------------------------------------
sub get_sd_of_length_diff{
	my(%hash1)= %{$_[0]};
	my(@names)=keys %hash1;
	my($sequence, $len, @lengths, $std_devi);
	for $name (@names){ $sequence = $hash1{$name};
	 $sequence =~s/\W//g; $len =length($sequence);
	 push(@lengths,$len); }
	$std_devi=&sd(\@lengths);
	\$std_devi;
}
#________________________________________________________________________
# Title     : get_av_and_sd_seq_length
# Usage     : $get_av_and_sd_seq_length= &get_av_seq_length(\%hash_ref);
# Function  : gets ref of hash of sequence, compares lengths, and outs av.
# Example   :
# Warning   : uses a sub  &array_average(\@lengths);
# Keywords  :

Bioinf.pl  view on Meta::CPAN

sub remove_dup_in_array{
    my($i, $sort_opt, @out_ref, @nondup,%duplicate, @orig, @out_ref);
    my @in=@_;
    for($i=0; $i<@in; $i++){
        if($in[$i] eq 's'){
                       $sort_opt=1;  splice(@in, $i, 1); $i--;
        }elsif( ref($in[$i]) eq 'SCALAR'  and  ${$in[$i]} eq 's' ){
                       $sort_opt=1;  splice(@in, $i, 1); $i--;
        }
    }
    for($i=0; $i<@in; $i++){
        undef(%duplicate);
        if(ref($in[$i]) eq 'ARRAY'){    @orig = @{$in[$i]};    }
        else{ @orig=@in }
        @nondup = grep { ! $duplicate{$_}++ } @orig;    ## NOTE -> $_
        if($sort_opt==1){ @nondup= sort @nondup }
        push(@out_ref, \@nondup);
    }
    if(@out_ref ==1){ return($out_ref[0]);}
    elsif(@out_ref >1){  return(@out_ref);}
}

#________________________________________________________________________
# Title     : remove_text
# Usage     : &remove_text(\@input_array_of_filenames);
# Function  : finds patterns of text and replaces them in multiple input files
# Example   :
# Warning   : This produces a temporary file and rename it...
# Class     :
# Keywords  :
# Options   :
# Package   :
# Reference :
# Returns   : nothing
# Tips      :
# Argument  : reference of one array of file names in pwd
# Todo      :
# Author    : jong
# Category  :
# Version   : 1.3
# Enclosed  :
#--------------------------------------------------------------------
sub remove_text{
	my ( @input_files );
	$|=1;
	my $old=shift ;
	my $new='';
	@input_files=@_;
	my($file);

	for $file(@input_files){
	 open (IN, "$file");
	 my @lines=<IN>;
	 close(IN);
	 open (OUT, ">$file");
	 for $line(@lines){
			 $line=~s/$old//g;
			 print OUT $line;
	 }
	 close OUT;
	 if($file =~/\.pl$/){ chmod 0755, $file; } # this makes it execu..
	}
}




#____________________________________________________________________
# Title     : remove_elements_by_pattern
# Usage     : @out2 = @{&remove_elements_by_pattern(\@input1, \@input2,,,,)};
#             @out1 = @{&remove_elements_by_pattern(\@input1 )};
# Function  : removes elements by pattern in the array
# Example   :  @TARGET=qw(1 % $ ^ # A B 4444 44 4 4 3 33 3 11 A 3 4 4 7 AB);
#              @remove=qw(\W);  # removes all the non word stuff
#              @remove2=qw(\d );
#              @out=@{&remove_elements_by_pattern(\@TARGET, \@remove,\@remove2)};
# Warning   :
# Keywords  : remove_this_elements, remove_these_elements, remove_elements
#             remove_elements_by_position, kill_array_elements, kill_elements
#             take_away_elements, remove_array_elements
# Options   :
# Returns   : one or more references.
# Argument  : one or more refs for arrays. The first array is always the
#             only target.
# Category  :
# Version   : 1.2
#-----------------------------------------------------------------
sub remove_elements_by_pattern{
	my ($i, $j, $k, @elem);
	my @TARGET=@{$_[0]};
	if(@_ < 2){
	 print __LINE__, "\n remove_elements_by_pattern in $0 needs 2 array refs \n\n";
	 exit;
	}
	for($i=1; $i< @_; $i++){
	 if(ref($_[$i]) eq 'ARRAY'){
		push(@elem, @{$_[$i]});
	 }elsif(ref($_[$i]) eq 'SCALAR'){
		push(@elem, ${$_[$i]});
	 }else{
		push(@elem, $_[$i]);
	 }
	}
	for($j=0; $j<@TARGET; $j++){
	 for($k=0; $k<@elem; $k++){
		if($TARGET[$j] =~ /$elem[$k]/){
			splice(@TARGET, $j, 1);
			$j--;
		}
	 }
	}
	return(\@TARGET);
}


#____________________________________________________________________
# Title     : remove_elements_by_name
# Usage     : @out2 = @{&remove_elements_by_name(\@input1, \@input2)};
#             @out1 = @{&remove_elements_by_name(\@input1, \$name )};
# Function  : removes elements by name in the array
# Example   : ( two input:  (1,2,3,4,4,4,5,5,6,7), (1,3,4)  --> (2,5,5,6,7);



( run in 0.517 second using v1.01-cache-2.11-cpan-39bf76dae61 )