AlignDB-GC
view release on metacpan or search on metacpan
lib/AlignDB/GC.pm view on Meta::CPAN
foreach my $sliding_set (@sliding_windows) {
my @sliding_seqs = map { $sliding_set->substr_span($_) } @seqs;
my $sliding_gc = _calc_gc_ratio(@sliding_seqs);
push @sliding_gcs, $sliding_gc;
}
my $gc_mean = _mean(@sliding_gcs);
my $gc_std = _stddev(@sliding_gcs);
my $gc_cv
= $gc_mean == 0 || $gc_mean == 1 ? undef
: $gc_mean <= 0.5 ? $gc_std / $gc_mean
: $gc_std / ( 1 - $gc_mean );
my $gc_mdcw = $self->skip_mdcw ? undef : _mdcw(@sliding_gcs);
return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw );
}
sub _center_resize {
my AlignDB::IntSpan $old_set = shift;
my AlignDB::IntSpan $parent_set = shift;
my $resize = shift;
# find the middles of old_set
my $half_size = int( $old_set->size / 2 );
my $midleft = $old_set->at($half_size);
my $midright = $old_set->at( $half_size + 1 );
my $midleft_parent_idx = $parent_set->index($midleft);
my $midright_parent_idx = $parent_set->index($midright);
return unless $midleft_parent_idx and $midright_parent_idx;
# map to parent
my $parent_size = $parent_set->size;
my $half_resize = int( $resize / 2 );
my $new_left_idx = $midleft_parent_idx - $half_resize + 1;
$new_left_idx = 1 if $new_left_idx < 1;
my $new_right_idx = $midright_parent_idx + $half_resize - 1;
$new_right_idx = $parent_size if $new_right_idx > $parent_size;
my $new_set = $parent_set->slice( $new_left_idx, $new_right_idx );
return $new_set;
}
sub segment_gc_stat_one {
my $self = shift;
my $seqs_ref = shift;
my $comparable_set = shift;
my $segment_set = shift;
my $stat_segment_size = 500;
my $resize_set = _center_resize( $segment_set, $comparable_set, $stat_segment_size );
next unless $resize_set;
my @segment_seqs = map { $segment_set->substr_span($_) } @{$seqs_ref};
my $gc_mean = _calc_gc_ratio(@segment_seqs);
my ( $gc_std, $gc_mdcw ) = ( undef, undef );
my ( undef, undef, $gc_cv, undef ) = $self->segment_gc_stat( $seqs_ref, $resize_set, 100, 100 );
return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw );
}
sub _mean {
@_ = grep { defined $_ } @_;
return unless @_;
return $_[0] unless @_ > 1;
return List::Util::sum(@_) / scalar(@_);
}
sub _variance {
@_ = grep { defined $_ } @_;
return unless @_;
return 0 unless @_ > 1;
my $mean = _mean(@_);
return List::Util::sum( map { ( $_ - $mean )**2 } @_ ) / $#_;
}
sub _stddev {
@_ = grep { defined $_ } @_;
return unless @_;
return 0 unless @_ > 1;
return sqrt( _variance(@_) );
}
sub _mdcw {
my @array = @_;
if ( @array <= 1 ) {
warn "There should be more than 1 elements in the array.\n";
return;
}
my @dcws;
for ( 1 .. $#array ) {
my $dcw = $array[$_] - $array[ $_ - 1 ];
push @dcws, abs($dcw);
}
return _mean(@dcws);
}
# find local maxima, minima
sub find_extreme_step1 {
my $self = shift;
my $windows = shift;
my $count = scalar @$windows;
my $wave_window_size = $self->wave_window_size;
my $wave_window_step = $self->wave_window_step;
my $vicinal_size = $self->vicinal_size;
my $vicinal_number = int( $vicinal_size / $wave_window_step );
for my $i ( 0 .. $count - 1 ) {
#----------------------------#
# right vicinal window
#----------------------------#
( run in 1.687 second using v1.01-cache-2.11-cpan-39bf76dae61 )