AlignDB-GC

 view release on metacpan or  search on metacpan

lib/AlignDB/GC.pm  view on Meta::CPAN

    foreach my $sliding_set (@sliding_windows) {
        my @sliding_seqs = map { $sliding_set->substr_span($_) } @seqs;
        my $sliding_gc = _calc_gc_ratio(@sliding_seqs);
        push @sliding_gcs, $sliding_gc;
    }

    my $gc_mean = _mean(@sliding_gcs);
    my $gc_std  = _stddev(@sliding_gcs);
    my $gc_cv
        = $gc_mean == 0 || $gc_mean == 1 ? undef
        : $gc_mean <= 0.5 ? $gc_std / $gc_mean
        :                   $gc_std / ( 1 - $gc_mean );
    my $gc_mdcw = $self->skip_mdcw ? undef : _mdcw(@sliding_gcs);

    return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw );
}

sub _center_resize {
    my AlignDB::IntSpan $old_set    = shift;
    my AlignDB::IntSpan $parent_set = shift;
    my $resize                      = shift;

    # find the middles of old_set
    my $half_size           = int( $old_set->size / 2 );
    my $midleft             = $old_set->at($half_size);
    my $midright            = $old_set->at( $half_size + 1 );
    my $midleft_parent_idx  = $parent_set->index($midleft);
    my $midright_parent_idx = $parent_set->index($midright);

    return unless $midleft_parent_idx and $midright_parent_idx;

    # map to parent
    my $parent_size  = $parent_set->size;
    my $half_resize  = int( $resize / 2 );
    my $new_left_idx = $midleft_parent_idx - $half_resize + 1;
    $new_left_idx = 1 if $new_left_idx < 1;
    my $new_right_idx = $midright_parent_idx + $half_resize - 1;
    $new_right_idx = $parent_size if $new_right_idx > $parent_size;

    my $new_set = $parent_set->slice( $new_left_idx, $new_right_idx );

    return $new_set;
}

sub segment_gc_stat_one {
    my $self           = shift;
    my $seqs_ref       = shift;
    my $comparable_set = shift;
    my $segment_set    = shift;

    my $stat_segment_size = 500;

    my $resize_set = _center_resize( $segment_set, $comparable_set, $stat_segment_size );
    next unless $resize_set;

    my @segment_seqs = map { $segment_set->substr_span($_) } @{$seqs_ref};

    my $gc_mean = _calc_gc_ratio(@segment_seqs);
    my ( $gc_std, $gc_mdcw ) = ( undef, undef );

    my ( undef, undef, $gc_cv, undef ) = $self->segment_gc_stat( $seqs_ref, $resize_set, 100, 100 );

    return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw );
}

sub _mean {
    @_ = grep { defined $_ } @_;
    return unless @_;
    return $_[0] unless @_ > 1;
    return List::Util::sum(@_) / scalar(@_);
}

sub _variance {
    @_ = grep { defined $_ } @_;
    return   unless @_;
    return 0 unless @_ > 1;
    my $mean = _mean(@_);
    return List::Util::sum( map { ( $_ - $mean )**2 } @_ ) / $#_;
}

sub _stddev {
    @_ = grep { defined $_ } @_;
    return   unless @_;
    return 0 unless @_ > 1;
    return sqrt( _variance(@_) );
}

sub _mdcw {
    my @array = @_;

    if ( @array <= 1 ) {
        warn "There should be more than 1 elements in the array.\n";
        return;
    }

    my @dcws;
    for ( 1 .. $#array ) {
        my $dcw = $array[$_] - $array[ $_ - 1 ];
        push @dcws, abs($dcw);
    }

    return _mean(@dcws);
}

# find local maxima, minima
sub find_extreme_step1 {
    my $self    = shift;
    my $windows = shift;

    my $count = scalar @$windows;

    my $wave_window_size = $self->wave_window_size;
    my $wave_window_step = $self->wave_window_step;
    my $vicinal_size     = $self->vicinal_size;
    my $vicinal_number   = int( $vicinal_size / $wave_window_step );

    for my $i ( 0 .. $count - 1 ) {

        #----------------------------#
        # right vicinal window
        #----------------------------#



( run in 1.687 second using v1.01-cache-2.11-cpan-39bf76dae61 )