BioPerl
view release on metacpan or search on metacpan
Bio/Assembly/ContigAnalysis.pm view on Meta::CPAN
Function : Locates all low quality regions in the consensus
Returns : an array of Bio::SeqFeature::Generic objects
Args : optional arguments are
-threshold : cutoff value for low quality (minimum high quality)
Default: 25
-start : start of interval that will be analyzed
-end : start of interval that will be analyzed
-type : coordinate system type for interval
=cut
sub low_consensus_quality {
my ($self,@args) = shift; # Packege reference
my ($threshold,$start,$end,$type) =
$self->_rearrange([qw(THRESHOLD START END TYPE)],@args);
# Setting default value for threshold
$threshold = 25 unless (defined($threshold));
# Loading qualities
my @quality = @{ $self->{'_objref'}->get_consensus_quality()->qual };
# Changing coordinates to gap mode noaln (consed: consensus without alignments)
$start = 1 unless (defined($start));
if (defined $start && defined $type && ($type ne 'gapped consensus')) {
$start = $self->{'objref'}->change_coord($type,'gapped consensus',$start);
$end = $self->{'objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
}
$end = $self->{'_objref'}->get_consensus_length unless (defined $end);
# Scanning @quality vector and storing intervals limits with base qualities less then
# the threshold value
my ($lcq_start);
my ($i,@LCQ);
for ($i=$start-1; $i<=$end-1; $i++) {
# print $quality[$i],"\t",$i,"\n";
if (!defined($lcq_start) && (($quality[$i] <= $threshold) || ($quality[$i] == 98))) {
$lcq_start = $i+1;
} elsif (defined($lcq_start) && ($quality[$i] > $threshold)) {
$lcq_start = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$lcq_start);
my $lcq_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
-end=>$lcq_end,
-primary=>'low_consensus_quality') );
$lcq_start = undef;
}
}
if (defined $lcq_start) {
$lcq_start = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$lcq_start);
my $lcq_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
-end=>$lcq_end,
-primary=>'low_consensus_quality') );
}
return @LCQ;
}
=head2 not_confirmed_on_both_strands
Title : low_quality_consensus
Usage : my $sfc = $ContigAnal->low_quality_consensus();
Function :
Locates all regions whose consensus bases were not
confirmed by bases from sequences aligned in both
orientations, i.e., in such regions, no bases in aligned
sequences of either +1 or -1 strand agree with the
consensus bases.
Returns : an array of Bio::SeqFeature::Generic objects
Args : optional arguments are
-start : start of interval that will be analyzed
-end : start of interval that will be analyzed
-type : coordinate system type for interval
=cut
sub not_confirmed_on_both_strands {
my ($self,@args) = shift; # Package reference
my ($start,$end,$type) =
$self->_rearrange([qw(START END TYPE)],@args);
# Changing coordinates to default system 'align' (contig sequence with alignments)
$start = 1 unless (defined($start));
if (defined($type) && ($type ne 'gapped consensus')) {
$start = $self->{'_objref'}->change_coord($type,'gapped consensus',$start);
$end = $self->{'_objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
}
$end = $self->{'_objref'}->get_consensus_length unless (defined($end));
# Scanning alignment
my %confirmed = (); # If ($confirmed{$orientation}[$i] > 0) then $i is confirmed in $orientation strand
my ($i);
my $consensus = $self->{'_objref'}->get_consensus_sequence()->seq;
foreach my $seqID ($self->{'_objref'}->get_seq_ids) {
# Setting aligned read sub-sequence limits and loading data
my $seq = $self->{'_objref'}->get_seq_by_name($seqID);
my $sequence = $seq->seq;
# Scanning the aligned regions of each read and registering confirmed sites
my $contig_ix = 0;
my $coord = $self->{'_objref'}->get_seq_feat_by_tag($seq,"_align_clipping:$seqID");
my ($astart,$aend,$orientation) = ($coord->start,$coord->end,$coord->strand);
$astart = $self->{'_objref'}->change_coord('gapped consensus',"aligned $seqID",$astart);
$aend = $self->{'_objref'}->change_coord('gapped consensus',"aligned $seqID",$aend);
for ($i=$astart-1; $i<=$aend-1; $i++) {
# $i+1 in 'align' mode is $contig_ix
$contig_ix = $self->{'_objref'}->change_coord("aligned $seqID",'gapped consensus',$i+1);
next unless (($contig_ix >= $start) && ($contig_ix <= $end));
my $r_base = uc(substr($sequence,$i,1));
my $c_base = uc(substr($consensus,$contig_ix-1,1));
if ($c_base eq '-') {
$confirmed{$orientation}[$contig_ix] = -1;
} elsif (uc($r_base) eq uc($c_base)) { # Non discrepant region found
$confirmed{$orientation}[$contig_ix]++;
}
} # for ($i=$astart-1; $i<=$aend-1; $i++)
} # foreach $seqID (@reads)
# Locating non-confirmed aligned regions for each orientation in $confirmed registry
my ($orientation);
my @NCBS = ();
foreach $orientation (keys %confirmed) {
my ($ncbs_start,$ncbs_end);
for ($i=$start; $i<=$end; $i++) {
if (!defined($ncbs_start) &&
(!defined($confirmed{$orientation}[$i]) || ($confirmed{$orientation}[$i] == 0))) {
$ncbs_start = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
} elsif (defined($ncbs_start) &&
defined($confirmed{$orientation}[$i]) &&
($confirmed{$orientation}[$i] > 0)) {
$ncbs_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i-1);
push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
-end=>$ncbs_end,
-strand=>$orientation,
-primary=>"not_confirmed_on_both_strands") );
$ncbs_start = undef;
}
}
if (defined($ncbs_start)) { # NCBS at the end of contig
$ncbs_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$end);
push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
-end=>$ncbs_end,
-strand=>$orientation,
-primary=>'not_confirmed_on_both_strands') );
}
}
return @NCBS;
}
=head2 single_strand
Title : single_strand
Usage : my $sfc = $ContigAnal->single_strand();
Function :
Locates all regions covered by aligned sequences only in
one of the two strands, i.e., regions for which aligned
sequence's strand() method returns +1 or -1 for all
sequences.
Returns : an array of Bio::SeqFeature::Generic objects
Args : optional arguments are
-start : start of interval that will be analyzed
-end : start of interval that will be analyzed
-type : coordinate system type for interval
=cut
#'
sub single_strand {
my ($self,@args) = shift; # Package reference
my ($start,$end,$type) =
$self->_rearrange([qw(START END TYPE)],@args);
# Changing coordinates to gap mode align (consed: consensus sequence with alignments)
$type = 'gapped consensus' unless(defined($type));
$start = 1 unless (defined($start));
if (defined($type) && $type ne 'gapped consensus') {
$start = $self->{'objref'}->change_coord($type,'gapped consensus',$start);
$end = $self->{'objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
}
($end) = $self->{'_objref'}->get_consensus_length unless (defined($end));
# Loading complete list of coordinates for aligned sequences
my $sfc = $self->{'_objref'}->get_features_collection();
my @forward = grep { $_->primary_tag =~ /^_aligned_coord:/ }
$sfc->features_in_range(-start=>$start,
-end=>$end,
-contain=>0,
-strand=>1,
-strandmatch=>'strong');
my @reverse = grep { $_->primary_tag =~ /^_aligned_coord:/ }
$sfc->features_in_range(-start=>$start,
-end=>$end,
-contain=>0,
-strand=>-1,
-strandmatch=>'strong');
# Merging overlapping features
@forward = $self->_merge_overlapping_features(@forward);
@reverse = $self->_merge_overlapping_features(@reverse);
# Finding single stranded regions
my ($length) = $self->{'_objref'}->get_consensus_length;
$length = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$length);
@forward = $self->_complementary_features_list(1,$length,@forward);
@reverse = $self->_complementary_features_list(1,$length,@reverse);
my @SS = ();
foreach my $feat (@forward, @reverse) {
$feat->primary_tag('single_strand_region');
push(@SS,$feat);
}
return @SS;
}
=head1 Internal Methods
=head2 _merge_overlapping_features
Title : _merge_overlapping_features
Usage : my @feat = $ContigAnal->_merge_overlapping_features(@features);
Function : Merge all overlapping features into features
that hold original features as sub-features
Returns : array of Bio::SeqFeature::Generic objects
Args : array of Bio::SeqFeature::Generic objects
=cut
sub _merge_overlapping_features {
my ($self,@feat) = @_;
$self->throw_not_implemented();
}
=head2 _complementary_features_list
Title : _complementary_features_list
Usage : @feat = $ContigAnal->_complementary_features_list($start,$end,@features);
Function : Build a list of features for regions
not covered by features in @features array
Returns : array of Bio::SeqFeature::Generic objects
Args :
$start : [integer] start of first output feature
$end : [integer] end of last output feature
@features : array of Bio::SeqFeature::Generic objects
=cut
sub _complementary_features_list {
my ($self,$start,$end,@feat) = @_;
$self->throw_not_implemented();
}
1;
__END__
( run in 0.486 second using v1.01-cache-2.11-cpan-39bf76dae61 )