BioPerl

 view release on metacpan or  search on metacpan

Bio/Assembly/ContigAnalysis.pm  view on Meta::CPAN

 Function  : Locates all low quality regions in the consensus
 Returns   : an array of Bio::SeqFeature::Generic objects
 Args      : optional arguments are
             -threshold : cutoff value for low quality (minimum high quality)
                          Default: 25
             -start     : start of interval that will be analyzed
             -end       : start of interval that will be analyzed
             -type      : coordinate system type for interval

=cut

sub low_consensus_quality {
    my ($self,@args) = shift; # Packege reference

    my ($threshold,$start,$end,$type) = 
	$self->_rearrange([qw(THRESHOLD START END TYPE)],@args);

    # Setting default value for threshold
    $threshold = 25 unless (defined($threshold));

    # Loading qualities
    my @quality = @{ $self->{'_objref'}->get_consensus_quality()->qual };

    # Changing coordinates to gap mode noaln (consed: consensus without alignments)
    $start = 1 unless (defined($start));
    if (defined $start && defined $type && ($type ne 'gapped consensus')) {
	$start = $self->{'objref'}->change_coord($type,'gapped consensus',$start);
	$end   = $self->{'objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
    }
    $end = $self->{'_objref'}->get_consensus_length unless (defined $end);

    # Scanning @quality vector and storing intervals limits with base qualities less then
    # the threshold value
    my ($lcq_start);
    my ($i,@LCQ);
    for ($i=$start-1; $i<=$end-1; $i++) {
#	print $quality[$i],"\t",$i,"\n";
	if (!defined($lcq_start) && (($quality[$i] <= $threshold) || ($quality[$i] == 98))) {
	    $lcq_start = $i+1;
	} elsif (defined($lcq_start) && ($quality[$i] > $threshold)) {
	    $lcq_start  = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$lcq_start);
	    my $lcq_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
	    push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
						     -end=>$lcq_end,
						     -primary=>'low_consensus_quality') );
	    $lcq_start = undef;
	}
    }

    if (defined $lcq_start) {
	$lcq_start  = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$lcq_start);
	my $lcq_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
	push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
						 -end=>$lcq_end,
						 -primary=>'low_consensus_quality') );
    }

    return @LCQ;
}

=head2 not_confirmed_on_both_strands

 Title     : low_quality_consensus
 Usage     : my $sfc = $ContigAnal->low_quality_consensus();
 Function  : 

             Locates all regions whose consensus bases were not
             confirmed by bases from sequences aligned in both
             orientations, i.e., in such regions, no bases in aligned
             sequences of either +1 or -1 strand agree with the
             consensus bases.

 Returns   : an array of Bio::SeqFeature::Generic objects
 Args      : optional arguments are
             -start : start of interval that will be analyzed
             -end   : start of interval that will be analyzed
             -type  : coordinate system type for interval

=cut

sub not_confirmed_on_both_strands {
    my ($self,@args) = shift; # Package reference

    my ($start,$end,$type) = 
	$self->_rearrange([qw(START END TYPE)],@args);

    # Changing coordinates to default system 'align' (contig sequence with alignments)
    $start = 1 unless (defined($start));
    if (defined($type) && ($type ne 'gapped consensus')) {
	$start = $self->{'_objref'}->change_coord($type,'gapped consensus',$start);
	$end   = $self->{'_objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
    }
    $end = $self->{'_objref'}->get_consensus_length unless (defined($end));

    # Scanning alignment
    my %confirmed = (); # If ($confirmed{$orientation}[$i] > 0) then $i is confirmed in $orientation strand
    my ($i);
    my $consensus = $self->{'_objref'}->get_consensus_sequence()->seq;
    foreach my $seqID ($self->{'_objref'}->get_seq_ids) {
	# Setting aligned read sub-sequence limits and loading data
	my $seq = $self->{'_objref'}->get_seq_by_name($seqID);
	my $sequence = $seq->seq;

	# Scanning the aligned regions of each read and registering confirmed sites
	my $contig_ix = 0;
	my $coord = $self->{'_objref'}->get_seq_feat_by_tag($seq,"_align_clipping:$seqID");
	my ($astart,$aend,$orientation) = ($coord->start,$coord->end,$coord->strand);
	$astart = $self->{'_objref'}->change_coord('gapped consensus',"aligned $seqID",$astart);
	$aend   = $self->{'_objref'}->change_coord('gapped consensus',"aligned $seqID",$aend);
	for ($i=$astart-1; $i<=$aend-1; $i++) {
	    # $i+1 in 'align' mode is $contig_ix
	    $contig_ix = $self->{'_objref'}->change_coord("aligned $seqID",'gapped consensus',$i+1);
	    next unless (($contig_ix >= $start) && ($contig_ix <= $end));
	    my $r_base = uc(substr($sequence,$i,1));
	    my $c_base = uc(substr($consensus,$contig_ix-1,1));
	    if ($c_base eq '-') {
		$confirmed{$orientation}[$contig_ix] = -1;
	    } elsif (uc($r_base) eq uc($c_base)) { # Non discrepant region found
		$confirmed{$orientation}[$contig_ix]++;
	    }
	} # for ($i=$astart-1; $i<=$aend-1; $i++)
    } # foreach $seqID (@reads)

    # Locating non-confirmed aligned regions for each orientation in $confirmed registry
    my ($orientation);
    my @NCBS = ();
    foreach $orientation (keys %confirmed) {
	my ($ncbs_start,$ncbs_end);

	for ($i=$start; $i<=$end; $i++) {
	    if (!defined($ncbs_start) &&
		(!defined($confirmed{$orientation}[$i]) || ($confirmed{$orientation}[$i] == 0))) {
		$ncbs_start = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i);
	    } elsif (defined($ncbs_start) &&
		     defined($confirmed{$orientation}[$i]) &&
		     ($confirmed{$orientation}[$i] > 0)) {
		$ncbs_end   = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$i-1);
		push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
							  -end=>$ncbs_end,
							  -strand=>$orientation,
							  -primary=>"not_confirmed_on_both_strands") );
		$ncbs_start = undef;
	    }
	}

	if (defined($ncbs_start)) { # NCBS at the end of contig
	    $ncbs_end = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$end);
	    push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
						      -end=>$ncbs_end,
						      -strand=>$orientation,
						      -primary=>'not_confirmed_on_both_strands') );
	}
    }

    return @NCBS;
}

=head2 single_strand

 Title     : single_strand
 Usage     : my $sfc = $ContigAnal->single_strand();
 Function  : 

             Locates all regions covered by aligned sequences only in
             one of the two strands, i.e., regions for which aligned
             sequence's strand() method returns +1 or -1 for all
             sequences.

 Returns   : an array of Bio::SeqFeature::Generic objects
 Args      : optional arguments are
             -start : start of interval that will be analyzed
             -end   : start of interval that will be analyzed
             -type  : coordinate system type for interval

=cut

#'
sub single_strand {
    my ($self,@args) = shift; # Package reference

    my ($start,$end,$type) = 
	$self->_rearrange([qw(START END TYPE)],@args);

    # Changing coordinates to gap mode align (consed: consensus sequence with alignments)
    $type  = 'gapped consensus' unless(defined($type));
    $start = 1 unless (defined($start));
    if (defined($type) && $type ne 'gapped consensus') {
	$start = $self->{'objref'}->change_coord($type,'gapped consensus',$start);
	$end   = $self->{'objref'}->change_coord($type,'gapped consensus',$end) if (defined($end));
    }
    ($end) = $self->{'_objref'}->get_consensus_length unless (defined($end));

    # Loading complete list of coordinates for aligned sequences
    my $sfc = $self->{'_objref'}->get_features_collection();
    my @forward = grep { $_->primary_tag =~ /^_aligned_coord:/ } 
    $sfc->features_in_range(-start=>$start,
			    -end=>$end,
			    -contain=>0,
			    -strand=>1,
			    -strandmatch=>'strong');
    my @reverse = grep { $_->primary_tag =~ /^_aligned_coord:/ } 
    $sfc->features_in_range(-start=>$start,
			    -end=>$end,
			    -contain=>0,
			    -strand=>-1,
			    -strandmatch=>'strong');
    # Merging overlapping features
    @forward = $self->_merge_overlapping_features(@forward);
    @reverse = $self->_merge_overlapping_features(@reverse);

    # Finding single stranded regions
    my ($length) = $self->{'_objref'}->get_consensus_length;
    $length  = $self->{'_objref'}->change_coord('gapped consensus','ungapped consensus',$length);
    @forward = $self->_complementary_features_list(1,$length,@forward);
    @reverse = $self->_complementary_features_list(1,$length,@reverse);

    my @SS = ();
    foreach my $feat (@forward, @reverse) {
	$feat->primary_tag('single_strand_region');
	push(@SS,$feat);
    }

    return @SS;
}

=head1 Internal Methods

=head2 _merge_overlapping_features

 Title     : _merge_overlapping_features
 Usage     : my @feat = $ContigAnal->_merge_overlapping_features(@features);
 Function  : Merge all overlapping features into features
             that hold original features as sub-features
 Returns   : array of Bio::SeqFeature::Generic objects
 Args      : array of Bio::SeqFeature::Generic objects

=cut

sub _merge_overlapping_features {
    my ($self,@feat) = @_;

    $self->throw_not_implemented();
}

=head2 _complementary_features_list

 Title     : _complementary_features_list
 Usage     : @feat = $ContigAnal->_complementary_features_list($start,$end,@features);
 Function  : Build a list of features for regions
             not covered by features in @features array
 Returns   : array of Bio::SeqFeature::Generic objects
 Args      : 
             $start    : [integer] start of first output feature
             $end      : [integer] end of last output feature
             @features : array of Bio::SeqFeature::Generic objects

=cut

sub _complementary_features_list {
    my ($self,$start,$end,@feat) = @_;

    $self->throw_not_implemented();
}

1;

__END__



( run in 0.486 second using v1.01-cache-2.11-cpan-39bf76dae61 )