FAST
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/FAST/Bio/SearchIO/exonerate.pm view on Meta::CPAN
}
$self->cigar($cigar);
$self->vulgar($vulgar);
$self;
}
=head2 next_result
Title : next_result
Usage : my $hit = $searchio->next_result;
Function: Returns the next Result from a search
Returns : FAST::Bio::Search::Result::ResultI object
Args : none
=cut
sub next_result{
my ($self) = @_;
local $/ = "\n";
local $_;
$self->{'_last_data'} = '';
my ($reporttype,$seenquery,$reportline);
$self->start_document();
my @hit_signifs;
my $seentop;
my (@q_ex, @m_ex, @h_ex); ## gc addition
while( defined($_ = $self->_readline) ) {
# warn( "Reading $_");
if( /^\s*Query:\s+(\S+)\s*(.+)?/ ) {
if( $seentop ) {
$self->end_element({'Name' => 'ExonerateOutput'});
$self->_pushback($_);
return $self->end_document();
}
$seentop = 1;
my ($nm,$desc) = ($1,$2);
chomp($desc) if defined $desc;
$self->{'_result_count'}++;
$self->start_element({'Name' => 'ExonerateOutput'});
$self->element({'Name' => 'ExonerateOutput_query-def',
'Data' => $nm });
$self->element({'Name' => 'ExonerateOutput_query-desc',
'Data' => $desc });
$self->element({'Name' => 'ExonerateOutput_program',
'Data' => 'Exonerate' });
$self->{'_seencigar'} = 0;
$self->{'_vulgar'} = 0;
} elsif ( /^Target:\s+(\S+)\s*(.+)?/ ) {
my ($nm,$desc) = ($1,$2);
chomp($desc) if defined $desc;
$self->start_element({'Name' => 'Hit'});
$self->element({'Name' => 'Hit_id',
'Data' => $nm});
$self->element({'Name' => 'Hit_desc',
'Data' => $desc});
$self->{'_seencigar'} = 0;
$self->{'_vulgar'} = 0;
} elsif( s/^vulgar:\s+(\S+)\s+ # query sequence id
(\d+)\s+(\d+)\s+([\-\+\.])\s+ # query start-end-strand
(\S+)\s+ # target sequence id
(\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand
(\d+)\s+ # score
//ox ) {
next if( $self->cigar || $self->{'_seencigar'});
$self->{'_vulgar'}++;
#
# Note from Ewan. This is ugly - copy and paste from
# cigar line parsing. Should unify somehow...
#
if( ! $self->within_element('result') ) {
$self->start_element({'Name' => 'ExonerateOutput'});
$self->element({'Name' => 'ExonerateOutput_query-def',
'Data' => $1 });
}
if( ! $self->within_element('hit') ) {
$self->start_element({'Name' => 'Hit'});
$self->element({'Name' => 'Hit_id',
'Data' => $5});
}
## gc note:
## $qe and $he are no longer used for calculating the ends,
## just the $qs and $hs values and the alignment and insert lenghts
my ($qs,$qe,$qstrand) = ($2,$3,$4);
my ($hs,$he,$hstrand) = ($6,$7,$8);
my $score = $9;
# $self->element({'Name' => 'ExonerateOutput_query-len',
# 'Data' => $qe});
# $self->element({'Name' => 'Hit_len',
# 'Data' => $he});
## gc note:
## add one because these values are zero-based
## this calculation was originally done lower in the code,
## but it's clearer to do it just once at the start
my @rest = split;
my ($qbegin,$qend) = ('query-from', 'query-to');
if( $qstrand eq '-' ) {
$qstrand = -1; $qe++;
} else {
$qstrand = 1;
$qs++;
}
my ($hbegin,$hend) = ('hit-from', 'hit-to');
if( $hstrand eq '-' ) {
$hstrand = -1;
$he++;
} else {
$hstrand = 1;
$hs++;
}
# okay let's do this right and generate a set of HSPs
# from the cigar line/home/bio1/jes12/bin/exonerate --model est2genome --bestn 1 t/data/exonerate_cdna.fa t/data/exonerate_genomic_rev.fa
my ($aln_len,$inserts,$deletes) = (0,0,0);
my ($laststate,@events,$gaps) =( '' );
while( @rest >= 3 ) {
my ($state,$len1,$len2) = (shift @rest, shift @rest, shift @rest);
#
# HSPs are only the Match cases; otherwise we just
# move the coordinates on by the correct amount
#
if( $state eq 'M' ) {
if( $laststate eq 'G' ) {
# merge gaps across Match states so the HSP
# goes across
$events[-1]->{$qend} = $qs + $len1*$qstrand - $qstrand;
$events[-1]->{$hend} = $hs + $len2*$hstrand - $hstrand;
$events[-1]->{'gaps'} = $gaps;
} else {
push @events,
{ 'score' => $score,
'align-len' => $len1,
$qbegin => $qs,
$qend => ($qs + $len1*$qstrand - $qstrand),
$hbegin => $hs,
$hend => ($hs + $len2*$hstrand - $hstrand),
};
}
$gaps = 0;
} else {
$gaps = $len1 + $len2 if $state eq 'G';
}
$qs += $len1*$qstrand;
$hs += $len2*$hstrand;
$laststate= $state;
}
for my $event ( @events ) {
$self->start_element({'Name' => 'Hsp'});
while( my ($key,$val) = each %$event ) {
$self->element({'Name' => "Hsp_$key",
'Data' => $val});
}
$self->element({'Name' => 'Hsp_identity',
'Data' => 0});
$self->end_element({'Name' => 'Hsp'});
}
# end of hit
$self->element({'Name' => 'Hit_score',
'Data' => $score});
# issued end...
$self->end_element({'Name' => 'Hit'});
$self->end_element({'Name' => 'ExonerateOutput'});
return $self->end_document();
} elsif( s/^cigar:\s+(\S+)\s+ # query sequence id
(\d+)\s+(\d+)\s+([\-\+])\s+ # query start-end-strand
(\S+)\s+ # target sequence id
(\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand
(\d+)\s+ # score
//ox ) {
next if( $self->vulgar || $self->{'_seenvulgar'});
$self->{'_cigar'}++;
if( ! $self->within_element('result') ) {
$self->start_element({'Name' => 'ExonerateOutput'});
$self->element({'Name' => 'ExonerateOutput_query-def',
'Data' => $1 });
}
if( ! $self->within_element('hit') ) {
$self->start_element({'Name' => 'Hit'});
$self->element({'Name' => 'Hit_id',
'Data' => $5});
}
## gc note:
## $qe and $he are no longer used for calculating the ends,
## just the $qs and $hs values and the alignment and insert lenghts
my ($qs,$qe,$qstrand) = ($2,$3,$4);
my ($hs,$he,$hstrand) = ($6,$7,$8);
my $score = $9;
# $self->element({'Name' => 'ExonerateOutput_query-len',
# 'Data' => $qe});
# $self->element({'Name' => 'Hit_len',
# 'Data' => $he});
my @rest = split;
if( $qstrand eq '-' ) {
$qstrand = -1;
($qs,$qe) = ($qe,$qs); # flip-flop if we're on opp strand
$qs--; $qe++;
} else { $qstrand = 1; }
if( $hstrand eq '-' ) {
$hstrand = -1;
($hs,$he) = ($he,$hs); # flip-flop if we're on opp strand
$hs--; $he++;
} else { $hstrand = 1; }
# okay let's do this right and generate a set of HSPs
# from the cigar line
## gc note:
## add one because these values are zero-based
## this calculation was originally done lower in the code,
## but it's clearer to do it just once at the start
$qs++; $hs++;
my ($aln_len,$inserts,$deletes) = (0,0,0);
while( @rest >= 2 ) {
my ($state,$len) = (shift @rest, shift @rest);
if( $state eq 'I' ) {
$inserts+=$len;
} elsif( $state eq 'D' ) {
if( $len >= $MIN_INTRON ) {
$self->start_element({'Name' => 'Hsp'});
$self->element({'Name' => 'Hsp_score',
'Data' => $score});
$self->element({'Name' => 'Hsp_align-len',
'Data' => $aln_len});
$self->element({'Name' => 'Hsp_identity',
view all matches for this distributionview release on metacpan - search on metacpan
( run in 1.002 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )