Bio-WebService-LANL-SequenceLocator
view release on metacpan or search on metacpan
lib/Bio/WebService/LANL/SequenceLocator.pm view on Meta::CPAN
if ( $row->[0]
and $row->[0] =~ /^[A-Za-z]+$/
and not grep { defined and length } @$row[1 .. scalar @$row - 1])
{
$table{rows}->[-1]{protein_translation} = $row->[0];
next;
}
# Not all rows are data, some are informational sentences.
next if grep { not defined } @$row;
my %row;
@row{@$our_cols} =
map { ($_ and $_ eq "NA") ? undef : $_ }
map { ($_ and /(\d+) â (\d+)/) ? [$1, $2] : $_ }
@$row;
push @{$table{rows}}, \%row;
}
push @tables, \%table
if @{$table{rows}};
}
}
# Sort by depth, then within each depth by count
@tables = sort {
$a->{coords}[0] <=> $b->{coords}[0]
or $a->{coords}[1] <=> $b->{coords}[1]
} @tables;
if (@tables > 1) {
unless ( $tables[-1]->{rows}[0]{na_from_query_start} eq "1 â"
and $tables[-1]->{rows}[0]{protein_translation} eq "X") {
warn "Last table appears to be real!? It should be the bogus table of the bogus sequence.";
warn "Table is ", Dumper($tables[-1]), "\n";
return;
} else {
pop @tables;
}
}
return @tables;
}
sub parse_alignments {
my ($self, $content) = @_;
my @alignments;
my $doc = HTML::TokeParser->new(
\$content,
unbroken_text => 1,
);
my $expect_alignment = 0;
while (my $tag = $doc->get_tag("b", "pre")) {
my $name = lc $tag->[0];
my $text = $doc->get_text;
next unless defined $text;
# <pre>s are preceeded by a bold header, which we use as an indicator
if ($name eq 'b') {
$expect_alignment = $text =~ /Alignment\s+of\s+the\s+query\s+sequence\s+to\s+HXB2/i;
} elsif ($name eq 'pre') {
if ($text =~ /^\s*Query\b/m and $text =~ /^\s*HXB2\b/m) {
push @alignments, $text;
warn "Not expecting alignment, but found oneâ½"
unless $expect_alignment;
}
elsif ($text =~ /^\s+$/ and $expect_alignment) {
push @alignments, undef; # We appear to have found an unaligned sequence.
}
$expect_alignment = 0;
}
}
if (defined $alignments[-1]) {
warn "Last alignment is non-null! It should be the empty alignment of the bogus sequence.";
warn "Alignment is <$alignments[-1]>\n";
return;
} else {
pop @alignments;
}
my @results;
for (@alignments) {
my @hxb2;
if (defined) {
push @hxb2, $1 =~ s/\s+//gr
while /^\s*HXB2\b\s+(.+?)(?:\s+\d+|\s*)$/gm;
}
push @results, {
alignment => $_,
hxb2_sequence => @hxb2 ? join("", @hxb2) : undef,
};
}
return @results;
}
=head1 AUTHOR
Thomas Sibley E<lt>trsibley@uw.eduE<gt>
=head1 COPYRIGHT
Copyright 2014 by the Mullins Lab, Department of Microbiology, University of
Washington.
=head1 LICENSE
Licensed under the same terms as Perl 5 itself.
=cut
42;
( run in 2.532 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )