Bio-FastParsers

 view release on metacpan or  search on metacpan

lib/Bio/FastParsers/Hmmer/Standard/Domain.pm  view on Meta::CPAN

package Bio::FastParsers::Hmmer::Standard::Domain;
# ABSTRACT: Internal class for standard HMMER parser
# CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>
$Bio::FastParsers::Hmmer::Standard::Domain::VERSION = '0.221230';
use Moose;
use namespace::autoclean;

use List::AllUtils qw(mesh);


# public attributes

# Declare one required, read-only string attribute per name in the list.
for my $attr_name ( qw(seq scoreseq profile probabilities) ) {
    has $attr_name => (
        is       => 'ro',
        isa      => 'Str',
        required => 1,
    );
}

# The role is composed after the attributes above have been declared.
with 'Bio::FastParsers::Roles::Domainable';

around BUILDARGS => sub {
    my ($orig, $class, $inargs) = @_;

    my @raw = @{ $inargs->{raw} };
    my $summary = $inargs->{summary};

    my %outargs;

    # parse header
    my @header_vals = $raw[0] =~ m/([\d\.]+)/xmsg;
    $outargs{'rank'}      = $header_vals[0];
    $outargs{'dom_score'} = $header_vals[1];
    $outargs{'c_evalue'}
        = @header_vals == 3 ? $header_vals[2] : join 'e-', @header_vals[2,3]
    ;

    # coerce numeric fields to numbers
    %outargs = map { $_ => 0 + $outargs{$_} } keys %outargs;

    # parse domain alignment

    # The alignment is made of 4 lines: best match to profile, scoring
    # correspondence, sequence alignment and posterior probabilities. Each
    # line is shifted to the right by the same number of characters, which
    # differs for each target. To get the size of the shift, I insert a
    # special marker ('|||') into the seqline and split on it. The lengths of
    # the two resulting parts give the offset and the width needed to
    # correctly extract the matching information from the other lines.

    my $profileline = $raw[1];
    my $scoreline = $raw[2];
    my $probline = $raw[4];
    ( my $seqline = $raw[3] )
        =~ s{(^\s+.*\s+\d+\s+)(\S+)\s\d+\s*$}{$1\|\|\|$2}xms;
    chomp $seqline;
    my ($skip, $tmpseq) = split /\|{3}/xms, $seqline;
    my $scoreseq = substr $scoreline, length $skip, length $tmpseq;
    my $profileseq = substr $profileline, length $skip, length $tmpseq;
    my $probabilities = substr $probline, length $skip, length $tmpseq;
    $outargs{'seq'} = $tmpseq;
    $outargs{'scoreseq'} = $scoreseq;
    $outargs{'profile'} = $profileseq;
    $outargs{'probabilities'} = $probabilities;

    # attributes from summary domtbl
    my @summary_attrs = qw(
        dom_bias i_evalue



( run in 0.735 second using v1.01-cache-2.11-cpan-5a3173703d6 )