Bio-FastParsers

 view release on metacpan or  search on metacpan

lib/Bio/FastParsers/CdHit.pm  view on Meta::CPAN

package Bio::FastParsers::CdHit;
# ABSTRACT: Front-end class for CD-HIT parser
# CONTRIBUTOR: Amandine BERTRAND <amandine.bertrand@doct.uliege.be>
$Bio::FastParsers::CdHit::VERSION = '0.221230';
use Moose;
use namespace::autoclean;

use autodie;

use Tie::IxHash;

# Make Bio::FastParsers::Base the superclass of this parser front-end.
# NOTE(review): BUILD below calls $self->filename, which is not declared in
# this class -- presumably it is one of the attributes supplied by the base
# class; confirm against Bio::FastParsers::Base.
extends 'Bio::FastParsers::Base';


# public attributes (inherited)


# Compose the Clusterable role into this class. It is applied after the
# superclass is set, so the role sees the full inheritance chain.
# NOTE(review): the methods this role requires or provides are not visible
# here -- check Bio::FastParsers::Roles::Clusterable for its contract.
with 'Bio::FastParsers::Roles::Clusterable';


sub BUILD {
    my $self = shift;

    my $cluster_like = qr{\>Cluster \s (\d+)}xms;
    my $repr_id_like = qr{\d+ \t \d+\w{2}\, \s \>([\w\|\.]+) .{4} \*      }xms;
    my $memb_id_like = qr{\d+ \t \d+\w{2}\, \s \>([\w\|\.]+) .{4} at .* \%}xms;

    my $infile = $self->filename;
    open my $in, '<', $infile;

    tie my %members_for, 'Tie::IxHash';

    my $repr_id;
    my @members;

    while (my $line = <$in>) {
        chomp $line;

        if ($line =~ $cluster_like){
            #### cluster: $line
            push @{ $members_for{$repr_id} }, @members
                if $repr_id;
            $repr_id = q{};
            @members = ();
            #### $repr_id
            #### @members
            #### %members_for
        }

        elsif ($line =~ $repr_id_like) {
            #### reference sequence: $line
            $repr_id = $1;
            #### $repr_id
        }

        # find other seq (array)
        elsif ($line =~ $memb_id_like){
            #### member sequence: $line
            my $memb_id = $1;
            push @members, $memb_id;
            #### @members
        }
    }

    push @{ $members_for{$repr_id} }, @members
        if $repr_id;



( run in 1.147 second using v1.01-cache-2.11-cpan-39bf76dae61 )