Bio-FastParsers
view release on metacpan or search on metacpan
lib/Bio/FastParsers/CdHit.pm view on Meta::CPAN
package Bio::FastParsers::CdHit;
# ABSTRACT: Front-end class for CD-HIT parser
# CONTRIBUTOR: Amandine BERTRAND <amandine.bertrand@doct.uliege.be>
$Bio::FastParsers::CdHit::VERSION = '0.221230';
# Moose-based class. Its BUILD hook (below) opens the file named by
# $self->filename, scans it line by line, and collects member sequence ids
# under the id of their cluster's representative sequence.
use Moose;
# Keeps imported helper functions from leaking into the class as methods.
use namespace::autoclean;
# Makes I/O built-ins throw on failure; this is why the open() call in BUILD
# carries no explicit "or die".
use autodie;
# Tied onto %members_for in BUILD. Presumably chosen so that clusters keep
# the order in which they appear in the input file -- confirm against the
# Tie::IxHash documentation.
use Tie::IxHash;
# Parent class.
# NOTE(review): `filename`, which BUILD reads, is presumably declared there
# -- confirm in Bio::FastParsers::Base.
extends 'Bio::FastParsers::Base';
# public attributes (inherited)
# Role composed into this class; what it requires or provides is not visible
# from this file.
with 'Bio::FastParsers::Roles::Clusterable';
sub BUILD {
my $self = shift;
my $cluster_like = qr{\>Cluster \s (\d+)}xms;
my $repr_id_like = qr{\d+ \t \d+\w{2}\, \s \>([\w\|\.]+) .{4} \* }xms;
my $memb_id_like = qr{\d+ \t \d+\w{2}\, \s \>([\w\|\.]+) .{4} at .* \%}xms;
my $infile = $self->filename;
open my $in, '<', $infile;
tie my %members_for, 'Tie::IxHash';
my $repr_id;
my @members;
while (my $line = <$in>) {
chomp $line;
if ($line =~ $cluster_like){
#### cluster: $line
push @{ $members_for{$repr_id} }, @members
if $repr_id;
$repr_id = q{};
@members = ();
#### $repr_id
#### @members
#### %members_for
}
elsif ($line =~ $repr_id_like) {
#### reference sequence: $line
$repr_id = $1;
#### $repr_id
}
# find other seq (array)
elsif ($line =~ $memb_id_like){
#### member sequence: $line
my $memb_id = $1;
push @members, $memb_id;
#### @members
}
}
push @{ $members_for{$repr_id} }, @members
if $repr_id;
( run in 1.147 second using v1.01-cache-2.11-cpan-39bf76dae61 )