Bio-FastParsers
view release on metacpan or search on metacpan
lib/Bio/FastParsers/Uclust.pm view on Meta::CPAN
package Bio::FastParsers::Uclust;
# ABSTRACT: Front-end class for UCLUST parser
# CONTRIBUTOR: Amandine BERTRAND <amandine.bertrand@doct.uliege.be>
$Bio::FastParsers::Uclust::VERSION = '0.221230';
use Moose;
use namespace::autoclean;
use autodie;
use Tie::IxHash;
extends 'Bio::FastParsers::Base';
# public attributes (inherited)
with 'Bio::FastParsers::Roles::Clusterable';
sub BUILD {
my $self = shift;
my $infile = $self->filename;
open my $in, '<', $infile;
tie my %members_for, 'Tie::IxHash';
LINE:
while (my $line = <$in>) {
chomp $line;
my ($type, @fields) = split /\t/xms, $line;
# https://www.drive5.com/usearch/manual/opt_uc.html
# Field Description
# - Record type S, H, C or N (see table below).
# 0 Cluster number (0-based).
# 1 Sequence length (S, N and H) or cluster size (C).
# 2 For H records, percent identity with target.
# 3 For H records, the strand: + or - for nucleotides, . for proteins.
# 4 Not used, parsers should ignore this field. Included for backwards compatibility.
# 5 Not used, parsers should ignore this field. Included for backwards compatibility.
# 6 Compressed alignment or the symbol '=' (equals sign). The = indicates that the query is 100% identical to the target sequence (field 10).
# 7 Label of query sequence (always present).
# 8 Label of target sequence (H records only).
if ($type eq 'C') {
push @{ $members_for{ $fields[7] } }, ();
}
elsif ($type eq 'H') {
push @{ $members_for{ $fields[8] } }, $fields[7];
}
}
# store representative and member sequence ids
$self->_set_members_for( \%members_for );
return;
}
__PACKAGE__->meta->make_immutable;
1;
__END__
=pod
( run in 1.041 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )