Bio-Gonzales
view release on metacpan or search on metacpan
lib/Bio/Gonzales/Domain/Group.pm view on Meta::CPAN
$i++;
}
return \%group;
}
sub filter_hits {
my ($self) = @_;
my $q = $self->search_result;
my $id_lookup = { map { $_ => 1 } @{ $self->filter_ids } };
my $group_idcs = $self->group_idcs;
my @hit_col;
while ( my ( $domain_id, $result ) = each %$q ) {
next unless ( exists( $group_idcs->{$domain_id} ) );
while ( my ( $seq_id, $hits ) = each %$result ) {
next if ( $id_lookup && !exists( $id_lookup->{$seq_id} ) );
for my $hit (@$hits) {
$hit->{domain_id} = $domain_id;
$hit->{seq_id} = $seq_id;
$hit->{group_id} = $group_idcs->{$domain_id};
push @hit_col, $hit;
}
}
}
return $self->_pick_best_domain_hits( \@hit_col )
if ( $self->only_best_hit );
return \@hit_col;
}
sub _pick_best_domain_hits {
my ( $self, $hit_list ) = @_;
my %group_collection;
for my $h (@$hit_list) {
my $id = $h->{seq_id} . "__//__" . $h->{group_id};
$group_collection{$id} //= [];
push @{ $group_collection{$id} }, $h;
}
my @best_ones;
for my $h ( values %group_collection ) {
my $max;
#find the domain with the maximal score
map { $max = $_ if ( !$max || $_->{score} > $max->{score} ) } @$h;
push @best_ones, $max;
}
return \@best_ones;
}
sub to_gff {
my ( $self, $dest ) = @_;
my $hits = $self->filter_hits;
my $gffout = Bio::Gonzales::Feat::IO::GFF3->new( file_or_fh => $dest, mode => '>' );
my $gidcs = $self->group_idcs;
while ( my ( $d, $idx ) = each %$gidcs ) {
$gffout->write_comment(" Group $idx contains domain: $d");
}
my $i = 0;
for my $max (@$hits) {
$gffout->write_feat(
Bio::Gonzales::Feat->new(
seq_id => $max->{seq_id},
source => 'hmmer',
type => 'region',
start => $max->{env_from},
end => $max->{env_to},
strand => 0,
score => $max->{score},
attributes => {
ID => [ "hit_" . $i++ ],
domainID => [ $max->{domain_id} ],
groupID => [ $max->{group_id} ]
}
)
);
}
$gffout->close;
}
sub filter_ids {
my ($self) = @_;
my $q = $self->search_result;
my $rgroups = $self->required_domains;
return unless ($rgroups);
my %id_occurrence;
# select the ids for every group by...
for my $domain_synonyms (@$rgroups) {
my @ids;
# iterating through the domains ...
for my $domain_synonym (@$domain_synonyms) {
# and storing all ids in an group-wide id list
push @ids, grep { @{ $q->{$domain_synonym}{$_} } > 0 } keys %{ $q->{$domain_synonym} };
}
# we need to eliminate duplicate ids (multiple synonyms per group)
# and increment the occurrence of the found ids
map { $id_occurrence{$_}++ } uniq @ids;
}
# if an id is in every group it occurred #num of rgroups times and is not forbidden, return it
return [ grep { $id_occurrence{$_} == @$rgroups && !$self->_is_forbidden($_) } keys %id_occurrence ];
}
sub _is_forbidden {
my ( $self, $id ) = @_;
my $q = $self->search_result;
my $fgroups = $self->forbidden_domains;
( run in 1.171 second using v1.01-cache-2.11-cpan-5b529ec07f3 )