Bio-Gonzales
view release on metacpan or search on metacpan
bin/gonz_compcol.pl view on Meta::CPAN
my @vb;

# Take over the value-column lists only when the matching option was supplied.
if ( exists $opt{va} ) {
  @va = @{ $opt{va} };
}
if ( exists $opt{vb} ) {
  @vb = @{ $opt{vb} };
}

# Load both inputs through dict_slurp; the two calls differ only in the file
# and in the per-file key/value column options.
my $a_data = dict_slurp(
  $a_f,
  {
    key_idx     => $opt{a},
    val_idx     => $opt{va},
    uniq        => 0,
    concat_keys => $opt{concat},
  }
);
my $b_data = dict_slurp(
  $b_f,
  {
    key_idx     => $opt{b},
    val_idx     => $opt{vb},
    uniq        => 0,
    concat_keys => $opt{concat},
  }
);

# Partition the keys of A in one pass: those also present in B and those
# missing from B. The relative key order within each list is the order in
# which keys %$a_data hands them out.
my ( @both, @not_in_b );
for my $key ( keys %$a_data ) {
  if ( exists $b_data->{$key} ) {
    push @both, $key;
  }
  else {
    push @not_in_b, $key;
  }
}

# Keys of B that A does not have.
my @not_in_a = grep { !exists $a_data->{$_} } keys %$b_data;
# Report header: name both inputs, then summarise the distinct keys of A.
say "A: $a_f";
say "B: $b_f";
say "";
say "A DISTINCT: " . scalar keys %$a_data;

# The preview lists keys of A, so it is guarded by A's own key count (not B's).
if ( scalar keys %$a_data > 0 ) {
  say "first 3:";

  # Show at most three keys. Truncating the copy instead of slicing 0 .. 2
  # avoids undef entries (blank lines plus "uninitialized value" warnings)
  # when A holds fewer than three keys.
  my @preview = keys %$a_data;
  $#preview = 2 if $#preview > 2;
  say " " . join( "\n ", @preview );
}
bin/gonz_compcol.pl view on Meta::CPAN
}
# Print one report section per key set: the intersection, the keys only in A
# and the keys only in B. Each section is a blank line, "<LABEL>: <count>"
# and, when the set is not empty, a "first 3:" preview.
for my $section (
  [ 'INTERSECTION', \@both ],
  [ 'UNIQUE TO A',  \@not_in_b ],
  [ 'UNIQUE TO B',  \@not_in_a ],
  )
{
  my ( $label, $keys ) = @$section;

  say "";
  say "${label}: " . scalar @$keys;
  next unless @$keys;

  say "first 3:";

  # Slice only as far as the set reaches: a fixed 0 .. 2 slice on a shorter
  # array yields undef elements, which print as empty lines and trigger
  # "uninitialized value" warnings.
  my $last = $#{$keys} < 2 ? $#{$keys} : 2;
  say " " . join( "\n ", @{$keys}[ 0 .. $last ] );
}
say "";

# Lookup table keyed by every key present in both inputs.
my %both = map { $_ => 1 } @both;
for ( my $idx = 0; $idx < @va; $idx++ ) {
my %uniq;
my %total;
for $a ( keys %$a_data ) {
for my $v ( @{ $a_data->{$a} } ) {
lib/Bio/Gonzales/Domain/Identification/HMMER.pm view on Meta::CPAN
# Attribute declarations.

# Counter-like attribute starting at 0.
# NOTE(review): under Moose/Mouse, is => 'bare' generates no accessor --
# confirm which attribute framework this package uses.
has '_intermediate_result_id' => ( is => 'bare', default => 0 );

# Read-write attributes with no default declared here.
# NOTE(review): the names suggest output file locations supplied by the
# caller -- verify against the code that reads them.
has 'domain_spanning_region_file' => ( is => 'rw' );
has 'domain_spanning_region_masked_file' => ( is => 'rw' );
has 'domains_masked_inverted_file' => ( is => 'rw' );
has 'domains_masked_file' => ( is => 'rw' );
has 'whole_sequence_file' => ( is => 'rw' );

# Lazy default: computed on first read by calling _catmyfile on the object
# with the fixed name 'not_found_domains.txt'.
has 'domains_notfound_file' => (
is => 'rw',
lazy => 1,
default => sub { $_[0]->_catmyfile('not_found_domains.txt') }
);

# Lazy default: computed on first read by calling _catmyfile on the object
# with the fixed name 'discovered.cache'.
has 'discovered_cache_file' => (
is => 'rw',
lazy => 1,
default => sub { $_[0]->_catmyfile('discovered.cache') }
);

# Flag, 0 unless set otherwise.
has 'from_cache' => ( is => 'rw', default => 0 );

# Array reference; the sub gives every object its own fresh empty array
# instead of one shared reference.
has 'domain_groups' => ( is => 'rw', default => sub { [] } );
lib/Bio/Gonzales/Domain/Identification/HMMER.pm view on Meta::CPAN
#open file for complete sequence, if set
my $whole_sequence;
$whole_sequence = Bio::SeqIO->new(
-format => 'fasta',
-file => ">>" . $self->whole_sequence_file,
) if ( $self->whole_sequence_file );
#store some kind of found/notfound/how many not found
#information in a file
open my $not_found, '>>', $self->domains_notfound_file
or croak "Can't open filehandle: $!";
open my $seq_ids_fh, '>>', $self->_catmyfile('hmmmer_result_seqids.tsv');
#run through sequences file to extract sequences with hits
while ( my $so = $snf2->next_seq ) {
$so->desc( "(" . $tag . ") " . $so->desc ) if ($tag);
#does the sequence have at least one hit in every group/mark?
if ( exists $result_for_sequence->{ $so->display_id() }
&& $result_for_sequence->{ $so->display_id() }->hit_in_every_mark )
lib/Bio/Gonzales/Domain/Identification/HMMER.pm view on Meta::CPAN
$so->mask( $m->{from}, $m->{to} );
}
$domains_masked_inverted->write_seq( $so->trunc_masked_ends );
}
#no domains found in sequence
} elsif ( exists $result_for_sequence->{ $so->display_id() }
&& $result_for_sequence->{ $so->display_id() }->num_marks_hit > 0 )
{
#print number of found domains to notfound,
#if not all domains occur in sequence
say {$not_found} $so->display_id()
. "\tfound in "
. $result_for_sequence->{ $so->display_id }->num_marks_hit . '/'
. scalar( @{ $self->domain_groups } )
. ' groups';
} else {
say {$not_found} $so->display_id() . "\treally nothing found";
}
}
$seq_ids_fh->close;
$not_found->close;
}
# for every group in array, grep for accession
sub _is_in_all_domain_groups {
my ( $self, $hmm_hits_acc ) = @_;
#find the group
my $count_groups = 0;
for my $g ( @{ $self->domain_groups } ) {
$count_groups++
t/boilerplate.t view on Meta::CPAN
#!perl -T
use strict;
use warnings;
use Test::More tests => 1;
sub not_in_file_ok {
my ($filename, %regex) = @_;
open( my $fh, '<', $filename )
or die "couldn't open $filename for reading: $!";
my %violated;
while (my $line = <$fh>) {
while (my ($desc, $regex) = each %regex) {
if ($line =~ $regex) {
push @{$violated{$desc}||=[]}, $.;
t/boilerplate.t view on Meta::CPAN
if (%violated) {
fail("$filename contains boilerplate text");
diag "$_ appears on lines @{$violated{$_}}" for keys %violated;
} else {
pass("$filename contains no boilerplate text");
}
}
# Check one module file for leftover template text.
#
# Takes the path of the module file and hands it to not_in_file_ok together
# with the description => pattern pairs to look for; the result of that call
# is returned unchanged.
sub module_boilerplate_ok {
    my $module = shift;

    my %boilerplate_patterns = (
        'the great new $MODULENAME' => qr/ - The great new /,
        'boilerplate description'   => qr/Quick summary of what the module/,
        'stub function definition'  => qr/function[12]/,
    );

    not_in_file_ok( $module, %boilerplate_patterns );
}
# Check lib/Bio/Gonzales.pm for leftover boilerplate text; this is the single
# test announced in the plan above.
module_boilerplate_ok('lib/Bio/Gonzales.pm');
( run in 0.582 second using v1.01-cache-2.11-cpan-0a987023a57 )