Bio-Gonzales

 view release on metacpan or  search on metacpan

bin/gonz_compcol.pl  view on Meta::CPAN

# Value-column index lists for the two inputs. @va is declared outside this
# excerpt; each list is only overwritten when its option was actually given.
my @vb;
if ( exists $opt{va} ) {
  @va = @{ $opt{va} };
}
if ( exists $opt{vb} ) {
  @vb = @{ $opt{vb} };
}

# Read both files with identical settings apart from the key/value columns.
# NOTE(review): dict_slurp is defined elsewhere; the code below dereferences
# its result as a hash reference keyed by the (possibly concatenated) key.
my $a_data = dict_slurp(
  $a_f,
  {
    key_idx     => $opt{a},
    val_idx     => $opt{va},
    uniq        => 0,
    concat_keys => $opt{concat},
  }
);
my $b_data = dict_slurp(
  $b_f,
  {
    key_idx     => $opt{b},
    val_idx     => $opt{vb},
    uniq        => 0,
    concat_keys => $opt{concat},
  }
);

# Split the keys of A into those shared with B and those only found in A,
# keeping the order in which keys() hands them out.
my ( @both, @not_in_b );
for my $key ( keys %$a_data ) {
  if ( exists $b_data->{$key} ) {
    push @both, $key;
  }
  else {
    push @not_in_b, $key;
  }
}

# Keys of B that never occur in A.
my @not_in_a = grep { !exists $a_data->{$_} } keys %$b_data;

# Report header: the two inputs being compared.
say "A: $a_f";
say "B: $b_f";
say "";

# Number of distinct keys in A, followed by up to three example keys.
say "A DISTINCT:   " . scalar keys %$a_data;

# The guard has to look at A itself (not B), because the examples printed
# below are taken from A.
if ( keys %$a_data ) {
  my @examples = keys %$a_data;

  # Show at most three keys; with fewer keys available, show only those
  # instead of padding the list with undefined entries.
  $#examples = 2 if @examples > 3;

  say "first 3:";
  say "    " . join( "\n    ", @examples );
}

bin/gonz_compcol.pl  view on Meta::CPAN

}
say "";

# Print the three set-comparison sections in a fixed order. Each section
# shows its label with the number of keys, up to three example keys and a
# trailing blank line.
for my $section (
  [ "INTERSECTION: ", \@both ],
  [ "UNIQUE TO A:  ", \@not_in_b ],
  [ "UNIQUE TO B:  ", \@not_in_a ],
  )
{
  my ( $label, $keys ) = @$section;

  say $label . scalar @$keys;
  if (@$keys) {

    # Clamp the slice to the keys that exist: slicing past the end of the
    # array would yield undef entries, i.e. blank example lines and
    # "uninitialized value" warnings.
    my $last_idx = $#{$keys} < 2 ? $#{$keys} : 2;

    say "first 3:";
    say "    " . join( "\n    ", @{$keys}[ 0 .. $last_idx ] );
  }
  say "";
}

# Lookup table of the keys present in both inputs.
my %both = map { $_ => 1 } @both;

for ( my $idx = 0; $idx < @va; $idx++ ) {
  my %uniq;
  my %total;
  for $a ( keys %$a_data ) {
    for my $v ( @{ $a_data->{$a} } ) {

lib/Bio/Gonzales/Domain/Identification/HMMER.pm  view on Meta::CPAN

# Internal id, starting at 0.
# NOTE(review): in Moose-style declarations "is => 'bare'" requests no
# accessor; confirm against the OO framework this class uses.
has '_intermediate_result_id' => ( default => 0, is => 'bare' );

# Read-write output file settings without a default value.
has( $_ => ( is => 'rw' ) )
    for qw(
    domain_spanning_region_file
    domain_spanning_region_masked_file
    domains_masked_inverted_file
    domains_masked_file
    whole_sequence_file
);

# Report file for sequences lacking domain hits. Unless set explicitly, the
# name is built on first access via _catmyfile.
has 'domains_notfound_file' => (
    is      => 'rw',
    lazy    => 1,
    default => sub {
        my ($self) = @_;
        return $self->_catmyfile('not_found_domains.txt');
    },
);

# Cache file name; same lazy _catmyfile-based default scheme as above.
has 'discovered_cache_file' => (
    is      => 'rw',
    lazy    => 1,
    default => sub {
        my ($self) = @_;
        return $self->_catmyfile('discovered.cache');
    },
);

# Flag, off (0) by default.
has 'from_cache' => ( default => 0, is => 'rw' );

# Array reference of domain groups; every object gets its own empty array.
has 'domain_groups' => (
    is      => 'rw',
    default => sub { return [] },
);

lib/Bio/Gonzales/Domain/Identification/HMMER.pm  view on Meta::CPAN


    #open file for complete sequence, if set
    my $whole_sequence;
    $whole_sequence = Bio::SeqIO->new(
        -format => 'fasta',
        -file   => ">>" . $self->whole_sequence_file,
    ) if ( $self->whole_sequence_file );

    #store some kind of found/notfound/how many not found
    #information in a file
    open my $not_found, '>>', $self->domains_notfound_file
        or croak "Can't open filehandle: $!";

    open my $seq_ids_fh, '>>', $self->_catmyfile('hmmmer_result_seqids.tsv');
    #run through sequences file to extract sequences with hits
    while ( my $so = $snf2->next_seq ) {

        $so->desc( "(" . $tag . ") " . $so->desc ) if ($tag);
        #has the sequence has at least in every group/mark
        if ( exists $result_for_sequence->{ $so->display_id() }
            && $result_for_sequence->{ $so->display_id() }->hit_in_every_mark )

lib/Bio/Gonzales/Domain/Identification/HMMER.pm  view on Meta::CPAN

                    $so->mask( $m->{from}, $m->{to} );
                }
                $domains_masked_inverted->write_seq( $so->trunc_masked_ends );
            }
            #no domains found in sequence
        } elsif ( exists $result_for_sequence->{ $so->display_id() }
            && $result_for_sequence->{ $so->display_id() }->num_marks_hit > 0 )
        {
            #print number of found domains to notfound,
            #if not all domains occurr in sequence
            say {$not_found} $so->display_id()
                . "\tfound in "
                . $result_for_sequence->{ $so->display_id }->num_marks_hit . '/'
                . scalar( @{ $self->domain_groups } )
                . ' groups';
        } else {
            say {$not_found} $so->display_id() . "\treally nothing found";

        }
    }
    $seq_ids_fh->close;
    $not_found->close;
}

# for every group in array, grep for accession
sub _is_in_all_domain_groups {
    my ( $self, $hmm_hits_acc ) = @_;

    #find the group
    my $count_groups = 0;
    for my $g ( @{ $self->domain_groups } ) {
        $count_groups++

t/boilerplate.t  view on Meta::CPAN

#!perl -T

use strict;
use warnings;
use Test::More tests => 1;

sub not_in_file_ok {
    my ($filename, %regex) = @_;
    open( my $fh, '<', $filename )
        or die "couldn't open $filename for reading: $!";

    my %violated;

    while (my $line = <$fh>) {
        while (my ($desc, $regex) = each %regex) {
            if ($line =~ $regex) {
                push @{$violated{$desc}||=[]}, $.;

t/boilerplate.t  view on Meta::CPAN

    if (%violated) {
        fail("$filename contains boilerplate text");
        diag "$_ appears on lines @{$violated{$_}}" for keys %violated;
    } else {
        pass("$filename contains no boilerplate text");
    }
}

# Check a module file for leftover template text.
#
# Takes the path of the module file and hands it to not_in_file_ok together
# with description => pattern pairs for the phrases that must not remain.
# Returns whatever not_in_file_ok returns.
sub module_boilerplate_ok {
    my $module = shift;

    # Description/pattern pairs, passed on as a flat list after the filename.
    my @boilerplate_checks = (
        'the great new $MODULENAME' => qr/ - The great new /,
        'boilerplate description'   => qr/Quick summary of what the module/,
        'stub function definition'  => qr/function[12]/,
    );

    return not_in_file_ok( $module, @boilerplate_checks );
}

# The single planned test: check the distribution's main module file.
module_boilerplate_ok('lib/Bio/Gonzales.pm');



( run in 0.582 second using v1.01-cache-2.11-cpan-0a987023a57 )