Bio-ViennaNGS

 view release on metacpan or  search on metacpan

lib/Bio/ViennaNGS/Fasta.pm  view on Meta::CPAN


Bio::ViennaNGS::Fasta - Moose wrapper for Bio::DB::Fasta

=head1 SYNOPSIS

  use Bio::ViennaNGS::Fasta;

  my $f = Bio::ViennaNGS::Fasta->new(fasta => "data/foo.fa", );

  # get all FASTA IDs (fastaids returns an array reference)
  my @ids = @{ $f->fastaids };

  # get a reference to a hash of Bio::PrimarySeq::Fasta objects whose
  # keys are the Fasta IDs seen in the input file
  my $ps = $f->primaryseqH;

  # get the strand-specific genomic sequence for a certain Fasta ID
  my $id = "chr1";
  my $start = 287;
  my $end = 1289;
  my $strand = "+";
  my $seq = $f->stranded_subsequence($id,$start,$end,$strand);

=head1 DESCRIPTION

L<Bio::ViennaNGS::Fasta> provides a L<Moose> interface to
L<Bio::DB::Fasta>, spiced up with a few convenience methods for easy
sequence data retrieval.

=head2 ATTRIBUTES

=over 3

=item fasta (required)

Upon object construction, this attribute expects an input fasta file,
which is transparently coerced into a L<Bio::DB::Fasta> object and
thereafter available via the C<fasta> attribute.

=item fastaids (auto-computed)

Array reference to the Fasta IDs found in the input file

=item primaryseqH (auto-computed)

Hash reference to L<Bio::PrimarySeq::Fasta> objects whose keys are the
Fasta IDs found in the input file

=back

=cut

package Bio::ViennaNGS::Fasta;

use Bio::ViennaNGS;
use Moose;
use Bio::ViennaNGS::Subtypes;
use Bio::Perl;
use Carp;
use Data::Dumper;
use namespace::autoclean;
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");

# Required, read-only attribute holding the input Fasta data. The value
# given to the constructor is coerced (coerce => 1) to the
# Bio::ViennaNGS::MyFasta subtype; has_fasta() tells whether it is set.
has 'fasta' => (
  is        => 'ro',
  required  => 1,
  isa       => 'Bio::ViennaNGS::MyFasta',
  coerce    => 1,
  predicate => 'has_fasta',
);


# ArrayRef attribute populated by BUILD; init_arg => undef prevents it
# from being set through the constructor. has_ids() tells whether it is
# set.
has 'fastaids' => (
  is        => 'rw',
  init_arg  => undef,
  isa       => 'ArrayRef',
  predicate => 'has_ids',
);

# HashRef attribute populated by BUILD, keyed by Fasta ID; init_arg =>
# undef prevents it from being set through the constructor.
# has_primaryseq() tells whether it is set.
has 'primaryseqH' => (
  is        => 'rw',
  init_arg  => undef,
  isa       => 'HashRef',
  predicate => 'has_primaryseq',
);


# Moose post-construction hook: caches the Fasta IDs and the per-ID
# sequence objects obtained from the 'fasta' attribute.
#
# Side effects: sets $self->fastaids to an ArrayRef of everything
# returned by $self->fasta->ids, and $self->primaryseqH to a HashRef
# mapping each of those IDs to $self->fasta->get_Seq_by_id($id).
# Dies (confess) if the fastaids attribute is not set afterwards.
sub BUILD {
  my $self = shift;
  my $this_function = (caller(0))[3];
  $self->fastaids([$self->fasta->ids]);
  confess "ERROR [$this_function] \$self->fastaids not available"
    unless ($self->has_ids);
  my %ps = ();
  # Plain loop (not map) so that get_Seq_by_id is called in scalar
  # context and always contributes exactly one value per ID.
  foreach my $id (@{$self->fastaids}){
    $ps{$id} = $self->fasta->get_Seq_by_id($id);
  }
  $self->primaryseqH(\%ps);
}

=head2 METHODS

=over 2

=item stranded_subsequence

Title : stranded_subsequence

Usage : C<$obj-E<gt>stranded_subsequence($id,$start,$end,$strand)>

Function : Returns the DNA/RNA sequence for ID C<$id> from C<$start>
           to C<$end>. Internally, sequence data is retrieved from
           the C<$self-E<gt>primaryseqH> HashRef of
           L<Bio::PrimarySeqI>/L<Bio::PrimarySeq::Fasta> objects.

Args : C<$id> is the Fasta ID to retrieve sequence data from,
        C<$start> and C<$end> are (1-based) start and end coordinates
        of the requested interval, where C<$start> must be <= C<$end>,
        and C<$strand> selects the strand: -1 or '-' for the [-]
        strand, any other value (e.g. 1 or '+') for the [+] strand.

Returns : A string.

=item has_sequid

Title : has_sequid 

Usage : C<$obj-E<gt>has_sequid($id)>

Function : Checks whether the current object contains Fasta ID C<$id>.

Args : C<$id> is the Fasta ID to check for.

Returns : 1 if ID C<$id> was found, 0 else.

=back

=cut

# Returns the sequence of Fasta entry $id between $start and $end,
# reverse complemented when the minus strand is requested.
#
# Args   : $id     - Fasta ID; must be known to $self->has_sequid
#          $start  - start coordinate, must be <= $end
#          $end    - end coordinate
#                    ($start and $end are handed unchanged to the
#                    subseq() method of the stored sequence object)
#          $strand - '-1' or '-' selects the minus strand; any other
#                    value (including undef) selects the plus strand
# Returns: the requested (sub)sequence as a string
# Dies   : (confess) if $start > $end, or if $id is not a known Fasta ID
sub stranded_subsequence {
  my ($self,$id,$start,$end,$strand) = @_;
  my $this_function = (caller(0))[3];
  confess "ERROR [$this_function] start coordinate must be <= end coordinate"
    unless ($start <= $end);
  confess "ERROR [$this_function] Id $id not found in input Fasta file"
    unless ($self->has_sequid($id));
  my $seq = $self->primaryseqH->{$id}->subseq($start => $end);
  # The defined() guard lets an omitted strand fall through to the plus
  # strand without an 'uninitialized value' warning from eq.
  if (defined $strand && ($strand eq '-1' || $strand eq '-')) {
    # revcom() hands back an object; ->seq() extracts the plain string
    $seq = revcom($seq)->seq();
  }
  return $seq;
}


# Tells whether Fasta ID $id is a key of the $self->primaryseqH hash.
#
# Args   : $id - the Fasta ID to look up
# Returns: 1 if the key exists (whatever its value), 0 otherwise
sub has_sequid {
  my ($self, $id) = @_;
  return 1 if exists $self->primaryseqH->{$id};
  return 0;
}


=head1 DEPENDENCIES

=over

=item L<Bio::Perl> >= 1.00690001

=item L<Bio::DB::Fasta>

=item L<Moose>

=item L<Carp>

=item L<namespace::autoclean>

=back

=head1 SEE ALSO

=over

=item L<Bio::ViennaNGS>

=item L<Bio::DB::Fasta>

=back

=head1 AUTHOR

Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2014-2017 by Michael T. Wolfinger

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.0 or,
at your option, any later version of Perl 5 you may have available.

This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut

1;



( run in 0.940 second using v1.01-cache-2.11-cpan-5a3173703d6 )