Bio-ViennaNGS
view release on metacpan or search on metacpan
lib/Bio/ViennaNGS/Peak.pm view on Meta::CPAN
# -*-CPerl-*-
# Last changed Time-stamp: <2018-07-03 12:20:53 mtw>
package Bio::ViennaNGS::Peak;
use Bio::ViennaNGS;
use Moose;
use Carp;
use Data::Dumper;
use Path::Class;
use List::Util qw(sum sum0 min max first);
use Bio::ViennaNGS::Util qw(sortbed);
use namespace::autoclean;
use File::Temp qw(tempfile);
use version; our $VERSION = version->declare("$Bio::ViennaNGS::VERSION");
# ---------------------------------------------------------------------
# Attribute declarations
#
# All attributes are read-only and each one gets a 'has_<name>'
# predicate method. Declaration order is: data, region, peaks,
# winsize, interval, mincov, length, threshold.
# ---------------------------------------------------------------------

# Hash-reference containers. Each defaults to a fresh, empty hash
# reference per object, so the corresponding predicate is true even
# when the caller supplied nothing to the constructor.
for my $container (qw(data region peaks)) {
    has $container => (
        is        => 'ro',
        isa       => 'HashRef',
        predicate => "has_$container",
        default   => sub { {} },
    );
}

# Integer-valued parameters. No default is given, so the predicate is
# the only way to find out whether a value was supplied.
# NOTE(review): per the POD, 'length' is the upper bound on the total
# length of a reported peak -- confirm against final_peaks.
for my $int_param (qw(winsize interval mincov length)) {
    has $int_param => (
        is        => 'ro',
        isa       => 'Int',
        predicate => "has_$int_param",
    );
}

# Declared as 'Value' (any defined non-reference scalar), not 'Int'.
# NOTE(review): the POD describes it as a multiplier applied to the
# peak maximum, so it is presumably fractional -- confirm with callers.
has threshold => (
    is        => 'ro',
    isa       => 'Value',
    predicate => 'has_threshold',
);
sub populate_data {
my ($self,$filep,$filen) = @_;
my $this_function = (caller(0))[3];
my ($i,$element,$chr,$start,$end,$val,$lastend);
my $have_lastend = 0;
lib/Bio/ViennaNGS/Peak.pm view on Meta::CPAN
whose mean is less than a certain value
(i.e. C<$self-E<gt>threshold> * peak maximum).
Raw peaks are stored in C<%{$self-E<gt>data}-E<gt>{peaks}>.
Args : C<$dest> contains the output path for results, C<$prefix> the
prefix used for all output file names. C<$log> is the name of a
log file, or undef if no logging is required.
Returns : None. The output is a position-sorted BED6 file containing
all raw peaks.
Notes : It is highly recommended to use I<normalized> input data in
order to allow for multiple calls of this method with the same
set of parameters on different samples.
=item final_peaks
Title : final_peaks
Usage : C<$obj-E<gt>final_peaks($dest,$prefix,$log);>
Function : This method characterizes final peaks from RNA-seq coverage
found in C<%{$self-E<gt>data}-E<gt>{peaks}>. The latter is
supposed to have been populated by C<$self-E<gt>raw_peaks>.
The procedure for finding final peaks is as follows: For each raw peak
found in C<%{$self-E<gt>data}-E<gt>{peaks}> the window of maximum
coverage is retrieved and a (second) sliding window approach is then
applied to regions both upstream and downstream of the maximum. Peak
boundaries are set at the position where the mean coverage of the
respective window is lower than C<$self-E<gt>threshold> * peak
maximum.
Peaks are reported if their total length (as determined by this
routine) is not longer than C<$self-E<gt>length>.
Args : C<$dest> contains the output path for results, C<$prefix> the
prefix used for all output file names. C<$log> is the name of a
log file, or undef if no logging is required.
Returns : None. The output is a position-sorted BED6 file containing
all candidate peaks.
Notes :
=back
=head1 DEPENDENCIES
=over
=item L<Moose>
=item L<Carp>
=item L<Path::Class>
=item L<List::Util>
=item L<namespace::autoclean>
=item L<Data::Dumper>
=item L<File::Temp>
=back
=head1 SEE ALSO
=over
=item L<Bio::ViennaNGS>
=item L<Bio::ViennaNGS::Util>
=back
=head1 AUTHOR
Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2015-2018 by Michael T. Wolfinger
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.0 or,
at your option, any later version of Perl 5 you may have available.
This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
( run in 0.465 second using v1.01-cache-2.11-cpan-0d23b851a93 )