CLIPSeqTools
view release on metacpan or search on metacpan
lib/CLIPSeqTools/PreprocessApp/annotate_with_conservation.pm view on Meta::CPAN
=head1 DESCRIPTION
Annotate alignments in a database table with phastCons or phyloP
conservation scores. Adds a column named "conservation" with the average
conservation score for the nucleotides of each read.
To minimize storage needs, the conservation score is converted from a
floating point number to an integer by multiplying it by 1000.
=head1 OPTIONS
Input options for library.
--driver <Str> driver for database connection (eg. mysql,
SQLite).
--database <Str> database name or path to database file for file
based databases (eg. SQLite).
--table <Str> database table.
--host <Str> hostname for database connection.
--user <Str> username for database connection.
--password <Str> password for database connection.
--records_class <Str> type of records stored in database.
--filter <Filter> filter library. May be used multiple times.
Syntax: column_name="pattern"
e.g. keep reads with deletions AND not repeat
masked AND longer than 31
--filter deletion="def"
--filter rmsk="undef"
--filter query_length=">31"
Operators: >, >=, <, <=, =, !=, def, undef
Other input
--rname_sizes <Str> file with sizes for reference alignment
sequences (rnames). Must be tab delimited
(chromosome\tsize) with one line per rname.
--cons_dir <Str> directory with phastCons or phyloP files.
Database options.
--drop drop column if it already exists (not
supported in SQLite).
Other options.
-v --verbose print progress lines and extra information.
-h -? --usage --help print help message
=cut
package CLIPSeqTools::PreprocessApp::annotate_with_conservation;
$CLIPSeqTools::PreprocessApp::annotate_with_conservation::VERSION = '1.0.0';
# Make it an app command
use MooseX::App::Command;
extends 'CLIPSeqTools::PreprocessApp';
#######################################################################
####################### Load External modules #####################
#######################################################################
use Modern::Perl;
use autodie;
use namespace::autoclean;
use Try::Tiny;
use PDL::Lite; $PDL::BIGPDL = 0; $PDL::BIGPDL++; # enable huge pdls
#######################################################################
####################### Command line options ######################
#######################################################################
# Mandatory option: path to the tab delimited file that lists each reference
# sequence (rname) together with its size.
option rname_sizes => (
    is            => 'rw',
    isa           => 'Str',
    required      => 1,
    documentation => 'file with sizes for reference alignment sequences (rnames). Must be tab delimited (chromosome\tsize) with one line per rname.',
);
# Mandatory option: directory that holds the phastCons or phyloP files.
option cons_dir => (
    is            => 'rw',
    isa           => 'Str',
    required      => 1,
    documentation => 'directory with phastCons or phyloP files.',
);
# Optional boolean flag: drop the column if it already exists.
# The help text is kept in agreement with the --drop entry of the POD
# OPTIONS section (singular "column ... it", and the correct "SQLite" spelling).
option 'drop' => (
    is            => 'rw',
    isa           => 'Bool',
    documentation => 'drop column if it already exists (not supported in SQLite).',
);
#######################################################################
########################## Consume Roles ##########################
#######################################################################
# Consume the shared library options role. The role's validate_args is
# excluded from composition and kept reachable under the private name
# _validate_args_for_library, so that this class can define its own
# validate_args and delegate to the role's implementation.
with 'CLIPSeqTools::Role::Option::Library' => {
    -excludes => 'validate_args',
    -alias    => { validate_args => '_validate_args_for_library' },
};
#######################################################################
######################## Interface Methods ########################
#######################################################################
# Validate the arguments of this command.
# Delegates to the validate_args implementation of the library option role,
# which is available on this class under the alias _validate_args_for_library.
# Returns whatever that aliased method returns.
sub validate_args {
    my $self = shift;

    return $self->_validate_args_for_library;
}
sub run {
my ($self) = @_;
warn "Starting: annotate_with_conservation\n";
warn "Validating arguments\n" if $self->verbose;
$self->validate_args();
warn "Reading sizes for reference alignment sequences\n" if $self->verbose;
my %rname_sizes = $self->read_rname_sizes;
warn "Opening reads collection\n" if $self->verbose;
my $reads_collection = $self->reads_collection;
my @rnames = $reads_collection->rnames_for_all_strands;
( run in 2.020 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )