CLIPSeqTools

 view release on metacpan or  search on metacpan

lib/CLIPSeqTools/App/count_reads_on_genic_elements.pm  view on Meta::CPAN


clipseqtools count_reads_on_genic_elements [options/parameters]

=head1 DESCRIPTION

Counts library reads on transcripts, genes, exons, introns.

* Transcript counts are measured only on their exons.
* Gene counts are measured only for exonic regions.

Output (4 files): counts.gene.tab, counts.transcript.tab, counts.exon.tab, counts.intron.tab - (name, location, length, count, count_per_nt, exonic_count, exonic_length, exonic_count_per_nt)

=head1 OPTIONS

  Input options for library.
    --driver <Str>         driver for database connection (eg. mysql,
                           SQLite).
    --database <Str>       database name or path to database file for file
                           based databases (eg. SQLite).
    --table <Str>          database table.
    --host <Str>           hostname for database connection.
    --user <Str>           username for database connection.
    --password <Str>       password for database connection.
    --records_class <Str>  type of records stored in database.
    --filter <Filter>      filter library. May be used multiple times.
                           Syntax: column_name="pattern"
                           e.g. keep reads with deletions AND not repeat
                                masked AND longer than 31
                                --filter deletion="def" 
                                --filter rmsk="undef" .
                                --filter query_length=">31".
                           Operators: >, >=, <, <=, =, !=, def, undef

  Other input
    --gtf <Str>            GTF file with genes/transcripts.

  Output
    --o_prefix <Str>       output path prefix. Script will create and add
                           extension to path. [Default: ./]

  Other options.
    -v --verbose           print progress lines and extra information.
    -h -? --usage --help   print help message

=cut


package CLIPSeqTools::App::count_reads_on_genic_elements;
$CLIPSeqTools::App::count_reads_on_genic_elements::VERSION = '1.0.0';

# Make it an app command
use MooseX::App::Command;
extends 'CLIPSeqTools::App';


#######################################################################
#######################   Load External modules   #####################
#######################################################################
use Modern::Perl;
use autodie;
use namespace::autoclean;


#######################################################################
##########################   Consume Roles   ##########################
#######################################################################
with 
	"CLIPSeqTools::Role::Option::Library" => {
		-alias    => { validate_args => '_validate_args_for_library' },
		-excludes => 'validate_args',
	},
	"CLIPSeqTools::Role::Option::Genes" => {
		-alias    => { validate_args => '_validate_args_for_genes' },
		-excludes => 'validate_args',
	},
	"CLIPSeqTools::Role::Option::OutputPrefix" => {
		-alias    => { validate_args => '_validate_args_for_output_prefix' },
		-excludes => 'validate_args',
	};

	
#######################################################################
########################   Interface Methods   ########################
#######################################################################
sub validate_args {
	my ($self) = @_;
	
	$self->_validate_args_for_library;
	$self->_validate_args_for_genes;
	$self->_validate_args_for_output_prefix;
}

sub run {
	my ($self) = @_;
	
	warn "Starting analysis: count_reads_on_genic_elements\n";
	
	warn "Validating arguments\n" if $self->verbose;
	$self->validate_args();
	
	warn "Creating gene collection\n" if $self->verbose;
	my $gene_collection = $self->gene_collection;
	
	warn "Creating transcript collection\n" if $self->verbose;
	my $transcript_collection = $self->transcript_collection;
	
	warn "Creating reads collection\n" if $self->verbose;
	my $reads_collection = $self->reads_collection;

	warn "Calculating transcript and exon/intron counts\n" if $self->verbose;
	my $counter = 1;
	$transcript_collection->foreach_record_do( sub {
		my ($transcript) = @_;
		
		# Calculate exon and transcript counts. Transcript counts is calculated by the exonic region only
		my $transcript_exonic_count = 0;
		foreach my $exon (@{$transcript->exons}) {
			my $exon_count = $reads_collection->total_copy_number_for_records_contained_in_region($exon->strand, $exon->chromosome, $exon->start, $exon->stop);
			$exon->extra({count => $exon_count});
			$transcript_exonic_count += $exon_count;
		}



( run in 0.583 second using v1.01-cache-2.11-cpan-39bf76dae61 )