CLIPSeqTools

 view release on metacpan or  search on metacpan

lib/CLIPSeqTools/App/genome_coverage.pm  view on Meta::CPAN

=head1 NAME

CLIPSeqTools::App::genome_coverage - Measure percent of genome covered by reads.

=head1 SYNOPSIS

clipseqtools genome_coverage [options/parameters]

=head1 DESCRIPTION

Measure the percent of genome that is covered by the reads of a library.

=head1 OPTIONS

  Input options for library.
    --driver <Str>         driver for database connection (eg. mysql,
                           SQLite).
    --database <Str>       database name or path to database file for file
                           based databases (eg. SQLite).
    --table <Str>          database table.
    --host <Str>           hostname for database connection.

lib/CLIPSeqTools/App/genome_coverage.pm  view on Meta::CPAN

	my $reads_collection = $self->reads_collection;
	my @rnames = $reads_collection->rnames_for_all_strands;
		
	warn "Creating output path\n" if $self->verbose;
	$self->make_path_for_output_prefix();
		
	warn "Calculating genome coverage\n" if $self->verbose;
	my $total_genome_coverage = 0;
	my $total_genome_length = 0;
	open(my $OUT, '>', $self->o_prefix.'genome_coverage.tab');
	say $OUT join("\t", 'rname', 'covered_area', 'size', 'percent_covered');
	foreach my $rname (@rnames) {
		warn "Working for $rname\n" if $self->verbose;
		my $pdl = PDL->zeros(PDL::byte(), $rname_sizes{$rname});
		
		$reads_collection->foreach_record_on_rname_do($rname, sub {
			$pdl->slice([$_[0]->start, $_[0]->stop]) .= 1;
			return 0;
		});
		
		my $covered_area = $pdl->sum();

lib/CLIPSeqTools/App/nmer_enrichment_over_shuffled.pm  view on Meta::CPAN


  Output
    --o_prefix <Str>       output path prefix. Script will create and add
                           extension to path. [Default: ./]

  Other options.
    --nmer_length <Int>    length N of the Nmer. Default: 6
    --permutations <Int>   number of permutation to be performed. 
                           Use more than 100 to get p-values < 0.01.
    --subset <Num>         run analysis on random subset. Option specifies
                           number (if integer) or percent (if % is used)
                           of data to be used.
    -v --verbose           print progress lines and extra information.
    -h -? --usage --help   print help message

=cut

package CLIPSeqTools::App::nmer_enrichment_over_shuffled;
$CLIPSeqTools::App::nmer_enrichment_over_shuffled::VERSION = '1.0.0';

# Make it an app command

lib/CLIPSeqTools/App/nmer_enrichment_over_shuffled.pm  view on Meta::CPAN

	is            => 'rw',
	isa           => 'Int',
	default       => 100,
	documentation => 'the number of permutation to be performed. Use more than 100 to get p-values < 0.01.',
);

option 'subset' => (
	is            => 'rw',
	isa           => 'Str',
	default       => '100%',
	documentation => 'run analysis on random subset. Option specifies the number (if integer) or percent (if % is used) of data to be used.',
);


#######################################################################
##########################   Consume Roles   ##########################
#######################################################################
with 
	"CLIPSeqTools::Role::Option::Library" => {
		-alias    => { validate_args => '_validate_args_for_library' },
		-excludes => 'validate_args',

lib/CLIPSeqTools/App/reads_long_gaps_size_distribution.pm  view on Meta::CPAN

		}

		return 0;
	});

	warn "Creating output path\n" if $self->verbose;
	$self->make_path_for_output_prefix();

	warn "Printing results\n" if $self->verbose;
	open (my $OUT1, '>', $self->o_prefix.'reads_long_gaps_size_distribution.tab');
	say $OUT1 join("\t", 'gap_size', 'count', 'percent');
	say $OUT1 join("\t", $_, $gap_size_count{$_}, ($gap_size_count{$_}/$total_cn)*100) for sort {$a <=> $b} keys %gap_size_count;
	close $OUT1;

	if ($self->plot) {
		warn "Creating plot\n" if $self->verbose;
		CLIPSeqTools::PlotApp->initialize_command_class(
			'CLIPSeqTools::PlotApp::reads_long_gaps_size_distribution',
			file     => $self->o_prefix.'reads_long_gaps_size_distribution.tab',
			o_prefix => $self->o_prefix
		)->run();

lib/CLIPSeqTools/CompareApp/compare_counts.pm  view on Meta::CPAN

	warn "Starting analysis: compare_counts\n";

	warn "Validating arguments\n" if $self->verbose;
	$self->validate_args();

	warn "Reading input files\n" if $self->verbose;
	my @tables = map {Data::Table::fromFile($_)} @{$self->table};
	die "Error: Input table sizes differ. Aborting.\n" if not all_tables_of_equal_size(@tables);
	$self->t_name([(1..@tables)]) if !defined $self->t_name;

	warn "Searching for 25th percentile\n" if $self->verbose;
	my $uq_idx = _quantile_idx_from_table_with_fewer(
		$self->val_col, $self->val_thres, $self->key_col, @tables);

	warn "Normalizing the values\n" if $self->verbose;
	#Normalized column name is value column + "_uq" suffix
	_build_normalized_column_in_tables($self->val_col, $uq_idx, @tables);

	warn "Writing output files\n" if $self->verbose;
	my @output_files = map {
		$self->o_prefix . $_ . '.counts.uq.tab'} @{$self->t_name};

lib/CLIPSeqTools/CompareApp/libraries_overlap_stats.pm  view on Meta::CPAN

			
			return 0;
		});
	}

	warn "Creating output path\n" if $self->verbose;
	$self->make_path_for_output_prefix();

	warn "Printing results\n" if $self->verbose;
	open (my $OUT, '>', $self->o_prefix.'libraries_overlap_stats.tab');
	say $OUT join("\t", 'total_records', 'total_copy_number', 'overlapping_records', 'overlapping_copy_number', 'overlapping_records_percent', 'overlapping_copy_number_percent');
	say $OUT join("\t", $total_records, $total_copy_number, $overlapping_records, $overlapping_copy_number, ($overlapping_records / $total_records) * 100, ($overlapping_copy_number / $total_copy_number) * 100);
	close $OUT;
}

sub read_rname_sizes {
	my ($self) = @_;
	
	my %rname_size;
	open (my $CHRSIZE, '<', $self->rname_sizes);
	while (my $line = <$CHRSIZE>) {

lib/CLIPSeqTools/PlotApp/genomic_distribution.pm  view on Meta::CPAN


	# Start R
	my $R = Statistics::R->new();

	# Pass arguments to R
	$R->set('ifile', $self->file);
	$R->set('figfile', $figfile);

	# Read table with data - Do exra calulations
	$R->run(q{idata = read.delim(ifile)});
	$R->run(q{idata$percent = (idata$count / idata$total) * 100});

	# Do plots
	$R->run(q{pdf(figfile, width=14)});
	$R->run(q{par(mfrow = c(1, 2), mar=c(10, 4.1, 3.1, 2.1));});

	$R->run(q{mypal = c(rep("black",2), rep("darkgrey",3), rep("grey",3),
		rep("lightgrey",3), rep("lightblue",3))});

	$R->run(q{x = barplot(idata$percent, names.arg=idata$category, col=mypal,
		ylim=c(0,100), ylab="Percent of total reads", xaxt="n");});
	$R->run(q{text(x=x, y=-max(idata$percent)/100, idata$category, xpd=TRUE,
		srt=50, adj=c(1, 1))});

	$R->run(q{x = barplot(height=idata$count, names.arg=idata$category,
		col=mypal, ylab="Number of reads", xaxt="n");});
	$R->run(q{text(x=x, y=-max(idata$count)/100, idata$category, xpd=TRUE,
		srt=50, adj=c(1, 1))});

	$R->run(q{graphics.off()});

	# Close R

lib/CLIPSeqTools/PlotApp/nucleotide_composition.pm  view on Meta::CPAN

	# Pass arguments to R
	$R->set('ifile', $self->file);
	$R->set('figfile', $figfile);

	# Disable scientific notation
	$R->run(q{options(scipen=999)});

	# Read table with data
	$R->run(q{idata = read.delim(ifile)});

	# Measure percentages
	$R->run(q{idata$A_percent = idata$A_count / idata$total_count * 100});
	$R->run(q{idata$C_percent = idata$C_count / idata$total_count * 100});
	$R->run(q{idata$G_percent = idata$G_count / idata$total_count * 100});
	$R->run(q{idata$T_percent = idata$T_count / idata$total_count * 100});
	$R->run(q{idata$N_percent = idata$N_count / idata$total_count * 100});

	# Do plots
	$R->run(q{pdf(figfile, width=7)});
	$R->run(q{par(mfrow = c(1, 1), cex.lab=1.2, cex.axis=1.2, cex.main=1.2,
		lwd=1.2, oma=c(0, 0, 0, 0), mar=c(5.1, 5.1, 4.1, 2.1))});
	$R->run(q{plot(idata$position, idata$A_percent, type="o",
		col="darkred", ylim=c(0,100), xlab="Position in read",
		ylab="Percent of nucleotides (%)", main="Nucleotide composition per
		read position", cex=0.5)});
	$R->run(q{lines(idata$position, idata$C_percent, type="o",
		col="orange", cex=0.5)});
	$R->run(q{lines(idata$position, idata$G_percent, type="o",
		col="lightblue", cex=0.5)});
	$R->run(q{lines(idata$position, idata$T_percent, type="o",
		col="darkblue", cex=0.5)});
	$R->run(q{lines(idata$position, idata$N_percent, type="o", col="black",
		cex=0.5)});
	$R->run(q{legend( "topright" , c("A", "C", "G", "T", "N"),
		fill=c("darkred", "orange", "lightblue", "darkblue", "black"))});
	$R->run(q{graphics.off()});

	# Close R
	$R->stop();
}


lib/CLIPSeqTools/PlotApp/reads_long_gaps_size_distribution.pm  view on Meta::CPAN


	# Read table with data
	$R->run(q{idata = read.delim(ifile)});

	# Create groups of scores
	$R->run(q{mybreaks = c(seq(0,500,100), seq(1000,5000,2000),
		seq(10000,50000,20000), Inf)});
	$R->run(q{idata$size_group = cut(idata$gap_size, breaks=mybreaks,
		dig.lab=4)});

	# Aggregate (sum) counts and percents for size groups
	$R->run(q{aggregate_counts = tapply(idata$count, idata$size_group , sum)});
	$R->run(q{aggregate_percents = tapply(idata$percent, idata$size_group , sum)});

	# Do plots
	$R->run(q{pdf(figfile, width=21)});
	$R->run(q{par(mfrow = c(1, 3), cex.lab=1.6, cex.axis=1.2, cex.main=1.6,
		lwd=1.5, oma=c(0, 0, 2, 0), mar=c(9.1, 5.1, 4.1, 2.1))});

	$R->run(q{plot(aggregate_counts, type="b", xaxt="n", pch=19, xlab = NA,
		ylab="Number of reads", main="Number of reads with given gap size")});
	$R->run(q{axis(1, at=1:length(aggregate_counts),
		labels=names(aggregate_counts), las=2)});
	$R->run(q{mtext(side = 1, "Gap size", line = 7)});

	$R->run(q{plot(aggregate_percents, type="b", xaxt="n", pch=19, xlab = NA,
		ylab="Percent of reads (%)", main="Percent of reads with given gap size")});
	$R->run(q{axis(1, at=1:length(aggregate_percents),
		labels=names(aggregate_percents), las=2)});
	$R->run(q{mtext(side = 1, "Gap size", line = 7)});

	$R->run(q{plot((aggregate_counts / sum(idata$count)) * 100, type="b",
		xaxt="n", pch=19, xlab = NA, ylab="Percent of gaps (%)", main="Percent
		of gaps with given size")});
	$R->run(q{axis(1, at=1:length(aggregate_counts),
		labels=names(aggregate_counts), las=2)});
	$R->run(q{mtext(side = 1, "Gap size", line = 7)});

	$R->run(q{graphics.off()});

lib/CLIPSeqTools/Tutorial/Details.pod  view on Meta::CPAN

=item 6. C<distribution_on_genic_elements>

Measure how reads are distributed along the length of 5'UTR, CDS and 3'UTR.

=item 7. C<distribution_on_introns_exons>

Measure how reads are distributed along the length of exons and introns.

=item 8. C<genome_coverage>

Measure percent of genome covered by reads.

=item 9. C<genomic_distribution>

Count reads on genes, repeats, exons , introns, 5'UTRs, ...

=item 10. C<nmer_enrichment_over_shuffled>

Measure the enrichment of Nmers within the reads over shuffled reads.

=item 11. C<nucleotide_composition>



( run in 0.327 second using v1.01-cache-2.11-cpan-709fd43a63f )