view release on metacpan or search on metacpan
lib/CLIPSeqTools/App/genome_coverage.pm view on Meta::CPAN
=head1 NAME
CLIPSeqTools::App::genome_coverage - Measure percent of genome covered by reads.
=head1 SYNOPSIS
clipseqtools genome_coverage [options/parameters]
=head1 DESCRIPTION
Measure the percent of genome that is covered by the reads of a library.
=head1 OPTIONS
Input options for library.
--driver <Str> driver for database connection (eg. mysql,
SQLite).
--database <Str> database name or path to database file for file
based databases (eg. SQLite).
--table <Str> database table.
--host <Str> hostname for database connection.
lib/CLIPSeqTools/App/genome_coverage.pm view on Meta::CPAN
my $reads_collection = $self->reads_collection;
my @rnames = $reads_collection->rnames_for_all_strands;
warn "Creating output path\n" if $self->verbose;
$self->make_path_for_output_prefix();
warn "Calculating genome coverage\n" if $self->verbose;
my $total_genome_coverage = 0;
my $total_genome_length = 0;
open(my $OUT, '>', $self->o_prefix.'genome_coverage.tab');
say $OUT join("\t", 'rname', 'covered_area', 'size', 'percent_covered');
foreach my $rname (@rnames) {
warn "Working for $rname\n" if $self->verbose;
my $pdl = PDL->zeros(PDL::byte(), $rname_sizes{$rname});
$reads_collection->foreach_record_on_rname_do($rname, sub {
$pdl->slice([$_[0]->start, $_[0]->stop]) .= 1;
return 0;
});
my $covered_area = $pdl->sum();
lib/CLIPSeqTools/App/nmer_enrichment_over_shuffled.pm view on Meta::CPAN
Output
--o_prefix <Str> output path prefix. Script will create and add
extension to path. [Default: ./]
Other options.
--nmer_length <Int> length N of the Nmer. Default: 6
--permutations <Int> number of permutation to be performed.
Use more than 100 to get p-values < 0.01.
--subset <Num> run analysis on random subset. Option specifies
number (if integer) or percent (if % is used)
of data to be used.
-v --verbose print progress lines and extra information.
-h -? --usage --help print help message
=cut
package CLIPSeqTools::App::nmer_enrichment_over_shuffled;
$CLIPSeqTools::App::nmer_enrichment_over_shuffled::VERSION = '1.0.0';
# Make it an app command
lib/CLIPSeqTools/App/nmer_enrichment_over_shuffled.pm view on Meta::CPAN
is => 'rw',
isa => 'Int',
default => 100,
documentation => 'the number of permutation to be performed. Use more than 100 to get p-values < 0.01.',
);
option 'subset' => (
is => 'rw',
isa => 'Str',
default => '100%',
documentation => 'run analysis on random subset. Option specifies the number (if integer) or percent (if % is used) of data to be used.',
);
#######################################################################
########################## Consume Roles ##########################
#######################################################################
with
"CLIPSeqTools::Role::Option::Library" => {
-alias => { validate_args => '_validate_args_for_library' },
-excludes => 'validate_args',
lib/CLIPSeqTools/App/reads_long_gaps_size_distribution.pm view on Meta::CPAN
}
return 0;
});
warn "Creating output path\n" if $self->verbose;
$self->make_path_for_output_prefix();
warn "Printing results\n" if $self->verbose;
open (my $OUT1, '>', $self->o_prefix.'reads_long_gaps_size_distribution.tab');
say $OUT1 join("\t", 'gap_size', 'count', 'percent');
say $OUT1 join("\t", $_, $gap_size_count{$_}, ($gap_size_count{$_}/$total_cn)*100) for sort {$a <=> $b} keys %gap_size_count;
close $OUT1;
if ($self->plot) {
warn "Creating plot\n" if $self->verbose;
CLIPSeqTools::PlotApp->initialize_command_class(
'CLIPSeqTools::PlotApp::reads_long_gaps_size_distribution',
file => $self->o_prefix.'reads_long_gaps_size_distribution.tab',
o_prefix => $self->o_prefix
)->run();
lib/CLIPSeqTools/CompareApp/compare_counts.pm view on Meta::CPAN
warn "Starting analysis: compare_counts\n";
warn "Validating arguments\n" if $self->verbose;
$self->validate_args();
warn "Reading input files\n" if $self->verbose;
my @tables = map {Data::Table::fromFile($_)} @{$self->table};
die "Error: Input table sizes differ. Aborting.\n" if not all_tables_of_equal_size(@tables);
$self->t_name([(1..@tables)]) if !defined $self->t_name;
warn "Searching for 25th percentile\n" if $self->verbose;
my $uq_idx = _quantile_idx_from_table_with_fewer(
$self->val_col, $self->val_thres, $self->key_col, @tables);
warn "Normalizing the values\n" if $self->verbose;
#Normalized column name is value column + "_uq" suffix
_build_normalized_column_in_tables($self->val_col, $uq_idx, @tables);
warn "Writing output files\n" if $self->verbose;
my @output_files = map {
$self->o_prefix . $_ . '.counts.uq.tab'} @{$self->t_name};
lib/CLIPSeqTools/CompareApp/libraries_overlap_stats.pm view on Meta::CPAN
return 0;
});
}
warn "Creating output path\n" if $self->verbose;
$self->make_path_for_output_prefix();
warn "Printing results\n" if $self->verbose;
open (my $OUT, '>', $self->o_prefix.'libraries_overlap_stats.tab');
say $OUT join("\t", 'total_records', 'total_copy_number', 'overlapping_records', 'overlapping_copy_number', 'overlapping_records_percent', 'overlapping_copy_number_percent');
say $OUT join("\t", $total_records, $total_copy_number, $overlapping_records, $overlapping_copy_number, ($overlapping_records / $total_records) * 100, ($overlapping_copy_number / $total_copy_number) * 100);
close $OUT;
}
sub read_rname_sizes {
my ($self) = @_;
my %rname_size;
open (my $CHRSIZE, '<', $self->rname_sizes);
while (my $line = <$CHRSIZE>) {
lib/CLIPSeqTools/PlotApp/genomic_distribution.pm view on Meta::CPAN
# Start R
my $R = Statistics::R->new();
# Pass arguments to R
$R->set('ifile', $self->file);
$R->set('figfile', $figfile);
# Read table with data - Do exra calulations
$R->run(q{idata = read.delim(ifile)});
$R->run(q{idata$percent = (idata$count / idata$total) * 100});
# Do plots
$R->run(q{pdf(figfile, width=14)});
$R->run(q{par(mfrow = c(1, 2), mar=c(10, 4.1, 3.1, 2.1));});
$R->run(q{mypal = c(rep("black",2), rep("darkgrey",3), rep("grey",3),
rep("lightgrey",3), rep("lightblue",3))});
$R->run(q{x = barplot(idata$percent, names.arg=idata$category, col=mypal,
ylim=c(0,100), ylab="Percent of total reads", xaxt="n");});
$R->run(q{text(x=x, y=-max(idata$percent)/100, idata$category, xpd=TRUE,
srt=50, adj=c(1, 1))});
$R->run(q{x = barplot(height=idata$count, names.arg=idata$category,
col=mypal, ylab="Number of reads", xaxt="n");});
$R->run(q{text(x=x, y=-max(idata$count)/100, idata$category, xpd=TRUE,
srt=50, adj=c(1, 1))});
$R->run(q{graphics.off()});
# Close R
lib/CLIPSeqTools/PlotApp/nucleotide_composition.pm view on Meta::CPAN
# Pass arguments to R
$R->set('ifile', $self->file);
$R->set('figfile', $figfile);
# Disable scientific notation
$R->run(q{options(scipen=999)});
# Read table with data
$R->run(q{idata = read.delim(ifile)});
# Measure percentages
$R->run(q{idata$A_percent = idata$A_count / idata$total_count * 100});
$R->run(q{idata$C_percent = idata$C_count / idata$total_count * 100});
$R->run(q{idata$G_percent = idata$G_count / idata$total_count * 100});
$R->run(q{idata$T_percent = idata$T_count / idata$total_count * 100});
$R->run(q{idata$N_percent = idata$N_count / idata$total_count * 100});
# Do plots
$R->run(q{pdf(figfile, width=7)});
$R->run(q{par(mfrow = c(1, 1), cex.lab=1.2, cex.axis=1.2, cex.main=1.2,
lwd=1.2, oma=c(0, 0, 0, 0), mar=c(5.1, 5.1, 4.1, 2.1))});
$R->run(q{plot(idata$position, idata$A_percent, type="o",
col="darkred", ylim=c(0,100), xlab="Position in read",
ylab="Percent of nucleotides (%)", main="Nucleotide composition per
read position", cex=0.5)});
$R->run(q{lines(idata$position, idata$C_percent, type="o",
col="orange", cex=0.5)});
$R->run(q{lines(idata$position, idata$G_percent, type="o",
col="lightblue", cex=0.5)});
$R->run(q{lines(idata$position, idata$T_percent, type="o",
col="darkblue", cex=0.5)});
$R->run(q{lines(idata$position, idata$N_percent, type="o", col="black",
cex=0.5)});
$R->run(q{legend( "topright" , c("A", "C", "G", "T", "N"),
fill=c("darkred", "orange", "lightblue", "darkblue", "black"))});
$R->run(q{graphics.off()});
# Close R
$R->stop();
}
lib/CLIPSeqTools/PlotApp/reads_long_gaps_size_distribution.pm view on Meta::CPAN
# Read table with data
$R->run(q{idata = read.delim(ifile)});
# Create groups of scores
$R->run(q{mybreaks = c(seq(0,500,100), seq(1000,5000,2000),
seq(10000,50000,20000), Inf)});
$R->run(q{idata$size_group = cut(idata$gap_size, breaks=mybreaks,
dig.lab=4)});
# Aggregate (sum) counts and percents for size groups
$R->run(q{aggregate_counts = tapply(idata$count, idata$size_group , sum)});
$R->run(q{aggregate_percents = tapply(idata$percent, idata$size_group , sum)});
# Do plots
$R->run(q{pdf(figfile, width=21)});
$R->run(q{par(mfrow = c(1, 3), cex.lab=1.6, cex.axis=1.2, cex.main=1.6,
lwd=1.5, oma=c(0, 0, 2, 0), mar=c(9.1, 5.1, 4.1, 2.1))});
$R->run(q{plot(aggregate_counts, type="b", xaxt="n", pch=19, xlab = NA,
ylab="Number of reads", main="Number of reads with given gap size")});
$R->run(q{axis(1, at=1:length(aggregate_counts),
labels=names(aggregate_counts), las=2)});
$R->run(q{mtext(side = 1, "Gap size", line = 7)});
$R->run(q{plot(aggregate_percents, type="b", xaxt="n", pch=19, xlab = NA,
ylab="Percent of reads (%)", main="Percent of reads with given gap size")});
$R->run(q{axis(1, at=1:length(aggregate_percents),
labels=names(aggregate_percents), las=2)});
$R->run(q{mtext(side = 1, "Gap size", line = 7)});
$R->run(q{plot((aggregate_counts / sum(idata$count)) * 100, type="b",
xaxt="n", pch=19, xlab = NA, ylab="Percent of gaps (%)", main="Percent
of gaps with given size")});
$R->run(q{axis(1, at=1:length(aggregate_counts),
labels=names(aggregate_counts), las=2)});
$R->run(q{mtext(side = 1, "Gap size", line = 7)});
$R->run(q{graphics.off()});
lib/CLIPSeqTools/Tutorial/Details.pod view on Meta::CPAN
=item 6. C<distribution_on_genic_elements>
Measure how reads are distributed along the length of 5'UTR, CDS and 3'UTR.
=item 7. C<distribution_on_introns_exons>
Measure how reads are distributed along the length of exons and introns.
=item 8. C<genome_coverage>
Measure percent of genome covered by reads.
=item 9. C<genomic_distribution>
Count reads on genes, repeats, exons , introns, 5'UTRs, ...
=item 10. C<nmer_enrichment_over_shuffled>
Measure the enrichment of Nmers within the reads over shuffled reads.
=item 11. C<nucleotide_composition>