CLIPSeqTools
view release on metacpan or search on metacpan
lib/CLIPSeqTools/CompareApp/libraries_overlap_stats.pm view on Meta::CPAN
$self->validate_args();
warn "Reading sizes for reference alignment sequences\n" if $self->verbose;
my %rname_sizes = $self->read_rname_sizes;
warn "Creating reads collection\n" if $self->verbose;
my $reads_collection = $self->reads_collection;
my @rnames = $reads_collection->rnames_for_all_strands;
warn "Creating reference reads collection\n" if $self->verbose;
my $r_reads_collection = $self->r_reads_collection;
warn "Measuring the overlap of the primary library with the reference\n" if $self->verbose;
my $total_copy_number = 0; # The total copy number of primary records
my $total_records = 0; # The total number of primary records
my $overlapping_copy_number = 0; # The total copy number of primary records that overlap the reference
my $overlapping_records = 0; # The total number of primary records that overlap the reference
foreach my $rname (@rnames) {
warn " Annotating $rname with reference records\n" if $self->verbose;
my $rname_size = $rname_sizes{$rname};
my $pdl_plus = PDL->zeros(PDL::byte(), $rname_size);
my $pdl_minus = PDL->zeros(PDL::byte(), $rname_size);
$r_reads_collection->foreach_record_on_rname_do($rname, sub {
my ($r_record) = @_;
my $coords = [$r_record->start, $r_record->stop];
$pdl_plus->slice($coords) .= 1 if $r_record->strand == 1;
$pdl_minus->slice($coords) .= 1 if $r_record->strand == -1;
return 0;
});
warn " Parsing primary records on $rname and checking for overlap with reference\n" if $self->verbose;
$reads_collection->foreach_record_on_rname_do($rname, sub {
my ($p_record) = @_;
my $overlap = 0;
my $coords = [$p_record->start, $p_record->stop];
$overlap = $pdl_plus->slice($coords)->sum() if $p_record->strand == 1;
$overlap = $pdl_minus->slice($coords)->sum() if $p_record->strand == -1;
if ($overlap) {
$overlapping_copy_number += $p_record->copy_number;
$overlapping_records += 1;
}
$total_copy_number += $p_record->copy_number;
$total_records += 1;
return 0;
});
}
warn "Creating output path\n" if $self->verbose;
$self->make_path_for_output_prefix();
warn "Printing results\n" if $self->verbose;
open (my $OUT, '>', $self->o_prefix.'libraries_overlap_stats.tab');
say $OUT join("\t", 'total_records', 'total_copy_number', 'overlapping_records', 'overlapping_copy_number', 'overlapping_records_percent', 'overlapping_copy_number_percent');
say $OUT join("\t", $total_records, $total_copy_number, $overlapping_records, $overlapping_copy_number, ($overlapping_records / $total_records) * 100, ($overlapping_copy_number / $total_copy_number) * 100);
close $OUT;
}
sub read_rname_sizes {
my ($self) = @_;
my %rname_size;
open (my $CHRSIZE, '<', $self->rname_sizes);
while (my $line = <$CHRSIZE>) {
chomp $line;
my ($chr, $size) = split(/\t/, $line);
$rname_size{$chr} = $size;
}
close $CHRSIZE;
return %rname_size;
}
1;
( run in 0.917 second using v1.01-cache-2.11-cpan-39bf76dae61 )