CLIPSeqTools

 view release on metacpan or  search on metacpan

lib/CLIPSeqTools/CompareApp/libraries_overlap_stats.pm  view on Meta::CPAN

	$self->validate_args();
	
	warn "Reading sizes for reference alignment sequences\n" if $self->verbose;
	my %rname_sizes = $self->read_rname_sizes;

	warn "Creating reads collection\n" if $self->verbose;
	my $reads_collection = $self->reads_collection;
	my @rnames = $reads_collection->rnames_for_all_strands;

	warn "Creating reference reads collection\n" if $self->verbose;
	my $r_reads_collection = $self->r_reads_collection;

	warn "Measuring the overlap of the primary library with the reference\n" if $self->verbose;
	my $total_copy_number = 0;       # The total copy number of primary records
	my $total_records = 0;           # The total number of primary records
	my $overlapping_copy_number = 0; # The total copy number of primary records that overlap the reference
	my $overlapping_records = 0;     # The total number of primary records that overlap the reference
	foreach my $rname (@rnames) {
		warn " Annotating $rname with reference records\n" if $self->verbose;
		my $rname_size = $rname_sizes{$rname};
		my $pdl_plus   = PDL->zeros(PDL::byte(), $rname_size);
		my $pdl_minus  = PDL->zeros(PDL::byte(), $rname_size);
		
		$r_reads_collection->foreach_record_on_rname_do($rname, sub {
			my ($r_record) = @_;
			
			my $coords = [$r_record->start, $r_record->stop];
			$pdl_plus->slice($coords)  .= 1 if $r_record->strand == 1;
			$pdl_minus->slice($coords) .= 1 if $r_record->strand == -1;
			
			return 0;
		});
		
		
		warn " Parsing primary records on $rname and checking for overlap with reference\n" if $self->verbose;
		$reads_collection->foreach_record_on_rname_do($rname, sub {
			my ($p_record) = @_;
			
			my $overlap = 0;
			my $coords = [$p_record->start, $p_record->stop];
			$overlap = $pdl_plus->slice($coords)->sum() if $p_record->strand == 1;
			$overlap = $pdl_minus->slice($coords)->sum() if $p_record->strand == -1;
			
			if ($overlap) {
				$overlapping_copy_number += $p_record->copy_number;
				$overlapping_records += 1;
			}
			
			$total_copy_number += $p_record->copy_number;
			$total_records += 1;
			
			return 0;
		});
	}

	warn "Creating output path\n" if $self->verbose;
	$self->make_path_for_output_prefix();

	warn "Printing results\n" if $self->verbose;
	open (my $OUT, '>', $self->o_prefix.'libraries_overlap_stats.tab');
	say $OUT join("\t", 'total_records', 'total_copy_number', 'overlapping_records', 'overlapping_copy_number', 'overlapping_records_percent', 'overlapping_copy_number_percent');
	say $OUT join("\t", $total_records, $total_copy_number, $overlapping_records, $overlapping_copy_number, ($overlapping_records / $total_records) * 100, ($overlapping_copy_number / $total_copy_number) * 100);
	close $OUT;
}

sub read_rname_sizes {
	my ($self) = @_;
	
	my %rname_size;
	open (my $CHRSIZE, '<', $self->rname_sizes);
	while (my $line = <$CHRSIZE>) {
		chomp $line;
		my ($chr, $size) = split(/\t/, $line);
		$rname_size{$chr} = $size;
	}
	close $CHRSIZE;
	return %rname_size;
}

1;



( run in 0.917 second using v1.01-cache-2.11-cpan-39bf76dae61 )