App-Sandy
view release on metacpan or search on metacpan
lib/App/Sandy/Simulator.pm view on Meta::CPAN
# Populate alternative seq_id
$self->_populate_piece_table($piece_table{$seq_id}{alt}{table}, $snvs);
}
}
# Initialize the logical offsets and valodate the
# new size due to the genomic variation
my @blacklist;
for my $seq_id (keys %piece_table) {
my $type_h = delete $piece_table{$seq_id};
for my $type (keys %$type_h) {
my $table_h = delete $type_h->{$type};
my $table = $table_h->{table};
# Initialize the logical offset
$table->calculate_logical_offset;
# Get the new size
my $new_size = $table->logical_len;
unless ($self->truncate) {
my $class = ref $self->seq;
if ($class eq 'App::Sandy::Seq::SingleEnd') {
if ($new_size < $self->seq->read_mean) {
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required read-mean";
next;
}
} elsif ($class eq 'App::Sandy::Seq::PairedEnd') {
if ($new_size < $self->seq->fragment_mean) {
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required fragment mean";
next;
}
} else {
die "No valid options for 'seq'";
}
}
# If all's right
$table_h->{size} = $new_size;
$type_h->{$type} = $table_h;
}
# if there is at least one type,
# then return it to the piece_table
if (%$type_h) {
$piece_table{$seq_id} = $type_h;
# else, just remove it!
} else {
push @blacklist => $seq_id;
}
}
unless (%piece_table) {
die "All fasta entries were removed due to deletions. ",
"Please, verify the genomic variation '$genomic_variation'\n";
}
# If fasta_rtree has entries
unless ($self->_has_no_fasta_rtree) {
# Remove no valid entries from id -> pid relation
$self->_delete_fasta_rtree(@blacklist) if @blacklist;
}
# Make the id -> pid relationship
$self->_populate_fasta_tree;
# HASH -> SEQ_ID -> @(REF @ALT) -> @(TABLE SIZE)
return \%piece_table;
}
sub _populate_piece_table {
my ($self, $table, $snvs) = @_;
for my $snv (@$snvs) {
# If there is an ID, make sure that it is not a comma, colon
# separated list. Else, make sure to keep the ref/alt length
# to max 25+25+1=51
my $annot = defined $snv->{id} && $snv->{id} ne '.'
? sprintf "%d:%s" => $snv->{pos} + 1, (split(/[,;]/, $snv->{id}))[0]
: sprintf "%d:%.25s/%.25s" => $snv->{pos} + 1, $snv->{ref}, $snv->{alt};
# Insertion
if ($snv->{ref} eq '-') {
$table->insert(\$snv->{alt}, $snv->{pos}, $annot);
# Deletion
} elsif ($snv->{alt} eq '-') {
$table->delete($snv->{pos}, length $snv->{ref}, $annot);
# Change
} else {
$table->change(\$snv->{alt}, $snv->{pos}, length $snv->{ref}, $annot);
}
}
}
sub _validate_indexed_snv_against_fasta {
my ($self, $indexed_snv) = @_;
my $indexed_fasta = $self->_fasta;
my $genomic_variation = $self->_genomic_variation_names;
for my $std_seq_id (keys %$indexed_snv) {
my $snvs = delete $indexed_snv->{$std_seq_id};
my $seq_id = $self->_get_seqname($std_seq_id);
unless (defined $seq_id && exists $indexed_fasta->{$seq_id}) {
next;
}
my $seq = \$indexed_fasta->{$seq_id}{seq};
my $size = $indexed_fasta->{$seq_id}{size};
my @saved_snvs;
for my $snv (@$snvs) {
# Insertions may accur until one base after the
# end of the sequence, not more
if (($snv->{ref} eq '-' && $snv->{pos} > $size) || ($snv->{ref} ne '-' && $snv->{pos} >= $size)) {
log_msg sprintf ":: In validating '%s': Position, %s/%s at %s:%d, outside fasta sequence",
$genomic_variation, $snv->{ref}, $snv->{alt}, $seq_id, $snv->{pos} + 1;
# Next snv
next;
# Deletions and changes. Just verify if the reference exists
} elsif ($snv->{ref} ne '-') {
my $ref = substr $$seq, $snv->{pos}, length($snv->{ref});
if (uc($ref) ne uc($snv->{ref})) {
log_msg sprintf ":: In validating '%s': Not found reference '%s' at fasta position %s:%d",
$genomic_variation, $snv->{ref}, $seq_id, $snv->{pos} + 1;
# Next snv
next;
}
}
push @saved_snvs => $snv;
}
if (@saved_snvs) {
$indexed_snv->{$std_seq_id} = [@saved_snvs];
}
}
}
sub _calculate_number_of_reads {
my $self = shift;
my $number_of_reads;
if ($self->count_loops_by eq 'coverage') {
# It is needed to calculate the genome size
my $fasta = $self->_fasta;
my $fasta_size = 0;
$fasta_size += $fasta->{$_}{size} for keys %{ $fasta };
$number_of_reads = int(($fasta_size * $self->coverage) / $self->seq->read_mean);
# In case it is paired-end read, divide the number of reads by 2 because
# App::Sandy::Seq::PairedEnd class returns 2 reads at time
$number_of_reads = int($number_of_reads / 2)
if ref($self->seq) eq 'App::Sandy::Seq::PairedEnd';
} elsif ($self->count_loops_by eq 'number-of-reads') {
$number_of_reads = $self->number_of_reads;
} else {
croak sprintf "Unknown option '%s' for calculating the number of reads\n",
$self->count_loops_by;
}
# Maybe the number_of_reads is zero. It may occur due to the low coverage and/or fasta_file size
if ($number_of_reads <= 0) {
die "The computed number of reads is equal to zero.\n" .
"It may occur due to the low coverage, fasta-file sequence size or number of reads directly passed by the user\n";
}
return $number_of_reads;
}
sub _calculate_parent_count {
my ($self, $counter_ref) = @_;
return if $self->_has_no_fasta_rtree;
my %parent_count;
while (my ($id, $count) = each %$counter_ref) {
my $pid = $self->_get_fasta_rtree($id);
$parent_count{$pid} += $count if defined $pid;
( run in 3.462 seconds using v1.01-cache-2.11-cpan-13bb782fe5a )