Bio-MUST-Apps-FortyTwo
view release on metacpan or search on metacpan
lib/Bio/MUST/Apps/FortyTwo/OrgProcessor.pm view on Meta::CPAN
$new_seq = $new_seq->reverse_complemented_seq;
$subject_seq = $subject_seq->reverse_complemented_seq;
}
####### [ORG] Adding: $new_seq->full_id
# align new_seq on template_seq using subject_seq as a guide
$aligned_seqs->add_seq(
$ap->integrator->align(
new_seq => $new_seq,
subject => $subject_seq,
template => $template_seq,
start => $hsp->hit_start,
)
);
######## [DEBUG] aligned with BLAST
}
}
}
}
$parser->remove unless $rp->debug_mode;
return $aligned_seqs;
}
## use critic
sub _compress_seqs {
my $self = shift;
my $ali = shift;
my $rp = $self->ali_proc->run_proc;
# first fetch all seqs...
# ... then clear existing seqs
my @seqs2cap = $ali->all_seqs;
$ali->_set_seqs( [] );
# proceed only if at least one seq
return unless @seqs2cap;
# Note: this might seem sub-optimal in case of singletons
# but we follow as closely as possible the logic of compress-db.pl
# proceed only if at least two seqs
# otherwise add lone seq
if (@seqs2cap < 2) {
$ali->add_seq( shift @seqs2cap );
return;
}
# TODO: add debugging comments?
# try to cap seqs
my $cap = Cap3->new(
seqs => \@seqs2cap,
cap3_args => {
-p => $rp->merge_min_ident * 100.0, # CAP3 expects percents
-o => $rp->merge_min_len,
},
);
# add singlet seqs
my @singlets = $cap->all_singlets;
$ali->add_seq($_) for @singlets;
# proceed only if contigs of seqs
my @contigs = $cap->all_contigs;
return unless @contigs;
for my $contig (@contigs) {
my @ids = map { $_->full_id }
@{ $cap->seq_ids_for( $contig->full_id ) };
my $contig_id = shift(@ids) . ':::+' . @ids;
my $contig_seq = $contig->seq;
# add contig seq
$ali->add_seq(
Seq->new( seq_id => $contig_id, seq => $contig_seq )
);
}
# Note: we do not need to return anything as we modified $ali in place
return;
}
sub _fetch_tax_line_for_transcript {
my $self = shift;
my $orthologue = shift;
my $ap = $self->ali_proc;
my $rp = $ap->run_proc;
# fetch transcript accession (= query id) from BLAST report
my $query_def = $orthologue->query_def;
my $transcript_acc = $self->orthologous_seqs->long_id_for($query_def);
# extract range and strand (if any) from transcript accession...
# Note: this mostly makes sense for genomic contigs/scaffolds
# ... and extract potential tail '+N' (if orthologues have been merged)
# Note: this looks convoluted but it has to work in many different setups
# depending on the value of trim_homologues and merge_orthologues:
# - off/off: acc
# - off/on: acc:::+N
# - on /off: acc:::start:::end:::strand
# - on /on: acc:::start:::end:::strand:::+N
my @fields = split /:::/xms, $transcript_acc;
my ($strip_acc, $start, $end, $strand, $more) = @fields;
($more, $start) = ($start, undef)
if !defined $end && defined $start && $start =~ m/\A \+\d+ \z/xms;
# set acc to transcript accession
my ($first_chunk) = $strip_acc =~ m/^(\S+)/xms;
my @parts = split /\|/xms, $first_chunk;
my $acc = pop @parts;
###### [ORG] orthologous transcript: $acc
( run in 1.825 second using v1.01-cache-2.11-cpan-39bf76dae61 )