App-Fasops
view release on metacpan or search on metacpan
lib/App/Fasops/Common.pm view on Meta::CPAN
my $seg = $realigned_segments->[$i];
$seg = uc $seg;
substr( $aligned[$i], $seg_start - 1, $seg_end - $seg_start + 1, $seg );
}
}
return \@aligned;
}
#----------------------------#
# trim pure dash regions
#----------------------------#
sub trim_pure_dash {
my $seq_refs = shift;
my $seq_count = @{$seq_refs};
my $seq_length = length $seq_refs->[0];
return unless $seq_length;
my $trim_region = AlignDB::IntSpan->new();
for my $pos ( 1 .. $seq_length ) {
my @bases;
for my $i ( 0 .. $seq_count - 1 ) {
my $base = substr( $seq_refs->[$i], $pos - 1, 1 );
push @bases, $base;
}
if ( all { $_ eq '-' } @bases ) {
$trim_region->add($pos);
}
}
for my $span ( reverse $trim_region->spans ) {
my $seg_start = $span->[0];
my $seg_end = $span->[1];
for my $i ( 0 .. $seq_count - 1 ) {
substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
}
}
return;
}
#----------------------------#
# trim outgroup only sequence
#----------------------------#
# if intersect is superset of union
# T G----C
# Q G----C
# O GAAAAC
sub trim_outgroup {
my $seq_refs = shift;
my $seq_count = scalar @{$seq_refs};
Carp::confess "Need three or more sequences\n" if $seq_count < 3;
# Don't expand indel set here. Last seq is outgroup
my @indel_intspans;
for my $i ( 0 .. $seq_count - 2 ) {
my $indel_intspan = indel_intspan( $seq_refs->[$i] );
push @indel_intspans, $indel_intspan;
}
# find trim_region
my $union_set = AlignDB::IntSpan::union(@indel_intspans);
my $intersect_set = AlignDB::IntSpan::intersect(@indel_intspans);
my $trim_region = AlignDB::IntSpan->new();
for my $span ( $union_set->runlists ) {
if ( $intersect_set->superset($span) ) {
$trim_region->add($span);
}
}
# trim all segments in trim_region
for my $span ( reverse $trim_region->spans ) {
my $seg_start = $span->[0];
my $seg_end = $span->[1];
for my $i ( 0 .. $seq_count - 1 ) {
substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
}
}
return;
}
#----------------------------#
# record complex indels and ingroup indels
#----------------------------#
# All ingroup intersect sets are parts of complex indels after trim_outgroup()
# intersect 4-6
# T GGA--C
# Q G----C
# O GGAGAC
# result, complex_region 2-3
# T GGAC
# Q G--C
# O GGAC
sub trim_complex_indel {
my $seq_refs = shift;
my $seq_count = scalar @{$seq_refs};
my @ingroup_idx = ( 0 .. $seq_count - 2 );
Carp::confess "Need three or more sequences\n" if $seq_count < 3;
# Don't expand indel set here. Last seq is outgroup
my @indel_intspans;
for my $i (@ingroup_idx) {
my $indel_intspan = indel_intspan( $seq_refs->[$i] );
push @indel_intspans, $indel_intspan;
}
my $outgroup_indel_intspan = indel_intspan( $seq_refs->[ $seq_count - 1 ] );
# find trim_region
my $union_set = AlignDB::IntSpan::union(@indel_intspans);
my $intersect_set = AlignDB::IntSpan::intersect(@indel_intspans);
my $complex_region = AlignDB::IntSpan->new();
for ( reverse $intersect_set->spans ) {
my $seg_start = $_->[0];
my $seg_end = $_->[1];
# trim sequence, including outgroup
for my $i ( 0 .. $seq_count - 1 ) {
substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
}
# add to complex_region
for my $span ( $union_set->runlists ) {
my $sub_union_set = AlignDB::IntSpan->new($span);
if ( $sub_union_set->superset("$seg_start-$seg_end") ) {
$complex_region->merge($sub_union_set);
}
}
# modify all related set
$union_set = $union_set->banish_span( $seg_start, $seg_end );
for (@ingroup_idx) {
$indel_intspans[$_]
= $indel_intspans[$_]->banish_span( $seg_start, $seg_end );
}
$outgroup_indel_intspan->banish_span( $seg_start, $seg_end );
$complex_region = $complex_region->banish_span( $seg_start, $seg_end );
}
# add ingroup-outgroup complex indels to complex_region
for my $i (@ingroup_idx) {
my $outgroup_intersect_intspan = $outgroup_indel_intspan->intersect( $indel_intspans[$i] );
for my $sub_out_set ( $outgroup_intersect_intspan->sets ) {
for my $sub_union_set ( $union_set->sets ) {
# union_set > intersect_set
if ( $sub_union_set->larger_than($sub_out_set) ) {
$complex_region->merge($sub_union_set);
}
}
}
}
return $complex_region->runlist;
}
# read normal fasta files
sub read_fasta {
my $infile = shift;
( run in 2.081 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )