Bio-Palantir

 view release on metacpan or  search on metacpan

lib/Bio/Palantir/Parser/Root.pm  view on Meta::CPAN


    else {
        # do not filter clusters, as antiSMASH 4 resets coordinates for each contig
        $cluster_list = [ @{ forcearray $self->_root->{'model'} } ];
    }
                
    my @clusters;
    for my $cluster (@$cluster_list) {
        my $begin = $cluster->{'genecluster'}->{'region'}->{'begin'}->{'value'};
        my $end   = $cluster->{'genecluster'}->{'region'}->{'end'  }->{'value'};

        # fix for antiSMASH 4: keep coordinates in strand + order
        my $strand = $begin < $end ? '+' : '-';
        
        if ($strand eq '-') {
            my $temp_begin = $begin;

            $begin = $end;
            $end   = $temp_begin;
        }

        # second fix for antiSMASH 4: handle the coordinates being reset for
        # each contig (relies on the sequence value, which is only used with
        # antiSMASH 4)
        my @cluster_genes;
        if ($cluster->{'genecluster'}->{'sequence'}->{'value'}) {
            my $cluster_seqlist 
                = $cluster->{'genecluster'}->{'sequence'}->{'value'};

            # filter on the sequence value and the cluster coordinates
            @cluster_genes = grep { $_->genomic_dna_begin < $end 
                                && $_->genomic_dna_end > $begin }
                             grep { $_->_root->{'sequence'}{'value'} 
                                eq $cluster_seqlist } @genes
            ;
        }
        
        # this information is not extracted in antiSMASH 5 (but coordinates are continuous)
        else {
            @cluster_genes = grep { $_->genomic_dna_begin < $end 
                &&  $_->genomic_dna_end > $begin } @genes;
        }
        
        @cluster_genes 
            = sort { $a->genomic_dna_begin <=> $b->genomic_dna_begin } 
            @cluster_genes
        ;

        my $gene_rank = 1;
        my $domain_rank = 1;

        for my $gene (@cluster_genes) {

            $_->_set_rank($domain_rank++) 
                for sort { $a->begin <=> $b->begin } $gene->all_domains;

            $gene->_set_rank($gene_rank++);
        }

        # fix antiSMASH 5.1 and its module delineation
        if ($self->_root->{modulelist}) {     # Add new module feature from version 5.1

            # TODO: see how to synchronize domain ranks between @genes and @modules
            my @cluster_modules = 
                sort {$a->genomic_prot_begin <=> $b->genomic_prot_begin } 
                grep { $_->genomic_dna_begin < $end
                    && $_->genomic_dna_end > $begin }
                @modules;
            ;
            
            my $mrank = 1;
            $_->_set_rank($mrank++) for @cluster_modules;
            
            push @clusters, Cluster->new( 
                module_delineation => $self->module_delineation,
                              rank => $cluster_rank,
                             _root => $cluster->{'genecluster'},
                             genes => \@cluster_genes,
                 genomic_dna_begin => $begin,
                   genomic_dna_end => $end,
                genomic_prot_begin => ceil($begin / 3),
                genomic_prot_end   => floor($end / 3),
                modules            => \@cluster_modules,
            );
        }

        else { 
            push @clusters, Cluster->new( 
                module_delineation => $self->module_delineation,
                              rank => $cluster_rank,
                             _root => $cluster->{'genecluster'},
                             genes => \@cluster_genes,
                 genomic_dna_begin => $begin,
                   genomic_dna_end => $end,
                genomic_prot_begin => ceil($begin / 3),
                  genomic_prot_end => floor($end / 3),
            );
        }

        $cluster_rank++;
    }

    # enables module cutting mode
    $_->_set_cutting_mode( $self->module_delineation ) for @clusters;

    $self->_set_clusters( \@clusters );

    return;
}

sub _extract_antismash_modules { 

    my ($root, @report_genes) = @_;

    my @report_modules;
    for my $module ( @{ forcearray $root->{'modulelist'}{'module'} }) {

        next
            if $module->{complete}{value} eq 'false';

        my @mgenes
            = grep { $module->{prot_start}{value} < $_->genomic_prot_end
                  && $module->{prot_end}{value}  >  $_->genomic_prot_begin }



( run in 1.734 second using v1.01-cache-2.11-cpan-97f6503c9c8 )