Bio-MUST-Core

 view release on metacpan or  search on metacpan

lib/Bio/MUST/Core/SeqId.pm  view on Meta::CPAN


    # possibly remove surrounding quotes (before anything else is done)
    if ($self->full_id =~ m/\A $RE{delimited}{-delim=>q{'"}}{-keep} \z/xms) {
        $self->_set_full_id($3);
    }

    # warn of trailing spaces as they harm taxonomic analysis
    carp '[BMC] Warning: "' . $self->full_id . '" has trailing spaces;'
        . ' cannot parse sequence id!' if $self->full_id =~ m/\s+\z/xms;

    # parse potential taxonomy-aware abbreviated id
    # Note: such ids are still considered as foreign_ids
    {
        my ($strain, $acc) = $self->full_id =~ $TAXABBR_ID;
        if (defined $strain) {
            $self->_set_gca_and_or_taxon_id($strain);
            $self->_set_accession($acc);
            $self->_set_foreign;
            return;
        }
    }

    # parse potential NCBI FASTA-style GI id
    # Note: such ids are still considered as foreign_ids
    {
        my ($gi, $acc) = $self->full_id =~ $NCBIGI_ID;
        if (defined $gi) {
            $self->_set_gi($gi);
            $self->_set_accession($acc);
            $self->_set_foreign;
            return;
        }
    }

    # parse potential NCBI FASTA-style GNL id
    # Note: such ids are still considered as foreign_ids
    {
        my ($match) = $self->full_id =~ $NCBIGNL_ID;
        if ($match) {
            $self->_set_parts( [ split /\|/xms, $match ] );
            $self->_set_database(   $self->get_part(1) );   # official
            $self->_set_identifier( $self->get_part(2) );   # aliases
            $self->_set_foreign;
            return;
        }
    }

    # skip RiboDB ids that look like weird full_ids
    if ($self->full_id =~ m/~$NCBIGCA/xms) {
        $self->_set_foreign;
        return;
    }

    # check full_id validity
    my  ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
        = $self->full_id =~ $FULL_ID;
    unless (defined $genus) {

        # First try to coerce foreign full_id by replacing 1st '_' by ' '. If
        # this does not work, keep the original full_id and flag it as foreign.
        # This approach allows the transparent conversion of valid full_ids
        # from foreign software able to handle unlimited gap-free ids.
        # Note: This will fails if the optional family part contains an '_'.

        my $cand_id = $self->full_id =~ s{_}{ }xmsr;
        ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
            = $cand_id =~ $FULL_ID;
        unless (defined $genus) {
            $self->_set_foreign;
            return;
        }
        $self->_set_full_id($cand_id);
    }

    # handle hyphenated genera that could interfere with family definition
    if (defined $family) {
        my $hyphenated = $family . '-' . $genus;
        if (defined $is_hyphenated{$hyphenated}) {
            $family = undef;
            $genus = $hyphenated;
        }
    }

    # handle underscored species that could interfere with species definition
    if (defined $strain) {
        my $underscored = $species . '_' . $strain;
        if (defined $is_underscored{$underscored}) {
            $strain = undef;
            $species = $underscored;
        }
    }

    # populate legacy components from full_id (and tail)
    $self->_set_family($family);
    $self->_set_tag($tag);
    $self->_set_genus($genus);
    $self->_set_species($species);
    $self->_set_strain($strain);
    $self->_set_accession($acc);
    $self->_set_tail($tail);

    # populate modern components if available
    $self->_set_gca_and_or_taxon_id($strain) if defined $strain;
    $self->_set_gi($acc)                     if $acc =~ $PKEYONLY;

    # set new flag if needed
    $self->_set_new if $new;

    return;
}

}   # end of BEGIN block


around qr{ is_new | is_genus_only | is_doubtful | org $  }xms => sub {
    my $method = shift;
    my $self   = shift;

    # Note: we return an explicit undef to emulate other accessor behavior
    return undef if $self->is_foreign;      ## no critic (ProhibitExplicitReturnUndef)
    return $self->$method(@_);



( run in 2.694 seconds using v1.01-cache-2.11-cpan-d8267643d1d )