Bio-MUST-Core
view release on metacpan or search on metacpan
lib/Bio/MUST/Core/SeqId.pm view on Meta::CPAN
# possibly remove surrounding quotes (before anything else is done)
if ($self->full_id =~ m/\A $RE{delimited}{-delim=>q{'"}}{-keep} \z/xms) {
$self->_set_full_id($3);
}
# warn of trailing spaces as they harm taxonomic analysis
carp '[BMC] Warning: "' . $self->full_id . '" has trailing spaces;'
. ' cannot parse sequence id!' if $self->full_id =~ m/\s+\z/xms;
# parse potential taxonomy-aware abbreviated id
# Note: such ids are still considered as foreign_ids
{
my ($strain, $acc) = $self->full_id =~ $TAXABBR_ID;
if (defined $strain) {
$self->_set_gca_and_or_taxon_id($strain);
$self->_set_accession($acc);
$self->_set_foreign;
return;
}
}
# parse potential NCBI FASTA-style GI id
# Note: such ids are still considered as foreign_ids
{
my ($gi, $acc) = $self->full_id =~ $NCBIGI_ID;
if (defined $gi) {
$self->_set_gi($gi);
$self->_set_accession($acc);
$self->_set_foreign;
return;
}
}
# parse potential NCBI FASTA-style GNL id
# Note: such ids are still considered as foreign_ids
{
my ($match) = $self->full_id =~ $NCBIGNL_ID;
if ($match) {
$self->_set_parts( [ split /\|/xms, $match ] );
$self->_set_database( $self->get_part(1) ); # official
$self->_set_identifier( $self->get_part(2) ); # aliases
$self->_set_foreign;
return;
}
}
# skip RiboDB ids that look like weird full_ids
if ($self->full_id =~ m/~$NCBIGCA/xms) {
$self->_set_foreign;
return;
}
# check full_id validity
my ($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
= $self->full_id =~ $FULL_ID;
unless (defined $genus) {
# First try to coerce foreign full_id by replacing 1st '_' by ' '. If
# this does not work, keep the original full_id and flag it as foreign.
# This approach allows the transparent conversion of valid full_ids
# from foreign software able to handle unlimited gap-free ids.
# Note: This will fails if the optional family part contains an '_'.
my $cand_id = $self->full_id =~ s{_}{ }xmsr;
($family, $tag, $genus, $species, $strain, $acc, $tail, $new)
= $cand_id =~ $FULL_ID;
unless (defined $genus) {
$self->_set_foreign;
return;
}
$self->_set_full_id($cand_id);
}
# handle hyphenated genera that could interfere with family definition
if (defined $family) {
my $hyphenated = $family . '-' . $genus;
if (defined $is_hyphenated{$hyphenated}) {
$family = undef;
$genus = $hyphenated;
}
}
# handle underscored species that could interfere with species definition
if (defined $strain) {
my $underscored = $species . '_' . $strain;
if (defined $is_underscored{$underscored}) {
$strain = undef;
$species = $underscored;
}
}
# populate legacy components from full_id (and tail)
$self->_set_family($family);
$self->_set_tag($tag);
$self->_set_genus($genus);
$self->_set_species($species);
$self->_set_strain($strain);
$self->_set_accession($acc);
$self->_set_tail($tail);
# populate modern components if available
$self->_set_gca_and_or_taxon_id($strain) if defined $strain;
$self->_set_gi($acc) if $acc =~ $PKEYONLY;
# set new flag if needed
$self->_set_new if $new;
return;
}
} # end of BEGIN block
around qr{ is_new | is_genus_only | is_doubtful | org $ }xms => sub {
my $method = shift;
my $self = shift;
# Note: we return an explicit undef to emulate other accessor behavior
return undef if $self->is_foreign; ## no critic (ProhibitExplicitReturnUndef)
return $self->$method(@_);
( run in 2.694 seconds using v1.01-cache-2.11-cpan-d8267643d1d )