Bio-MUST-Tools-Mcl

 view release on metacpan or  search on metacpan

bin/abbr-ids-fas.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: abbr-ids-fas.pl
# ABSTRACT: Abbreviate (standardize) seq ids in FASTA files

use Modern::Perl '2011';
use autodie;

use File::Basename;
use Getopt::Euclid qw(:vars);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:seqids);
use Bio::MUST::Core::Utils qw(change_suffix secure_outfile);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::IdMapper';


# Symbolic names that may be given in place of a literal seq id regex.
# Each name resolves to one of the predefined id patterns (presumably the
# ones exported by Bio::MUST::Core::Constants under the :seqids tag).
my %regex_for;
@regex_for{ qw(:DEF :GI :GNL :JGI :PAC) }
    = ($DEF_ID, $GI_ID, $GNL_ID, $JGI_ID, $PAC_ID);

# The prefix mapper is optional: it stays undefined unless a mapper file
# was specified, and later code tests it for truth before using it.
my $prefix_mapper;
if ($ARGV_id_prefix_mapper) {
    # the triple-hash line below is a live Smart::Comments statement
    # (it reports the mapper file at runtime), not an ordinary comment
    ### Taking prefixes from: $ARGV_id_prefix_mapper
    $prefix_mapper = IdMapper->load($ARGV_id_prefix_mapper);
}

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $ali = Ali->load($infile);
    $ali->dont_guess;

    # determine seq_id prefix
    my $prefix = $ARGV_id_prefix // q{};        # defaults to no prefix
    if ($prefix_mapper) {                       # infile paths are ignored
        my ($filename) = fileparse($infile);
        $prefix .= $prefix_mapper->abbr_id_for($filename);
    }
    if ($prefix) {
        ### Prefixing seq ids with: $prefix
        $prefix .= '|';                         # add '|' separator
    }

    # build id_mapper
    my $id_mapper;

    # 1. regex mapper (first use input as hash key then as regex)
    if ($ARGV_id_regex) {
        my $regex = $regex_for{$ARGV_id_regex} // $ARGV_id_regex;
        $id_mapper = $ali->regex_mapper($prefix, $regex);
        ### Using seq id regex: $regex
    }

    # 2. accession mapper
    elsif ($ARGV_ids_from_acc) {
        $id_mapper = $ali->acc_mapper(  $prefix);
        ### Using accessions as seq ids

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.888 second using v1.00-cache-2.02-grep-82fe00e-cpan-72ae3ad1e6da )