Bio-MUST-Tools-Mcl
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
bin/abbr-ids-fas.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: abbr-ids-fas.pl
# ABSTRACT: Abbreviate (standardize) seq ids in FASTA files
use Modern::Perl '2011';
use autodie;
use File::Basename;
use Getopt::Euclid qw(:vars);
use Smart::Comments;
use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:seqids);
use Bio::MUST::Core::Utils qw(change_suffix secure_outfile);
use aliased 'Bio::MUST::Core::Ali';
use aliased 'Bio::MUST::Core::IdMapper';
# Predefined seq id patterns, addressable through a ':NAME' shortcut.
# Each pattern is meant to capture the unique identifier component of a seq id.
# The $*_ID patterns are not declared in this file; presumably they are
# exported by Bio::MUST::Core::Constants (:seqids tag) -- TODO confirm.
my %regex_for;
@regex_for{ qw(:DEF :GI :GNL :JGI :PAC) }
    = ($DEF_ID, $GI_ID, $GNL_ID, $JGI_ID, $PAC_ID);

# Optional prefix mapper: loaded only when a mapper file was specified,
# otherwise left undefined so that later code can test for its presence.
my $prefix_mapper = do {
    my $mapper;
    if ($ARGV_id_prefix_mapper) {
        ### Taking prefixes from: $ARGV_id_prefix_mapper
        $mapper = IdMapper->load($ARGV_id_prefix_mapper);
    }
    $mapper;
};
for my $infile (@ARGV_infiles) {
### Processing: $infile
my $ali = Ali->load($infile);
$ali->dont_guess;
# determine seq_id prefix
my $prefix = $ARGV_id_prefix // q{}; # defaults to no prefix
if ($prefix_mapper) { # infile paths are ignored
my ($filename) = fileparse($infile);
$prefix .= $prefix_mapper->abbr_id_for($filename);
}
if ($prefix) {
### Prefixing seq ids with: $prefix
$prefix .= '|'; # add '|' separator
}
# build id_mapper
my $id_mapper;
# 1. regex mapper (first use input as hash key then as regex)
if ($ARGV_id_regex) {
my $regex = $regex_for{$ARGV_id_regex} // $ARGV_id_regex;
$id_mapper = $ali->regex_mapper($prefix, $regex);
### Using seq id regex: $regex
}
# 2. accession mapper
elsif ($ARGV_ids_from_acc) {
$id_mapper = $ali->acc_mapper( $prefix);
### Using accessions as seq ids
view all matches for this distribution - view release on metacpan - search on metacpan
( run in 1.888 second using v1.00-cache-2.02-grep-82fe00e-cpan-72ae3ad1e6da )