BioPerl
view release on metacpan or search on metacpan
scripts/Bio-DB-GFF/bp_genbank2gff3.pl view on Meta::CPAN
# protein_id => 'Dbxref', also seen Dbxref tags: EC_number
# translation: handled in gene_features
);
# Turn on autoflush for the currently selected output handle.
$| = 1;

# --quiet is the inverse of $verbose; seed it from the current $verbose so
# it only changes when --quiet / --noquiet is actually given.
my $quiet = !$verbose;

# Parse the command line into the file-level option variables.
# $ok is false when GetOptions reports a problem with the arguments.
my $ok = GetOptions(
    'd|dir|input:s'     => \$dir,
    'z|zip'             => \$zip,
    'h|help'            => \$help,
    's|summary'         => \$summary,
    'r|noinfer'         => \$noinfer,
    'i|conf=s'          => \$CONF,
    'sofile=s'          => \$SO_FILE,
    'm|manual'          => \$MANUAL,
    'o|outdir|output:s' => \$outdir,
    'x|filter:s'        => \@filter,
    'y|split'           => \$split,
    "ethresh|e=s"       => \$ethresh,
    'c|CDS!'            => \$CDSkeep,
    'f|format=s'        => \$FORMAT,
    'typesource=s'      => \$source_type,
    'GFF_VERSION=s'     => \$GFF_VERSION,
    'quiet!'            => \$quiet,    # swap quiet to verbose
    'DEBUG!'            => \$DEBUG,
    'n|nolump'          => \$nolump,
);

# $lump is 1 by default and stays undefined when --nolump or --split was
# given.  Declared separately from the conditional assignment because the
# behaviour of "my $x = ... unless COND" is documented as undefined.
my $lump;
$lump = 1 unless $nolump || $split;

# Translate the (possibly updated) --quiet flag back into $verbose.
$verbose = !$quiet;

# look for help request (also taken when option parsing failed)
pod2usage(2) if $help || !$ok;

# keep SOURCEID as-is and change FORMAT for SeqIO types;
# note SeqIO uses file.suffix to guess type; not useful here
$SOURCEID = $FORMAT;
$FORMAT   = "swiss" if $FORMAT =~/UniProt|trembl/;

# --DEBUG implies verbose output.
$verbose = 1 if ($DEBUG);
# Create the helper objects used by the rest of the script.
# (for ensembl genomes the Unflattener could take -trust_grouptag=>1)
my $unflattener = Bio::SeqFeature::Tools::Unflattener->new();

# Only override the unflattener's error threshold when one was supplied.
if ($ethresh) {
    $unflattener->error_threshold($ethresh);
}

# In DEBUG mode make the unflattener verbose as well.
if ($DEBUG) {
    $unflattener->verbose(1);
}

# $unflattener->group_tag('gene') if($FORMAT =~ /embl/i) ; #? ensembl only?
# ensembl parsing is still problematic, forget this

my $tm  = Bio::SeqFeature::Tools::TypeMapper->new();
my $idh = Bio::SeqFeature::Tools::IDHandler->new();

# dgg
# Fall back to "region" when no source type was given;
# should really parse from FT.source contents below
$source_type = "region" unless $source_type;
#my $FTSOmap = $tm->FT_SO_map();
# $FTSOmap:      hashref, feature/term name => SO term
# $FTSOsynonyms: hashref, synonym => arrayref of term names (only filled
#                when an SO file is parsed; otherwise left undefined)
my $FTSOmap;
my $FTSOsynonyms;

# --sofile live: download the SO file from SO_URL into a temporary file and
# continue as if that file had been passed via --sofile.
if (defined($SO_FILE) && $SO_FILE eq 'live') {
    print "\nDownloading the latest SO file from ".SO_URL."\n\n";

    # NOTE: "use" runs at compile time, so both modules are loaded (and
    # tempfile is imported) whether or not this branch is ever taken.
    use LWP::UserAgent;
    use File::Temp qw/ tempfile /;

    my $ua       = LWP::UserAgent->new(timeout => 30);
    my $request  = HTTP::Request->new(GET => SO_URL);
    my $response = $ua->request($request);

    # Compare the numeric status code; matching /200/ against the whole
    # status line could also match digits inside an error message.
    if ($response->code == 200) {
        # UNLINK => 1 removes the temp file when the program exits.
        my ($fh, $fn) = tempfile(UNLINK => 1);

        # Write and close explicitly so a short or failed write is noticed
        # before the ontology parser reads the file by name.
        my $saved = print {$fh} $response->content;
        $saved = close($fh) && $saved;

        if ($saved) {
            $SO_FILE = $fn;
        } else {
            print "Couldn't save downloaded SO file to $fn: $!"
                . "...skipping validation.\n";
            undef $SO_FILE;
        }
    } else {
        print "Couldn't download SO file online...skipping validation.\n"
            . "HTTP Status was " . $response->status_line . "\n";
        # Always clear $SO_FILE, independent of whether print succeeded,
        # so the literal 'live' is never treated as a file name below.
        undef $SO_FILE;
    }
}

if ($SO_FILE) {
    # Build the type map from the ontology file (parsed as OBO).
    my (%terms, %syn);
    my $parser = Bio::OntologyIO->new( -format => "obo", -file => $SO_FILE );
    $ONTOLOGY = $parser->next_ontology();

    for my $term ($ONTOLOGY->get_all_terms) {
        my $name = $term->name;
        $terms{$name} = $name;
        #$terms{$name} = $term;

        # Index every synonym back to the name(s) of the term(s) carrying it.
        push @{ $syn{$_} }, $name for $term->each_synonym;
        #push @{$syn{$_}}, $term for $term->each_synonym;
    }

    $FTSOmap      = \%terms;
    $FTSOsynonyms = \%syn;

    # Fill in TypeMapper's built-in mappings for keys the ontology did not
    # provide (or provided with a false value).
    # NOTE(review): values are copied as-is here, while the branch below
    # reduces array-ref values to their first element -- confirm intended.
    my %hardTerms = %{ $tm->FT_SO_map() };
    for my $ft (keys %hardTerms) {
        $FTSOmap->{$ft} ||= $hardTerms{$ft};
    }
} else {
    # No SO file: use TypeMapper's built-in map only.  Array-ref values are
    # reduced to their first element; index access is used instead of shift
    # so the arrays returned by FT_SO_map are not modified.
    my %terms = %{ $tm->FT_SO_map() };
    while (my ($k,$v) = each %terms) {
        $FTSOmap->{$k} = ref($v) ? $v->[0] : $v;
    }
}

# Publish the maps through the file-level variables.
$TYPE_MAP = $FTSOmap;
$SYN_MAP  = $FTSOsynonyms;
# #convert $FTSOmap undefined to valid SO : moved to TypeMapper->map_types( -undefined => "region")
# stringify filter list if applicable
# $filter is the space-joined @filter list, or undefined when no filters
# were given.  The declaration is kept separate from the conditional
# assignment because "my $x = ... if COND" has undefined behaviour.
my $filter;
$filter = join ' ', @filter if @filter;
# determine input files
my $stdin=0; # dgg: let dir == stdin == '-' for pipe use
if ($dir && ($dir eq '-' || $dir eq 'stdin')) {
( run in 1.949 second using v1.01-cache-2.11-cpan-0d23b851a93 )