BioPerl

 view release on metacpan or  search on metacpan

scripts/Bio-DB-GFF/bp_genbank2gff3.pl  view on Meta::CPAN

  # protein_id => 'Dbxref', also seen Dbxref tags: EC_number 
  # translation: handled in gene_features
);


# Enable autoflush on the currently selected output handle.
$| = 1;

# Command-line parsing.
# --quiet is seeded from the current $verbose value and converted back to
# $verbose after parsing, so the two flags stay exact opposites.
# Getopt::Long spec suffixes used below: "=s" requires a string value,
# ":s" takes an optional string value, "!" makes the flag negatable (--noX).
my $quiet= !$verbose;
my $ok= GetOptions( 'd|dir|input:s'   => \$dir,
            'z|zip'     => \$zip, 
            'h|help'    => \$help,
            's|summary' => \$summary,
            'r|noinfer' => \$noinfer,
            'i|conf=s' => \$CONF,
            'sofile=s' => \$SO_FILE,
            'm|manual' => \$MANUAL,
            'o|outdir|output:s'=> \$outdir,
            'x|filter:s'=> \@filter,
            'y|split'   => \$split,
            "ethresh|e=s"=>\$ethresh,
            'c|CDS!'    => \$CDSkeep,
            'f|format=s' => \$FORMAT,
            'typesource=s' => \$source_type,
            'GFF_VERSION=s' => \$GFF_VERSION,
            'quiet!'    => \$quiet, # swap quiet to verbose
            'DEBUG!'    => \$DEBUG,
            'n|nolump'  => \$nolump);

# $lump is set unless --nolump or --split was given; otherwise it stays undef.
# The declaration is kept unconditional because a "my" governed by a
# statement modifier ("my $x = ... unless COND") has undefined behaviour.
my $lump;
$lump = 1 unless $nolump || $split;
$verbose= !$quiet;

# look for help request (also shown when option parsing failed)
pod2usage(2) if $help || !$ok;

# keep SOURCEID as-is and change FORMAT for SeqIO types; 
# note SeqIO uses file.suffix to guess type; not useful here
$SOURCEID= $FORMAT; 
$FORMAT  = "swiss" if $FORMAT =~/UniProt|trembl/;
# --DEBUG implies verbose output
$verbose =1 if($DEBUG);

# Set up the helper objects used during conversion.
# Unflattener note: for ensembl genomes one could pass (-trust_grouptag=>1).
my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
if ($ethresh) {
    # only override the error threshold when one was supplied
    $unflattener->error_threshold($ethresh);
}
if ($DEBUG) {
    $unflattener->verbose(1);
}
# $unflattener->group_tag('gene') if($FORMAT =~ /embl/i) ; #? ensembl only? 
# ensembl parsing is still problematic, forget this

my $tm  = Bio::SeqFeature::Tools::TypeMapper->new;
my $idh = Bio::SeqFeature::Tools::IDHandler->new;

# dgg
# Fall back to "region" when no source type was given;
# should really parse from FT.source contents below
$source_type = "region" unless $source_type;

# Type map and synonym map; both are filled in further down.
#my $FTSOmap = $tm->FT_SO_map();
my ($FTSOmap, $FTSOsynonyms);

# When --sofile is the literal string 'live', fetch the SO file from SO_URL
# into a temporary file and point $SO_FILE at that file.  On any failure
# $SO_FILE is cleared so that the code below does not try to parse it.
if (defined($SO_FILE) && $SO_FILE eq 'live') {
    print "\nDownloading the latest SO file from ".SO_URL."\n\n";
    use LWP::UserAgent;
    my $ua = LWP::UserAgent->new(timeout => 30);
    my $request = HTTP::Request->new(GET => SO_URL);
    my $response = $ua->request($request);

    # Compare the numeric status code: matching /200/ against the whole
    # status line could also match a "200" that appears in an error message.
    if ($response->code == 200) {
        use File::Temp qw/ tempfile /;
        my ($fh, $fn) = tempfile();

        # Check both the write and the close so a truncated download is
        # never handed to the ontology parser.
        my $written = print {$fh} $response->content;
        my $closed  = close $fh;

        if ($written && $closed) {
            $SO_FILE = $fn;
        } else {
            print "Couldn't save downloaded SO file to $fn...skipping validation.\n";
            undef $SO_FILE;
        }
    } else {
        print "Couldn't download SO file online...skipping validation.\n" 
            . "HTTP Status was " . $response->status_line . "\n";
        # Always clear $SO_FILE here, independent of whether print succeeded.
        undef $SO_FILE;
    }
}

# Build the feature-type map ($FTSOmap) and synonym map ($FTSOsynonyms).
# With an SO file: terms and synonyms are read from the OBO file and the
# TypeMapper's built-in map only fills in keys that are still missing.
# Without one: the built-in map is used alone, reduced to one value per key.
if ($SO_FILE) {

    my (%terms, %syn);

    my $parser = Bio::OntologyIO->new( -format => "obo", -file => $SO_FILE );
    $ONTOLOGY = $parser->next_ontology();

    for my $term ($ONTOLOGY->get_all_terms) { 

        # every term name maps to itself
        $terms{$term->name} = $term->name;
        #$terms{$term->name} = $term;

        # each synonym maps to the list of term names that carry it
        for my $synonym ($term->each_synonym) {
            push @{ $syn{$synonym} }, $term->name;
            #push @{ $syn{$synonym} }, $term;
        }
    }

    $FTSOmap = \%terms;
    $FTSOsynonyms = \%syn;

    # Built-in entries never override what the SO file provided.
    # Values are copied as returned by FT_SO_map (not reduced as in the
    # else branch below).
    my %hardTerms = %{ $tm->FT_SO_map() };
    for my $key (keys %hardTerms) {
        $FTSOmap->{$key} ||= $hardTerms{$key};
    }

} else { 
    my %terms = %{ $tm->FT_SO_map() };
    for my $key (keys %terms) {
        my $value = $terms{$key};
        # For reference values keep only the first element.  Read it with an
        # index instead of shift(): %terms is a shallow copy, so shifting
        # would modify the arrays owned by the structure FT_SO_map returned.
        $FTSOmap->{$key} = ref($value) ? $value->[0] : $value;
    }
}

# Publish both maps through the file-level variables.
$TYPE_MAP = $FTSOmap;
$SYN_MAP = $FTSOsynonyms;


# #convert $FTSOmap undefined to valid SO : moved to TypeMapper->map_types( -undefined => "region")

# stringify filter list if applicable (undef when no filters were given).
# Written as a ternary because "my $x = ... if COND" has undefined behaviour.
my $filter = @filter ? join(' ', @filter) : undef;

# determine input files
my $stdin=0; # dgg: let dir == stdin == '-' for pipe use
if ($dir && ($dir eq '-' || $dir eq 'stdin')) {



( run in 1.949 second using v1.01-cache-2.11-cpan-0d23b851a93 )