BioPerl

 view release on metacpan or  search on metacpan

scripts/Bio-DB-GFF/bp_genbank2gff.pl  view on Meta::CPAN


# Parse the command line.  Spec suffixes follow Getopt::Long conventions:
# ':s' takes an optional string value, '=s' requires one, and a bare name
# is a simple on/off flag.  Any parse error prints the usage text and aborts.
GetOptions (
            # --- destination: a database, or standard output -------------
            'dsn:s'       => \$DSN,        # passed on as -dsn when not writing to stdout
            'adaptor:s'   => \$ADAPTOR,    # suffix of the Bio::DB::GFF::Adaptor:: class to load
            'stdout'      => \$STDOUT,     # selects the 'biofetch_to_stdout' adaptor
            'create'      => \$CREATE,     # call initialize(1) on the database handle

            # --- credentials / network -----------------------------------
            'user:s'      => \$USER,       # passed on as -user
            'password:s'  => \$PASSWORD,   # passed on as -pass
            'proxy:s'     => \$PROXY,      # passed on as -proxy

            # --- input selection -----------------------------------------
            'accession'   => \$ACC,        # remaining arguments are accessions to fetch
            'file'        => \$FILE,       # remaining arguments are local files to load
            'acc_file'    => \$accFILE,    # next argument is a file listing accessions
            'acc_pipe'    => \$accPIPE,    # accessions are read from STDIN
            'gb_folder=s' => \$gbFOLDER,   # load every plain file found in this folder

            # --- labelling -----------------------------------------------
            'source:s'    => \$SOURCE,     # passed on as -source (falls back to 'Genbank')
            'viral'       => \$VIRAL,      # adds 'product' to the preferred tag list
           ) or die $USAGE;


# At a minimum we need a place to write to: a database DSN or standard output.
($DSN || $STDOUT) or die $USAGE;

# Local defaults for anything the command line left unset (or set to a
# false value).
$DSN = 'dbi:mysql:test' unless $DSN;
unless ($ADAPTOR) {
    $ADAPTOR = $STDOUT ? 'memory' : 'dbi::mysql';
}

# Ensure that biofetch inherits from the "right" adaptor.
# This is a horrible hack and should be fixed.
# NOTE(review): the string eval interpolates the --adaptor value directly
# and its outcome ($@) is never inspected, so a failed load goes unnoticed
# at this point.
my $adaptor_class = "Bio::DB::GFF::Adaptor::${ADAPTOR}";
eval "use $adaptor_class";
local @Bio::DB::GFF::Adaptor::biofetch::ISA = ($adaptor_class);

# Writing to a database is the default; --stdout switches to the adaptor
# that prints instead and therefore needs no DSN.
my $biofetch = 'biofetch';
my @dsn      = (-dsn => $DSN);
if ($STDOUT) {
    $biofetch = 'biofetch_to_stdout';
    @dsn      = ();
}

# Only forward the credential/proxy arguments the user actually supplied.
my @auth = (
    (defined $USER     ? (-user  => $USER)     : ()),
    (defined $PASSWORD ? (-pass  => $PASSWORD) : ()),
    (defined $PROXY    ? (-proxy => $PROXY)    : ()),
);

# Tag names handed to Bio::DB::GFF as -preferred_tags, each with a numeric
# weight (presumably a priority used when choosing a feature label -- confirm
# against the adaptor).
# 'product' is only added for viral genomes (--viral): most of their functions
# come from post-translational processing, so the default labels are of
# little use there.
my %preferred_tags = (
    strain        => 10,
    organism      => 20,
    protein_id    => 40,
    locus_tag     => 50,
    locus         => 60,
    gene          => 70,
    standard_name => 80,
    ($VIRAL ? (product => 90) : ()),
);

# Assemble the constructor arguments first so the call itself stays short.
# @dsn is empty when writing to stdout; @auth holds only what was supplied.
my @db_args = (
    -adaptor        => $biofetch,
    @dsn,
    @auth,
    -preferred_tags => \%preferred_tags,
    -source         => ($SOURCE || 'Genbank'),
);

my $db = Bio::DB::GFF->new(@db_args);
$db or die "Can't open database: ",Bio::DB::GFF->error,"\n";

# --create: initialize the database before loading anything into it.
$db->initialize(1) if $CREATE;

# --file and --accession both take their inputs from the remaining
# command-line arguments, so there must be at least one left.
if ($FILE || $ACC) {
  @ARGV or die "you must specify either an accession to retrieve from\nembl or a local file containing data in embl format\n";
}

# --accession: every remaining command-line argument is an accession to
# fetch.  Accessions starting with "NC_" are requested from 'refseq', all
# others from 'embl'.
if ($ACC) {
  # Test definedness rather than truth so that a false-valued argument
  # (e.g. "0") does not silently end the loop and drop the arguments after
  # it.  A lexical is used instead of clobbering the global $_.
  while (defined(my $accession = shift)) {
    status(loading => $accession);
    my $source = $accession =~ /^NC_/ ? 'refseq' : 'embl';
    my $result = $db->load_from_embl($source => $accession);
    status(done    => $result);
  }
  # NOTE(review): exits with status 1 even after a normal run; left as-is
  # because callers may depend on it -- confirm whether 0 was intended.
  exit 1;
}

# --file: every remaining command-line argument is a local file to load.
elsif ($FILE) {
  # Same definedness test as above, so a file literally named "0" is loaded.
  while (defined(my $file = shift)) {
    status('loading' => $file);
    my $result = $db->load_from_file($file);
    status (done => $result);
  }
  # NOTE(review): same unconditional exit status 1 as the --accession branch.
  exit 1;
}

# --acc_file: the next command-line argument names a text file that is read
# line by line; each line is treated as one accession to fetch.
elsif ($accFILE){
    my $filename = shift;
    # The option is declared as 'acc_file', so name that flag in the message.
    die "you must supply a filename after the --acc_file command line flag\n" unless $filename;
    die "file $filename does not exist\n" unless (-e $filename && !(-d $filename));
    open my $IN, '<', $filename or die "Could not read file '$filename' for reading accession numbers: $!\n";
    while (my $line = <$IN>){
        chomp $line;
        status(loading => $line);
        # The RefSeq test must look at $line: a bare /^NC_/ matches against
        # $_, which this loop never sets, so every accession used to be
        # requested from 'embl'.
        my $source = $line =~ /^NC_/ ? 'refseq' : 'embl';
        my $result = $db->load_from_embl($source => $line);
        status(done => $result);
    }
    close $IN;
    # NOTE(review): exits with status 1 even after a normal run; left as-is
    # because callers may depend on it -- confirm whether 0 was intended.
    exit 1;
}

# --gb_folder: load every plain file found directly inside the given folder.
# Entries that are directories (including '.' and '..') are reported on
# STDERR and skipped.  Unlike the other branches this one does not exit.
elsif ($gbFOLDER){
    my $dir = $gbFOLDER;
    die "folder $dir does not exist\n" unless (-e $dir && -d $dir);
    # Use low-precedence 'or': with '||' the die attached to the directory
    # name (always true) and an opendir failure was never reported.
    opendir(my $dh, $dir) or die "can't open directory $dir for reading: $!\n";
    my @files = readdir $dh;
    closedir $dh;    # the listing is already in memory; release the handle
    foreach my $file (@files){
        my $path = "$dir/$file";
        if (!(-e $path) || (-d $path)){
            print STDERR " $path is not a filename!  Skipping...\n";
            next;
        }
        my $result = $db->load_from_file($path);
        print STDERR $result ? "ok\n" : "failed\n";
    }
}
# --acc_pipe: accessions arrive on STDIN, one per line.  Accessions starting
# with "NC_" are requested from 'refseq', all others from 'embl'.
elsif ($accPIPE){
    my @accessions = <STDIN>;
    chomp @accessions;
    foreach my $accession (@accessions){
      status(loading => $accession);
      my $source = $accession =~ /^NC_/ ? 'refseq' : 'embl';
      my $result = $db->load_from_embl($source => $accession);
      status(done => $result);
    }
    # NOTE(review): exits with status 1 even after a normal run; left as-is
    # because callers may depend on it -- confirm whether 0 was intended.
    exit 1;
}

else {
  my $done;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 3.797 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )