BioPerl-DB
view release on metacpan or search on metacpan
scripts/biosql/load_seqdatabase.pl view on Meta::CPAN
#
# create the DBAdaptorI for our database
#
my $db = Bio::DB::BioDB->new(-database => "biosql",
-printerror => $printerror,
-host => $host,
-port => $port,
-dbname => $dbname,
-driver => $driver,
-user => $dbuser,
-pass => $dbpass,
-dsn => $dsn,
-schema => $schema,
-initrc => $initrc,
);
$db->verbose($debug) if $debug > 0;
# declarations
my ($pseq, $adp);
my $time = time();
my $n_entries = 0;
#
# loop over every input file and load its content
#
foreach $file ( @files ) {
my $fh = $file;
my $seqin;
# create a handle if it's not one already
if(! ref($fh)) {
$fh = gensym;
my $fspec = $uncompress ? "gunzip -c $file |" : "<$file";
if(! open($fh, $fspec)) {
warn "unable to open $file for reading, skipping: $!\n";
next;
}
print STDERR "Loading $file ...\n";
}
# create stream
$seqin = $objio->new(-fh => $fh,
$format ? (-format => $format) : (),
@fmtargs);
# establish filter if provided
if($condition) {
if(! $seqin->can('sequence_builder')) {
$seqin->throw("object IO parser ".ref($seqin).
" does not support control by ObjectBuilderIs");
}
$seqin->sequence_builder->add_object_condition($condition);
}
# chain to pipeline if pipelining is requested
if(@pipemods) {
$pipemods[0]->source_stream($seqin);
$seqin = $pipemods[-1];
}
# reset entry counter and timer
$n_entries = 0;
$time = time();
# loop over the stream
while( my $seq = $seqin->$nextobj ) {
# increment entry counter
$n_entries++;
# report progress if enabled
if (($logchunk > 0) && (($n_entries % $logchunk) == 0)) {
my $elapsed = time() - $time;
printf STDERR
"\t... loaded $n_entries entries "
. "(in %.2d:%.2d:%.2d, %5.2f entries/s)\n",
$elapsed/3600, ($elapsed % 3600)/60, $elapsed % 60,
$logchunk / $elapsed;
$time = time();
}
# we can't store the structure for structured values yet, so
# flatten them
if($seq->isa("Bio::AnnotatableI")) {
flatten_annotations($seq->annotation);
}
# don't forget to add namespace if the parser doesn't supply one
$seq->namespace($namespace) unless $seq->namespace();
# look up or delete first?
my $lseq;
if($lookup_flag || $remove_flag) {
# look up
$adp = $db->get_object_adaptor($seq);
$lseq = $adp->find_by_unique_key($seq,
-obj_factory =>
$seqin->object_factory(),
-flat_only => $flat_flag);
# found?
if($lseq) {
# merge old and new if a function for this is provided
$seq = &$merge_objs($lseq, $seq, $db) if $merge_objs;
# the return value may indicate to skip to the next
next unless $seq;
}
}
# try to serialize
eval {
# set the adaptor variable before any operation which may throw
# us out of the eval block
$adp = $lseq ? $lseq->adaptor() : $db->get_object_adaptor($seq);
# delete first if requested
$lseq->remove() if $remove_flag && $lseq;
# on update, skip the rest if we are not supposed to update
if(! ($lseq && $no_update_flag)) {
# create a persistent object out of the seq if it's
# not one already (merge_objs may have returned the
# looked up sequence, i.e., $lseq)
$pseq = $seq->isa("Bio::DB::PersistentObjectI")
? $seq : $db->create_persistent($seq);
# store the primary key of what we found by lookup (this
# is going to be an udate then)
if($lseq && $lseq->primary_key) {
( run in 3.002 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )