BioPerl-DB

 view release on metacpan or  search on metacpan

scripts/biosql/load_seqdatabase.pl  view on Meta::CPAN

#
# create the DBAdaptorI for our database
#
my $db = Bio::DB::BioDB->new(-database   => "biosql",
                             -printerror => $printerror,
                             -host       => $host,
                             -port       => $port,
                             -dbname     => $dbname,
                             -driver     => $driver,
                             -user       => $dbuser,
                             -pass       => $dbpass,
                             -dsn        => $dsn,
                             -schema     => $schema,
                             -initrc     => $initrc,
                             );
$db->verbose($debug) if $debug > 0;

# declarations
my ($pseq, $adp);
my $time = time();
my $n_entries = 0;

#
# loop over every input file and load its content
#
foreach $file ( @files ) {
    
    my $fh = $file;
    my $seqin;

    # create a handle if it's not one already
    if(! ref($fh)) {
        $fh = gensym;
        my $fspec = $uncompress ? "gunzip -c $file |" : "<$file";
        if(! open($fh, $fspec)) {
            warn "unable to open $file for reading, skipping: $!\n";
            next;
        }
        print STDERR "Loading $file ...\n";
    }
    # create stream
    $seqin = $objio->new(-fh => $fh,
                         $format ? (-format => $format) : (),
                         @fmtargs);

    # establish filter if provided
    if($condition) {
        if(! $seqin->can('sequence_builder')) {
            $seqin->throw("object IO parser ".ref($seqin).
                          " does not support control by ObjectBuilderIs");
        }
        $seqin->sequence_builder->add_object_condition($condition);
    }

    # chain to pipeline if pipelining is requested
    if(@pipemods) {
        $pipemods[0]->source_stream($seqin);
        $seqin = $pipemods[-1];
    }

    # reset entry counter and timer
    $n_entries = 0;
    $time = time();

    # loop over the stream
    while( my $seq = $seqin->$nextobj ) {
        # increment entry counter
        $n_entries++;

        # report progress if enabled
        if (($logchunk > 0) && (($n_entries % $logchunk) == 0)) {
            my $elapsed = time() - $time;
            printf STDERR 
                "\t... loaded $n_entries entries "
                . "(in %.2d:%.2d:%.2d, %5.2f entries/s)\n",
                $elapsed/3600, ($elapsed % 3600)/60, $elapsed % 60,
                $logchunk / $elapsed;
            $time = time();
        }

        # we can't store the structure for structured values yet, so
        # flatten them
        if($seq->isa("Bio::AnnotatableI")) {
            flatten_annotations($seq->annotation);
        }
        # don't forget to add namespace if the parser doesn't supply one
        $seq->namespace($namespace) unless $seq->namespace();
        # look up or delete first?
        my $lseq;
        if($lookup_flag || $remove_flag) {
            # look up
            $adp = $db->get_object_adaptor($seq);
            $lseq = $adp->find_by_unique_key($seq,
                                             -obj_factory => 
                                             $seqin->object_factory(),
                                             -flat_only => $flat_flag);
            # found?
            if($lseq) {
                # merge old and new if a function for this is provided
                $seq = &$merge_objs($lseq, $seq, $db) if $merge_objs;
                # the return value may indicate to skip to the next
                next unless $seq;
            }
        }
        # try to serialize
        eval {
            # set the adaptor variable before any operation which may throw
            # us out of the eval block
            $adp = $lseq ? $lseq->adaptor() : $db->get_object_adaptor($seq);
            # delete first if requested
            $lseq->remove() if $remove_flag && $lseq;
            # on update, skip the rest if we are not supposed to update
            if(! ($lseq && $no_update_flag)) {
                # create a persistent object out of the seq if it's
                # not one already (merge_objs may have returned the
                # looked up sequence, i.e., $lseq)
                $pseq = $seq->isa("Bio::DB::PersistentObjectI") 
                    ? $seq : $db->create_persistent($seq);
                # store the primary key of what we found by lookup (this
                # is going to be an udate then)
                if($lseq && $lseq->primary_key) {



( run in 3.002 seconds using v1.01-cache-2.11-cpan-d7f47b0818f )