BioPerl-DB

 view release on metacpan or  search on metacpan

scripts/biosql/terms/add-term-annot.pl  view on Meta::CPAN

    }
    system("perldoc $0");
    exit($ok ? 0 : 2);
}

#
# determine the function for re-throwing exceptions depending on $debug
# (confess adds a full stack backtrace to the message, croak does not)
#
my $throw = ($debug > 0) ? \&Carp::confess : \&Carp::croak;

#
# determine input source(s): the files named on the command line, or
# STDIN (as a glob reference) if none were given
#
my @files = @ARGV ? @ARGV : (\*STDIN);

#
# create the DBAdaptorI for our database
#
my $db = Bio::DB::BioDB->new(-database   => "biosql",
                             -printerror => $printerror,
                             -host       => $host,
                             -dbname     => $dbname,
                             -driver     => $driver,
                             -user       => $dbuser,
                             -pass       => $dbpass,
                             );
$db->verbose($debug) if $debug > 0;

# the ontology for the terms to associate
#
# Declare first and assign conditionally: combining "my" with a statement
# modifier ("my $x = ... if COND") has undefined behaviour according to
# perlsyn. Without a namespace the ontology stays undefined until a
# "## ontology:" processing instruction in the input sets it.
my $ont;
$ont = _find_or_create_ont($db, $namespace) if $namespace;

# persistence adaptors that we'll use multiple times
my $seqadp = $db->get_object_adaptor("Bio::SeqI");
my $termadp = $db->get_object_adaptor("Bio::Ontology::TermI");

# declarations (both are reset for every input file)
my $time = time();
my $n_entries = 0;

# the sequence object factory
my $seqfactory = Bio::Seq::SeqFactory->new(-type => "Bio::Seq");

#
# loop over every input file and load its content
#
foreach $file ( @files ) {
    
    my $fh = $file;

    # create a handle if it's not one already
    if(! ref($fh)) {
        $fh = gensym;
        # Open with the 3-argument / list forms of open() so that the file
        # name is passed through verbatim. The former 2-argument form and
        # the shell pipeline mis-handled names containing whitespace or
        # shell/mode metacharacters. (List-form pipe open needs Perl 5.8+.)
        my $opened;
        if ($uncompress) {
            # run gunzip directly, without an intermediate shell
            $opened = open($fh, '-|', 'gunzip', '-c', $file);
        } elsif ($file eq '-') {
            # 2-argument open treated "-" as STDIN; keep that behaviour
            $opened = open($fh, '<&', \*STDIN);
        } else {
            $opened = open($fh, '<', $file);
        }
        if(! $opened) {
            warn "unable to open $file for reading, skipping: $!\n";
            next;
        }
        print STDERR "Loading $file ...\n";
    }

    # reset entry counter and timer
    $n_entries = 0;
    $time = time();

    # loop over the stream
    while (my $line = <$fh>) {
        
        chomp($line);

        # Lines of the form "## <name>: <value>" are processing
        # instructions rather than data.
        if (my ($directive, $argument) = $line =~ /^##\s*(\w+):\s*(.*)/) {
            # strip whitespace surrounding the value
            for ($argument) {
                s/^\s+//;
                s/\s+$//;
            }
            # instruction names are matched case-insensitively
            my $keyword = lc($directive);
            if ($keyword eq "columns") {
                # new column layout for the data lines that follow
                @colnames = split(/[\t,]/, $argument);
            } elsif ($keyword eq "ontology") {
                # switch the ontology used for subsequently created terms
                $ont = _find_or_create_ont($db, $argument);
            } else {
                warn("ignoring unknown processing instruction '$directive'");
            }
            # an instruction line never carries data
            next;
        }

        # ignore empty and comment lines
        next if ($line =~ /^#/) || ($line =~ /^\s*$/);

        # A data line: columns are separated by tabs or commas.
        my @fields = split(/[\t,]/, $line);

        # Walk the columns in the order given by @colnames. Columns named
        # like "term<anything>.<attr>" populate the term by calling the
        # method <attr> on it; every other column becomes a query
        # constraint on the sequence (bioentry), with the field as its
        # bind value.
        my $term = Bio::Ontology::Term->new(-ontology => $ont);
        my @qcs = ();
        my @values = ();
        foreach my $col (0 .. $#colnames) {
            my $name = $colnames[$col];
            if (my ($setter) = $name =~ /^term.*\.(.*)/i) {
                $term->$setter($fields[$col]);
                next;
            }
            # namespace is a special case because it's a separate entity
            # in the relational model but not an object in the object model
            my $condition = (lc($name) eq "namespace")
                ? "db.name = ?"
                : "seq." . $name . " = ?";
            my $qc = Bio::DB::Query::QueryConstraint->new();
            $qc->set($condition);
            push(@qcs, $qc);
            push(@values, $fields[$col]);
        }
        # without any constraint there is nothing to look up; skip the line
        unless (@qcs) {
            warn("no constraints found in line '$line', skipping");
            next;
        }



( run in 1.014 second using v1.01-cache-2.11-cpan-d7f47b0818f )