BioPerl-DB
view release on metacpan or search on metacpan
scripts/biosql/terms/add-term-annot.pl view on Meta::CPAN
}
system("perldoc $0");
exit($ok ? 0 : 2);
}
#
# determine the function for re-throwing exceptions depending on $debug:
# confess (with stack backtrace) when debugging, plain croak otherwise
#
my $throw = ($debug > 0) ? \&Carp::confess : \&Carp::croak;

#
# determine input source(s): the files named on the command line, or
# STDIN (as a glob reference) if none were given
#
my @files = @ARGV ? @ARGV : (\*STDIN);

#
# create the DBAdaptorI for our database
#
my $db = Bio::DB::BioDB->new(-database   => "biosql",
                             -printerror => $printerror,
                             -host       => $host,
                             -dbname     => $dbname,
                             -driver     => $driver,
                             -user       => $dbuser,
                             -pass       => $dbpass,
                             );
$db->verbose($debug) if $debug > 0;

# the ontology for the terms to associate; it stays undefined unless a
# namespace was supplied (an '## ontology:' line in the input may set it
# later). The declaration is deliberately separate from the conditional
# assignment: the behaviour of 'my $x = ... if COND' is undefined in Perl.
my $ont;
$ont = _find_or_create_ont($db, $namespace) if $namespace;

# persistence adaptors that we'll use multiple times
my $seqadp  = $db->get_object_adaptor("Bio::SeqI");
my $termadp = $db->get_object_adaptor("Bio::Ontology::TermI");

# declarations: start time and entry counter (both are reset per input file)
my $time      = time();
my $n_entries = 0;

# the sequence object factory
my $seqfactory = Bio::Seq::SeqFactory->new(-type => "Bio::Seq");
#
# loop over every input file and load its content
#
foreach $file ( @files ) {
my $fh = $file;
# create a handle if it's not one already (i.e. a plain file name was
# given rather than a reference such as \*STDIN)
if (! ref($fh)) {
    if ((! $uncompress) && ($file eq "-")) {
        # keep the conventional meaning of "-" (standard input), which the
        # 2-argument form of open() used to provide implicitly
        $fh = \*STDIN;
    } else {
        $fh = gensym;
        # Use the explicit-mode forms of open() so that the file name is
        # never parsed for mode characters nor handed to a shell: names
        # with whitespace or shell metacharacters are opened verbatim.
        # The list form of the pipe open runs gunzip directly.
        my $opened = $uncompress
            ? open($fh, "-|", "gunzip", "-c", $file)
            : open($fh, "<", $file);
        if (! $opened) {
            warn "unable to open $file for reading, skipping: $!\n";
            next;
        }
    }
    print STDERR "Loading $file ...\n";
}
# reset entry counter and timer
$n_entries = 0;
$time = time();
# loop over the stream
while (my $line = <$fh>) {
chomp($line);
# lines of the form '## name: value' are processing instructions, not data
if ($line =~ /^##\s*(\w+):\s*(.*)/) {
    my ($procinstr, $val) = ($1, $2);
    # strip whitespace surrounding the value
    $val =~ s/^\s+|\s+$//g;
    # instruction names are matched case-insensitively
    my $instr = lc($procinstr);
    if ($instr eq "ontology") {
        # terms created from here on are associated with this ontology
        $ont = _find_or_create_ont($db, $val);
    } elsif ($instr eq "columns") {
        # column names are separated by tabs or commas
        @colnames = split(/[\t,]/, $val);
    } else {
        warn("ignoring unknown processing instruction '$procinstr'");
    }
    # an instruction line never carries data
    next;
}
# ignore empty and comment lines
next if ($line =~ /^#/) || ($line =~ /^\s*$/);
# this is a data line, split into columns
my @fields = split(/[\t,]/, $line);
# gather the sequence (bioentry) query constraints and bind
# values, and at the same time define the term
my $term = Bio::Ontology::Term->new(-ontology => $ont);
my @qcs = ();
my @values = ();
# walk the declared columns: a 'term*.attr' column feeds the term object,
# every other column becomes an equality constraint for the sequence query
foreach my $col (0 .. $#colnames) {
    my $colname = $colnames[$col];
    if ($colname =~ /^term.*\.(.*)/i) {
        # the part after the last dot names the term method that is
        # called with the field value
        my $method = $1;
        $term->$method($fields[$col]);
        next;
    }
    # namespace is a special case because it's a separate entity
    # in the relational model but not an object in the object model
    my $where = (lc($colname) eq "namespace")
        ? "db.name = ?"
        : "seq.".$colname." = ?";
    my $constraint = Bio::DB::Query::QueryConstraint->new();
    $constraint->set($where);
    # constraints and their bind values are kept in parallel arrays
    push(@qcs, $constraint);
    push(@values, $fields[$col]);
}
# skip to next one if no constraints found
if (!@qcs) {
warn("no constraints found in line '$line', skipping");
next;
}
( run in 1.014 second using v1.01-cache-2.11-cpan-d7f47b0818f )