Bio-DB-GFF
view release on metacpan or search on metacpan
lib/Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
my $join = $self->make_features_join_part;
my $range = $self->make_features_by_range_where_part('overlaps',
{refseq=>$location->[0],
class =>'',
start=>$location->[1],
stop =>$location->[2]}) if $location;
# group query
my $query1 = "SELECT $select FROM $from WHERE $where AND $join";
$query1 .= " AND $range" if $range;
# alias query
$from = $self->make_features_from_part(undef,{attributes=>1});
($where,@args) = $self->make_features_by_alias_where_part($class,$name); # potential bug - @args1==@args2?
my $query2 = "SELECT $select FROM $from WHERE $where AND $join";
$query2 .= " AND $range" if $range;
my $count = 0;
for my $query ($query1,$query2) {
my $sth = $self->dbh->do_query($query,@args);
while (my @row = $sth->fetchrow_array) {
$callback->(@row);
$count++;
}
$sth->finish;
}
return $count;
}
=head2 _feature_by_id
Title : _feature_by_id
Usage : $db->_feature_by_id($ids,$type,$callback)
Function: get a list of features by ID
Returns : count of number of features retrieved
Args : arrayref containing list of IDs to fetch and a callback
Status : protected
This method is used internally. The $type selector is one of
"feature" or "group". The callback arguments are those used by
make_feature(). Internally, it invokes the following abstract
procedures:
make_features_select_part
make_features_from_part
make_features_by_id_where_part
make_features_join_part
=cut
sub _feature_by_id {
my $self = shift;
my ($ids,$type,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $select = $self->make_features_select_part;
my $from = $self->make_features_from_part;
my ($where,@args) = $type eq 'feature' ? $self->make_features_by_id_where_part($ids)
: $self->make_features_by_gid_where_part($ids);
my $join = $self->make_features_join_part;
my $query = "SELECT $select FROM $from WHERE $where AND $join";
my $sth = $self->dbh->do_query($query,@args);
my $count = 0;
while (my @row = $sth->fetchrow_array) {
$callback->(@row);
$count++;
}
$sth->finish;
return $count;
}
sub _feature_by_attribute {
my $self = shift;
my ($attributes,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $select = $self->make_features_select_part;
my $from = $self->make_features_from_part(undef,{attributes=>$attributes});
my ($where,@args) = $self->make_features_by_range_where_part('',{attributes=>$attributes});
my $join = $self->make_features_join_part({attributes=>$attributes});
my $query = "SELECT $select FROM $from WHERE $where AND $join";
my $sth = $self->dbh->do_query($query,@args);
my $count = 0;
while (my @row = $sth->fetchrow_array) {
$callback->(@row);
$count++;
}
$sth->finish;
return $count;
}
=head2 get_types
Title : get_types
Usage : $db->get_types($refseq,$refclass,$start,$stop,$count)
Function: get list of types
Returns : a list of Bio::DB::GFF::Typename objects
Args : see below
Status : Public
This method is responsible for fetching the list of feature type names
from the database. The query may be limited to a particular range, in
which case the range is indicated by a landmark sequence name and
class and its subrange, if any. These arguments may be undef if it is
desired to retrieve all feature types in the database (which may be a
slow operation in some implementations).
If the $count flag is false, the method returns a simple list of
vBio::DB::GFF::Typename objects. If $count is true, the method returns
a list of $name=E<gt>$count pairs, where $count indicates the number of
times this feature occurs in the range.
Internally, this method calls upon the following functions to generate
the SQL and its bind variables:
($q1,@args) = make_types_select_part(@args);
($q2,@args) = make_types_from_part(@args);
lib/Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
my $self = shift;
my ($field,$value) = @_;
return qq($field = ?) if $value =~ /^[!@%&a-zA-Z0-9_\'\" ~-]+$/;
return qq($field REGEXP ?);
}
=head2 exact_match
Title : exact_match
Usage : $string = $db->exact_match($field,$value)
Function: create a SQL fragment for performing exact string matching
Returns : query string
Args : the table field and match value
Status : public
This method produces the SQL fragment for matching a field name to a
constant string value.
=cut
sub exact_match {
my $self = shift;
my ($field,$value) = @_;
return qq($field = ?);
}
=head2 search_notes
Title : search_notes
Usage : @search_results = $db->search_notes("full text search string",$limit)
Function: Search the notes for a text string, using mysql full-text search
Returns : array of results
Args : full text search string, and an optional row limit
Status : public
This is a mysql-specific method. Given a search string, it performs a
full-text search of the notes table and returns an array of results.
Each row of the returned array is a arrayref containing the following fields:
column 1 A Bio::DB::GFF::Featname object, suitable for passing to segment()
column 2 The text of the note
column 3 A relevance score.
column 4 A Bio::DB::GFF::Typename object
=cut
sub search_notes {
my $self = shift;
my ($search_string,$limit) = @_;
$search_string =~ tr/*?//d;
my @words = $search_string =~ /(\w+)/g;
my $regex = join '|',@words;
my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
my $search = join(' OR ',@searches);
my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND ($search)
END
;
my $sth = $self->dbh->do_query($query);
my @results;
while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
next unless $class && $name; # sorry, ignore NULL objects
my @matches = $note =~ /($regex)/g;
my $relevance = 10*@matches;
my $featname = Bio::DB::GFF::Featname->new($class=>$name);
my $type = Bio::DB::GFF::Typename->new($method,$source);
push @results,[$featname,$note,$relevance,$type];
last if $limit && @results >= $limit;
}
@results;
}
=head2 meta
Title : meta
Usage : $value = $db->meta($name [,$newval])
Function: get or set a meta variable
Returns : a string
Args : meta variable name and optionally value
Status : public
Get or set a named metavariable for the database. Metavariables can
be used for database-specific settings. This method calls two
class-specific methods which must be implemented:
make_meta_get_query() Returns a sql fragment which given a meta
parameter name, returns its value. One bind
variable.
make_meta_set_query() Returns a sql fragment which takes two bind
arguments, the parameter name and its value
Don't make changes unless you know what you're doing! It will affect the
persistent database.
=cut
sub meta {
my $self = shift;
my $param_name = uc shift;
# getting
if (@_) {
my $value = shift;
my $sql = $self->make_meta_set_query() or return;
my $sth = $self->dbh->prepare_delayed($sql)
or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
$sth->execute($param_name,$value)
or $self->error("Can't execute $sql: ",$self->dbh->errstr), return;
$sth->finish;
return $self->{meta}{$param_name} = $value;
lib/Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
sub schema {
shift->throw("The schema() method must be implemented by subclass");
}
=head2 DESTROY
Title : DESTROY
Usage : $db->DESTROY
Function: disconnect database at destruct time
Returns : void
Args : none
Status : protected
This is the destructor for the class.
=cut
sub DESTROY {
my $self = shift;
$self->features_db->disconnect if defined $self->features_db;
}
################## query cache ##################
#########################################
## Moved from mysql.pm and mysqlopt.pm ##
#########################################
=head2 make_features_by_name_where_part
Title : make_features_by_name_where_part
Usage : $db->make_features_by_name_where_part
Function: create the SQL fragment needed to select a feature by its group name & class
Returns : a SQL fragment and bind arguments
Args : see below
Status : Protected
=cut
sub make_features_by_name_where_part {
my $self = shift;
my ($class,$name) = @_;
if ($name =~ /\*/) {
$name =~ s/%/\\%/g;
$name =~ s/_/\\_/g;
$name =~ tr/*/%/;
return ("fgroup.gclass=? AND fgroup.gname LIKE ?",$class,$name);
} else {
return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name);
}
}
sub make_features_by_alias_where_part {
my $self = shift;
my ($class,$name) = @_;
if ($name =~ /\*/) {
$name =~ tr/*/%/;
$name =~ s/_/\\_/g;
return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value LIKE ? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=...
} else {
return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value=? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata...
}
}
sub make_features_by_attribute_where_part {
my $self = shift;
my $attributes = shift;
my @args;
my @sql;
foreach (keys %$attributes) {
push @sql,"(fattribute.fattribute_name=? AND fattribute_to_feature.fattribute_value=?)";
push @args,($_,$attributes->{$_});
}
return (join(' OR ',@sql),@args);
}
=head2 make_features_by_id_where_part
Title : make_features_by_id_where_part
Usage : $db->make_features_by_id_where_part($ids)
Function: create the SQL fragment needed to select a set of features by their ids
Returns : a SQL fragment and bind arguments
Args : arrayref of IDs
Status : Protected
=cut
sub make_features_by_id_where_part {
my $self = shift;
my $ids = shift;
my $set = join ",",@$ids;
return ("fdata.fid IN ($set)");
}
=head2 make_features_by_gid_where_part
Title : make_features_by_id_where_part
Usage : $db->make_features_by_gid_where_part($ids)
Function: create the SQL fragment needed to select a set of features by their ids
Returns : a SQL fragment and bind arguments
Args : arrayref of IDs
Status : Protected
=cut
sub make_features_by_gid_where_part {
my $self = shift;
my $ids = shift;
my $set = join ",",@$ids;
return ("fgroup.gid IN ($set)");
}
=head2 make_features_from_part
Title : make_features_from_part
Usage : $string = $db->make_features_from_part()
Function: make from part of the features query
Returns : a string
Args : none
Status : protected
This method creates the part of the features query that immediately
follows the FROM keyword.
=cut
sub make_features_from_part {
my $self = shift;
my $sparse = shift;
my $options = shift || {};
return $options->{attributes} ? "fdata,ftype,fgroup,fattribute,fattribute_to_feature\n"
: "fdata,ftype,fgroup\n";
}
=head2 make_features_join_part
Title : make_features_join_part
Usage : $string = $db->make_features_join_part()
Function: make join part of the features query
Returns : a string
Args : none
Status : protected
This method creates the part of the features query that immediately
follows the WHERE keyword.
=cut
sub make_features_join_part {
my $self = shift;
my $options = shift || {};
return !$options->{attributes} ? <<END1 : <<END2;
fgroup.gid = fdata.gid
AND ftype.ftypeid = fdata.ftypeid
END1
fgroup.gid = fdata.gid
AND ftype.ftypeid = fdata.ftypeid
AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
AND fdata.fid=fattribute_to_feature.fid
END2
}
=head2 make_features_order_by_part
Title : make_features_order_by_part
Usage : ($query,@args) = $db->make_features_order_by_part()
Function: make the ORDER BY part of the features() query
Returns : a SQL fragment and bind arguments, if any
Args : none
Status : protected
This method creates the part of the features query that immediately
follows the ORDER BY part of the query issued by features() and
related methods.
=cut
sub make_features_order_by_part {
my $self = shift;
my $options = shift || {};
return "fgroup.gname";
}
=head2 make_features_group_by_part
Title : make_features_group_by_part
Usage : ($query,@args) = $db->make_features_group_by_part()
Function: make the GROUP BY part of the features() query
Returns : a SQL fragment and bind arguments, if any
Args : none
Status : protected
This method creates the part of the features query that immediately
follows the GROUP BY part of the query issued by features() and
related methods.
=cut
sub make_features_group_by_part {
my $self = shift;
my $options = shift || {};
if (my $att = $options->{attributes}) {
my $key_count = keys %$att;
return unless $key_count > 1;
return ("fdata.fid,fref,fstart,fstop,fsource,
fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
ftarget_stop,fdata.gid
HAVING count(fdata.fid) > ?",$key_count-1);
}
elsif (my $b = $options->{bin_width}) {
return "fref,fstart,fdata.ftypeid";
}
}
=head2 refseq_query
Title : refseq_query
Usage : ($query,@args) = $db->refseq_query($name,$class)
Function: create SQL fragment that selects the desired reference sequence
Returns : a list containing the query and bind arguments
Args : reference sequence name and class
Status : protected
This method is called by make_features_by_range_where_part() to
construct the part of the select WHERE section that selects a
particular reference sequence. It returns a mult-element list in
which the first element is the SQL fragment and subsequent elements
are bind values.
For example:
sub refseq_query {
my ($name,$class) = @_;
return ('gff.refseq=? AND gff.refclass=?',
$name,$class);
}
The current schema does not distinguish among different classes of
reference sequence.
=cut
# IMPORTANT NOTE: THE MYSQL SCHEMA IGNORES THE SEQUENCE CLASS
# THIS SHOULD BE FIXED
sub refseq_query {
my $self = shift;
my ($refseq,$refclass) = @_;
my $query = "fdata.fref=?";
return wantarray ? ($query,$refseq) : $self->dbh->dbi_quote($query,$refseq);
}
=head2 attributes
Title : attributes
Usage : @attributes = $db->attributes($id,$name)
Function: get the attributes on a particular feature
Returns : an array of string
Args : feature ID
Status : public
Some GFF version 2 files use the groups column to store a series of
attribute/value pairs. In this interpretation of GFF, the first such
pair is treated as the primary group for the feature; subsequent pairs
are treated as attributes. Two attributes have special meaning:
"Note" is for backward compatibility and is used for unstructured text
remarks. "Alias" is considered as a synonym for the feature name.
lib/Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
if (defined $typelist && @$typelist) {
my ($q,@a) = $self->types_query($typelist);
push @query,($q);
push @args,@a;
}
my $query = @query ? join(' AND ',@query) : '1=1';
return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}
=head2 make_types_group_part
Title : make_types_group_part
Usage : ($string,@args) = $db->make_types_group_part(@args)
Function: create the GROUP BY portion of the SQL for fetching features type lists
Returns : query string and bind arguments
Args : see below
Status : protected
This method is called by get_types() to generate the query fragment
and bind arguments for the GROUP BY part of the query that retrieves
lists of feature types. The four positional arguments are as follows:
$refseq reference sequence name
$start start of region
$stop end of region
$want_count true to return the count of this feature type
=cut
sub make_types_group_part {
my $self = shift;
my ($srcseq,$start,$stop,$want_count) = @_;
return unless $srcseq or $want_count;
return 'ftype.ftypeid,ftype.fmethod,ftype.fsource';
}
=head2 get_feature_id
Title : get_feature_id
Usage : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
Function: get the ID of a feature
Returns : an integer ID or undef
Args : none
Status : private
This internal method is called by load_gff_line to look up the integer
ID of an existing feature. It is ony needed when replacing a feature
with new information.
=cut
# this method is called when needed to look up a feature's ID
sub get_feature_id {
my $self = shift;
my ($ref,$start,$stop,$typeid,$groupid) = @_;
my $s = $self->{load_stuff};
unless ($s->{get_feature_id}) {
my $dbh = $self->features_db;
$s->{get_feature_id} =
$dbh->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
}
my $sth = $s->{get_feature_id} or return;
$sth->execute($ref,$start,$stop,$typeid,$groupid) or return;
my ($fid) = $sth->fetchrow_array;
return $fid;
}
=head2 make_abscoord_query
Title : make_abscoord_query
Usage : $sth = $db->make_abscoord_query($name,$class);
Function: create query that finds the reference sequence coordinates given a landmark & classa
Returns : a DBI statement handle
Args : name and class of landmark
Status : protected
The statement handler should return rows containing five fields:
1. reference sequence name
2. reference sequence class
3. start position
4. stop position
5. strand ("+" or "-")
This query always returns "Sequence" as the class of the reference
sequence.
=cut
# given sequence name, return (reference,start,stop,strand)
sub make_abscoord_query {
my $self = shift;
my ($name,$class,$refseq) = @_;
#my $query = GETSEQCOORDS;
my $query = $self->getseqcoords_query();
my $getforcedseqcoords = $self->getforcedseqcoords_query() ;
if ($name =~ /\*/) {
$name =~ s/%/\\%/g;
$name =~ s/_/\\_/g;
$name =~ tr/*/%/;
$query =~ s/gname=\?/gname LIKE ?/;
}
defined $refseq ? $self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq)
: $self->dbh->do_query($query,$name,$class);
}
sub make_aliasabscoord_query {
my $self = shift;
my ($name,$class) = @_;
#my $query = GETALIASCOORDS;
my $query = $self->getaliascoords_query();
if ($name =~ /\*/) {
$name =~ s/%/\\%/g;
$name =~ s/_/\\_/g;
$name =~ tr/*/%/;
$query =~ s/gname=\?/gname LIKE ?/;
}
$self->dbh->do_query($query,$name,$class);
lib/Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
# find features that are completely contained within a ranged
sub contains_query {
my $self = shift;
my ($start,$stop) = @_;
my ($bq,@bargs) = $self->bin_query($start,$stop,undef,bin($start,$stop,$self->min_bin));
my ($iq,@iargs) = $self->contains_query_nobin($start,$stop);
my $query = "($bq)\n\tAND $iq";
my @args = (@bargs,@iargs);
return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}
# find features that are completely contained within a range
sub contained_in_query {
my $self = shift;
my ($start,$stop) = @_;
my ($bq,@bargs) = $self->bin_query($start,$stop,abs($stop-$start)+1,undef);
my ($iq,@iargs) = $self->contained_in_query_nobin($start,$stop);
my $query = "($bq)\n\tAND $iq";
my @args = (@bargs,@iargs);
return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}
# implement the _delete_fattribute_to_feature() method
sub _delete_fattribute_to_feature {
my $self = shift;
my @feature_ids = @_;
my $dbh = $self->features_db;
my $fields = join ',',map{$dbh->quote($_)} @feature_ids;
my $query = "delete from fattribute_to_feature where fid in ($fields)";
warn "$query\n" if $self->debug;
my $result = $dbh->do($query);
defined $result or $self->throw($dbh->errstr);
$result;
}
# implement the _delete_features() method
sub _delete_features {
my $self = shift;
my @feature_ids = @_;
my $dbh = $self->features_db;
my $fields = join ',',map{$dbh->quote($_)} @feature_ids;
# delete from fattribute_to_feature
$self->_delete_fattribute_to_feature(@feature_ids);
my $query = "delete from fdata where fid in ($fields)";
warn "$query\n" if $self->debug;
my $result = $dbh->do($query);
defined $result or $self->throw($dbh->errstr);
$result;
}
# implement the _delete_groups() method
sub _delete_groups {
my $self = shift;
my @group_ids = @_;
my $dbh = $self->features_db;
my $fields = join ',',map{$dbh->quote($_)} @group_ids;
foreach my $gid (@group_ids){
my @features = $self->get_feature_by_gid($gid);
$self->delete_features(@features);
}
my $query = "delete from fgroup where gid in ($fields)";
warn "$query\n" if $self->debug;
my $result = $dbh->do($query);
defined $result or $self->throw($dbh->errstr);
$result;
}
# implement the _delete() method
sub _delete {
my $self = shift;
my $delete_spec = shift;
my $ranges = $delete_spec->{segments} || [];
my $types = $delete_spec->{types} || [];
my $force = $delete_spec->{force};
my $range_type = $delete_spec->{range_type};
my $dbh = $self->features_db;
my $query = 'delete from fdata';
my @where;
my @range_part;
for my $segment (@$ranges) {
my $ref = $dbh->quote($segment->abs_ref);
my $start = $segment->abs_start;
my $stop = $segment->abs_stop;
my $range = $range_type eq 'overlaps' ? $self->overlap_query($start,$stop)
: $range_type eq 'contains' ? $self->contains_query($start,$stop)
: $range_type eq 'contained_in' ? $self->contained_in_query($start,$stop)
: $self->throw("Invalid range type '$range_type'");
push @range_part,"(fref=$ref AND $range)";
}
push @where,'('. join(' OR ',@range_part).')' if @range_part;
# get all the types
if (@$types) {
my $types_where = $self->types_query($types);
my $types_query = "select ftypeid from ftype where $types_where";
my $result = $dbh->selectall_arrayref($types_query);
my @typeids = map {$_->[0]} @$result;
my $typelist = join ',',map{$dbh->quote($_)} @typeids;
$typelist ||= "0"; # don't cause DBI to die with invalid SQL when
# unknown feature types were requested.
push @where,"(ftypeid in ($typelist))";
}
$self->throw("This operation would delete all feature data and -force not specified")
unless @where || $force;
$query .= " where ".join(' and ',@where) if @where;
warn "$query\n" if $self->debug;
my $result = $dbh->do($query);
defined $result or $self->throw($dbh->errstr);
$result;
}
=head2 feature_summary
Title : feature_summary
Usage : $summary = $db->feature_summary(@args)
Function: returns a coverage summary across indicated region/type
Returns : a Bio::SeqFeatureI object containing the "coverage" tag
( run in 0.782 second using v1.01-cache-2.11-cpan-5735350b133 )