Bio-DB-GFF

 view release on metacpan or  search on metacpan

lib/Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

  my $join           = $self->make_features_join_part;
  my $range          = $self->make_features_by_range_where_part('overlaps',
								{refseq=>$location->[0],
								 class =>'',
								 start=>$location->[1],
								 stop =>$location->[2]}) if $location;
  # group query
  my $query1  = "SELECT $select FROM $from WHERE $where AND $join";
  $query1    .= " AND $range" if $range;

  # alias query
  $from  = $self->make_features_from_part(undef,{attributes=>1});
  ($where,@args) = $self->make_features_by_alias_where_part($class,$name);  # potential bug - @args1==@args2?

  my $query2  = "SELECT $select FROM $from WHERE $where AND $join";
  $query2    .= " AND $range" if $range;

  my $count = 0;

  for my $query ($query1,$query2) {
    my $sth    = $self->dbh->do_query($query,@args);
    while (my @row = $sth->fetchrow_array) {
      $callback->(@row);
      $count++;
    }
    $sth->finish;
  }

  return $count;
}

=head2 _feature_by_id

 Title   : _feature_by_id
 Usage   : $db->_feature_by_id($ids,$type,$callback)
 Function: get a list of features by ID
 Returns : count of number of features retrieved
 Args    : arrayref containing list of IDs to fetch and a callback
 Status  : protected

This method is used internally.  The $type selector is one of
"feature" or "group".  The callback arguments are those used by
make_feature().  Internally, it invokes the following abstract
procedures:

 make_features_select_part
 make_features_from_part
 make_features_by_id_where_part
 make_features_join_part

=cut

sub _feature_by_id {
  my $self = shift;
  my ($ids,$type,$callback) = @_;
  $callback || $self->throw('must provide a callback argument');

  my $select         = $self->make_features_select_part;
  my $from           = $self->make_features_from_part;
  my ($where,@args)  = $type eq 'feature' ? $self->make_features_by_id_where_part($ids)
                                          : $self->make_features_by_gid_where_part($ids);
  my $join           = $self->make_features_join_part;
  my $query          = "SELECT $select FROM $from WHERE $where AND $join";
  my $sth            = $self->dbh->do_query($query,@args);

  my $count = 0;
  while (my @row = $sth->fetchrow_array) {
    $callback->(@row);
    $count++;
  }
  $sth->finish;
  return $count;
}

sub _feature_by_attribute {
  my $self = shift;
  my ($attributes,$callback) = @_;
  $callback || $self->throw('must provide a callback argument');

  my $select         = $self->make_features_select_part;
  my $from           = $self->make_features_from_part(undef,{attributes=>$attributes});
  my ($where,@args)  = $self->make_features_by_range_where_part('',{attributes=>$attributes});
  my $join           = $self->make_features_join_part({attributes=>$attributes});
  my $query          = "SELECT $select FROM $from WHERE $where AND $join";
  my $sth            = $self->dbh->do_query($query,@args);

  my $count = 0;
  while (my @row = $sth->fetchrow_array) {
    $callback->(@row);
    $count++;
  }
  $sth->finish;
  return $count;
}

=head2 get_types

 Title   : get_types
 Usage   : $db->get_types($refseq,$refclass,$start,$stop,$count)
 Function: get list of types
 Returns : a list of Bio::DB::GFF::Typename objects
 Args    : see below
 Status  : Public

This method is responsible for fetching the list of feature type names
from the database.  The query may be limited to a particular range, in
which case the range is indicated by a landmark sequence name and
class and its subrange, if any.  These arguments may be undef if it is
desired to retrieve all feature types in the database (which may be a
slow operation in some implementations).

If the $count flag is false, the method returns a simple list of
vBio::DB::GFF::Typename objects.  If $count is true, the method returns
a list of $name=E<gt>$count pairs, where $count indicates the number of
times this feature occurs in the range.

Internally, this method calls upon the following functions to generate
the SQL and its bind variables:

  ($q1,@args) = make_types_select_part(@args);
  ($q2,@args) = make_types_from_part(@args);

lib/Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

  my $self           = shift;
  my ($field,$value) = @_;
  return qq($field = ?) if $value =~ /^[!@%&a-zA-Z0-9_\'\" ~-]+$/;
  return qq($field REGEXP ?);
}

=head2 exact_match

 Title   : exact_match
 Usage   : $string = $db->exact_match($field,$value)
 Function: create a SQL fragment for performing exact string matching
 Returns : query string
 Args    : the table field and match value
 Status  : public

This method produces the SQL fragment for matching a field name to a
constant string value.

=cut

sub exact_match {
  my $self           = shift;
  my ($field,$value) = @_;
  return qq($field = ?);
}

=head2 search_notes

 Title   : search_notes
 Usage   : @search_results = $db->search_notes("full text search string",$limit)
 Function: Search the notes for a text string, using mysql full-text search
 Returns : array of results
 Args    : full text search string, and an optional row limit
 Status  : public

This is a mysql-specific method.  Given a search string, it performs a
full-text search of the notes table and returns an array of results.
Each row of the returned array is a arrayref containing the following fields:

  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
  column 2     The text of the note
  column 3     A relevance score.
  column 4     A Bio::DB::GFF::Typename object

=cut

sub search_notes {
  my $self = shift;
  my ($search_string,$limit) = @_;

  $search_string =~ tr/*?//d; 

  my @words  = $search_string =~ /(\w+)/g;
  my $regex  = join '|',@words;
  my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
  my $search   = join(' OR ',@searches);

  my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
  FROM fgroup,fattribute_to_feature,fdata,ftype
  WHERE fgroup.gid=fdata.gid
     AND fdata.fid=fattribute_to_feature.fid
     AND fdata.ftypeid=ftype.ftypeid
     AND ($search)
END
;

  my $sth = $self->dbh->do_query($query);
  my @results;
  while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
     next unless $class && $name;    # sorry, ignore NULL objects
     my @matches = $note =~ /($regex)/g;
     my $relevance = 10*@matches;
     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
     my $type     = Bio::DB::GFF::Typename->new($method,$source);
     push @results,[$featname,$note,$relevance,$type];
     last if $limit && @results >= $limit;
  }
  @results;
}


=head2 meta

 Title   : meta
 Usage   : $value = $db->meta($name [,$newval])
 Function: get or set a meta variable
 Returns : a string
 Args    : meta variable name and optionally value
 Status  : public

Get or set a named metavariable for the database.  Metavariables can
be used for database-specific settings.  This method calls two
class-specific methods which must be implemented:

  make_meta_get_query()   Returns a sql fragment which given a meta
                          parameter name, returns its value.  One bind
                          variable.
  make_meta_set_query()   Returns a sql fragment which takes two bind
                          arguments, the parameter name and its value


Don't make changes unless you know what you're doing!  It will affect the
persistent database.

=cut

sub meta {
  my $self = shift;
  my $param_name = uc shift;

  # getting
  if (@_) {
    my $value = shift;
    my $sql = $self->make_meta_set_query() or return;
    my $sth = $self->dbh->prepare_delayed($sql) 
              or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
    $sth->execute($param_name,$value)
              or $self->error("Can't execute $sql: ",$self->dbh->errstr), return;
    $sth->finish;
    return $self->{meta}{$param_name} = $value;

lib/Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN


sub schema {
  shift->throw("The schema() method must be implemented by subclass");
}

=head2 DESTROY

 Title   : DESTROY
 Usage   : $db->DESTROY
 Function: disconnect database at destruct time
 Returns : void
 Args    : none
 Status  : protected

This is the destructor for the class.

=cut

sub DESTROY {
  my $self = shift;
  $self->features_db->disconnect if defined $self->features_db;
}

################## query cache ##################


#########################################  
## Moved from mysql.pm and mysqlopt.pm ##
#########################################

=head2 make_features_by_name_where_part

 Title   : make_features_by_name_where_part
 Usage   : $db->make_features_by_name_where_part
 Function: create the SQL fragment needed to select a feature by its group name & class
 Returns : a SQL fragment and bind arguments
 Args    : see below
 Status  : Protected

=cut

sub make_features_by_name_where_part {
  my $self = shift;
  my ($class,$name) = @_;
  if ($name =~ /\*/) {
    $name =~ s/%/\\%/g;
    $name =~ s/_/\\_/g;
    $name =~ tr/*/%/;
    return ("fgroup.gclass=? AND fgroup.gname LIKE ?",$class,$name);
  } else {
    return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name);
  }
}

sub make_features_by_alias_where_part {
  my $self = shift;
  my ($class,$name) = @_;
  if ($name =~ /\*/) {
    $name =~ tr/*/%/;
    $name =~ s/_/\\_/g;
    return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value LIKE ? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=...
  } else {
    return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value=? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata...
  }

}

sub make_features_by_attribute_where_part {
  my $self = shift;
  my $attributes = shift;
  my @args;
  my @sql;
  foreach (keys %$attributes) {
     push @sql,"(fattribute.fattribute_name=? AND fattribute_to_feature.fattribute_value=?)";
     push @args,($_,$attributes->{$_});
  }
  return (join(' OR ',@sql),@args);
}

=head2 make_features_by_id_where_part

 Title   : make_features_by_id_where_part
 Usage   : $db->make_features_by_id_where_part($ids)
 Function: create the SQL fragment needed to select a set of features by their ids
 Returns : a SQL fragment and bind arguments
 Args    : arrayref of IDs
 Status  : Protected

=cut

sub make_features_by_id_where_part {
  my $self = shift;
  my $ids = shift;
  my $set = join ",",@$ids;
  return ("fdata.fid IN ($set)");
}

=head2 make_features_by_gid_where_part

 Title   : make_features_by_id_where_part
 Usage   : $db->make_features_by_gid_where_part($ids)
 Function: create the SQL fragment needed to select a set of features by their ids
 Returns : a SQL fragment and bind arguments
 Args    : arrayref of IDs
 Status  : Protected

=cut

sub make_features_by_gid_where_part {
  my $self = shift;
  my $ids = shift;
  my $set = join ",",@$ids;
  return ("fgroup.gid IN ($set)");
}


=head2 make_features_from_part

 Title   : make_features_from_part
 Usage   : $string = $db->make_features_from_part()
 Function: make from part of the features query
 Returns : a string
 Args    : none
 Status  : protected

This method creates the part of the features query that immediately
follows the FROM keyword.

=cut

sub make_features_from_part {
  my $self = shift;
  my $sparse = shift;
  my $options = shift || {};
  return $options->{attributes} ? "fdata,ftype,fgroup,fattribute,fattribute_to_feature\n"
                                : "fdata,ftype,fgroup\n";
}


=head2 make_features_join_part

 Title   : make_features_join_part
 Usage   : $string = $db->make_features_join_part()
 Function: make join part of the features query
 Returns : a string
 Args    : none
 Status  : protected

This method creates the part of the features query that immediately
follows the WHERE keyword.

=cut

sub make_features_join_part {
  my $self = shift;
  my $options = shift || {};
  return !$options->{attributes} ? <<END1 : <<END2;
  fgroup.gid = fdata.gid 
  AND ftype.ftypeid = fdata.ftypeid
END1
  fgroup.gid = fdata.gid 
  AND ftype.ftypeid = fdata.ftypeid
  AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
  AND fdata.fid=fattribute_to_feature.fid
END2
}

=head2 make_features_order_by_part

 Title   : make_features_order_by_part
 Usage   : ($query,@args) = $db->make_features_order_by_part()
 Function: make the ORDER BY part of the features() query
 Returns : a SQL fragment and bind arguments, if any
 Args    : none
 Status  : protected

This method creates the part of the features query that immediately
follows the ORDER BY part of the query issued by features() and
related methods.

=cut

sub make_features_order_by_part {
  my $self = shift;
  my $options = shift || {};
  return "fgroup.gname";
}

=head2 make_features_group_by_part

 Title   : make_features_group_by_part
 Usage   : ($query,@args) = $db->make_features_group_by_part()
 Function: make the GROUP BY part of the features() query
 Returns : a SQL fragment and bind arguments, if any
 Args    : none
 Status  : protected

This method creates the part of the features query that immediately
follows the GROUP BY part of the query issued by features() and
related methods.

=cut

sub make_features_group_by_part {
  my $self = shift;
  my $options = shift || {};
  if (my $att = $options->{attributes}) {
    my $key_count = keys %$att;
    return unless $key_count > 1;
    return ("fdata.fid,fref,fstart,fstop,fsource,
           fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
           ftarget_stop,fdata.gid
     HAVING count(fdata.fid) > ?",$key_count-1);
  }
  elsif (my $b = $options->{bin_width}) {
    return "fref,fstart,fdata.ftypeid";
  }

}

=head2 refseq_query

 Title   : refseq_query
 Usage   : ($query,@args) = $db->refseq_query($name,$class)
 Function: create SQL fragment that selects the desired reference sequence
 Returns : a list containing the query and bind arguments
 Args    : reference sequence name and class
 Status  : protected

This method is called by make_features_by_range_where_part() to
construct the part of the select WHERE section that selects a
particular reference sequence.  It returns a mult-element list in
which the first element is the SQL fragment and subsequent elements
are bind values.

For example:

  sub refseq_query {
     my ($name,$class) = @_;
     return ('gff.refseq=? AND gff.refclass=?',
	     $name,$class);
  }

The current schema does not distinguish among different classes of
reference sequence.

=cut

# IMPORTANT NOTE: THE MYSQL SCHEMA IGNORES THE SEQUENCE CLASS
# THIS SHOULD BE FIXED
sub refseq_query {
  my $self = shift;
  my ($refseq,$refclass) = @_;
  my $query = "fdata.fref=?";
  return wantarray ? ($query,$refseq) : $self->dbh->dbi_quote($query,$refseq);
}

=head2 attributes

 Title   : attributes
 Usage   : @attributes = $db->attributes($id,$name)
 Function: get the attributes on a particular feature
 Returns : an array of string
 Args    : feature ID
 Status  : public

Some GFF version 2 files use the groups column to store a series of
attribute/value pairs.  In this interpretation of GFF, the first such
pair is treated as the primary group for the feature; subsequent pairs
are treated as attributes.  Two attributes have special meaning:
"Note" is for backward compatibility and is used for unstructured text
remarks.  "Alias" is considered as a synonym for the feature name.

lib/Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

  if (defined $typelist && @$typelist) {
    my ($q,@a) = $self->types_query($typelist);
    push @query,($q);
    push @args,@a;
  }
  my $query = @query ? join(' AND ',@query) : '1=1';
  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}

=head2 make_types_group_part

 Title   : make_types_group_part
 Usage   : ($string,@args) = $db->make_types_group_part(@args)
 Function: create the GROUP BY portion of the SQL for fetching features type lists
 Returns : query string and bind arguments
 Args    : see below
 Status  : protected

This method is called by get_types() to generate the query fragment
and bind arguments for the GROUP BY part of the query that retrieves
lists of feature types.  The four positional arguments are as follows:

 $refseq      reference sequence name
 $start       start of region
 $stop        end of region
 $want_count  true to return the count of this feature type

=cut

sub make_types_group_part {
  my $self = shift;
  my ($srcseq,$start,$stop,$want_count) = @_;
  return unless $srcseq or $want_count;
  return 'ftype.ftypeid,ftype.fmethod,ftype.fsource';
}


=head2 get_feature_id

 Title   : get_feature_id
 Usage   : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
 Function: get the ID of a feature
 Returns : an integer ID or undef
 Args    : none
 Status  : private

This internal method is called by load_gff_line to look up the integer
ID of an existing feature.  It is ony needed when replacing a feature
with new information.

=cut

# this method is called when needed to look up a feature's ID
sub get_feature_id {
  my $self = shift;
  my ($ref,$start,$stop,$typeid,$groupid) = @_;
  my $s = $self->{load_stuff};
  unless ($s->{get_feature_id}) {
    my $dbh = $self->features_db;
    $s->{get_feature_id} =
      $dbh->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
  }
  my $sth = $s->{get_feature_id} or return;
  $sth->execute($ref,$start,$stop,$typeid,$groupid) or return;
  my ($fid) = $sth->fetchrow_array;
  return $fid;
}



=head2 make_abscoord_query

 Title   : make_abscoord_query
 Usage   : $sth = $db->make_abscoord_query($name,$class);
 Function: create query that finds the reference sequence coordinates given a landmark & classa
 Returns : a DBI statement handle
 Args    : name and class of landmark
 Status  : protected

The statement handler should return rows containing five fields:

  1. reference sequence name
  2. reference sequence class
  3. start position
  4. stop position
  5. strand ("+" or "-")

This query always returns "Sequence" as the class of the reference
sequence.

=cut

# given sequence name, return (reference,start,stop,strand)
sub make_abscoord_query {
  my $self = shift;
  my ($name,$class,$refseq) = @_;
  #my $query = GETSEQCOORDS;
  my $query = $self->getseqcoords_query();
  my $getforcedseqcoords = $self->getforcedseqcoords_query() ;
  if ($name =~ /\*/) {
    $name =~ s/%/\\%/g;
    $name =~ s/_/\\_/g;
    $name =~ tr/*/%/;
    $query =~ s/gname=\?/gname LIKE ?/;
  }
  defined $refseq ? $self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq) 
    : $self->dbh->do_query($query,$name,$class);
}

sub make_aliasabscoord_query {
  my $self = shift;
  my ($name,$class) = @_;
  #my $query = GETALIASCOORDS;
  my $query = $self->getaliascoords_query();
  if ($name =~ /\*/) {
    $name =~ s/%/\\%/g;
    $name =~ s/_/\\_/g;
    $name =~ tr/*/%/;
    $query =~ s/gname=\?/gname LIKE ?/;
  }
  $self->dbh->do_query($query,$name,$class);

lib/Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

# find features that are completely contained within a ranged
sub contains_query {
  my $self = shift;
  my ($start,$stop) = @_;
  my ($bq,@bargs)   = $self->bin_query($start,$stop,undef,bin($start,$stop,$self->min_bin));
  my ($iq,@iargs)   = $self->contains_query_nobin($start,$stop);
  my $query = "($bq)\n\tAND $iq";
  my @args  = (@bargs,@iargs);
  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}

# find features that are completely contained within a range
sub contained_in_query {
  my $self = shift;
  my ($start,$stop) = @_;
  my ($bq,@bargs)   = $self->bin_query($start,$stop,abs($stop-$start)+1,undef);
  my ($iq,@iargs)   = $self->contained_in_query_nobin($start,$stop);
  my $query = "($bq)\n\tAND $iq";
  my @args  = (@bargs,@iargs);
  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}

# implement the _delete_fattribute_to_feature() method
sub _delete_fattribute_to_feature {
  my $self         = shift;
  my @feature_ids  = @_;
  my $dbh          = $self->features_db;
  my $fields       = join ',',map{$dbh->quote($_)} @feature_ids;

  my $query = "delete from fattribute_to_feature where fid in ($fields)";
  warn "$query\n" if $self->debug;
  my $result = $dbh->do($query);
  defined $result or $self->throw($dbh->errstr);
  $result;
}

# implement the _delete_features() method
sub _delete_features {
  my $self = shift;
  my @feature_ids = @_;
  my $dbh          = $self->features_db;
  my $fields       = join ',',map{$dbh->quote($_)} @feature_ids;

  # delete from fattribute_to_feature
  $self->_delete_fattribute_to_feature(@feature_ids);

  my $query = "delete from fdata where fid in ($fields)";
  warn "$query\n" if $self->debug;
  my $result = $dbh->do($query);
  defined $result or $self->throw($dbh->errstr);
  $result;
}

# implement the _delete_groups() method
sub _delete_groups {
  my $self = shift;
  my @group_ids    = @_;
  my $dbh          = $self->features_db;
  my $fields       = join ',',map{$dbh->quote($_)} @group_ids;

  foreach my $gid (@group_ids){
      my @features = $self->get_feature_by_gid($gid);
      $self->delete_features(@features);
  }

  my $query  = "delete from fgroup where gid in ($fields)";
  warn "$query\n" if $self->debug;
  my $result = $dbh->do($query);
  defined $result or $self->throw($dbh->errstr);
  $result;
}

# implement the _delete() method
sub _delete {
  my $self = shift;
  my $delete_spec = shift;
  my $ranges      = $delete_spec->{segments} || [];
  my $types       = $delete_spec->{types}    || [];
  my $force       = $delete_spec->{force};
  my $range_type  = $delete_spec->{range_type};
  my $dbh         = $self->features_db;

  my $query = 'delete from fdata';
  my @where;

  my @range_part;
  for my $segment (@$ranges) {
    my $ref   = $dbh->quote($segment->abs_ref);
    my $start = $segment->abs_start;
    my $stop  = $segment->abs_stop;
    my $range =  $range_type eq 'overlaps'     ? $self->overlap_query($start,$stop)
               : $range_type eq 'contains'     ? $self->contains_query($start,$stop)
	       : $range_type eq 'contained_in' ? $self->contained_in_query($start,$stop)
	       : $self->throw("Invalid range type '$range_type'");
    push @range_part,"(fref=$ref AND $range)";
  }
  push @where,'('. join(' OR ',@range_part).')' if @range_part;

  # get all the types
  if (@$types) {
    my $types_where = $self->types_query($types);
    my $types_query = "select ftypeid from ftype where $types_where";
    my $result      = $dbh->selectall_arrayref($types_query);
    my @typeids     = map {$_->[0]} @$result;
    my $typelist    = join ',',map{$dbh->quote($_)} @typeids;
    $typelist ||= "0"; # don't cause DBI to die with invalid SQL when
                       # unknown feature types were requested.
    push @where,"(ftypeid in ($typelist))";
  }
  $self->throw("This operation would delete all feature data and -force not specified")
    unless @where || $force;
  $query .= " where ".join(' and ',@where) if @where;
  warn "$query\n" if $self->debug;
  my $result = $dbh->do($query);

  defined $result or $self->throw($dbh->errstr);
  $result;
}


=head2 feature_summary

 Title   : feature_summary
 Usage   : $summary = $db->feature_summary(@args)
 Function: returns a coverage summary across indicated region/type
 Returns : a Bio::SeqFeatureI object containing the "coverage" tag



( run in 0.782 second using v1.01-cache-2.11-cpan-5735350b133 )