BioPerl

 view release on metacpan or  search on metacpan

Bio/AlignIO/phylip.pm  view on Meta::CPAN

    while( my $aln = $gcgstream->next_aln ) {
        $phylipstream->write_aln($aln);
    }

This example shows how to read phylip format:

    my $in = Bio::AlignIO->new(
      -file        => $inFile,
      -format      => 'phylip',
      -interleaved => 0,
      -longid      => 1
    );

    my $out = Bio::AlignIO->new(
      -file   => ">$outFile",
      -format => 'fasta'
    );

    while ( my $aln = $in->next_aln() ) {
      $out->write_aln($aln);
    }

The -longid argument is required if the input phylip format file
has ids with lengths greater than 10 characters.

=head1 DESCRIPTION

This object can transform Bio::SimpleAlign objects to and from PHYLIP
format. By default it works with the interleaved format. By specifying
the flag -interleaved =E<gt> 0 in the initialization the module can
read or write data in sequential format.

Reading phylip format with long IDs up to 50 characters is supported by
the flag -longid =E<gt>1. ID strings can be surrounded by single quotes.
They are mandatory only if the IDs contain spaces.

=head1 FEEDBACK

=head2 Support

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

Bio/AlignIO/phylip.pm  view on Meta::CPAN

           -idlinebreak => insert a line break after the sequence id
                           so that sequence starts on the next line
           -flag_SI => whether or not to write an "S" or "I" just after
                       the num.seq. and line len., in the first line
           -tag_length => integer of how long the tags have to be in
                         each line between the space separator. set it
                         to 0 to have 1 tag only.
           -wrap_sequential => boolean for whether or not sequential
                               format should be broken up or a single line
                               default is false (single line)
           -longid => boolean to read arbitrarily long IDs (default is false)

=cut

sub _initialize {
    my ( $self, @args ) = @_;

    # Let the parent class process the arguments first.
    $self->SUPER::_initialize(@args);

    # Extract the PHYLIP-specific named options, in this fixed order.
    my ($interleaved, $line_len, $id_linebreak, $id_len,
        $flag_si,     $tag_len,  $wrap_seq,     $long_id
        )
        = $self->_rearrange(
        [   qw(INTERLEAVED
                LINE_LENGTH
                IDLINEBREAK
                IDLENGTH
                FLAG_SI
                TAG_LENGTH
                WRAP_SEQUENTIAL
                LONGID)
        ],
        @args
        );

    # -interleaved is applied only when passed explicitly; normalized to 0/1.
    if ( defined $interleaved ) {
        $self->interleaved( $interleaved ? 1 : 0 );
    }

    # A false/missing -idlength falls back to the module default.
    $self->idlength( $id_len || $DEFAULTIDLENGTH );

    if ($id_linebreak) {
        $self->id_linebreak(1);
    }

    # Only a strictly positive -line_length is accepted.
    if ( defined $line_len && $line_len > 0 ) {
        $self->line_length($line_len);
    }

    if ($flag_si) {
        $self->flag_SI(1);
    }

    # NOTE(review): the setter is called with the supplied value whenever
    # either that value or $DEFAULTTAGLEN is true, so it may be handed undef;
    # presumably the accessor ignores undef -- confirm against tag_length().
    if ( $tag_len || $DEFAULTTAGLEN ) {
        $self->tag_length($tag_len);
    }

    # These two flags are always set, normalized to 0/1.
    $self->wrap_sequential( $wrap_seq ? 1 : 0 );
    $self->longid( $long_id ? 1 : 0 );

    return 1;
}

=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln()
 Function: returns the next alignment in the stream.
           Throws an exception if trying to read in PHYLIP
           sequential format.

Bio/AlignIO/phylip.pm  view on Meta::CPAN

    my $idlen = $self->idlength;
    $count = 0;

    while ( $entry = $self->_readline ) {
        if ( $entry =~ /^\s?$/ ) {    # eat the newlines
            next;
        }

# Names can be in a few different formats:
# 1. they can be traditional phylip: 10 chars long, period. If this is the case, that name can have spaces.
# 2. they can be hacked with a long ID, as passed in with the flag -longid.
# 3. if there is a long ID, the name can have spaces as long as it is wrapped in single quotes.
        if ( $self->longid() ) {    # 2 or 3
            if ( $entry =~ /^'(.+)'\s+(.+)$/ ) {  # 3. name has single quotes.
                $name = $1;
                $str  = $2;
            } else { # 2. name does not have single quotes, so should not have spaces.
                 # therefore, the first part of the line is the name and the rest is the seq.
                 # make sure that the line does not lead with extra spaces.
                $entry =~ s/^\s+//;
                ( $name, $str ) = split( /\s+/, $entry, 2 );
            }
        } else {    # 1. traditional phylip.

Bio/AlignIO/phylip.pm  view on Meta::CPAN

        } else {
            $self->_print(
                sprintf( " %s %s\n", $aln->num_sequences, $aln->length ) );
        }

        $idlength    = $self->idlength();
        $line_length = $self->line_length();
        $tag_length  = $self->tag_length();
        foreach $seq ( $aln->each_seq() ) {
            $name = $aln->displayname( $seq->get_nse );
            if ( $self->longid ) {
                $self->warn(
                    "The length of the name is over 50 chars long [$name]")
                    if length($name) > 50;
                $name = "'$name'  ";
            } else {
                $name = substr( $name, 0, $idlength )
                    if length($name) > $idlength;
                $name = sprintf( "%-" . $idlength . "s", $name );
                if ( $self->interleaved() ) {
                    $name .= '   ';

Bio/AlignIO/phylip.pm  view on Meta::CPAN

=cut

sub wrap_sequential {
    my $self = shift;
    my ($new_setting) = @_;

    # Act as a setter only when a defined value is supplied.
    $self->{'_wrap_sequential'} = $new_setting if defined $new_setting;

    # Report the stored value, or 0 when it is unset or false.
    my $current = $self->{'_wrap_sequential'};
    return $current ? $current : 0;
}

=head2 longid

 Title   : longid
 Usage   : $obj->longid($newval)
 Function: Get or set the flag that enables long sequence IDs
 Returns : value of longid
 Args    : newvalue (optional)


=cut

sub longid {
    my $self = shift;
    my ($new_setting) = @_;

    # Act as a setter only when a defined value is supplied.
    $self->{'_longid'} = $new_setting if defined $new_setting;

    # Report the stored value, or 0 when it is unset or false.
    my $current = $self->{'_longid'};
    return $current ? $current : 0;
}

1;

Bio/Assembly/IO/maq.pm  view on Meta::CPAN

    Usage   : my $singletobj = $self->_store_singlet(\%contiginfo, $contigobj);
    Function: store information of a singlet belonging to a scaffold in a singlet object
    Returns : Bio::Assembly::Singlet
    Args    : hash reference, contig object

=cut

sub _store_singlet {
    my ($self, $contiginfo, $contigobj) = @_;

    my $contigid = $$contiginfo{'asmbl_id'};
    my $seqref = ($contigobj->each_seq())[0];
    my $singletobj = Bio::Assembly::Singlet->new( -id     => $contigid,
                                                  -seqref => $seqref   );

    # Add other misc contig information as features of the contig
    # Add other misc read information as subsequence feature
    #my @other = grep !/_sfc|_assembly|_elem/, keys %$contiginfo; # remove the objects; _elem contains a code ref and can't be frozen. Just shooting blind here.
    #my %other;
    #@other{@other} = @$contiginfo{@other};
    #my $contigtags = Bio::SeqFeature::Generic->new(
    #    -primary     => '_main_contig_feature',
    #    -source      => $$contiginfo{asmbl_id},

Bio/Assembly/IO/tigr.pm  view on Meta::CPAN

    Returns : Bio::Assembly::Singlet
    Args    : hash, hash

=cut

sub _store_singlet {
    my ($self, $readinfo, $contiginfo) = @_;
    # Singlets in TIGR_Assembler are represented as a contig of one sequence
    # We try to simulate this duality by playing around with the Singlet object

    my $contigid = $$contiginfo{'asmbl_id'};
    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});

    # Create a sequence object
    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});
    my $seqobj = Bio::Seq::Quality->new(
       -primary_id => $readid,
       -display_id => $readid,
       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase
       -start      => 1,
       -strand     => $$readinfo{'strand'},
       -alphabet   => 'dna',
       -qual       => $self->_qual_hex2dec($$contiginfo{'quality'})    
   );

   # Create singlet from sequence and add it to scaffold
   my $singletobj = Bio::Assembly::Singlet->new(
     -id     => $contigid,
     -seqref => $seqobj
   );

   # Add other misc contig information as features of the singlet
   my $contigtags = Bio::SeqFeature::Generic->new(
        -primary     => '_main_contig_feature',
        -source      => $contigid,
        -start       => 1,
        -end         => $singletobj->get_consensus_length(),
        -strand      => 1,
        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},
                          'com_name'   => $$contiginfo{'com_name'},
                          'type'       => $$contiginfo{'type'},
                          'method'     => $$contiginfo{'method'},
                          'ed_status'  => $$contiginfo{'ed_status'},
                          'full_cds'   => $$contiginfo{'full_cds'},
                          'cds_start'  => $$contiginfo{'cds_start'},

Bio/Assembly/IO/tigr.pm  view on Meta::CPAN

   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)
   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq

   my $alncoord = Bio::SeqFeature::Generic->new(
       -primary     => '_aligned_coord',
       -source      => $readid,
       -start       => $$readinfo{'aln_start'},
       -end         => $$readinfo{'aln_end'},
       -strand      => $$readinfo{'strand'},
       -tag         => { 'contig' => $contigid }
   );
   $alncoord->attach_seq($singletobj->seqref);
   $singletobj->add_features([ $alncoord ], 0);

   # Add quality clipping read information in singlet features
   # (from 'aligned read' to 'gapped consensus' coordinates)
   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};
   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};
   my $clipcoord = Bio::SeqFeature::Generic->new(
       -primary     => '_quality_clipping',
       -source      => $readid,
       -start       => $$readinfo{'clip_start'},
       -end         => $$readinfo{'clip_end'},
       -strand      => $$readinfo{'strand'},
       -tag         => { 'contig' => $contigid }
   );
   $clipcoord->attach_seq($singletobj->seqref);
   $singletobj->add_features([ $clipcoord ], 0);

   # Add other misc read information as subsequence feature
   my $readtags = Bio::SeqFeature::Generic->new(
       -primary     => '_main_read_feature',
       -source      => $readid,
       -start       => $$readinfo{'aln_start'},
       -end         => $$readinfo{'aln_end'},

Bio/Assembly/IO/tigr.pm  view on Meta::CPAN

sub write_contig {
    my ($self, @args) = @_;
    my ($contigobj) = $self->_rearrange([qw(CONTIG)], @args);

    # Sanity check
    if ( !$contigobj || !$contigobj->isa('Bio::Assembly::Contig') ) {
        $self->throw("Must provide a Bio::Assembly::Contig or Singlet object when calling write_contig");
    }

    my $decimal_format = '%.2f';
    my $contigid = $contigobj->id;
    my $numseqs = $contigobj->num_sequences;

    if ( $contigobj->isa('Bio::Assembly::Singlet') ) {
        # This is a singlet
        my $readid     = $contigobj->seqref->id;      
        my $singletobj = $contigobj;

        # Get contig information
        my ($contanno) = $singletobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");

        my %contiginfo;
        $contiginfo{'sequence'}   = $singletobj->seqref->seq;
        $contiginfo{'lsequence'}  = $contiginfo{'sequence'};
        $contiginfo{'quality'}    = $self->_qual_dec2hex(
            join ' ', @{$singletobj->get_consensus_quality->qual} );
        $contiginfo{'asmbl_id'}   = $contigid;
        $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   
        $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];
        $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];
        $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];
        $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];
        $contiginfo{'redundancy'} = sprintf($decimal_format, 1);
        $contiginfo{'perc_N'}     = sprintf(
            $decimal_format, $self->_perc_N($contiginfo{'sequence'}));
        $contiginfo{'seqnum'}     = 1;
        $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];

Bio/Assembly/IO/tigr.pm  view on Meta::CPAN

            "comment\t$readinfo{'comment'}\n".
            "db\t$readinfo{'db'}\n".
            "offset\t$readinfo{'offset'}\n".
            "lsequence\t$readinfo{'lsequence'}\n"
        );
        $self->_print("|\n");

    } else {
        # This is a contig
        # Get contig information
        my ($contanno) = $contigobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");
        my %contiginfo;
        $contiginfo{'sequence'}   = $self->_ungap(
            $contigobj->get_consensus_sequence->seq);
        $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;
        $contiginfo{'quality'}    = $self->_qual_dec2hex(
            join ' ', @{$contigobj->get_consensus_quality->qual});
        $contiginfo{'asmbl_id'}   = $contigid;
        $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];
        $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];
        $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];
        $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];
        $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];
        $contiginfo{'redundancy'} = sprintf(
            $decimal_format, $self->_redundancy($contigobj));
        $contiginfo{'perc_N'}     = sprintf(
            $decimal_format, $self->_perc_N($contiginfo{'sequence'}));
        $contiginfo{'seqnum'}     = $contigobj->num_sequences;

Bio/Assembly/Tools/ContigSpectrum.pm  view on Meta::CPAN

  if ($nof_seq != 0) {
    $avg_length = ($p_avg_length * $p_nof_seq + $n_avg_length * $n_nof_seq) / $nof_seq;
  }
  return $avg_length, $nof_seq;
}


=head2 _get_assembly_overlap_stats

  Title   : _get_assembly_overlap_stats
  Usage   : my ($avglength, $avgidentity, $minlength, $min_identity, $nof_overlaps)
              = $csp->_get_assembly_overlap_stats($assemblyobj);
  Function: Get statistics about pairwise overlaps in contigs of an assembly
  Returns : average overlap length
            average identity percent
            minimum overlap length
            minimum identity percent
            number of overlaps
  Args    : Bio::Assembly::Scaffold, Contig or Singlet object
            hash reference with the IDs of the sequences to consider [optional]

Bio/Assembly/Tools/ContigSpectrum.pm  view on Meta::CPAN

      $self->_get_contig_overlap_stats($contig_obj, $seq_hash) );
  }

  return @asm_stats;
}


=head2 _get_contig_overlap_stats

  Title   : _get_contig_overlap_stats
  Usage   : my ($avglength, $avgidentity, $minlength, $min_identity, $nof_overlaps)
              = $csp->_get_contig_overlap_stats($contigobj);
  Function: Get statistics about pairwise overlaps in a contig or singlet. The
              statistics are obtained using graph theory: each read is a node
              and the edges between 2 reads are weighted by minus the number of
              conserved residues in the alignment between the 2 reads. The
              minimum spanning tree of this graph represents the overlaps that
              form the contig. Overlaps that do not satisfy the minimum overlap
              length and similarity get a malus on their score.
              Note: This function requires the optional BioPerl dependency
              module called 'Graph'

Bio/Cluster/UniGene.pm  view on Meta::CPAN

 Usage   : used by ClusterIO
 Returns : a new Bio::Cluster::Unigene object

=cut

sub new {
    # standard new call..
    my($caller,@args) = @_;
    my $self = $caller->SUPER::new(@args);

    my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
	$self->_rearrange([qw(UNIGENE_ID
			      DESCRIPTION
			      MEMBERS
			      SIZE
			      SPECIES
			      DISPLAY_ID
			      OBJECT_ID
			      NAMESPACE
			      AUTHORITY
			      VERSION
			      SEQFACTORY
			      )], @args);

    $self->{'_alphabet'} = 'dna';

    $self->unigene_id($ugid) if $ugid;
    $self->description($desc) if $desc;
    $self->sequences($mems) if $mems;
    $self->size($size) if defined($size);
    $self->display_id($dispid) if $dispid; # overwrites ugid
    $self->object_id($id) if $id;          # overwrites dispid
    $self->namespace($ns || 'UniGene');
    $self->authority($auth || 'NCBI');
    $self->version($v) if defined($v);
    if( ! defined $seqfact ) {
	$seqfact = Bio::Seq::SeqFactory->new
	    (-verbose => $self->verbose(), 
	     -type => 'Bio::Seq::RichSeq');
    }
    $self->sequence_factory($seqfact);

Bio/DB/EntrezGene.pm  view on Meta::CPAN


 Title   : get_request
 Usage   : my $url = $self->get_request
 Function: HTTP::Request
 Returns : 
 Args    : %qualifiers = a hash of qualifiers (ids, format, etc)

=head2 get_Stream_by_id

  Title   : get_Stream_by_id
  Usage   : $stream = $db->get_Stream_by_id( [$gid1, $gid2] );
  Function: Gets a series of Seq objects using Gene ids
  Returns : A Bio::SeqIO stream object
  Args    : A reference to an array of Gene ids

=head2 request_format

 Title   : request_format
 Usage   : my $format = $self->request_format;
           $self->request_format($format);
 Function: Get or set sequence format retrieval

Bio/DB/GFF.pm  view on Meta::CPAN

  my $self = shift;
  my $id   = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
  my %groups;         # cache the groups we create to avoid consuming too much unecessary memory
  my $features = [];
  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
  $self->_feature_by_id($id,'feature',$callback);
  return wantarray ? @$features : $features->[0];
}
*fetch_feature_by_id = \&get_feature_by_id;

=head2 get_feature_by_gid

 Title   : get_feature_by_gid
 Usage   : $db->get_feature_by_gid($id)
 Function: fetch features by group ID
 Returns : in list context, the matching Bio::DB::GFF::Feature objects;
           in scalar context, the first one
 Args    : one or more group IDs (a list or an array reference)
 Status  : public

This method can be used to fetch a feature from the database using its
group ID.  Not all GFF databases support IDs, so be careful with this.

The group ID is often more interesting than the feature ID, since
groups can be complex objects containing subobjects.

=cut

sub get_feature_by_gid {
  my $self = shift;

  # Accept either a single array reference of group IDs or a flat list.
  my $gids = \@_;
  $gids = $_[0] if ref($_[0]) eq 'ARRAY';

  # %group_cache is shared across all make_feature() calls so that groups
  # are created once and reused, avoiding unnecessary memory consumption.
  my (%group_cache, @found);
  my $collect = sub {
    push @found, $self->make_feature(undef, \%group_cache, @_);
  };

  # The adaptor invokes $collect for each result it retrieves.
  $self->_feature_by_id($gids, 'group', $collect);

  # List context: every feature; scalar context: only the first one.
  return @found if wantarray;
  return $found[0];
}
*fetch_feature_by_gid = \&get_feature_by_gid;

=head2 delete_fattribute_to_features

 Title   : delete_fattribute_to_features
 Usage   : $db->delete_fattribute_to_features(@ids_or_features)
 Function: delete one or more fattribute_to_features
 Returns : count of fattribute_to_features deleted
 Args    : list of features or feature ids
 Status  : public

Bio/DB/GFF.pm  view on Meta::CPAN

  return bless {ids=>$ids,db=>$db,type=>$type},$class;
}

sub next_seq {
  my $self = shift;

  # Take the next pending identifier; stop when none (or a false one) is left.
  my $next = shift @{ $self->{ids} };
  return unless $next;

  # An array reference is expanded into a Featname object; anything else is
  # used as the name directly.
  my $name = $next;
  $name = Bio::DB::GFF::Featname->new(@$next) if ref($next) eq 'ARRAY';

  # Dispatch on the kind of identifier this iterator was built for.
  my $db   = $self->{db};
  my $type = $self->{type};
  my $segment;
  if ($type eq 'name') {
    $segment = $db->segment($name);
  }
  elsif ($type eq 'feature') {
    $segment = $db->fetch_feature_by_id($name);
  }
  elsif ($type eq 'group') {
    $segment = $db->fetch_feature_by_gid($name);
  }
  else {
    $segment = $self->throw("Bio::DB::GFF::ID_Iterator called to fetch an unknown type of identifier");
  }

  $self->throw("id does not exist") unless $segment;
  return $segment;
}

package Bio::DB::GFF::FeatureIterator;

sub new {
    my $self     = shift;
    my @features = @_;

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

=cut

sub _feature_by_id {
  my $self = shift;
  my ($ids,$type,$callback) = @_;
  $callback || $self->throw('must provide a callback argument');

  my $select         = $self->make_features_select_part;
  my $from           = $self->make_features_from_part;
  my ($where,@args)  = $type eq 'feature' ? $self->make_features_by_id_where_part($ids)
                                          : $self->make_features_by_gid_where_part($ids);
  my $join           = $self->make_features_join_part;
  my $query          = "SELECT $select FROM $from WHERE $where AND $join";
  my $sth            = $self->dbh->do_query($query,@args);

  my $count = 0;
  while (my @row = $sth->fetchrow_array) {
    $callback->(@row);
    $count++;
  }
  $sth->finish;

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

  $search_string =~ tr/*?//d; 

  my @words  = $search_string =~ /(\w+)/g;
  my $regex  = join '|',@words;
  my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
  my $search   = join(' OR ',@searches);

  my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
  FROM fgroup,fattribute_to_feature,fdata,ftype
  WHERE fgroup.gid=fdata.gid
     AND fdata.fid=fattribute_to_feature.fid
     AND fdata.ftypeid=ftype.ftypeid
     AND ($search)
END
;

  my $sth = $self->dbh->do_query($query);
  my @results;
  while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
     next unless $class && $name;    # sorry, ignore NULL objects

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

    return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name);
  }
}

sub make_features_by_alias_where_part {
  my $self = shift;
  my ($class,$name) = @_;
  if ($name =~ /\*/) {
    $name =~ tr/*/%/;
    $name =~ s/_/\\_/g;
    return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value LIKE ? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=...
  } else {
    return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value=? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata...
  }

}

sub make_features_by_attribute_where_part {
  my $self = shift;
  my $attributes = shift;
  my @args;
  my @sql;
  foreach (keys %$attributes) {

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN


=cut

sub make_features_by_id_where_part {
  my $self = shift;
  my $ids  = shift;

  # Build the WHERE fragment that restricts a features query to a set of
  # feature IDs (fdata.fid).
  #
  # Args:    array reference of feature IDs
  # Returns: a list whose first element is the SQL fragment and whose
  #          remaining elements are the bind values for its placeholders.
  #
  # The IDs are passed as bind values rather than interpolated into the SQL
  # text, so a malformed or hostile ID cannot alter the statement.

  # "IN ()" is not valid SQL; with no IDs, use a condition that matches no row.
  return ("0=1") unless $ids && @$ids;

  my $placeholders = join ",", ('?') x @$ids;
  return ("fdata.fid IN ($placeholders)", @$ids);
}

=head2 make_features_by_gid_where_part

 Title   : make_features_by_gid_where_part
 Usage   : $db->make_features_by_gid_where_part($ids)
 Function: create the SQL fragment needed to select a set of features by their group ids
 Returns : a SQL fragment and bind arguments
 Args    : arrayref of IDs
 Status  : Protected

=cut

sub make_features_by_gid_where_part {
  my $self = shift;
  my $ids  = shift;

  # Build the WHERE fragment that restricts a features query to a set of
  # group IDs (fgroup.gid).
  #
  # Args:    array reference of group IDs
  # Returns: a list whose first element is the SQL fragment and whose
  #          remaining elements are the bind values for its placeholders.
  #
  # The IDs are passed as bind values rather than interpolated into the SQL
  # text, so a malformed or hostile ID cannot alter the statement.

  # "IN ()" is not valid SQL; with no IDs, use a condition that matches no row.
  return ("0=1") unless $ids && @$ids;

  my $placeholders = join ",", ('?') x @$ids;
  return ("fgroup.gid IN ($placeholders)", @$ids);
}


=head2 make_features_from_part

 Title   : make_features_from_part
 Usage   : $string = $db->make_features_from_part()
 Function: make from part of the features query
 Returns : a string
 Args    : none

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN


This method creates the part of the features query that immediately
follows the WHERE keyword.

=cut

sub make_features_join_part {
  my ($self, $opts) = @_;
  $opts ||= {};

  # With an 'attributes' option the join also ties in the attribute tables.
  if ($opts->{attributes}) {
    return <<END2;
  fgroup.gid = fdata.gid 
  AND ftype.ftypeid = fdata.ftypeid
  AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
  AND fdata.fid=fattribute_to_feature.fid
END2
  }

  # Default join: group and type tables against the feature data table.
  return <<END1;
  fgroup.gid = fdata.gid 
  AND ftype.ftypeid = fdata.ftypeid
END1
}

=head2 make_features_order_by_part

 Title   : make_features_order_by_part
 Usage   : ($query,@args) = $db->make_features_order_by_part()

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

=cut

# Build the GROUP BY portion of a features query from the caller's options.
#
# Args:    optional hash reference of options (defaults to an empty hash);
#          only the 'attributes' and 'bin_width' keys are consulted here.
# Returns: - 'attributes' true with more than one key: a GROUP BY column list
#            ending in a HAVING clause with one "?" placeholder, followed by
#            its bind value (the number of attribute keys minus one);
#          - 'attributes' true with one key or none: nothing (bare return);
#          - otherwise, 'bin_width' true: the string "fref,fstart,fdata.ftypeid".
#          When neither option is true there is no explicit return statement.
sub make_features_group_by_part {
  my $self = shift;
  my $options = shift || {};
  if (my $att = $options->{attributes}) {
    # keys() in scalar context yields the number of attribute names.
    my $key_count = keys %$att;
    return unless $key_count > 1;
    return ("fdata.fid,fref,fstart,fstop,fsource,
           fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
           ftarget_stop,fdata.gid
     HAVING count(fdata.fid) > ?",$key_count-1);
  }
  # The value of bin_width is only tested for truth; it is not used below.
  elsif (my $b = $options->{bin_width}) {
    return "fref,fstart,fdata.ftypeid";
  }

}

=head2 refseq_query

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

=cut

# Look up the fid of the fdata row matching the given reference, start, stop,
# type ID and group ID.  The statement handle is prepared on first use and
# kept under the 'get_feature_id' key of $self->{load_stuff} for later calls.
# Returns nothing when no handle is available or execute() fails.
sub get_feature_id {
  my ($self, $ref, $start, $stop, $typeid, $groupid) = @_;

  my $cache = $self->{load_stuff};
  $cache->{get_feature_id} ||= $self->features_db->prepare_delayed(
    'SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?'
  );

  my $sth = $cache->{get_feature_id};
  return unless $sth;
  return unless $sth->execute($ref, $start, $stop, $typeid, $groupid);

  # Only the first column of the first row is of interest.
  my ($fid) = $sth->fetchrow_array;
  return $fid;
}



=head2 make_abscoord_query

Bio/DB/GFF/Adaptor/dbi.pm  view on Meta::CPAN

  $result;
}

# Implementation of _delete_groups(): delete the features belonging to each
# given group, then delete the group rows themselves.
# Returns the result of the DELETE statement; throws if that result is undef.
sub _delete_groups {
  my ($self, @group_ids) = @_;
  my $dbh = $self->features_db;

  # Quote every ID through the database handle before building the IN list.
  my $in_list = join ',', map { $dbh->quote($_) } @group_ids;

  # Remove the member features of each group first.
  for my $gid (@group_ids) {
    $self->delete_features( $self->get_feature_by_gid($gid) );
  }

  my $query = "delete from fgroup where gid in ($in_list)";
  warn "$query\n" if $self->debug;

  my $rows = $dbh->do($query);
  $self->throw($dbh->errstr) unless defined $rows;
  return $rows;
}

# implement the _delete() method
sub _delete {
  my $self = shift;
  my $delete_spec = shift;

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

# SQL: extent (min fstart, max fstop) of the features belonging to a group,
# one row per reference sequence, strand and group name. A NULL gclass is
# reported as 'Sequence'. Placeholders, in order: gname, gclass.
use constant GETSEQCOORDS =><<END;
SELECT fref,
       IF(ISNULL(gclass),'Sequence',gclass),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup
  WHERE fgroup.gname=?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand,gname
END
;

# SQL: like GETSEQCOORDS, but the group is located through features carrying
# an 'Alias' attribute whose value equals the first placeholder.
# Placeholders, in order: fattribute_value, gclass.
use constant GETALIASCOORDS =><<END;
SELECT fref,
       IF(ISNULL(gclass),'Sequence',gclass),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE fattribute_to_feature.fattribute_value=?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gname
END
;

# SQL: same as GETALIASCOORDS, except that the 'Alias' attribute value is
# matched with LIKE, so the first placeholder is a LIKE pattern.
# Placeholders, in order: fattribute_value pattern, gclass.
use constant GETALIASLIKE =><<END;
SELECT fref,
       IF(ISNULL(gclass),'Sequence',gclass),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE fattribute_to_feature.fattribute_value LIKE ?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gname
END
;

# SQL: extent of a group's features restricted to one reference sequence,
# one row per reference and strand (gname is not selected here).
# Placeholders, in order: gname, gclass, fref.
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
       IF(ISNULL(gclass),'Sequence',gclass),
       min(fstart),
       max(fstop),
       fstrand
  FROM fdata,fgroup
  WHERE fgroup.gname=?
    AND fgroup.gclass=?
    AND fdata.fref=?
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand
END
;

use constant FULLTEXTSEARCH => <<END;
SELECT distinct gclass,gname,fattribute_value,MATCH(fattribute_value) AGAINST (?) as score,fmethod,fsource
  FROM fgroup,fattribute_to_feature,fdata,ftype
  WHERE fgroup.gid=fdata.gid
    AND fdata.fid=fattribute_to_feature.fid
    AND fdata.ftypeid=ftype.ftypeid
    AND MATCH(fattribute_value) AGAINST (?)
END
;

=head1 DESCRIPTION

This adaptor implements a specific mysql database schema that is
compatible with Bio::DB::GFF.  It inherits from

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

This is the feature data table.  Its columns are:

    fid	           feature ID (integer)
    fref           reference sequence name (string)
    fstart         start position relative to reference (integer)
    fstop          stop position relative to reference (integer)
    ftypeid        feature type ID (integer)
    fscore         feature score (float); may be null
    fstrand        strand; one of "+" or "-"; may be null
    fphase         phase; one of 0, 1 or 2; may be null
    gid            group ID (integer)
    ftarget_start  for similarity features, the target start position (integer)
    ftarget_stop   for similarity features, the target stop position (integer)

Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature.  However, in the current schema, query performance
suffers dramatically when this additional join is added.

=item fgroup

This is the group table. There is one row for each group.  Columns:

    gid	      the group ID (integer)
    gclass    the class of the group (string)
    gname     the name of the group (string)

The group table serves multiple purposes.  As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript.  It is also used to assign a
name and class to a singleton feature.  Finally, the group table is
used to identify the target of a similarity hit.  This is consistent
with the way in which the group field is used in the GFF version 2
format.

The fgroup.gid field joins with the fdata.gid field. 

Examples:

  mysql> select * from fgroup where gname='sjj_2L52.1';
  +-------+-------------+------------+
  | gid   | gclass      | gname      |
  +-------+-------------+------------+
  | 69736 | PCR_product | sjj_2L52.1 |
  +-------+-------------+------------+
  1 row in set (0.70 sec)

  mysql> select fref,fstart,fstop from fdata,fgroup 
            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
                  and fdata.gid=fgroup.gid;
  +---------------+--------+-------+
  | fref          | fstart | fstop |
  +---------------+--------+-------+
  | CHROMOSOME_II |   1586 |  2355 |
  +---------------+--------+-------+
  1 row in set (0.03 sec)

=item ftype

This table contains the feature types, one per row.  Columns are:

    ftypeid      the feature type ID (integer)
    fmethod      the feature type method name (string)
    fsource      the feature type source name (string)

The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:

  mysql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
         where gclass='PCR_product' 
               and gname = 'sjj_2L52.1'
               and fdata.gid=fgroup.gid
               and fdata.ftypeid=ftype.ftypeid;
  +---------------+--------+-------+-------------+-----------+
  | fref          | fstart | fstop | fmethod     | fsource   |
  +---------------+--------+-------+-------------+-----------+
  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
  +---------------+--------+-------+-------------+-----------+
  1 row in set (0.08 sec)

=item fdna

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

  $b*(1+floor(fstart/$b)) as fstop,
  IF(ISNULL(fsource),fmethod,concat(fmethod,':',fsource)),'bin',
  count(*) as fscore,
  '.','.','bin',
  IF(ISNULL(fsource),concat(fref,':',fmethod),concat(fref,':',fmethod,':',fsource)),
  NULL,NULL,NULL,NULL
END
;
  } else {
    $s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
  $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
  $s;
}


# IMPORTANT NOTE:
# WHETHER OR NOT THIS WORKS IS CRITICALLY DEPENDENT ON THE RELATIVE MAGNITUDE OF THE
sub make_features_from_part {
  my ($self, $sparse_types, $options) = @_;
  $options ||= {};

  # At most one index hint is emitted; a sparse-groups request takes
  # priority over a sparse-types request.
  my $hint = '';
  if ($options->{sparse_groups}) {
    $hint = ' USE INDEX(gid)';
  } elsif ($sparse_types) {
    $hint = ' USE INDEX(ftypeid)';
  }

  # The attribute tables are joined in only when attributes were requested.
  my @tables = ("fdata${hint}", 'ftype', 'fgroup');
  push @tables, 'fattribute', 'fattribute_to_feature' if $options->{attributes};

  return join(',', @tables) . "\n";
}

=head2 search_notes

 Title   : search_notes
 Usage   : @search_results = $db->search_notes("full text search string",$limit)

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

 create table fdata (
    fid	                int not null  auto_increment,
    fref                varchar(100) not null,
    fstart              int not null,
    fstop               int not null,
    fbin                double precision,
    ftypeid             int not null,
    fscore              float,
    fstrand             enum('+','-'),
    fphase              enum('0','1','2'),
    gid                 int not null,
    ftarget_start       int,
    ftarget_stop        int,
    primary key(fid),
    unique index(fref,fbin,fstart,fstop,ftypeid,gid),
    index(ftypeid),
    index(gid)
		   ) $engine=MyISAM
}  # fdata table
}, # fdata

		fgroup =>{ 
table=> qq{
create table fgroup (
    gid	    int not null  auto_increment,
    gclass  varchar(100),
    gname   varchar(100),
    primary key(gid),
    unique(gclass,gname)
)  $engine=MyISAM
}
},

          ftype => {
table=> qq{
create table ftype (
    ftypeid      int not null   auto_increment,
    fmethod       varchar(100) not null,

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

    my $tables = join ', ',@tables;
    $dbh->do("LOCK TABLES $tables");
  }
#  for my $table (qw(fdata)) {
#    $dbh->do("alter table $table disable keys");
#  }

  my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
  my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');

  my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
  my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');

  my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
  my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
  my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');

  my $insert_data  = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
		   fstrand,fphase,gid,ftarget_start,ftarget_stop)
       VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;


  $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;
  $self->{load_stuff}{sth}{insert_ftype}     = $insert_type;
  $self->{load_stuff}{sth}{lookup_fgroup}    = $lookup_group;
  $self->{load_stuff}{sth}{insert_fgroup}    = $insert_group;
  $self->{load_stuff}{sth}{insert_fdata}     = $insert_data;

Bio/DB/GFF/Adaptor/dbi/mysql.pm  view on Meta::CPAN

=cut

# Look up the fid of an fdata row from its reference name, start, stop,
# type ID and group ID.  The lookup statement is prepared on first use and
# kept under the get_feature_id key of the load_stuff hash.  Returns the fid
# of the first row fetched (undef if no row matched), or nothing when no
# statement handle is available or execute() fails.
sub get_feature_id {
  my ($self, $seq_ref, $low, $high, $type_id, $group_id) = @_;
  my $cache = $self->{load_stuff};

  # Prepare lazily; later calls reuse the cached handle.
  $cache->{get_feature_id} ||= $self->features_db->prepare_delayed(
    'SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?'
  );

  my $lookup = $cache->{get_feature_id};
  return unless $lookup;
  return unless $lookup->execute($seq_ref, $low, $high, $type_id, $group_id);

  my ($fid) = $lookup->fetchrow_array;
  return $fid;
}

sub _add_interval_stats_table {
    my $self              = shift;
    my $schema            = $self->schema;

Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm  view on Meta::CPAN

This is the feature data table.  Its columns are:

    fid	           feature ID (integer)
    fref           reference sequence name (string)
    fstart         start position relative to reference (integer)
    fstop          stop position relative to reference (integer)
    ftypeid        feature type ID (integer)
    fscore         feature score (float); may be null
    fstrand        strand; one of "+" or "-"; may be null
    fphase         phase; one of 0, 1 or 2; may be null
    feature_id     group ID, formerly named 'gid' (integer)
    ftarget_start  for similarity features, the target start position (integer)
    ftarget_stop   for similarity features, the target stop position (integer)

Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature.  However, in the current schema, query performance
suffers dramatically when this additional join is added.

=item cmap_feature (replaces fgroup)

Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm  view on Meta::CPAN


# Returns the column used to order feature queries.  The options hash is
# accepted but not consulted: the result is always the same column.
sub make_features_order_by_part {
  my ($self, $options) = @_;
  $options ||= {};
  return 'cmap_feature.feature_name';
}

=head2 create_cmap_viewer_link

 Title   : create_cmap_viewer_link
 Usage   : $link_str = $db->create_cmap_viewer_link(data_source=>$ds,group_id=>$gid)
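 Function: Builds a CMap viewer link for the cmap_feature row whose feature_id equals group_id
 Returns : the link string
 Args    : data_source => value for the link's data_source parameter, group_id => feature ID to look up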
 Status  : 


=cut

sub create_cmap_viewer_link {
  my $self = shift;
  my %args = @_;
  my $data_source = $args{'data_source'};
  my $gid         = $args{'group_id'};
  my $link_str    = undef;

  my $db = $self->features_db;
  my $sql_str = qq[
    select f.feature_name, 
        f.feature_type_accession feature_type_aid,
        m.accession_id as map_aid,
        ms.accession_id as map_set_aid 
    from cmap_feature f, 
        cmap_map m, 
        cmap_map_set ms 
    where f.map_id=m.map_id 
        and ms.map_set_id=m.map_set_id 
        and f.feature_id=$gid
    ];

  my $result_ref = $db->selectrow_hashref($sql_str,{ Columns => {} });
  
  if ( $result_ref ) {
    $link_str='/cgi-bin/cmap/viewer?ref_map_set_aid='
      . $result_ref->{'map_set_aid'}
      . '&ref_map_aids='
      . $result_ref->{'map_aid'}
      . '&data_source='

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

# Coordinates of a named group: reference, class (reported as 'Sequence'
# when gclass is NULL), lowest start, highest stop, strand and name.
# Placeholders, in order: gname, gclass.
use constant GETSEQCOORDS =><<END;
SELECT fref,
       NVL(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup
  WHERE fgroup.gname=?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand,gclass,gname
END
;

# Same columns as GETSEQCOORDS, but the group is located through a feature
# attribute named 'Alias' whose value equals the first placeholder.
# Placeholders, in order: alias value, gclass.
use constant GETALIASCOORDS =><<END;
SELECT fref,
       NVL(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE fattribute_to_feature.fattribute_value=?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gclass,gname
END
;

# Variant of GETALIASCOORDS that matches the 'Alias' attribute value with
# LIKE, so the first placeholder may be an SQL pattern.
# Placeholders, in order: alias pattern, gclass.
#
# gclass is listed in GROUP BY because it appears un-aggregated in the
# select list (inside NVL); Oracle rejects the statement otherwise.  The
# WHERE clause pins gclass to a single value, so grouping is unaffected.
use constant GETALIASLIKE =><<END;
SELECT fref,
       NVL(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE fattribute_to_feature.fattribute_value LIKE ?
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gclass,gname
END
;


# Like GETSEQCOORDS, but restricted to a single reference sequence and
# without the gname column in the result.
# Placeholders, in order: gname, gclass, fref.
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
       NVL(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand
  FROM fdata,fgroup
  WHERE fgroup.gname=?
    AND fgroup.gclass=?
    AND fdata.fref=?
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand,gclass
END
;

########################
# moved from mysqlopt.pm
########################

# Upper bound on the length of any reference sequence: 100,000,000 bases
# (100 megabases).
use constant MAX_BIN    => 100_000_000;

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

This is the feature data table.  Its columns are:

    fid	           feature ID (integer)
    fref           reference sequence name (string)
    fstart         start position relative to reference (integer)
    fstop          stop position relative to reference (integer)
    ftypeid        feature type ID (integer)
    fscore         feature score (float); may be null
    fstrand        strand; one of "+" or "-"; may be null
    fphase         phase; one of 0, 1 or 2; may be null
    gid            group ID (integer)
    ftarget_start  for similarity features, the target start position (integer)
    ftarget_stop   for similarity features, the target stop position (integer)

Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature.  However, in the current schema, query performance
suffers dramatically when this additional join is added.

=item fgroup

This is the group table. There is one row for each group.  Columns:

    gid	      the group ID (integer)
    gclass    the class of the group (string)
    gname     the name of the group (string)

The group table serves multiple purposes.  As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript.  It is also used to assign a
name and class to a singleton feature.  Finally, the group table is
used to identify the target of a similarity hit.  This is consistent
with the way in which the group field is used in the GFF version 2
format.

The fgroup.gid field joins with the fdata.gid field. 

Examples:

  sql> select * from fgroup where gname='sjj_2L52.1';
  +-------+-------------+------------+
  | gid   | gclass      | gname      |
  +-------+-------------+------------+
  | 69736 | PCR_product | sjj_2L52.1 |
  +-------+-------------+------------+
  1 row in set (0.70 sec)

  sql> select fref,fstart,fstop from fdata,fgroup 
            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
                  and fdata.gid=fgroup.gid;
  +---------------+--------+-------+
  | fref          | fstart | fstop |
  +---------------+--------+-------+
  | CHROMOSOME_II |   1586 |  2355 |
  +---------------+--------+-------+
  1 row in set (0.03 sec)

=item ftype

This table contains the feature types, one per row.  Columns are:

    ftypeid      the feature type ID (integer)
    fmethod      the feature type method name (string)
    fsource      the feature type source name (string)

The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:

  sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
         where gclass='PCR_product' 
               and gname = 'sjj_2L52.1'
               and fdata.gid=fgroup.gid
               and fdata.ftypeid=ftype.ftypeid;
  +---------------+--------+-------+-------------+-----------+
  | fref          | fstart | fstop | fmethod     | fsource   |
  +---------------+--------+-------+-------------+-----------+
  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
  +---------------+--------+-------+-------------+-----------+
  1 row in set (0.08 sec)

=item fdna

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

create table fdata (
  fid INTEGER  NOT NULL,
  fref VARCHAR(100) DEFAULT '' NOT NULL,
  fstart INTEGER DEFAULT '0' NOT NULL,
  fstop INTEGER DEFAULT '0' NOT NULL,
  fbin NUMBER DEFAULT '0.000000' NOT NULL,
  ftypeid INTEGER DEFAULT '0' NOT NULL,
  fscore NUMBER  ,
  fstrand VARCHAR2(3)   CHECK (fstrand IN ('+','-')),
  fphase VARCHAR2(3)   CHECK (fphase IN ('0','1','2')),
  gid INTEGER DEFAULT '0' NOT NULL,
  ftarget_start INTEGER  ,
  ftarget_stop INTEGER  ,
  CONSTRAINT fdata_pk PRIMARY KEY (fid)
)
}, # fdata table

index=>{
		fdata_fref_idx => q{
CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
},
	
		fdata_ftypeid_idx => q{
CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
},

		fdata_gid_idx => q{
CREATE  INDEX fdata_gid_idx ON fdata (gid)
}
	 }, # fdata indexes

sequence=> {
		fdata_fid_sq => q{
CREATE SEQUENCE fdata_fid_sq START WITH 1
}
	    }, # fdata sequences

trigger=> {

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

}
	   }# fdata triggers
			
}, # fdata



		fgroup => { 
table => q{
CREATE TABLE fgroup (
  gid INTEGER  NOT NULL,
  gclass VARCHAR(100)  ,
  gname VARCHAR(100)  ,
  CONSTRAINT fgroup_pk PRIMARY KEY (gid)
)
}, # fgroup table

index => {
		fgroup_gclass_idx => q{
CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
}
	   }, # fgroup indexes

sequence => {

		fgroup_gid_sq => q{
CREATE SEQUENCE fgroup_gid_sq START WITH 1
}
	     }, # fgroup sequences


trigger => {
		fgroup_gid_ai => q{
CREATE OR REPLACE TRIGGER fgroup_gid_ai
BEFORE INSERT ON fgroup
FOR EACH ROW WHEN (new.gid IS NULL OR new.gid = 0)
BEGIN
   SELECT fgroup_gid_sq.nextval INTO :new.gid FROM dual;
END;
}
	    } # fgroup triggers

}, # fgroup

		ftype => { 
table => q{
CREATE TABLE ftype (
  ftypeid INTEGER  NOT NULL,

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

    my @tables = map { "$_ WRITE"} $self->tables;
    my $tables = join ', ',@tables;
    $dbh->do("LOCK TABLES $tables");
  }

  my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
  my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
  my $sequence_type = (keys %{$schema->{ftype}{sequence}})[0];
  my $insertid_type = $dbh->prepare_delayed("SELECT $sequence_type.CURRVAL FROM dual");

  my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
  my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
  my $sequence_group = (keys %{$schema->{fgroup}{sequence}})[0];
  my $insertid_group = $dbh->prepare_delayed("SELECT $sequence_group.CURRVAL FROM dual");

  my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
  my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
  my $sequence_attribute = (keys %{$schema->{fattribute}{sequence}})[0];
  my $insertid_attribute = $dbh->prepare_delayed("SELECT $sequence_attribute.CURRVAL FROM dual");

  my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');

  my $insert_data  = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
		   fstrand,fphase,gid,ftarget_start,ftarget_stop)
       VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;
  my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
  my $sequence_data =  (keys %{$schema->{fdata}{sequence}})[0];
  my $insertid_data = $dbh->prepare_delayed("SELECT $sequence_data.CURRVAL FROM dual");



  $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

  $search_string =~ tr/*?//d;

  my @words  = $search_string =~ /(\w+)/g;
  my $regex  = join '|',@words;
  my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
  my $search   = join(' OR ',@searches);

  my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
  FROM fgroup,fattribute_to_feature,fdata,ftype
  WHERE fgroup.gid=fdata.gid
     AND fdata.fid=fattribute_to_feature.fid
     AND fdata.ftypeid=ftype.ftypeid
     AND ($search)
END
;

  my $sth = $self->dbh->do_query($query);
  my @results;
  while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
     next unless $class && $name;    # sorry, ignore NULL objects

Bio/DB/GFF/Adaptor/dbi/oracle.pm  view on Meta::CPAN

  $b*(1+floor(fstart/$b)) as fstop,
  NVL2(fsource,fmethod||':'||fsource,fmethod),'bin',
  count(*) as fscore,
  '.','.','bin',
  NVL2(fsource , fref||':'||fmethod||':'||fsource , fref||':'||fmethod),
  NULL,NULL,NULL,NULL
END
;
  } else {
    $s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
  $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
  $s;
}

sub make_features_from_part_bkup {
  my $self = shift;
  my $sparse = shift;

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

# Coordinates of a named group: reference, class (reported as 'Sequence'
# when gclass is NULL), lowest start, highest stop, strand and name.
# The group name is compared case-insensitively via lower().
# Placeholders, in order: gname, gclass.
use constant GETSEQCOORDS =><<END;
SELECT fref,
       COALESCE(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup
  WHERE lower(fgroup.gname) = lower(?)
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand,gclass,gname
END
;

# Same columns as GETSEQCOORDS, but the group is located through a feature
# attribute named 'Alias' whose value matches the first placeholder
# case-insensitively.
# Placeholders, in order: alias value, gclass.
use constant GETALIASCOORDS =><<END;
SELECT fref,
       COALESCE(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE lower(fattribute_to_feature.fattribute_value)=lower(?)
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gclass,gname
END
;

# Variant of GETALIASCOORDS that matches the 'Alias' attribute value with a
# case-insensitive LIKE, so the first placeholder may be an SQL pattern.
# Placeholders, in order: alias pattern, gclass.
#
# gclass is listed in GROUP BY because it appears un-aggregated in the
# select list (inside COALESCE); PostgreSQL rejects the statement
# otherwise.  The WHERE clause pins gclass to a single value, so grouping
# is unaffected.
use constant GETALIASLIKE =><<END;
SELECT fref,
       COALESCE(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand,
       gname
  FROM fdata,fgroup,fattribute,fattribute_to_feature
  WHERE lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
    AND fgroup.gclass=?
    AND fgroup.gid=fdata.gid
    AND fattribute.fattribute_name='Alias'
    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
    AND fattribute_to_feature.fid=fdata.fid
    GROUP BY fref,fstrand,gclass,gname
END
;


# Like GETSEQCOORDS, but restricted to a single reference sequence and
# without the gname column in the result.  Group name and reference name
# are compared case-insensitively via lower().
# Placeholders, in order: gname, gclass, fref.
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
       COALESCE(gclass,'Sequence'),
       min(fstart),
       max(fstop),
       fstrand
  FROM fdata,fgroup
  WHERE lower(fgroup.gname) = lower(?)
    AND fgroup.gclass=?
    AND lower(fdata.fref) = lower(?)
    AND fgroup.gid=fdata.gid
    GROUP BY fref,fstrand,gclass
END
;

# Attribute search by case-insensitive LIKE pattern; returns the distinct
# (gclass, gname, fattribute_value) triples of the matching features.
# Placeholder: the LIKE pattern.
use constant FULLTEXTWILDCARD => <<END;
SELECT distinct gclass,gname,fattribute_value
    FROM fgroup,fattribute_to_feature,fdata
     WHERE fgroup.gid=fdata.gid
       AND fdata.fid=fattribute_to_feature.fid
       AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
END
;

########################
# moved from mysqlopt.pm
########################

# this is the largest that any reference sequence can be (100 megabases)

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

This is the feature data table.  Its columns are:

    fid	           feature ID (integer)
    fref           reference sequence name (string)
    fstart         start position relative to reference (integer)
    fstop          stop position relative to reference (integer)
    ftypeid        feature type ID (integer)
    fscore         feature score (float); may be null
    fstrand        strand; one of "+" or "-"; may be null
    fphase         phase; one of 0, 1 or 2; may be null
    gid            group ID (integer)
    ftarget_start  for similarity features, the target start position (integer)
    ftarget_stop   for similarity features, the target stop position (integer)

Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature.  However, in the current schema, query performance
suffers dramatically when this additional join is added.

=item fgroup

This is the group table. There is one row for each group.  Columns:

    gid	      the group ID (integer)
    gclass    the class of the group (string)
    gname     the name of the group (string)

The group table serves multiple purposes.  As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript.  It is also used to assign a
name and class to a singleton feature.  Finally, the group table is
used to identify the target of a similarity hit.  This is consistent
with the way in which the group field is used in the GFF version 2
format.

The fgroup.gid field joins with the fdata.gid field. 

Examples:

  sql> select * from fgroup where gname='sjj_2L52.1';
  +-------+-------------+------------+
  | gid   | gclass      | gname      |
  +-------+-------------+------------+
  | 69736 | PCR_product | sjj_2L52.1 |
  +-------+-------------+------------+
  1 row in set (0.70 sec)

  sql> select fref,fstart,fstop from fdata,fgroup 
            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
                  and fdata.gid=fgroup.gid;
  +---------------+--------+-------+
  | fref          | fstart | fstop |
  +---------------+--------+-------+
  | CHROMOSOME_II |   1586 |  2355 |
  +---------------+--------+-------+
  1 row in set (0.03 sec)

=item ftype

This table contains the feature types, one per row.  Columns are:

    ftypeid      the feature type ID (integer)
    fmethod      the feature type method name (string)
    fsource      the feature type source name (string)

The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:

  sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
         where gclass='PCR_product' 
               and gname = 'sjj_2L52.1'
               and fdata.gid=fgroup.gid
               and fdata.ftypeid=ftype.ftypeid;
  +---------------+--------+-------+-------------+-----------+
  | fref          | fstart | fstop | fmethod     | fsource   |
  +---------------+--------+-------+-------------+-----------+
  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
  +---------------+--------+-------+-------------+-----------+
  1 row in set (0.08 sec)

=item fdna

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

CREATE TABLE "fdata" (
  "fid" serial NOT NULL,
  "fref" character varying(100) DEFAULT '' NOT NULL,
  "fstart" integer DEFAULT '0' NOT NULL,
  "fstop" integer DEFAULT '0' NOT NULL,
  "fbin" double precision DEFAULT '0.000000' NOT NULL,
  "ftypeid" integer DEFAULT '0' NOT NULL,
  "fscore" double precision DEFAULT NULL,
  "fstrand" character varying(3) DEFAULT NULL,
  "fphase" character varying(3) DEFAULT NULL,
  "gid" integer DEFAULT '0' NOT NULL,
  "ftarget_start" integer DEFAULT NULL,
  "ftarget_stop" integer DEFAULT NULL,
  CONSTRAINT chk_fdata_fstrand CHECK (fstrand IN ('+','-')),
  CONSTRAINT chk_fdata_fphase CHECK (fphase IN ('0','1','2')),
  CONSTRAINT pk_fdata PRIMARY KEY (fid)
)
}, # fdata table

#CONSTRAINT fref_fdata UNIQUE (fref, fbin, fstart, fstop, ftypeid, gid)
# fdata_fref_idx => q{ CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)}, 

index=>{
                fdata_fref_idx => q{
CREATE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
},

		fdata_ftypeid_idx => q{
CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
},

		fdata_gid_idx => q{
CREATE INDEX fdata_gid_idx ON fdata (gid)
}
	 }, # fdata indexes

}, # fdata



		fgroup => { 
table => q{
CREATE TABLE "fgroup" (
  "gid" serial NOT NULL,
  "gclass" character varying(100) DEFAULT NULL,
  "gname" character varying(100) DEFAULT NULL,
  CONSTRAINT pk_fgroup PRIMARY KEY (gid)
)
}, # fgroup table

index => {
		fgroup_gclass_idx => q{
CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
},
                fgroup_gname_idx => q{
CREATE INDEX fgroup_gname_idx ON fgroup(gname)
},

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

  if ($self->lock_on_load) {
    my @tables = map { "$_ WRITE"} $self->tables;
    my $tables = join ', ',@tables;
    $dbh->do("LOCK TABLES $tables");
  }

  my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
  my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
  my $insertid_type = $dbh->prepare_delayed("SELECT currval('ftype_ftypeid_seq')");

  my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE lower(gname)=lower(?) AND gclass=?');
  my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
  my $insertid_group = $dbh->prepare_delayed("SELECT currval('fgroup_gid_seq')");

  my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
  my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
  my $insertid_attribute = $dbh->prepare_delayed("SELECT currval('fattribute_fattribute_id_seq')");

  my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');

  my $insert_data  = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
		   fstrand,fphase,gid,ftarget_start,ftarget_stop)
       VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;
  my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
  my $insertid_data = $dbh->prepare_delayed("SELECT currval('fdata_fid_seq')");

  $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;
  $self->{load_stuff}{sth}{insert_ftype}     = $insert_type;
  $self->{load_stuff}{sth}{insertid_ftype}   = $insertid_type;
  $self->{load_stuff}{sth}{lookup_fgroup}    = $lookup_group;

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

  'bin',
  count(*) as fscore,
  '.','.','bin',
  CASE WHEN fsource IS NULL THEN fref||':'||fmethod
       ELSE fref||':'||fmethod||':'||fsource,
  NULL,NULL,NULL,NULL
END
;
  } else {
    $s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,fgroup.gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
  $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
  $s;
}

sub make_features_from_part_bkup {
  my $self = shift;
  my $sparse = shift;

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

  $sth->finish;
  return $count;
}

# Resets each of the four id sequences (fdata, fattribute, fgroup, ftype)
# to one more than the current maximum id in its table.  Always returns 1;
# the results of the individual statements are not inspected.
sub update_sequences {
  my ($self) = @_;
  my $features_dbh = $self->features_db;

  # One statement per table, issued in this fixed order.
  my @resync_statements = (
    "SELECT setval('public.fdata_fid_seq', max(fid)+1) FROM fdata",
    "SELECT setval('public.fattribute_fattribute_id_seq', max(fattribute_id)+1) FROM fattribute",
    "SELECT setval('public.fgroup_gid_seq', max(gid)+1) FROM fgroup",
    "SELECT setval('public.ftype_ftypeid_seq', max(ftypeid)+1) FROM ftype",
  );
  $features_dbh->do($_) for @resync_statements;

  return 1;
}

=head2 make_features_by_name_where_part

 Title   : make_features_by_name_where_part
 Usage   : $db->make_features_by_name_where_part
 Function: Overrides a function in Bio::DB::GFF::Adaptor::dbi to ensure

Bio/DB/GFF/Adaptor/dbi/pg.pm  view on Meta::CPAN

  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}

# Look up the fid of an fdata row from its reference name (compared
# case-insensitively), start, stop, type ID and group ID.  The statement is
# prepared on first use and kept under the get_feature_id key of the
# load_stuff hash.  Returns the fid of the first row fetched (undef if no
# row matched), or nothing when no statement handle is available or
# execute() fails.
sub get_feature_id {
  my $self = shift;
  my ($seqname, $from, $to, $ftypeid, $gid) = @_;

  my $stash = $self->{load_stuff};
  if (!$stash->{get_feature_id}) {
    my $sql = 'SELECT fid FROM fdata WHERE lower(fref)=lower(?) AND fstart=? AND fstop=? AND ftypeid=? AND gid=?';
    $stash->{get_feature_id} = $self->features_db->prepare_delayed($sql);
  }

  my $query = $stash->{get_feature_id};
  return unless $query && $query->execute($seqname, $from, $to, $ftypeid, $gid);

  # Only the first column of the first row is of interest.
  my @row = $query->fetchrow_array;
  return $row[0];
}

sub _delete {
  my $self = shift;
  my $delete_spec = shift;

Bio/DB/GFF/Adaptor/dbi/pg_fts.pm  view on Meta::CPAN

=cut

# a simple postgres adaptor
use strict;
use Bio::DB::GFF::Adaptor::dbi;
use base qw(Bio::DB::GFF::Adaptor::dbi::pg);

# SQL for a full-text search over attribute values.  Joins fgroup,
# fattribute_to_feature, fdata and ftype, and keeps the rows whose
# fattribute_to_feature.idxfti matches to_tsquery('default', ?).
# Takes one bind parameter: the text-search query string.
# NOTE(review): idxfti is presumably a tsvector column and 'default' a
# text-search configuration name -- confirm against the schema in use.
use constant FULLTEXTSEARCH => <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
    FROM fgroup,fattribute_to_feature,fdata,ftype
     WHERE fgroup.gid=fdata.gid
       AND fdata.fid=fattribute_to_feature.fid
       AND fdata.ftypeid=ftype.ftypeid
       AND (fattribute_to_feature.idxfti @@ to_tsquery('default', ?))
END
;

# SQL for a wildcard search over attribute values.  Same joins and result
# columns as FULLTEXTSEARCH, but the match is a case-insensitive LIKE on
# fattribute_value (both sides are lower-cased).
# Takes one bind parameter: the LIKE pattern.
use constant FULLTEXTWILDCARD => <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
    FROM fgroup,fattribute_to_feature,fdata,ftype
     WHERE fgroup.gid=fdata.gid
       AND fdata.fid=fattribute_to_feature.fid
       AND fdata.ftypeid=ftype.ftypeid
       AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
END
;

sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);
  return $self;

Bio/DB/Query/GenBank.pm  view on Meta::CPAN

 Title   : new
 Usage   : $db = Bio::DB::Query::GenBank->new(@args)
 Function: create new query object
 Returns : new query object
 Args    : -db       database (see below for allowable values)
           -query    query string
           -mindate  minimum date to retrieve from (YYYY/MM/DD)
           -maxdate  maximum date to retrieve from (YYYY/MM/DD)
           -reldate  relative date to retrieve from (days)
           -datetype date field to use ('edat' or 'mdat')
           -ids      array ref of gids (overrides query)
           -maxids   the maximum number of IDs you wish to collect
                     (defaults to 100)

This method creates a new query object.  Typically you will specify a
-db and a -query argument, possibly modified by -mindate, -maxdate, or
-reldate.  -mindate and -maxdate specify minimum and maximum dates for
entries you are interested in retrieving, expressed in the form
YYYY/MM/DD.  -reldate is used to fetch entries that are more recent
than the indicated number of days.

Bio/DB/SeqFeature/Store/DBI/SQLite.pm  view on Meta::CPAN

  $sth->finish;
}


# Write the attribute-index rows for one feature to the 'attribute' dump
# file.  One tab-separated line "<id> <attribute id> <value>" is printed
# for every value of every tag on $obj.
#
#   $obj - feature object; must provide all_tags() and each_tag_value($tag)
#   $id  - id written in the first column of every line
#
# No database handle is needed here: values are written verbatim, so the
# handle lookup that only served quoting has been dropped.
sub _dump_update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh        = $self->dump_filehandle('attribute');
  for my $tag ($obj->all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->each_tag_value($tag)) {
      # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
      # NOTE(review): a value containing a tab or newline would break the
      # one-row-per-line layout -- confirm how the loader reads this file.
      print {$fh} join("\t",$id,$tagid,$value),"\n";
    }
  }
}

sub _update_indexes {
    my $self = shift;
    my $obj  = shift;
    defined (my $id   = $obj->primary_id) or return;
    $self->SUPER::_update_indexes($obj);

Bio/DB/SeqFeature/Store/DBI/mysql.pm  view on Meta::CPAN

}

# Rebuild the attribute-index rows for one feature.
#
# Existing rows for $id are removed from the attribute table through
# _delete_index(), then one (id, attribute_id, attribute_value) row is
# inserted for every value of every tag on $obj.  A failed insert is
# reported through $self->throw() with the statement's error string.
sub _update_attribute_index {
  my ($self, $obj, $id) = @_;

  my $attribute_table = $self->_attribute_table;
  $self->_delete_index($attribute_table, $id);

  my $insert = $self->_prepare("INSERT INTO $attribute_table (id,attribute_id,attribute_value) VALUES (?,?,?)");

  foreach my $tag ($obj->get_all_tags) {
    my $tagid = $self->_attributeid($tag);
    foreach my $value ($obj->get_tag_values($tag)) {
      next if $insert->execute($id, $tagid, $value);
      $self->throw($insert->errstr);
    }
  }

  $insert->finish;
}

sub _genericid {
  my $self = shift;
  my ($table,$namefield,$name,$add_if_missing) = @_;
  my $qualified_table = $self->_qualify($table);
  my $sth = $self->_prepare(<<END);

Bio/DB/SeqFeature/Store/DBI/mysql.pm  view on Meta::CPAN

  print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
  print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}

# Write the attribute-index rows for one feature to the 'attribute' dump
# file.  One tab-separated line "<id> <attribute id> <quoted value>" is
# printed for every value of every tag on $obj; values are passed through
# the database handle's quote() first.
sub _dump_update_attribute_index {
  my ($self, $obj, $id) = @_;

  my $out = $self->dump_filehandle('attribute');
  my $dbh = $self->dbh;

  foreach my $tag ($obj->all_tags) {
    my $tagid = $self->_attributeid($tag);
    foreach my $value ($obj->each_tag_value($tag)) {
      my $quoted = $dbh->quote($value);
      print {$out} join("\t", $id, $tagid, $quoted), "\n";
    }
  }
}

sub coverage_array {
    my $self = shift;
    my ($seq_name,$start,$end,$types,$bins) = 
	rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
		   ['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);

Bio/Map/Clone.pm  view on Meta::CPAN

       $remark,$fpnumber,$seqtype,$seqstatus,$fpcremark,
       $matche,$matcha,$matchp,
       $range) = $self->_rearrange([qw(NAME  MARKERS CONTIG TYPE
				       BANDS GEL GROUP REMARK FPNUMBER
				       SEQUENCETYPE SEQUENCESTATUS
				       FPCREMARK MATCHE MATCHA MATCHP
				       RANGE)],@args);

   $self->name($name)                  if defined $name;
   $self->markers($markers)            if defined $markers;
   $self->contigid($contig)            if defined $contig;
   $self->type($type)                  if defined $type;
   $self->bands($bands)                if defined $bands;
   $self->gel($gel)                    if defined $gel;
   $self->group($group)                if defined $group;
   $self->remark($remark)              if defined $remark;
   $self->fp_number($fpnumber)         if defined $fpnumber;
   $self->sequence_type($seqtype)     if defined $seqtype;
   $self->sequence_status($seqstatus) if defined $seqstatus;
   $self->fpc_remark($fpcremark)       if defined $fpcremark;
   $self->range($range)                if defined $range;

Bio/Map/Clone.pm  view on Meta::CPAN

 Args    : none to get, OR string to set

=cut

sub group {
    # Combined getter/setter: any argument after the invocant replaces the
    # stored group; the (possibly new) stored value is returned either way.
    my ($self, @new_value) = @_;
    $self->{'_group'} = $new_value[0] if @new_value;
    return $self->{'_group'};
}

=head2 contigid

 Title   : contigid
 Usage   : my $ctg = $cloneobj->contigid();
 Function: Get/set the contig this clone belongs to
 Returns : scalar representing the contig
 Args    : none to get, OR string to set

=cut

sub contigid {
    # Combined getter/setter.  A stored value that is false (including
    # "never set") is reported as 0.
    my ($self, @new_value) = @_;
    if (@new_value) {
        $self->{'_contig'} = $new_value[0];
    }
    my $contig = $self->{'_contig'};
    return $contig ? $contig : 0;
}

=head2 each_markerid

 Title   : each_markerid
 Usage   : @markers = $cloneobj->each_markerid();
 Function: retrieves all the elements in a map unordered

Bio/Map/FPCMarker.pm  view on Meta::CPAN

 *** This only supplies the ids set with the set_clones method ***
 *** It has nothing to do with actual Bio::Map::MappableI objects ***

=cut

sub each_cloneid {
    my $self = shift;
    # thin wrapper: ask _each_element for the 'clones' entries
    return $self->_each_element('clones');
}

=head2 each_contigid

 Title   : each_contigid
 Usage   : my @contigs = $map->each_contigid();
 Function: retrieves all the contig ids in a map unordered
 Returns : list of strings (ids)
 Args    : none

 *** This only supplies the ids set with the set_contigs method ***
 *** It has nothing to do with actual Bio::Map::MapI objects ***

=cut

sub each_contigid {
    my $self = shift;
    # thin wrapper: ask _each_element for the 'contigs' entries
    return $self->_each_element('contigs');
}

sub _each_element{
    my ($self, $type) = @_;

    $type = 'clones' unless defined $type;
    $type = lc("_$type");

Bio/Map/Physical.pm  view on Meta::CPAN

    _remark     => $remark,
	_clones     => \%clones,
	_contigs    => \%contigs,
	_position   => \%markerpos,	
    }, 'Bio::Map::FPCMarker');

    $self->{'_markers'}{$marker}{'marker'} = $markerobj;
    return $markerobj;
}

=head2 each_contigid

 Title   : each_contigid
 Usage   : my @contigs = $map->each_contigid();
 Function: returns a list of contigs (numbers)
 Returns : list of contigs
 Args    : none

=cut

sub each_contigid {
    my $self = shift;
    # the contig ids are simply the keys of the _contigs table
    my @contig_ids = keys %{ $self->{'_contigs'} };
    return @contig_ids;
}

=head2 get_contigobj

 Title   : get_contigobj
 Usage   : my $contigobj = $map->get_contigobj('CONTIG1');
 Function: returns an object of the contig given in the argument
 Returns : object of the contig

Bio/Map/Physical.pm  view on Meta::CPAN


sub print_contiglist{
    my ($self,$showall) = @_;
    my $pos;

    $showall = 0 if (!defined($showall));
    my %_contigs = %{$self->{'_contigs'}};
    my %_markers = %{$self->{'_markers'}};
    my %_clones  = %{$self->{'_clones'}};

    my @contigs       = $self->each_contigid();
    my @sortedcontigs = sort {$a <=> $b } @contigs;

    print "\n\nContig List\n\n";
    foreach my $contig (@sortedcontigs) {
        my %list;
	my %alist;
	
	my $ctgAnchor  = $_contigs{$contig}{'anchor'};
	my $ctgGroup   = $_contigs{$contig}{'group'};	
	

Bio/Map/Physical.pm  view on Meta::CPAN

    my $i;
    my ($depth, $save_depth);
    my ($x, $y);
    my @stack;
    my ($k, $j, $s);
    my $pos;
    my $contig;

    # Calculate the position for the marker in the contig

    my @contigs       = $self->each_contigid();
    my @sortedcontigs = sort {$a <=> $b } @contigs;
    my $offset = 0;
    my %gffclones;
    my %gffcontigs;
    my %gffmarkers;
    my $basepair = 4096;

    foreach my $contig (@sortedcontigs) {
        if($_contigs{$contig}{'range'} ) {	
	    $offset =  $_contigs{$contig}{'range'}{'start'};	

Bio/Map/Physical.pm  view on Meta::CPAN

    my $i;
    my ($depth, $save_depth);
    my ($x, $y);
    my @stack;
    my ($k, $j, $s);
    my $pos;
    my $contig;

    # Calculate the position for the marker in the contig

    my @contigs       = $self->each_contigid();
    my @sortedcontigs = sort {$a <=> $b } @contigs;
    my $offset;
    my %gffclones;
    my %gffcontigs;

    foreach my $marker ($self->each_markerid()) {
        my (@ctgmarker, @sortedctgmarker);
	
	my @clones = (keys %{$_markers{$marker}{'clones'}})
	    if (exists ($_markers{$marker}{'clones'} ));

Bio/Map/Physical.pm  view on Meta::CPAN


=cut

sub _calc_contigposition{
    my ($self) = @_;

    my %_contigs = %{$self->{'_contigs'}};
    my %_markers = %{$self->{'_markers'}};
    my %_clones  = %{$self->{'_clones'}};

    my @contigs       = $self->each_contigid();
    my @sortedcontigs = sort {$a <=> $b } @contigs;

    foreach my $contig (@sortedcontigs) {
		my $position = 0;
	my $group;
	
	if (exists($_contigs{$contig}{'group'}) ) {		
	
	    my %weightedmarkers;
	    my @mkrs = keys (%{$_contigs{$contig}{'markers'}})

Bio/Map/Physical.pm  view on Meta::CPAN

 Usage   : $map->_calc_contiggroup();
 Function: calculates the group of the contig
 Returns : none
 Args    : none

=cut

sub _calc_contiggroup {
    my ($self) = @_;

    # Write straight into the object's own contig table.  The previous
    # version first copied the whole table into a local hash; that copy
    # was wasted work on every call, and a group assigned to an entry
    # that was not yet a hash reference landed in the copy and was lost.
    foreach my $ctg ($self->each_contigid()) {
        # group = contig number divided by 1000, rounded down
        $self->{'_contigs'}{$ctg}{'group'} = floor($ctg/1000);
    }

    return;
}

=head2 _setI<E<lt>TypeE<gt>>Ref

 Title   : _set<Type>Ref

Bio/Root/IO.pm  view on Meta::CPAN

        $roots = [$roots] unless ref $roots;
    } else {
        $self->warn("No root path(s) specified\n");
        return 0;
    }

    my $root;
    for $root (@{$roots}) {
        $root =~ s#/\z##;
        (undef, undef, my $rp) = lstat $root or next;
        $rp &= 07777;   # don't forget setuid, setgid, sticky bits
        if ( -d _ ) {
            # notabene: 0777 is for making readable in the first place,
            # it's also intended to change it to writable in case we have
            # to recurse in which case we are better than rm -rf for
            # subtrees with strange permissions
            chmod(0777, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
              or $self->warn("Could not make directory '$root' read+writable: $!")
            unless $safe;
            if (opendir DIR, $root){
                @files = readdir DIR;

Bio/Root/Utilities.pm  view on Meta::CPAN

=cut

#--------------
sub file_info {
#--------------
    my ($self, %param) = @_;
    my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
    $get ||= 'all';
    $fmt ||= 'yyyy-mm-dd';

    my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
       $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;

    if($get =~ /date/i) {
        ## I can  get the elapsed time since the file was modified but
        ## it's not so straightforward to get the date in a nice format...
        ## Think about using a standard CPAN module for this, like
        ## Date::Manip or Date::DateCalc.

        my $date = $mtime;
        my $elsec = time - $mtime;

Bio/SearchIO/fasta.pm  view on Meta::CPAN

	if exists $hsp->{evalue};
      $self->element({'Name' => 'Hsp_evalue2', 'Data' => $hsp->{evalue2} } )
	if exists $hsp->{evalue2};

      $self->element({'Name' => 'Hsp_bit-score', 'Data' => $hsp->{bits} } )
	if exists $hsp->{bits};
      $self->element({'Name' => 'Hsp_sw-score', 'Data' => $hsp->{'n-w'} } )
	if exists $hsp->{'n-w'};
      $self->element({'Name' => 'Hsp_sw-score', 'Data' => $hsp->{sw} } )
	if exists $hsp->{sw};
      $self->element({'Name' => 'Hsp_gaps', 'Data' => $hsp->{'%_gid'} } )
	if exists $hsp->{'%_gid'};
      $self->element({
		      'Name' => 'Hsp_identity',
		      'Data' =>
		      sprintf( "%.0f", $hsp->{'%_id'} * $hsp->{alen} )
		     }) if ( exists $hsp->{'%_id'} && exists $hsp->{alen} );

      if ( exists $hsp->{'%_gid'} ) {
	$self->element(
		       {
			'Name' => 'Hsp_positive',
			'Data' =>
			sprintf( "%.0f", $hsp->{'%_gid'} * $hsp->{alen} )
		       }
		      ) if exists $hsp->{'%_gid'} && exists $hsp->{alen};
      } else {
	$self->element(
		       {
			'Name' => 'Hsp_positive',
			'Data' =>
			sprintf( "%.0f", $hsp->{'%_id'} * $hsp->{alen} )
		       }
		      ) if ( exists $hsp->{'%_id'} && exists $hsp->{alen} );
      }

Bio/SeqIO/entrezgene.pm  view on Meta::CPAN


The C<-debug> and C<-locuslink> options slow down the parser.

Example code which looks for ontology terms:

  my $eio = Bio::SeqIO->new(-file => $file,
                            -format => 'entrezgene',
                            -service_record => 'yes');

  while (my $seq = $eio->next_seq) {
    my $gid = $seq->accession_number;
    foreach my $ot ($seq->annotation->get_Annotations('OntologyTerm')) {
      next if ($ot->term->authority eq 'STS marker'); # No STS markers
      my $evid = $ot->comment;
      $evid =~ s/evidence: //i;
      my @ref = $ot->term->get_references;
      my $id = $ot->identifier;
      my $fid = 'GO:' . sprintf("%07u",$id);
      print join("\t",$gid, $ot->ontology->name, $ot->name, $evid,
        $fid, @ref?$ref[0]->medline:''), "\n";
    }
  }

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN

    my $str = $feat->strand;
    my $id = $self->_find_name($feat, 'standard_name')
          || $self->_find_name($feat, 'gene')
	  || $self->_find_name($feat, $feat->primary_tag)
	  || $self->_find_name($feat, 'locus_tag') 
	  || $self->_find_name($feat, 'symbol')
          || $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
Could not find a gene/feature ID, feature must have a primary tag or a tag
with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
EOM
    my $gid = $self->_find_name($feat, 'gene') || $id;

    $writer->startTag('annotation', id => $id);
    $self->_element('name', $gid);
    $self->_element('type', $feat->primary_tag);
    $self->_render_tags( $feat,
			 \&_render_date_tags,
			 \&_render_dbxref_tags,
			 \&_render_comment_tags,
			 \&_render_tags_as_properties,
		       );
    
    my @genes;
    

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN

	# we are in a gene container; gene must then be one level down
	@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
    }

    for my $g ( @genes ) {
	my $id ||= $self->_find_name($g, 'standard_name')
               || $self->_find_name($g, 'gene') 
	       || $self->_find_name($feat, 'locus_tag')
               || $self->_find_name($feat, 'symbol')
               || $self->throw("Could not find a gene ID");
	my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);

	$writer->startTag('gene', association => 'IS');
        $self->_element('name', $gid);
        $writer->endTag('gene');

        my $proteins;
	my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
	my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
	my @variants = ('A' .. 'Z');

	for my $mRNA (@mRNAs) {
	    my ($sn, @units);
            # if the mRNA is a generic transcript, it must be a non-spliced RNA gene

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN

			($add_seq{desc}) = $cds->get_tag_values('product_desc');
			$cds->remove_tag('product_desc');
		    }
		    
		    unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
			my $start = $cds->start;
			my $end   = $cds->end;
			my $str   = $cds->strand;
			my $acc   = $self->{seq}->accession || $self->{seq}->display_id;
			$str = $str < 0 ? '[-]' : '';
			$add_seq{desc}  = "translation from_gene[$gid] " .
			    "cds_boundaries:(" . $acc . 
			    ":$start..$end$str) transcript_info:[$name]";
		    }
		    $self->{add_seqs} ||= [];
		    push @{$self->{add_seqs}}, \%add_seq;
		}
	    }

	    
	    $writer->startTag('feature_set', id => $name);

Bio/SeqIO/game/gameWriter.pm  view on Meta::CPAN

		@units = reverse @units;
	    }
            
	    for my $unit ( @units ) {
		if ( $unit->primary_tag eq 'exon' ) {
		    my $ename = $id;
		    $ename .= ':' . ++$count;
		    $self->_feature_span($ename, $unit);
		}
		elsif ( $unit->primary_tag eq 'start_codon' ) {
		    $self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
		}
		else {
		    my $uname = $unit->primary_tag . ":$id";
		    $self->_feature_span($uname, $unit);
		}
	    }
	    $self->{curr_pname} = '';
	    $writer->endTag('feature_set');
	}
	

Bio/Tools/Geneid.pm  view on Meta::CPAN


=encoding utf-8

=head1 NAME

Bio::Tools::Geneid - Results of one geneid run

=head1 SYNOPSIS

  use Bio::Tools::Geneid;
  my $gid = Bio::Tools::Geneid->new(-file => "geneid.out");

  while (my $gene = $gid->next_prediction)
  {
    my @transcripts = $gene->transcripts;
      foreach my $t (@transcripts)
      {
        my @exons = $t->exons;
        foreach my $e (@exons)
        {
          printf("Exon %d..%d\n", $e->start, $e->end);
        }
      }

MANIFEST  view on Meta::CPAN

t/data/popstats.prettybase
t/data/pre_rel9.swiss
t/data/Primate_mtDNA.nex
t/data/primedseq.fa
t/data/primer3_infile.txt
t/data/primer3_outfile.txt
t/data/primer3_output.txt
t/data/prints.out
t/data/promoterwise.out
t/data/protpars.phy
t/data/protpars_longid.phy
t/data/ps_scan/out.PrositeScan
t/data/pseudowise.out
t/data/psi_xml.dat
t/data/psiblastreport.out
t/data/purine_v081.infernal
t/data/puzzle.tre
t/data/PX1CG.gb
t/data/Q8GBD3.swiss
t/data/qrna-relloc.out
t/data/qualfile.qual

maintenance/big_split/file_classification.csv  view on Meta::CPAN

,"t/data/trees.nexml.xml"
,"t/data/testaln2.fasta"
,"t/data/test.game"
,"t/data/GlimmerHMM.out"
,"t/data/bug2246.blast"
,"t/data/Primate_mtDNA.nex"
,"t/data/codeml4.mlc"
,"t/data/Rab1.chaos-xml"
,"t/data/NC_001284.gbk"
,"t/data/dna2.fa"
,"t/data/protpars_longid.phy"
,"t/data/cds_sample.embl"
,"t/data/SPAN_Family4nl.nex"
,"t/data/testdbaccnums.out"
,"t/data/longnames.aln"
,"t/data/tab2part.mif"
,"t/data/gmap_f9-reverse-strand.txt"
,"t/data/test1.wublastp"
,"t/data/genemark-fragment.out"
,"t/data/Q8GBD3.swiss"
,"t/data/stress_test_pubmed.xml"

maintenance/cvs2cl_by_file.pl  view on Meta::CPAN

    }

    die "No mail domain found\n"
      unless defined $Domain;

    open (MAPFILE, "<$User_Passwd_File")
        or die ("Unable to open $User_Passwd_File ($!)");
    while (<MAPFILE>)
    {
      # all lines are valid
      my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
      my $expansion = '';
      ($expansion) = split (',', $gecos)
        if defined $gecos && length $gecos;

      my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
      $expansions{$username} = "$expansion <$mailname>";
    }
    close (MAPFILE);
  }

scripts/Bio-DB-GFF/bp_bulk_load_gff.pl  view on Meta::CPAN

by using the --Temporary switch.
END
# --- Set-up for the bulk load ------------------------------------------
# One temporary output file is opened per table; rows are written to these
# files and the id counters/lookup hashes below hand out the ids.

my @fasta_files_to_be_unlinked;

# Open "$tmpdir/<TABLE>.<pid>" for writing for every table constant, keep
# the handles in %FH keyed by that constant, and turn on autoflush.
my @files = (FDATA,FTYPE,FGROUP,FDNA,FATTRIBUTE,FATTRIBUTE_TO_FEATURE);
foreach (@files) {
  $FH{$_} = IO::File->new(">$tmpdir/$_.$$") or die $_,": $!";
  $FH{$_}->autoflush;
}

# With $use_pg set, each file (except FDNA) starts with the
# "COPY <table> (<columns>) FROM stdin;" header for its table.
if ( $use_pg ) {
  $FH{FDATA()                }->print("COPY fdata (fid, fref, fstart, fstop, fbin, ftypeid, fscore, fstrand, fphase, gid, ftarget_start, ftarget_stop) FROM stdin;\n");
  $FH{FTYPE()                }->print("COPY ftype (ftypeid, fmethod, fsource) FROM stdin;\n");
  $FH{FGROUP()               }->print("COPY fgroup (gid, gclass, gname) FROM stdin;\n");
  $FH{FATTRIBUTE()           }->print("COPY fattribute (fattribute_id, fattribute_name) FROM stdin;\n");
  $FH{FATTRIBUTE_TO_FEATURE()}->print("COPY fattribute_to_feature (fid, fattribute_id, fattribute_value) FROM stdin;\n");
}

# Next id to hand out for features, groups, types and attributes.
my $FID     = 1;
my $GID     = 1;
my $FTYPEID = 1;
my $ATTRIBUTEID = 1;

# Ids already handed out, so a repeated group / type / attribute name
# reuses its id instead of taking a new one from the counters above.
my %GROUPID     = ();
my %FTYPEID     = ();
my %ATTRIBUTEID = ();

scripts/Bio-DB-GFF/bp_bulk_load_gff.pl  view on Meta::CPAN


  for (my $i=0; $i < @$group_name; $i++) {
    $group_class->[$i]  ||= '\N';
    $group_name->[$i]   ||= '\N';
    $target_start ||= '\N';
    $target_stop  ||= '\N';
    $method       ||= '\N';
    $source       ||= '\N';

    my $fid     = $FID++;
    my $gid     = $GROUPID{lc join('',$group_class->[$i],$group_name->[$i])}  ||= $GID++;
    my $ftypeid = $FTYPEID{lc join('',$source,$method)}                       ||= $FTYPEID++;

    my $bin = bin($start,$stop,$db->min_bin);
    $FH{ FDATA()  }->print(    join("\t",$fid,$ref,$start,$stop,$bin,$ftypeid,$score,$strand,$phase,$gid,$target_start,$target_stop),"\n"   );
    if ($use_mysqlcmap){
      my $feature_id    = next_number(
				      db         => $cmap_db,
				      table_name => 'cmap_feature',
				      id_field   => 'feature_id',
				     )
	or die 'No feature id';
      my $direction = $strand eq '-' ? -1:1;
      $FH{ FGROUP() }->print(    
			     join("\t",$feature_id,$feature_id,'NULL',0, $group_name->[$i],0,0,'NULL',1,$direction, $group_class->[$i],)
			     ,"\n"
			    ) unless $DONE{"G$gid"}++;
    }
    else {
      $FH{ FGROUP() }->print(    join("\t",$gid,$group_class->[$i],$group_name->[$i]),"\n") unless $DONE{"G$gid"}++;
    }
    $FH{ FTYPE()  }->print(    join("\t",$ftypeid,$method,$source),"\n"                   ) unless $DONE{"T$ftypeid"}++;

    foreach (@$attributes) {
      my ($key,$value) = @$_;
      my $attributeid = $ATTRIBUTEID{$key}   ||= $ATTRIBUTEID++;
      $FH{ FATTRIBUTE() }->print( join("\t",$attributeid,$key),"\n"                       ) unless $DONE{"A$attributeid"}++;
      $FH{ FATTRIBUTE_TO_FEATURE() }->print( join("\t",$fid,$attributeid,$value),"\n");
    }



( run in 3.655 seconds using v1.01-cache-2.11-cpan-5735350b133 )