view release on metacpan or search on metacpan
Bio/AlignIO/phylip.pm view on Meta::CPAN
while( my $aln = $gcgstream->next_aln ) {
$phylipstream->write_aln($aln);
}
This example shows how to read phylip format:
my $in = Bio::AlignIO->new(
-file => $inFile,
-format => 'phylip',
-interleaved => 0,
-longid => 1
);
my $out = Bio::AlignIO->new(
-file => ">$outFile",
-format => 'fasta'
);
while ( my $aln = $in->next_aln() ) {
$out->write_aln($aln);
}
The -longid argument is required if the input phylip format file
has ids with lengths greater than 10 characters.
=head1 DESCRIPTION
This object can transform Bio::SimpleAlign objects to and from PHYLIP
format. By default it works with the interleaved format. By specifying
the flag -interleaved =E<gt> 0 in the initialization the module can
read or write data in sequential format.
Reading phylip format with long IDs up to 50 characters is supported by
the flag -longid =E<gt>1. ID strings can be surrounded by single quotes.
They are mandatory only if the IDs contain spaces.
=head1 FEEDBACK
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
Bio/AlignIO/phylip.pm view on Meta::CPAN
-idlinebreak => insert a line break after the sequence id
so that sequence starts on the next line
-flag_SI => whether or not to write an "S" or "I" just after
the num.seq. and line len., in the first line
-tag_length => integer of how long the tags have to be in
each line between the space separator. set it
to 0 to have 1 tag only.
-wrap_sequential => boolean for whether or not sequential
format should be broken up or a single line
default is false (single line)
-longid => boolean to read arbitrarily long IDs (default is false)
=cut
# Set up a PHYLIP AlignIO stream from the named constructor arguments.
#
# After the parent class has initialised itself, the PHYLIP-specific
# options are pulled out of @args with _rearrange() and pushed through
# the corresponding accessors, in a fixed order.  Always returns 1.
sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

    # Option names in the order _rearrange() should hand their values back.
    my @option_names = qw(
        INTERLEAVED
        LINE_LENGTH
        IDLINEBREAK
        IDLENGTH
        FLAG_SI
        TAG_LENGTH
        WRAP_SEQUENTIAL
        LONGID
    );
    my %opt;
    @opt{@option_names} = $self->_rearrange( [@option_names], @args );

    # Only touch the interleaved flag when the caller said something about it.
    if ( defined $opt{INTERLEAVED} ) {
        $self->interleaved( $opt{INTERLEAVED} ? 1 : 0 );
    }

    $self->idlength( $opt{IDLENGTH} || $DEFAULTIDLENGTH );

    if ( $opt{IDLINEBREAK} ) {
        $self->id_linebreak(1);
    }

    # Non-positive or missing line lengths are ignored.
    if ( defined $opt{LINE_LENGTH} && $opt{LINE_LENGTH} > 0 ) {
        $self->line_length( $opt{LINE_LENGTH} );
    }

    if ( $opt{FLAG_SI} ) {
        $self->flag_SI(1);
    }

    # NOTE(review): this guard is true whenever $DEFAULTTAGLEN is true, so
    # tag_length() may be called with an undefined value.  It looks like
    # "$tag_length || $DEFAULTTAGLEN" was meant to be the argument rather
    # than the condition -- confirm against the tag_length accessor.
    if ( $opt{TAG_LENGTH} || $DEFAULTTAGLEN ) {
        $self->tag_length( $opt{TAG_LENGTH} );
    }

    # These two flags are always normalised to 0/1.
    $self->wrap_sequential( $opt{WRAP_SEQUENTIAL} ? 1 : 0 );
    $self->longid( $opt{LONGID} ? 1 : 0 );

    return 1;
}
=head2 next_aln
Title : next_aln
Usage : $aln = $stream->next_aln()
Function: returns the next alignment in the stream.
Throws an exception if trying to read in PHYLIP
sequential format.
Bio/AlignIO/phylip.pm view on Meta::CPAN
my $idlen = $self->idlength;
$count = 0;
while ( $entry = $self->_readline ) {
if ( $entry =~ /^\s?$/ ) { # eat the newlines
next;
}
# Names can be in a few different formats:
# 1. they can be traditional phylip: 10 chars long, period. If this is the case, that name can have spaces.
# 2. they can be hacked with a long ID, as passed in with the flag -longid.
# 3. if there is a long ID, the name can have spaces as long as it is wrapped in single quotes.
if ( $self->longid() ) { # 2 or 3
if ( $entry =~ /^'(.+)'\s+(.+)$/ ) { # 3. name has single quotes.
$name = $1;
$str = $2;
} else { # 2. name does not have single quotes, so should not have spaces.
# therefore, the first part of the line is the name and the rest is the seq.
# make sure that the line does not lead with extra spaces.
$entry =~ s/^\s+//;
( $name, $str ) = split( /\s+/, $entry, 2 );
}
} else { # 1. traditional phylip.
Bio/AlignIO/phylip.pm view on Meta::CPAN
} else {
$self->_print(
sprintf( " %s %s\n", $aln->num_sequences, $aln->length ) );
}
$idlength = $self->idlength();
$line_length = $self->line_length();
$tag_length = $self->tag_length();
foreach $seq ( $aln->each_seq() ) {
$name = $aln->displayname( $seq->get_nse );
if ( $self->longid ) {
$self->warn(
"The length of the name is over 50 chars long [$name]")
if length($name) > 50;
$name = "'$name' ";
} else {
$name = substr( $name, 0, $idlength )
if length($name) > $idlength;
$name = sprintf( "%-" . $idlength . "s", $name );
if ( $self->interleaved() ) {
$name .= ' ';
Bio/AlignIO/phylip.pm view on Meta::CPAN
=cut
# Get/set the wrap_sequential flag.
#
# A defined argument is stored in the object; an undefined or missing
# argument leaves the stored value alone.  Returns the stored value, or 0
# when nothing true has been stored.
sub wrap_sequential {
    my $self = shift;
    my ($new_value) = @_;

    $self->{'_wrap_sequential'} = $new_value if defined $new_value;

    my $current = $self->{'_wrap_sequential'};
    return $current ? $current : 0;
}
=head2 longid
Title : longid
Usage : $obj->longid($newval)
Function: Get/set the flag that turns on long sequence ID handling
Returns : value of longid
Args : newvalue (optional)
=cut
# Get/set the longid flag.
#
# A defined argument replaces the stored value; calling without an
# argument (or with undef) only reads it.  Returns the stored value, or 0
# when nothing true has been stored.
sub longid {
    my $self = shift;
    my ($new_value) = @_;

    $self->{'_longid'} = $new_value if defined $new_value;

    my $current = $self->{'_longid'};
    return $current ? $current : 0;
}
1;
Bio/Assembly/IO/maq.pm view on Meta::CPAN
Usage : my $singletobj = $self->_store_singlet(\%contiginfo, $contigobj);
Function: store information of a singlet belonging to a scaffold in a singlet object
Returns : Bio::Assembly::Singlet
Args : hash reference with the contig information, contig object
=cut
sub _store_singlet {
my ($self, $contiginfo, $contigobj) = @_;
my $contigid = $$contiginfo{'asmbl_id'};
my $seqref = ($contigobj->each_seq())[0];
my $singletobj = Bio::Assembly::Singlet->new( -id => $contigid,
-seqref => $seqref );
# Add other misc contig information as features of the contig
# Add other misc read information as subsequence feature
#my @other = grep !/_sfc|_assembly|_elem/, keys %$contiginfo; # remove the objects; _elem contains a code ref and can't be frozen. Just shooting blind here.
#my %other;
#@other{@other} = @$contiginfo{@other};
#my $contigtags = Bio::SeqFeature::Generic->new(
# -primary => '_main_contig_feature',
# -source => $$contiginfo{asmbl_id},
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
Returns : Bio::Assembly::Singlet
Args : hash, hash
=cut
sub _store_singlet {
my ($self, $readinfo, $contiginfo) = @_;
# Singlets in TIGR_Assembler are represented as a contig of one sequence
# We try to simulate this duality by playing around with the Singlet object
my $contigid = $$contiginfo{'asmbl_id'};
my $readid = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});
# Create a sequence object
#$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});
my $seqobj = Bio::Seq::Quality->new(
-primary_id => $readid,
-display_id => $readid,
-seq => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase
-start => 1,
-strand => $$readinfo{'strand'},
-alphabet => 'dna',
-qual => $self->_qual_hex2dec($$contiginfo{'quality'})
);
# Create singlet from sequence and add it to scaffold
my $singletobj = Bio::Assembly::Singlet->new(
-id => $contigid,
-seqref => $seqobj
);
# Add other misc contig information as features of the singlet
my $contigtags = Bio::SeqFeature::Generic->new(
-primary => '_main_contig_feature',
-source => $contigid,
-start => 1,
-end => $singletobj->get_consensus_length(),
-strand => 1,
-tag => { 'seq_id' => $$contiginfo{'seq_id'},
'com_name' => $$contiginfo{'com_name'},
'type' => $$contiginfo{'type'},
'method' => $$contiginfo{'method'},
'ed_status' => $$contiginfo{'ed_status'},
'full_cds' => $$contiginfo{'full_cds'},
'cds_start' => $$contiginfo{'cds_start'},
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
# Add read location and sequence to singlet features (in 'gapped consensus' coordinates)
$$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
$$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq
my $alncoord = Bio::SeqFeature::Generic->new(
-primary => '_aligned_coord',
-source => $readid,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
-strand => $$readinfo{'strand'},
-tag => { 'contig' => $contigid }
);
$alncoord->attach_seq($singletobj->seqref);
$singletobj->add_features([ $alncoord ], 0);
# Add quality clipping read information in singlet features
# (from 'aligned read' to 'gapped consensus' coordinates)
$$readinfo{'clip_start'} = $$readinfo{'seq_lend'};
$$readinfo{'clip_end'} = $$readinfo{'seq_rend'};
my $clipcoord = Bio::SeqFeature::Generic->new(
-primary => '_quality_clipping',
-source => $readid,
-start => $$readinfo{'clip_start'},
-end => $$readinfo{'clip_end'},
-strand => $$readinfo{'strand'},
-tag => { 'contig' => $contigid }
);
$clipcoord->attach_seq($singletobj->seqref);
$singletobj->add_features([ $clipcoord ], 0);
# Add other misc read information as subsequence feature
my $readtags = Bio::SeqFeature::Generic->new(
-primary => '_main_read_feature',
-source => $readid,
-start => $$readinfo{'aln_start'},
-end => $$readinfo{'aln_end'},
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
sub write_contig {
my ($self, @args) = @_;
my ($contigobj) = $self->_rearrange([qw(CONTIG)], @args);
# Sanity check
if ( !$contigobj || !$contigobj->isa('Bio::Assembly::Contig') ) {
$self->throw("Must provide a Bio::Assembly::Contig or Singlet object when calling write_contig");
}
my $decimal_format = '%.2f';
my $contigid = $contigobj->id;
my $numseqs = $contigobj->num_sequences;
if ( $contigobj->isa('Bio::Assembly::Singlet') ) {
# This is a singlet
my $readid = $contigobj->seqref->id;
my $singletobj = $contigobj;
# Get contig information
my ($contanno) = $singletobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");
my %contiginfo;
$contiginfo{'sequence'} = $singletobj->seqref->seq;
$contiginfo{'lsequence'} = $contiginfo{'sequence'};
$contiginfo{'quality'} = $self->_qual_dec2hex(
join ' ', @{$singletobj->get_consensus_quality->qual} );
$contiginfo{'asmbl_id'} = $contigid;
$contiginfo{'seq_id'} = ($contanno->get_tag_values('seq_id'))[0];
$contiginfo{'com_name'} = ($contanno->get_tag_values('com_name'))[0];
$contiginfo{'type'} = ($contanno->get_tag_values('type'))[0];
$contiginfo{'method'} = ($contanno->get_tag_values('method'))[0];
$contiginfo{'ed_status'} = ($contanno->get_tag_values('ed_status'))[0];
$contiginfo{'redundancy'} = sprintf($decimal_format, 1);
$contiginfo{'perc_N'} = sprintf(
$decimal_format, $self->_perc_N($contiginfo{'sequence'}));
$contiginfo{'seqnum'} = 1;
$contiginfo{'full_cds'} = ($contanno->get_tag_values('full_cds'))[0];
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
"comment\t$readinfo{'comment'}\n".
"db\t$readinfo{'db'}\n".
"offset\t$readinfo{'offset'}\n".
"lsequence\t$readinfo{'lsequence'}\n"
);
$self->_print("|\n");
} else {
# This is a contig
# Get contig information
my ($contanno) = $contigobj->get_features_collection->get_features_by_type("_main_contig_feature:$contigid");
my %contiginfo;
$contiginfo{'sequence'} = $self->_ungap(
$contigobj->get_consensus_sequence->seq);
$contiginfo{'lsequence'} = $contigobj->get_consensus_sequence->seq;
$contiginfo{'quality'} = $self->_qual_dec2hex(
join ' ', @{$contigobj->get_consensus_quality->qual});
$contiginfo{'asmbl_id'} = $contigid;
$contiginfo{'seq_id'} = ($contanno->get_tag_values('seq_id'))[0];
$contiginfo{'com_name'} = ($contanno->get_tag_values('com_name'))[0];
$contiginfo{'type'} = ($contanno->get_tag_values('type'))[0];
$contiginfo{'method'} = ($contanno->get_tag_values('method'))[0];
$contiginfo{'ed_status'} = ($contanno->get_tag_values('ed_status'))[0];
$contiginfo{'redundancy'} = sprintf(
$decimal_format, $self->_redundancy($contigobj));
$contiginfo{'perc_N'} = sprintf(
$decimal_format, $self->_perc_N($contiginfo{'sequence'}));
$contiginfo{'seqnum'} = $contigobj->num_sequences;
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
if ($nof_seq != 0) {
$avg_length = ($p_avg_length * $p_nof_seq + $n_avg_length * $n_nof_seq) / $nof_seq;
}
return $avg_length, $nof_seq;
}
=head2 _get_assembly_overlap_stats
Title : _get_assembly_overlap_stats
Usage : my ($avglength, $avgidentity, $minlength, $min_identity, $nof_overlaps)
= $csp->_get_assembly_overlap_stats($assemblyobj);
Function: Get statistics about pairwise overlaps in contigs of an assembly
Returns : average overlap length
average identity percent
minimum overlap length
minimum identity percent
number of overlaps
Args : Bio::Assembly::Scaffold, Contig or Singlet object
hash reference with the IDs of the sequences to consider [optional]
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
$self->_get_contig_overlap_stats($contig_obj, $seq_hash) );
}
return @asm_stats;
}
=head2 _get_contig_overlap_stats
Title : _get_contig_overlap_stats
Usage : my ($avglength, $avgidentity, $minlength, $min_identity, $nof_overlaps)
= $csp->_get_contig_overlap_stats($contigobj);
Function: Get statistics about pairwise overlaps in a contig or singlet. The
statistics are obtained using graph theory: each read is a node
and the edges between 2 reads are weighted by minus the number of
conserved residues in the alignment between the 2 reads. The
minimum spanning tree of this graph represents the overlaps that
form the contig. Overlaps that do not satisfy the minimum overlap
length and similarity get a malus on their score.
Note: This function requires the optional BioPerl dependency
module called 'Graph'
Bio/Cluster/UniGene.pm view on Meta::CPAN
Usage : used by ClusterIO
Returns : a new Bio::Cluster::Unigene object
=cut
sub new {
# standard new call..
my($caller,@args) = @_;
my $self = $caller->SUPER::new(@args);
my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
$self->_rearrange([qw(UNIGENE_ID
DESCRIPTION
MEMBERS
SIZE
SPECIES
DISPLAY_ID
OBJECT_ID
NAMESPACE
AUTHORITY
VERSION
SEQFACTORY
)], @args);
$self->{'_alphabet'} = 'dna';
$self->unigene_id($ugid) if $ugid;
$self->description($desc) if $desc;
$self->sequences($mems) if $mems;
$self->size($size) if defined($size);
$self->display_id($dispid) if $dispid; # overwrites ugid
$self->object_id($id) if $id; # overwrites dispid
$self->namespace($ns || 'UniGene');
$self->authority($auth || 'NCBI');
$self->version($v) if defined($v);
if( ! defined $seqfact ) {
$seqfact = Bio::Seq::SeqFactory->new
(-verbose => $self->verbose(),
-type => 'Bio::Seq::RichSeq');
}
$self->sequence_factory($seqfact);
Bio/DB/EntrezGene.pm view on Meta::CPAN
Title : get_request
Usage : my $url = $self->get_request
Function: HTTP::Request
Returns :
Args : %qualifiers = a hash of qualifiers (ids, format, etc)
=head2 get_Stream_by_id
Title : get_Stream_by_id
Usage : $stream = $db->get_Stream_by_id( [$gid1, $gid2] );
Function: Gets a series of Seq objects using Gene ids
Returns : A Bio::SeqIO stream object
Args : A reference to an array of Gene ids
=head2 request_format
Title : request_format
Usage : my $format = $self->request_format;
$self->request_format($format);
Function: Get or set sequence format retrieval
Bio/DB/GFF.pm view on Meta::CPAN
my $self = shift;
my $id = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
my %groups; # cache the groups we create to avoid consuming too much unecessary memory
my $features = [];
my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
$self->_feature_by_id($id,'feature',$callback);
return wantarray ? @$features : $features->[0];
}
*fetch_feature_by_id = \&get_feature_by_id;
=head2 get_feature_by_gid
Title : get_feature_by_gid
Usage : $db->get_feature_by_gid($id)
Function: fetch segments by feature ID
Returns : a Bio::DB::GFF::Feature object
Args : the feature ID
Status : public
This method can be used to fetch a feature from the database using its
group ID. Not all GFF databases support IDs, so be careful with this.
The group ID is often more interesting than the feature ID, since
groups can be complex objects containing subobjects.
=cut
# Fetch features by group ID.
#
# Accepts either a single array reference of IDs or a plain list of IDs.
# Each row handed to the callback by _feature_by_id() is turned into a
# feature with make_feature().  In list context every feature found is
# returned; in scalar context only the first one (undef when none).
sub get_feature_by_gid {
    my ( $self, @wanted ) = @_;
    my $ids = ref( $wanted[0] ) eq 'ARRAY' ? $wanted[0] : \@wanted;

    # Shared by all make_feature() calls so that group objects are created
    # once and reused, avoiding unnecessary memory consumption.
    my %groups;

    my @found;
    my $collect = sub {
        push @found, $self->make_feature( undef, \%groups, @_ );
    };
    $self->_feature_by_id( $ids, 'group', $collect );

    return @found if wantarray;
    return $found[0];
}
*fetch_feature_by_gid = \&get_feature_by_gid;
=head2 delete_fattribute_to_features
Title : delete_fattribute_to_features
Usage : $db->delete_fattribute_to_features(@ids_or_features)
Function: delete one or more fattribute_to_features
Returns : count of fattribute_to_features deleted
Args : list of features or feature ids
Status : public
Bio/DB/GFF.pm view on Meta::CPAN
return bless {ids=>$ids,db=>$db,type=>$type},$class;
}
# Return the object for the next identifier held by the iterator.
#
# The identifier is removed from the front of the iterator's id list.
# Returns nothing once the list is exhausted (or when the next id is
# false).  An array-reference id is first converted into a
# Bio::DB::GFF::Featname.  How the id is resolved depends on the
# iterator's type: 'name' -> segment(), 'feature' -> fetch_feature_by_id(),
# 'group' -> fetch_feature_by_gid(); any other type is reported via
# throw(), as is a lookup that yields nothing.
sub next_seq {
    my ($self) = @_;

    my $id = shift @{ $self->{ids} };
    return unless $id;

    my $name = $id;
    if ( ref($id) eq 'ARRAY' ) {
        $name = Bio::DB::GFF::Featname->new(@$id);
    }

    my $segment;
    if ( $self->{type} eq 'name' ) {
        $segment = $self->{db}->segment($name);
    }
    elsif ( $self->{type} eq 'feature' ) {
        $segment = $self->{db}->fetch_feature_by_id($name);
    }
    elsif ( $self->{type} eq 'group' ) {
        $segment = $self->{db}->fetch_feature_by_gid($name);
    }
    else {
        $segment = $self->throw("Bio::DB::GFF::ID_Iterator called to fetch an unknown type of identifier");
    }

    $self->throw("id does not exist") unless $segment;
    return $segment;
}
package Bio::DB::GFF::FeatureIterator;
sub new {
my $self = shift;
my @features = @_;
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
=cut
sub _feature_by_id {
my $self = shift;
my ($ids,$type,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $select = $self->make_features_select_part;
my $from = $self->make_features_from_part;
my ($where,@args) = $type eq 'feature' ? $self->make_features_by_id_where_part($ids)
: $self->make_features_by_gid_where_part($ids);
my $join = $self->make_features_join_part;
my $query = "SELECT $select FROM $from WHERE $where AND $join";
my $sth = $self->dbh->do_query($query,@args);
my $count = 0;
while (my @row = $sth->fetchrow_array) {
$callback->(@row);
$count++;
}
$sth->finish;
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
$search_string =~ tr/*?//d;
my @words = $search_string =~ /(\w+)/g;
my $regex = join '|',@words;
my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
my $search = join(' OR ',@searches);
my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND ($search)
END
;
my $sth = $self->dbh->do_query($query);
my @results;
while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
next unless $class && $name; # sorry, ignore NULL objects
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name);
}
}
sub make_features_by_alias_where_part {
my $self = shift;
my ($class,$name) = @_;
if ($name =~ /\*/) {
$name =~ tr/*/%/;
$name =~ s/_/\\_/g;
return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value LIKE ? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=...
} else {
return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value=? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata...
}
}
sub make_features_by_attribute_where_part {
my $self = shift;
my $attributes = shift;
my @args;
my @sql;
foreach (keys %$attributes) {
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
=cut
# Build the WHERE fragment that restricts the features query to a set of
# feature IDs.
#
# Args    : array reference of feature IDs
# Returns : a list -- the SQL fragment followed by its bind values.
#           Call in list context, e.g. my ($where,@args) = ...;
#
# The IDs are passed as bind values rather than being interpolated into
# the SQL text, so a caller-supplied ID can never alter the statement.
# An empty (or missing) ID list produces a condition that matches nothing
# instead of the syntactically invalid "IN ()".
sub make_features_by_id_where_part {
    my $self = shift;
    my $ids  = shift;

    return ("0=1") unless $ids && @$ids;

    # One "?" per ID; the IDs themselves travel as bind arguments.
    my $placeholders = join ",", ("?") x @$ids;
    return ( "fdata.fid IN ($placeholders)", @$ids );
}
=head2 make_features_by_gid_where_part
Title : make_features_by_gid_where_part
Usage : $db->make_features_by_gid_where_part($ids)
Function: create the SQL fragment needed to select a set of features by their group ids
Returns : a SQL fragment and bind arguments
Args : arrayref of IDs
Status : Protected
=cut
# Build the WHERE fragment that restricts the features query to a set of
# group IDs.
#
# Args    : array reference of group IDs
# Returns : a list -- the SQL fragment followed by its bind values.
#           Call in list context, e.g. my ($where,@args) = ...;
#
# The IDs are passed as bind values rather than being interpolated into
# the SQL text, so a caller-supplied ID can never alter the statement.
# An empty (or missing) ID list produces a condition that matches nothing
# instead of the syntactically invalid "IN ()".
sub make_features_by_gid_where_part {
    my $self = shift;
    my $ids  = shift;

    return ("0=1") unless $ids && @$ids;

    # One "?" per ID; the IDs themselves travel as bind arguments.
    my $placeholders = join ",", ("?") x @$ids;
    return ( "fgroup.gid IN ($placeholders)", @$ids );
}
=head2 make_features_from_part
Title : make_features_from_part
Usage : $string = $db->make_features_from_part()
Function: make from part of the features query
Returns : a string
Args : none
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
This method creates the part of the features query that immediately
follows the WHERE keyword.
=cut
# Return the join conditions for the features query.
#
# Args    : optional hash reference of options
# Returns : SQL text joining fdata to fgroup and ftype; when the
#           'attributes' option is true the conditions tying in
#           fattribute and fattribute_to_feature are appended.
sub make_features_join_part {
    my ( $self, $options ) = @_;
    $options ||= {};

    # Attribute searches need the two extra attribute-table joins.
    if ( $options->{attributes} ) {
        return <<END2;
fgroup.gid = fdata.gid
AND ftype.ftypeid = fdata.ftypeid
AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
AND fdata.fid=fattribute_to_feature.fid
END2
    }

    return <<END1;
fgroup.gid = fdata.gid
AND ftype.ftypeid = fdata.ftypeid
END1
}
=head2 make_features_order_by_part
Title : make_features_order_by_part
Usage : ($query,@args) = $db->make_features_order_by_part()
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
=cut
# Build the grouping fragment for the features query.
#
# Args    : optional hash reference of options; the keys read here are
#           'attributes' (hash reference) and 'bin_width'.
# Returns : - with 'attributes' holding more than one key: a column list
#             ending in a HAVING clause with one placeholder, followed by
#             the bind value (number of attribute keys minus one)
#           - with 'attributes' holding at most one key: nothing
#           - otherwise, with a true 'bin_width': a plain column list
#           When neither option is set there is no explicit return.
sub make_features_group_by_part {
my $self = shift;
my $options = shift || {};
if (my $att = $options->{attributes}) {
# Number of attribute keys requested (keys in scalar context).
my $key_count = keys %$att;
# Zero or one attribute: no grouping fragment is produced.
return unless $key_count > 1;
# NOTE(review): the HAVING count presumably keeps only features that
# matched every requested attribute -- confirm against the select part.
return ("fdata.fid,fref,fstart,fstop,fsource,
fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
ftarget_stop,fdata.gid
HAVING count(fdata.fid) > ?",$key_count-1);
}
# Binned queries group on reference, start and feature type.
# NOTE(review): "my $b" shadows the sort variable $b inside this branch.
elsif (my $b = $options->{bin_width}) {
return "fref,fstart,fdata.ftypeid";
}
}
=head2 refseq_query
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
=cut
# Look up the ID (fid) of the feature stored in fdata for a given
# reference, start, stop, type ID and group ID.
#
# The SELECT statement is prepared on first use and kept in the
# {load_stuff} hash for later calls.  Returns the fid of the first
# matching row, or nothing when the statement handle is unavailable or
# its execution fails.
sub get_feature_id {
    my ( $self, $ref, $start, $stop, $typeid, $groupid ) = @_;

    my $cache = $self->{load_stuff};
    if ( !$cache->{get_feature_id} ) {
        my $dbh = $self->features_db;
        $cache->{get_feature_id} =
            $dbh->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
    }

    my $sth = $cache->{get_feature_id};
    return unless $sth;
    return unless $sth->execute( $ref, $start, $stop, $typeid, $groupid );

    my ($fid) = $sth->fetchrow_array;
    return $fid;
}
=head2 make_abscoord_query
Bio/DB/GFF/Adaptor/dbi.pm view on Meta::CPAN
$result;
}
# Implementation of the _delete_groups() method.
#
# Args    : list of group IDs
# Returns : whatever the database handle's do() returns for the DELETE
#
# For every group ID the features found by get_feature_by_gid() are
# handed to delete_features(); afterwards the group rows themselves are
# removed from fgroup.  The IDs are quoted by the database handle before
# being placed in the statement.  The statement is echoed with warn()
# when debug is on, and an undefined result from do() is reported via
# throw() with the handle's error string.
sub _delete_groups {
    my ( $self, @group_ids ) = @_;

    my $dbh     = $self->features_db;
    my $in_list = join ',', map { $dbh->quote($_) } @group_ids;

    # Remove the member features of each group first.
    for my $group_id (@group_ids) {
        $self->delete_features( $self->get_feature_by_gid($group_id) );
    }

    my $sql = "delete from fgroup where gid in ($in_list)";
    if ( $self->debug ) {
        warn "$sql\n";
    }

    my $deleted = $dbh->do($sql);
    $self->throw( $dbh->errstr ) unless defined $deleted;
    return $deleted;
}
# implement the _delete() method
sub _delete {
my $self = shift;
my $delete_spec = shift;
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
# Coordinates of a group looked up by name and class.
# Placeholders, in order: group name, group class.
# Yields reference, class ('Sequence' when NULL), lowest start, highest
# stop, strand and group name, one row per reference/strand/name.
use constant GETSEQCOORDS =><<END;
SELECT fref,
IF(ISNULL(gclass),'Sequence',gclass),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup
WHERE fgroup.gname=?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand,gname
END
;
# Same columns as GETSEQCOORDS, but the group is found through an 'Alias'
# attribute whose value equals the first placeholder.
# Placeholders, in order: alias value, group class.
use constant GETALIASCOORDS =><<END;
SELECT fref,
IF(ISNULL(gclass),'Sequence',gclass),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE fattribute_to_feature.fattribute_value=?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gname
END
;
# Variant of GETALIASCOORDS that matches the alias value with LIKE, so the
# first placeholder is a LIKE pattern.
# Placeholders, in order: alias pattern, group class.
use constant GETALIASLIKE =><<END;
SELECT fref,
IF(ISNULL(gclass),'Sequence',gclass),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE fattribute_to_feature.fattribute_value LIKE ?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gname
END
;
# Coordinates of a group restricted to one reference sequence.
# Placeholders, in order: group name, group class, reference name.
# Unlike GETSEQCOORDS the group name is not part of the result.
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
IF(ISNULL(gclass),'Sequence',gclass),
min(fstart),
max(fstop),
fstrand
FROM fdata,fgroup
WHERE fgroup.gname=?
AND fgroup.gclass=?
AND fdata.fref=?
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand
END
;
# Full-text search over attribute values using MATCH ... AGAINST.
# Two placeholders: one in the select list (computes the score column)
# and one in the WHERE clause (filters the rows).
use constant FULLTEXTSEARCH => <<END;
SELECT distinct gclass,gname,fattribute_value,MATCH(fattribute_value) AGAINST (?) as score,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND MATCH(fattribute_value) AGAINST (?)
END
;
=head1 DESCRIPTION
This adaptor implements a specific mysql database schema that is
compatible with Bio::DB::GFF. It inherits from
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
This is the feature data table. Its columns are:
-
fid feature ID (integer)
fref reference sequence name (string)
fstart start position relative to reference (integer)
fstop stop position relative to reference (integer)
ftypeid feature type ID (integer)
fscore feature score (float); may be null
fstrand strand; one of "+" or "-"; may be null
fphase phase; one of 0, 1 or 2; may be null
gid group ID (integer)
ftarget_start for similarity features, the target start position (integer)
ftarget_stop for similarity features, the target stop position (integer)
Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature. However, in the current schema, query performance
suffers dramatically when this additional join is added.
=item fgroup
This is the group table. There is one row for each group. Columns:
gid the group ID (integer)
gclass the class of the group (string)
gname the name of the group (string)
The group table serves multiple purposes. As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript. It is also used to assign a
name and class to a singleton feature. Finally, the group table is
used to identify the target of a similarity hit. This is consistent
with the way in which the group field is used in the GFF version 2
format.
The fgroup.gid field joins with the fdata.gid field.
Examples:
mysql> select * from fgroup where gname='sjj_2L52.1';
+-------+-------------+------------+
| gid | gclass | gname |
+-------+-------------+------------+
| 69736 | PCR_product | sjj_2L52.1 |
+-------+-------------+------------+
1 row in set (0.70 sec)
mysql> select fref,fstart,fstop from fdata,fgroup
where gclass='PCR_product' and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid;
+---------------+--------+-------+
| fref | fstart | fstop |
+---------------+--------+-------+
| CHROMOSOME_II | 1586 | 2355 |
+---------------+--------+-------+
1 row in set (0.03 sec)
=item ftype
This table contains the feature types, one per row. Columns are:
ftypeid the feature type ID (integer)
fmethod the feature type method name (string)
fsource the feature type source name (string)
The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
mysql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
where gclass='PCR_product'
and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid
and fdata.ftypeid=ftype.ftypeid;
+---------------+--------+-------+-------------+-----------+
| fref | fstart | fstop | fmethod | fsource |
+---------------+--------+-------+-------------+-----------+
| CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
+---------------+--------+-------+-------------+-----------+
1 row in set (0.08 sec)
=item fdna
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
$b*(1+floor(fstart/$b)) as fstop,
IF(ISNULL(fsource),fmethod,concat(fmethod,':',fsource)),'bin',
count(*) as fscore,
'.','.','bin',
IF(ISNULL(fsource),concat(fref,':',fmethod),concat(fref,':',fmethod,':',fsource)),
NULL,NULL,NULL,NULL
END
;
} else {
$s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
$s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
$s;
}
# IMPORTANT NOTE:
# WHETHER OR NOT THIS WORKS IS CRITICALLY DEPENDENT ON THE RELATIVE MAGNITUDE OF THE
# Return the table list for the FROM clause of the features query.
#
# Args    : sparse-types flag, optional hash reference of options
# Returns : the table list, terminated by a newline
#
# A MySQL "USE INDEX" hint is attached to fdata: the gid index when the
# 'sparse_groups' option is true, otherwise the ftypeid index when the
# sparse-types flag is true, otherwise no hint.  The attribute tables are
# included when the 'attributes' option is true.
sub make_features_from_part {
    my ( $self, $sparse_types, $options ) = @_;
    $options ||= {};

    # sparse_groups takes precedence over sparse types.
    my $index = '';
    if ( $options->{sparse_groups} ) {
        $index = ' USE INDEX(gid)';
    }
    elsif ($sparse_types) {
        $index = ' USE INDEX(ftypeid)';
    }

    if ( $options->{attributes} ) {
        return "fdata${index},ftype,fgroup,fattribute,fattribute_to_feature\n";
    }
    return "fdata${index},ftype,fgroup\n";
}
=head2 search_notes
Title : search_notes
Usage : @search_results = $db->search_notes("full text search string",$limit)
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
create table fdata (
fid int not null auto_increment,
fref varchar(100) not null,
fstart int not null,
fstop int not null,
fbin double precision,
ftypeid int not null,
fscore float,
fstrand enum('+','-'),
fphase enum('0','1','2'),
gid int not null,
ftarget_start int,
ftarget_stop int,
primary key(fid),
unique index(fref,fbin,fstart,fstop,ftypeid,gid),
index(ftypeid),
index(gid)
) $engine=MyISAM
} # fdata table
}, # fdata
fgroup =>{
table=> qq{
create table fgroup (
gid int not null auto_increment,
gclass varchar(100),
gname varchar(100),
primary key(gid),
unique(gclass,gname)
) $engine=MyISAM
}
},
ftype => {
table=> qq{
create table ftype (
ftypeid int not null auto_increment,
fmethod varchar(100) not null,
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
my $tables = join ', ',@tables;
$dbh->do("LOCK TABLES $tables");
}
# for my $table (qw(fdata)) {
# $dbh->do("alter table $table disable keys");
# }
my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
my $insert_data = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
fstrand,fphase,gid,ftarget_start,ftarget_stop)
VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;
$self->{load_stuff}{sth}{lookup_ftype} = $lookup_type;
$self->{load_stuff}{sth}{insert_ftype} = $insert_type;
$self->{load_stuff}{sth}{lookup_fgroup} = $lookup_group;
$self->{load_stuff}{sth}{insert_fgroup} = $insert_group;
$self->{load_stuff}{sth}{insert_fdata} = $insert_data;
Bio/DB/GFF/Adaptor/dbi/mysql.pm view on Meta::CPAN
=cut
# Called when a feature's ID (fid) has to be looked up from its
# reference, start, stop, type ID and group ID.
#
# The lookup statement is prepared lazily and remembered in the
# {load_stuff} hash.  Returns the fid from the first row fetched, or
# nothing if there is no statement handle or execute() reports failure.
sub get_feature_id {
    my ( $self, @lookup_values ) = @_;
    my ( $ref, $start, $stop, $typeid, $groupid ) = @lookup_values;

    my $load_stuff = $self->{load_stuff};
    $load_stuff->{get_feature_id} =
        $self->features_db->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?')
        unless $load_stuff->{get_feature_id};

    my $sth = $load_stuff->{get_feature_id};
    if ( !$sth ) {
        return;
    }
    if ( !$sth->execute( $ref, $start, $stop, $typeid, $groupid ) ) {
        return;
    }

    my ($fid) = $sth->fetchrow_array;
    return $fid;
}
sub _add_interval_stats_table {
my $self = shift;
my $schema = $self->schema;
Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm view on Meta::CPAN
This is the feature data table. Its columns are:
-
fid feature ID (integer)
fref reference sequence name (string)
fstart start position relative to reference (integer)
fstop stop position relative to reference (integer)
ftypeid feature type ID (integer)
fscore feature score (float); may be null
fstrand strand; one of "+" or "-"; may be null
fphase phase; one of 0, 1 or 2; may be null
feature_id group ID used to be 'gid' (integer)
ftarget_start for similarity features, the target start position (integer)
ftarget_stop for similarity features, the target stop position (integer)
Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature. However, in the current schema, query performance
suffers dramatically when this additional join is added.
=item cmap_feature (replaces fgroup)
Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm view on Meta::CPAN
# Return the column used to order the features query.
#
# Args    : optional hash reference of options (not consulted here)
# Returns : the fixed column name cmap_feature.feature_name
sub make_features_order_by_part {
    my ( $self, $options ) = @_;
    return 'cmap_feature.feature_name';
}
=head2 create_cmap_viewer_link
Title : create_cmap_viewer_link
Usage : $link_str = $db->create_cmap_viewer_link(data_source=>$ds,group_id=>$gid)
Function:
Returns :
Args :
Status :
=cut
sub create_cmap_viewer_link {
my $self = shift;
my %args = @_;
my $data_source = $args{'data_source'};
my $gid = $args{'group_id'};
my $link_str = undef;
my $db = $self->features_db;
my $sql_str = qq[
select f.feature_name,
f.feature_type_accession feature_type_aid,
m.accession_id as map_aid,
ms.accession_id as map_set_aid
from cmap_feature f,
cmap_map m,
cmap_map_set ms
where f.map_id=m.map_id
and ms.map_set_id=m.map_set_id
and f.feature_id=$gid
];
my $result_ref = $db->selectrow_hashref($sql_str,{ Columns => {} });
if ( $result_ref ) {
$link_str='/cgi-bin/cmap/viewer?ref_map_set_aid='
. $result_ref->{'map_set_aid'}
. '&ref_map_aids='
. $result_ref->{'map_aid'}
. '&data_source='
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
# SQL returning the extent of a named group: for each reference/strand it
# yields the minimum fstart and maximum fstop over the group's features.
# Bind parameters, in order: group name (gname), group class (gclass).
# NVL substitutes 'Sequence' when gclass is NULL.
use constant GETSEQCOORDS =><<END;
SELECT fref,
NVL(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup
WHERE fgroup.gname=?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of groups located through an 'Alias' attribute
# whose value equals the first bind parameter exactly.
# Bind parameters, in order: alias value, group class (gclass).
use constant GETALIASCOORDS =><<END;
SELECT fref,
NVL(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE fattribute_to_feature.fattribute_value=?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of groups located through an 'Alias' attribute
# whose value matches a LIKE pattern.
# Bind parameters, in order: LIKE pattern for the alias, group class (gclass).
# gclass appears in GROUP BY because it is selected (inside NVL) without being
# aggregated; Oracle rejects such a query otherwise (ORA-00979). Since the
# WHERE clause pins gclass to a single value, the grouping of rows is
# unchanged, and the clause now matches the one used by GETALIASCOORDS.
use constant GETALIASLIKE =><<END;
SELECT fref,
NVL(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE fattribute_to_feature.fattribute_value LIKE ?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of a named group restricted to one reference
# sequence. Bind parameters, in order: group name (gname), group class
# (gclass), reference sequence name (fref).
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
NVL(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand
FROM fdata,fgroup
WHERE fgroup.gname=?
AND fgroup.gclass=?
AND fdata.fref=?
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand,gclass
END
;
########################
# moved from mysqlopt.pm
########################
# Upper bound on the length of any reference sequence: 100 megabases.
use constant MAX_BIN => 100_000_000;
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
This is the feature data table. Its columns are:
fid feature ID (integer)
fref reference sequence name (string)
fstart start position relative to reference (integer)
fstop stop position relative to reference (integer)
ftypeid feature type ID (integer)
fscore feature score (float); may be null
fstrand strand; one of "+" or "-"; may be null
fphase phase; one of 0, 1 or 2; may be null
gid group ID (integer)
ftarget_start for similarity features, the target start position (integer)
ftarget_stop for similarity features, the target stop position (integer)
Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature. However, in the current schema, query performance
suffers dramatically when this additional join is added.
=item fgroup
This is the group table. There is one row for each group. Columns:
gid the group ID (integer)
gclass the class of the group (string)
gname the name of the group (string)
The group table serves multiple purposes. As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript. It is also used to assign a
name and class to a singleton feature. Finally, the group table is
used to identify the target of a similarity hit. This is consistent
with the way in which the group field is used in the GFF version 2
format.
The fgroup.gid field joins with the fdata.gid field.
Examples:
sql> select * from fgroup where gname='sjj_2L52.1';
+-------+-------------+------------+
| gid | gclass | gname |
+-------+-------------+------------+
| 69736 | PCR_product | sjj_2L52.1 |
+-------+-------------+------------+
1 row in set (0.70 sec)
sql> select fref,fstart,fstop from fdata,fgroup
where gclass='PCR_product' and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid;
+---------------+--------+-------+
| fref | fstart | fstop |
+---------------+--------+-------+
| CHROMOSOME_II | 1586 | 2355 |
+---------------+--------+-------+
1 row in set (0.03 sec)
=item ftype
This table contains the feature types, one per row. Columns are:
ftypeid the feature type ID (integer)
fmethod the feature type method name (string)
fsource the feature type source name (string)
The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
where gclass='PCR_product'
and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid
and fdata.ftypeid=ftype.ftypeid;
+---------------+--------+-------+-------------+-----------+
| fref | fstart | fstop | fmethod | fsource |
+---------------+--------+-------+-------------+-----------+
| CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
+---------------+--------+-------+-------------+-----------+
1 row in set (0.08 sec)
=item fdna
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
create table fdata (
fid INTEGER NOT NULL,
fref VARCHAR(100) DEFAULT '' NOT NULL,
fstart INTEGER DEFAULT '0' NOT NULL,
fstop INTEGER DEFAULT '0' NOT NULL,
fbin NUMBER DEFAULT '0.000000' NOT NULL,
ftypeid INTEGER DEFAULT '0' NOT NULL,
fscore NUMBER ,
fstrand VARCHAR2(3) CHECK (fstrand IN ('+','-')),
fphase VARCHAR2(3) CHECK (fphase IN ('0','1','2')),
gid INTEGER DEFAULT '0' NOT NULL,
ftarget_start INTEGER ,
ftarget_stop INTEGER ,
CONSTRAINT fdata_pk PRIMARY KEY (fid)
)
}, # fdata table
index=>{
fdata_fref_idx => q{
CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
},
fdata_ftypeid_idx => q{
CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
},
fdata_gid_idx => q{
CREATE INDEX fdata_gid_idx ON fdata (gid)
}
}, # fdata indexes
sequence=> {
fdata_fid_sq => q{
CREATE SEQUENCE fdata_fid_sq START WITH 1
}
}, # fdata sequences
trigger=> {
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
}
}# fdata triggers
}, # fdata
fgroup => {
table => q{
CREATE TABLE fgroup (
gid INTEGER NOT NULL,
gclass VARCHAR(100) ,
gname VARCHAR(100) ,
CONSTRAINT fgroup_pk PRIMARY KEY (gid)
)
}, # fgroup table
index => {
fgroup_gclass_idx => q{
CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
}
}, # fgroup indexes
sequence => {
fgroup_gid_sq => q{
CREATE SEQUENCE fgroup_gid_sq START WITH 1
}
}, # fgroup sequences
trigger => {
fgroup_gid_ai => q{
CREATE OR REPLACE TRIGGER fgroup_gid_ai
BEFORE INSERT ON fgroup
FOR EACH ROW WHEN (new.gid IS NULL OR new.gid = 0)
BEGIN
SELECT fgroup_gid_sq.nextval INTO :new.gid FROM dual;
END;
}
} # fgroup triggers
}, # fgroup
ftype => {
table => q{
CREATE TABLE ftype (
ftypeid INTEGER NOT NULL,
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
my @tables = map { "$_ WRITE"} $self->tables;
my $tables = join ', ',@tables;
$dbh->do("LOCK TABLES $tables");
}
my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
my $sequence_type = (keys %{$schema->{ftype}{sequence}})[0];
my $insertid_type = $dbh->prepare_delayed("SELECT $sequence_type.CURRVAL FROM dual");
my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
my $sequence_group = (keys %{$schema->{fgroup}{sequence}})[0];
my $insertid_group = $dbh->prepare_delayed("SELECT $sequence_group.CURRVAL FROM dual");
my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
my $sequence_attribute = (keys %{$schema->{fattribute}{sequence}})[0];
my $insertid_attribute = $dbh->prepare_delayed("SELECT $sequence_attribute.CURRVAL FROM dual");
my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
my $insert_data = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
fstrand,fphase,gid,ftarget_start,ftarget_stop)
VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;
my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
my $sequence_data = (keys %{$schema->{fdata}{sequence}})[0];
my $insertid_data = $dbh->prepare_delayed("SELECT $sequence_data.CURRVAL FROM dual");
$self->{load_stuff}{sth}{lookup_ftype} = $lookup_type;
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
$search_string =~ tr/*?//d;
my @words = $search_string =~ /(\w+)/g;
my $regex = join '|',@words;
my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
my $search = join(' OR ',@searches);
my $query = <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND ($search)
END
;
my $sth = $self->dbh->do_query($query);
my @results;
while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
next unless $class && $name; # sorry, ignore NULL objects
Bio/DB/GFF/Adaptor/dbi/oracle.pm view on Meta::CPAN
$b*(1+floor(fstart/$b)) as fstop,
NVL2(fsource,fmethod||':'||fsource,fmethod),'bin',
count(*) as fscore,
'.','.','bin',
NVL2(fsource , fref||':'||fmethod||':'||fsource , fref||':'||fmethod),
NULL,NULL,NULL,NULL
END
;
} else {
$s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
$s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
$s;
}
sub make_features_from_part_bkup {
my $self = shift;
my $sparse = shift;
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
# SQL returning the extent of a named group: for each reference/strand it
# yields the minimum fstart and maximum fstop over the group's features.
# Bind parameters, in order: group name (compared case-insensitively via
# lower()), group class (gclass). COALESCE substitutes 'Sequence' when gclass
# is NULL.
use constant GETSEQCOORDS =><<END;
SELECT fref,
COALESCE(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup
WHERE lower(fgroup.gname) = lower(?)
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of groups located through an 'Alias' attribute
# whose value equals the first bind parameter, compared case-insensitively
# via lower(). Bind parameters, in order: alias value, group class (gclass).
use constant GETALIASCOORDS =><<END;
SELECT fref,
COALESCE(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE lower(fattribute_to_feature.fattribute_value)=lower(?)
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of groups located through an 'Alias' attribute
# whose value matches a LIKE pattern, compared case-insensitively via lower().
# Bind parameters, in order: LIKE pattern for the alias, group class (gclass).
# gclass appears in GROUP BY because it is selected (inside COALESCE) without
# being aggregated; PostgreSQL rejects a query that selects a column which is
# neither grouped nor aggregated. Since the WHERE clause pins gclass to a
# single value, the grouping of rows is unchanged, and the clause now matches
# the one used by GETALIASCOORDS.
use constant GETALIASLIKE =><<END;
SELECT fref,
COALESCE(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand,
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
WHERE lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
AND fattribute.fattribute_name='Alias'
AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
AND fattribute_to_feature.fid=fdata.fid
GROUP BY fref,fstrand,gclass,gname
END
;
# SQL returning the extent of a named group restricted to one reference
# sequence. Bind parameters, in order: group name, group class (gclass),
# reference sequence name; name and reference are compared
# case-insensitively via lower().
use constant GETFORCEDSEQCOORDS =><<END;
SELECT fref,
COALESCE(gclass,'Sequence'),
min(fstart),
max(fstop),
fstrand
FROM fdata,fgroup
WHERE lower(fgroup.gname) = lower(?)
AND fgroup.gclass=?
AND lower(fdata.fref) = lower(?)
AND fgroup.gid=fdata.gid
GROUP BY fref,fstrand,gclass
END
;
# SQL listing distinct (gclass, gname, fattribute_value) triples for features
# whose attribute value matches a LIKE pattern, compared case-insensitively
# via lower(). Single bind parameter: the LIKE pattern.
use constant FULLTEXTWILDCARD => <<END;
SELECT distinct gclass,gname,fattribute_value
FROM fgroup,fattribute_to_feature,fdata
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
END
;
########################
# moved from mysqlopt.pm
########################
# this is the largest that any reference sequence can be (100 megabases)
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
This is the feature data table. Its columns are:
fid feature ID (integer)
fref reference sequence name (string)
fstart start position relative to reference (integer)
fstop stop position relative to reference (integer)
ftypeid feature type ID (integer)
fscore feature score (float); may be null
fstrand strand; one of "+" or "-"; may be null
fphase phase; one of 0, 1 or 2; may be null
gid group ID (integer)
ftarget_start for similarity features, the target start position (integer)
ftarget_stop for similarity features, the target stop position (integer)
Note that it would be desirable to normalize the reference sequence
name, since there are usually many features that share the same
reference feature. However, in the current schema, query performance
suffers dramatically when this additional join is added.
=item fgroup
This is the group table. There is one row for each group. Columns:
gid the group ID (integer)
gclass the class of the group (string)
gname the name of the group (string)
The group table serves multiple purposes. As you might expect, it is
used to cluster features that logically belong together, such as the
multiple exons of the same transcript. It is also used to assign a
name and class to a singleton feature. Finally, the group table is
used to identify the target of a similarity hit. This is consistent
with the way in which the group field is used in the GFF version 2
format.
The fgroup.gid field joins with the fdata.gid field.
Examples:
sql> select * from fgroup where gname='sjj_2L52.1';
+-------+-------------+------------+
| gid | gclass | gname |
+-------+-------------+------------+
| 69736 | PCR_product | sjj_2L52.1 |
+-------+-------------+------------+
1 row in set (0.70 sec)
sql> select fref,fstart,fstop from fdata,fgroup
where gclass='PCR_product' and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid;
+---------------+--------+-------+
| fref | fstart | fstop |
+---------------+--------+-------+
| CHROMOSOME_II | 1586 | 2355 |
+---------------+--------+-------+
1 row in set (0.03 sec)
=item ftype
This table contains the feature types, one per row. Columns are:
ftypeid the feature type ID (integer)
fmethod the feature type method name (string)
fsource the feature type source name (string)
The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
where gclass='PCR_product'
and gname = 'sjj_2L52.1'
and fdata.gid=fgroup.gid
and fdata.ftypeid=ftype.ftypeid;
+---------------+--------+-------+-------------+-----------+
| fref | fstart | fstop | fmethod | fsource |
+---------------+--------+-------+-------------+-----------+
| CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
+---------------+--------+-------+-------------+-----------+
1 row in set (0.08 sec)
=item fdna
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
CREATE TABLE "fdata" (
"fid" serial NOT NULL,
"fref" character varying(100) DEFAULT '' NOT NULL,
"fstart" integer DEFAULT '0' NOT NULL,
"fstop" integer DEFAULT '0' NOT NULL,
"fbin" double precision DEFAULT '0.000000' NOT NULL,
"ftypeid" integer DEFAULT '0' NOT NULL,
"fscore" double precision DEFAULT NULL,
"fstrand" character varying(3) DEFAULT NULL,
"fphase" character varying(3) DEFAULT NULL,
"gid" integer DEFAULT '0' NOT NULL,
"ftarget_start" integer DEFAULT NULL,
"ftarget_stop" integer DEFAULT NULL,
CONSTRAINT chk_fdata_fstrand CHECK (fstrand IN ('+','-')),
CONSTRAINT chk_fdata_fphase CHECK (fphase IN ('0','1','2')),
CONSTRAINT pk_fdata PRIMARY KEY (fid)
)
}, # fdata table
#CONSTRAINT fref_fdata UNIQUE (fref, fbin, fstart, fstop, ftypeid, gid)
# fdata_fref_idx => q{ CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)},
index=>{
fdata_fref_idx => q{
CREATE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
},
fdata_ftypeid_idx => q{
CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
},
fdata_gid_idx => q{
CREATE INDEX fdata_gid_idx ON fdata (gid)
}
}, # fdata indexes
}, # fdata
fgroup => {
table => q{
CREATE TABLE "fgroup" (
"gid" serial NOT NULL,
"gclass" character varying(100) DEFAULT NULL,
"gname" character varying(100) DEFAULT NULL,
CONSTRAINT pk_fgroup PRIMARY KEY (gid)
)
}, # fgroup table
index => {
fgroup_gclass_idx => q{
CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
},
fgroup_gname_idx => q{
CREATE INDEX fgroup_gname_idx ON fgroup(gname)
},
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
if ($self->lock_on_load) {
my @tables = map { "$_ WRITE"} $self->tables;
my $tables = join ', ',@tables;
$dbh->do("LOCK TABLES $tables");
}
my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
my $insertid_type = $dbh->prepare_delayed("SELECT currval('ftype_ftypeid_seq')");
my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE lower(gname)=lower(?) AND gclass=?');
my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
my $insertid_group = $dbh->prepare_delayed("SELECT currval('fgroup_gid_seq')");
my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
my $insertid_attribute = $dbh->prepare_delayed("SELECT currval('fattribute_fattribute_id_seq')");
my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
my $insert_data = $dbh->prepare_delayed(<<END);
INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
fstrand,fphase,gid,ftarget_start,ftarget_stop)
VALUES(?,?,?,?,?,?,?,?,?,?,?)
END
;
my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
my $insertid_data = $dbh->prepare_delayed("SELECT currval('fdata_fid_seq')");
$self->{load_stuff}{sth}{lookup_ftype} = $lookup_type;
$self->{load_stuff}{sth}{insert_ftype} = $insert_type;
$self->{load_stuff}{sth}{insertid_ftype} = $insertid_type;
$self->{load_stuff}{sth}{lookup_fgroup} = $lookup_group;
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
'bin',
count(*) as fscore,
'.','.','bin',
CASE WHEN fsource IS NULL THEN fref||':'||fmethod
ELSE fref||':'||fmethod||':'||fsource,
NULL,NULL,NULL,NULL
END
;
} else {
$s = <<END;
fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,fgroup.gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
END
;
}
$s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
$s;
}
sub make_features_from_part_bkup {
my $self = shift;
my $sparse = shift;
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
$sth->finish;
return $count;
}
# Resets the four PostgreSQL sequences backing the fdata, fattribute, fgroup
# and ftype id columns to max(id)+1 of their respective tables, issuing one
# setval statement per sequence on the features database handle.
# Always returns 1.
sub update_sequences {
    my ($self) = @_;
    my $handle = $self->features_db;

    my @statements = (
        "SELECT setval('public.fdata_fid_seq', max(fid)+1) FROM fdata",
        "SELECT setval('public.fattribute_fattribute_id_seq', max(fattribute_id)+1) FROM fattribute",
        "SELECT setval('public.fgroup_gid_seq', max(gid)+1) FROM fgroup",
        "SELECT setval('public.ftype_ftypeid_seq', max(ftypeid)+1) FROM ftype",
    );

    foreach my $sql (@statements) {
        $handle->do($sql);
    }

    return 1;
}
=head2 make_features_by_name_where_part
Title : make_features_by_name_where_part
Usage : $db->make_features_by_name_where_part
Function: Overrides a function in Bio::DB::GFF::Adaptor::dbi to insure
Bio/DB/GFF/Adaptor/dbi/pg.pm view on Meta::CPAN
return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
}
# Looks up the fid of the fdata row matching the given reference (compared
# case-insensitively), start, stop, type id and group id.
# The prepared statement is kept in $self->{load_stuff}{get_feature_id} and
# created through features_db->prepare_delayed when not yet present.
# Returns the fid from the first fetched row, or nothing when no statement
# handle is available or execute reports failure.
sub get_feature_id {
    my ($self, $ref, $start, $stop, $typeid, $groupid) = @_;

    my $cache = $self->{load_stuff};
    $cache->{get_feature_id} ||= $self->features_db->prepare_delayed(
        'SELECT fid FROM fdata WHERE lower(fref)=lower(?) AND fstart=? AND fstop=? AND ftypeid=? AND gid=?'
    );

    my $query = $cache->{get_feature_id};
    return unless $query;
    return unless $query->execute($ref, $start, $stop, $typeid, $groupid);

    my @row = $query->fetchrow_array;
    return $row[0];
}
sub _delete {
my $self = shift;
my $delete_spec = shift;
Bio/DB/GFF/Adaptor/dbi/pg_fts.pm view on Meta::CPAN
=cut
# a simple postgres adaptor
use strict;
use Bio::DB::GFF::Adaptor::dbi;
use base qw(Bio::DB::GFF::Adaptor::dbi::pg);
# SQL listing distinct (gclass, gname, fattribute_value, fmethod, fsource)
# rows for features whose attribute index column idxfti matches a text-search
# query. Single bind parameter: the query string handed to
# to_tsquery('default', ?).
use constant FULLTEXTSEARCH => <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND (fattribute_to_feature.idxfti @@ to_tsquery('default', ?))
END
;
# SQL listing distinct (gclass, gname, fattribute_value, fmethod, fsource)
# rows for features whose attribute value matches a LIKE pattern, compared
# case-insensitively via lower(). Single bind parameter: the LIKE pattern.
use constant FULLTEXTWILDCARD => <<END;
SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
FROM fgroup,fattribute_to_feature,fdata,ftype
WHERE fgroup.gid=fdata.gid
AND fdata.fid=fattribute_to_feature.fid
AND fdata.ftypeid=ftype.ftypeid
AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
END
;
sub new {
my $class = shift;
my $self = $class->SUPER::new(@_);
return $self;
Bio/DB/Query/GenBank.pm view on Meta::CPAN
Title : new
Usage : $db = Bio::DB::Query::GenBank->new(@args)
Function: create new query object
Returns : new query object
Args : -db database (see below for allowable values)
-query query string
-mindate minimum date to retrieve from (YYYY/MM/DD)
-maxdate maximum date to retrieve from (YYYY/MM/DD)
-reldate relative date to retrieve from (days)
-datetype date field to use ('edat' or 'mdat')
-ids array ref of gids (overrides query)
-maxids the maximum number of IDs you wish to collect
(defaults to 100)
This method creates a new query object. Typically you will specify a
-db and a -query argument, possibly modified by -mindate, -maxdate, or
-reldate. -mindate and -maxdate specify minimum and maximum dates for
entries you are interested in retrieving, expressed in the form
YYYY/MM/DD. -reldate is used to fetch entries that are more recent
than the indicated number of days.
Bio/DB/SeqFeature/Store/DBI/SQLite.pm view on Meta::CPAN
$sth->finish;
}
# Writes one tab-separated line (feature id, attribute id, value) to the
# 'attribute' dump filehandle for every tag value of $obj.
# Values are written unquoted.
sub _dump_update_attribute_index {
    my ($self, $obj, $id) = @_;

    my $out = $self->dump_filehandle('attribute');
    my $dbh = $self->dbh;    # not used below; the call is retained as-is

    foreach my $tag ($obj->all_tags) {
        my $tagid = $self->_attributeid($tag);
        # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
        print {$out} join("\t", $id, $tagid, $_), "\n"
            for $obj->each_tag_value($tag);
    }
}
sub _update_indexes {
my $self = shift;
my $obj = shift;
defined (my $id = $obj->primary_id) or return;
$self->SUPER::_update_indexes($obj);
Bio/DB/SeqFeature/Store/DBI/mysql.pm view on Meta::CPAN
}
# Rebuilds the attribute index rows for feature $id: existing rows are removed
# with _delete_index, then one (id, attribute_id, attribute_value) row is
# inserted per tag value of $obj. A failed insert is reported through
# $self->throw with the statement's error string.
sub _update_attribute_index {
    my ($self, $obj, $id) = @_;

    my $table = $self->_attribute_table;
    $self->_delete_index($table, $id);

    my $insert = $self->_prepare("INSERT INTO $table (id,attribute_id,attribute_value) VALUES (?,?,?)");

    foreach my $tag ($obj->get_all_tags) {
        my $tagid = $self->_attributeid($tag);
        foreach my $value ($obj->get_tag_values($tag)) {
            next if $insert->execute($id, $tagid, $value);
            $self->throw($insert->errstr);
        }
    }

    $insert->finish;
}
sub _genericid {
my $self = shift;
my ($table,$namefield,$name,$add_if_missing) = @_;
my $qualified_table = $self->_qualify($table);
my $sth = $self->_prepare(<<END);
Bio/DB/SeqFeature/Store/DBI/mysql.pm view on Meta::CPAN
print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}
# Writes one tab-separated line (feature id, attribute id, quoted value) to
# the 'attribute' dump filehandle for every tag value of $obj. Each value is
# passed through the database handle's quote() before being written.
sub _dump_update_attribute_index {
    my ($self, $obj, $id) = @_;

    my $out = $self->dump_filehandle('attribute');
    my $dbh = $self->dbh;

    foreach my $tag ($obj->all_tags) {
        my $tagid = $self->_attributeid($tag);
        print {$out} join("\t", $id, $tagid, $dbh->quote($_)), "\n"
            for $obj->each_tag_value($tag);
    }
}
sub coverage_array {
my $self = shift;
my ($seq_name,$start,$end,$types,$bins) =
rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);
Bio/Map/Clone.pm view on Meta::CPAN
$remark,$fpnumber,$seqtype,$seqstatus,$fpcremark,
$matche,$matcha,$matchp,
$range) = $self->_rearrange([qw(NAME MARKERS CONTIG TYPE
BANDS GEL GROUP REMARK FPNUMBER
SEQUENCETYPE SEQUENCESTATUS
FPCREMARK MATCHE MATCHA MATCHP
RANGE)],@args);
$self->name($name) if defined $name;
$self->markers($markers) if defined $markers;
$self->contigid($contig) if defined $contig;
$self->type($type) if defined $type;
$self->bands($bands) if defined $bands;
$self->gel($gel) if defined $gel;
$self->group($group) if defined $group;
$self->remark($remark) if defined $remark;
$self->fp_number($fpnumber) if defined $fpnumber;
$self->sequence_type($seqtype) if defined $seqtype;
$self->sequence_status($seqstatus) if defined $seqstatus;
$self->fpc_remark($fpcremark) if defined $fpcremark;
$self->range($range) if defined $range;
Bio/Map/Clone.pm view on Meta::CPAN
Args : none to get, OR string to set
=cut
# Get/set accessor for the value stored under '_group'.
# With an argument, stores it (even when undef) and returns it; without one,
# returns the currently stored value.
sub group {
    my ($self, @new_value) = @_;
    $self->{'_group'} = $new_value[0] if @new_value;
    return $self->{'_group'};
}
=head2 contigid
Title : contigid
Usage : my $ctg = $cloneobj->contigid();
Function: Get/set the contig this clone belongs to
Returns : scalar representing the contig
Args : none to get, OR string to set
=cut
# Get/set accessor for the contig this clone belongs to (stored under
# '_contig'). With an argument, stores it first. Returns the stored value,
# or 0 when the stored value is false or absent.
sub contigid {
    my ($self, @new_value) = @_;
    $self->{'_contig'} = $new_value[0] if @new_value;
    return $self->{'_contig'} ? $self->{'_contig'} : 0;
}
=head2 each_markerid
Title : each_markerid
Usage : @markers = $cloneobj->each_markerid();
Function: retrieves all the elements in a map unordered
Bio/Map/FPCMarker.pm view on Meta::CPAN
*** This only supplies the ids set with the set_clones method ***
*** It has nothing to do with actual Bio::Map::MappableI objects ***
=cut
# Returns whatever _each_element yields for the 'clones' element type.
sub each_cloneid {
    my $self = shift;
    return $self->_each_element('clones');
}
=head2 each_contigid
Title : each_contigid
Usage : my @contigs = $map->each_contigid();
Function: retrieves all the contig ids in a map unordered
Returns : list of strings (ids)
Args : none
*** This only supplies the ids set with the set_contigs method ***
*** It has nothing to do with actual Bio::Map::MapI objects ***
=cut
# Returns whatever _each_element yields for the 'contigs' element type.
sub each_contigid {
    my $self = shift;
    return $self->_each_element('contigs');
}
sub _each_element{
my ($self, $type) = @_;
$type = 'clones' unless defined $type;
$type = lc("_$type");
Bio/Map/Physical.pm view on Meta::CPAN
_remark => $remark,
_clones => \%clones,
_contigs => \%contigs,
_position => \%markerpos,
}, 'Bio::Map::FPCMarker');
$self->{'_markers'}{$marker}{'marker'} = $markerobj;
return $markerobj;
}
=head2 each_contigid
Title : each_contigid
Usage : my @contigs = $map->each_contigid();
Function: returns a list of contigs (numbers)
Returns : list of contigs
Args : none
=cut
# Returns the keys of the '_contigs' hash, in no particular order.
sub each_contigid {
    my $self = shift;
    return keys %{ $self->{'_contigs'} };
}
=head2 get_contigobj
Title : get_contigobj
Usage : my $contigobj = $map->get_contigobj('CONTIG1');
Function: returns an object of the contig given in the argument
Returns : object of the contig
Bio/Map/Physical.pm view on Meta::CPAN
sub print_contiglist{
my ($self,$showall) = @_;
my $pos;
$showall = 0 if (!defined($showall));
my %_contigs = %{$self->{'_contigs'}};
my %_markers = %{$self->{'_markers'}};
my %_clones = %{$self->{'_clones'}};
my @contigs = $self->each_contigid();
my @sortedcontigs = sort {$a <=> $b } @contigs;
print "\n\nContig List\n\n";
foreach my $contig (@sortedcontigs) {
my %list;
my %alist;
my $ctgAnchor = $_contigs{$contig}{'anchor'};
my $ctgGroup = $_contigs{$contig}{'group'};
Bio/Map/Physical.pm view on Meta::CPAN
my $i;
my ($depth, $save_depth);
my ($x, $y);
my @stack;
my ($k, $j, $s);
my $pos;
my $contig;
# Calculate the position for the marker in the contig
my @contigs = $self->each_contigid();
my @sortedcontigs = sort {$a <=> $b } @contigs;
my $offset = 0;
my %gffclones;
my %gffcontigs;
my %gffmarkers;
my $basepair = 4096;
foreach my $contig (@sortedcontigs) {
if($_contigs{$contig}{'range'} ) {
$offset = $_contigs{$contig}{'range'}{'start'};
Bio/Map/Physical.pm view on Meta::CPAN
my $i;
my ($depth, $save_depth);
my ($x, $y);
my @stack;
my ($k, $j, $s);
my $pos;
my $contig;
# Calculate the position for the marker in the contig
my @contigs = $self->each_contigid();
my @sortedcontigs = sort {$a <=> $b } @contigs;
my $offset;
my %gffclones;
my %gffcontigs;
foreach my $marker ($self->each_markerid()) {
my (@ctgmarker, @sortedctgmarker);
my @clones = (keys %{$_markers{$marker}{'clones'}})
if (exists ($_markers{$marker}{'clones'} ));
Bio/Map/Physical.pm view on Meta::CPAN
=cut
sub _calc_contigposition{
my ($self) = @_;
my %_contigs = %{$self->{'_contigs'}};
my %_markers = %{$self->{'_markers'}};
my %_clones = %{$self->{'_clones'}};
my @contigs = $self->each_contigid();
my @sortedcontigs = sort {$a <=> $b } @contigs;
foreach my $contig (@sortedcontigs) {
my $position = 0;
my $group;
if (exists($_contigs{$contig}{'group'}) ) {
my %weightedmarkers;
my @mkrs = keys (%{$_contigs{$contig}{'markers'}})
Bio/Map/Physical.pm view on Meta::CPAN
Usage : $map->_calc_contiggroup();
Function: calculates the group of the contig
Returns : none
Args : none
=cut
# Assigns a 'group' to every contig id reported by each_contigid, computed
# as floor(contig id / 1000).
# The top-level '_contigs' hash is copied, but the per-contig entries are
# the same references, so the group is written into the stored records.
sub _calc_contiggroup {
    my $self = shift;
    my %contig_info = %{ $self->{'_contigs'} };

    for my $contig_id ($self->each_contigid()) {
        $contig_info{$contig_id}{'group'} = floor($contig_id / 1000);
    }
}
=head2 _setI<E<lt>TypeE<gt>>Ref
Title : _set<Type>Ref
Bio/Root/IO.pm view on Meta::CPAN
$roots = [$roots] unless ref $roots;
} else {
$self->warn("No root path(s) specified\n");
return 0;
}
my $root;
for $root (@{$roots}) {
$root =~ s#/\z##;
(undef, undef, my $rp) = lstat $root or next;
$rp &= 07777; # don't forget setuid, setgid, sticky bits
if ( -d _ ) {
# notabene: 0777 is for making readable in the first place,
# it's also intended to change it to writable in case we have
# to recurse in which case we are better than rm -rf for
# subtrees with strange permissions
chmod(0777, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
or $self->warn("Could not make directory '$root' read+writable: $!")
unless $safe;
if (opendir DIR, $root){
@files = readdir DIR;
Bio/Root/Utilities.pm view on Meta::CPAN
=cut
#--------------
sub file_info {
#--------------
my ($self, %param) = @_;
my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
$get ||= 'all';
$fmt ||= 'yyyy-mm-dd';
my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
$atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
if($get =~ /date/i) {
## I can get the elapsed time since the file was modified but
## it's not so straightforward to get the date in a nice format...
## Think about using a standard CPAN module for this, like
## Date::Manip or Date::DateCalc.
my $date = $mtime;
my $elsec = time - $mtime;
Bio/SearchIO/fasta.pm view on Meta::CPAN
if exists $hsp->{evalue};
$self->element({'Name' => 'Hsp_evalue2', 'Data' => $hsp->{evalue2} } )
if exists $hsp->{evalue2};
$self->element({'Name' => 'Hsp_bit-score', 'Data' => $hsp->{bits} } )
if exists $hsp->{bits};
$self->element({'Name' => 'Hsp_sw-score', 'Data' => $hsp->{'n-w'} } )
if exists $hsp->{'n-w'};
$self->element({'Name' => 'Hsp_sw-score', 'Data' => $hsp->{sw} } )
if exists $hsp->{sw};
$self->element({'Name' => 'Hsp_gaps', 'Data' => $hsp->{'%_gid'} } )
if exists $hsp->{'%_gid'};
$self->element({
'Name' => 'Hsp_identity',
'Data' =>
sprintf( "%.0f", $hsp->{'%_id'} * $hsp->{alen} )
}) if ( exists $hsp->{'%_id'} && exists $hsp->{alen} );
if ( exists $hsp->{'%_gid'} ) {
$self->element(
{
'Name' => 'Hsp_positive',
'Data' =>
sprintf( "%.0f", $hsp->{'%_gid'} * $hsp->{alen} )
}
) if exists $hsp->{'%_gid'} && exists $hsp->{alen};
} else {
$self->element(
{
'Name' => 'Hsp_positive',
'Data' =>
sprintf( "%.0f", $hsp->{'%_id'} * $hsp->{alen} )
}
) if ( exists $hsp->{'%_id'} && exists $hsp->{alen} );
}
Bio/SeqIO/entrezgene.pm view on Meta::CPAN
The C<-debug> and C<-locuslink> options slow down the parser.
Example code which looks for ontology terms:
my $eio = Bio::SeqIO->new(-file => $file,
-format => 'entrezgene',
-service_record => 'yes');
while (my $seq = $eio->next_seq) {
my $gid = $seq->accession_number;
foreach my $ot ($seq->annotation->get_Annotations('OntologyTerm')) {
next if ($ot->term->authority eq 'STS marker'); # No STS markers
my $evid = $ot->comment;
$evid =~ s/evidence: //i;
my @ref = $ot->term->get_references;
my $id = $ot->identifier;
my $fid = 'GO:' . sprintf("%07u",$id);
print join("\t",$gid, $ot->ontology->name, $ot->name, $evid,
$fid, @ref?$ref[0]->medline:''), "\n";
}
}
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
my $str = $feat->strand;
my $id = $self->_find_name($feat, 'standard_name')
|| $self->_find_name($feat, 'gene')
|| $self->_find_name($feat, $feat->primary_tag)
|| $self->_find_name($feat, 'locus_tag')
|| $self->_find_name($feat, 'symbol')
|| $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
Could not find a gene/feature ID, feature must have a primary tag or a tag
with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
EOM
my $gid = $self->_find_name($feat, 'gene') || $id;
$writer->startTag('annotation', id => $id);
$self->_element('name', $gid);
$self->_element('type', $feat->primary_tag);
$self->_render_tags( $feat,
\&_render_date_tags,
\&_render_dbxref_tags,
\&_render_comment_tags,
\&_render_tags_as_properties,
);
my @genes;
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
# we are in a gene container; gene must then be one level down
@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
}
for my $g ( @genes ) {
my $id ||= $self->_find_name($g, 'standard_name')
|| $self->_find_name($g, 'gene')
|| $self->_find_name($feat, 'locus_tag')
|| $self->_find_name($feat, 'symbol')
|| $self->throw("Could not find a gene ID");
my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
$writer->startTag('gene', association => 'IS');
$self->_element('name', $gid);
$writer->endTag('gene');
my $proteins;
my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @variants = ('A' .. 'Z');
for my $mRNA (@mRNAs) {
my ($sn, @units);
# if the mRNA is a generic transcript, it must be a non-spliced RNA gene
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
($add_seq{desc}) = $cds->get_tag_values('product_desc');
$cds->remove_tag('product_desc');
}
unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
my $start = $cds->start;
my $end = $cds->end;
my $str = $cds->strand;
my $acc = $self->{seq}->accession || $self->{seq}->display_id;
$str = $str < 0 ? '[-]' : '';
$add_seq{desc} = "translation from_gene[$gid] " .
"cds_boundaries:(" . $acc .
":$start..$end$str) transcript_info:[$name]";
}
$self->{add_seqs} ||= [];
push @{$self->{add_seqs}}, \%add_seq;
}
}
$writer->startTag('feature_set', id => $name);
Bio/SeqIO/game/gameWriter.pm view on Meta::CPAN
@units = reverse @units;
}
for my $unit ( @units ) {
if ( $unit->primary_tag eq 'exon' ) {
my $ename = $id;
$ename .= ':' . ++$count;
$self->_feature_span($ename, $unit);
}
elsif ( $unit->primary_tag eq 'start_codon' ) {
$self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
}
else {
my $uname = $unit->primary_tag . ":$id";
$self->_feature_span($uname, $unit);
}
}
$self->{curr_pname} = '';
$writer->endTag('feature_set');
}
Bio/Tools/Geneid.pm view on Meta::CPAN
=encoding utf-8
=head1 NAME
Bio::Tools::Geneid - Results of one geneid run
=head1 SYNOPSIS
use Bio::Tools::Geneid;
my $gid = Bio::Tools::Geneid->new(-file => "geneid.out");
while (my $gene = $gid->next_prediction)
{
my @transcripts = $gene->transcripts;
foreach my $t (@transcripts)
{
my @exons = $t->exons;
foreach my $e (@exons)
{
printf("Exon %d..%d\n", $e->start, $e->end);
}
}
t/data/popstats.prettybase
t/data/pre_rel9.swiss
t/data/Primate_mtDNA.nex
t/data/primedseq.fa
t/data/primer3_infile.txt
t/data/primer3_outfile.txt
t/data/primer3_output.txt
t/data/prints.out
t/data/promoterwise.out
t/data/protpars.phy
t/data/protpars_longid.phy
t/data/ps_scan/out.PrositeScan
t/data/pseudowise.out
t/data/psi_xml.dat
t/data/psiblastreport.out
t/data/purine_v081.infernal
t/data/puzzle.tre
t/data/PX1CG.gb
t/data/Q8GBD3.swiss
t/data/qrna-relloc.out
t/data/qualfile.qual
maintenance/big_split/file_classification.csv view on Meta::CPAN
,"t/data/trees.nexml.xml"
,"t/data/testaln2.fasta"
,"t/data/test.game"
,"t/data/GlimmerHMM.out"
,"t/data/bug2246.blast"
,"t/data/Primate_mtDNA.nex"
,"t/data/codeml4.mlc"
,"t/data/Rab1.chaos-xml"
,"t/data/NC_001284.gbk"
,"t/data/dna2.fa"
,"t/data/protpars_longid.phy"
,"t/data/cds_sample.embl"
,"t/data/SPAN_Family4nl.nex"
,"t/data/testdbaccnums.out"
,"t/data/longnames.aln"
,"t/data/tab2part.mif"
,"t/data/gmap_f9-reverse-strand.txt"
,"t/data/test1.wublastp"
,"t/data/genemark-fragment.out"
,"t/data/Q8GBD3.swiss"
,"t/data/stress_test_pubmed.xml"
maintenance/cvs2cl_by_file.pl view on Meta::CPAN
}
die "No mail domain found\n"
unless defined $Domain;
# Fill %expansions (login name => "Real Name <address>") from a passwd-style
# file: colon-separated fields, with the GECOS field in fifth position.
# The three-argument open with a lexical handle keeps characters in the file
# name from being parsed as an open mode, and reading into a lexical leaves
# the caller's $_ untouched.
open (my $map_fh, '<', $User_Passwd_File)
    or die ("Unable to open $User_Passwd_File ($!)");
while (my $entry = <$map_fh>)
{
    # all lines are valid
    # Strip the line terminator so it cannot end up inside the last field of
    # a record that has fewer than the usual seven fields.
    chomp $entry;
    my ($username, $gecos) = (split /:/, $entry)[0, 4];
    # A record without a login name has nothing to map.
    next unless defined $username && length $username;
    # Only the part of GECOS before the first comma is used as the name.
    my ($expansion) = defined $gecos ? split (',', $gecos) : ();
    $expansion = '' unless defined $expansion;
    my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
    $expansions{$username} = "$expansion <$mailname>";
}
close ($map_fh);
}
scripts/Bio-DB-GFF/bp_bulk_load_gff.pl view on Meta::CPAN
by using the --Temporary switch.
END
# Declared here, not filled in this section.
# NOTE(review): presumably collects FASTA files to delete later; confirm.
my @fasta_files_to_be_unlinked;

# One scratch file per table name, created under $tmpdir and suffixed with
# the process id ($$). Handles are stored in %FH keyed by table name.
my @files = (FDATA,FTYPE,FGROUP,FDNA,FATTRIBUTE,FATTRIBUTE_TO_FEATURE);
foreach my $table (@files) {
    # The mode is passed as a separate argument so IO::File performs a
    # three-argument open; nothing in $tmpdir can then be read as part of
    # the open mode, and whitespace in the path is kept literally.
    $FH{$table} = IO::File->new("$tmpdir/$table.$$", '>') or die $table,": $!";
    $FH{$table}->autoflush;
}

# With $use_pg set, each dump that has a COPY statement starts with that
# header line (there is none for FDNA here).
if ( $use_pg ) {
    $FH{FDATA() }->print("COPY fdata (fid, fref, fstart, fstop, fbin, ftypeid, fscore, fstrand, fphase, gid, ftarget_start, ftarget_stop) FROM stdin;\n");
    $FH{FTYPE() }->print("COPY ftype (ftypeid, fmethod, fsource) FROM stdin;\n");
    $FH{FGROUP() }->print("COPY fgroup (gid, gclass, gname) FROM stdin;\n");
    $FH{FATTRIBUTE() }->print("COPY fattribute (fattribute_id, fattribute_name) FROM stdin;\n");
    $FH{FATTRIBUTE_TO_FEATURE()}->print("COPY fattribute_to_feature (fid, fattribute_id, fattribute_value) FROM stdin;\n");
}

# Next id to hand out for each kind of row; all start at 1.
my $FID = 1;
my $GID = 1;
my $FTYPEID = 1;
my $ATTRIBUTEID = 1;

# Lookup tables, empty at this point.
# NOTE(review): presumably map a key to an already-assigned id; confirm against the load loop.
my %GROUPID = ();
my %FTYPEID = ();
my %ATTRIBUTEID = ();
scripts/Bio-DB-GFF/bp_bulk_load_gff.pl view on Meta::CPAN
for (my $i=0; $i < @$group_name; $i++) {
$group_class->[$i] ||= '\N';
$group_name->[$i] ||= '\N';
$target_start ||= '\N';
$target_stop ||= '\N';
$method ||= '\N';
$source ||= '\N';
my $fid = $FID++;
my $gid = $GROUPID{lc join('',$group_class->[$i],$group_name->[$i])} ||= $GID++;
my $ftypeid = $FTYPEID{lc join('',$source,$method)} ||= $FTYPEID++;
my $bin = bin($start,$stop,$db->min_bin);
$FH{ FDATA() }->print( join("\t",$fid,$ref,$start,$stop,$bin,$ftypeid,$score,$strand,$phase,$gid,$target_start,$target_stop),"\n" );
if ($use_mysqlcmap){
my $feature_id = next_number(
db => $cmap_db,
table_name => 'cmap_feature',
id_field => 'feature_id',
)
or die 'No feature id';
my $direction = $strand eq '-' ? -1:1;
$FH{ FGROUP() }->print(
join("\t",$feature_id,$feature_id,'NULL',0, $group_name->[$i],0,0,'NULL',1,$direction, $group_class->[$i],)
,"\n"
) unless $DONE{"G$gid"}++;
}
else {
$FH{ FGROUP() }->print( join("\t",$gid,$group_class->[$i],$group_name->[$i]),"\n") unless $DONE{"G$gid"}++;
}
$FH{ FTYPE() }->print( join("\t",$ftypeid,$method,$source),"\n" ) unless $DONE{"T$ftypeid"}++;
foreach (@$attributes) {
my ($key,$value) = @$_;
my $attributeid = $ATTRIBUTEID{$key} ||= $ATTRIBUTEID++;
$FH{ FATTRIBUTE() }->print( join("\t",$attributeid,$key),"\n" ) unless $DONE{"A$attributeid"}++;
$FH{ FATTRIBUTE_TO_FEATURE() }->print( join("\t",$fid,$attributeid,$value),"\n");
}