view release on metacpan or search on metacpan
Bio/Align/AlignI.pm view on Meta::CPAN
Bio::Align::AlignI - An interface for describing sequence alignments.
=head1 SYNOPSIS
# get a Bio::Align::AlignI somehow - typically using Bio::AlignIO system
# some descriptors
print $aln->length, "\n";
print $aln->num_residues, "\n";
print $aln->is_flush, "\n";
print $aln->num_sequences, "\n";
print $aln->percentage_identity, "\n";
print $aln->consensus_string(50), "\n";
# find the position in the alignment for a sequence location
$pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
# extract sequences and check values for the alignment column $pos
foreach $seq ($aln->each_seq) {
$res = $seq->subseq($pos, $pos);
$count{$res}++;
}
Bio/Align/AlignI.pm view on Meta::CPAN
}
=head1 Alignment descriptors
These read only methods describe the MSE in various ways.
=head2 consensus_string
Title : consensus_string
Usage : $str = $ali->consensus_string($threshold_percent)
Function : Makes a strict consensus
Returns : consensus string
Argument : Optional threshold ranging from 0 to 100.
The consensus residue has to appear at least threshold %
of the sequences at a given location, otherwise a '?'
character will be placed at that location.
(Default value = 0%)
=cut
Bio/Align/AlignI.pm view on Meta::CPAN
Argument : None
Note : replaces no_sequences
=cut
sub num_sequences {
    # Interface stub: no counting is done here, the call is handed straight
    # to throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 percentage_identity
Title : percentage_identity
Usage : $id = $align->percentage_identity
Function: The function calculates the percentage identity of the alignment
Returns : The percentage identity of the alignment (as defined by the
implementation)
Argument: None
=cut
sub percentage_identity {
    # Interface stub: no calculation happens here, the call is handed
    # straight to throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 overall_percentage_identity
Title : overall_percentage_identity
Usage : $id = $align->overall_percentage_identity
Function: The function calculates the percentage identity of
the conserved columns
Returns : The percentage identity of the conserved columns
Args : None
=cut
sub overall_percentage_identity {
    # Interface stub: no calculation happens here, the call is handed
    # straight to throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 average_percentage_identity
Title : average_percentage_identity
Usage : $id = $align->average_percentage_identity
Function: The function uses a fast method to calculate the average
percentage identity of the alignment
Returns : The average percentage identity of the alignment
Args : None
=cut
sub average_percentage_identity {
    # Interface stub: no calculation happens here, the call is handed
    # straight to throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head1 Alignment positions
Methods to map a sequence position into an alignment column and back.
column_from_residue_number() does the former. The latter is really a
property of the sequence object and can be done using
L<Bio::LocatableSeq::location_from_column>:
Bio/AlignIO/clustalw.pm view on Meta::CPAN
Title : new
Usage : $alignio = Bio::AlignIO->new(-format => 'clustalw',
-file => 'filename');
Function: returns a new Bio::AlignIO object to handle clustalw files
Returns : Bio::AlignIO::clustalw object
Args : -verbose => verbosity setting (-1, 0, 1, 2)
-file => name of file to read in or to write, with ">"
-fh => alternative to -file param - provide a filehandle
to read from or write to
-format => alignment format to process or produce
-percentages => display a percentage of identity
in each line of the alignment (clustalw only)
-linelength=> alignment output line length (default 60)
=cut
sub _initialize {
    # Run the parent initialisation first, then pick up the two options this
    # format adds: -percentages and -linelength.
    my $self = shift;
    my @args = @_;
    $self->SUPER::_initialize(@args);

    my ( $show_percent, $width ) =
      $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );

    # Only touch the percentages flag when the caller actually supplied one.
    if ( defined $show_percent ) {
        $self->percentages($show_percent);
    }

    # A missing (or false) line length falls back to the module default.
    return $self->line_length( $width || $LINELENGTH );
}
=head2 next_aln
Title : next_aln
Usage : $aln = $stream->next_aln()
Function: returns the next alignment in the stream
Returns : Bio::Align::AlignI object
Args : NONE
Bio/AlignIO/clustalw.pm view on Meta::CPAN
$substring = "";
}
$self->_print(
sprintf(
"%-" . $max . "s %s\n",
$aln->displayname( $seq->get_nse() ), $substring
)
) or return;
}
my $percentages = '';
if ( $self->percentages ) {
my ($strcpy) = ($linesubstr);
my $count = ( $strcpy =~ tr/\*// );
$percentages =
sprintf( "\t%d%%", 100 * ( $count / length($linesubstr) ) );
}
$self->_print(
sprintf(
"%-" . $max . "s %s%s\n",
'', $linesubstr, $percentages
)
);
$self->_print( sprintf("\n\n") ) or return;
$count += $line_len;
}
}
$self->flush if $self->_flush_on_write && defined $self->_fh;
return 1;
}
=head2 percentages
Title : percentages
Usage : $obj->percentages($newval)
Function: Set the percentages flag - whether or not to show percentages in
each output line
Returns : value of percentages
Args : newvalue (optional)
=cut
sub percentages {
    # Combined getter/setter for the '_percentages' slot: a defined argument
    # is stored first, and the current slot value is always returned.
    my $self = shift;
    my $flag = shift;
    $self->{'_percentages'} = $flag if defined $flag;
    return $self->{'_percentages'};
}
=head2 line_length
Title : line_length
Usage : $obj->line_length($newval)
Function: Set the alignment output line length
Returns : value of line_length
Args : newvalue (optional)
Bio/AlignIO/proda.pm view on Meta::CPAN
Title : new
Usage : $alignio = Bio::AlignIO->new(-format => 'proda',
-file => 'filename');
Function: returns a new Bio::AlignIO object to handle proda files
Returns : Bio::AlignIO::proda object
Args : -verbose => verbosity setting (-1, 0, 1, 2)
-file => name of file to read in or to write, with ">"
-fh => alternative to -file param - provide a filehandle
to read from or write to
-format => alignment format to process or produce
-percentages => display a percentage of identity
in each line of the alignment (proda only)
-linelength=> alignment output line length (default 60)
=cut
sub _initialize {
    # Run the parent initialisation first, then pick up the two options this
    # format adds: -percentages and -linelength.
    my $self = shift;
    my @args = @_;
    $self->SUPER::_initialize(@args);

    my ( $show_percent, $width ) =
      $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );

    # Only touch the percentages flag when the caller actually supplied one.
    if ( defined $show_percent ) {
        $self->percentages($show_percent);
    }

    # A missing (or false) line length falls back to the module default.
    return $self->line_length( $width || $LINELENGTH );
}
=head2 next_aln
Title : next_aln
Usage : $aln = $stream->next_aln()
Function: returns the next alignment in the stream
Returns : Bio::Align::AlignI object
Args : NONE
Bio/AlignIO/proda.pm view on Meta::CPAN
Returns : 1 for success and 0 for error
Args : Bio::Align::AlignI object
=cut
sub write_aln {
    # Nothing is written here: the supplied alignments are ignored and the
    # call is handed to throw_not_implemented() on the invocant.
    my $self = shift;
    my @aln  = @_;
    return $self->throw_not_implemented();
}
=head2 percentages
Title : percentages
Usage : $obj->percentages($newval)
Function: Set the percentages flag - whether or not to show percentages in
each output line
Returns : value of percentages
Args : newvalue (optional)
=cut
sub percentages {
    # Combined getter/setter for the '_percentages' slot: a defined argument
    # is stored first, and the current slot value is always returned.
    my $self = shift;
    my $flag = shift;
    $self->{'_percentages'} = $flag if defined $flag;
    return $self->{'_percentages'};
}
=head2 line_length
Title : line_length
Usage : $obj->line_length($newval)
Function: Set the alignment output line length
Returns : value of line_length
Args : newvalue (optional)
Bio/AnalysisI.pm view on Meta::CPAN
=head2 last_event
Usage : $job->last_event
Returns : an XML string
Args : none
It returns a short XML document showing what happened last with this
job. This is the DTD used:
<!-- place for extensions -->
<!ENTITY % event_body_template "(state_changed | heartbeat_progress | percent_progress | time_progress | step_progress)">
<!ELEMENT analysis_event (message?, (%event_body_template;)?)>
<!ATTLIST analysis_event
timestamp CDATA #IMPLIED>
<!ELEMENT message (#PCDATA)>
<!ELEMENT state_changed EMPTY>
<!ENTITY % analysis_state "created | running | completed | terminated_by_request | terminated_by_error">
<!ATTLIST state_changed
previous_state (%analysis_state;) "created"
new_state (%analysis_state;) "created">
<!ELEMENT heartbeat_progress EMPTY>
<!ELEMENT percent_progress EMPTY>
<!ATTLIST percent_progress
percentage CDATA #REQUIRED>
<!ELEMENT time_progress EMPTY>
<!ATTLIST time_progress
remaining CDATA #REQUIRED>
<!ELEMENT step_progress EMPTY>
<!ATTLIST step_progress
total_steps CDATA #IMPLIED
steps_completed CDATA #REQUIRED>
Bio/Assembly/Contig.pm view on Meta::CPAN
}
=head2 Alignment descriptors
These read only methods describe the MSE in various ways.
=head2 consensus_string
Title : consensus_string
Usage : $str = $contig->consensus_string($threshold_percent)
Function : Makes a strict consensus
Returns : consensus string
Argument : Optional threshold ranging from 0 to 100.
The consensus residue has to appear at least threshold %
of the sequences at a given location, otherwise a '?'
character will be placed at that location.
(Default value = 0%)
=cut
Bio/Assembly/Contig.pm view on Meta::CPAN
Note : replaces no_sequences
=cut
sub num_sequences {
    # Count the entries held in the '_elem' hash of this object.
    my $self  = shift;
    my $count = keys %{ $self->{'_elem'} };
    return $count;
}
=head2 percentage_identity
Title : percentage_identity
Usage : $id = $contig->percentage_identity
Function: The function calculates the percentage identity of the alignment
Returns : The percentage identity of the alignment (as defined by the
implementation)
Argument: None
=cut
sub percentage_identity {
    # No calculation is provided here: the call is handed straight to
    # throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 overall_percentage_identity
Title : overall_percentage_identity
Usage : $id = $contig->overall_percentage_identity
Function: The function calculates the percentage identity of
the conserved columns
Returns : The percentage identity of the conserved columns
Args : None
=cut
sub overall_percentage_identity {
    # No calculation is provided here: the call is handed straight to
    # throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 average_percentage_identity
Title : average_percentage_identity
Usage : $id = $contig->average_percentage_identity
Function: The function uses a fast method to calculate the average
percentage identity of the alignment
Returns : The average percentage identity of the alignment
Args : None
=cut
sub average_percentage_identity {
    # No calculation is provided here: the call is handed straight to
    # throw_not_implemented() on the invocant.
    my $self = shift;
    return $self->throw_not_implemented();
}
=head2 Alignment positions
Methods to map a sequence position into an alignment column and back.
column_from_residue_number() does the former. The latter is really a
property of the sequence object and can be done using
L<Bio::LocatableSeq::location_from_column>:
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
asmbl_id -> contig ID
sequence -> contig ungapped consensus sequence (ambiguities are lowercase)
lsequence -> gapped consensus sequence (lowercase ambiguities)
quality -> gapped consensus quality score (in hexadecimal)
seq_id -> *
com_name -> *
type -> *
method -> always 'asmg' *
ed_status -> *
redundancy -> fold coverage of the contig consensus
perc_N -> percent of ambiguities in the contig consensus
seq# -> number of sequences in the contig
full_cds -> *
cds_start -> start of coding sequence *
cds_end -> end of coding sequence *
ed_pn -> name of editor (always 'GRA') *
ed_date -> date and time of edition
comment -> some comments *
frameshift -> *
Each read has the following attributes:
Bio/Assembly/IO/tigr.pm view on Meta::CPAN
$self->_print("\n\n");
return 1;
}
=head2 _perc_N
Title : _perc_N
Usage : my $perc_N = $asmio->_perc_N($sequence_string)
Function: Calculate the percent of ambiguities in a sequence.
M R W S Y K X N are regarded as ambiguities in an aligned read
sequence by TIGR Assembler. In the case of a gapped contig
consensus sequence, all lowercase symbols are ambiguities, i.e.:
a c g t u m r w s y k x n.
Returns : decimal number
Args : string
=cut
sub _perc_N {
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
max_size size of (number of sequences in) the largest contig
spectrum hash representation of a contig spectrum
nof_seq number of sequences
avg_seq_len average sequence length
eff_asm_params reports effective assembly parameters
nof_overlaps number of overlaps (needs eff_asm_params)
min_overlap minimum overlap length in a contig (needs eff_asm_params)
min_identity minimum sequence identity percentage (needs eff_asm_params)
avg_overlap average overlap length (needs eff_asm_params)
avg_identity average overlap identity percentage (needs eff_asm_params)
Operations on the contig spectra:
to_string create a string representation of the spectrum
spectrum import a hash contig spectrum
assembly determine a contig spectrum from an assembly, contig or singlet
dissolve calculate a dissolved contig spectrum (depends on assembly)
cross produce a cross contig spectrum (depends on assembly)
add add a contig spectrum to an existing one
average make an average of several contig spectra
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
}
$avg_overlap = $self->{'_avg_overlap'};
return $avg_overlap;
}
=head2 min_identity
Title : min_identity
Usage : $csp->min_identity
Function: get/set the assembly minimum overlap identity percent
Returns : 0 < decimal < 100
Args : 0 < decimal < 100 [optional]
=cut
sub min_identity {
    # Getter/setter for the '_min_identity' slot. A defined argument is
    # validated and stored; the current slot value is always returned.
    my $self      = shift;
    my $new_value = shift;

    if ( defined $new_value ) {
        # NOTE(review): the guard rejects everything below 1, while the
        # message says "strictly positive" and the POD advertises
        # 0 < decimal < 100 - confirm which of the two is intended.
        if ( $new_value < 1 ) {
            $self->throw("The minimum overlap percent identity must be strictly ".
                "positive. Got '$new_value'");
        }
        $self->{'_min_identity'} = $new_value;
    }

    return $self->{'_min_identity'};
}
=head2 avg_identity
Title : avg_identity
Usage : $csp->avg_identity
Function: get/set the assembly average overlap identity percent
Returns : 0 < decimal < 100
Args : 0 < decimal < 100 [optional]
=cut
sub avg_identity {
    # Getter/setter for the '_avg_identity' slot. A defined argument is
    # validated and stored; the current slot value is always returned.
    my $self      = shift;
    my $new_value = shift;

    if ( defined $new_value ) {
        # NOTE(review): the guard rejects everything below 1, while the
        # message says "strictly positive" and the POD advertises
        # 0 < decimal < 100 - confirm which of the two is intended.
        if ( $new_value < 1 ) {
            $self->throw("The average overlap percent identity must be strictly ".
                "positive. Got '$new_value'");
        }
        $self->{'_avg_identity'} = $new_value;
    }

    return $self->{'_avg_identity'};
}
=head2 avg_seq_len
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
return $score;
}
=head2 _naive_assembler
Title : _naive_assembler
Usage :
Function: Reassemble the specified sequences only based on their position in
the contig. This naive assembly only verifies that the minimum
overlap length and percentage identity are respected. No actual
alignment is done
Returns : arrayref of contigs and singlets
Args : Bio::Assembly::Contig
array reference of sequence IDs to use [optional]
minimum overlap length (integer) [optional]
minimum percentage identity (integer) [optional]
=cut
sub _naive_assembler {
my ($self, $contig, $seqlist, $min_overlap, $min_identity) = @_;
# Use all reads if none was specified:
if (not defined $seqlist) {
for my $seq ($contig->each_seq) {
push @$seqlist, $seq->id;
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
}
=head2 _get_assembly_overlap_stats
Title : _get_assembly_overlap_stats
Usage : my ($avglength, $avgidentity, $minlength, $min_identity, $nof_overlaps)
= $csp->_get_assembly_overlap_stats($assemblyobj);
Function: Get statistics about pairwise overlaps in contigs of an assembly
Returns : average overlap length
average identity percent
minimum overlap length
minimum identity percent
number of overlaps
Args : Bio::Assembly::Scaffold, Contig or Singlet object
hash reference with the IDs of the sequences to consider [optional]
=cut
sub _get_assembly_overlap_stats {
my ($self, $assembly_obj, $seq_hash) = @_;
# Sanity check
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
Function: Get statistics about pairwise overlaps in a contig or singlet. The
statistics are obtained using graph theory: each read is a node
and the edges between 2 reads are weighted by minus the number of
conserved residues in the alignment between the 2 reads. The
minimum spanning tree of this graph represents the overlaps that
form the contig. Overlaps that do not satisfy the minimum overlap
length and similarity get a malus on their score.
Note: This function requires the optional BioPerl dependency
module called 'Graph'
Returns : average overlap length
average identity percent
minimum overlap length
minimum identity percent
number of overlaps
Args : Bio::Assembly::Contig or Singlet object
hash reference with the IDs of the sequences to consider [optional]
=cut
sub _get_contig_overlap_stats {
my ($self, $contig_obj, $seq_hash) = @_;
# Sanity check
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
return $avg_length, $avg_identity, $min_length, $min_identity, $nof_overlaps;
}
=head2 _overlap_alignment
Title : _overlap_alignment
Usage :
Function: Produce an alignment of the overlapping section of two sequences of
a contig. Minimum overlap length and percentage identity can be
specified. Return undef if the sequences do not overlap or do not
meet the minimum overlap criteria.
Return : Bio::SimpleAlign object reference
alignment overlap length
alignment overlap identity
Args : Bio::Assembly::Contig object reference
Bio::LocatableSeq contig sequence 1
Bio::LocatableSeq contig sequence 2
minimum overlap length [optional]
minimum overlap identity percentage[optional]
=cut
sub _overlap_alignment {
my ($self, $contig, $qseq, $tseq, $min_overlap, $min_identity) = @_;
# get query and target sequence position
my $qpos = $contig->get_seq_coord($qseq);
my $tpos = $contig->get_seq_coord($tseq);
# check that there is an overlap
my $qend = $qpos->end;
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
$aln->add_seq($alseq);
$alseq = Bio::LocatableSeq->new(
-id => 2,
-seq => $tstring,
-start => 1,
-end => $overlap - $tgaps,
-alphabet => 'dna',
);
$aln->add_seq($alseq);
# check overlap percentage identity
my $identity = $aln->overall_percentage_identity;
return if defined $min_identity && $identity < $min_identity;
# all checks passed, return alignment
return $aln, $overlap, $identity;
}
=head2 _contig_graph
Title : _contig_graph
Bio/Assembly/Tools/ContigSpectrum.pm view on Meta::CPAN
Function: Creates a graph data structure of the contig. The graph is undirected.
The vertices are the reads of the contig and edges are the overlap
between the reads. The edges are weighted by the opposite of the
overlap, so it is negative and the better the overlap, the lower the
weight.
Return : Graph object or undef
hashref of overlaps (score, length, identity) for each read pair
Args : Bio::Assembly::Contig object reference
hash reference with the IDs of the sequences to consider [optional]
minimum overlap length (integer) [optional]
minimum percentage identity (integer) [optional]
=cut
sub _contig_graph {
my ($self, $contig_obj, $seq_hash, $min_overlap, $min_identity) = @_;
# Sanity checks
if( !ref $contig_obj || ! $contig_obj->isa('Bio::Assembly::Contig') ) {
$self->throw("Unable to process non Bio::Assembly::Contig ".
"object [".ref($contig_obj)."]");
Bio/CodonUsage/Table.pm view on Meta::CPAN
my $freq = $self->aa_frequency($aa);
$aa_freqs{$aa} = $freq;
}
return \%aa_freqs;
}
=head2 codon_abs_frequency
Title : codon_abs_frequency
Usage : my $freq = $cdtable->codon_abs_frequency('CTG');
Purpose : To return the frequency of that codon as a percentage
of all codons in the organism.
Returns : a percentage frequency
Args : a non-ambiguous codon string
=cut
sub codon_abs_frequency {
my ($self, $a) = @_;
my $cod = uc $a;
if ($self->_check_codon($cod)) {
my $ctable = Bio::Tools::CodonTable->new;
$ctable->id($self->genetic_code() );
Bio/CodonUsage/Table.pm view on Meta::CPAN
return $self->{'_table'}{$aa}{$cod}{'per1000'}/10 ;
}
else {return 0;}
}
=head2 codon_rel_frequency
Title : codon_rel_frequency
Usage : my $freq = $cdtable->codon_rel_frequency('CTG');
Purpose : To return the frequency of that codon as a percentage
of codons coding for the same amino acid. E.g., ATG and TGG
would return 100 as those codons are unique.
Returns : a percentage frequency
Args : a non-ambiguous codon string
=cut
sub codon_rel_frequency {
my ($self, $a) = @_;
my $cod = uc $a;
if ($self->_check_codon($cod)) {
my $ctable = Bio::Tools::CodonTable->new;
Bio/CodonUsage/Table.pm view on Meta::CPAN
Returns : A reference to a hash where keys are 1 letter amino acid codes
and values are references to arrays of codons whose frequency
is above the threshold.
Arguments: a minimum threshold frequency
=cut
sub probable_codons {
my ($self, $threshold) = @_;
if (!$threshold || $threshold < 0 || $threshold > 100) {
$self->throw(" I need a threshold percentage ");
}
my %return_hash;
for my $a(keys %STRICTAA) {
my @common_codons;
my $aa =$Bio::SeqUtils::THREECODE{$a};
for my $codon (keys %{ $self->{'_table'}{$aa}}) {
if ($self->{'_table'}{$aa}{$codon}{'rel_freq'} > $threshold/100){
push @common_codons, $codon;
}
}
Bio/CodonUsage/Table.pm view on Meta::CPAN
$self->warn(" need to give a codon sequence as a parameter ");
return 0;
}
}
=head2 get_coding_gc
Title : get_coding_gc
Usage : my $count = $cdtable->get_coding_gc(1);
Purpose : To return the percentage GC composition for the organism at
codon positions 1,2 or 3, or an average for all coding sequence
('all').
Returns : a number (%-age GC content) or 0 if these fields are undefined
Args : 1,2,3 or 'all'.
=cut
sub get_coding_gc {
my $self = shift;
if (! @_) {
Bio/CodonUsage/Table.pm view on Meta::CPAN
}
}#end of outer else
}
=head2 set_coding_gc
Title : set_coding_gc
Usage : my $count = $cdtable->set_coding_gc(-1=>55.78);
Purpose : To set the percentage GC composition for the organism at
codon positions 1,2 or 3, or an average for all coding sequence
('all').
Returns : void
Args : a hash where the key must be 1,2,3 or 'all' and the value the %age GC
at that codon position..
=cut
sub set_coding_gc {
my ($self, $key, $value) = @_;
Bio/CodonUsage/Table.pm view on Meta::CPAN
if !exists($self->{'_cds_count'});
return $self->{'_cds_count'} || 0.00;
}
=head2 aa_frequency
Title : aa_frequency
Usage : my $freq = $cdtable->aa_frequency('Leu');
Purpose : To retrieve the frequency of an amino acid in the organism
Returns : a percentage
Args : a 1 letter or 3 letter string representing the amino acid
=cut
sub aa_frequency {
my ($self, $a) = @_;
## process args ##
Bio/CodonUsage/Table.pm view on Meta::CPAN
my $freq = 0;
map {$freq += $self->{'_table'}{$aa3}{$_}{'per1000'} } keys %{$self->{'_table'}{$aa3}};
return sprintf("%.2f", $freq/10);
}
=head2 common_codon
Title : common_codon
Usage : my $freq = $cdtable->common_codon('Leu');
Purpose : To retrieve the frequency of the most common codon of that aa
Returns : a percentage
Args : a 1 letter or 3 letter string representing the amino acid
=cut
sub common_codon{
my ($self, $a) = @_;
my $aa = lc $a;
$aa =~ s/^(\w)/\U$1/;
Bio/CodonUsage/Table.pm view on Meta::CPAN
}
return $max;
}else {return 0;}
}
=head2 rare_codon
Title : rare_codon
Usage : my $freq = $cdtable->rare_codon('Leu');
Purpose : To retrieve the frequency of the least common codon of that aa
Returns : a percentage
Args : a 1 letter or 3 letter string representing the amino acid
=cut
sub rare_codon {
my ($self, $a) = @_;
my $aa = lc $a;
$aa =~ s/^(\w)/\U$1/;
if ($self->_check_aa($aa)) {
my $aa3 = $Bio::SeqUtils::THREECODE{$aa};
Bio/DB/GFF.pm view on Meta::CPAN
The stop of the annotation relative to the reference sequence. Start
is always less than or equal to stop.
=item 6.
score
For annotations that are associated with a numeric score (for example,
a sequence similarity), this field describes the score. The score
units are completely unspecified, but for sequence similarities, it is
typically percent identity. Annotations that don't have a score can
use "."
=item 7.
strand
For those annotations which are strand-specific, this field is the
strand on which the annotation resides. It is "+" for the forward
strand, "-" for the reverse strand, or "." for annotations that are
not stranded.
Bio/Map/MapI.pm view on Meta::CPAN
Usage : my @common_elements = $map->common_elements(\@other_maps);
my @common_elements = Bio::Map::SimpleMap->common_elements(\@maps);
Function: Find the elements that are common to multiple maps.
Returns : array of Bio::Map::MappableI
Args : arg #1 = L<Bio::Map::MapI> to compare this one to, or an array ref
of such objects (mandatory)
arg #2 = optionally, one or more of the key => value pairs below
-min_num => int : the minimum number of input maps an element
must be found on before being returned
[default is 1]
-min_percent => number : as above, but the minimum percentage of
input maps [default is 100 - note that this
will effectively override all other options]
-require_self => 1|0 : require that all output elements at least
be on the calling map [default is 1, has no
effect when the second usage form is used]
-required => \@maps : require that all output elements be on at
least all the maps supplied here
=cut
Bio/Map/MapI.pm view on Meta::CPAN
@maps = @{$maps_ref};
}
elsif ($maps_ref->isa('Bio::Map::MapI')) {
@maps = ($maps_ref);
}
if (ref($self)) {
unshift(@maps, $self);
}
$self->throw("Need at least 2 maps") unless @maps >= 2;
my %args = (-min_num => 1, -min_percent => 100, -require_self => 1, -required => undef, @extra_args);
my $min_num = $args{-min_num};
if ($args{-min_percent}) {
my $mn = @maps / 100 * $args{-min_percent};
if ($mn > $min_num) {
$min_num = $mn;
}
}
my %required = map { $_ => 1 } $args{-required} ? @{$args{-required}} : ();
$required{$self} = 1 if ref($self) && $args{-require_self};
my @required = keys %required;
my %map_elements;
my %elements;
Bio/Map/Mappable.pm view on Meta::CPAN
-relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
of each Position's relative position to the
thing described by that Relative
-min_pos_num => int : the minimum number of positions that must
be in a group before it will be returned
[default is 1]
-min_mappables_num => int : the minimum number of different
mappables represented by the
positions in a group before it
will be returned [default is 1]
-min_mappables_percent => number : as above, but the minimum
percentage of input mappables
[default is 0]
-min_map_num => int : the minimum number of different
maps represented by the positions
in a group before it will be
returned [default is 1]
-min_map_percent => number : as above, but the minimum
percentage of maps known by the
input mappables [default is 0]
-require_self => 1|0 : require that at least one of the
calling object's positions be in
each group [default is 1, has no
effect when the second usage form
is used]
-required => \@mappables : require that at least one position
for each mappable supplied in this
array ref be in each group
Bio/Map/Mappable.pm view on Meta::CPAN
of each Position's relative position to the
thing described by that Relative
-min_pos_num => int : the minimum number of positions that must
be in a group before the intersection will
be calculated and returned [default is 1]
-min_mappables_num => int : the minimum number of different
mappables represented by the
positions in a group before the
intersection will be calculated
and returned [default is 1]
-min_mappables_percent => number : as above, but the minimum
percentage of input mappables
[default is 0]
-min_map_num => int : the minimum number of different
maps represented by the positions
in a group before the intersection
will be calculated and returned
[default is 1]
-min_map_percent => number : as above, but the minimum
percentage of maps known by the
input mappables [default is 0]
-require_self => 1|0 : require that at least one of the
calling object's positions be in
each group [default is 1, has no
effect when the second usage form
is used]
-required => \@mappables : require that at least one position
for each mappable supplied in this
array ref be in each group
Bio/Map/Mappable.pm view on Meta::CPAN
of each Position's relative position to the
thing described by that Relative
-min_pos_num => int : the minimum number of positions that must
be in a group before the union will be
calculated and returned [default is 1]
-min_mappables_num => int : the minimum number of different
mappables represented by the
positions in a group before the
union will be calculated and
returned [default is 1]
-min_mappables_percent => number : as above, but the minimum
percentage of input mappables
[default is 0]
-min_map_num => int : the minimum number of different
maps represented by the positions
in a group before the union will
be calculated and returned
[default is 1]
-min_map_percent => number : as above, but the minimum
percentage of maps known by the
input mappables [default is 0]
-require_self => 1|0 : require that at least one of the
calling object's positions be in
each group [default is 1, has no
effect when the second usage form
is used]
-required => \@mappables : require that at least one position
for each mappable supplied in this
array ref be in each group
Bio/Map/Mappable.pm view on Meta::CPAN
# do a RangeI-related comparison by calling the corresponding PositionI method
# on all the requested Positions of our Mappables
sub _compare {
my ($self, $method, $input, @extra_args) = @_;
$self->throw("Must supply an object or array ref of them") unless ref($input);
$self->throw("Wrong number of extra args (should be key => value pairs)") unless @extra_args % 2 == 0;
my @compares = ref($input) eq 'ARRAY' ? @{$input} : ($input);
my %args = (-map => undef, -relative => undef, -min_pos_num => 1,
-min_mappables_num => 1, -min_mappables_percent => 0,
-min_map_num => 1, -min_map_percent => 0,
-require_self => 0, -required => undef, -min_overlap_percent => 0, @extra_args);
my $map = $args{-map};
my $rel = $args{-relative};
my $overlap = $args{-min_overlap_percent};
my $min_pos_num = $args{-min_pos_num};
my $min_pables_num = $args{-min_mappables_num};
if ($args{-min_mappables_percent}) {
my $mn = (@compares + (ref($self) ? 1 : 0)) / 100 * $args{-min_mappables_percent};
if ($mn > $min_pables_num) {
$min_pables_num = $mn;
}
}
my $min_map_num = $args{-min_map_num};
if ($args{-min_map_percent}) {
my %known_maps;
foreach my $pable (@compares, ref($self) ? ($self) : ()) {
foreach my $known ($pable->known_maps) {
$known_maps{$known->unique_id} = 1;
}
}
my $mn = scalar(keys %known_maps) / 100 * $args{-min_map_percent};
if ($mn > $min_map_num) {
$min_map_num = $mn;
}
}
my %required = map { $_ => 1 } $args{-required} ? @{$args{-required}} : ();
my (@mine, @yours);
if (ref($self)) {
@mine = $self->get_positions($map);
if ($args{-require_self}) {
Bio/Map/PositionI.pm view on Meta::CPAN
Title : overlaps
Usage : if ($p1->overlaps($p2)) {...}
Function: Tests if $p1 overlaps $p2.
Returns : True if the positions overlap (regardless of map), false otherwise
Args : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
one to (mandatory)
arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
arg #3 = optional Bio::Map::RelativeI to ask if the Positions
overlap in terms of their relative position to the thing
described by that Relative
arg #4 = optional minimum percentage length of the overlap before
reporting an overlap exists (default 0)
=cut
sub overlaps {
    # Overrides the RangeI implementation so that a Relative can be handled.
    #
    # Arguments, in order: the other range, the strand-testing option, an
    # optional Relative, and an optional minimum overlap percentage.
    # Returns the false result of _testStrand() when that test fails,
    # otherwise a boolean saying whether the two coordinate pairs overlap.
    my ($self, $other, $so, $rel, $min_percent) = @_;
    $min_percent = 0 unless $min_percent;

    # With a positive percentage each range gets a margin derived from its
    # own length; with no percentage both margins stay 0.
    my $own_min   = 0;
    my $other_min = 0;
    if ($min_percent > 0) {
        $own_min   = $self->length()  / 100 * $min_percent - 1;
        $other_min = $other->length() / 100 * $min_percent - 1;
    }

    my ($own_start,   $own_end)   = $self->_pre_rangei($self,  $rel);
    my ($other_start, $other_end) = $self->_pre_rangei($other, $rel);

    # The strand test is evaluated after the coordinates, as before, and a
    # failing result is returned unchanged.
    my $strand_ok = $self->_testStrand($other, $so);
    return $strand_ok unless $strand_ok;

    # The ranges are disjoint when either one, shrunk by its margin, lies
    # entirely to one side of the other.
    my $disjoint =
         ( $own_start + $own_min > $other_end || $own_end - $own_min < $other_start )
      || ( $own_start > $other_end - $other_min || $own_end < $other_start + $other_min );

    return !$disjoint;
}
Bio/Map/PositionI.pm view on Meta::CPAN
Usage : my @disc_ranges = Bio::Map::Position->disconnected_ranges(@ranges);
Function: Creates the minimal set of positions such that each input position is
fully contained by at least one output position, and none of the
output positions overlap.
Returns : Bio::Map::Mappable with the calculated disconnected ranges
Args : a Bio::Map::PositionI to compare this one to, or a list of such,
OR
a single Bio::Map::PositionI or array ref of such AND a
Bio::Map::RelativeI to consider all Position's co-ordinates in terms
of their relative position to the thing described by that Relative,
AND, optionally, an int for the minimum percentage of overlap that
must be present before considering two ranges to be overlapping
(default 0)
=cut
sub disconnected_ranges {
# overriding the RangeI implementation so we can transfer map and handle
# Relative
my ($self, @args) = @_;
$self->throw("Not enough arguments") unless @args >= 1;
Bio/Map/Prediction.pm view on Meta::CPAN
-map => Bio::Map::GeneMap->get(-gene => 'gene1',
-species => 'species1'),
-start => 950,
-end => 960);
# etc.
# find the places where predictions agree
use Bio::Map::GeneRelative;
my $rel = Bio::Map::GeneRelative->new(-gene => 0);
my $di = Bio::Map::Mappable->disconnected_intersections([$pred1, $pred2],
-min_mappables_percent => 100,
-min_map_percent => 100,
-relative => $rel);
my @positions = $di->get_positions;
=head1 DESCRIPTION
For example, used to model transcription factor binding site predictions, which
can have multiple locations in several maps.
=head1 FEEDBACK
Bio/Matrix/PSM/ProtMatrix.pm view on Meta::CPAN
=head2 regexp
Title : regexp
Usage :
Function : Returns a case-insensitive regular expression which matches the
IUPAC convention. X's in consensus sequence will match anything.
Throws :
Example :
Returns : string
Args : Threshold for calculating consensus sequence (number in range 0-100
representing a percentage). Threshold defaults to 20.
=cut
sub regexp {
my $self = shift;
my $threshold = 20;
if ( @_ ) { my $threshold = shift };
my @alphabet = @{$self->{_alphabet}};
my $width = $self->width;
Bio/Matrix/PSM/ProtMatrix.pm view on Meta::CPAN
=head2 regexp_array
Title : regexp_array
Usage :
Function : Returns an array of position-specific regular expressions.
X's in consensus sequence will match anything.
Throws :
Example :
Returns : Array of position-specific regular expressions.
Args : Threshold for calculating consensus sequence (number in range 0-100
representing a percentage). Threshold defaults to 20.
Notes : Simply calls regexp method in list context.
=cut
sub regexp_array {
    # Returns the position-specific regular expressions as a list.
    #
    # Args   : optional threshold (0-100), forwarded unchanged to regexp()
    # Returns: list of per-position regular expressions
    my ($self, @args) = @_;

    # regexp() has to be called in list context here: dereferencing its
    # scalar-context result (documented as a string) as an array does not
    # yield the per-position list.
    my @regexps = $self->regexp(@args);

    # NOTE(review): tolerate an implementation that hands back one array
    # reference, which is what the previous @{ ... } dereference expected.
    return @{ $regexps[0] } if @regexps == 1 && ref $regexps[0] eq 'ARRAY';

    return @regexps;
}
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
Function: Builds a new Bio::Search::HSP::GenericHSP object
Returns : Bio::Search::HSP::GenericHSP
Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
-evalue => evalue
-pvalue => pvalue
-bits => bit value for HSP
-score => score value for HSP (typically z-score but depends on
analysis)
-hsp_length=> Length of the HSP (including gaps)
-identical => # of residues that matched identically
-percent_identity => (optional) percent identity
-conserved => # of residues that matched conservatively
(only protein comparisons;
conserved == identical in nucleotide comparisons)
-hsp_gaps => # of gaps in the HSP
-query_gaps => # of gaps in the query in the alignment
-hit_gaps => # of gaps in the subject in the alignment
-query_name => HSP Query sequence name (if available)
-query_start => HSP Query start (in original query sequence coords)
-query_end => HSP Query end (in original query sequence coords)
-query_length=> total length of the query sequence
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
Title : hsp_length
Usage : my $len = $hsp->hsp_length()
Function: shortcut length('hsp')
Returns : length of the HSP (including gaps)
Args : [optional] new length value, passed through to length('hsp', ...)
=cut
sub hsp_length {
    # Shortcut for length('hsp', ...); the optional second argument is
    # passed straight through (undef when the caller gave none).
    my $self  = shift;
    my $value = shift;
    return $self->length('hsp', $value);
}
=head2 percent_identity
Title : percent_identity
Usage : my $percentid = $hsp->percent_identity()
Function: Returns the calculated percent identity for an HSP
Returns : floating point between 0 and 100
Args : none
=cut
sub percent_identity {
    # Runs the one-time _pre_pi() preparation (tracked by the _did_prepi
    # flag) and then defers to the parent class implementation.
    my ($self, @args) = @_;
    $self->_pre_pi unless $self->{_did_prepi};
    return $self->SUPER::percent_identity(@args);
}
=head2 frame
Title : frame
Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
Function: Set the Frame for both query and subject and ensure that
they agree.
This overrides the frame() method implementation in
FeaturePair.
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
$self->{_created_sff} = 1;
$self->{_making_sff} = 0;
}
# before calling the num_* methods
sub _pre_similar_stats {
my $self = shift;
my $identical = $self->{IDENTICAL};
my $conserved = $self->{CONSERVED};
my $percent_id = $self->{PERCENT_IDENTITY};
if (! defined $identical) {
if (! defined $percent_id) {
$self->warn("Did not defined the number of identical matches or overall percent identity in the HSP; assuming 0");
$identical = 0;
}
else {
$identical = sprintf("%.0f",$percent_id * $self->{HSP_LENGTH});
}
}
if (! defined $conserved) {
$self->warn("Did not define the number of conserved matches in the HSP; assuming conserved == identical ($identical)")
if( $self->{ALGORITHM} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
$conserved = $identical;
}
$self->{IDENTICAL} = $identical;
$self->{CONSERVED} = $conserved;
Bio/Search/HSP/GenericHSP.pm view on Meta::CPAN
: scalar( $hit_seq =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-'
my $offset = $self->{'_sbjct_offset'} || 1;
$self->gaps('hit', $hg/$offset);
}
if( ! defined $gaps ) {
$gaps = $self->gaps("query") + $self->gaps("hit");
}
$self->gaps('total', $gaps);
}
# before percent_identity
sub _pre_pi {
    # Lazily seeds percent_identity(): marks the preparation as done, then
    # stores either the supplied PERCENT_IDENTITY or a value derived from
    # frac_identical('total'), but only for HSPs with a positive length.
    my ($self) = @_;
    $self->{_did_prepi} = 1;
    if ( $self->{HSP_LENGTH} > 0 ) {
        my $percent = $self->{PERCENT_IDENTITY}
            || $self->frac_identical('total') * 100;
        $self->percent_identity($percent);
    }
}
1;
Bio/Search/HSP/HMMERHSP.pm view on Meta::CPAN
this may be greater than either hit or query )
Returns : integer
Args : arg 1: 'query' = length of query seq (without gaps)
'hit' = length of hit seq (without gaps)
'total' = length of alignment (with gaps)
default = 'total'
arg 2: [optional] integer length value to set for specific type
=cut
=head2 percent_identity
Title : percent_identity
Usage : my $percentid = $hsp->percent_identity()
Function: Returns the calculated percent identity for an HSP
Returns : floating point between 0 and 100
Args : none
=cut
=head2 frame
Title : frame
Bio/Search/HSP/HSPI.pm view on Meta::CPAN
'total' = length of alignment (with gaps)
default = 'total'
Args : none
=cut
sub length {
    # Interface stub: defers to throw_not_implemented() on the invocant.
    my ($self) = @_;
    return $self->throw_not_implemented();
}
=head2 percent_identity
Title : percent_identity
Usage : my $percentid = $hsp->percent_identity()
Function: Returns the calculated percent identity for an HSP
Returns : floating point between 0 and 100
Args : none
=cut
sub percent_identity {
    # Percent identity is the 'hsp' identical fraction scaled to 0-100.
    my $self     = shift;
    my $fraction = $self->frac_identical('hsp');
    return $fraction * 100;
}
=head2 get_aln
Title : get_aln
Usage : my $aln = $hsp->get_aln
Function: Returns a Bio::SimpleAlign representing the HSP alignment
Returns : Bio::SimpleAlign
Bio/Search/HSP/ModelHSP.pm view on Meta::CPAN
=head2 generate_cigar_string
=cut
sub generate_cigar_string {
    # Not supported here: issues a warning and returns nothing.
    my ($self) = @_;
    $self->warn('$hsp->generate_cigar_string not implemented for Model-based searches');
    return;
}
=head2 percent_identity
=cut
sub percent_identity {
    # Not supported here: issues a warning and returns nothing.
    my ($self) = @_;
    $self->warn('$hsp->percent_identity not implemented for Model-based searches');
    return;
}
############## PRIVATE ##############
# the following method postprocesses HSP data in cases where the sequences
# aren't complete (which can trigger a validation error)
{
my $SEQ_REGEX = qr/\*\[\s*(\d+)\s*\]\*/;
Bio/Search/HSP/PullHSPI.pm view on Meta::CPAN
Args : @args received in new().
=cut
sub _setup {
my ($self, @args) = @_;
# fields most subclasses probably will want
$self->_fields( { ( hsp_length => undef,
identical => undef,
percent_identity => undef,
conserved => undef,
hsp_gaps => undef,
query_gaps => undef,
hit_gaps => undef,
evalue => undef,
pvalue => undef,
score => undef,
query_start => undef,
query_end => undef,
query_string => undef,
Bio/Search/HSP/PullHSPI.pm view on Meta::CPAN
Function: shortcut for length('total')
Returns : length of the alignment (with gaps)
Args : none
=cut
sub hsp_length {
    # Shortcut for length('total').
    my ($self) = @_;
    return $self->length('total');
}
=head2 percent_identity
Title : percent_identity
Usage : my $percentid = $hsp->percent_identity()
Function: Returns the calculated percent identity for an HSP
Returns : floating point between 0 and 100
Args : none
=cut
sub percent_identity {
    # Percent identity is the 'hsp' identical fraction scaled to 0-100.
    my $self     = shift;
    my $fraction = $self->frac_identical('hsp');
    return $fraction * 100;
}
=head2 get_aln
Title : get_aln
Usage : my $aln = $hsp->get_aln
Function: Returns a Bio::SimpleAlign representing the HSP alignment
Returns : Bio::SimpleAlign
Bio/Search/SearchUtils.pm view on Meta::CPAN
# too hard as these don't always match
# $hash{"hsp$hspcount\_evalue"} =
# ( sprintf("%.0e",$hsp->evalue) =~ /e\-?(\d+)/ );
$hash{"hsp$hspcount\_qs"} = $hsp->query->start;
$hash{"hsp$hspcount\_qe"} = $hsp->query->end;
$hash{"hsp$hspcount\_qstr"} = $hsp->query->strand;
$hash{"hsp$hspcount\_hs"} = $hsp->hit->start;
$hash{"hsp$hspcount\_he"} = $hsp->hit->end;
$hash{"hsp$hspcount\_hstr"} = $hsp->hit->strand;
#$hash{"hsp$hspcount\_pid"} = sprintf("%d",$hsp->percent_identity);
#$hash{"hsp$hspcount\_fid"} = sprintf("%.2f",$hsp->frac_identical);
$hash{"hsp$hspcount\_gaps"} = $hsp->gaps('total');
$hspcount++;
}
$hitcount++;
}
return %hash;
}
sub _warn_about_no_hsps {
Bio/Search/Tiling/TilingI.pm view on Meta::CPAN
system and the length reporting method (see STATISTICS
METHODS above)
=cut
sub frac_identical {
    # Interface stub: $type and any further arguments are accepted but
    # unused; the call is delegated to throw_not_implemented().
    my $self = shift;
    return $self->throw_not_implemented;
}
=head2 percent_identity
Title : percent_identity
Usage : $tiling->percent_identity($type)
Function: Return the fraction of sequence length consisting
of identical pairs as a percentage
Returns : scalar float
Args : scalar $type, one of 'hit', 'subject', 'query'
=cut
sub percent_identity {
    # frac_identical() for the same arguments, expressed as a percentage.
    my ($self, $type, @args) = @_;
    my $fraction = $self->frac_identical($type, @args);
    return $fraction * 100;
}
=head2 frac_conserved
Title : frac_conserved
Usage : $tiling->frac_conserved($type)
Function: Return the fraction of sequence length consisting
of conserved pairs
Bio/Search/Tiling/TilingI.pm view on Meta::CPAN
system and the length reporting method (see STATISTICS
METHODS above)
=cut
sub frac_conserved {
    # Interface stub: $type and any further arguments are accepted but
    # unused; the call is delegated to throw_not_implemented().
    my $self = shift;
    return $self->throw_not_implemented;
}
=head2 percent_conserved
Title : percent_conserved
Usage : $tiling->percent_conserved($type)
Function: Return the fraction of sequence length consisting
of conserved pairs as a percentage
Returns : scalar float
Args : scalar $type, one of 'hit', 'subject', 'query'
=cut
sub percent_conserved {
    # frac_conserved() for the same arguments, expressed as a percentage.
    my ($self, $type, @args) = @_;
    my $fraction = $self->frac_conserved($type, @args);
    return $fraction * 100;
}
=head2 frac_aligned
Title : frac_aligned
Usage : $tiling->frac_aligned($type)
Function: Return the fraction of B<input> sequence length
that was aligned by the algorithm
Bio/SearchIO/Writer/TextResultWriter.pm view on Meta::CPAN
sub hit_filter {
my $hit = shift;
return $hit->length E<gt> 100; # test if length of the hit sequence
# long enough
}
my $writer = Bio::SearchIO::Writer::TextResultWriter->new(
-filters => { 'HIT' =E<gt> \&hit_filter }
);
Another example would be to filter HSPs on percent identity, let's
only include HSPs which are 75% identical or better.
sub hsp_filter {
my $hsp = shift;
return $hsp->percent_identity E<gt> 75;
}
my $writer = Bio::SearchIO::Writer::TextResultWriter->new(
-filters => { 'HSP' =E<gt> \&hsp_filter }
);
See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
This module will use the module Text::Wrap if it is installed to wrap
the Query description line. If you do not have Text::Wrap installed
Bio/SearchIO/blasttable.pm view on Meta::CPAN
}
# -m 9 only
elsif(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) {
($alg, $ver) = ($1, $2);
next;
}
next if /^#/ || /^\s*$/;
my @fields = split;
next if @fields == 1;
my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits);
# WU-BLAST-specific
my ($num_scores, $raw_score, $identities, $positives, $percent_pos,
$qgap_blocks,$qgaps, $sgap_blocks, $sgaps, $qframe,
$sframe);
# NCBI -m8 and -m9
if (@fields == 12) {
($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
# NCBI -m8 and -m9, v 2.2.18+
} elsif (@fields == 13) {
($qname, $hname, $percent_id, $percent_pos, $hsp_len, $mismatches, $gapsm,
$qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
}
# WU-BLAST -mformat 2 and 3
elsif ((@fields == 22) or (@fields == 24)) {
($qname,$hname,$evalue,$num_scores, $bits, $raw_score, $hsp_len,
$identities, $positives,$mismatches, $percent_id, $percent_pos,
$qgap_blocks, $qgaps, $sgap_blocks, $sgaps, $qframe, $qstart,
$qend, $sframe, $hstart,$hend,) = @fields;
# we need total gaps in the alignment
$gapsm=$qgaps+$sgaps;
}
if (@fields == 12 || @fields == 13) {
# need to determine total gaps in the alignment for NCBI output
# since NCBI reports number of gapopens and NOT total gaps
my $qlen = abs($qstart - $qend) + 1;
Bio/SearchIO/blasttable.pm view on Meta::CPAN
$self->element({'Name' => 'Hit_id',
'Data' => $hname});
# we'll store the 1st hsp bits as the hit bits
$self->element({'Name' => 'Hit_bits',
'Data' => $bits});
# we'll store the 1st hsp value as the hit evalue
$self->element({'Name' => 'Hit_signif',
'Data' => $evalue});
}
my $identical = $hsp_len - $mismatches - $gapsm;
# If the $positives value is absent, try to recover it from $percent_pos;
# this is better than letting the program assume "conserved == identical"
if (not defined $positives and defined $percent_pos) {
$positives = sprintf "%d", ($percent_pos * $hsp_len / 100);
}
$self->start_element({'Name' => 'Hsp'});
$self->element({'Name' => 'Hsp_evalue',
'Data' => $evalue});
$self->element({'Name' => 'Hsp_bit-score',
'Data' => $bits});
$self->element({'Name' => 'Hsp_identity',
'Data' => $identical});
$self->element({'Name' => 'Hsp_positive',
'Data' => $positives});
Bio/SearchIO/gmap_f9.pm view on Meta::CPAN
my $path = {};
(
$path->{query},
$path->{db},
$path->{path_num},
$path->{path_total_num},
$path->{query_length},
$path->{exon_count},
$path->{trimmed_coverage},
$path->{percent_identity},
$path->{query_start},
$path->{query_end},
$path->{whole_genome_start},
$path->{whole_genome_end},
$path->{chromosome},
$path->{chromo_start},
$path->{chromo_end},
$path->{strand},
$path->{sense},
$path->{md5},
) =
($_ =~ qr|
>
([^ ]*)[ ] # the query id}, followed by a space
([^ ]*)[ ] # the genome database, followed by a space
(\d+)/(\d+)[ ] # path_num/path_total_num (e.g. 3/12)
(\d+)[ ] # query length, followed by a space
(\d+)[ ] # hsp/exon count, followed by a space
(\d+\.\d*)[ ] # trimmed coverage
(\d+\.\d*)[ ] # percent identity
(\d+)\.\.(\d+)[ ] # query start .. query end, followed by space
(\d+)\.\.(\d+)[ ] # whole genome s..e, followed by space
(\d+): # chromosome number
(\d+)\.\.(\d+)[ ] # chromo s..e, followed by a space
([+-])[ ] # strand, followed by a space
dir:(.*) # dir:sense or dir:antisense
[ ]md5:([\dabcdefg]+) # md5 signature
|x
);
Bio/SearchIO/gmap_f9.pm view on Meta::CPAN
$path->{db} or $self->throw("db was not found in path line.");
$path->{path_num} or $self->throw("path_num was not found in path line.");
$path->{path_total_num} or
$self->throw("path_total_num was not found in path line.");
$path->{query_length} or
$self->throw("query_length was not found in path line.");
$path->{exon_count} or
$self->throw("exon_count was not found in path line.");
$path->{trimmed_coverage} or
$self->throw("trimmed_coverage was not found in path line.");
$path->{percent_identity} or
$self->throw("percent_identity was not found in path line.");
$path->{query_start} or
$self->throw("query_start was not found in path line.");
$path->{query_end} or
$self->throw("query_end was not found in path line.");
$path->{whole_genome_start} or
$self->throw("whole_genome_start was not found in path line.");
$path->{whole_genome_end} or
$self->throw("whole_genome_end was not found in path line.");
$path->{chromosome} or
$self->throw("chromosome was not found in path line.");
Bio/SearchIO/gmap_f9.pm view on Meta::CPAN
sub _parse_alignment_line {
    # Parses one tab-led alignment line into a hash reference.
    #
    # Args   : $a_line - the alignment line to parse; when omitted, $_ is
    #                    parsed instead (the previous code always read $_)
    # Returns: hash ref with the keys chromo_start, chromo_end, query_start,
    #          query_end, percent_identity, align_length and intron_length
    #          (intron_length is undef when the line carries none)
    # Throws : via $self->throw when a required field could not be parsed
    my $self   = shift;
    my $a_line = shift;

    # Match the argument that was actually passed in rather than whatever
    # happens to be in $_ at call time.
    my $line = defined $a_line ? $a_line : $_;

    my $align = {};
    (
        $align->{chromo_start},
        $align->{chromo_end},
        $align->{query_start},
        $align->{query_end},
        $align->{percent_identity},
        $align->{align_length},
        $align->{intron_length},
    ) =
    ($line =~ m|
    [\t]
    ([\d]+)[ ] # start in chromosome coord.
    ([\d]+)[ ] # end in chromosome coord.
    ([\d]+)[ ] # start in query coord.
    ([\d]+)[ ] # end in query coord.
    ([\d]+) # percent identity (as integer)
    [\t].*[\t] # skip the edit script
    ([\d]+) # length of alignment block.
    [\t]*([\d]+)* # length of following intron.
    |x
    );

    # A field is missing only when nothing was captured for it; a captured
    # "0" is a real value, so test definedness rather than truth.
    defined $align->{chromo_start}
        or $self->throw("chromo_start missing in alignment line.");
    defined $align->{chromo_end}
        or $self->throw("chromo_end was missing in alignment line.");
    defined $align->{query_start}
        or $self->throw("query_start was missing in alignment line.");
    defined $align->{query_end}
        or $self->throw("query_end was missing in alignment line.");
    defined $align->{percent_identity}
        or $self->throw("percent_identity was missing in alignment line.");
    defined $align->{align_length}
        or $self->throw("align_length was missing in alignment line.");

    return $align;
}
1;
Bio/SearchIO/psl.pm view on Meta::CPAN
$q_num_insert, $q_base_insert, $t_num_insert, $t_base_insert,
$strand, $q_name, $q_length, $q_start,
$q_end, $t_name, $t_length, $t_start,
$t_end, $block_count, $block_sizes, $q_starts,
$t_starts
) = split;
$q_length > 0 or $self->throw("parse error, invalid query length '$q_length'");
my $score = sprintf( "%.2f", 100 * ( $matches + $mismatches + $rep_matches ) / $q_length );
# this is overall percent identity...
my $match_total = $matches + $mismatches + $rep_matches;
$match_total > 0
or $self->throw("parse error, matches + mismatches + rep_matches must be > 0!");
my $percent_id = sprintf("%.2f", 100 * ( $matches + $rep_matches ) / $match_total );
# Remember Jim's code is 0 based
if ( defined $lastquery
&& $lastquery ne $q_name )
{
$self->end_element( { 'Name' => 'Hit' } );
$self->end_element( { 'Name' => 'PSLOutput' } );
$self->_pushback($_);
return $self->end_document;
}
Bio/SearchIO/waba.pm view on Meta::CPAN
local $/ = "\n";
local $_;
my ($curquery,$curhit);
my $state = -1;
$self->start_document();
my @hit_signifs;
while( defined ($_ = $self->_readline )) {
if( $state == -1 ) {
my ($qid, $qhspid,$qpercent, $junk,
$alnlen,$qdb,$qacc,$qstart,$qend,$qstrand,
$hitdb,$hacc,$hstart,$hend,
$hstrand) =
( /^(\S+)\.(\S+)\s+align\s+ # get the queryid
(\d+(\.\d+)?)\%\s+ # get the percentage
of\s+(\d+)\s+ # get the length of the alignment
(\S+)\s+ # this is the query database
(\S+):(\-?\d+)\-(\-?\d+) # The accession:start-end for query
\s+([\-\+]) # query strand
\s+(\S+)\. # hit db
(\S+):(\-?\d+)\-(\-?\d+) # The accession:start-end for hit
\s+([\-\+])\s*$ # hit strand
/ox );
# Curses. Jim's code is 0 based, the following is to readjust
Bio/SeqEvolution/Factory.pm view on Meta::CPAN
-alphabet => 'dna') );
$dnamut->add_Allele( Bio::Variation::Allele->new(-seq => $3,
-alphabet => 'dna') );
$dnamut;
} @{$self->{'_mutations'}}
}
sub get_alignment_identity {
    # Asks the stored alignment object for its overall percentage identity.
    my ($self) = @_;
    my $alignment = $self->{'_align'};
    return $alignment->overall_percentage_identity;
}
sub get_alignmet {
    # Returns the result of remove_gaps('-', 'all-gaps') on the stored
    # alignment object. (The method name is spelled this way in the API.)
    my ($self) = @_;
    my $alignment = $self->{'_align'};
    return $alignment->remove_gaps('-', 'all-gaps');
}
=head1 Internal methods
Bio/SeqFeature/Primer.pm view on Meta::CPAN
my ($self, %args) = @_;
my $salt = 0.2;
if ($args{'-salt'}) {
$salt = $args{'-salt'}
};
my $seqobj = $self->seq();
my $length = $seqobj->length();
my $seqdata = Bio::Tools::SeqStats->count_monomers($seqobj);
my $gc=$$seqdata{'G'} + $$seqdata{'C'};
my $percent_gc = ($gc/$length)*100;
my $tm = 81.5+(16.6*(log($salt)/log(10)))+(0.41*$percent_gc) - (600/$length);
return $tm;
}
=head2 primary_tag, source_tag, location, start, end, strand...
The documentation of L<Bio::SeqFeature::Generic> describes all the methods that
L<Bio::SeqFeature::Primer> objects inherit.
=cut
Bio/SeqIO/ace.pm view on Meta::CPAN
# Return if there weren't any DNA or peptide objects
return unless $type;
# Choose the molecule type
my $mol_type = $bio_mol_type{lc $type}
or $self->throw("Can't get Bio::Seq molecule type for '$type'");
# Remove quotes from $id
$id =~ s/^"|"$//g;
# Un-escape forward slashes, double quotes, percent signs,
# semi-colons, tabs, and backslashes (if you're mad enough
# to have any of these as part of object names in your acedb
# database).
$id =~ s/\\([\/"%;\t\\])/$1/g;
#"
# Called as next_seq(), so give back a Bio::Seq
return $self->sequence_factory->create(
-seq => $_,
-primary_id => $id,
-display_id => $id,
Bio/SeqUtils.pm view on Meta::CPAN
=head2 evolve
Title : evolve
Usage : my $newseq = Bio::SeqUtils->
evolve($seq, $similarity, $transition_transversion_rate);
Function: Mutates the sequence by point mutations until the similarity of
the new sequence has decreased to the required level.
Transition/transversion rate is adjustable.
Returns : A new Bio::PrimarySeq object
Args : sequence object
percentage similarity (e.g. 80)
tr/tv rate, optional, defaults to 1 (= 1:1)
Set the verbosity of the Bio::SeqUtils object to a positive integer to
see the mutations as they happen.
This method works only on nucleotide sequences. It prints a warning if
you set the target similarity to be less than 25%.
Transition/transversion ratio is an observed attribute of a sequence
comparison. We are dealing here with the transition/transversion rate