BioPerl
view release on metacpan or search on metacpan
Bio/DB/IndexedBase.pm view on Meta::CPAN
my ($self, $index_file, $write) = @_;
my %offsets;
my $flags = $write ? O_CREAT|O_RDWR : O_RDONLY;
my @dbmargs = $self->dbmargs;
tie %offsets, 'AnyDBM_File', $index_file, $flags, 0644, @dbmargs
or $self->throw( "Could not open index file $index_file: $!");
return \%offsets;
}
sub _close_index {
# Close index file
my ($self, $index) = @_;
untie %$index;
return 1;
}
# Compiling the below regular expression speeds up _parse_compound_id
my $compound_id = qr/^ (.+?) (?:\:([\d_]+)(?:,|-|\.\.)([\d_]+))? (?:\/(.+))? $/x;
sub _parse_compound_id {
# Handle compound IDs:
# $db->seq($id)
# $db->seq($id, $start, $stop, $strand)
# $db->seq("$id:$start,$stop")
# $db->seq("$id:$start..$stop")
# $db->seq("$id:$start-$stop")
# $db->seq("$id:$start,$stop/$strand")
# $db->seq("$id:$start..$stop/$strand")
# $db->seq("$id:$start-$stop/$strand")
# $db->seq("$id/$strand")
my ($self, $id, $start, $stop, $strand) = @_;
if ( (not defined $start ) &&
(not defined $stop ) &&
(not defined $strand) &&
($id =~ m{$compound_id}) ) {
# Start, stop and strand not provided and ID looks like a compound ID
($id, $start, $stop, $strand) = ($1, $2, $3, $4);
}
# Start, stop and strand defaults
$stop ||= $self->length($id) || 0; # 0 if sequence not found in database
$start ||= ($stop > 0) ? 1 : 0;
$strand ||= 1;
# Convert numbers such as 1_000_000 to 1000000
$start =~ s/_//g;
$stop =~ s/_//g;
if ($start > $stop) {
# Change the strand
($start, $stop) = ($stop, $start);
$strand *= -1;
}
return $id, $start, $stop, $strand;
}
sub _guess_alphabet {
# Determine the molecular type of the given sequence string:
# 'dna', 'rna', 'protein' or '' (unknown/empty)
my ($self, $string) = @_;
# Handle IUPAC residues like PrimarySeq does
my $alphabet = Bio::PrimarySeq::_guess_alphabet_from_string($self, $string, 1);
return $alphabet eq 'dna' ? DNA
: $alphabet eq 'rna' ? RNA
: $alphabet eq 'protein' ? PROTEIN
: NA;
}
sub _makeid {
# Process the header line by applying any transformation given in -makeid
my ($self, $header_line) = @_;
return ref($self->{makeid}) eq 'CODE' ? $self->{makeid}->($header_line) : $1;
}
sub _check_linelength {
# Check that the line length is valid. Generate an error otherwise.
my ($self, $linelength) = @_;
return if not defined $linelength;
$self->throw(
"Each line of the file must be less than 65,536 characters. Line ".
"$. is $linelength chars."
) if $linelength > 65535;
}
sub _calc_termination_length {
# Try the beginning of the file to determine termination length
# Account for crlf-terminated Windows and Mac files
my ($self, $file) = @_;
my $fh = IO::File->new($file) or $self->throw( "Could not open $file: $!");
# In Windows, text files have '\r\n' as line separator, but when reading in
# text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
# "length $_" will report 4 although the line is 5 bytes in length.
# We assume that all lines have the same line separator and only read current line.
my $init_pos = tell($fh);
my $curr_line = <$fh>;
my $pos_diff = tell($fh) - $init_pos;
my $correction = $pos_diff - length $curr_line;
close $fh;
$self->{termination_length} = ($curr_line =~ /\r\n$/) ? 2 : 1+$correction;
return $self->{termination_length};
}
sub _calc_offset {
# Get the offset of the n-th residue of the sequence with the given ID
# and termination length (tl)
my ($self, $id, $n) = @_;
my $tl = $self->{termination_length};
$n--;
my ($offset, $seqlen, $linelen) = (&{$self->{unpackmeth}}($self->{offsets}{$id}))[0,1,3];
$n = 0 if $n < 0;
$n = $seqlen-1 if $n >= $seqlen;
return $offset + $linelen * int($n/($linelen-$tl)) + $n % ($linelen-$tl);
}
sub _fh {
( run in 0.991 second using v1.01-cache-2.11-cpan-39bf76dae61 )