BioX-Seq
view release on metacpan or search on metacpan
lib/BioX/Seq/Fetch.pm view on Meta::CPAN
$ll_mismatch = 0;
}
elsif ($line =~ /^[A-Za-z\-\*\.]+(\r?\n?)$/) {
if ($bl_mismatch) {
close $idx;
unlink $fn_idx;
die "Base length mismatch\n";
}
if ($ll_mismatch) {
close $idx;
unlink $fn_idx;
die "Line length mismatch\n";
}
my $ll = length $line;
my $bl = $ll - length $1;
$curr_len += $bl;
$curr_bases //= $bl;
$curr_bytes //= $ll;
$bl_mismatch = 1 if ($bl != $curr_bases);
$ll_mismatch = 1 if ($ll != $curr_bytes);
}
elsif ($line =~ /\S/) {
close $idx;
unlink $fn_idx;
die "Unexpected content: $line";
}
}
# write remaining index
# uncoverable branch false
if (defined $curr_id) {
say {$idx} join "\t",
$curr_id,
$curr_len,
$curr_offset,
$curr_bases,
$curr_bytes,
;
}
close $idx;
#reset filehandle
seek $self->{fh}, 0, 0;
return 1;
}
sub _load_faidx {
my ($self) = @_;
my $fn_idx = $self->{fn} . '.fai';
$self->write_index if (! -e $fn_idx);
# make sure FASTA file is not newer than its index; otherwise index file
# may be out of date and cause silent corruption downstream
my $mtime_fas = stat($self->{fn})->mtime;
my $mtime_idx = stat($fn_idx)->mtime;
if ($mtime_fas > $mtime_idx) {
die "Index file exists but is older than FASTA file and probably out"
. " of date. Refresh or remove the existing index before"
. " proceeding.\n"
}
my @ids;
open my $in, '<', $fn_idx
or die "Error opening index file: $!\n";
while (my $line = <$in>) {
chomp $line;
my ($name, $len, $offset, $bases_per_line, $bytes_per_line)
= split "\t", $line;
die "ERROR: $fn_idx contains duplicate entries"
if ( defined $self->{idx}->{$name} );
my $eol = $bytes_per_line - $bases_per_line;
$self->{idx}->{$name} = [$len, $offset, $bases_per_line, $eol];
push @ids, $name;
}
close $in;
$self->{ids} = \@ids;
return;
}
sub ids {
my ($self) = @_;
return @{ $self->{ids} };
}
sub length {
my ($self, $id) = @_;
return $self->{idx}->{$id}->[0];
}
sub fetch_seq {
my ($self, $id, $start_bp, $end_bp) = @_;
return undef if (! defined $self->{idx}->{$id});
my ($len, $off, $bpl, $eol) = @{ $self->{idx}->{$id} };
my $fh = $self->{fh};
$start_bp = $start_bp // 1;
$end_bp = $end_bp // $len;
--$start_bp; #make 0-based
die "Coordinates out of bounds" if ($start_bp < 0 || $end_bp > $len);
my $l = $end_bp - $start_bp;
seek $fh, $off + $start_bp + int(($start_bp)/$bpl)*$eol, 0;
read($fh, my $seq, $l + int(($l+$start_bp%$bpl)/$bpl)*$eol);
$seq =~ s/\s//g;
( run in 0.963 second using v1.01-cache-2.11-cpan-98e64b0badf )