BioX-Seq
view release on metacpan or search on metacpan
lib/BioX/Seq/Stream.pm view on Meta::CPAN
require IO::Uncompress::Gunzip;
$fh = IO::Uncompress::Gunzip->new($fn, MultiStream => 1);
}
else {
open $fh, '-|', $GZIP_BIN, '-dc', $fn
or die "Error opening gzip stream: $!\n";
}
}
elsif (substr($magic,0,3) eq MAGIC_BZIP) {
close $fh;
if (! defined $BZIP_BIN) {
# fall back on Perl-based method (but can be SLOOOOOW!)
require IO::Uncompress::Bunzip2;
$fh = IO::Uncompress::Bunzip2->new($fn, MultiStream => 1);
}
else {
open $fh, '-|', $BZIP_BIN, '-dc', $fn
or die "Error opening bzip2 stream: $!\n";
}
}
elsif (substr($magic,0,4) eq MAGIC_ZSTD) {
die "no zstd backend found\n" if (! defined $ZSTD_BIN);
close $fh;
open $fh, '-|', $ZSTD_BIN, '-dc', $fn
or die "Error opening zstd stream: $!\n";
}
elsif (substr($magic,0,2) eq MAGIC_DSRC) {
die "no dsrc backend found\n" if (! defined $DSRC_BIN);
close $fh;
open $fh, '-|', $DSRC_BIN, 'd', '-s', $fn
or die "Error opening dsrc stream: $!\n";
}
elsif (substr($magic,0,4) eq MAGIC_FQZC) {
die "no fqz backend found\n" if (! defined $FQZC_BIN);
close $fh;
open $fh, '-|', $FQZC_BIN, '-d', $fn
or die "Error opening fqz_comp stream: $!\n";
}
elsif (substr($magic,0,6) eq MAGIC_XZ) {
die "no xz backend found\n" if (! defined $XZ_BIN);
close $fh;
open $fh, '-|', $XZ_BIN, '-dc', $fn
or die "Error opening xz stream: $!\n";
}
else {
seek($fh,0,0);
}
}
$self->{fh} = $fh;
}
else {
$self->{fh} = \*STDIN;
}
# handle files coming from different platforms
#my @layers = PerlIO::get_layers($self->{fh});
#binmode($self->{fh},':unix:stdio:crlf');
$self->_guess_format;
$self->_init;
return $self;
}
# Set the parser's "fast" flag.
#
# Arguments:
#   $bool - value to store in $self->{fast}. When it is omitted or undefined
#           the flag is set to 1.
#
# Returns the value that was stored.
sub fast {

    my $self = shift;
    my $flag = shift;

    # an absent/undefined argument means "switch it on"
    $flag = 1 if ! defined $flag;

    return $self->{fast} = $flag;

}
# Detect the input format and rebless the parser into the matching subclass.
#
# Reads the first two bytes of $self->{fh} into $self->{buffer}, then loads
# every "*.pm" module found in the directory named after this file (this
# file's absolute path with the ".pm" suffix removed) as
# "<class of $self>::<Name>". Each candidate class is asked, through its
# _check_type($self) method, whether it recognises the input; $self is then
# reblessed into the single class that answered true.
#
# Returns: $self, reblessed into the matching class.
# Dies:    when two bytes cannot be read, when a candidate module fails to
#          load, or when the number of matching classes is not exactly one.
sub _guess_format {

    my ($self) = @_;

    # Filetype guessing must be based on first two bytes (or less)
    # which are stored in an object buffer
    my $r = read $self->{fh}, $self->{buffer}, 2;

    # read() returns undef on error, so check definedness before comparing
    # to avoid an "uninitialized value" warning on the failure path
    die "failed to read initial bytes" if (! defined $r || $r != 2);

    my $search_path = abs_path(__FILE__) // '';
    $search_path =~ s/\.pm$//i;

    # List the directory directly instead of using glob(): glob() splits its
    # pattern on whitespace and expands metacharacters, so an installation
    # path containing e.g. a space would yield no candidates at all. Hidden
    # files are skipped and the list is sorted to mirror what "*.pm" returned.
    # An unreadable/missing directory simply leaves the candidate list empty,
    # which is reported below as a failure to guess the filetype.
    my @module_files;
    if (opendir my $dh, $search_path) {
        @module_files = sort grep { /\.pm\z/ && ! /\A\./ } readdir $dh;
        closedir $dh;
    }

    my @matched;
    for my $module_file (@module_files) {

        (my $name = $module_file) =~ s/\.pm\z//;
        my $classname = blessed($self) . "::$name";

        # Load via the module's relative path in a block eval (no string
        # eval of a name derived from the filesystem) and report a load
        # failure directly rather than letting it surface as a confusing
        # "Can't locate object method" error on the call below.
        (my $class_file = "$classname.pm") =~ s{::}{/}g;
        eval { require $class_file; 1 }
            or die "Failed to load $classname: $@";

        push @matched, $classname if $classname->_check_type($self);

    }

    die "Failed to guess filetype\n" if (scalar(@matched) < 1);
    # uncoverable branch true
    die "Multiple filetypes matched\n" if (scalar(@matched) > 1);

    # the winning class was already loaded inside the loop above
    return bless $self => $matched[0];

}
1;
__END__
=head1 NAME
BioX::Seq::Stream - Parse FASTA and FASTQ files sequentially
=head1 SYNOPSIS
use BioX::Seq::Stream;
my $parser = BioX::Seq::Stream->new; #defaults to STDIN
my $parser = BioX::Seq::Stream->new( $filename );
my $parser = BioX::Seq::Stream->new( $filehandle );
while (my $seq = $parser->next_seq) {
# $seq is a BioX::Seq object
}
=head1 DESCRIPTION
C<BioX::Seq::Stream> is a sequential parser for FASTA and FASTQ files. It
should handle any valid input, with the exception of the use of semi-colons to
indicate FASTA comments (this could be easily implemented, but I have never
seen an actual FASTA file like this in the wild, and the NCBI FASTA
specification does not allow for this usage). In particular, it will properly
handle FASTQ files with multi-line (wrapped) sequence and quality strings. I
have never seen a FASTQ file like this either, but apparently this is
technically valid and a few software programs will still create files like
this.
=head1 CONSTRUCTOR
=head2 new
my $parser = BioX::Seq::Stream->new();
my $parser = BioX::Seq::Stream->new( $filename );
my $parser = BioX::Seq::Stream->new( $filehandle );
my $parser = BioX::Seq::Stream->new( $filename, %args );
Create a new C<BioX::Seq::Stream> parser. If no arguments are given (or if the
first argument given has an undefined value), the parser will read from STDIN.
Otherwise, the parser will determine whether a filename or a filehandle is
provided and act accordingly. Returns a C<BioX::Seq::Stream> parser object.
The first argument is always a filename or filehandle. Subsequent key/value
arguments can include:
( run in 1.474 second using v1.01-cache-2.11-cpan-0d23b851a93 )