BioX-Seq

 view release on metacpan or  search on metacpan

lib/BioX/Seq/Stream.pm  view on Meta::CPAN

                    require IO::Uncompress::Gunzip;
                    $fh = IO::Uncompress::Gunzip->new($fn, MultiStream => 1);
                }
                else {
                    open $fh, '-|', $GZIP_BIN, '-dc', $fn
                        or die "Error opening gzip stream: $!\n";
                }
            }
            elsif (substr($magic,0,3) eq MAGIC_BZIP) {
                close $fh;
                if (! defined $BZIP_BIN) {
                    # fall back on Perl-based method (but can be SLOOOOOW!)
                    require IO::Uncompress::Bunzip2;
                    $fh = IO::Uncompress::Bunzip2->new($fn, MultiStream => 1);
                }
                else {
                    open $fh, '-|', $BZIP_BIN, '-dc', $fn
                        or die "Error opening bzip2 stream: $!\n";
                }
            }
            elsif (substr($magic,0,4) eq MAGIC_ZSTD) {
                die "no zstd backend found\n" if (! defined $ZSTD_BIN);
                close $fh;
                open $fh, '-|', $ZSTD_BIN, '-dc', $fn
                    or die "Error opening zstd stream: $!\n";
            }
            elsif (substr($magic,0,2) eq MAGIC_DSRC) {
                die "no dsrc backend found\n" if (! defined $DSRC_BIN);
                close $fh;
                open $fh, '-|', $DSRC_BIN, 'd', '-s', $fn
                    or die "Error opening dsrc stream: $!\n";
            }
            elsif (substr($magic,0,4) eq MAGIC_FQZC) {
                die "no fqz backend found\n" if (! defined $FQZC_BIN);
                close $fh;
                open $fh, '-|', $FQZC_BIN, '-d', $fn
                    or die "Error opening fqz_comp stream: $!\n";
            }
            elsif (substr($magic,0,6) eq MAGIC_XZ) {
                die "no xz backend found\n" if (! defined $XZ_BIN);
                close $fh;
                open $fh, '-|', $XZ_BIN, '-dc', $fn
                    or die "Error opening xz stream: $!\n";
            }
            else {
                seek($fh,0,0);
            }

        }
        $self->{fh} = $fh;

    }
    else {
        $self->{fh} = \*STDIN;
    }

    # handle files coming from different platforms
    #my @layers = PerlIO::get_layers($self->{fh});
    #binmode($self->{fh},':unix:stdio:crlf');

    $self->_guess_format;

    $self->_init;

    return $self;

}

# Set the parser's "fast" flag.
#
# Stores the supplied value in $self->{fast}. When the argument is omitted
# or undefined the flag is set to 1. Returns the value that was stored.
sub fast {

    my $self    = shift;
    my $enabled = shift;

    # calling with no argument (or undef) switches the flag on
    $enabled = 1 if ! defined $enabled;

    return $self->{fast} = $enabled;

}

# Detect the format of the input and rebless the parser accordingly.
#
# Reads the first two bytes of $self->{fh} into $self->{buffer}, then loads
# every format module found in the directory named after this file (this
# file's path minus its ".pm" suffix) and calls each one's _check_type()
# class method with the parser. The parser is reblessed into the single
# class that reports a match.
#
# Dies if the two leading bytes cannot be read, if a format module fails to
# load, or if the number of matching format classes is not exactly one.
sub _guess_format {

    my ($self) = @_;

    # Filetype guessing must be based on the first two bytes (or fewer),
    # which are stored in an object buffer
    my $r = (read $self->{fh}, $self->{buffer}, 2);
    # read() returns undef on error, so check definedness before comparing
    die "failed to read initial bytes" if (! defined $r || $r != 2);

    my $search_path = abs_path(__FILE__);
    $search_path =~ s/\.pm$//i;

    # The core glob() splits its pattern on whitespace, which breaks module
    # discovery when installed under a path containing spaces; bsd_glob()
    # treats the whole string as one pattern.
    require File::Glob;
    my @matched;
    for my $module ( File::Glob::bsd_glob("$search_path/*.pm") ) {
        my ($name) = fileparse($module, qr/\.pm/i);
        my $classname = blessed($self) . "::$name";
        # report the real load error instead of failing later on a
        # missing _check_type() method
        eval "require $classname; 1"
            or die "Failed to load $classname: $@";
        if ($classname->_check_type($self)) {
            push @matched, $classname;
        }
    }

    die "Failed to guess filetype\n"   if (scalar(@matched) < 1);
    # uncoverable branch true
    die "Multiple filetypes matched\n" if (scalar(@matched) > 1);

    # the matched class was already loaded inside the loop above
    bless $self => $matched[0];

}


1;


__END__

=head1 NAME

BioX::Seq::Stream - Parse FASTA and FASTQ files sequentially

=head1 SYNOPSIS

    use BioX::Seq::Stream;

    my $parser = BioX::Seq::Stream->new; #defaults to STDIN
    my $parser = BioX::Seq::Stream->new( $filename );
    my $parser = BioX::Seq::Stream->new( $filehandle );

    while (my $seq = $parser->next_seq) {

        # $seq is a BioX::Seq object

    }

=head1 DESCRIPTION

C<BioX::Seq::Stream> is a sequential parser for FASTA and FASTQ files. It
should handle any valid input, with the exception of the use of semi-colons to
indicate FASTA comments (this could be easily implemented, but I have never
seen an actual FASTA file like this in the wild, and the NCBI FASTA
specification does not allow for this usage). In particular, it will properly
handle FASTQ files with multi-line (wrapped) sequence and quality strings. I
have never seen a FASTQ file like this either, but apparently this is
technically valid and a few software programs will still create files like
this.

=head1 CONSTRUCTOR

=head2 new

    my $parser = BioX::Seq::Stream->new();
    my $parser = BioX::Seq::Stream->new( $filename );
    my $parser = BioX::Seq::Stream->new( $filehandle );
    my $parser = BioX::Seq::Stream->new( $filename, %args );

Create a new C<BioX::Seq::Stream> parser. If no arguments are given (or if the
first argument given has an undefined value), the parser will read from STDIN.
Otherwise, the parser will determine whether a filename or a filehandle is
provided and act accordingly. Returns a C<BioX::Seq::Stream> parser object.

The first argument is always a filename or filehandle. Subsequent key/value
arguments can include:



( run in 1.474 second using v1.01-cache-2.11-cpan-0d23b851a93 )