BioX-Seq

 view release on metacpan or  search on metacpan

lib/BioX/Seq/Stream.pm  view on Meta::CPAN

package BioX::Seq::Stream;

use 5.012;
use strict;
use warnings;

use IPC::Cmd qw/can_run/;
use Scalar::Util qw/blessed openhandle/;
use BioX::Seq;
use POSIX qw/ceil/;
use Cwd qw/abs_path/;
use File::Basename qw/fileparse/;

# Locate the external (de)compression programs once, when the module loads.
# Each package variable ends up holding whatever can_run() returned for the
# first candidate it could find, or stays undefined when none of the listed
# candidates is available. Candidates are tried strictly in the order given,
# so e.g. 'pigz' is preferred over 'gzip'.
our ($GZIP_BIN, $BZIP_BIN, $ZSTD_BIN, $DSRC_BIN, $FQZC_BIN, $XZ_BIN);
{
    # each row: reference to the variable to fill, then candidate programs
    my @lookup_table = (
        [ \$GZIP_BIN, qw/pigz gzip/    ],
        [ \$BZIP_BIN, qw/pbzip2 bzip2/ ],
        [ \$ZSTD_BIN, qw/pzstd zstd/   ],
        [ \$DSRC_BIN, qw/dsrc2 dsrc/   ],
        [ \$FQZC_BIN, qw/fqz_comp/     ],
        [ \$XZ_BIN,   qw/xz/           ],
    );

    for my $row (@lookup_table) {
        my ($slot, @candidates) = @{$row};
        for my $program (@candidates) {
            ${$slot} = can_run($program);
            last if defined ${$slot};   # stop at the first hit
        }
    }
}

# Leading byte signatures used to recognise the format of an input file.
# Each value is a plain byte string meant to be compared (with 'eq') against
# a prefix of the same length taken from the first bytes read from the file.
use constant {
    MAGIC_GZIP => "\x1f\x8b\x08",              # 3 bytes
    MAGIC_DSRC => "\xaa\x02",                  # 2 bytes
    MAGIC_BZIP => 'BZh',                       # 3 bytes
    MAGIC_FQZC => '.fqz',                      # 4 bytes
    MAGIC_BAM  => "\x42\x41\x4d\x01",          # 4 bytes ("BAM" + 0x01)
    MAGIC_2BIT => "\x1a\x41\x27\x43",          # 4 bytes
    MAGIC_ZSTD => "\x28\xb5\x2f\xfd",          # 4 bytes
    MAGIC_XZ   => "\xfd\x37\x7a\x58\x5a\x00",  # 6 bytes
};

sub new {

    my ($class,$fn, %args) = @_;

    my $self = bless {} => $class;

    # 'fast' mode turns off parser sanity-checking in places
    if ($args{fast}) {
        $self->fast( $args{fast} );
    }

    if (defined $fn) {

        my $fh = openhandle($fn); # can pass filehandle too;
        if (! defined $fh) { # otherwise assume filename
            
            #if passed a filename, try to determine if compressed
            open $fh, '<', $fn or die "Error opening $fn for reading\n";

            #read first six bytes as raw
            #this causes a memory leak as opened filehandles are not properly
            #closed again. Should work without setting binary mode anyway.
            #my $old_layers = join '', map {":$_"} PerlIO::get_layers($fh);
            #binmode($fh);
            read( $fh, my $magic, 6 );
            #binmode($fh, $old_layers); 

            #check for compression and open stream if found
            if (substr($magic,0,3) eq MAGIC_GZIP) {
                close $fh;
                if (! defined $GZIP_BIN) {
                    # fall back on Perl-based method (but can be SLOOOOOW!)
                    require IO::Uncompress::Gunzip;
                    $fh = IO::Uncompress::Gunzip->new($fn, MultiStream => 1);
                }
                else {
                    open $fh, '-|', $GZIP_BIN, '-dc', $fn
                        or die "Error opening gzip stream: $!\n";
                }
            }
            elsif (substr($magic,0,3) eq MAGIC_BZIP) {
                close $fh;
                if (! defined $BZIP_BIN) {
                    # fall back on Perl-based method (but can be SLOOOOOW!)
                    require IO::Uncompress::Bunzip2;
                    $fh = IO::Uncompress::Bunzip2->new($fn, MultiStream => 1);
                }
                else {
                    open $fh, '-|', $BZIP_BIN, '-dc', $fn
                        or die "Error opening bzip2 stream: $!\n";
                }
            }
            elsif (substr($magic,0,4) eq MAGIC_ZSTD) {
                die "no zstd backend found\n" if (! defined $ZSTD_BIN);
                close $fh;
                open $fh, '-|', $ZSTD_BIN, '-dc', $fn
                    or die "Error opening zstd stream: $!\n";
            }
            elsif (substr($magic,0,2) eq MAGIC_DSRC) {
                die "no dsrc backend found\n" if (! defined $DSRC_BIN);
                close $fh;
                open $fh, '-|', $DSRC_BIN, 'd', '-s', $fn
                    or die "Error opening dsrc stream: $!\n";
            }
            elsif (substr($magic,0,4) eq MAGIC_FQZC) {

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.652 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )