App-Fasops

 view release on metacpan or  search on metacpan

lib/App/Fasops/Command/separate.pm  view on Meta::CPAN

package App::Fasops::Command::separate;
use strict;
use warnings;
use autodie;

use App::Fasops -command;
use App::RL::Common;
use App::Fasops::Common;

# One-line summary of this command.
sub abstract {
    my $summary = 'separate blocked fasta files by species';
    return $summary;
}

# Declare the command-line options of this command.
#
# Returns five option rows (outdir, suffix, rm, rc, nodash) followed by a
# trailing hash with show_defaults set.
sub opt_spec {
    my $outdir_row = [ "outdir|o=s", "Output location, [stdout] for screen", { default => '.' } ];
    my $suffix_row = [ "suffix|s=s", "Extensions of output files", { default => '.fasta' } ];
    my $rm_row     = [ "rm|r",       "If outdir exists, remove it before operating" ];
    my $rc_row     = [ "rc",         "Revcom sequences when chr_strand is '-'" ];
    my $nodash_row = [ "nodash",     "Remove dashes ('-') from sequences" ];
    my $settings   = { show_defaults => 1, };

    # Kept as a comma list (not an array) so the value seen by a caller is the
    # same in every calling context.
    return ( $outdir_row, $suffix_row, $rm_row, $rc_row, $nodash_row, $settings );
}

# Usage synopsis line for this command.
sub usage_desc {
    my $synopsis = "fasops separate [options] <infile> [more infiles]";
    return $synopsis;
}

# Build the long help text for this command.
#
# Returns a string: the capitalized abstract followed by notes on the
# accepted input files.
sub description {
    my $desc;
    $desc .= ucfirst(abstract) . ".\n";

    # This command reads blocked fasta input (see abstract), not axt files;
    # gzipped input is opened through IO::Zlib in execute().
    $desc .= <<'MARKDOWN';

* <infiles> are paths to blocked fasta files, .fas.gz is supported
* infile == stdin means reading from STDIN

MARKDOWN

    return $desc;
}

# Validate the positional arguments and prepare the output location.
#
# Parameters:
#   $self - the command object; problems are reported via $self->usage_error
#   $opt  - hash ref of parsed options; reads {outdir} and {rm}, and fills in
#           {outdir} when the key is absent
#   $args - array ref of input file names; "stdin" (any case) is accepted
#           without a file check
#
# Reports a usage error when no input is given, when an input is not an
# existing file, or when --rm would remove the current working directory.
# For any outdir other than "stdout" the directory is created.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    if ( !@{$args} ) {

        # $args is empty in this branch, so there is nothing to list after
        # "It found"; the message text is the same as before.
        $self->usage_error("This command need one or more input files.\n\tIt found.\n");
    }

    for my $infile ( @{$args} ) {
        next if lc $infile eq "stdin";
        if ( !Path::Tiny::path($infile)->is_file ) {
            $self->usage_error("The input file [$infile] doesn't exist.");
        }
    }

    if ( !exists $opt->{outdir} ) {
        $opt->{outdir} = Path::Tiny::path( $args->[0] )->absolute . ".separate";
    }

    # "stdout" means writing to the screen, so no filesystem entry of that name
    # may be removed or created for it.
    if ( lc( $opt->{outdir} ) ne "stdout" ) {
        my $outdir = Path::Tiny::path( $opt->{outdir} );

        if ( $opt->{rm} and $outdir->exists ) {

            # The default outdir is '.'; removing it would wipe the working
            # directory, including relative input files validated above.
            if ( $outdir->realpath->stringify eq Path::Tiny->cwd->realpath->stringify ) {
                $self->usage_error(
                    "Refusing to remove the current working directory [$opt->{outdir}] with --rm.");
            }
            $outdir->remove_tree;
        }

        $outdir->mkpath;
    }
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    for my $infile ( @{$args} ) {
        my $in_fh;
        if ( lc $infile eq "stdin" ) {
            $in_fh = *STDIN{IO};
        }
        else {
            $in_fh = IO::Zlib->new( $infile, "rb" );
        }

        my $content = '';    # content of one block
        while (1) {
            last if $in_fh->eof and $content eq '';
            my $line = '';
            if ( !$in_fh->eof ) {
                $line = $in_fh->getline;
            }
            if ( ( $line eq '' or $line =~ /^\s+$/ ) and $content ne '' ) {
                my $info_of = App::Fasops::Common::parse_block($content);
                $content = '';

                for my $key ( keys %{$info_of} ) {
                    my $info = $info_of->{$key};
                    if ( $opt->{nodash} ) {
                        $info->{seq} =~ tr/-//d;
                    }
                    if ( $opt->{rc} and $info->{strand} ne "+" ) {
                        $info->{seq}
                            = App::Fasops::Common::revcom( $info->{seq} );



( run in 2.167 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )