App-Egaz

 view release on metacpan or  search on metacpan

lib/App/Egaz/Command/normalize.pm  view on Meta::CPAN

package App::Egaz::Command::normalize;
use strict;
use warnings;
use autodie;

use App::Egaz -command;
use App::Egaz::Common;

# One-line summary of this subcommand; description() capitalizes it to
# form the first line of the long help text.
sub abstract {
    my $summary = 'normalize lav files';
    return $summary;
}

# Option specification for this subcommand.
#
# Returns a list: one array ref per option (spec string, help text, settings
# hash with the default value) followed by a hash ref that carries the
# show_defaults flag.
sub opt_spec {
    my $outfile = [ "outfile|o=s", "Output filename. [stdout] for screen", { default => "stdout" } ];
    my $tlen    = [ 'tlen=i', 'target length', { default => 0 } ];
    my $qlen    = [ 'qlen=i', 'query length', { default => 0 } ];
    my $display = { show_defaults => 1 };

    # Returned as a plain comma list (not an array) so the result is the same
    # in scalar context as with a literal list.
    return ( $outfile, $tlen, $qlen, $display );
}

# Usage synopsis for this subcommand: the command name, an options
# placeholder and the single <infile> argument.
sub usage_desc {
    my $usage = "egaz normalize [options] <infile>";
    return $usage;
}

# Build the long help text: the capitalized abstract, a period, then a short
# list of usage notes.
#
# Returns the text as a single string.
sub description {
    my $desc;
    $desc .= ucfirst( abstract() ) . ".\n";

    # The input note states what the command really accepts: validate_args()
    # rejects any argument that is not an existing file and execute() slurps
    # that path directly, so the literal "stdin" is never read as STDIN.
    # The heredoc is single-quoted because nothing in it is interpolated.
    $desc .= <<'MARKDOWN';

* infile must be an existing lav file; reading from STDIN is not supported
* Start coordinates of output is 1-based
* Set --tlen and/or --qlen on partitioned sequences
* Ported from kentUtils src/hg/utils/automation/blastz-normalizeLav

MARKDOWN

    return $desc;
}

# Check the positional arguments before the command runs.
#
# Parameters:
#   $self - command object; problems are reported via $self->usage_error
#   $opt  - parsed options (not consulted here)
#   $args - array ref of positional arguments
#
# Exactly one argument is expected, and every argument must name an existing
# file. No name is special-cased: an argument such as "stdin" is checked as a
# path like any other.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    my @infiles = @{$args};

    if ( scalar(@infiles) != 1 ) {

        # List every argument that was received, each wrapped in brackets.
        my $message = join q{},
            "This command need one input file.\n\tIt found",
            ( map { sprintf " [%s]", $_ } @infiles ),
            ".\n";
        $self->usage_error($message);
    }

    for my $infile (@infiles) {
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }

    return;
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    #----------------------------#
    # write outputs
    #----------------------------#
    my $out_fh;
    if ( lc( $opt->{outfile} ) eq "stdout" ) {
        $out_fh = *STDOUT{IO};
    }
    else {
        open $out_fh, ">", $opt->{outfile};
    }

    #----------------------------#
    # load lav
    #----------------------------#
    my $lav_content = Path::Tiny::path( $args->[0] )->slurp;
    my @lavs        = grep {/^[ds] /} split /\#\:lav\n/, $lav_content;
    my $d_stanza    = shift @lavs;
    $d_stanza = "d {\n  normalize-lav $opt->{tlen} $opt->{qlen}\n}\n" . $d_stanza;

    print {$out_fh} "#:lav\n";
    print {$out_fh} $d_stanza;

    for my $lav (@lavs) {
        print {$out_fh} "#:lav\n";

        my $t_from = 0;
        my $q_from = 0;
        my $t_to   = 0;
        my $q_to   = 0;
        my $isrc   = 0;

        #----------------------------#
        # s-stanza
        #----------------------------#
        # "<filename>[-]" <start> <stop> [<rev_comp_flag> <sequence_number>]
        $lav =~ /s \{\s+(.+?)\s+\}/s;
        my $s_stanza = $1;
        my @s_lines  = $s_stanza =~ /(.+ \s+ \d+ \s+ \d+ \s+ \d+ \s+ \d+)/gx;
        if ( scalar @s_lines != 2 ) {
            Carp::croak "s-stanza error.\n";
        }



( run in 0.627 second using v1.01-cache-2.11-cpan-f56aa216473 )