App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/normalize.pm view on Meta::CPAN
package App::Egaz::Command::normalize;
use strict;
use warnings;
use autodie;
use App::Egaz -command;
use App::Egaz::Common;
# One-line summary of this command. description() reuses it, capitalised,
# as the first line of the long help text.
sub abstract {
    my $summary = 'normalize lav files';
    return $summary;
}
# Command-line option specification.
#
# Returns a list of three option entries, each shaped as
# [ spec, help text, { default => ... } ], followed by a trailing hash ref
# that turns on show_defaults.
# NOTE(review): presumably consumed by App::Cmd / Getopt::Long::Descriptive;
# confirm against the framework if the entry layout is changed.
sub opt_spec {
    my $outfile_opt = [
        'outfile|o=s',
        'Output filename. [stdout] for screen',
        { default => 'stdout' },
    ];
    my $tlen_opt = [ 'tlen=i', 'target length', { default => 0 } ];
    my $qlen_opt = [ 'qlen=i', 'query length',  { default => 0 } ];
    my $display  = { show_defaults => 1 };

    # Returned as a literal list so scalar-context callers still receive the
    # last element.
    return ( $outfile_opt, $tlen_opt, $qlen_opt, $display );
}
# Usage line for the command: options followed by a single <infile>.
sub usage_desc {
    my $usage = "egaz normalize [options] <infile>";
    return $usage;
}
# Long help text: the capitalised abstract() followed by a period, then a
# fixed block of usage notes.
sub description {

    # Non-interpolating heredoc: the notes contain nothing to interpolate.
    my $notes = <<'MARKDOWN';
* infile == stdin means reading from STDIN
* Start coordinates of output is 1-based
* Set --tlen and/or --qlen on partitioned sequences
* Ported from kentUtils src/hg/utils/automation/blastz-normalizeLav
MARKDOWN

    return ucfirst( abstract() ) . ".\n" . $notes;
}
# Check the positional arguments.
#
#   $self - command object; problems are reported through $self->usage_error
#   $opt  - parsed options (not examined here)
#   $args - array ref of positional arguments; exactly one is expected
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    my @infiles = @{$args};

    # Wrong number of arguments: echo back everything that was received.
    if ( scalar @infiles != 1 ) {
        my $found = join '', map { sprintf ' [%s]', $_ } @infiles;
        $self->usage_error(
            "This command need one input file.\n\tIt found" . $found . ".\n" );
    }

    # Each argument has to pass Path::Tiny's is_file check.
    for my $infile (@infiles) {
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# write outputs
#----------------------------#
my $out_fh;
if ( lc( $opt->{outfile} ) eq "stdout" ) {
$out_fh = *STDOUT{IO};
}
else {
open $out_fh, ">", $opt->{outfile};
}
#----------------------------#
# load lav
#----------------------------#
my $lav_content = Path::Tiny::path( $args->[0] )->slurp;
my @lavs = grep {/^[ds] /} split /\#\:lav\n/, $lav_content;
my $d_stanza = shift @lavs;
$d_stanza = "d {\n normalize-lav $opt->{tlen} $opt->{qlen}\n}\n" . $d_stanza;
print {$out_fh} "#:lav\n";
print {$out_fh} $d_stanza;
for my $lav (@lavs) {
print {$out_fh} "#:lav\n";
my $t_from = 0;
my $q_from = 0;
my $t_to = 0;
my $q_to = 0;
my $isrc = 0;
#----------------------------#
# s-stanza
#----------------------------#
# "<filename>[-]" <start> <stop> [<rev_comp_flag> <sequence_number>]
$lav =~ /s \{\s+(.+?)\s+\}/s;
my $s_stanza = $1;
my @s_lines = $s_stanza =~ /(.+ \s+ \d+ \s+ \d+ \s+ \d+ \s+ \d+)/gx;
if ( scalar @s_lines != 2 ) {
Carp::croak "s-stanza error.\n";
}
( run in 0.627 second using v1.01-cache-2.11-cpan-f56aa216473 )