App-Rangeops
view release on metacpan or search on metacpan
lib/App/Rangeops/Command/filter.pm view on Meta::CPAN
package App::Rangeops::Command::filter;
use strict;
use warnings;
use autodie;
use App::Rangeops -command;
use App::Rangeops::Common;
# Short, one-line summary of what this command does.
# description() reuses this text as its first sentence.
sub abstract {
    my $summary = 'filter links by numbers of ranges or length difference';
    return $summary;
}
# Command-line option specifications for this command.
# Returns a list of array refs, each holding an option spec string followed
# by its help text.
sub opt_spec {

    # Where the result is written.
    my $outfile_spec
        = [ "outfile|o=s", "Output filename. [stdout] for screen." ];

    # Accepted counts of ranges per link.
    my $number_spec
        = [ "number|n=s", "Numbers of ranges, a valid IntSpan runlist." ];

    # Threshold on the length ratio of the ranges in a link.
    my $ratio_spec = [
        "ratio|r=f",
        "Ratio of lengths differences. The suggested value is [0.8]"
    ];

    # Kept as a literal list so the result is the same in any calling context.
    return ( $outfile_spec, $number_spec, $ratio_spec );
}
# Usage synopsis line for this command.
sub usage_desc {
    my $usage = "rangeops filter [options] <infiles>";
    return $usage;
}
# Longer help text: the capitalised abstract as a sentence, followed by a
# tab-indented note about the assumption made on the input ranges.
sub description {
    my @pieces = (
        ucfirst( abstract() ) . ".\n",
        "\tIt's assumed that all ranges in input files are valid.\n",
    );
    return join q{}, @pieces;
}
# Validate positional arguments and fill in the default output filename.
#
#   $self - command object; must provide usage_error()
#   $opt  - hash ref of parsed options; {outfile} may be set here
#   $args - array ref of input filenames
#
# At least one input is required. Every input other than the literal
# "stdin" (compared case-insensitively) must be an existing file according
# to Path::Tiny's is_file. When no outfile option is present, it defaults to
# the absolute path of the first input with ".filter.tsv" appended.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # NOTE(review): usage_error presumably aborts the command -- confirm.
    $self->usage_error("This command need one or more input files.")
        unless @{$args};

    for my $infile ( @{$args} ) {
        next if lc($infile) eq "stdin";
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }

    if ( not exists $opt->{outfile} ) {
        my $first_input = Path::Tiny::path( $args->[0] )->absolute;
        $opt->{outfile} = $first_input . ".filter.tsv";
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# Loading
#----------------------------#
my @lines;
for my $file ( @{$args} ) {
for my $line ( App::RL::Common::read_lines($file) ) {
my @parts = split /\t/, $line;
if ( defined $opt->{number} ) {
my $intspan = AlignDB::IntSpan->new;
$intspan->merge( $opt->{number} );
next unless $intspan->contains( scalar @parts );
}
if ( defined $opt->{ratio} ) {
my @lengths;
for my $part (@parts) {
my $info = App::RL::Common::decode_header($part);
next unless App::RL::Common::info_is_valid($info);
push @lengths, ( $info->{end} - $info->{start} + 1 );
}
my ( $l_min, $l_max ) = List::MoreUtils::PP::minmax(@lengths);
my $diff_ratio = sprintf "%.3f", $l_min / $l_max;
next if ( $diff_ratio < $opt->{ratio} );
}
push @lines, $line; # May produce duplicated lines
}
}
@lines = List::MoreUtils::PP::uniq(@lines);
#----------------------------#
# Output
#----------------------------#
my $out_fh;
if ( lc( $opt->{outfile} ) eq "stdout" ) {
$out_fh = \*STDOUT;
}
( run in 1.325 second using v1.01-cache-2.11-cpan-39bf76dae61 )