App-Rangeops
view release on metacpan or search on metacpan
lib/App/Rangeops/Command/merge.pm view on Meta::CPAN
package App::Rangeops::Command::merge;
use strict;
use warnings;
use autodie;
use MCE;
use MCE::Flow Sereal => 1;
use MCE::Candy;
use App::Rangeops -command;
use App::Rangeops::Common;
# Returns the short, one-line summary of the merge command.
sub abstract {
    my $summary = 'merge overlapped ranges via overlapping graph';
    return $summary;
}
# Returns the option specification list for the merge command: one array
# reference per option, holding the option spec string, its help text and,
# where one is defined, a hash reference carrying the default value.
sub opt_spec {
    my @coverage = (
        "coverage|c=f",
        "When larger than this ratio, merge ranges, default is [0.95]",
        { default => 0.95 },
    );
    my @outfile = ( "outfile|o=s", "Output filename. [stdout] for screen." );
    my @parallel = (
        "parallel|p=i",
        "Run in parallel mode. Default is [1].",
        { default => 1 },
    );
    my @verbose = ( "verbose|v", "Verbose mode." );

    # Returned as a literal list (not an array) so scalar-context callers
    # still receive the last entry, exactly as before.
    return ( \@coverage, \@outfile, \@parallel, \@verbose );
}
# Returns the usage synopsis string for the merge command.
sub usage_desc {
    my $usage = "rangeops merge [options] <infiles>";
    return $usage;
}
# Returns the long description: the abstract with its first letter
# upper-cased and a trailing period, followed by a tab-indented note that
# merged ranges are always on positive strands.
sub description {
    my @pieces = (
        ucfirst( abstract() ) . ".\n",
        "\tMerged ranges are always on positive strands.\n",
    );
    return join q{}, @pieces;
}
# Checks the positional arguments and fills in a default output filename.
#
#   $self - command object; its usage_error() method is called on bad input
#   $opt  - hash reference of parsed options; the outfile key is set here
#           when it is not already present
#   $args - array reference of input filenames
#
# A usage error is reported when no input is given, or when an input other
# than "stdin" (compared case-insensitively) is not an existing file. When
# no outfile key exists, it is set to the absolute path of the first input
# with ".merge.tsv" appended.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    my @inputs = @{$args};
    $self->usage_error("This command need one or more input files.")
        unless @inputs;

    for my $file (@inputs) {
        next if lc($file) eq "stdin";
        next if Path::Tiny::path($file)->is_file;
        $self->usage_error("The input file [$file] doesn't exist.");
    }

    $opt->{outfile} = Path::Tiny::path( $inputs[0] )->absolute . ".merge.tsv"
        if !exists $opt->{outfile};
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# Loading
#----------------------------#
my $graph_of_chr = {};
my $info_of = {};
for my $file ( @{$args} ) {
my @lines = App::RL::Common::read_lines($file);
for my $line (@lines) {
for my $part ( split /\t/, $line ) {
my $info = App::RL::Common::decode_header($part);
next unless App::RL::Common::info_is_valid($info);
my $chr = $info->{chr};
if ( !exists $graph_of_chr->{$chr} ) {
$graph_of_chr->{$chr} = Graph->new( directed => 0 );
}
my $range = App::RL::Common::encode_header( $info, 1 );
if ( !$graph_of_chr->{$chr}->has_vertex($range) ) {
$graph_of_chr->{$chr}->add_vertex($range);
$info->{intspan} = AlignDB::IntSpan->new;
$info->{intspan}->add_pair( $info->{start}, $info->{end} );
$info_of->{$range} = $info;
print STDERR "Add range $range\n" if $opt->{verbose};
}
}
}
}
#----------------------------#
# Coverages
#----------------------------#
my $worker = sub {
my ( $self, $chunk_ref, $chunk_id ) = @_;
my $chr = $chunk_ref->[0];
my $g = $graph_of_chr->{$chr};
my @ranges = sort $g->vertices;
( run in 1.692 second using v1.01-cache-2.11-cpan-5837b0d9d2c )