App-Rangeops
view release on metacpan or search on metacpan
lib/App/Rangeops/Command/merge.pm view on Meta::CPAN
if ( !exists $opt->{outfile} ) {
$opt->{outfile}
= Path::Tiny::path( $args->[0] )->absolute . ".merge.tsv";
}
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# Loading
#----------------------------#Ï
my $graph_of_chr = {};
my $info_of = {};
for my $file ( @{$args} ) {
my @lines = App::RL::Common::read_lines($file);
for my $line (@lines) {
for my $part ( split /\t/, $line ) {
my $info = App::RL::Common::decode_header($part);
next unless App::RL::Common::info_is_valid($info);
my $chr = $info->{chr};
if ( !exists $graph_of_chr->{$chr} ) {
$graph_of_chr->{$chr} = Graph->new( directed => 0 );
}
my $range = App::RL::Common::encode_header( $info, 1 );
if ( !$graph_of_chr->{$chr}->has_vertex($range) ) {
$graph_of_chr->{$chr}->add_vertex($range);
$info->{intspan} = AlignDB::IntSpan->new;
$info->{intspan}->add_pair( $info->{start}, $info->{end} );
$info_of->{$range} = $info;
print STDERR "Add range $range\n" if $opt->{verbose};
}
}
}
}
#----------------------------#
# Coverages
#----------------------------#
my $worker = sub {
my ( $self, $chunk_ref, $chunk_id ) = @_;
my $chr = $chunk_ref->[0];
my $g = $graph_of_chr->{$chr};
my @ranges = sort $g->vertices;
my @edges;
for my $i ( 0 .. $#ranges ) {
my $range_i = $ranges[$i];
printf STDERR " " x 4 . "Range %d / %d\t%s\n", $i, $#ranges,
$range_i
if $opt->{verbose};
my $set_i = $info_of->{$range_i}{intspan};
for my $j ( $i + 1 .. $#ranges ) {
my $range_j = $ranges[$j];
my $set_j = $info_of->{$range_j}{intspan};
my $i_set = $set_i->intersect($set_j);
if ( $i_set->is_not_empty ) {
my $coverage_i = $i_set->size / $set_i->size;
my $coverage_j = $i_set->size / $set_j->size;
if ( $coverage_i >= $opt->{coverage}
and $coverage_j >= $opt->{coverage} )
{
push @edges, [ $ranges[$i], $ranges[$j] ];
printf STDERR " " x 8
. "Merge with Range %d / %d\t%s\n",
$j, $#ranges, $range_j
if $opt->{verbose};
}
}
}
}
printf STDERR "Gather %d edges\n", scalar @edges if $opt->{verbose};
MCE->gather( $chr, \@edges );
};
MCE::Flow::init {
chunk_size => 1,
max_workers => $opt->{parallel},
};
my %edge_of = mce_flow $worker, ( sort keys %{$graph_of_chr} );
MCE::Flow::finish;
for my $chr ( sort keys %{$graph_of_chr} ) {
print STDERR " " x 4 . "Add edges to chromosome [$chr]\n"
if $opt->{verbose};
for my $edge ( @{ $edge_of{$chr} } ) {
$graph_of_chr->{$chr}->add_edge( @{$edge} );
}
}
#----------------------------#
# Merging
#----------------------------#
my @lines;
for my $chr ( sort keys %{$graph_of_chr} ) {
my $g = $graph_of_chr->{$chr};
my @cc = $g->connected_components;
# filter cc with single range
@cc = grep { scalar @{$_} > 1 } @cc;
for my $c (@cc) {
printf STDERR "\n" . " " x 4 . "Merge %s ranges\n", scalar @{$c}
if $opt->{verbose};
my $merge_set = AlignDB::IntSpan->new;
for my $range ( @{$c} ) {
my $set = $info_of->{$range}{intspan};
$merge_set->merge($set);
}
my $merge_range = App::RL::Common::encode_header(
{ chr => $chr,
strand => "+",
start => $merge_set->min,
end => $merge_set->max,
}
);
( run in 2.160 seconds using v1.01-cache-2.11-cpan-5a3173703d6 )