App-Rangeops

 view release on metacpan or  search on metacpan

lib/App/Rangeops/Command/merge.pm  view on Meta::CPAN

package App::Rangeops::Command::merge;
use strict;
use warnings;
use autodie;

use MCE;
use MCE::Flow Sereal => 1;
use MCE::Candy;

use App::Rangeops -command;
use App::Rangeops::Common;

# One-line summary of what the `merge` subcommand does.
# Also reused by description() as the first sentence of the long help text.
sub abstract {
    my $summary = 'merge overlapped ranges via overlapping graph';
    return $summary;
}

# Command-line options accepted by `merge`.
#
# Returns a list of array refs, one per option, each laid out as
#   [ option spec, help text, optional settings hash ].
# Fresh arrays are built on every call, so callers may modify the result.
sub opt_spec {
    my @coverage = (
        "coverage|c=f",
        "When larger than this ratio, merge ranges, default is [0.95]",
        { default => 0.95 },
    );
    my @outfile = ( "outfile|o=s", "Output filename. [stdout] for screen." );
    my @parallel = (
        "parallel|p=i",
        "Run in parallel mode. Default is [1].",
        { default => 1 },
    );
    my @verbose = ( "verbose|v", "Verbose mode." );

    return ( \@coverage, \@outfile, \@parallel, \@verbose );
}

# Usage line for the `merge` subcommand.
sub usage_desc {
    my $usage = 'rangeops merge [options] <infiles>';
    return $usage;
}

# Long help text: the capitalised abstract as a sentence, followed by a
# tab-indented note about the strand of merged ranges.
sub description {
    my @sentences = (
        ucfirst( abstract() ) . ".\n",
        "\tMerged ranges are always on positive strands.\n",
    );

    return join q{}, @sentences;
}

# Check the positional arguments and fill in a default output filename.
#
#   $self - command object; problems are reported via $self->usage_error
#   $opt  - hash ref of parsed options; {outfile} is set here when absent
#   $args - array ref of input filenames
#
# Rules enforced:
#   * at least one input must be given;
#   * every input must be an existing file, except the case-insensitive
#     name "stdin", which is accepted without a filesystem check.
#
# When no outfile option exists, it defaults to the absolute path of the
# first input with ".merge.tsv" appended.
# NOTE(review): if the first input is "stdin" this yields a file named
# "stdin.merge.tsv" under the current directory - confirm that is intended.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    $self->usage_error("This command need one or more input files.")
        unless @{$args};

    for my $infile ( @{$args} ) {
        next if lc($infile) eq "stdin";
        next if Path::Tiny::path($infile)->is_file;
        $self->usage_error("The input file [$infile] doesn't exist.");
    }

    if ( not exists $opt->{outfile} ) {
        my $first_input = Path::Tiny::path( $args->[0] );
        $opt->{outfile} = $first_input->absolute . ".merge.tsv";
    }
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    #----------------------------#
    # Loading
    #----------------------------#
    my $graph_of_chr = {};
    my $info_of      = {};
    for my $file ( @{$args} ) {
        my @lines = App::RL::Common::read_lines($file);
        for my $line (@lines) {
            for my $part ( split /\t/, $line ) {
                my $info = App::RL::Common::decode_header($part);
                next unless App::RL::Common::info_is_valid($info);

                my $chr = $info->{chr};
                if ( !exists $graph_of_chr->{$chr} ) {
                    $graph_of_chr->{$chr} = Graph->new( directed => 0 );
                }

                my $range = App::RL::Common::encode_header( $info, 1 );
                if ( !$graph_of_chr->{$chr}->has_vertex($range) ) {
                    $graph_of_chr->{$chr}->add_vertex($range);
                    $info->{intspan} = AlignDB::IntSpan->new;
                    $info->{intspan}->add_pair( $info->{start}, $info->{end} );
                    $info_of->{$range} = $info;
                    print STDERR "Add range $range\n" if $opt->{verbose};
                }
            }
        }
    }

    #----------------------------#
    # Coverages
    #----------------------------#
    my $worker = sub {
        my ( $self, $chunk_ref, $chunk_id ) = @_;

        my $chr = $chunk_ref->[0];

        my $g      = $graph_of_chr->{$chr};
        my @ranges = sort $g->vertices;



( run in 1.692 second using v1.01-cache-2.11-cpan-5837b0d9d2c )