App-Anchr

 view release on metacpan or  search on metacpan

lib/App/Anchr/Command/group.pm  view on Meta::CPAN

package App::Anchr::Command::group;
use strict;
use warnings;
use autodie;

use App::Anchr -command;
use App::Anchr::Common;

# One-line summary of this command; description() builds its text from it.
use constant abstract => "group anchors by long reads";

# Option specification for this command.
#
# Returns a list of [ spec, help text, (optional) settings ] array refs, one
# per command-line option, followed by a trailing hash ref of global
# settings for the option parser.
sub opt_spec {
    my @options = (
        [ "dir|d=s"      => "output directory" ],
        [ "range|r=s"    => "ranges of anchors",                { required => 1 } ],
        [ "coverage|c=i" => "minimal coverage",                 { default  => 2 } ],
        [ "max=i"        => "max distance",                     { default  => 5000 } ],
        [ "len|l=i"      => "minimal length of overlaps",       { default  => 1000 } ],
        [ "idt|i=f"      => "minimal identity of overlaps",     { default  => 0.85 } ],
        [ "keep"         => "don't remove multi-matched reads" ],
        [ "oa=s"         => "overlaps between anchors" ],
        [ "parallel|p=i" => "number of threads",                { default  => 4 } ],
        [ "verbose|v"    => "verbose mode" ],
        [ "png"          => "write a png file via graphviz" ],
    );

    # The settings hash stays last so a scalar-context caller still gets it.
    return ( @options, { show_defaults => 1 } );
}

# Usage line for this command: two positional arguments, the dazz DB and
# the overlap file, after any options.
sub usage_desc {
    my $usage = "anchr group [options] <dazz DB> <ovlp file>";
    return $usage;
}

# Long description of this command: the capitalised abstract as a sentence,
# followed by a tab-indented note about the dazz db requirement.
sub description {
    my @parts = (
        ucfirst(abstract), ".\n",
        "\tThis command relies on an existing dazz db.\n",
    );
    return join "", @parts;
}

# Check positional arguments and options before execute() runs, and fill in
# the default output directory.
#
# Parameters:
#   $self - command object; problems are reported through its usage_error()
#           method (expected to abort - App::Cmd's implementation dies).
#   $opt  - hash ref of parsed options; reads {range}, {oa} and {dir}, and
#           sets {dir} when it is not defined.
#   $args - array ref of positional arguments: <dazz DB> and <ovlp file>.
#
# Returns nothing.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # Exactly two files are required, matching usage_desc.
    if ( @{$args} != 2 ) {
        my $message = "This command needs exactly two input files.\n\tIt found";
        $message .= sprintf " [%s]", $_ for @{$args};
        $message .= ".\n";
        $self->usage_error($message);
    }
    for my $file ( @{$args} ) {
        if ( !Path::Tiny::path($file)->is_file ) {
            $self->usage_error("The input file [$file] doesn't exist.");
        }
    }

    if ( !AlignDB::IntSpan->valid( $opt->{range} ) ) {
        $self->usage_error("Invalid --range [$opt->{range}]\n");
    }

    # --oa is optional; only verify the file when one was given.
    if ( $opt->{oa} && !Path::Tiny::path( $opt->{oa} )->is_file ) {
        $self->usage_error("The overlap file [$opt->{oa}] doesn't exist.\n");
    }

    # Default output directory: a "group" directory next to the dazz DB.
    if ( !defined $opt->{dir} ) {
        $opt->{dir}
            = Path::Tiny::path( $args->[0] )->parent->child("group")->absolute->stringify;
    }

    return;
}

sub execute {
    my ( $self, $opt, $args ) = @_;

    #@type Path::Tiny
    my $out_dir = Path::Tiny::path( $opt->{dir} );
    $out_dir->mkpath();

    # absolute paths before we chdir to $out_dir
    my $fn_dazz = Path::Tiny::path( $args->[0] )->absolute->stringify;
    my $fn_ovlp = Path::Tiny::path( $args->[1] )->absolute->stringify;

    #@type AlignDB::IntSpan
    my $anchor_range = AlignDB::IntSpan->new->add_runlist( $opt->{range} );

lib/App/Anchr/Command/group.pm  view on Meta::CPAN

                        my $distances_ref
                            = $graph->get_edge_attribute( $members[$i], $members[$j], "distances" );

                        my $long_ids_ref
                            = $graph->get_edge_attribute( $members[$i], $members[$j], "long_ids" );

                        $long_id_set->add( @{$long_ids_ref} );

                        $relation_of->{ $members[$i] }{ $members[$j] }
                            = [ $distances_ref, $long_ids_ref ];

                        for my $long_id ( @{$long_ids_ref} ) {
                            push @anchor_long_pairs, [ $members[$i], $long_id ];
                            push @anchor_long_pairs, [ $members[$j], $long_id ];
                        }
                    }
                }
            }

            # serials to names
            my $name_of
                = App::Anchr::Common::serial2name( $fn_dazz, [ @members, $long_id_set->as_array ] );

            my $fn_relation = $out_dir->child("$basename.relation.tsv");
            $fn_relation->remove;
            for my $key_i ( sort keys %{$relation_of} ) {
                for my $key_j ( sort keys %{ $relation_of->{$key_i} } ) {
                    my $str_dis = join( ",", @{ $relation_of->{$key_i}{$key_j}[0] } );
                    my $str_long = join( ",",
                        map { $name_of->{$_} } @{ $relation_of->{$key_i}{$key_j}[1] } );
                    my $line = sprintf "%s\t%s\t%s\t%s\n", $name_of->{$key_i},
                        $name_of->{$key_j}, $str_dis, $str_long;
                    $fn_relation->append($line);
                }
            }

            @anchor_long_pairs = sort
                map { sprintf( "%s\t%s\n", $name_of->{ $_->[0] }, $name_of->{ $_->[1] } ) }
                @anchor_long_pairs;
            @anchor_long_pairs = App::Fasops::Common::uniq(@anchor_long_pairs);
            $out_dir->child("$basename.restrict.tsv")->spew(@anchor_long_pairs);
        }

        #----------------------------#
        # long reads
        #----------------------------#
        if ( !$long_id_set->is_empty ) {
            my $cmd;
            $cmd .= "DBshow -U $fn_dazz ";
            $cmd .= join " ", $long_id_set->as_array;
            $cmd .= " | faops filter -l 0 stdin stdout";
            $cmd .= " > " . $out_dir->child("$basename.long.fasta")->stringify;

            system $cmd;
        }

        $cc_serial++;
    }
    printf STDERR "CC count %d\n", scalar(@ccs);

    if ( $opt->{png} ) {
        App::Anchr::Common::g2gv0( $graph, $fn_dazz . ".png" );
    }
}

1;



( run in 3.120 seconds using v1.01-cache-2.11-cpan-df04353d9ac )