App-Anchr
view release on metacpan or search on metacpan
lib/App/Anchr/Command/group.pm view on Meta::CPAN
package App::Anchr::Command::group;
use strict;
use warnings;
use autodie;
use App::Anchr -command;
use App::Anchr::Common;
# One-line summary of the command; description() below reuses it.
use constant abstract => "group anchors by long reads";
# Option specification for this command.
#
# Returns a list of array refs, each holding an option spec string, a help
# text and (optionally) a hash ref with constraints such as a default value
# or a required flag. The trailing hash ref carries show_defaults => 1.
sub opt_spec {
    return (
        # input / output
        [ 'dir|d=s',      'output directory' ],
        [ 'range|r=s',    'ranges of anchors',            { required => 1 } ],

        # thresholds
        [ 'coverage|c=i', 'minimal coverage',             { default => 2 } ],
        [ 'max=i',        'max distance',                 { default => 5000 } ],
        [ 'len|l=i',      'minimal length of overlaps',   { default => 1000 } ],
        [ 'idt|i=f',      'minimal identity of overlaps', { default => 0.85 } ],

        # behaviour switches and auxiliary inputs
        [ 'keep',         q{don't remove multi-matched reads} ],
        [ 'oa=s',         'overlaps between anchors' ],
        [ 'parallel|p=i', 'number of threads',            { default => 4 } ],
        [ 'verbose|v',    'verbose mode' ],
        [ 'png',          'write a png file via graphviz' ],

        { show_defaults => 1 },
    );
}
# Usage line for this command: two positional arguments, the dazz DB and
# the overlap file, after any options.
sub usage_desc {
    my $usage = 'anchr group [options] <dazz DB> <ovlp file>';
    return $usage;
}
# Longer help text: the capitalised abstract followed by a full stop, then
# an indented note that an existing dazz db is needed.
sub description {
    my @parts = (
        ucfirst(abstract) . ".\n",
        "\tThis command relies on an existing dazz db.\n",
    );
    return join q{}, @parts;
}
# Validate positional arguments and options, reporting every problem through
# $self->usage_error.
#
# Params:
#   $self - the command object (must provide usage_error)
#   $opt  - parsed options; reads range, oa and dir, and may set dir
#   $args - array ref of positional arguments: <dazz DB> <ovlp file>
#
# Side effect: when no dir option is present, $opt->{dir} is set to the
# absolute path of a "group" directory beside the first input file.
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # Exactly two positional arguments are expected, matching usage_desc.
    if ( @{$args} != 2 ) {
        my $message = "This command needs exactly two input files.\n\tIt found";
        $message .= sprintf " [%s]", $_ for @{$args};
        $message .= ".\n";
        $self->usage_error($message);
    }

    # Both inputs must be existing regular files.
    for my $file ( @{$args} ) {
        if ( !Path::Tiny::path($file)->is_file ) {
            $self->usage_error("The input file [$file] doesn't exist.");
        }
    }

    if ( !AlignDB::IntSpan->valid( $opt->{range} ) ) {
        $self->usage_error("Invalid --range [$opt->{range}]\n");
    }

    # The anchor-overlap file is optional, but must exist when supplied.
    if ( $opt->{oa} ) {
        if ( !Path::Tiny::path( $opt->{oa} )->is_file ) {
            $self->usage_error("The overlap file [$opt->{oa}] doesn't exist.\n");
        }
    }

    # Default output directory: "group" beside the first input file.
    if ( !exists $opt->{dir} ) {
        $opt->{dir}
            = Path::Tiny::path( $args->[0] )->parent->child("group")->absolute->stringify;
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
#@type Path::Tiny
my $out_dir = Path::Tiny::path( $opt->{dir} );
$out_dir->mkpath();
# absolute paths before we chdir to $out_dir
my $fn_dazz = Path::Tiny::path( $args->[0] )->absolute->stringify;
my $fn_ovlp = Path::Tiny::path( $args->[1] )->absolute->stringify;
#@type AlignDB::IntSpan
my $anchor_range = AlignDB::IntSpan->new->add_runlist( $opt->{range} );
lib/App/Anchr/Command/group.pm view on Meta::CPAN
my $distances_ref
= $graph->get_edge_attribute( $members[$i], $members[$j], "distances" );
my $long_ids_ref
= $graph->get_edge_attribute( $members[$i], $members[$j], "long_ids" );
$long_id_set->add( @{$long_ids_ref} );
$relation_of->{ $members[$i] }{ $members[$j] }
= [ $distances_ref, $long_ids_ref ];
for my $long_id ( @{$long_ids_ref} ) {
push @anchor_long_pairs, [ $members[$i], $long_id ];
push @anchor_long_pairs, [ $members[$j], $long_id ];
}
}
}
}
# serials to names
my $name_of
= App::Anchr::Common::serial2name( $fn_dazz, [ @members, $long_id_set->as_array ] );
my $fn_relation = $out_dir->child("$basename.relation.tsv");
$fn_relation->remove;
for my $key_i ( sort keys %{$relation_of} ) {
for my $key_j ( sort keys %{ $relation_of->{$key_i} } ) {
my $str_dis = join( ",", @{ $relation_of->{$key_i}{$key_j}[0] } );
my $str_long = join( ",",
map { $name_of->{$_} } @{ $relation_of->{$key_i}{$key_j}[1] } );
my $line = sprintf "%s\t%s\t%s\t%s\n", $name_of->{$key_i},
$name_of->{$key_j}, $str_dis, $str_long;
$fn_relation->append($line);
}
}
@anchor_long_pairs = sort
map { sprintf( "%s\t%s\n", $name_of->{ $_->[0] }, $name_of->{ $_->[1] } ) }
@anchor_long_pairs;
@anchor_long_pairs = App::Fasops::Common::uniq(@anchor_long_pairs);
$out_dir->child("$basename.restrict.tsv")->spew(@anchor_long_pairs);
}
#----------------------------#
# long reads
#----------------------------#
if ( !$long_id_set->is_empty ) {
my $cmd;
$cmd .= "DBshow -U $fn_dazz ";
$cmd .= join " ", $long_id_set->as_array;
$cmd .= " | faops filter -l 0 stdin stdout";
$cmd .= " > " . $out_dir->child("$basename.long.fasta")->stringify;
system $cmd;
}
$cc_serial++;
}
printf STDERR "CC count %d\n", scalar(@ccs);
if ( $opt->{png} ) {
App::Anchr::Common::g2gv0( $graph, $fn_dazz . ".png" );
}
}
1;
( run in 3.120 seconds using v1.01-cache-2.11-cpan-df04353d9ac )