App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/blastlink.pm view on Meta::CPAN
package App::Egaz::Command::blastlink;
use strict;
use warnings;
use autodie;
use MCE;
use MCE::Flow;
use App::Egaz -command;
use App::Egaz::Common;
sub abstract {
return 'link sequences by blastn';
}
sub opt_spec {
return (
[ "outfile|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
[ "coverage|c=f", "coverage of identical matches", { default => 0.9 }, ],
[ "batch=i", "batch size of blast records", { default => 500000 }, ],
[ "parallel|p=i", "number of threads", { default => 2 }, ],
[ "verbose|v", "verbose mode", ],
{ show_defaults => 1, }
);
}
sub usage_desc {
return "egaz blastlink [options] <infile>";
}
sub description {
my $desc;
$desc .= ucfirst(abstract) . ".\n";
$desc .= <<'MARKDOWN';
* <infile> is reports produced by `egaz blastn`
* <infile> can't be stdin
MARKDOWN
return $desc;
}
sub validate_args {
my ( $self, $opt, $args ) = @_;
if ( @{$args} != 1 ) {
my $message = "This command need one input file.\n\tIt found";
$message .= sprintf " [%s]", $_ for @{$args};
$message .= ".\n";
$self->usage_error($message);
}
for ( @{$args} ) {
if ( !Path::Tiny::path($_)->is_file ) {
$self->usage_error("The input file [$_] doesn't exist.");
}
}
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# Parse reports
#----------------------------#
print STDERR "Parse reports\n";
my $worker = sub {
my ( $self, $chunk_ref, $chunk_id ) = @_;
my $wid = MCE->wid;
print STDERR "* Process task [$chunk_id] by worker #$wid\n" if $opt->{verbose};
my @lines = @{$chunk_ref};
my @links;
for my $line (@lines) {
next if $line =~ /^#/;
chomp $line;
# qseqid sseqid qstart qend sstart send qlen slen nident
my @fields = grep {defined} split /\s+/, $line;
if ( @fields != 9 ) {
print "Fields error: $line\n";
next;
}
my $query_name = $fields[0];
my $hit_name = $fields[1];
next if $query_name eq $hit_name;
my $query_length = $fields[6];
my $hit_length = $fields[7];
my $max_length = List::Util::max( $query_length, $hit_length );
next if $query_length / $max_length < $opt->{coverage};
next if $hit_length / $max_length < $opt->{coverage};
my $identical_match = $fields[8];
next if $identical_match / $max_length < $opt->{coverage};
my ( $h_start, $h_end ) = ( $fields[4], $fields[5] );
my $strand = "+";
if ( $h_start > $h_end ) {
( $h_start, $h_end ) = ( $h_end, $h_start );
$strand = "-";
( run in 3.883 seconds using v1.01-cache-2.11-cpan-fe3c2283af0 )