App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/exactmatch.pm view on Meta::CPAN
package App::Egaz::Command::exactmatch;
use strict;
use warnings;
use autodie;
use App::Egaz -command;
use App::Egaz::Common;
sub abstract {
return 'exact matched positions in genome sequences';
}
sub opt_spec {
return (
[ "outfile|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
[ 'length|l=i', 'match length of mummer', { default => 100 }, ],
[ 'discard=i', 'discard blocks with copy number larger than this', { default => 0 }, ],
[ 'debug', 'write a file of exactmatch.json for debugging', ],
{ show_defaults => 1, }
);
}
sub usage_desc {
return "egaz exactmatch [options] <infile> <genome.fa>";
}
sub description {
my $desc;
$desc .= ucfirst(abstract) . ".\n";
$desc .= <<'MARKDOWN';
* <infile> can't be stdin
* `mummer` or `sparsemem` should be in $PATH
MARKDOWN
return $desc;
}
sub validate_args {
my ( $self, $opt, $args ) = @_;
if ( @{$args} != 2 ) {
my $message = "This command need two input files.\n\tIt found";
$message .= sprintf " [%s]", $_ for @{$args};
$message .= ".\n";
$self->usage_error($message);
}
for ( @{$args} ) {
if ( !Path::Tiny::path($_)->is_file ) {
$self->usage_error("The input file [$_] doesn't exist.");
}
}
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# Run sparsemem
#----------------------------#
#@type Path::Tiny
my $mem_result = Path::Tiny->tempfile;
App::Egaz::Common::run_sparsemem( $mem_result, $args->[0], $args->[1], $opt->{length} );
#----------------------------#
# Parse sparsemem
#----------------------------#
tie my %match_of, "Tie::IxHash";
my $fh_mem = $mem_result->openr;
my $cur_name;
while ( my $line = <$fh_mem> ) {
if ( $line =~ /^\>/ ) {
$line =~ s/^\>\s*//;
chomp $line;
$cur_name = $line;
}
elsif ( $line =~ /^\s*[\w-]+/ ) {
chomp $line;
# XII 1065146 1 6369
# ID
# the position in the reference sequence
# the position in the query sequence
# the length of the match
my @fields = grep {/\S+/} split /\s+/, $line;
if ( @fields != 4 ) {
warn " Line [$line] seems wrong.\n";
next;
}
if ( !exists $match_of{$cur_name} ) {
$match_of{$cur_name} = [];
}
push @{ $match_of{$cur_name} }, \@fields;
}
else { # Blank line, do nothing
}
( run in 1.273 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )