App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/maskfasta.pm view on Meta::CPAN
package App::Egaz::Command::maskfasta;
use strict;
use warnings;
use autodie;
use App::Egaz -command;
use App::Egaz::Common;
sub abstract {
return 'soft/hard-masking sequences in a fasta file';
}
sub opt_spec {
return (
[ "outfile|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
[ 'hard', 'change masked regions to N', ],
[ 'len|l=i', 'sequence line length', { default => 80 }, ],
{ show_defaults => 1, }
);
}
sub usage_desc {
return "egaz maskfasta [options] <infile> <runlist.yml>";
}
sub description {
my $desc;
$desc .= ucfirst(abstract) . ".\n";
return $desc;
}
sub validate_args {
my ( $self, $opt, $args ) = @_;
if ( @{$args} != 2 ) {
my $message = "This command need two input files.\n\tIt found";
$message .= sprintf " [%s]", $_ for @{$args};
$message .= ".\n";
$self->usage_error($message);
}
for ( @{$args} ) {
next if lc $_ eq "stdin";
if ( !Path::Tiny::path($_)->is_file ) {
$self->usage_error("The input file [$_] doesn't exist.");
}
}
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# load files
#----------------------------#
my $seq_of = App::Fasops::Common::read_fasta( $args->[0] );
my $set_single
= App::RL::Common::runlist2set( YAML::Syck::LoadFile( $args->[1] ) );
my $out_fh;
if ( lc( $opt->{outfile} ) eq "stdout" ) {
$out_fh = *STDOUT{IO};
}
else {
open $out_fh, ">", $opt->{outfile};
}
#----------------------------#
# processing
#----------------------------#
for my $seq_name ( keys %{$seq_of} ) {
my $seq = $seq_of->{$seq_name};
if ( exists $set_single->{$seq_name} ) {
my AlignDB::IntSpan $mask_set = $set_single->{$seq_name};
# empty set have no @sets
my @sets = $mask_set->sets;
for my AlignDB::IntSpan $set (@sets) {
my $offset = $set->min - 1;
my $length = $set->size;
my $str = substr $seq, $offset, $length;
if ( $opt->{hard} ) {
my $str_len = length $str;
$str = 'N' x $str_len;
}
else {
$str = lc $str;
}
substr $seq, $offset, $length, $str;
( run in 1.160 second using v1.01-cache-2.11-cpan-98e64b0badf )