Bio-MUST-Core

 view release on metacpan or  search on metacpan

bin/inst-qual-filter.pl  view on Meta::CPAN

#!/usr/bin/env perl
# PODNAME: inst-qual-filter.pl
# ABSTRACT: Discard low-quality nt seqs in FASTA files (optimized)
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@doct.uliege.be>

use Modern::Perl '2011';
use autodie;

use Getopt::Euclid qw(:vars);
use Smart::Comments;

use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:seqtypes);
use Bio::MUST::Core::Utils qw(secure_outfile);
use aliased 'Bio::MUST::Core::Ali';


# Seqs rejected by the purity filter, accumulated across all infiles.
# Only populated when --filter-out is given, since that is the sole consumer.
my @bad_seqs;

# Callback handed to Ali->instant_store (as its 'coderef' argument).
#   arg     : a seq object providing purity, full_id and wrapped_str
#   returns : the FASTA-formatted record ('>' id line followed by the wrapped
#             sequence) when the seq purity is at least --min-purity;
#             nothing (bare return) when the seq is rejected
my $purity_filter = sub {
    my $seq = shift;

    # reject seqs below the purity threshold
    if ($seq->purity < $ARGV_min_purity) {

        # keep the rejected seq only if it is going to be written out later;
        # otherwise discarded seqs would pile up in memory for no purpose
        push @bad_seqs, $seq if $ARGV_filter_out;
        return;
    }

    # format allowed seqs
    return '>' . $seq->full_id . "\n" . $seq->wrapped_str;
};

# Run the purity filter over each infile, writing to a per-infile outfile.
for my $infile (@ARGV_infiles) {
    # outfile name is built from the infile and the mandatory --out-suffix
    my $outfile = secure_outfile($infile, $ARGV_out_suffix);

    ### Processing: $infile
    my %store_args = ( infile => $infile, coderef => $purity_filter );
    Ali->instant_store($outfile, \%store_args);
}

# When --filter-out is given, write the collected rejected seqs to that file.
if ($ARGV_filter_out) {

    ### Storing filtered seqs in: $ARGV_filter_out
    Ali->new( seqs => \@bad_seqs, guessing => 0 )
       ->store_fasta($ARGV_filter_out);
}

__END__

=pod

=head1 NAME

inst-qual-filter.pl - Discard low-quality nt seqs in FASTA files (optimized)

=head1 VERSION

version 0.240390

=head1 USAGE

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.464 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )