Bio-MUST-Core
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
bin/inst-qual-filter.pl view on Meta::CPAN
#!/usr/bin/env perl
# PODNAME: inst-qual-filter.pl
# ABSTRACT: Discard low-quality nt seqs in FASTA files (optimized)
# CONTRIBUTOR: Valerian LUPO <valerian.lupo@doct.uliege.be>
use Modern::Perl '2011';
use autodie;
use Getopt::Euclid qw(:vars);
use Smart::Comments;
use Bio::MUST::Core;
use Bio::MUST::Core::Constants qw(:seqtypes);
use Bio::MUST::Core::Utils qw(secure_outfile);
use aliased 'Bio::MUST::Core::Ali';
# Seqs rejected by the purity filter. They are only needed when
# $ARGV_filter_out is set, in which case they are written out once all
# infiles have been processed.
my @bad_seqs;

# Callback passed as 'coderef' to Ali->instant_store (presumably invoked once
# per seq of the infile -- confirm against Ali's documentation).
#   $seq : seq object providing the purity, full_id and wrapped_str methods
# Returns the FASTA text of the seq ('>' . id . newline . wrapped seq) when its
# purity is at least $ARGV_min_purity; returns nothing (bare return) otherwise.
my $purity_filter = sub {
    my $seq = shift;

    # reject seqs whose purity is below the requested threshold
    if ($seq->purity < $ARGV_min_purity) {

        # only retain rejected seqs when they are going to be stored later;
        # otherwise they would accumulate in memory for no use
        push @bad_seqs, $seq if $ARGV_filter_out;
        return;
    }

    # format allowed seqs as a FASTA record
    return '>' . $seq->full_id . "\n" . $seq->wrapped_str;
};
# Run every input file through the purity filter, writing one output file
# per input file.
for my $infile (@ARGV_infiles) {

    # --out-suffix is required in this script
    my $outfile = secure_outfile($infile, $ARGV_out_suffix);

    ### Processing: $infile

    # named arguments for instant_store: where to read from and which
    # callback to apply
    my %store_args = (
        infile  => $infile,
        coderef => $purity_filter,
    );
    Ali->instant_store($outfile, \%store_args);
}
# When $ARGV_filter_out is set, write the seqs rejected by the purity filter
# to that file as FASTA.
if ($ARGV_filter_out) {
    ### Storing filtered seqs in: $ARGV_filter_out

    # build an Ali from the rejected seqs (guessing switched off) and store it
    # right away
    Ali->new(
        seqs     => \@bad_seqs,
        guessing => 0,
    )->store_fasta($ARGV_filter_out);
}
__END__
=pod
=head1 NAME
inst-qual-filter.pl - Discard low-quality nt seqs in FASTA files (optimized)
=head1 VERSION
version 0.240390
=head1 USAGE
view all matches for this distribution - view release on metacpan - search on metacpan
( run in 0.464 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )