Bio-Kmer
view release on metacpan or search on metacpan
lib/Bio/Kmer.pm view on Meta::CPAN
The BioPerl way
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Kmer;
# Load up any Bio::SeqIO object. Quality values will be
# faked internally to help with compatibility even if
# a fastq file is given.
my $seqin = Bio::SeqIO->new(-file=>"input.fasta");
my $kmer=Bio::Kmer->new($seqin);
my $kmerHash=$kmer->kmers();
my $countOfCounts=$kmer->histogram();
=head1 DESCRIPTION
A module for helping with kmer analysis. The basic methods help count kmers and can produce a count of counts. Currently this module only supports fastq format. Although this module can count kmers with pure perl, it is recommended to give the opti...
=head1 DEPENDENCIES
* BioPerl
* Jellyfish >=2
* Perl threads
* Perl >=5.10
=head1 VARIABLES
=over
=item $Bio::Kmer::iThreads
Boolean describing whether the module instance is using threads
=back
=head1 METHODS
=over
=item Bio::Kmer->new($filename, \%options)
Create a new instance of the kmer counter. One object per file.
Filename can be either a file path or a Bio::SeqIO object.
Applicable arguments for \%options:
Argument Default Description
kmercounter perl What kmer counter software to use.
Choices: Perl, Jellyfish.
kmerlength|k 21 Kmer length
numcpus 1 This module uses perl
multithreading with pure perl or
can supply this option to other
software like jellyfish.
gt 1 If the count of kmers is fewer
than this, ignore the kmer. This
might help speed analysis if you
do not care about low-count kmers.
sample 1 Retain only a percentage of kmers.
1 is 100%; 0 is 0%
Only works with the perl kmer counter.
verbose 0 Print more messages.
Examples:
my $kmer=Bio::Kmer->new("file.fastq.gz",{kmercounter=>"jellyfish",numcpus=>4});
=back
=cut
sub new{
my($class,$seqfile,$settings)=@_;
croak "ERROR: need a sequence file or a Bio::SeqIO object" if(!$seqfile);
# Set optional parameter defaults
$$settings{kmerlength} = $$settings{kmerlength} || $$settings{k} || 21;
$$settings{numcpus} ||= 1;
$$settings{gt} ||= 1;
$$settings{kmercounter} ||= "perl";
$$settings{tempdir} ||= tempdir("Kmer.pm.XXXXXX",TMPDIR=>1,CLEANUP=>1);
$$settings{sample} = 1 if(!defined($$settings{sample}));
$$settings{verbose} ||= 0;
# If the first parameter $seqfile is a Bio::SeqIO object,
# then send it to a file to dovetail with the rest of
# this module.
if(ref($seqfile) && $seqfile->isa("Bio::SeqIO")){
# BioPerl isn't required at compile tile but is
# required if the user starts with a BioPerl object.
eval {
require Bio::SeqIO;
require Bio::Seq::Quality;
};
if($@){
croak "ERROR: cannot load Bio::SeqIO and Bio::Seq::Quality, but I need to because you supplied a Bio::SeqIO object";
}
my $tempfile="$$settings{tempdir}/bioperl_input.fastq";
my $out=Bio::SeqIO->new(-file=>">".$tempfile);
while(my $seq=$seqfile->next_seq){
my $qual = $seq->qual || "I" x $seq->length;
my $seqWithQual = Bio::Seq::Quality->new(
# TODO preserve qual values if they exist, but
# for now, it doesn't really matter.
-qual=> $qual,
-seq => $seq->seq,
-id => $seq->id,
-verbose => -1,
);
$out->write_seq($seqWithQual);
}
$out->close; # make sure output is flushed
# now redefine the seqfile as the new file on disk.
$seqfile=$tempfile;
}
# Check if we have a valid sequence file path or at
# the very least, double check that the file path we just
( run in 0.713 second using v1.01-cache-2.11-cpan-5735350b133 )