Bio-Kmer
view release on metacpan or search on metacpan
lib/Bio/Kmer.pm view on Meta::CPAN
#!/usr/bin/env perl
# Kmer.pm: a kmer counting module
# Author: Lee Katz <lkatz@cdc.gov>
package Bio::Kmer;
require 5.10.0;
our $VERSION=0.55;
use strict;
use warnings;
use List::Util qw/max/;
use File::Basename qw/basename fileparse dirname/;
use File::Temp qw/tempdir tempfile/;
use File::Path qw/remove_tree/;
use Data::Dumper qw/Dumper/;
use IO::Uncompress::Gunzip;
use File::Which qw/which/;
use Carp qw/croak carp confess/;
# Boolean: 1 when both threads and threads::shared could be loaded, 0 otherwise.
# Decided once, inside BEGIN, i.e. while this file is being compiled.
our $iThreads;
BEGIN {
    # The trailing 1 is only reached when neither require died, so the
    # eval's return value tells us whether thread support is available.
    my $threadsLoaded = eval {
        require threads;
        require threads::shared;
        1;
    };
    $iThreads = $threadsLoaded ? 1 : 0;
}
use Exporter qw/import/;
# Nothing is offered for export at the moment.
our @EXPORT_OK = ();

# Filename extension lists, one per file format.
our @fastqExt   = ('.fastq.gz', '.fastq', '.fq', '.fq.gz');
our @fastaExt   = ('.fasta', '.fna', '.faa', '.mfa', '.fas', '.fa');
our @bamExt     = ('.sorted.bam', '.bam');
our @vcfExt     = ('.vcf.gz', '.vcf');
our @richseqExt = ('.gbk', '.gbf', '.gb', '.embl');
our @sffExt     = ('.sff');
our @samExt     = ('.sam', '.bam');

# Package scalars declared with the :shared attribute.
our $fhStick      :shared; # Helps us open only one file at a time
our $enqueueStick :shared; # Helps control access to the kmer queue
# TODO if 'die' is imported by a script, redefine
# sig die in that script as this function.
# __DIE__ handler: takes the die message, inserts a newline plus "Stopped "
# in front of a trailing "at FILE line N." and re-dies with the message
# prefixed by "$0: <sub name from caller(1)>: ".
# NOTE(review): this is a file-scope `local`, so the handler is presumably
# only in effect while this file's own top-level code runs - confirm that
# this is the intent.
# NOTE(review): (caller(1))[3] looks like it would be undef when there is no
# enclosing sub frame, which would leave the prefix empty - verify.
local $SIG{'__DIE__'} = sub { my $e = $_[0]; $e =~ s/(at [^\s]+? line \d+\.$)/\nStopped $1/; die("$0: ".(caller(1))[3].": ".$e); };
# Epoch seconds captured when this top-level code runs; logmsg() subtracts
# it from the current time to report elapsed seconds.
my $startTime = time();
# Print a timestamped progress message to STDERR.
#
# Output format: "<basename of $0>.<thread id> <elapsed seconds> <message>\n"
#   - thread id is threads->tid when $iThreads is true, otherwise 0
#   - elapsed seconds are counted from $startTime
#   - <message> is the argument list interpolated into the string
#     (elements joined by $", a single space by default)
#
# Arguments: list of message fragments.
# Returns:   the return value of print.
sub logmsg{
    # Use a lexical for the short program name rather than `local $0 = ...`:
    # assigning to $0 can change the process name seen by the operating
    # system, which is an unwanted side effect for a formatting helper.
    my $program = basename($0);

    my $tid = $iThreads ? threads->tid() : 0;

    my $elapsedTime = time() - $startTime;
    print STDERR "$program.$tid $elapsedTime @_\n";
}
=pod
=head1 NAME
Bio::Kmer - Helper module for Kmer Analysis.
=head1 SYNOPSIS
A module for helping with kmer analysis.
use strict;
use warnings;
use Bio::Kmer;
my $kmer=Bio::Kmer->new("file.fastq.gz",{kmercounter=>"jellyfish",numcpus=>4});
my $kmerHash=$kmer->kmers();
my $countOfCounts=$kmer->histogram();
my $minimizers = $kmer->minimizers();
my $minimizerCluster = $kmer->minimizerCluster();
The BioPerl way
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Kmer;
# Load up any Bio::SeqIO object. Quality values will be
# faked internally to help with compatibility even if
# a fastq file is given.
my $seqin = Bio::SeqIO->new(-file=>"input.fasta");
my $kmer=Bio::Kmer->new($seqin);
my $kmerHash=$kmer->kmers();
my $countOfCounts=$kmer->histogram();
=head1 DESCRIPTION
A module for helping with kmer analysis. The basic methods help count kmers and can produce a count of counts. Currently this module only supports fastq format. Although this module can count kmers with pure perl, it is recommended to give the opti...
=head1 DEPENDENCIES
* BioPerl
* Jellyfish >=2
* Perl threads
* Perl >=5.10
=head1 VARIABLES
( run in 1.966 second using v1.01-cache-2.11-cpan-140bd7fdf52 )