Bio-Kmer

 view release on metacpan or  search on metacpan

lib/Bio/Kmer.pm  view on Meta::CPAN

#!/usr/bin/env perl

# Kmer.pm: a kmer counting module
# Author: Lee Katz <lkatz@cdc.gov>

package Bio::Kmer;
require 5.10.0;
our $VERSION=0.55;

use strict;
use warnings;

use List::Util qw/max/;
use File::Basename qw/basename fileparse dirname/;
use File::Temp qw/tempdir tempfile/;
use File::Path qw/remove_tree/;
use Data::Dumper qw/Dumper/;
use IO::Uncompress::Gunzip;
use File::Which qw/which/;
use Carp qw/croak carp confess/;

our $iThreads; # 1 when threads and threads::shared were loaded, 0 when loading failed
BEGIN{
  # Try to load threading support at compile time. The modules are
  # pulled in with `require` inside an eval so that a failure to load
  # them is trapped here instead of aborting compilation of this file.
  my $loadedOk = eval{
    require threads;
    require threads::shared;
    1;
  };

  # Success: both requires returned without dying.
  $iThreads = 1 if $loadedOk;
  # Failure: the eval left an error message behind.
  $iThreads = 0 if $@;
}

# Inherit Exporter's import(); nothing is offered for export at the moment.
use Exporter qw/import/;
our @EXPORT_OK = ();

# Filename extensions, grouped by file format.
our @fastqExt   = ('.fastq.gz', '.fastq', '.fq', '.fq.gz');
our @fastaExt   = ('.fasta', '.fna', '.faa', '.mfa', '.fas', '.fa');
our @bamExt     = ('.sorted.bam', '.bam');
our @vcfExt     = ('.vcf.gz', '.vcf');
our @richseqExt = ('.gbk', '.gbf', '.gb', '.embl');
our @sffExt     = ('.sff');
our @samExt     = ('.sam', '.bam');

# Variables declared with the :shared attribute, used as lock tokens.
our $fhStick :shared;      # Helps us open only one file at a time
our $enqueueStick :shared; # Helps control access to the kmer queue


# TODO if 'die' is imported by a script, redefine
# sig die in that script as this function.

# _die_handler($exception)
#
# __DIE__ hook that decorates a fatal error message before rethrowing it.
#   * A string message gets its trailing "at FILE line N." moved onto its
#     own line, prefixed with "Stopped", and the whole message is prefixed
#     with "$0: <name of the sub that died>: ".
#   * A reference (exception object) is rethrown unchanged; concatenating
#     it into a string would replace the object with text like "HASH(0x...)".
# Never returns: always dies.
sub _die_handler {
  my $e = $_[0];

  # Keep exception objects intact for whoever catches them.
  die $e if ref $e;

  $e =~ s/(at [^\s]+? line \d+\.$)/\nStopped $1/;

  # caller(1) is the frame of the sub in which die() was called. It is
  # undefined when there is no such frame; fall back to a fixed label
  # so the message has no empty slot and no "uninitialized" warning.
  my $dyingSub = (caller(1))[3];
  $dyingSub = 'main' unless defined $dyingSub;

  die("$0: ".$dyingSub.": ".$e);
}

# NOTE(review): this is `local` at file scope, so the override presumably
# only lasts while this file's top-level statements are running -- confirm
# that this is the intended lifetime.
local $SIG{'__DIE__'} = \&_die_handler;

my $startTime = time(); # epoch seconds when this statement ran; baseline for elapsed time

# logmsg(@message)
#
# Prints one line to STDERR in the form
#   <basename of $0>.<thread id> <seconds since $startTime> <message>
# where <message> is @message interpolated into a string.
# The thread id is 0 unless $iThreads is true, in which case it is
# threads->tid. Returns the result of print.
sub logmsg{
  # Use a lexical copy of the program name. $0 is a process-wide
  # variable and assigning to it (even under `local`) can rewrite the
  # process title on some platforms, which a logging call should not do.
  my $program = basename($0);

  my $tid = 0;
  if($iThreads){
    $tid = threads->tid;
  }

  my $elapsedTime = time() - $startTime;
  print STDERR "$program.$tid $elapsedTime @_\n";
}

=pod

=head1 NAME

Bio::Kmer - Helper module for Kmer Analysis.

=head1 SYNOPSIS

A module for helping with kmer analysis.

  use strict;
  use warnings;
  use Bio::Kmer;
  
  my $kmer=Bio::Kmer->new("file.fastq.gz",{kmercounter=>"jellyfish",numcpus=>4});
  my $kmerHash=$kmer->kmers();
  my $countOfCounts=$kmer->histogram();

  my $minimizers = $kmer->minimizers();
  my $minimizerCluster = $kmer->minimizerCluster();

The BioPerl way

  use strict;
  use warnings;
  use Bio::SeqIO;
  use Bio::Kmer;

  # Load up any Bio::SeqIO object. Quality values will be
  # faked internally to help with compatibility even if
  # a fastq file is given.
  my $seqin = Bio::SeqIO->new(-file=>"input.fasta");
  my $kmer=Bio::Kmer->new($seqin);
  my $kmerHash=$kmer->kmers();
  my $countOfCounts=$kmer->histogram();

=head1 DESCRIPTION

A module for helping with kmer analysis. The basic methods help count kmers and can produce a count of counts.  Currently this module only supports fastq format.  Although this module can count kmers with pure perl, it is recommended to give the opti...

=head1 DEPENDENCIES

  * BioPerl
  * Jellyfish >=2
  * Perl threads
  * Perl >=5.10

=head1 VARIABLES



( run in 1.966 second using v1.01-cache-2.11-cpan-140bd7fdf52 )