App-SimulateReads
view release on metacpan or search on metacpan
t/lib/TestsFor/App/SimulateReads/Simulator.pm view on Meta::CPAN
package TestsFor::App::SimulateReads::Simulator;
# ABSTRACT: Tests for 'App::SimulateReads::Simulator' class
use App::SimulateReads::Base 'test';
use App::SimulateReads::Fastq::SingleEnd;
use App::SimulateReads::Fastq::PairedEnd;
use autodie;
use base 'TestsFor';
use constant {
VERBOSE => 0,
COUNT_LOOPS_BY => 'coverage',
COVERAGE => 8,
STRAND_BIAS => 'random',
SEQID_WEIGHT => 'length',
SEQUENCING_TYPE => 'paired-end',
SEQUENCING_SYSTEM => 'poisson',
SEED => time,
JOBS => 2,
OUTPUT_GZIP => 0,
SEQ_SYS => 'poisson',
QUALITY_SIZE => 10,
GENOME => '.data.fa',
GENOME_SIZE => 2280,
COVERAGE => 5,
PREFIX => 'ponga',
OUTPUT_SINGLE_END => 'ponga_R1_001.fastq',
OUTPUT_PAIRED_END => ['ponga_R1_001.fastq', 'ponga_R2_001.fastq'],
OUTPUT_COUNTS => 'ponga_counts.tsv'
};
sub startup : Tests(startup) {
my $test = shift;
$test->SUPER::startup;
my $class = ref $test;
$class->mk_classdata('default_attr');
$class->mk_classdata('default_sg_single_end');
$class->mk_classdata('default_sg_paired_end');
my $fasta = q{>Chr1
TTACTGCTTTTAACATTACAGTAACTGTTACAGGTTCCAGCAGGCTAACTGGGTGGAAAT
GAGTTTGGTTTCACTTAGTCTCTCTAAAGAGAAAGCAAGTCGGTAGACTAATACCTAATA
AAAGCAAAGCTGCCAACAATTGAAATTGCCTAGGCTGCTCTGTGTGTCCCACATGCATGG
GTGTGGGTGCCAGTGTGTGTGCGTGTGTGCATGCATGTGCATGTGTGTTGGGATAGAGTG
GTAAGAAAATGGGAAATAATAAGAATGTTCAGTCCATAGCCCTTCATTATAAAAAGGTGA
GCTGTAATAAATACTAGTGCCACATTTAGCCAAAACTTTACTCCAGCCAAAGGTGATATT
TTCATGATAACATCCTGTGATTGCTTTGTTCTTCGTCTTTTATGTTCTTCCTAGATGGGC
TCAGAACATACAAGAATTAAGTACACATCTTATTTTCCAGTGATAATGCTACCGGCAAAT
TCTGTTGTTTGTATAAACATCAGCCATGTTTATATAACTAAACTAGTGTTTTGTTTTGTC
TCCCAATCTCCTACCATCCCATCCTAATATGACAGAAGTAATTCCTGAGTTGCTTCTGAA
>Chr2
AATTCAGCAAGAAATTAGACCAAATGGTGGCTTAATGCTGCATTGATTTGGCTATCAATT
TGTTTTCACTTTTCTGCAAAATAATTAATACATTATTAAATTGAATTGTGCTGATGCCAC
>Chr3
AGTTGTTCTTATCTCAAGTGTCTTAAAATTCATTTAATTTGTTTTTCCTTTGGTTTCATT
ATTCAAATTTTAACTTCAGTTCTCAAGATTTTATCTGATGGAAGAGATGGAGTCCATTAC
TGTTTTCACTTTTCTGCAAAATAATTAATACATTATTAAATTGAATTGTGCTGATGCCAC
AGTTGTTCTTATCTCAAGTGTCTTAAAATTCATTTAATTTGTTTTTCCTTTGGTTTCATT
>Chr4
TAAGGACTCCATTGTGCTCCATCATGCCAGAGTTGTAAAATAGATCTTTTAAAGGAAATT
TACTGTGATTTTTTTTCTATTTAAGAGCTTCCTCTCCAGTTGAGCATGTAAGAAAATTAT
ACCAGGAGAATACAGTAAACTCTATGAGGCAAGCTATAAACATGTAGCATTGTGATTAGG
GCTGGTTCTCCTTCTAGAGACATGGTAGGATTGCAATTTCATACCATCCTTGAAGTTAGA
GAGAGCCACGTGACTCATTTAGCCAATGAACTGTGAGCAGAATGACATGTCACTTCCAGC
TGAAGCTTTAACAATCTGAGAGACATTCATACATTTTCCATGTGCTGTAGCCTTATACCC
AAAGCCTGGGTCCCAAGTGACCATGACAGGCAGAGCTCCCTGTTGAGCCACAGAGATTTA
GAGAATGGCTGTTAACACAGCATAATCCAGCCCATCCTGACTAATCTGATATTAACATGT
ATAATAAAGAATTCTATCAATGCTGAGGGAAGATGACTAGTTAAGGTCCTAGGTTGCAAG
TCTCAAAACCTCTTCTAAGGATTGTAGACAGGAAATTAAATGACTTCTAGTCCCTAGAGT
TCCCAATCTCCTACCATCCCATCCTAATATGACAGAAGTAATTCCTGAGTTGCTTCTGAA
TCCCAATCTCCTACCATCCCATCCTAATATGACAGAAGTAATTCCTGAGTTGCTTCTGAA
TCCCAATCTCCTACCATCCCATCCTAATATGACAGAAGTAATTCCTGAGTTGCTTCTGAA
ACCAGAGCTTCCCTCAGAACCCTTAGCCTGCCAGATGGCTTCTTGGAGAGCCCTCACTCA
CTTTTCTCCTTCTGCTATTGCTGCTCATTCATTCCAGTTTTTAAAAATTCATCTTTATCC
>Chr5
AGGAACCTCGCTTCTAGAAAAGTCATACAGGTGCTTCCAGGAGGCTACATGGGCACCCAT
ATTTTTCTAGCCACTTTCATTAGACCAATGCAGCAGAGAAGAAAAGCCTCAATAATTATT
ATGACATGGCATGTTAGGATACCAAGTAAATTGCATTTGTAAAATGTGATTTTCTGTTGG
TGTTCACTTCAGCTCTACTGACATTTGGTAAGTATTATTGACTGACTGACTAACTAATGT
GGTCATTAGTCTTCATAAAGAAAGGCTCTCTACAAAAACGGAGGGATGCCCTTTTTCTGG
CATTTAATACGTAAGAAATTGCCTCCAATAGAAACCAGAGTTGCCTGATTACTATCAGCA
CAGGAGAAATGTATTAATGTGCCTTTCTAGTAACAGGTTTTTAGAAAGTCAAATATAAAC};
open my $fh, ">" => GENOME;
print $fh "$fasta\n";
close $fh;
}
sub setup : Tests(setup) {
my $test = shift;
$LOG_VERBOSE = VERBOSE;
$test->SUPER::setup;
my %default_attr = (
prefix => PREFIX,
output_gzip => OUTPUT_GZIP,
fasta_file => GENOME,
coverage => COVERAGE,
jobs => JOBS,
seqid_weight => 'length',
count_loops_by => 'coverage',
strand_bias => 'random',
seed => SEED
);
my %sg_single_end = (
%default_attr,
fastq => App::SimulateReads::Fastq::SingleEnd->new(
quality_profile => 'poisson',
read_size => QUALITY_SIZE,
sequencing_error => 0.1,
template_id => 'sr0001 simulation_read length=%r position=%c:%t-%n'
)
);
my %sg_paired_end = (
%default_attr,
fastq => App::SimulateReads::Fastq::PairedEnd->new(
quality_profile => 'poisson',
read_size => QUALITY_SIZE,
sequencing_error => 0.1,
fragment_mean => 50,
fragment_stdd => 10,
template_id => 'sr0001 simulation_read length=%r position=%c:%t-%n'
)
);
$test->default_attr(\%default_attr);
$test->default_sg_single_end($test->class_to_test->new(%sg_single_end));
$test->default_sg_paired_end($test->class_to_test->new(%sg_paired_end));
}
sub cleanup : Tests(shutdown) {
my $test = shift;
unlink GENOME;
$test->SUPER::shutdown;
}
sub constructor : Tests(18) {
my $test = shift;
my $class = $test->class_to_test;
my $sg = $test->default_sg_single_end;
my %default_attr = %{ $test->default_attr };
while (my ($attr, $value) = each %default_attr) {
can_ok $sg, $attr;
is $sg->$attr, $value, "The value for $attr shold be correct";
}
}
sub run_simulation : Tests(9) {
my $test = shift;
my $output_single_end = OUTPUT_SINGLE_END;
my $output_paired_end = OUTPUT_PAIRED_END;
my $output_counts = OUTPUT_COUNTS;
my $fastq_count = sub {
my $file = shift;
my $entries = 0;
my %chr_acm;
open my $fh, "<" => $file;
my $mark = 4;
my $acm = 0;
while (<$fh>) {
chomp;
if (++$acm == 1) {
$entries++;
my @tmp1 = split / /;
my @tmp2 = split /:/ => $tmp1[-1];
my @tmp3 = split /=/ => $tmp2[0];
$chr_acm{$tmp3[1]}++;
} elsif ($acm == $mark) {
$acm = 0;
}
}
close $fh;
return (\%chr_acm, $entries);
};
# Testing single-end
my $sg = $test->default_sg_single_end;
$sg->run_simulation;
ok -f $output_single_end,
"run_simulation must create a fastq file for single-end";
my ($chr_acm, $entries) = $fastq_count->($output_single_end);
is int((GENOME_SIZE * $sg->coverage) / $sg->fastq->read_size), $entries,
"run_simulation must create a fastq with the right number of entries for single-end";
my $str_sort = join " " => sort { $chr_acm->{$a} <=> $chr_acm->{$b} } keys %$chr_acm;
ok((lc $str_sort eq lc "Chr2 Chr3 Chr5 Chr1 Chr4" || lc $str_sort eq lc "Chr3 Chr2 Chr5 Chr1 Chr4"),
"chromossome frequency must follow a weighted raffle pattern for single-end");
unlink $output_single_end;
# Testing paired-end
$sg = $test->default_sg_paired_end;
$sg->run_simulation;
for my $i (0..1) {
ok -f $output_paired_end->[$i],
"run_simulation must create fastq file for paired-end $i";
($chr_acm, $entries) = $fastq_count->($output_paired_end->[$i]);
is int((GENOME_SIZE * $sg->coverage) / ($sg->fastq->read_size * 2)), $entries,
"run_simulation must create a fastq with the right number of entries for paired-end $i";
my $str_sort = join " " => sort { $chr_acm->{$a} <=> $chr_acm->{$b} } keys %$chr_acm;
ok((lc $str_sort eq lc "Chr2 Chr3 Chr5 Chr1 Chr4" || lc $str_sort eq lc "Chr3 Chr2 Chr5 Chr1 Chr4"),
"chromossome frequency must follow a weighted raffle pattern ($str_sort) for paired-end $i");
unlink $output_paired_end->[$i];
}
( run in 0.489 second using v1.01-cache-2.11-cpan-39bf76dae61 )