Bio-MLST-Check
view release on metacpan or search on metacpan
t/Output/SpreadsheetRow.t view on Meta::CPAN
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
BEGIN { unshift(@INC, './lib') }
BEGIN {
use Test::Most;
use Bio::MLST::CompareAlleles;
use Bio::MLST::SequenceType;
use_ok('Bio::MLST::Spreadsheet::Row');
}
note('Prepare a row for the output spreadsheets based on the results from the ST comparison and lookup.');

# Baseline scenario: compare t/data/contigs.fa against the adk, purA and recA
# allele files. The contamination_alleles text is what later shows up in the
# row once the contamination flag is switched on.
my $compare_alleles = Bio::MLST::CompareAlleles->new(
    sequence_filename     => 't/data/contigs.fa',
    allele_filenames      => [
        't/data/adk.tfa',
        't/data/purA.tfa',
        't/data/recA.tfa',
    ],
    profiles_filename     => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
    contamination_alleles => 'test_contamination',
);

# Look the sequence type up from the names found by the comparison. No
# non-matching names are supplied and report_lowest_st is switched on.
my $sequence_type_obj = Bio::MLST::SequenceType->new(
    profiles_filename  => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
    matching_names     => $compare_alleles->found_sequence_names,
    non_matching_names => [],
    report_lowest_st   => 1,
);
# Valid-input scenario: build the spreadsheet row from the sequence type and
# allele comparison objects created above. ok() only checks that the
# constructor returned a true value.
ok((my $spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(sequence_type_obj => $sequence_type_obj, compare_alleles => $compare_alleles)),'Spreadsheet row constructor with a normal valid input.');
# Allele-number row: contigs and 4 lead the row, two empty fields follow, then
# the values 2, 3, 1.
# NOTE(review): presumably sample name, ST, new-ST marker and contamination
# marker, followed by one allele number per allele file (adk, purA, recA) -
# confirm against Bio::MLST::Spreadsheet::Row.
is_deeply($spreadsheet_row_obj->allele_numbers_row, ['contigs', 4,'','',2,3,1], 'Construct the row for the allele number spreadsheet with valid input data.');
# Genomic row: the same four leading fields, but the trailing entries are
# nucleotide sequences instead of allele numbers.
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', 4,'','','GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATT...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
'Construct the row for the genomic data spreadsheet with valid input data.');
# Contamination scenario: switch the contamination flag on for the existing
# comparison object, rebuild the row, and expect the fourth field of both rows
# to carry the text test_contamination. The flag is switched back off again
# once the assertions in this scenario have run.
$compare_alleles->contamination(1);
# The row object is rebuilt after the flag change rather than reused.
ok(($spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(sequence_type_obj => $sequence_type_obj, compare_alleles => $compare_alleles)),'Spreadsheet row constructor where there is contamination detected.');
# Only the fourth field differs from the valid-input expectation.
is_deeply($spreadsheet_row_obj->allele_numbers_row, ['contigs', 4,'','test_contamination',2,3,1], 'Construct the row for the allele number spreadsheet where there is contamination detected.');
# Same check for the genomic row, whose trailing entries are sequences.
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', 4,'','test_contamination','GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCG...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
'Construct the row for the genomic data spreadsheet where there is contamination detected.');
$compare_alleles->contamination(0);
# Scenario: no match for adk. The adk allele file is swapped for
# t/data/adk_less_than_95_percent.tfa while purA and recA stay as before, and
# no contamination_alleles text is passed this time.
$compare_alleles = Bio::MLST::CompareAlleles->new(
    sequence_filename => 't/data/contigs.fa',
    allele_filenames  => [
        't/data/adk_less_than_95_percent.tfa',
        't/data/purA.tfa',
        't/data/recA.tfa',
    ],
    profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);

# Fresh sequence type lookup for the new comparison, lowest ST reported.
$sequence_type_obj = Bio::MLST::SequenceType->new(
    profiles_filename  => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
    matching_names     => $compare_alleles->found_sequence_names,
    non_matching_names => [],
    report_lowest_st   => 1,
);
# No-hit scenario: rebuild the row from the comparison that used the
# adk_less_than_95_percent allele file and check both row flavours.
ok(($spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(sequence_type_obj => $sequence_type_obj, compare_alleles => $compare_alleles)),'Spreadsheet row constructor where there is no hit for one of the alleles.');
# Expected: ST field 1~, third field Unknown, empty fourth field, and U in
# place of the first allele number, followed by 3 and 1.
# NOTE(review): the description says the tilde comes before the ST, but the
# expected value has it trailing (1~) - confirm which wording is intended.
is_deeply($spreadsheet_row_obj->allele_numbers_row, ['contigs', "1~",'Unknown','','U',3,1], 'One of the alleles has no hits so is marked as Unknown and has a tilda before the ST to indicate a partial match.');
# Genomic row: the same leading fields and the U placeholder, then sequences
# for the remaining two entries.
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "1~",'Unknown','','U',
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
'One of the alleles has no hits so is marked as Unknown and has a tilda before the ST to indicate a partial match.');
# Scenario: near match. The adk allele file is replaced by
# t/data/adk_contamination.tfa; purA and recA are the usual files.
$compare_alleles = Bio::MLST::CompareAlleles->new(
    sequence_filename => 't/data/contigs.fa',
    allele_filenames  => [
        't/data/adk_contamination.tfa',
        't/data/purA.tfa',
        't/data/recA.tfa',
    ],
    profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);

# Sequence type lookup for this comparison with report_lowest_st enabled.
$sequence_type_obj = Bio::MLST::SequenceType->new(
    profiles_filename  => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
    matching_names     => $compare_alleles->found_sequence_names,
    non_matching_names => [],
    report_lowest_st   => 1,
);
# Near-match scenario: rebuild the row from the comparison that used the
# adk_contamination allele file, with report_lowest_st enabled.
ok(($spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(sequence_type_obj => $sequence_type_obj, compare_alleles => $compare_alleles)),'Theres contamination in the input alleles.');
# Expected: ST field 1~, third field Novel ST, fourth field listing both adk-2
# and adk-3, then the values 3, 3, 1.
is_deeply($spreadsheet_row_obj->allele_numbers_row, ['contigs', "1~",'Novel ST','adk-2,adk-3',3,3,1], 'There is contamination so display both alleles for the gene and flag as a partial ST match.');
# Genomic row: the same four leading fields, followed by sequences.
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "1~",'Novel ST','adk-2,adk-3','AGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
'There is contamination so display both alleles for the gene and flag as a partial ST match.');
# best near match
# Same input files as the near-match scenario, but report_lowest_st is left
# out of the SequenceType arguments.
# NOTE(review): presumably the default then reports the nearest ST rather than
# the lowest one, as the assertion descriptions for this scenario suggest -
# confirm against Bio::MLST::SequenceType.
$compare_alleles = Bio::MLST::CompareAlleles->new(
    sequence_filename => 't/data/contigs.fa',
    allele_filenames  => ['t/data/adk_contamination.tfa','t/data/purA.tfa','t/data/recA.tfa'],
    profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);
$sequence_type_obj = Bio::MLST::SequenceType->new(
    profiles_filename  => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
    matching_names     => $compare_alleles->found_sequence_names,
    non_matching_names => [],
);
# Assert the construction through ok(), as every other scenario in this file
# does, so that this step is reported as a named test of its own.
ok(($spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(sequence_type_obj => $sequence_type_obj, compare_alleles => $compare_alleles)),'Spreadsheet row constructor where report_lowest_st is not set.');
# Best-near-match scenario: the row was built from a SequenceType created
# without report_lowest_st. Compared with the near-match expectation only the
# ST field differs: 4~ here instead of 1~.
is_deeply($spreadsheet_row_obj->allele_numbers_row, ['contigs', "4~",'Novel ST','adk-2,adk-3',3,3,1], 'There is contamination and the ST is novel so display the nearest match.');
# Genomic row: the same four leading fields, followed by sequences.
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "4~",'Novel ST','adk-2,adk-3','AGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
'There is contamination and the ST is novel so display the nearest match.');
$compare_alleles = Bio::MLST::CompareAlleles->new(
sequence_filename => 't/data/contigs_novel.fa',
allele_filenames => ['t/data/adk.tfa','t/data/purA.tfa','t/data/recA.tfa'],
profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);
$sequence_type_obj = Bio::MLST::SequenceType->new(
profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
( run in 0.740 second using v1.01-cache-2.11-cpan-39bf76dae61 )