Bio-MLST-Check

 view release on metacpan or  search on metacpan

t/Output/SpreadsheetRow.t  view on Meta::CPAN

#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;

# Put ./lib (relative to the current directory) at the front of @INC so it is
# searched before any other location when the modules below are loaded.
BEGIN { unshift(@INC, './lib') }
# Load the test framework and the two collaborator classes used to build the
# fixtures, then record a test that the class under test can be loaded.
BEGIN {
    use Test::Most;
    use Bio::MLST::CompareAlleles;
    use Bio::MLST::SequenceType;
    use_ok('Bio::MLST::Spreadsheet::Row');
}

note('Prepare a row for the output spreadsheets based on the results from the ST comparison and lookup.');

# Baseline fixtures: compare the adk/purA/recA allele files against the sample
# contigs, then resolve a sequence type from the E. coli profile table using
# whatever allele names the comparison found.
my $ecoli_profiles = 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt';

my $compare_alleles = Bio::MLST::CompareAlleles->new(
  contamination_alleles => 'test_contamination',
  profiles_filename     => $ecoli_profiles,
  allele_filenames      => [ qw(t/data/adk.tfa t/data/purA.tfa t/data/recA.tfa) ],
  sequence_filename     => 't/data/contigs.fa',
);

my $sequence_type_obj = Bio::MLST::SequenceType->new(
  report_lowest_st   => 1,
  non_matching_names => [],
  matching_names     => $compare_alleles->found_sequence_names,
  profiles_filename  => $ecoli_profiles,
);


# Valid input: the row object must construct, and the allele-number row must
# list the sample name, the ST, two empty cells, then one number per allele file.
my $spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(
  compare_alleles   => $compare_alleles,
  sequence_type_obj => $sequence_type_obj,
);
ok($spreadsheet_row_obj, 'Spreadsheet row constructor with a normal valid input.');

my @valid_allele_numbers = ('contigs', 4, '', '', 2, 3, 1);
is_deeply(
  $spreadsheet_row_obj->allele_numbers_row,
  \@valid_allele_numbers,
  'Construct the row for the allele number spreadsheet with valid input data.'
);
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', 4,'','','GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCGCTGGTTAAAGGGCGCATT...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
 'Construct the row for the genomic data spreadsheet with valid input data.');
 

# Force the contamination flag on the existing comparison object and rebuild
# the row: the fourth cell is now expected to carry 'test_contamination'.
$compare_alleles->contamination(1);

$spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(
  compare_alleles   => $compare_alleles,
  sequence_type_obj => $sequence_type_obj,
);
ok($spreadsheet_row_obj, 'Spreadsheet row constructor where there is contamination detected.');

my @contaminated_allele_numbers = ('contigs', 4, '', 'test_contamination', 2, 3, 1);
is_deeply(
  $spreadsheet_row_obj->allele_numbers_row,
  \@contaminated_allele_numbers,
  'Construct the row for the allele number spreadsheet where there is contamination detected.'
);
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', 4,'','test_contamination','GGGGAAAGGGACTCAGGCTCAGTTCATCATGGAGAAATATGGTATTCCGCAAATCTCCACTGGCGATATGCTGCGTGCTGCGGTCAAATCTGGCTCCGAGCTGGGTAAACAAGCAAAAGACATTATGGATGCTGGCAAACTGGTTACCGACGAACTGGTGATCGCG...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
 'Construct the row for the genomic data spreadsheet where there is contamination detected.');
# Undo the contamination flag that was forced to 1 for the scenario above.
$compare_alleles->contamination(0);

# Scenario: no match for adk.
# The adk allele file is swapped for 'adk_less_than_95_percent.tfa'
# (presumably too dissimilar to count as a hit), while purA and recA are
# unchanged. No contamination_alleles value is supplied this time.
my $no_hit_profiles = 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt';

$compare_alleles = Bio::MLST::CompareAlleles->new(
  profiles_filename => $no_hit_profiles,
  allele_filenames  => [
    't/data/adk_less_than_95_percent.tfa',
    't/data/purA.tfa',
    't/data/recA.tfa',
  ],
  sequence_filename => 't/data/contigs.fa',
);
$sequence_type_obj = Bio::MLST::SequenceType->new(
  report_lowest_st   => 1,
  non_matching_names => [],
  matching_names     => $compare_alleles->found_sequence_names,
  profiles_filename  => $no_hit_profiles,
);

$spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(
  compare_alleles   => $compare_alleles,
  sequence_type_obj => $sequence_type_obj,
);
ok($spreadsheet_row_obj, 'Spreadsheet row constructor where there is no hit for one of the alleles.');

# Expected: ST '1~', status 'Unknown', and 'U' in the adk column.
my @no_hit_allele_numbers = ('contigs', '1~', 'Unknown', '', 'U', 3, 1);
is_deeply(
  $spreadsheet_row_obj->allele_numbers_row,
  \@no_hit_allele_numbers,
  'One of the alleles has no hits so is marked as Unknown and has a tilda before the ST to indicate a partial match.'
);
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "1~",'Unknown','','U',
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
 'One of the alleles has no hits so is marked as Unknown and has a tilda before the ST to indicate a partial match.');

# Scenario: near match.
# The adk allele file is 'adk_contamination.tfa'; with report_lowest_st
# enabled the expected ST cell is '1~' and both adk candidates are listed
# in the contamination cell.
my %near_match_comparison = (
  sequence_filename => 't/data/contigs.fa',
  allele_filenames  => [ qw(t/data/adk_contamination.tfa t/data/purA.tfa t/data/recA.tfa) ],
  profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);
$compare_alleles = Bio::MLST::CompareAlleles->new(%near_match_comparison);

my %near_match_lookup = (
  profiles_filename  => $near_match_comparison{profiles_filename},
  matching_names     => $compare_alleles->found_sequence_names,
  non_matching_names => [],
  report_lowest_st   => 1,
);
$sequence_type_obj = Bio::MLST::SequenceType->new(%near_match_lookup);

$spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(
  compare_alleles   => $compare_alleles,
  sequence_type_obj => $sequence_type_obj,
);
ok($spreadsheet_row_obj, 'Theres contamination in the input alleles.');

is_deeply(
  $spreadsheet_row_obj->allele_numbers_row,
  [ 'contigs', '1~', 'Novel ST', 'adk-2,adk-3', 3, 3, 1 ],
  'There is contamination so display both alleles for the gene and flag as a partial ST match.'
);
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "1~",'Novel ST','adk-2,adk-3','AGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
 'There is contamination so display both alleles for the gene and flag as a partial ST match.');

# Scenario: best near match.
# Same input files as the near-match scenario, but report_lowest_st is not
# passed to the sequence type lookup; the expected ST cell becomes '4~'
# instead of '1~'.
my $best_match_profiles = 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt';
my @best_match_alleles  = (
  't/data/adk_contamination.tfa',
  't/data/purA.tfa',
  't/data/recA.tfa',
);

$compare_alleles = Bio::MLST::CompareAlleles->new(
  profiles_filename => $best_match_profiles,
  allele_filenames  => [@best_match_alleles],
  sequence_filename => 't/data/contigs.fa',
);
$sequence_type_obj = Bio::MLST::SequenceType->new(
  non_matching_names => [],
  matching_names     => $compare_alleles->found_sequence_names,
  profiles_filename  => $best_match_profiles,
);

# The constructor result is not asserted on its own in this scenario.
$spreadsheet_row_obj = Bio::MLST::Spreadsheet::Row->new(
  compare_alleles   => $compare_alleles,
  sequence_type_obj => $sequence_type_obj,
);

my @best_match_allele_numbers = ('contigs', '4~', 'Novel ST', 'adk-2,adk-3', 3, 3, 1);
is_deeply(
  $spreadsheet_row_obj->allele_numbers_row,
  \@best_match_allele_numbers,
  'There is contamination and the ST is novel so display the nearest match.'
);
is_deeply($spreadsheet_row_obj->genomic_row, ['contigs', "4~",'Novel ST','adk-2,adk-3','AGACGATCAGGAAGAAACCGTACGTAAACGTCTGGTTGAATACCATCAGATGACAGCACCGCTGATCGGCTACTACTCCAAAGAAGCTGAAGCGGGTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...
'ATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTGGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCC...
'CGCACGTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGAAATCTGTGACGCCCTGGCGCGTTCTGGTGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTGACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCACATGGGCCTTGCGGCACGTATGATGAGCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAA...
 'There is contamination and the ST is novel so display the nearest match.');

$compare_alleles = Bio::MLST::CompareAlleles->new(
   sequence_filename => 't/data/contigs_novel.fa',
     allele_filenames  => ['t/data/adk.tfa','t/data/purA.tfa','t/data/recA.tfa'],
     profiles_filename => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',
);
$sequence_type_obj = Bio::MLST::SequenceType->new(
  profiles_filename  => 't/data/databases/Escherichia_coli_1/profiles/escherichia_coli.txt',



( run in 0.740 second using v1.01-cache-2.11-cpan-39bf76dae61 )