Bio-MUST-Drivers
view release on metacpan or search on metacpan
#!/usr/bin/env perl
use Test::Most;
use autodie;
use feature qw(say);
use List::AllUtils;
use Module::Runtime qw(use_module);
use Path::Class qw(file);
use Tie::IxHash;
use Bio::MUST::Core;
use Bio::MUST::Drivers::CdHit;
# Announce the CD-HIT release these tests were designed for. note() emits the
# message as a TAP comment instead of writing raw text to STDOUT.
note 'Note: tests designed for: CD-HIT version 4.8.1 (built on Jul 5 2019)';

# Driver class under test (instantiated further down).
my $class = 'Bio::MUST::Drivers::CdHit';

# Note: provisioning system is not enabled to help tests to pass on CPANTS
# The provisioning class is loaded at runtime and only queried through its
# condition() method; when that returns false the whole file is skipped.
# NOTE(review): condition() presumably checks that the CD-HIT executable is
# available -- confirm in Bio::MUST::Provision::CdHit.
my $app = use_module('Bio::MUST::Provision::CdHit')->new;
unless ( $app->condition ) {
    plan skip_all => <<"EOT";
skipped all CD-HIT tests!
If you want to use this module you need to install the CD-HIT executable:
https://github.com/weizhongli/cdhit
If you --force installation, I will eventually try to install CD-HIT with brew:
https://brew.sh/
EOT
}
# TODO: fix this as CD-HIT formula currently fails on OS X Mojave
# This can be done with a --build-from-source option of brew
# expected members for each cluster representative
# Fixture format, one cluster per line: <representative> : <member> <member> ...
# The hash is tied to Tie::IxHash so that keys() and values() follow the order
# of the lines in the fixture file.
my $exp_clstr_file = file('test', 'cdHit.out.groups');
open my $in, '<', $exp_clstr_file;
tie my %exp_members_for, 'Tie::IxHash';

LINE:
while (my $line = <$in>) {
    chomp $line;

    # tolerate blank lines (e.g., an extra newline at the end of the fixture)
    next LINE if $line !~ m/\S/xms;

    # fail loudly on a malformed line instead of storing an undef key
    my ($repr, $memb_str) = $line =~ m/(\S+) \s* : \s* (.*)/xms
        or die "Cannot parse line $. of $exp_clstr_file: '$line'\n";

    $exp_members_for{$repr} = [ split /\s+/, $memb_str ];
}

# the fixture is fully loaded; release the handle (checked by autodie)
close $in;
# expected representative seqs
# Reference alignment loaded from the fixture file; its sequence count and
# sequence list are compared with the driver's representatives further down.
my $exp_repr_seq_file = file( qw(test cdHit.out.fasta) );
my $exp_repr_seqs     = Bio::MUST::Core::Ali->load($exp_repr_seq_file);
# Lookup table from a sequence id to its expected target id.
# NOTE(review): presumably maps each cluster member to the id of its
# representative; it is consumed outside the visible part of this file.
my %exp_repr_for = do {

    # keyed by target id; each value lists the ids that must map to it
    my %ids_mapped_to = (
        '360866|ASA39338.1' => [ qw(
            1053365|ASA38802.1
            1715899|ASA38533.1
        ) ],
        '360866|ASA39339.1' => [ qw(
            1053365|ASA38803.1
            1715899|ASA38534.1
            1715900|ASA38624.1
            2010898|ASA38355.1
        ) ],
        '360866|ASA39340.1' => [ '1053365|ASA38804.1' ],
        '360866|ASA39341.1' => [ '1053365|ASA38805.1' ],
        '360866|ASA39342.1' => [ '1053365|ASA38806.1' ],
        '360866|ASA39345.1' => [ '1053365|ASA38809.1' ],
        '360866|ASA39346.1' => [ '1053365|ASA38810.1' ],
    );

    # flatten the grouped table into (id => target) pairs
    map {
        my $target = $_;
        map { ( $_ => $target ) } @{ $ids_mapped_to{$target} };
    } keys %ids_mapped_to;
};
# call cd-hit
# Build the driver object from the input FASTA fixture; every assertion below
# queries this object.
my $cdh = $class->new( seqs => file( qw(test cdHit.in.fasta) ) );

# cluster members

# Cluster names must match the fixture keys, in fixture order.
is_deeply(
    [ $cdh->all_cluster_names ],
    [ keys %exp_members_for ],
    'got expected list of representative ids',
);

# Per cluster: the full ids of its members must match the fixture entry.
for my $cluster_name ( $cdh->all_cluster_names ) {
    my @got_ids = map { $_->full_id } @{ $cdh->seq_ids_for($cluster_name) };
    is_deeply(
        \@got_ids,
        $exp_members_for{$cluster_name},
        "got expected list of member SeqIds for representative: $cluster_name",
    );
}

# All clusters at once: one list of full ids per cluster, in fixture order.
{
    my @got_clusters;
    for my $seq_ids ( $cdh->all_cluster_seq_ids ) {
        push @got_clusters, [ map { $_->full_id } @{$seq_ids} ];
    }
    is_deeply(
        \@got_clusters,
        [ values %exp_members_for ],
        'got expected SeqIds for cluster members',
    );
}
# representative Seqs

# The two counts are plain scalars, so they are compared with is() rather
# than is_deeply(). scalar() pins the context of each method call: inside an
# argument list they would otherwise run in list context, and an empty return
# would silently shift the remaining arguments.
is scalar( $cdh->count_representatives ),
   scalar( $exp_repr_seqs->count_seqs ),
    'got expected number of representatives (clusters)'
;

# Deep comparison of the two lists, order included.
is_deeply [ $cdh->all_representatives ],
          [ $exp_repr_seqs->all_seqs ],
    'got expected list of representative Seqs'
;
for my $id ( $cdh->all_cluster_names ) {
is_deeply $cdh->get_representative_with_id($id)->seq,
( run in 0.835 second using v1.01-cache-2.11-cpan-bbe5e583499 )