Bio-FastParsers

 view release on metacpan or  search on metacpan

t/clusterable.t  view on Meta::CPAN

#!/usr/bin/env perl

use Test::Most;

use autodie;
use feature qw(say);

use Path::Class qw(file);
use Tie::IxHash;
use Try::Tiny;

use Bio::FastParsers;


# perl -nle 'if (m/Cluster (\d+)/) { $cluster = $1; next } ($id,$type) = m/>(\S+)\.{3} (\*|at)/; $hash{$cluster}{repr} = $id if $type eq q{*}; push @{$hash{$cluster}{memb}}, $id if $type eq q{at}; END{ for $cluster (sort { $a <=> $b } keys %hash) { p...

# perl -MSort::Naturally -nle 'my ($id, $member_str) = split ": "; my @members = split q{ }, $member_str; $hash{$id} = \@members; END{ @list = sort { scalar @{$hash{$b}} <=> scalar @{$hash{$a}} || ncmp($a, $b) } keys %hash; print join "\n", @list }' ...

# for f in `cat cdHit.out.ids`; do grep $f cdHit.out.groups ; done

check_clusters(
    'Bio::FastParsers::CdHit', {
        infile   => file('test', 'cdHit.out.clstr'),
        expfile1 => file('test', 'cdHit.out.groups'),
        expfile2 => file('test', 'cdHit.out.ids'),
        idmfile  => file('test', 'cdHit.out.clstr.idm'),
    }
);

# perl -MTie::IxHash -anle 'BEGIN{ tie %hash, 'Tie::IxHash' } push @{ $hash{$F[8]} }, () if $F[0] eq 'C'; push @{ $hash{$F[9]} }, $F[8] if $F[0] eq 'H'; END{ while (($repr, $members) = each %hash) { print $repr . q{: } . join q{ }, @$members } }' ucl...

# perl -MSort::Naturally -nle 'my ($id, $member_str) = split ": "; my @members = split q{ }, $member_str; $hash{$id} = \@members; END{ @list = sort { scalar @{$hash{$b}} <=> scalar @{$hash{$a}} || ncmp($a, $b) } keys %hash; print join "\n", @list }' ...

check_clusters(
    'Bio::FastParsers::Uclust', {
        infile   => file('test', 'uclust.uc'),
        expfile1 => file('test', 'uclust.uc.groups'),
        expfile2 => file('test', 'uclust.uc.ids'),
        idmfile  => file('test', 'uclust.uc.idm'),
    }
);


sub check_clusters {
    my $class = shift;
    my $args  = shift // {};

    my ($infile, $expfile1, $expfile2, $idmfile)
        = @{$args}{ qw(infile expfile1 expfile2 idmfile) };

    explain $class;

    tie my %exp_members_for, 'Tie::IxHash';

    open my $in, '<', $expfile1;
    while (my $line = <$in>) {
        chomp $line;
        my ($repr, $memb_str) = $line =~ m/(\S+) \s* : \s* (.*)/xms;
        $exp_members_for{$repr} = [ split /\s+/, $memb_str ];
    }

    my @exp_ids = $expfile2->slurp( chomp => 1 );

    my $report = $class->new( file => $infile );

    is_deeply [ $report->all_representatives ], [ keys %exp_members_for ],
        'got expected list of representatives';
    is_deeply [ $report->all_representatives_by_cluster_size ], \@exp_ids,
        'got expected list of representatives by decreasing cluster size';



( run in 2.420 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )