Bio-Community
view release on metacpan or search on metacpan
lib/Bio/Community/IO.pm view on Meta::CPAN
# Overriding new... Is there a better alternative?
func new ($class, @args) {
my $real_class = Scalar::Util::blessed($class) || $class;
# These all come from the same base, Moose::Object, so this is fine
my $params = $real_class->BUILDARGS(@args);
my $format = delete $params->{'-format'};
if (not defined $format) {
# Try to guess format
my $guesser = Bio::Community::IO::FormatGuesser->new();
if ($params->{'-file'}) {
$guesser->file( $params->{'-file'} );
} elsif ($params->{'-fh'}) {
$guesser->fh( $params->{'-fh'} );
}
$format = $guesser->guess;
}
if (not defined $format) {
$real_class->throw("Could not automatically detect input format.");
}
# Use the real driver class here
$real_class = __PACKAGE__.'::Driver::'.$format;
Module::Runtime::use_module($real_class);
$class->throw("Module $real_class does not implement a community IO stream")
unless $real_class->does('Bio::Community::Role::IO');
lib/Bio/Community/IO/FormatGuesser.pm view on Meta::CPAN
=head1 NAME
Bio::Community::IO::FormatGuesser - Determine the format used by a community file
=head1 SYNOPSIS
use Bio::Community::IO::FormatGuesser;
my $guesser = Bio::Community::IO::FormatGuesser->new(
-file => 'file.txt',
);
my $format = $guesser->guess;
=head1 DESCRIPTION
Given a file containing one or several communities, try to guess the file format
used by examining the file content (not by looking at the file name).
The guess() method will examine the data, line by line, until it finds a line
that is specific to a format. If no conclusive guess can be made, undef is returned.
If the Bio::Community::IO::FormatGuesser object is given a filehandle which is
seekable, it will be restored to its original position on return from the
guess() method.
=head2 Formats
The following formats are currently supported:
=over
=item *
generic (tab-delimited matrix, site-by-species table, QIIME summarized OTU tables, ...)
lib/Bio/Community/IO/FormatGuesser.pm view on Meta::CPAN
at your option, any later version of Perl 5 you may have available.
=head1 APPENDIX
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
=head2 new
Function: Create a new Bio::Community::IO::FormatGuesser object
Usage : my $guesser = Bio::Community::IO::FormatGuesser->new( );
Args : -text, -file or -fh. If more than one of these arguments was
provided, only one is used: -text has precedence over -file, which
has precedence over -fh.
Returns : a new Bio::Community::IO::FormatGuesser object
=cut
package Bio::Community::IO::FormatGuesser;
lib/Bio/Community/IO/FormatGuesser.pm view on Meta::CPAN
unifrac => \&_possibly_unifrac ,
generic => \&_possibly_generic ,
qiime => \&_possibly_qiime ,
);
# Precompiled pattern that matches a whole string holding one real number.
# Matching is case-insensitive and anchored at both ends. Accepted shape:
#   * an optional leading sign (+ or -),
#   * an integer part and/or a fractional part; the lookahead requires at
#     least one digit, so "5", "5.", ".5" and "5.5" match but "." does not,
#   * an optional exponent: "E", an optional sign, and one or more digits.
my $real_re = qr/^(?:(?i)(?:[+-]?)(?:(?=[.]?[0123456789])(?:[0123456789]*)(?:(?:[.])(?:[0123456789]{0,}))?)(?:(?:[E])(?:(?:[+-]?)(?:[0123456789]+))|))$/;
# regular expression to match a real number, taken from Regexp::Common
=head2 file
Usage : my $file = $guesser->file;
Function: Get or set the file from which to guess the format
Args : file path (string)
Returns : file path (string)
=cut
# Path of the file whose format should be guessed. Read-write string
# attribute, settable at construction time through the '-file' argument.
# The '_has_file' predicate is what guess() consults before using the value.
has file => (
    init_arg  => '-file',
    isa       => 'Str',
    is        => 'rw',
    predicate => '_has_file',
    required  => 0,
    lazy      => 1,
    default   => undef,
);
=head2 fh
Usage : my $fh = $guesser->fh;
Function: Get or set the file handle from which to guess the format.
Args : file handle
Returns : file handle
=cut
# Filehandle from which the format should be guessed. Read-write attribute
# constrained to 'FileHandle', settable at construction time through the
# '-fh' argument; the '_has_fh' predicate is generated alongside it.
has fh => (
    init_arg  => '-fh',
    isa       => 'FileHandle',
    is        => 'rw',
    predicate => '_has_fh',
    required  => 0,
    lazy      => 1,
    default   => undef,
);
=head2 text
Usage : my $text = $guesser->text;
Function: Get or set the text from which to guess the format. In most, if not
all cases, the first few lines of a text string should be enough to
determine the format.
Args : text string
Returns : text string
=cut
# In-memory text whose format should be guessed. Read-write string
# attribute, settable at construction time through the '-text' argument.
# The '_has_text' predicate is the first thing guess() checks when it
# selects its input source.
has text => (
    init_arg  => '-text',
    isa       => 'Str',
    is        => 'rw',
    predicate => '_has_text',
    required  => 0,
    lazy      => 1,
    default   => undef,
);
=head2 guess
Function: Guess the file format
Usage : my $format = $guesser->guess;
Args : none
Returns : format string (e.g. generic, qiime, etc)
=cut
method guess () {
my $format;
# Prepare input
my ($in, $original_pos);
{
####local $Bio::Root::IO::HAS_EOL = 1; # Need Bioperl-dev (>1.6.922) for this to work
if ($self->_has_text) {
$in = Bio::Root::IO->new(-string => $self->text);
} elsif ($self->_has_file) {
$in = Bio::Root::IO->new(-file => $self->file);
t/IO/FormatGuesser.t view on Meta::CPAN
use strict;
use warnings;
use Bio::Root::Test;
# Tests for Bio::Community::IO::FormatGuesser: construction, the three input
# sources (-text, -file, -fh), their precedence, filehandle rewinding, and
# format detection on a set of fixture files.

use_ok($_) for qw(
    Bio::Community::IO::FormatGuesser
);

my ($guesser, $text, $fh, $file, $line);


# Bare object

ok $guesser = Bio::Community::IO::FormatGuesser->new(), 'bare object';
isa_ok $guesser, 'Bio::Community::IO::FormatGuesser';


# Test mixed input: all three sources are given, each in a different format,
# so the guessed format tells which source was actually used.

$text = <<EOF;
{
"id":null,
"format": "Biological Observation Matrix 0.9.1-dev",
"format_url": "http://biom-format.org",
"type": "OTU table",
"generated_by": "QIIME revision 1.4.0-dev",
"date": "2011-12-19T19:00:00",
EOF

my $qiime_path = test_input_file('qiime_w_greengenes_taxo.txt');
# Fail at the open itself if the fixture is missing, not in a later assertion
open $fh, '<', $qiime_path or die "Could not open file '$qiime_path': $!";
$file = test_input_file('gaas_compo.txt');
ok $guesser = Bio::Community::IO::FormatGuesser->new(
    -file => $file, # gaas
    -text => $text, # biom
    -fh   => $fh,   # qiime
), 'mixed input';
is $guesser->file, $file, 'mixed input: file accessor';
is $guesser->text, $text, 'mixed input: text accessor';
is $guesser->fh, $fh, 'mixed input: fh accessor';
is $guesser->guess, 'biom', 'mixed input: format guessed from -text';
close $fh;


# Test input text

ok $guesser = Bio::Community::IO::FormatGuesser->new(), 'text input';
ok $guesser->text($text), 'text input: text can be set';
is $guesser->guess, 'biom', 'text input: guessed as biom';


# Test input filehandle

my $biom_path = test_input_file('biom_minimal_dense.txt');
open $fh, '<', $biom_path or die "Could not open file '$biom_path': $!";
ok $guesser = Bio::Community::IO::FormatGuesser->new( -fh => $fh ), 'filehandle input';
is $guesser->fh, $fh, 'filehandle input: fh accessor';
is $guesser->guess, 'biom', 'filehandle input: guessed as biom';
# The next line read must be the first line of the file again
$line = <$fh>;
chomp $line;
is $line, '{', 'filehandle was rewinded';
close $fh;


# Test input files. Each row is: fixture file name, expected format (undef
# when no format should be detected), and a short label for the case.

my @file_cases = (
    # biom
    [ 'biom_minimal_dense.txt',          'biom',    'biom files'              ],
    [ 'biom_rich_sparse.txt',            'biom',    'biom files'              ],
    [ 'biom_float.txt',                  'biom',    'biom files'              ],
    [ 'biom_dups.txt',                   'biom',    'biom files'              ],
    [ 'biom_invalid.txt',                'biom',    'biom files'              ],
    # generic
    [ 'generic_table_win.txt',           'generic', 'generic files (Windows)' ],
    [ 'generic_table_mac.txt',           'generic', 'generic files (Mac)'     ],
    [ 'generic_table.txt',               'generic', 'generic files (Linux)'   ],
    [ 'generic_table_tricky.txt',        'generic', 'generic files'           ],
    [ 'qiime_w_silva_taxo_L2.txt',       'generic', 'generic files'           ],
    # gaas
    [ 'gaas_compo.txt',                  'gaas',    'gaas files'              ],
    [ 'gaas_seq_compo.txt',              'gaas',    'gaas files'              ],
    [ 'gaas_other.txt',                  'gaas',    'gaas files'              ],
    # unifrac
    [ 'unifrac_qualitative.txt',         'unifrac', 'unifrac files'           ],
    [ 'unifrac_quantitative.txt',        'unifrac', 'unifrac files'           ],
    [ 'unifrac_quantitative_tricky.txt', 'unifrac', 'unifrac files'           ],
    # qiime
    [ 'qiime_w_no_taxo.txt',             'qiime',   'qiime files'             ],
    [ 'qiime_w_greengenes_taxo.txt',     'qiime',   'qiime files'             ],
    [ 'qiime_alt_header.txt',            'qiime',   'qiime files'             ], # alternative header
    [ 'qiime_single_community.txt',      'qiime',   'qiime files'             ],
    [ 'qiime_w_silva_taxo_and_dups.txt', 'qiime',   'qiime files'             ],
    [ 'qiime_w_two_communities.txt',     'qiime',   'qiime files'             ],
    # unknown format
    [ 'lorem_ipsum.txt',                 undef,     'unknown file'            ],
);

for my $case (@file_cases) {
    my ($fixture, $expected, $label) = @$case;
    my $expected_name = defined $expected ? $expected : 'undef';
    $file = test_input_file($fixture);
    ok $guesser = Bio::Community::IO::FormatGuesser->new( -file => $file ),
        "$label: $fixture";
    is $guesser->file, $file, "$label: $fixture: file accessor";
    is $guesser->guess, $expected, "$label: $fixture: guessed as $expected_name";
}


# Test empty string

$text = '';
ok $guesser = Bio::Community::IO::FormatGuesser->new( -text => $text ), 'empty string';
is $guesser->text, $text, 'empty string: text accessor';
is $guesser->guess, undef, 'empty string: no format guessed';


done_testing();

exit;
( run in 0.719 second using v1.01-cache-2.11-cpan-748bfb374f4 )