Bio-Community
view release on metacpan or search on metacpan
lib/Bio/Community/IO/FormatGuesser.pm view on Meta::CPAN
# Read next line (and convert line endings). Exit if no lines left.
$line_num++;
chomp $line;
# Skip white and empty lines.
next if $line =~ /^\s*$/;
# Split fields
my @fields = split /\t/, $line;
# Try all formats remaining
%ok_formats = ();
my ($test_format, $test_function);
while ( ($test_format, $test_function) = each (%test_formats) ) {
my $score = &$test_function(\@fields, $line, $line_num);
if ( $score == 2 ) {
# This line is specific of this format
%ok_formats = ( $test_format => undef );
last;
} elsif ($score == 1) {
# Line is possibly in this format
$ok_formats{$test_format} = undef;
} else {
# Do not try to match this format with upcoming lines
delete $test_formats{$test_format};
}
}
# Exit if there was a match to only one format
if (scalar keys %ok_formats == 1) {
last;
}
# Exit if no formats left to try
if (scalar keys %test_formats == 0) {
last;
}
# Give up after having tested 100 lines
if ($line_num >= 100) {
last;
}
}
# If several formats matched. Assume 'generic' if possible, undef otherwise
if (scalar keys %ok_formats > 1) {
for my $ok_format (keys %ok_formats) {
if (not $ok_format eq 'generic') {
delete $ok_formats{$ok_format};
}
}
}
if (scalar keys %ok_formats == 1) {
$format = (keys %ok_formats)[0];
}
# Cleanup
if ($in->noclose) {
# Reset filehandle cursor to original location
seek($self->fh, $original_pos, 0)
or $self->throw("Could not reset the cursor to its original position: $!");
}
$in->close;
return $format;
}
#----- Format-specific methods -----#
# These methods return:
# 1 is the given line is possibly in this format
# 2 if they are sure
func _possibly_biom ($fields, $line, $line_num) {
# Example:
# {
# "id":null,
# "format": "Biological Observation Matrix 0.9.1-dev",
# "format_url": "http://biom-format.org",
# ...
my $ok = 0;
if ($line_num == 1) {
if ($line =~ m/^{/) {
$ok = 1;
}
} else {
if ( ($line =~ m/"\S+":/) ||
($line =~ m/Biological Observation Matrix/) ) {
$ok = 2; # biom for sure
}
}
return $ok;
}
func _possibly_generic ($fields, $line, $line_num) {
# Example:
# Species gut soda lake
# Streptococcus 241 334
# ...
# Columns from the second to the last must contain numbers.
my $ok = 0;
my $num_fields = scalar @$fields;
if ($num_fields >= 2) {
if ($line_num == 1) {
$ok = 1;
} else {
for my $i (1 .. $num_fields - 1) {
if ($fields->[$i] =~ $real_re) {
$ok = 1;
} else {
$ok = 0;
last;
}
}
}
}
return $ok;
}
( run in 1.453 second using v1.01-cache-2.11-cpan-39bf76dae61 )