Bio-InterProScanWrapper
view release on metacpan or search on metacpan
lib/Bio/InterProScanWrapper/ParseInterProOutput.pm view on Meta::CPAN
package Bio::InterProScanWrapper::ParseInterProOutput;
# ABSTRACT: parse the GFF files produced by interproscan
use Moose;
use Bio::InterProScanWrapper::Exceptions;
# GFF files to merge; merge_files processes them in array order.
has 'gff_files' => (
  is       => 'ro',
  isa      => 'ArrayRef',
  required => 1,
);

# Path of the merged GFF file that merge_files writes.
has 'output_file' => (
  is      => 'ro',
  isa     => 'Str',
  default => 'output.gff',
);

# Write handle on output_file; opened lazily on first use.
has '_output_file_fh' => (
  is      => 'ro',
  lazy    => 1,
  builder => '_build__output_file_fh',
);

# Shell pipeline fragment that strips the FASTA section from a GFF stream.
has '_remove_sequence_filter' => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__remove_sequence_filter',
);
# Builder for the lazy _output_file_fh attribute.
#
# Opens output_file for writing (truncating any existing content) and
# returns the handle.
# Throws Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile when the
# file cannot be opened.
sub _build__output_file_fh
{
  my ($self) = @_;

  my $destination = $self->output_file;
  my $write_fh;
  unless ( open($write_fh, '>', $destination) )
  {
    Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
      error => "Couldnt write to file: " . $destination );
  }

  return $write_fh;
}
# Returns the GFF version pragma, newline-terminated, that merge_files
# writes as the first line of the merged output.
sub _header
{
  my ($self) = @_;

  my $version_pragma = '##gff-version 3';
  return $version_pragma . "\n";
}
# Concatenate every input GFF file into the single output file.
#
# Writes the GFF version header once, then streams each file listed in
# gff_files through _input_single_gff_file_fh and appends every line it
# yields to output_file.
#
# Returns a true value on success.
# Throws Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile when the
# output file cannot be flushed and closed cleanly; any exception raised
# while opening the output or an input file propagates unchanged.
sub merge_files
{
  my ($self) = @_;

  # Fetch the lazily-built handle once rather than once per printed line.
  my $output_fh = $self->_output_file_fh;
  print {$output_fh} $self->_header;

  for my $input_file (@{$self->gff_files})
  {
    my $input_fh = $self->_input_single_gff_file_fh($input_file);
    while(my $line = <$input_fh>)
    {
      print {$output_fh} $line;
    }
    # Status deliberately not checked: closing a pipe handle reports the exit
    # status of the command, and a filter ending in `grep -v` exits non-zero
    # whenever it emits no lines, which is not an error here.
    close($input_fh);
  }

  # Buffered write errors (disk full, quota exceeded) only surface at close,
  # so a failure here means the merged file is incomplete.
  close($output_fh) or Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
    error => "Couldnt write to file: " . $self->output_file );

  return 1;
}
# Open one GFF file for reading through the sequence-removal filter.
#
# The file is read via a shell pipeline: `cat` feeds it into the command
# returned by _remove_sequence_filter, and the caller reads the filtered
# lines from the returned handle.
#
# Parameters:
#   $filename - path of the GFF file to read.
# Returns:
#   a read handle on the pipeline's output.
# Throws:
#   Bio::InterProScanWrapper::Exceptions::FileNotFound when the file is
#   missing or unreadable, or when the pipeline cannot be started.
sub _input_single_gff_file_fh
{
  my ($self, $filename) = @_;

  # A piped open only fails when the shell itself cannot be started; a missing
  # input file would merely make `cat` complain on stderr and yield no lines.
  # Check up front so the problem is reported instead of silently skipped.
  unless ( defined($filename) && -r $filename )
  {
    Bio::InterProScanWrapper::Exceptions::FileNotFound->throw(
      error => "Couldnt open file: " . ( defined($filename) ? $filename : '' ) );
  }

  # Single-quote the path for the shell (escaping embedded single quotes) so
  # that spaces or shell metacharacters in the name cannot alter the command.
  ( my $quoted_filename = $filename ) =~ s/'/'\\''/g;
  my $command = "cat '" . $quoted_filename . "' | " . $self->_remove_sequence_filter;

  # Report the input file that failed, not the output file.
  open(my $fh, '-|', $command) or Bio::InterProScanWrapper::Exceptions::FileNotFound->throw(
    error => "Couldnt open file: " . $filename );
  return $fh;
}
# Builder for the lazy _remove_sequence_filter attribute.
#
# Returns a shell pipeline that cuts out the FASTA sequence at the bottom of
# the file: sed keeps each range running from a '##sequence-region' line to
# the next line containing '>', and grep then drops the lines containing '>'.
sub _build__remove_sequence_filter {
  my ($self) = @_;

  # q{} avoids backslash-escaping the single quotes the shell needs.
  my $pipeline = q{sed -n '/##sequence-region/,/>/p' | grep -v '>'};
  return $pipeline;
}
# Unimport the Moose keywords (has, etc.) so they do not remain callable as
# methods on this class.
no Moose;
# Finalise the class definition; the metaclass can no longer be modified.
__PACKAGE__->meta->make_immutable;
# A module file must end with a true value for require/use to succeed.
1;
__END__
=pod
( run in 0.582 second using v1.01-cache-2.11-cpan-d8267643d1d )