Bio-InterProScanWrapper

 view release on metacpan or  search on metacpan

lib/Bio/InterProScanWrapper/ParseInterProOutput.pm  view on Meta::CPAN

package Bio::InterProScanWrapper::ParseInterProOutput;

# ABSTRACT: parse the GFF files produced by interproscan


use Moose;
use Bio::InterProScanWrapper::Exceptions;

# Public attributes.
# gff_files   - array reference of input GFF file names; each one is read in
#               turn by merge_files. Required.
# output_file - name of the merged GFF file to write; defaults to 'output.gff'.
has 'gff_files' => (
  is       => 'ro',
  isa      => 'ArrayRef',
  required => 1,
);
has 'output_file' => (
  is      => 'ro',
  isa     => 'Str',
  default => 'output.gff',
);

# Private, lazily built attributes.
# _output_file_fh         - write handle on output_file, opened on first use.
# _remove_sequence_filter - shell pipeline applied to every input file.
has '_output_file_fh' => (
  is      => 'ro',
  lazy    => 1,
  builder => '_build__output_file_fh',
);
has '_remove_sequence_filter' => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__remove_sequence_filter',
);

# Builder for the lazy '_output_file_fh' attribute.
#
# Opens the file named by the 'output_file' attribute in '>' mode (creating it,
# or truncating it if it already exists) and returns the write handle.
#
# Throws Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile when the
# file cannot be opened.
sub _build__output_file_fh
{
  my ($self) = @_;

  my $handle;
  unless ( open($handle, '>', $self->output_file) )
  {
    Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
      error => "Couldnt write to file: " . $self->output_file );
  }

  return $handle;
}

# Returns the GFF3 version pragma line (newline-terminated) that merge_files
# writes at the top of the merged output file.
sub _header
{
  my ($self) = @_;
  return "##gff-version 3\n";
}

# Concatenates every file in 'gff_files' into the output file.
#
# The GFF header from _header is written first, followed by the filtered
# content of each input file in the order given. Each input is read through
# _input_single_gff_file_fh, so whatever that filter removes never reaches the
# output.
#
# Returns 1 on success.
# Throws Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile when writing
# to or closing the output file fails; exceptions raised while opening the
# output or an input file propagate unchanged.
sub merge_files
{
  my ($self) = @_;

  # Fetch the handle once instead of calling the accessor for every line.
  my $output_fh = $self->_output_file_fh;

  print {$output_fh} $self->_header
    or Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
      error => "Couldnt write to file: " . $self->output_file );

  for my $input_file (@{$self->gff_files})
  {
    my $input_fh = $self->_input_single_gff_file_fh($input_file);

    # Read into a lexical: a bare while(<$fh>) assigns to the global $_
    # without localising it, which would overwrite the caller's $_.
    while (my $line = <$input_fh>)
    {
      print {$output_fh} $line
        or Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
          error => "Couldnt write to file: " . $self->output_file );
    }

    # The close status of the input is deliberately not checked: when the
    # handle is a pipe it reflects the exit status of the filter command,
    # and grep exits non-zero merely because it selected no lines.
    close($input_fh);
  }

  # Buffered write errors (e.g. a full disk) only surface at close time.
  close($output_fh)
    or Bio::InterProScanWrapper::Exceptions::CouldntWriteToFile->throw(
      error => "Couldnt write to file: " . $self->output_file );

  return 1;
}

# Opens one input GFF file for reading through the shell filter held in
# '_remove_sequence_filter' and returns a read handle on the filtered output.
#
# Parameters:
#   $filename - path of the GFF file to read.
#
# Returns a pipe handle; reading from it yields the output of
#   cat -- <file> | <_remove_sequence_filter>
#
# Throws Bio::InterProScanWrapper::Exceptions::FileNotFound when the file does
# not exist or the pipeline cannot be started.
sub _input_single_gff_file_fh
{
  my ($self, $filename) = @_;

  # A pipe open succeeds as soon as the shell starts, even when the file is
  # missing, so the existence check has to be made explicitly beforehand.
  unless ( defined($filename) && -e $filename )
  {
    Bio::InterProScanWrapper::Exceptions::FileNotFound->throw(
      error => "Couldnt open file: " . ( defined($filename) ? $filename : '(undefined)' ) );
  }

  # List-form pipe open: the filename reaches the shell as positional
  # parameter $1 rather than being spliced into the command text, so spaces
  # and shell metacharacters in the path are harmless. The '--' keeps a name
  # starting with '-' from being read as a cat option.
  my $pipeline = 'cat -- "$1" | ' . $self->_remove_sequence_filter;
  open(my $fh, '-|', 'sh', '-c', $pipeline, 'sh', $filename) or Bio::InterProScanWrapper::Exceptions::FileNotFound->throw(
    error => "Couldnt open file: " . $filename );
  return $fh;
}


# Builder for the lazy '_remove_sequence_filter' attribute.
#
# Returns the shell pipeline used to cut out the FASTA sequence at the bottom
# of a GFF file: sed prints each range that starts at a '##sequence-region'
# line and ends at the next line containing '>', and grep then drops every
# line containing '>'.
sub _build__remove_sequence_filter {
    my ($self) = @_;
    my $filter = q{sed -n '/##sequence-region/,/>/p' | grep -v '>'};
    return $filter;
}

# Unimport the Moose keywords (has, etc.) now that the class is declared, so
# they do not remain in the package namespace.
no Moose;
# Finalise the class definition through its metaclass.
__PACKAGE__->meta->make_immutable;

1;

__END__

=pod



( run in 0.582 second using v1.01-cache-2.11-cpan-d8267643d1d )