AcePerl

 view release on metacpan or  search on metacpan

Ace/Sequence.pm  view on Meta::CPAN

      last BLOCK unless defined ($refseq);

      if (ref($refseq) && ref($refseq) eq 'ARRAY') {
	$arrayref = $refseq;
	last BLOCK;
      }

      if (ref($refseq) && ($refseq->can('smapped'))) {
	croak "Reference sequence has no common ancestor with sequence"
	  unless $self->parent eq $refseq->parent;
	my ($a,$b,$c) = @{$refseq->{refseq}};
	#	$b += $refseq->offset;
	$b += $refseq->offset;
	$arrayref = [$refseq,$b,$refseq->strand];
	last BLOCK;
      }


      # look up reference sequence in database if we aren't given
      # database object already
      $refseq = $self->db->fetch('Sequence' => $refseq)
	unless $refseq->isa('Ace::Object');
      croak "Invalid reference sequence" unless $refseq;

      # find position of ref sequence in parent strand
      my ($r_parent,$r_offset,$r_length,$r_strand) = find_parent($refseq);
      croak "Reference sequence has no common ancestor with sequence" 
	unless $r_parent eq $self->{parent};

      # set to array reference containing this information
      $arrayref = [$refseq,$r_offset,$r_strand];
    }
    $self->{refseq} = $arrayref;
  }
  return unless $prev;
  return $self->parent if $self->absolute;
  return wantarray ? @{$prev} : $prev->[0];
}

# return strand
sub strand { return $_[0]->{strand} }

# return reference strand
sub r_strand {
  my $self = shift;
  return "+1" if $self->absolute;
  if (my ($ref,$r_offset,$r_strand) = $self->refseq) {
    return $r_strand;
  } else {
    return $self->{strand}
  }
}

sub offset { $_[0]->{offset} }
sub p_offset { $_[0]->{p_offset} }

sub smapped { 1; }
sub type    { 'Sequence' }
sub subtype { }

sub debug {
  my $self = shift;
  my $d = $self->{_debug};
  $self->{_debug} = shift if @_;
  $d;
}

# return the database this sequence is associated with
sub db {
  return Ace->name2db($_[0]->{db} ||= $_[0]->source->db);
}

sub start {
  my ($self,$abs) = @_;
  $abs = $self->absolute unless defined $abs;
  return $self->{p_offset} + $self->{offset} + 1 if $abs;

  if ($self->refseq) {
    my ($ref,$r_offset,$r_strand) = $self->refseq;
    return $r_strand < 0 ? 1 + $r_offset - ($self->{p_offset} + $self->{offset})
                         : 1 + $self->{p_offset} + $self->{offset} - $r_offset;
  }

  else {
    return $self->{offset} +1;
  }

}

sub end { 
  my ($self,$abs) = @_;
  my $start = $self->start($abs);
  my $f = $self->{length} > 0 ? 1 : -1;  # for stupid 1-based adjustments
  if ($abs && $self->refseq ne $self->parent) {
    my $r_strand = $self->r_strand;
    return $start - $self->{length} + $f 
      if $r_strand < 0 or $self->{strand} < 0 or $self->{length} < 0;
    return  $start + $self->{length} - $f
  }
  return  $start + $self->{length} - $f if $self->r_strand eq $self->{strand};
  return  $start - $self->{length} + $f;
}

# turn on absolute coordinates (relative to reference sequence)
sub absolute {
  my $self = shift;
  my $prev = $self->{absolute};
  $self->{absolute} = $_[0] if defined $_[0];
  return $prev;
}

# human readable string (for debugging)
sub asString {
  my $self = shift;
  if ($self->absolute) {
    return join '',$self->parent,'/',$self->start,',',$self->end;
  } elsif (my $ref = $self->refseq){
    my $label = $ref->isa('Ace::Sequence::Feature') ? $ref->info : "$ref";
    return join '',$label,'/',$self->start,',',$self->end;

  } else {
    join '',$self->source,'/',$self->start,',',$self->end;
  }
}

sub cmp {
  my ($self,$arg,$reversed) = @_;
  if (ref($arg) and $arg->isa('Ace::Sequence')) {
    my $cmp = $self->parent cmp $arg->parent 
      || $self->start <=> $arg->start;
    return $reversed ? -$cmp : $cmp;
  }
  my $name = $self->asString;
  return $reversed ? $arg cmp $name : $name cmp $arg;
}

# Return the DNA
sub dna {
  my $self = shift;
  return $self->{dna} if $self->{dna};
  my $raw = $self->_query('seqdna');
  $raw=~s/^>.*\n//m;
  $raw=~s/^\/\/.*//mg;
  $raw=~s/\n//g;
  $raw =~ s/\0+\Z//; # blasted nulls!
  my $effective_strand = $self->end >= $self->start ? '+1' : '-1';
  _complement(\$raw) if $self->r_strand ne $effective_strand;
  return $self->{dna} = $raw;
}

# return a gff file
sub gff {
  my $self = shift;
  my ($abs,$features) = rearrange([['ABS','ABSOLUTE'],'FEATURES'],@_);
  $abs = $self->absolute unless defined $abs;

  # can provide list of feature names, such as 'similarity', or 'all' to get 'em all
  #  !THIS IS BROKEN; IT SHOULD LOOK LIKE FEATURE()!
  my $opt = $self->_feature_filter($features);

  my $gff = $self->_gff($opt);
  warn $gff if $self->debug;

  $self->transformGFF(\$gff) unless $abs;
  return $gff;
}

# return a GFF object using the optional GFF.pm module
sub GFF {
  my $self = shift;
  my ($filter,$converter) = @_;  # anonymous subs
  croak "GFF module not installed" unless require GFF;
  require GFF::Filehandle;

  my @lines = grep !/^\/\//,split "\n",$self->gff(@_);
  local *IN;
  local ($^W) = 0;  # prevent complaint by GFF module
  tie *IN,'GFF::Filehandle',\@lines;
  my $gff = GFF::GeneFeatureSet->new;
  $gff->read(\*IN,$filter,$converter) if $gff;
  return $gff;
}

# Get the features table.  Can filter by type/subtype this way:
# features('similarity:EST','annotation:assembly_tag')
sub features {
  my $self = shift;
  my ($filter,$opt) = $self->_make_filter(@_);

  # get raw gff file
  my $gff = $self->gff(-features=>$opt);

  # turn it into a list of features
  my @features = $self->_make_features($gff,$filter);

  if ($self->automerge) {  # automatic merging
    # fetch out constructed transcripts and clones
    my %types = map {lc($_)=>1} (@$opt,@_);
    if ($types{'transcript'}) {
      push @features,$self->_make_transcripts(\@features);
      @features = grep {$_->type !~ /^(intron|exon)$/ } @features;
    }
    push @features,$self->_make_clones(\@features)      if $types{'clone'};
    if ($types{'similarity'}) {
      my @f = $self->_make_alignments(\@features);
      @features = grep {$_->type ne 'similarity'} @features;
      push @features,@f;
    }
  }

  return wantarray ? @features : \@features;
}

# A little bit more complex - assemble a list of "transcripts"
# consisting of Ace::Sequence::Transcript objects.  These objects
# contain a list of exons and introns.
sub transcripts {
  my $self    = shift;
  my $curated = shift;
  my $ef       = $curated ? "exon:curated"   : "exon";
  my $if       = $curated ? "intron:curated" : "intron";
  my $sf       = $curated ? "Sequence:curated" : "Sequence";

Ace/Sequence.pm  view on Meta::CPAN

	}
      }
      $s .= $expr;
    }
    $s .= "return;\n }";

    $sub = eval $s;
    croak $@ if $@;
  } else {
    $sub = sub { 1; }
  }
  return ($sub,$promiscuous ? [] : [keys %filter]);
}

# turn a GFF file and a filter into a list of Ace::Sequence::Feature objects
sub _make_features {
  my $self = shift;
  my ($gff,$filter) = @_;

  my ($r,$r_offset,$r_strand) = $self->refseq;
  my $parent = $self->parent;
  my $abs    = $self->absolute;
  if ($abs) {
    $r_offset  = 0;
    $r = $parent;
    $r_strand = '+1';
  }
  my @features = map {Ace::Sequence::Feature->new($parent,$r,$r_offset,$r_strand,$abs,$_)}
                 grep !m@^(?:\#|//)@ && $filter->($_),split("\n",$gff);
}


# low level GFF call, no changing absolute to relative coordinates
sub _gff {
  my $self = shift;
  my ($opt,$db) = @_;
  my $data = $self->_query("seqfeatures -version 2 $opt",$db);
  $data =~ s/\0+\Z//;
  return $data; #blasted nulls!
}

# shortcut for running a gif query
sub _query {
  my $self = shift;
  my $command = shift;
  my $db      = shift || $self->db;

  my $parent = $self->parent;
  my $start = $self->start(1);
  my $end   = $self->end(1);
  ($start,$end) = ($end,$start) if $start > $end;  #flippity floppity

  my $coord   = "-coords $start $end";

  # BAD BAD HACK ALERT - CHECKS THE QUERY THAT IS PASSED DOWN
  # ALSO MAKES THINGS INCOMPATIBLE WITH PRIOR 4.9 servers.
#  my $opt     = $command =~ /seqfeatures/ ? '-nodna' : '';
  my $opt = '-noclip';

  my $query = "gif seqget $parent $opt $coord ; $command";
  warn $query if $self->debug;

  return $db->raw_query("gif seqget $parent $opt $coord ; $command");
}

# utility function -- reverse complement
sub _complement {
  my $dna = shift;
  $$dna =~ tr/GATCgatc/CTAGctag/;
  $$dna = scalar reverse $$dna;
}

sub _feature_filter {
  my $self = shift;
  my $features = shift;
  return '' unless $features;
  my $opt = '';
  $opt = '+feature ' . join('|',@$features) if ref($features) eq 'ARRAY' && @$features;
  $opt = "+feature $features" unless ref $features;
  $opt;
}

1;

=head1 NAME

Ace::Sequence - Examine ACeDB Sequence Objects

=head1 SYNOPSIS

    # open database connection and get an Ace::Object sequence
    use Ace::Sequence;

    $db  = Ace->connect(-host => 'stein.cshl.org',-port => 200005);
    $obj = $db->fetch(Predicted_gene => 'ZK154.3');

    # Wrap it in an Ace::Sequence object 
    $seq = Ace::Sequence->new($obj);

    # Find all the exons
    @exons = $seq->features('exon');

    # Find all the exons predicted by various versions of "genefinder"
    @exons = $seq->features('exon:genefinder.*');

    # Iterate through the exons, printing their start, end and DNA
    for my $exon (@exons) {
      print join "\t",$exon->start,$exon->end,$exon->dna,"\n";
    }

    # Find the region 1000 kb upstream of the first exon
    $sub = Ace::Sequence->new(-seq=>$exons[0],
                              -offset=>-1000,-length=>1000);

    # Find all features in that area
    @features = $sub->features;

    # Print its DNA
    print $sub->dna;

    # Create a new Sequence object from the first 500 kb of chromosome 1



( run in 0.692 second using v1.01-cache-2.11-cpan-39bf76dae61 )