IntervalTree

 view release on metacpan or  search on metacpan

lib/IntervalTree.pm  view on Meta::CPAN

package IntervalTree;

use 5.006;
use POSIX qw(ceil); 
use List::Util qw(max min);
use strict;
use warnings;
no warnings 'once';

our $VERSION = '0.05';

=head1 NAME

IntervalTree.pm

=head1 VERSION

Version 0.01

=head1 DESCRIPTION

Data structure for performing intersect queries on a set of intervals which
preserves all information about the intervals (unlike bitset projection methods).

=cut

# Historical note:
#    This module original contained an implementation based on sorted endpoints
#    and a binary search, using an idea from Scott Schwartz and Piotr Berman.
#    Later an interval tree implementation was implemented by Ian for Galaxy's
#    join tool (see `bx.intervals.operations.quicksect.py`). This was then
#    converted to Cython by Brent, who also added support for
#    upstream/downstream/neighbor queries. This was modified by James to
#    handle half-open intervals strictly, to maintain sort order, and to
#    implement the same interface as the original Intersecter.

=head1 SYNOPSIS


Data structure for performing window intersect queries on a set of 
of possibly overlapping 1d intervals.

Usage
=====

Create an empty IntervalTree

    use IntervalTree;
    my $intersecter = IntervalTree->new();

An interval is a start and end position and a value (possibly None).
You can add any object as an interval:

    $intersecter->insert( 0, 10, "food" );
    $intersecter->insert( 3, 7, {foo=>'bar'} );

    $intersecter->find( 2, 5 );
    # output: ['food', {'foo'=>'bar'}]

If the object has start and end attributes (like the IntervalTree::Interval class) there
is are some shortcuts:

    my $intersecter = IntervalTree->new();
    $intersecter->insert_interval( IntervalTree::Interval->new( 0, 10 ) );
    $intersecter->insert_interval( IntervalTree::Interval->new( 3, 7 ) );
    $intersecter->insert_interval( IntervalTree::Interval->new( 3, 40 ) );
    $intersecter->insert_interval( IntervalTree::Interval->new( 13, 50 ) );

    $intersecter->find( 30, 50 );
    # output: [IntervalTree::Interval(3, 40), IntervalTree::Interval(13, 50)]

    $intersecter->find( 100, 200 );
    # output: []

Before/after for intervals

    $intersecter->before_interval( IntervalTree::Interval->new( 10, 20 ) );
    # output: [IntervalTree::Interval(3, 7)]

    $intersecter->before_interval( IntervalTree::Interval->new( 5, 20 ) );
    # output: []

Upstream/downstream

    $intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12));
    # output: [IntervalTree::Interval(0, 10)]
    $intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12, undef, undef, "-"));
    # output: [IntervalTree::Interval(13, 50)]

lib/IntervalTree.pm  view on Meta::CPAN


our $EmptyNode = IntervalTree::Node->new( 0, 0, IntervalTree::Interval->new(0, 0));

sub nlog {
  return -1.0 / log(0.5);
}

sub left_node {
  my ($self) = @_;
  return $self->{cleft} != $EmptyNode ? $self->{cleft} : undef;
}

sub right_node {
  my ($self) = @_;
  return $self->{cright} != $EmptyNode ? $self->{cright}  : undef;
}

sub root_node {
  my ($self) = @_;
  return $self->{croot} != $EmptyNode ? $self->{croot} : undef;
}
    
sub str {
  my ($self) = @_;
  return "Node($self->{start}, $self->{end})";
}

sub new {
  my ($class, $start, $end, $interval) = @_;
  # Perl lacks the binomial distribution, so we convert a
  # uniform into a binomial because it naturally scales with
  # tree size.  Also, perl's uniform is perfect since the
  # upper limit is not inclusive, which gives us undefined here.
  my $self = {};
  $self->{priority} = POSIX::ceil(nlog() * log(-1.0/(1.0 * rand() - 1)));
  $self->{start}    = $start;
  $self->{end}      = $end;
  $self->{interval} = $interval;
  $self->{maxend}   = $end;
  $self->{minstart} = $start;
  $self->{minend}   = $end;
  $self->{cleft}    = $EmptyNode;
  $self->{cright}   = $EmptyNode;
  $self->{croot}    = $EmptyNode;
  return bless $self, $class;
}

=head2 insert
  
Insert a new IntervalTree::Node into the tree of which this node is
currently the root. The return value is the new root of the tree (which
may or may not be this node!)

=cut

sub insert {
  my ($self, $start, $end, $interval) = @_;
  my $croot = $self;
  # If starts are the same, decide which to add interval to based on
  # end, thus maintaining sortedness relative to start/end
  my $decision_endpoint = $start;
  if ($start == $self->{start}) {
    $decision_endpoint = $end;
  }

  if ($decision_endpoint > $self->{start}) {
    # insert to cright tree
    if ($self->{cright} != $EmptyNode) {
      $self->{cright} = $self->{cright}->insert( $start, $end, $interval );
    }
    else {
      $self->{cright} = IntervalTree::Node->new( $start, $end, $interval );
    }
    # rebalance tree
    if ($self->{priority} < $self->{cright}{priority}) {
      $croot = $self->rotate_left();
    }
  }
  else {
    # insert to cleft tree
    if ($self->{cleft} != $EmptyNode) {
      $self->{cleft} = $self->{cleft}->insert( $start, $end, $interval);
    }
    else {
      $self->{cleft} = IntervalTree::Node->new( $start, $end, $interval);
    }
    # rebalance tree
    if ($self->{priority} < $self->{cleft}{priority}) {
      $croot = $self->rotate_right();
    }
  }

  $croot->set_ends();
  $self->{cleft}{croot}  = $croot;
  $self->{cright}{croot} = $croot;
  return $croot;
}

sub rotate_right {
  my ($self) = @_;
  my $croot = $self->{cleft};
  $self->{cleft}  = $self->{cleft}{cright};
  $croot->{cright} = $self;
  $self->set_ends();
  return $croot;
}

sub rotate_left {
  my ($self) = @_;
  my $croot = $self->{cright};
  $self->{cright} = $self->{cright}{cleft};
  $croot->{cleft}  = $self;
  $self->set_ends();
  return $croot;
}

sub set_ends {
  my ($self) = @_;
  if ($self->{cright} != $EmptyNode && $self->{cleft} != $EmptyNode) {
    $self->{maxend} = max($self->{end}, $self->{cright}{maxend}, $self->{cleft}{maxend});
    $self->{minend} = min($self->{end}, $self->{cright}{minend}, $self->{cleft}{minend});
    $self->{minstart} = min($self->{start}, $self->{cright}{minstart}, $self->{cleft}{minstart});
  }
  elsif ( $self->{cright} != $EmptyNode) {
    $self->{maxend} = max($self->{end}, $self->{cright}{maxend});
    $self->{minend} = min($self->{end}, $self->{cright}{minend});



( run in 0.411 second using v1.01-cache-2.11-cpan-524268b4103 )