IntervalTree
view release on metacpan or search on metacpan
lib/IntervalTree.pm view on Meta::CPAN
package IntervalTree;
use 5.006;
use POSIX qw(ceil);
use List::Util qw(max min);
use strict;
use warnings;
no warnings 'once';
our $VERSION = '0.05';
=head1 NAME
IntervalTree.pm
=head1 VERSION
Version 0.01
=head1 DESCRIPTION
Data structure for performing intersect queries on a set of intervals which
preserves all information about the intervals (unlike bitset projection methods).
=cut
# Historical note:
# This module original contained an implementation based on sorted endpoints
# and a binary search, using an idea from Scott Schwartz and Piotr Berman.
# Later an interval tree implementation was implemented by Ian for Galaxy's
# join tool (see `bx.intervals.operations.quicksect.py`). This was then
# converted to Cython by Brent, who also added support for
# upstream/downstream/neighbor queries. This was modified by James to
# handle half-open intervals strictly, to maintain sort order, and to
# implement the same interface as the original Intersecter.
=head1 SYNOPSIS
Data structure for performing window intersect queries on a set of
of possibly overlapping 1d intervals.
Usage
=====
Create an empty IntervalTree
use IntervalTree;
my $intersecter = IntervalTree->new();
An interval is a start and end position and a value (possibly None).
You can add any object as an interval:
$intersecter->insert( 0, 10, "food" );
$intersecter->insert( 3, 7, {foo=>'bar'} );
$intersecter->find( 2, 5 );
# output: ['food', {'foo'=>'bar'}]
If the object has start and end attributes (like the IntervalTree::Interval class) there
is are some shortcuts:
my $intersecter = IntervalTree->new();
$intersecter->insert_interval( IntervalTree::Interval->new( 0, 10 ) );
$intersecter->insert_interval( IntervalTree::Interval->new( 3, 7 ) );
$intersecter->insert_interval( IntervalTree::Interval->new( 3, 40 ) );
$intersecter->insert_interval( IntervalTree::Interval->new( 13, 50 ) );
$intersecter->find( 30, 50 );
# output: [IntervalTree::Interval(3, 40), IntervalTree::Interval(13, 50)]
$intersecter->find( 100, 200 );
# output: []
Before/after for intervals
$intersecter->before_interval( IntervalTree::Interval->new( 10, 20 ) );
# output: [IntervalTree::Interval(3, 7)]
$intersecter->before_interval( IntervalTree::Interval->new( 5, 20 ) );
# output: []
Upstream/downstream
$intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12));
# output: [IntervalTree::Interval(0, 10)]
$intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12, undef, undef, "-"));
# output: [IntervalTree::Interval(13, 50)]
lib/IntervalTree.pm view on Meta::CPAN
our $EmptyNode = IntervalTree::Node->new( 0, 0, IntervalTree::Interval->new(0, 0));
sub nlog {
return -1.0 / log(0.5);
}
sub left_node {
my ($self) = @_;
return $self->{cleft} != $EmptyNode ? $self->{cleft} : undef;
}
sub right_node {
my ($self) = @_;
return $self->{cright} != $EmptyNode ? $self->{cright} : undef;
}
sub root_node {
my ($self) = @_;
return $self->{croot} != $EmptyNode ? $self->{croot} : undef;
}
sub str {
my ($self) = @_;
return "Node($self->{start}, $self->{end})";
}
sub new {
my ($class, $start, $end, $interval) = @_;
# Perl lacks the binomial distribution, so we convert a
# uniform into a binomial because it naturally scales with
# tree size. Also, perl's uniform is perfect since the
# upper limit is not inclusive, which gives us undefined here.
my $self = {};
$self->{priority} = POSIX::ceil(nlog() * log(-1.0/(1.0 * rand() - 1)));
$self->{start} = $start;
$self->{end} = $end;
$self->{interval} = $interval;
$self->{maxend} = $end;
$self->{minstart} = $start;
$self->{minend} = $end;
$self->{cleft} = $EmptyNode;
$self->{cright} = $EmptyNode;
$self->{croot} = $EmptyNode;
return bless $self, $class;
}
=head2 insert
Insert a new IntervalTree::Node into the tree of which this node is
currently the root. The return value is the new root of the tree (which
may or may not be this node!)
=cut
sub insert {
my ($self, $start, $end, $interval) = @_;
my $croot = $self;
# If starts are the same, decide which to add interval to based on
# end, thus maintaining sortedness relative to start/end
my $decision_endpoint = $start;
if ($start == $self->{start}) {
$decision_endpoint = $end;
}
if ($decision_endpoint > $self->{start}) {
# insert to cright tree
if ($self->{cright} != $EmptyNode) {
$self->{cright} = $self->{cright}->insert( $start, $end, $interval );
}
else {
$self->{cright} = IntervalTree::Node->new( $start, $end, $interval );
}
# rebalance tree
if ($self->{priority} < $self->{cright}{priority}) {
$croot = $self->rotate_left();
}
}
else {
# insert to cleft tree
if ($self->{cleft} != $EmptyNode) {
$self->{cleft} = $self->{cleft}->insert( $start, $end, $interval);
}
else {
$self->{cleft} = IntervalTree::Node->new( $start, $end, $interval);
}
# rebalance tree
if ($self->{priority} < $self->{cleft}{priority}) {
$croot = $self->rotate_right();
}
}
$croot->set_ends();
$self->{cleft}{croot} = $croot;
$self->{cright}{croot} = $croot;
return $croot;
}
sub rotate_right {
my ($self) = @_;
my $croot = $self->{cleft};
$self->{cleft} = $self->{cleft}{cright};
$croot->{cright} = $self;
$self->set_ends();
return $croot;
}
sub rotate_left {
my ($self) = @_;
my $croot = $self->{cright};
$self->{cright} = $self->{cright}{cleft};
$croot->{cleft} = $self;
$self->set_ends();
return $croot;
}
sub set_ends {
my ($self) = @_;
if ($self->{cright} != $EmptyNode && $self->{cleft} != $EmptyNode) {
$self->{maxend} = max($self->{end}, $self->{cright}{maxend}, $self->{cleft}{maxend});
$self->{minend} = min($self->{end}, $self->{cright}{minend}, $self->{cleft}{minend});
$self->{minstart} = min($self->{start}, $self->{cright}{minstart}, $self->{cleft}{minstart});
}
elsif ( $self->{cright} != $EmptyNode) {
$self->{maxend} = max($self->{end}, $self->{cright}{maxend});
$self->{minend} = min($self->{end}, $self->{cright}{minend});
( run in 0.411 second using v1.01-cache-2.11-cpan-524268b4103 )