AlignDB-IntSpan
view release on metacpan or search on metacpan
lib/AlignDB/IntSpan.pm view on Meta::CPAN
package AlignDB::IntSpan;
use strict;
use warnings;
use Carp;
use Scalar::Util;
use Scalar::Util::Numeric;
# Operator overloading for set objects:
#   * numeric context is refused outright (confess with a stack trace),
#   * boolean context dispatches to is_not_empty,
#   * string context dispatches to as_string (the runlist form).
use overload (
q{0+} => sub { Carp::confess "Can't numerify an AlignDB::IntSpan\n" },
q{bool} => q{is_not_empty},
q{""} => q{as_string},
# use Perl standard behaviours for other operations
fallback => 1,
);
our $VERSION = '1.1.1';
# File-private sentinels used as the "infinite" limits. They are derived from
# the 32-bit signed integer limits, pulled in by one on each side.
# NOTE(review): the margin presumably exists because the edge list stores each
# span's upper bound as upper + 1 -- confirm against add_range/invert.
my $POS_INF = 2_147_483_647 - 1; # INT_MAX - 1
my $NEG_INF = ( -2_147_483_647 - 1 ) + 1; # INT_MIN + 1
# Largest value reported as "positive infinity": one less than the
# file-private $POS_INF sentinel.
sub POS_INF {
    my $largest = $POS_INF - 1;
    return $largest;
}

# Smallest value reported as "negative infinity": the file-private
# $NEG_INF sentinel itself.
sub NEG_INF {
    my $smallest = $NEG_INF;
    return $smallest;
}

# Textual representation of the empty set in runlists.
sub EMPTY_STRING {
    my $empty_runlist = '-';
    return $empty_runlist;
}
# Constructor. Creates a set with an empty edge list; any further arguments
# are handed unchanged to add() to populate the new set.
sub new {
    my $class = shift;

    my $self = bless { edges => [] }, $class;
    if (@_) {
        $self->add(@_);
    }

    return $self;
}
# Checks whether $runlist is a syntactically valid runlist.
#
# May be called on a class name or on an instance. A scratch set of the same
# class is created and asked to parse the runlist; the parser confesses on
# bad input, which is caught here.
#
# Returns 1 when the runlist parses, 0 otherwise.
sub valid {
    my ( $this, $runlist ) = @_;

    my $class = ref($this) || $this;

    # Direct method call instead of the indirect-object form "new $class".
    my $set = $class->new;

    # Have the eval block itself yield a true value so success does not
    # depend on the global $@, which unrelated code may clobber.
    my $parsed = eval { $set->_runlist_to_ranges($runlist); 1 };

    return $parsed ? 1 : 0;
}
# Empties the set by installing a brand-new (empty) edge array; the previous
# array is replaced rather than truncated. Returns the set itself so calls
# can be chained.
sub clear {
    my ($self) = @_;

    my @no_edges;
    $self->{edges} = \@no_edges;

    return $self;
}
# Returns the reference to the internal edge array (not a copy).
sub edges_ref {
    my ($self) = @_;
    return $self->{edges};
}

# Returns the edges as a list (the element count in scalar context).
sub edges {
    my ($self) = @_;
    my $edges_ref = $self->edges_ref;
    return @{$edges_ref};
}

# Number of entries in the edge list.
sub edge_size {
    my ($self) = @_;
    my $edge_count = $self->edges;    # scalar context: count of edges
    return $edge_count;
}

# Number of spans: every span occupies two entries in the edge list.
sub span_size {
    my ($self) = @_;
    my $edge_count = $self->edge_size;
    return $edge_count / 2;
}
# Renders the set as a runlist string such as "1-5,8,10-12".
# An empty set is rendered as EMPTY_STRING. A span holding a single
# integer is written as that integer alone.
sub as_string {
    my ($self) = @_;

    return $self->EMPTY_STRING if $self->is_empty;

    my @edges = $self->edges;
    my @runs;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];

        # the stored upper edge is one past the last member of the span
        my $upper = $edges[ $i + 1 ] - 1;

        if ( $lower == $upper ) {
            push @runs, $lower;
        }
        else {
            push @runs, "$lower-$upper";
        }
    }

    return join( ',', @runs );
}
# Expands the set into the list of all its member integers, span by span
# in the order the spans are stored.
sub as_array {
    my ($self) = @_;

    my @edges = $self->edges;
    my @elements;
    while ( my ( $lower, $next_edge ) = splice( @edges, 0, 2 ) ) {

        # the stored upper edge is one past the last member of the span
        push @elements, ( $lower .. $next_edge - 1 );
    }

    return @elements;
}
# Returns the spans as one flat list of inclusive bounds:
# ( lower1, upper1, lower2, upper2, ... ).
sub ranges {
    my ($self) = @_;

    my @edges = $self->edges;
    my @ranges;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {

        # convert the exclusive stored upper edge to an inclusive bound
        push @ranges, ( $edges[$i], $edges[ $i + 1 ] - 1 );
    }

    return @ranges;
}
# Returns the spans as a list of [ lower, upper ] array references with
# inclusive bounds. When there are no spans a bare return is used (empty
# list in list context, undef in scalar context).
sub spans {
    my ($self) = @_;

    my @edges = $self->edges;
    my @spans;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;
        push @spans, [ $lower, $upper ];
    }

    return unless @spans;
    return @spans;
}
# Splits the set into one new set object per span. Each object is built
# with the same class as the receiver from the runlist "lower-upper".
# When there are no spans a bare return is used.
sub sets {
    my ($self) = @_;

    my @edges = $self->edges;
    my @sets;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;
        my $span_set = Scalar::Util::blessed($self)->new("$lower-$upper");
        push @sets, $span_set;
    }

    return unless @sets;
    return @sets;
}
# Returns one runlist string per span ("lower-upper", or just the integer
# when the span has a single member). For an empty set the single value
# EMPTY_STRING is returned.
sub runlists {
    my ($self) = @_;

    return $self->EMPTY_STRING if $self->is_empty;

    my @edges = $self->edges;
    my @runlists;
    while ( my ( $lower, $next_edge ) = splice( @edges, 0, 2 ) ) {
        my $upper = $next_edge - 1;
        if ( $lower == $upper ) {
            push @runlists, $lower;
        }
        else {
            push @runlists, $lower . '-' . $upper;
        }
    }

    return unless @runlists;
    return @runlists;
}
# Counts the integers contained in the set by summing the sizes of all spans.
sub cardinality {
    my ($self) = @_;

    my @edges = $self->edges;
    my $total = 0;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;
        $total += $upper - $lower + 1;
    }

    return $total;
}
# Returns 1 when the edge list is empty, 0 otherwise.
sub is_empty {
    my ($self) = @_;
    return $self->edge_size == 0 ? 1 : 0;
}

# Logical negation of is_empty; also used for boolean overloading.
sub is_not_empty {
    my ($self) = @_;
    my $empty = $self->is_empty;
    return !$empty;
}
sub is_neg_inf {
lib/AlignDB/IntSpan.pm view on Meta::CPAN
$self->invert;
$self->add_range(@_);
$self->invert;
return $self;
}
# Removes members from this set (in place) and returns the set.
#
# The first argument decides how the input is interpreted:
#   * an object whose class is exactly this package: its ranges are removed;
#   * an integer: it, together with any further arguments, is treated as a
#     list of integers (a lone integer becomes the range n..n);
#   * anything else: parsed as a runlist string.
# The resulting ranges are forwarded to remove_range.
#@returns AlignDB::IntSpan
sub remove {
    my $self  = shift;
    my $first = shift;

    my @ranges;
    if ( ref $first eq __PACKAGE__ ) {
        @ranges = $first->ranges;
    }
    elsif ( Scalar::Util::Numeric::isint($first) ) {
        @ranges
            = scalar @_ > 0
            ? $self->_list_to_ranges( $first, @_ )
            : ( $first, $first );
    }
    else {
        @ranges = $self->_runlist_to_ranges($first);
    }

    $self->remove_range(@ranges);
    return $self;
}
# Adds the members of every supplied set specification to this set
# (in place) and returns the set. Each argument is first normalised to a
# set object by _real_set.
#@returns AlignDB::IntSpan
sub merge {
    my ( $self, @others ) = @_;

    for my $other (@others) {
        my $other_set = $self->_real_set($other);
        my @ranges    = $other_set->ranges;
        $self->add_range(@ranges);
    }

    return $self;
}
# Removes the members of every supplied set specification from this set
# (in place) and returns the set. Nothing is done when the set is already
# empty.
#@returns AlignDB::IntSpan
sub subtract {
    my ( $self, @others ) = @_;

    if ( $self->is_empty ) {
        return $self;
    }

    for my $other (@others) {
        my $other_set = $self->_real_set($other);
        my @ranges    = $other_set->ranges;
        $self->remove_range(@ranges);
    }

    return $self;
}
# Returns a new set of the same class with its own copy of the edge list.
#@returns AlignDB::IntSpan
sub copy {
    my ($self) = @_;

    my $clone = Scalar::Util::blessed($self)->new;
    my @edges = $self->edges;
    $clone->{edges} = \@edges;

    return $clone;
}
# Non-destructive merge: returns a copy of this set with all supplied set
# specifications merged into it.
#@returns AlignDB::IntSpan
sub union {
    my ( $self, @others ) = @_;

    my $union = $self->copy;
    $union->merge(@others);

    return $union;
}
# Non-destructive invert: returns a copy of this set on which invert has
# been called.
#@returns AlignDB::IntSpan
sub complement {
    my ($self) = @_;

    my $inverted = $self->copy;
    $inverted->invert;

    return $inverted;
}
# Non-destructive subtract: returns a NEW set holding the members of this
# set that are in none of the supplied set specifications.
#
# The result is always a fresh object, including when this set is empty, so
# later changes to the returned set never affect the receiver.
#@returns AlignDB::IntSpan
sub diff {
    my ( $self, @others ) = @_;

    my $new = $self->copy;

    # subtract itself returns immediately for an empty set
    $new->subtract(@others);

    return $new;
}
# Returns a NEW set holding the members common to this set and all supplied
# set specifications.
#
# Implemented with De Morgan's law: the complements of all operands are
# merged and the result is inverted.
#
# The result is always a fresh object, including when this set is empty, so
# later changes to the returned set never affect the receiver.
#@returns AlignDB::IntSpan
sub intersect {
    my ( $self, @others ) = @_;

    # Shortcut: intersecting an empty set yields an empty set. Hand back a
    # copy rather than the receiver itself to avoid aliasing.
    return $self->copy if $self->is_empty;

    my $new = $self->complement;
    for my $supplied (@others) {
        my $temp_set = $self->_real_set($supplied)->complement;
        $new->merge($temp_set);
    }
    $new->invert;

    return $new;
}
# Symmetric difference: returns a new set with the members that are in the
# union of the operands but not in their intersection.
#
# union/intersect are invoked as methods (not as plain functions) so that
# overrides in subclasses are honoured.
#@method
#@returns AlignDB::IntSpan
sub xor {
    my ( $self, @others ) = @_;

    my $in_any = $self->union(@others);
    my $in_all = $self->intersect(@others);

    return $in_any->intersect( $in_all->complement );
}
sub equal {
lib/AlignDB/IntSpan.pm view on Meta::CPAN
my $span_size = $upper - $lower + 1;
if ( $index > $element_after + $span_size ) {
$element_after += $span_size;
}
else {
$member = $upper - ( $index - $element_after ) + 1;
last;
}
}
return $member;
}
# Returns the 1-based position of $member among the members of the set
# (counted span by span in stored order), or undef when $member is not in
# the set.
sub index {
    my ( $self, $member ) = @_;

    my @edges          = $self->edges;
    my $element_before = 0;              # members in the spans already passed
    my $index;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;

        if ( $member >= $lower and $member <= $upper ) {
            $index = $member - $lower + 1 + $element_before;
            last;
        }

        $element_before += $upper - $lower + 1;
    }

    return $index;
}
# Returns a new set made of the members at 1-based positions $from .. $to.
# A $from below 1 triggers a warning and is treated as 1.
#@returns AlignDB::IntSpan
sub slice {
    my ( $self, $from, $to ) = @_;

    if ( $from < 1 ) {
        carp "Start index less than 1\n";
        $from = 1;
    }

    my $length = $to - $from + 1;
    my $slice  = $self->_splice( $from, $length );

    return $slice;
}
# Builds a new set from $length members starting at the 1-based member
# position $offset. Works on a private copy of the edge list; the receiver
# is not modified.
sub _splice {
    my ( $self, $offset, $length ) = @_;

    #@type AlignDB::IntSpan
    my $slice = Scalar::Util::blessed($self)->new;

    # Drop whole spans that lie entirely before the requested offset.
    my @edges = $self->edges;
    while ( @edges > 1 ) {
        my $span_size = $edges[1] - $edges[0];
        last if $offset <= $span_size;

        splice( @edges, 0, 2 );
        $offset -= $span_size;
    }

    # The offset lies beyond the last member: empty set.
    return $slice unless @edges;

    # Move the first remaining lower edge forward to the offset position.
    $edges[0] += $offset - 1;

    my @slices = $self->_splice_length( \@edges, $length );
    while ( my ( $lower, $next_edge ) = splice( @slices, 0, 2 ) ) {
        $slice->add_pair( $lower, $next_edge - 1 );
    }

    return $slice;
}
# Takes the first $length members from an edge list.
#
#   $edges_ref - reference to a flat list of (lower, exclusive upper) edges;
#                this array is consumed (modified) while walking it
#   $length    - number of members wanted; undef means "everything"
#
# Returns a flat edge list in the same (lower, exclusive upper) form. A
# length of zero or less yields an empty list.
sub _splice_length {
    my ( $self, $edges_ref, $length ) = @_;

    return @{$edges_ref} unless defined $length;    # everything

    return () if $length <= 0;                      # empty

    # Take complete spans for as long as they fit into the remaining length.
    my @slices;
    while ( @{$edges_ref} > 1 ) {
        my $span_size = $edges_ref->[1] - $edges_ref->[0];
        last if $length <= $span_size;

        push @slices, splice( @{$edges_ref}, 0, 2 );
        $length -= $span_size;
    }

    # Whatever length is left is cut out of the next span.
    if ( @{$edges_ref} ) {
        my $lower = shift @{$edges_ref};
        push @slices, $lower, $lower + $length;
    }

    return @slices;
}
# Smallest member of the set: the first stored edge. Bare return for an
# empty set.
sub min {
    my ($self) = @_;

    return if $self->is_empty;
    return $self->edges_ref->[0];
}

# Largest member of the set: the last stored edge minus one, because upper
# edges are stored exclusively. Bare return for an empty set.
sub max {
    my ($self) = @_;

    return if $self->is_empty;
    return $self->edges_ref->[-1] - 1;
}
# Returns a new set with the members for which $code_ref returns true.
# The callback takes no arguments; the member under test is in $_.
sub grep_set {
    my ( $self, $code_ref ) = @_;

    my @sub_elements = grep { $code_ref->() } $self->elements;

    my $sub_set = Scalar::Util::blessed($self)->new(@sub_elements);
    return $sub_set;
}
# Returns a new set built from the values $code_ref produces for each
# member. The callback takes no arguments (the member is in $_), is called
# in list context and may return any number of values; undefined values
# are discarded.
sub map_set {
    my ( $self, $code_ref ) = @_;

    my @map_elements = map {
        my @produced = $code_ref->();
        grep { defined $_ } @produced;
    } $self->elements;

    my $map_set = Scalar::Util::blessed($self)->new(@map_elements);
    return $map_set;
}
# Extracts the parts of $string addressed by the spans of this set and
# returns them concatenated. Members are treated as 1-based character
# positions (substr is called with lower - 1).
sub substr_span {
    my ( $self, $string ) = @_;

    my $sub_string = "";
    for my $span ( $self->spans ) {
        my ( $lower, $upper ) = @{$span};
        $sub_string .= substr( $string, $lower - 1, $upper - $lower + 1 );
    }

    return $sub_string;
}
# Returns a new set in which the region $start .. $end has been cut out:
# members inside the region are dropped, members above it are shifted down
# by the length of the region, members below it are kept as they are.
#@returns AlignDB::IntSpan
sub banish_span {
    my ( $self, $start, $end ) = @_;

    my $remove_length = $end - $start + 1;

    my $relocate = sub {
        return $_ if $_ < $start;
        return $_ - $remove_length if $_ > $end;
        return;    # inside the banished region: contributes nothing
    };

    my $new = $self->map_set($relocate);
    return $new;
}
# Returns a new set consisting of the single span min .. max of this set.
# The result is empty when this set is empty.
#@returns AlignDB::IntSpan
sub cover {
    my ($self) = @_;

    my $cover = Scalar::Util::blessed($self)->new;
    $cover->add_pair( $self->min, $self->max ) if $self->is_not_empty;

    return $cover;
}
# Returns a new set containing the gaps between the spans of this set:
# the complement of the set with its infinite arms cut off.
#@returns AlignDB::IntSpan
sub holes {
    my ($self) = @_;

    my $holes = Scalar::Util::blessed($self)->new;

    # empty set and universal set have no holes
    return $holes if $self->is_empty or $self->is_universal;

    my $c_set  = $self->complement;
    my @ranges = $c_set->ranges;

    # Remove infinite arms of complement set
    if ( $c_set->is_neg_inf ) {
        shift @ranges for 1 .. 2;    # leading (lower, upper) pair
    }
    if ( $c_set->is_pos_inf ) {
        pop @ranges for 1 .. 2;      # trailing (lower, upper) pair
    }

    $holes->add_range(@ranges);
    return $holes;
}
# Returns a new set in which every span is shrunk by $n at both ends
# (a negative $n enlarges the spans). A bound equal to NEG_INF / POS_INF
# is left alone. Spans whose lower bound would pass the upper bound are
# dropped.
#@returns AlignDB::IntSpan
sub inset {
    my ( $self, $n ) = @_;

    my $inset = Scalar::Util::blessed($self)->new;

    my @edges = $self->edges;
    for ( my $i = 0 ; $i < @edges ; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;

        $lower += $n unless $lower == $self->NEG_INF;
        $upper -= $n unless $upper == $self->POS_INF;

        next unless $lower <= $upper;
        $inset->add_pair( $lower, $upper );
    }

    return $inset;
}
# Shrinks every span by $n at both ends; a plain forward to inset.
#@returns AlignDB::IntSpan
sub trim {
    my ( $self, $n ) = @_;
    return $self->inset($n);
}

# Enlarges every span by $n at both ends; inset with the negated amount.
#@returns AlignDB::IntSpan
sub pad {
    my ( $self, $n ) = @_;
    return $self->inset( -$n );
}
# Returns a new set that keeps only the spans of this set holding at least
# $minlength members; shorter spans are removed.
#@returns AlignDB::IntSpan
sub excise {
    my ( $self, $minlength ) = @_;

    my $set = Scalar::Util::blessed($self)->new;

    # Plain loop: the merge is wanted for its side effect only, so a map in
    # void context would be misleading.
    for my $span ( $self->sets ) {
        next unless $span->size >= $minlength;
        $set->merge($span);
    }

    return $set;
}
# Returns a copy of this set in which every hole of at most $maxlength
# members has been filled. With a $maxlength that is not greater than 0
# the copy is returned unchanged.
#@returns AlignDB::IntSpan
sub fill {
    my ( $self, $maxlength ) = @_;

    my $set = $self->copy;
    return $set unless $maxlength > 0;

    # The list of holes is taken once, before any of them is filled.
    my @holes = $set->holes->sets;
    for my $hole (@holes) {
        next unless $hole->size <= $maxlength;
        $set->merge($hole);
    }

    return $set;
}
# Number of members this set shares with $supplied: the size of their
# intersection.
sub overlap {
    my ( $self, $supplied ) = @_;

    my $common = $self->intersect($supplied);
    return $common->size;
}
# Distance between this set and the set object $supplied.
#
#   * bare return when either set is empty;
#   * the negated overlap size when the sets share members;
#   * otherwise the smallest gap found over all pairs of spans, where the
#     gap of a pair is the smaller of |min1 - max2| and |max1 - min2|.
sub distance {
    my ( $self, $supplied ) = @_;

    return unless $self->size and $supplied->size;

    my $overlap = $self->overlap($supplied);
    return -$overlap if $overlap;

    my $min_d;
    for my $span1 ( $self->sets ) {
        for my $span2 ( $supplied->sets ) {
            my $d1 = abs( $span1->min - $span2->max );
            my $d2 = abs( $span1->max - $span2->min );

            my $d = $d2;
            $d = $d1 if $d1 < $d2;

            $min_d = $d if !defined $min_d or $d < $min_d;
        }
    }

    return $min_d;
}
# Returns a new set with the spans ("islands") of this set that are hit by
# $supplied.
#
#   $supplied - either a set object (of this class or any class derived from
#               it) or a single integer
#
# Confesses for any other kind of input.
#@returns AlignDB::IntSpan
sub find_islands {
    my ( $self, $supplied ) = @_;

    my $island;

    # isa-based check: objects of subclasses are sets too. Comparing
    # ref() against the package name would turn them away.
    if ( Scalar::Util::blessed($supplied) and $supplied->isa(__PACKAGE__) ) {
        $island = $self->_find_islands_set($supplied);
    }
    elsif ( Scalar::Util::Numeric::isint($supplied) ) {
        $island = $self->_find_islands_int($supplied);
    }
    else {
        Carp::confess "Don't know how to deal with input to find_island\n";
    }

    return $island;
}
# Returns a new set holding the span of this set that contains $number,
# or an empty set when $number is not a member.
sub _find_islands_int {
    my ( $self, $number ) = @_;

    my $island = Scalar::Util::blessed($self)->new;

    # if $pos & 1, i.e. $pos is odd number, $val is in the set
    my $pos = $self->_find_pos( $number + 1, 0 );
    return $island unless $pos & 1;

    # ranges lines up index-for-index with the edge list, so the span
    # around position $pos is the pair ( $pos - 1, $pos )
    my @ranges = $self->ranges;
    $island->add_range( @ranges[ $pos - 1, $pos ] );

    return $island;
}
# Returns a new set made of every span of this set that shares at least
# one member with $supplied.
sub _find_islands_set {
    my ( $self, $supplied ) = @_;

    my $islands = Scalar::Util::blessed($self)->new;

    # no overlap at all: no need to look at the individual spans
    return $islands unless $self->overlap($supplied);

    for my $subset ( $self->sets ) {
        next unless $subset->overlap($supplied);
        $islands->merge($subset);
    }

    return $islands;
}
# Returns a new set with the span(s) of this set that lie closest to
# $supplied without overlapping it. Spans at the same minimal distance are
# all included.
#
#   $supplied - either a set object (of this class or any class derived from
#               it) or a single integer, which is turned into a set
#
# Confesses for any other kind of input.
#@returns AlignDB::IntSpan
sub nearest_island {
    my ( $self, $supplied ) = @_;

    # isa-based check: objects of subclasses are sets too. Comparing
    # ref() against the package name would turn them away.
    if ( Scalar::Util::blessed($supplied) and $supplied->isa(__PACKAGE__) ) {

        # already a set, use as it is
    }
    elsif ( Scalar::Util::Numeric::isint($supplied) ) {
        $supplied = Scalar::Util::blessed($self)->new($supplied);
    }
    else {
        Carp::confess "Don't know how to deal with input to nearest_island\n";
    }

    my $island = Scalar::Util::blessed($self)->new;
    my $min_d;
    for my $s ( $self->sets ) {
        for my $ss ( $supplied->sets ) {
            next if $s->overlap($ss);

            my $d = $s->distance($ss);
            if ( defined $min_d and $d == $min_d ) {

                # tie with the current best: keep both
                $island->merge($s);
            }
            elsif ( !defined $min_d or $d < $min_d ) {

                # first candidate or a strictly closer one: start over
                $min_d  = $d;
                $island = $s->copy;
            }
        }
    }

    return $island;
}
# Returns the span at 1-based position $index as a set object. Negative
# positions count from the end (-1 is the last span). Bare return when
# $index is 0 or its magnitude exceeds the number of spans.
sub at_island {
    my ( $self, $index ) = @_;

    return if $index == 0 or abs($index) > $self->span_size;

    my @islands = $self->sets;

    # negative indices are usable directly; positive ones are 1-based
    my $array_index = $index < 0 ? $index : $index - 1;
    return $islands[$array_index];
}
#----------------------------------------------------------#
# Internal methods
#----------------------------------------------------------#
# Converts a list of integers into pairs of ranges.
#
# The integers are sorted numerically; runs of consecutive (or repeated)
# values are folded into one (lower, upper) pair. Returns the flat list
# ( lower1, upper1, lower2, upper2, ... ).
sub _list_to_ranges {
    my ( $self, @numbers ) = @_;

    my @ranges;
    for my $value ( sort { $a <=> $b } @numbers ) {
        if ( @ranges && $value <= $ranges[-1] + 1 ) {

            # continues (or repeats) the current run: extend its upper bound
            $ranges[-1] = $value;
        }
        else {

            # gap: open a new run consisting of this value alone
            push @ranges, ( $value, $value );
        }
    }

    return @ranges;
}
# Converts a runlist into pairs of ranges.
#
# Whitespace and underscores are stripped first. The EMPTY_STRING runlist
# gives a bare return. Every comma separated run must be either a single
# integer or "lower-upper"; anything else, or a run whose lower bound is
# greater than its upper bound, makes the method confess.
#
# Returns the flat list ( lower1, upper1, lower2, upper2, ... ).
sub _runlist_to_ranges {
    my ( $self, $runlist ) = @_;

    $runlist =~ s/\s|_//g;
    return if $runlist eq $self->EMPTY_STRING;

    my @ranges;
    for my $run ( split ",", $runlist ) {
        if ( my ($single) = $run =~ /^ (-?\d+) $/x ) {
            push @ranges, ( $single, $single );
            next;
        }

        if ( my ( $lower, $upper ) = $run =~ /^ (-?\d+) - (-?\d+) $/x ) {
            Carp::confess "Bad order: $runlist\n" if $lower > $upper;
            push @ranges, ( $lower, $upper );
            next;
        }

        Carp::confess "Bad syntax: $runlist\n";
    }

    return @ranges;
}
# Converts a set specification into a set.
#
# An object whose class is exactly this package is returned as it is;
# everything else (including undef) is passed to the constructor of the
# receiver's class.
sub _real_set {
    my ( $self, $supplied ) = @_;

    return $supplied
        if defined $supplied and ref $supplied eq __PACKAGE__;

    return Scalar::Util::blessed($self)->new($supplied);
}
# Binary search in the edge list: returns the index of the first edge that
# is >= $val, searching from index $low up to the end of the list.
#
# When $val is larger than every edge, the returned index equals the size
# of the edge list.
#
# If $pos & 1, i.e. $pos is odd number, $val is in the set
sub _find_pos {
    my ( $self, $val, $low ) = @_;

    my $edges_ref = $self->edges_ref;
    my $high      = $self->edge_size;

    while ( $low < $high ) {
        my $mid  = int( ( $low + $high ) / 2 );
        my $edge = $edges_ref->[$mid];

        if ( $val < $edge ) {
            $high = $mid;
            next;
        }
        if ( $val > $edge ) {
            $low = $mid + 1;
            next;
        }

        # exact hit on an edge
        return $mid;
    }

    return $low;
}
#----------------------------------------------------------#
# Aliases
#----------------------------------------------------------#
# Each alias forwards all of its arguments to the method it stands for and
# returns whatever that method returns.

# Alias of as_string.
sub runlist {
    my $self = shift;
    return $self->as_string(@_);
}

# Alias of as_array.
sub elements {
    my $self = shift;
    return $self->as_array(@_);
}

# Alias of cardinality.
sub size {
    my $self = shift;
    return $self->cardinality(@_);
}

# Alias of cardinality.
sub count {
    my $self = shift;
    return $self->cardinality(@_);
}

# Alias of contains_all.
sub contains {
    my $self = shift;
    return $self->contains_all(@_);
}

# Alias of intersect.
sub intersection {
    my $self = shift;
    return $self->intersect(@_);
}

# Alias of equal.
sub equals {
    my $self = shift;
    return $self->equal(@_);
}
1; # Magic true value required at end of module
__END__
=pod
=encoding UTF-8
=head1 NAME
AlignDB::IntSpan - Handling of sets containing integer spans.
=head1 SYNOPSIS
use AlignDB::IntSpan;
( run in 1.236 second using v1.01-cache-2.11-cpan-39bf76dae61 )