AlignDB-IntSpan

 view release on metacpan or  search on metacpan

lib/AlignDB/IntSpan.pm  view on Meta::CPAN

package AlignDB::IntSpan;
use strict;
use warnings;

use Carp;
use Scalar::Util;
use Scalar::Util::Numeric;

# Operator overloading for set objects:
#   * numeric context is refused outright (confess, with a stack trace),
#   * boolean context dispatches to the is_not_empty method,
#   * string context dispatches to the as_string method.
use overload (
    q{0+}   => sub { Carp::confess "Can't numerify an AlignDB::IntSpan\n" },
    q{bool} => q{is_not_empty},
    q{""}   => q{as_string},

    # use Perl standard behaviours for other operations
    fallback => 1,
);

our $VERSION = '1.1.1';

# File-private sentinels standing in for +/- infinity.  The literals are the
# signed 32-bit limits pulled in by one.  Note that the public POS_INF()
# accessor in this file returns $POS_INF - 1, while NEG_INF() returns
# $NEG_INF unchanged.
my $POS_INF = 2_147_483_647 - 1;             # INT_MAX - 1
my $NEG_INF = ( -2_147_483_647 - 1 ) + 1;    # INT_MIN + 1

# Value treated as "positive infinity" by callers: one less than the
# file-private $POS_INF sentinel.
sub POS_INF {
    my $limit = $POS_INF - 1;
    return $limit;
}

# Value treated as "negative infinity": the file-private $NEG_INF sentinel.
sub NEG_INF {
    my $limit = $NEG_INF;
    return $limit;
}

# Textual representation of the empty set in runlist notation.
sub EMPTY_STRING {
    my $empty_runlist = q{-};
    return $empty_runlist;
}

# Constructor.  Builds a set with an empty edge list; when any arguments
# are given they are forwarded unchanged to the add method.
sub new {
    my $class = shift;

    my $self = bless { edges => [] }, $class;
    if ( @_ > 0 ) {
        $self->add(@_);
    }

    return $self;
}

# Checks whether a runlist string can be parsed.
#
# May be called on a class name or on an instance; a scratch object of the
# same class is created to do the parsing.
#
# Parameters:
#   $runlist - runlist string to check
# Returns:
#   1 when _runlist_to_ranges accepts the string, 0 when it dies.
sub valid {
    my $this    = shift;
    my $runlist = shift;

    my $class = ref($this) || $this;

    # Direct method call instead of the indirect-object form "new $class",
    # which is ambiguous to the parser.
    my $set = $class->new;

    # The block evaluates to 1 only when parsing completed without dying;
    # this avoids relying on $@, which other code can reset before it is
    # inspected.
    my $parsed_ok = eval { $set->_runlist_to_ranges($runlist); 1 };

    return $parsed_ok ? 1 : 0;
}

# Empties the set in place by installing a fresh, empty edge list.
# Returns the object itself so calls can be chained.
sub clear {
    my ($self) = @_;

    my @no_edges;
    $self->{edges} = \@no_edges;

    return $self;
}

# Returns the reference to the internal edge array (not a copy).
sub edges_ref {
    my ($self) = @_;
    my $edges_ref = $self->{edges};
    return $edges_ref;
}

# Returns the edge array itself: a copy of the edges in list context,
# the number of edges in scalar context.
sub edges {
    my ($self) = @_;
    my $edges_ref = $self->edges_ref;
    return @{$edges_ref};
}

# Number of edges stored in the set (edges is called in scalar context).
sub edge_size {
    my ($self) = @_;
    my $edge_count = $self->edges;
    return $edge_count;
}

# Number of spans: every span contributes two edges.
sub span_size {
    my ($self) = @_;
    my $edge_count = $self->edge_size;
    return $edge_count / 2;
}

# Renders the set as a runlist: comma-separated runs, each either a single
# integer or "lower-upper".  The empty set renders as EMPTY_STRING.
sub as_string {
    my ($self) = @_;

    return $self->EMPTY_STRING if $self->is_empty;

    my @edges = $self->edges;
    my @runs;
    for ( my $i = 0; $i < @edges; $i += 2 ) {
        my $lower = $edges[$i];

        # Stored upper edges are exclusive; step back to the last member.
        my $upper = $edges[ $i + 1 ] - 1;
        push @runs, $lower == $upper ? $lower : "$lower-$upper";
    }

    return join( ',', @runs );
}

# Expands the set into the list of all of its member integers, in order.
sub as_array {
    my ($self) = @_;

    my @edges = $self->edges;
    my @elements;

    # Consume the edge list one (lower, exclusive-upper) pair at a time.
    while ( my ( $lower, $next ) = splice @edges, 0, 2 ) {
        my $upper = $next - 1;
        push @elements, ( $lower .. $upper );
    }

    return @elements;
}

# Returns the spans as a flat list: lower1, upper1, lower2, upper2, ...
# with inclusive upper bounds.
sub ranges {
    my ($self) = @_;

    my @edges = $self->edges;
    my @ranges;
    for ( my $i = 0; $i < @edges; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;    # exclusive edge -> inclusive
        push @ranges, ( $lower, $upper );
    }

    return @ranges;
}

# Returns one [ lower, upper ] array ref per span (inclusive bounds).
# For an empty set a bare return is used, i.e. an empty list in list
# context and undef in scalar context.
sub spans {
    my ($self) = @_;

    my @edges = $self->edges;
    my @spans;
    while ( my ( $lower, $next ) = splice @edges, 0, 2 ) {
        my $upper = $next - 1;
        push @spans, [ $lower, $upper ];
    }

    return @spans if @spans;
    return;
}

# Splits the set into one new set object per span, each built from the
# runlist "lower-upper" using the invocant's own class.
# For an empty set a bare return is used.
sub sets {
    my ($self) = @_;

    my $class = Scalar::Util::blessed($self);
    my @edges = $self->edges;
    my @sets;
    for ( my $i = 0; $i < @edges; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;
        push @sets, $class->new("$lower-$upper");
    }

    return @sets if @sets;
    return;
}

# Returns one runlist string per span ("n" for a single-member span,
# "lower-upper" otherwise).  The empty set yields EMPTY_STRING.
sub runlists {
    my ($self) = @_;

    return $self->EMPTY_STRING if $self->is_empty;

    my @edges = $self->edges;
    my @runlists;
    while ( my ( $lower, $next ) = splice @edges, 0, 2 ) {
        my $upper = $next - 1;
        my $run   = $lower;
        if ( $lower != $upper ) {
            $run = $lower . '-' . $upper;
        }
        push @runlists, $run;
    }

    return @runlists if @runlists;
    return;
}

# Total number of integers in the set: the sum of all span lengths.
sub cardinality {
    my ($self) = @_;

    my @edges = $self->edges;
    my $total = 0;
    for ( my $i = 0; $i < @edges; $i += 2 ) {
        my $lower = $edges[$i];
        my $upper = $edges[ $i + 1 ] - 1;
        $total += $upper - $lower + 1;
    }

    return $total;
}

# Returns 1 when the set has no edges at all, 0 otherwise.
sub is_empty {
    my ($self) = @_;
    return $self->edge_size == 0 ? 1 : 0;
}

# Logical negation of is_empty; also serves as the boolean overload.
sub is_not_empty {
    my ($self) = @_;
    my $empty = $self->is_empty;
    return !$empty;
}

sub is_neg_inf {

lib/AlignDB/IntSpan.pm  view on Meta::CPAN


    $self->invert;
    $self->add_range(@_);
    $self->invert;

    return $self;
}

# Removes members from this set in place and returns the set.
#
# The first argument selects how the input is interpreted:
#   * an AlignDB::IntSpan object - its ranges are removed,
#   * an integer                 - that integer, or, with further
#                                  arguments, the whole integer list,
#   * anything else              - parsed as a runlist string.
#@returns AlignDB::IntSpan
sub remove {
    my $self  = shift;
    my $first = shift;

    my @ranges;
    if ( ref $first eq __PACKAGE__ ) {
        @ranges = $first->ranges;
    }
    elsif ( Scalar::Util::Numeric::isint($first) ) {
        @ranges
            = scalar @_ > 0
            ? $self->_list_to_ranges( $first, @_ )
            : ( $first, $first );
    }
    else {
        @ranges = $self->_runlist_to_ranges($first);
    }
    $self->remove_range(@ranges);

    return $self;
}

# Adds every member of each supplied set (or set specification, converted
# through _real_set) to this set, in place.  Returns the set itself.
#@returns AlignDB::IntSpan
sub merge {
    my $self = shift;

    for my $supplied (@_) {
        my $other = $self->_real_set($supplied);
        $self->add_range( $other->ranges );
    }

    return $self;
}

# Removes every member of each supplied set (or set specification,
# converted through _real_set) from this set, in place.  An already empty
# set is returned immediately.  Returns the set itself.
#@returns AlignDB::IntSpan
sub subtract {
    my $self = shift;

    if ( $self->is_empty ) {
        return $self;
    }

    for my $supplied (@_) {
        my $other = $self->_real_set($supplied);
        $self->remove_range( $other->ranges );
    }

    return $self;
}

# Returns a new set of the same class holding a copy of this set's edges.
#@returns AlignDB::IntSpan
sub copy {
    my ($self) = @_;

    my $class = Scalar::Util::blessed($self);
    my $clone = $class->new;

    # Copy the edge list so the two objects do not share storage.
    my @edges = $self->edges;
    $clone->{edges} = \@edges;

    return $clone;
}

# Returns a new set: a copy of this set merged with all supplied sets.
# The invocant is left unchanged.
#@returns AlignDB::IntSpan
sub union {
    my $self = shift;

    my $result = $self->copy;
    $result->merge(@_);

    return $result;
}

# Returns a new set: a copy of this set with invert applied to it.
# The invocant is left unchanged.
#@returns AlignDB::IntSpan
sub complement {
    my ($self) = @_;

    my $inverted = $self->copy;
    $inverted->invert;

    return $inverted;
}

# Returns a new set holding the members of this set that are in none of
# the supplied sets.  The invocant is never modified and never returned
# itself.
#
# Previously an empty invocant was handed back as-is, so a caller that
# mutated the "result" silently mutated the original object.  Always
# working on a copy removes that aliasing; subtract already returns
# immediately for an empty set, so no extra work is done.
#@returns AlignDB::IntSpan
sub diff {
    my $self = shift;

    my $new = $self->copy;
    $new->subtract(@_);

    return $new;
}

# Returns a new set holding the members common to this set and every
# supplied set (each converted through _real_set).
#
# Computed as the inversion of the union of all complements.  The invocant
# is never modified and never returned itself.
#@returns AlignDB::IntSpan
sub intersect {
    my $self = shift;

    # An empty set intersected with anything is empty.  Return a fresh copy
    # rather than $self so that mutating the result cannot alter the
    # invocant (every other path already returns a new object).
    return $self->copy if $self->is_empty;

    my $new = $self->complement;
    for my $supplied (@_) {
        my $temp_set = $self->_real_set($supplied)->complement;
        $new->merge($temp_set);
    }
    $new->invert;

    return $new;
}

# Symmetric difference: the members of the union of all arguments that are
# not in their intersection.  The first argument is the invocant.
#@method
#@returns AlignDB::IntSpan
sub xor {
    my $in_any     = union(@_);
    my $not_in_all = intersect(@_)->complement;

    return intersect( $in_any, $not_in_all );
}

sub equal {

lib/AlignDB/IntSpan.pm  view on Meta::CPAN

        my $span_size = $upper - $lower + 1;

        if ( $index > $element_after + $span_size ) {
            $element_after += $span_size;
        }
        else {
            $member = $upper - ( $index - $element_after ) + 1;
            last;
        }
    }

    return $member;
}

# Returns the 1-based position of $member within the ordered members of
# the set, or undef when $member is not in the set.
sub index {
    my ( $self, $member ) = @_;

    my $index;
    my $seen = 0;    # members in the spans already passed

    my @edges = $self->edges;
    while ( my ( $lower, $next ) = splice @edges, 0, 2 ) {
        my $upper = $next - 1;

        if ( $member >= $lower and $member <= $upper ) {
            $index = $member - $lower + 1 + $seen;
            last;
        }

        $seen += $upper - $lower + 1;
    }

    return $index;
}

# Returns a new set holding the members at 1-based positions $from .. $to.
# A $from below 1 triggers a warning and is treated as 1.
#@returns AlignDB::IntSpan
sub slice {
    my ( $self, $from, $to ) = @_;

    if ( $from < 1 ) {
        carp "Start index less than 1\n";
        $from = 1;
    }

    my $wanted = $to - $from + 1;
    my $slice  = $self->_splice( $from, $wanted );

    return $slice;
}

# Builds a new set from $length members of this set, starting at the
# 1-based member position $offset.
#
# Parameters:
#   $offset - 1-based position of the first member to take
#   $length - number of members to take; handed on to _splice_length
# Returns:
#   a new set of the invocant's class; empty when $offset lies beyond the
#   last member.  The invocant is not modified (a copy of its edges is
#   used).
sub _splice {
    my $self   = shift;
    my $offset = shift;
    my $length = shift;

    #@type AlignDB::IntSpan
    my $slice = Scalar::Util::blessed($self)->new;

    my @edges = $self->edges;

    # Drop whole leading spans until $offset falls inside the first
    # remaining span.  Upper edges are exclusive, so upper - lower is the
    # number of members in the span.
    while ( @edges > 1 ) {
        my ( $lower, $upper ) = @edges[ 0, 1 ];
        my $span_size = $upper - $lower;

        if ( $offset <= $span_size ) {
            last;
        }
        else {
            splice( @edges, 0, 2 );
            $offset -= $span_size;
        }
    }

    @edges
        or return $slice;    # empty set

    # Advance the first lower edge to the member at position $offset.
    $edges[0] += $offset - 1;

    # _splice_length returns (lower, exclusive upper) pairs; add_pair is
    # given inclusive bounds.
    my @slices = $self->_splice_length( \@edges, $length );
    while (@slices) {
        my $lower = shift @slices;
        my $upper = shift(@slices) - 1;
        $slice->add_pair( $lower, $upper );
    }

    return $slice;
}

# Takes the first $length members from an edge list.
#
# Parameters:
#   $edges_ref - reference to a flat list of (lower, exclusive upper)
#                edges; the referenced array is consumed (modified)
#   $length    - number of members wanted; undef means "everything"
# Returns:
#   a flat list of (lower, exclusive upper) edges covering the selection;
#   an empty list when $length is zero or negative.
sub _splice_length {
    my $self      = shift;
    my $edges_ref = shift;
    my $length    = shift;

    if ( !defined $length ) {
        return @{$edges_ref};    # everything
    }

    if ( $length <= 0 ) {
        return ();               # empty
    }

    my @slices;

    # Move whole spans across while more members are wanted than the
    # current span holds.
    while ( @$edges_ref > 1 ) {
        my ( $lower, $upper ) = @$edges_ref[ 0, 1 ];
        my $span_size = $upper - $lower;

        if ( $length <= $span_size ) {
            last;
        }
        else {
            push @slices, splice( @$edges_ref, 0, 2 );
            $length -= $span_size;
        }
    }

    # Take the remaining $length members from the start of the next span.
    if (@$edges_ref) {
        my $lower = shift @$edges_ref;
        push @slices, $lower, $lower + $length;
    }

    return @slices;
}

# Smallest member of the set (the first edge); a bare return for an empty
# set.
sub min {
    my ($self) = @_;

    return if $self->is_empty;
    return $self->edges_ref->[0];
}

# Largest member of the set (the last, exclusive edge minus one); a bare
# return for an empty set.
sub max {
    my ($self) = @_;

    return if $self->is_empty;
    return $self->edges_ref->[-1] - 1;
}

# Returns a new set made of the members for which $code_ref returns true.
# The callback takes no arguments; the member under test is in $_.
sub grep_set {
    my ( $self, $code_ref ) = @_;

    my @sub_elements = grep { $code_ref->() } $self->elements;

    return Scalar::Util::blessed($self)->new(@sub_elements);
}

# Returns a new set made of the values produced by calling $code_ref once
# per member (the member is in $_).  The callback is called in list
# context and may return any number of values; undefined ones are dropped.
sub map_set {
    my ( $self, $code_ref ) = @_;

    my @map_elements
        = map { grep { defined $_ } $code_ref->() } $self->elements;

    return Scalar::Util::blessed($self)->new(@map_elements);
}

# Extracts from $string the characters at the positions named by the set
# (positions are 1-based) and returns them concatenated in span order.
sub substr_span {
    my ( $self, $string ) = @_;

    my $sub_string = "";
    for my $span ( $self->spans ) {
        my ( $lower, $upper ) = @{$span};

        # substr offsets are 0-based, set positions are 1-based.
        $sub_string .= substr( $string, $lower - 1, $upper - $lower + 1 );
    }

    return $sub_string;
}

# Returns a new set with the region $start .. $end cut out: members below
# $start are kept, members above $end are shifted down by the length of
# the region, members inside the region are dropped.
#@returns AlignDB::IntSpan
sub banish_span {
    my ( $self, $start, $end ) = @_;

    my $remove_length = $end - $start + 1;

    # Called by map_set with the current member in $_.
    my $relocate = sub {
        return $_                  if $_ < $start;
        return $_ - $remove_length if $_ > $end;
        return;
    };

    my $new = $self->map_set($relocate);

    return $new;
}

# Returns a new set consisting of the single span from this set's minimum
# to its maximum; an empty set yields an empty set.
#@returns AlignDB::IntSpan
sub cover {
    my ($self) = @_;

    my $cover = Scalar::Util::blessed($self)->new;
    $cover->add_pair( $self->min, $self->max )
        if $self->is_not_empty;

    return $cover;
}

# Returns a new set holding the gaps between the spans of this set, i.e.
# the complement with its infinite leading/trailing arms removed.
#@returns AlignDB::IntSpan
sub holes {
    my ($self) = @_;

    my $holes = Scalar::Util::blessed($self)->new;

    # empty set and universal set have no holes
    return $holes if $self->is_empty or $self->is_universal;

    my $c_set  = $self->complement;
    my @ranges = $c_set->ranges;

    # Remove infinite arms of complement set: each arm is one
    # (lower, upper) pair at the front or the back of the list.
    if ( $c_set->is_neg_inf ) {
        shift @ranges for 1 .. 2;
    }
    if ( $c_set->is_pos_inf ) {
        pop @ranges for 1 .. 2;
    }
    $holes->add_range(@ranges);

    return $holes;
}

# Returns a new set in which every span is shrunk by $n at both ends
# (grown when $n is negative).  Bounds equal to NEG_INF / POS_INF are left
# where they are, and spans that shrink to nothing are dropped.
#@returns AlignDB::IntSpan
sub inset {
    my ( $self, $n ) = @_;

    my $inset = Scalar::Util::blessed($self)->new;

    my @edges = $self->edges;
    while ( my ( $lower, $next ) = splice @edges, 0, 2 ) {
        my $upper = $next - 1;

        $lower += $n if $lower != $self->NEG_INF;
        $upper -= $n if $upper != $self->POS_INF;

        if ( $lower <= $upper ) {
            $inset->add_pair( $lower, $upper );
        }
    }

    return $inset;
}

# Shrinks every span by $n at both ends; delegates to inset($n).
#@returns AlignDB::IntSpan
sub trim {
    my ( $self, $n ) = @_;
    return $self->inset($n);
}

# Grows every span by $n at both ends; delegates to inset with -$n.
#@returns AlignDB::IntSpan
sub pad {
    my ( $self, $n ) = @_;
    return $self->inset( -$n );
}

# Returns a new set keeping only the spans of this set that hold at least
# $minlength members.
#@returns AlignDB::IntSpan
sub excise {
    my ( $self, $minlength ) = @_;

    my $set = Scalar::Util::blessed($self)->new;
    for my $island ( $self->sets ) {
        $set->merge($island) if $island->size >= $minlength;
    }

    return $set;
}

# Returns a copy of this set in which every hole of at most $maxlength
# members has been filled in.  A $maxlength that is not greater than zero
# yields a plain copy.
#@returns AlignDB::IntSpan
sub fill {
    my ( $self, $maxlength ) = @_;

    my $set = $self->copy;
    return $set unless $maxlength > 0;

    for my $hole ( $set->holes->sets ) {
        next if !( $hole->size <= $maxlength );
        $set->merge($hole);
    }

    return $set;
}

# Number of members shared by this set and $supplied (the size of their
# intersection).
sub overlap {
    my ( $self, $supplied ) = @_;

    my $common = $self->intersect($supplied);

    return $common->size;
}

# Distance between this set and $supplied (which must be a set object).
#
# Returns:
#   * a bare return when either set is empty,
#   * minus the overlap size when the sets share members,
#   * otherwise the smallest gap between an end of one of this set's
#     spans and the opposite end of one of $supplied's spans.
sub distance {
    my ( $self, $supplied ) = @_;

    return unless $self->size and $supplied->size;

    my $overlap = $self->overlap($supplied);
    return -$overlap if $overlap;

    my @theirs = $supplied->sets;

    my $min_d;
    for my $mine ( $self->sets ) {
        for my $other (@theirs) {
            my $gap_before = abs( $mine->min - $other->max );
            my $gap_after  = abs( $mine->max - $other->min );
            my $gap = $gap_before < $gap_after ? $gap_before : $gap_after;

            $min_d = $gap
                if !defined $min_d or $gap < $min_d;
        }
    }

    return $min_d;
}

# Finds the spans ("islands") of this set that are hit by $supplied.
#
# Parameters:
#   $supplied - either a set object (this class or a subclass) or a single
#               integer
# Returns:
#   a new set holding the matching islands (possibly empty).
# Dies (confess) for any other kind of input.
#@returns AlignDB::IntSpan
sub find_islands {
    my $self     = shift;
    my $supplied = shift;

    my $island;

    # Use an isa check rather than comparing ref() to this package name, so
    # that instances of subclasses are accepted as sets too; results are
    # already built with the invocant's own (sub)class.
    if ( Scalar::Util::blessed($supplied) and $supplied->isa(__PACKAGE__) ) {
        $island = $self->_find_islands_set($supplied);
    }
    elsif ( Scalar::Util::Numeric::isint($supplied) ) {
        $island = $self->_find_islands_int($supplied);
    }
    else {
        Carp::confess "Don't know how to deal with input to find_island\n";
    }

    return $island;
}

# Returns a new set holding the span of this set that contains $number,
# or an empty set when $number is not a member.
sub _find_islands_int {
    my ( $self, $number ) = @_;

    my $island = Scalar::Util::blessed($self)->new;

    # An odd position from _find_pos for $number + 1 means $number lies
    # inside a span; ranges() uses the same pair layout as the edge list.
    my $pos = $self->_find_pos( $number + 1, 0 );
    return $island unless $pos & 1;

    my @ranges = $self->ranges;
    $island->add_range( @ranges[ $pos - 1, $pos ] );

    return $island;
}

# Returns a new set holding every span of this set that shares at least
# one member with $supplied.
sub _find_islands_set {
    my ( $self, $supplied ) = @_;

    my $islands = Scalar::Util::blessed($self)->new;

    # Nothing in common at all: no need to look at individual spans.
    return $islands unless $self->overlap($supplied);

    for my $subset ( $self->sets ) {
        if ( $subset->overlap($supplied) ) {
            $islands->merge($subset);
        }
    }

    return $islands;
}

# Finds the span(s) of this set that lie closest to $supplied without
# overlapping it.
#
# Parameters:
#   $supplied - either a set object (this class or a subclass) or a single
#               integer
# Returns:
#   a new set holding the nearest island; when several islands are equally
#   near they are all included.  Empty when there is no candidate.
# Dies (confess) for any other kind of input.
#@returns AlignDB::IntSpan
sub nearest_island {
    my $self     = shift;
    my $supplied = shift;

    # Use an isa check rather than comparing ref() to this package name, so
    # that instances of subclasses are accepted as sets too.
    my $is_set = Scalar::Util::blessed($supplied)
        && $supplied->isa(__PACKAGE__);

    if ( !$is_set ) {
        if ( Scalar::Util::Numeric::isint($supplied) ) {
            $supplied = Scalar::Util::blessed($self)->new($supplied);
        }
        else {
            Carp::confess
                "Don't know how to deal with input to nearest_island\n";
        }
    }

    my $island = Scalar::Util::blessed($self)->new;
    my $min_d;
    for my $s ( $self->sets ) {
        for my $ss ( $supplied->sets ) {
            next if $s->overlap($ss);
            my $d = $s->distance($ss);
            if ( !defined $min_d or $d <= $min_d ) {
                if ( defined $min_d and $d == $min_d ) {

                    # Tie with the best distance so far: keep both.
                    $island->merge($s);
                }
                else {

                    # Strictly nearer (or first candidate): start over.
                    $min_d  = $d;
                    $island = $s->copy;
                }
            }
        }
    }

    return $island;
}

# Returns the span at 1-based position $index as a set; negative indexes
# count from the end.  A bare return is used for index 0 or an index
# beyond the number of spans.
sub at_island {
    my ( $self, $index ) = @_;

    return if $index == 0 or abs($index) > $self->span_size;

    my @islands = $self->sets;

    # Negative indexes map straight onto Perl's from-the-end indexing.
    my $position = $index < 0 ? $index : $index - 1;

    return $islands[$position];
}

#----------------------------------------------------------#
# Internal methods
#----------------------------------------------------------#
# Converts a list of integers into a flat list of (lower, upper) pairs.
# The input is sorted numerically first; consecutive and duplicate values
# are folded into one range.
sub _list_to_ranges {
    my $self = shift;

    my @sorted = sort { $a <=> $b } @_;
    my @ranges;

    my $start = 0;
    while ( $start < scalar @sorted ) {

        # Extend the run while the next value repeats or follows directly.
        my $stop = $start;
        while ( $stop + 1 < scalar @sorted
            && $sorted[ $stop + 1 ] <= $sorted[$stop] + 1 )
        {
            $stop++;
        }

        push @ranges, ( $sorted[$start], $sorted[$stop] );
        $start = $stop + 1;
    }

    return @ranges;
}

# Converts a runlist string into a flat list of (lower, upper) pairs.
# Whitespace and underscores are ignored.  The EMPTY_STRING runlist gives
# a bare return.  Dies (confess) on a malformed run or on a run whose
# lower bound exceeds its upper bound.
sub _runlist_to_ranges {
    my ( $self, $runlist ) = @_;

    $runlist =~ s/\s|_//g;
    return if $runlist eq $self->EMPTY_STRING;

    my @ranges;

    for my $run ( split ",", $runlist ) {
        if ( my ($single) = $run =~ /^ (-?\d+) $/x ) {
            push @ranges, ( $single, $single );
        }
        elsif ( my ( $lower, $upper ) = $run =~ /^ (-?\d+) - (-?\d+) $/x ) {
            Carp::confess "Bad order: $runlist\n" if $lower > $upper;
            push @ranges, ( $lower, $upper );
        }
        else {
            Carp::confess "Bad syntax: $runlist\n";
        }
    }

    return @ranges;
}

# Converts a set specification into a set: an object of exactly this
# package is returned as-is, anything else is passed to the constructor of
# the invocant's class.
sub _real_set {
    my ( $self, $supplied ) = @_;

    return $supplied
        if defined $supplied and ref $supplied eq __PACKAGE__;

    return Scalar::Util::blessed($self)->new($supplied);
}

# Binary search over the edge list, starting at index $low.
#
# Returns the index of the first edge >= $val.  When $val is larger than
# every edge the result equals the number of edges.
#
# If the result is odd ($pos & 1), $val is in the set.
sub _find_pos {
    my ( $self, $val, $low ) = @_;

    my $edges_ref = $self->edges_ref;
    my $high      = $self->edge_size;

    while ( $low < $high ) {
        my $mid  = int( ( $low + $high ) / 2 );
        my $edge = $edges_ref->[$mid];

        if    ( $val < $edge ) { $high = $mid; }
        elsif ( $val > $edge ) { $low  = $mid + 1; }
        else                   { return $mid; }
    }

    return $low;
}

#----------------------------------------------------------#
# Aliases
#----------------------------------------------------------#

# Each alias forwards all of its arguments to the method it stands for.
sub runlist      { my $self = shift; return $self->as_string(@_); }
sub elements     { my $self = shift; return $self->as_array(@_); }
sub size         { my $self = shift; return $self->cardinality(@_); }
sub count        { my $self = shift; return $self->cardinality(@_); }
sub contains     { my $self = shift; return $self->contains_all(@_); }
sub intersection { my $self = shift; return $self->intersect(@_); }
sub equals       { my $self = shift; return $self->equal(@_); }

1;    # Magic true value required at end of module

__END__

=pod

=encoding UTF-8

=head1 NAME

AlignDB::IntSpan - Handling of sets containing integer spans.

=head1 SYNOPSIS

    use AlignDB::IntSpan;



( run in 1.236 second using v1.01-cache-2.11-cpan-39bf76dae61 )