KSx-Highlight-Summarizer

 view release on metacpan or  search on metacpan

lib/KSx/Highlight/Summarizer.pm  view on Meta::CPAN

use KinoSearch::Highlight::Highlighter;

use strict;

use List::Util qw 'min';
use Number::Range;

use Hash::Util::FieldHash::Compat 'fieldhashes';
fieldhashes \my( %ellipsis, %summ_len, %page_h, %encoder );

sub _range_endpoints {
  my $range = shift;
  my @range = $range->range;
  my $previous = shift @range;
  my $subrange = [($previous) x 2];
  my @arrays;
  foreach my $current (@range) {
    if ($current == ($previous + 1)) {
      $subrange->[1] = $current;
    }
    else {

lib/KSx/Highlight/Summarizer.pm  view on Meta::CPAN

    # Determine the rough boundaries of the excerpts: each hit location
    # contributes one window, and Number::Range merges overlapping windows.
    # NOTE(review): @locs, $limit, $x_len and $text_length are set earlier in
    # this sub, outside the visible excerpt -- presumably the match offsets,
    # the lead-in kept before a match, the window length and the length of
    # the text being summarised; confirm against the full source.
    my $range = Number::Range->new;
    my $summ_len = $summ_len{$self};
    for my $loc (@locs) {
        no warnings; # suppress Number::Range’s nasty warnings
        my $start = $loc - $limit;
        $start = 0 if $start < 0;    # never start before the beginning
        $range->addrange($start . '..' . min($start+$x_len, $text_length));

        # With no summary length set, only the first location is used;
        # otherwise stop as soon as the range's size reaches $summ_len.
        last if !$summ_len || $range->size >= $summ_len;
    }

    # Convert the merged range into a list of [start, end] pairs.
    my @excerpt_bounds = _range_endpoints($range);
#use DDS; warn Dump \@excerpt_bounds if $summ_len;

    # Close small gaps between ranges: whenever two neighbouring excerpts
    # are at most 10 positions apart, extend the earlier one to the end of
    # the later one and drop the later one from the list.
    for (my $idx = 1; $idx < @excerpt_bounds; ) {
        my ($earlier, $later) = @excerpt_bounds[$idx - 1, $idx];
        if ($later->[0] - $earlier->[1] <= 10) {
            $earlier->[1] = $later->[1];
            # Stay on the same index: the next excerpt has shifted into this
            # slot and must be compared against the widened excerpt too.
            splice @excerpt_bounds, $idx, 1;
        }
        else {
            ++$idx;
        }
    }



( run in 0.564 second using v1.01-cache-2.11-cpan-524268b4103 )