Algorithm-AhoCorasick

 view release on metacpan or  search on metacpan

lib/Algorithm/AhoCorasick/SearchMachine.pm  view on Meta::CPAN


    # build transition links
    foreach my $p (@{$self->{keywords}}) {
	my $nd = $self->{root};
	foreach my $c (split //, $p) {
	    my $ndNew = $nd->get_transition($c);
	    if (!$ndNew) {
		$ndNew = Algorithm::AhoCorasick::Node->new(parent => $nd, char => $c);
		$nd->add_transition($ndNew);
	    }

	    $nd = $ndNew;
	}

	$nd->add_result($p);
    }

    # build failure links
    my @nodes;
    foreach my $nd ($self->{root}->transitions) {
	$nd->failure($self->{root});
	push @nodes, $nd->transitions;
    }

    while (@nodes) {
	my @newNodes;

	foreach my $nd (@nodes) {
	    my $r = $nd->parent->failure;
	    my $c = $nd->char;

	    while ($r && !($r->get_transition($c))) {
		$r = $r->failure;
	    }

	    if (!$r) {
		$nd->failure($self->{root});
	    } else {
		my $tc = $r->get_transition($c);
		$nd->failure($tc);

		foreach my $result ($tc->results) {
		    $nd->add_result($result);
		}
	    }

	    push @newNodes, $nd->transitions;
	}

	@nodes = @newNodes;
    }

    $self->{root}->failure($self->{root});
    $self->{state} = $self->{root};
}

sub feed {
    my ($self, $text, $callback) = @_;

    my $index = 0;
    my $l = length($text);
    while ($index < $l) {
	my $trans = undef;
	while (1) {
	    $trans = $self->{state}->get_transition(substr($text, $index, 1));
	    last if ($self->{state} == $self->{root}) || $trans;
	    $self->{state} = $self->{state}->failure;
	}

	if ($trans) {
	    $self->{state} = $trans;
	}

	foreach my $found ($self->{state}->results) {
	    my $rv = &$callback($index - length($found) + 1, $found);
	    if ($rv) {
		return $rv;
	    }
	}

	++$index;
    }

    return undef;
}

package Algorithm::AhoCorasick::Node;

use strict;
use warnings;
use Scalar::Util qw(weaken);

sub new {
    my $class = shift;

    my $self = { @_ };
    $self->{results} = { };
    $self->{transitions} = { };
    weaken $self->{parent} if $self->{parent};
    return bless $self, $class;
}

sub char {
    my $self = shift;

    if (!exists($self->{char})) {
	die "root node has no character";
    }

    return $self->{char};
}

sub parent {
    my $self = shift;

    if (!exists($self->{parent})) {
	die "root node has no parent";
    }

    return $self->{parent};
}

sub failure {
    my $self = shift;

    if (@_) {
        $self->{failure} = $_[0];
        weaken $self->{failure};
    }

    return $self->{failure};
}

# Returns transition to the specified character, or undef.
sub get_transition {



( run in 0.639 second using v1.01-cache-2.11-cpan-39bf76dae61 )