Algorithm-AhoCorasick-XS

 view release on metacpan or  search on metacpan

Matcher.cpp  view on Meta::CPAN

        }
        return s;
    }

    vector<match> Matcher::match_details(const string& input) const {
        return search(input, false);
    }

    Matcher::~Matcher() {
        cleanup(root);
    }

    // root is a tree, provided we ignore a) references back to the root
    //   and b) the fail pointer
    void Matcher::cleanup(Trie *node) {
        for (Trie *child: node->children) {
            if (child && child != root) {
                cleanup(child);
            }
        }
        if (node->next && node->next != root) {
            cleanup(node->next);
        }
        delete node;
    }

    void Matcher::build() {
        root = new Trie();
        int i = 0;

        // 1. Build the keyword tree
        for (string& word : words) {
            Trie *node = root->add_word(word);
            node->out.push_front(i);
            i++;
        }

        root->fail = root;

        std::queue<Trie *> queue;
        queue.push(root);

        while (queue.size()) {
            Trie *n = queue.front();
            queue.pop();

            // Reverse the order of this node's out, so earlier keywords are first
            n->out.reverse();

            // add children to queue
            for (Trie *child: n->children) {
                while (child) {
                    queue.push(child);
                    child = child->next;
                }
            }

            // root's fail function is root
            if (n == root) continue;

            // Follow fail function from parent to find the longest suffix
            // (or reach the root)
            Trie *fail = n->parent->fail;
            while (fail->get_child(n->label) == nullptr && fail != root) {
                fail = fail->fail;
            }

            // We found the suffix...
            n->fail = fail->get_child(n->label);

            // ??
            if (n->fail == nullptr) n->fail = root;
            if (n->fail == n) n->fail = root;
        }
    }

    vector<match> Matcher::search(const string& text, bool stopAfterOne) const {
        Trie *n = root;
        vector <match> matches;
        size_t position = 0;
        for (const unsigned char ch : text) {
            // If it doesn't match, follow the fail links
            while (n->get_child(ch) == nullptr && n != root) {
                // Follow fails.
                // If nothing else, this will succeed once n = root, as fail is defined
                // for all characters.
                n = n->fail;
            }

            if (n == root) {
                n = n->get_child(ch);
                if (n == nullptr) n = root;
            }
            else n = n->get_child(ch);

            Trie *no = n;
            while (no != root) {
                for (int matchOffset : no->out) {
                    matches.push_back( {
                        words[matchOffset],
                        position - words[matchOffset].size() + 1,
                        position,
                    } );
                    if (stopAfterOne) {
                        return matches;
                    }
                }
                no = no->fail;
            }
            position++;
        }

        return matches;
    }
}



( run in 0.980 second using v1.01-cache-2.11-cpan-2398b32b56e )