Apache-AxKit-Language-SpellCheck

 view release on metacpan or  search on metacpan

SpellCheck.pm  view on Meta::CPAN

# $Id: SpellCheck.pm,v 1.5 2005/01/27 00:45:38 nachbaur Exp $

package Apache::AxKit::Language::SpellCheck;

use base Apache::AxKit::Language;
use strict;

use AxKit;
use Apache;
use Apache::Request;
use Apache::AxKit::Language;
use Apache::AxKit::Provider;
use Text::Aspell;
use Cwd;

our $VERSION = 0.03;
our $NS = 'http://axkit.org/2004/07/17-spell-check#';

sub stylesheet_exists () { 0; }

sub handler {
    my $class = shift;
    my ($r, $xml_provider, undef, $last_in_chain) = @_;
    
    #
    # Create and init the speller object
    my $spell = new Text::Aspell;
    $spell->set_option('sug-mode', 'fast');
    $spell->set_option('lang', $r->dir_config("AxSpellLanguage") || 'en_US');
    my $max_suggestion = $r->dir_config("AxSpellSuggestions") || 3;

    #
    # Process the list of elements we need to skip
    my %skip_elements = ();
    foreach my $element (split(/\s+/, $r->dir_config("AxSpellSkipElements") )) {
        if ($element !~ /^(?:\{(.*?)\})?([\w\d\-\_]+)$/) {
            die "The element \"$element\" is invalid in AxSpellSkipElements";
        }
        my $ns   = $1;
        my $node = $2;
        $skip_elements{$ns}->{$node} = 1;
    }

    #
    # Load the DOM object
    my $dom = $r->pnotes('dom_tree');
    unless ($dom) {
        my $xmlstring = $r->pnotes('xml_string');
        my $parser = XML::LibXML->new();
        $parser->expand_entities(1);
        $dom = $parser->parse_string($xmlstring, $r->uri());
    }

    #
    # Find the root node
    my $root = $dom->documentElement();
    $root->setNamespace($NS, 'sp', 0);

    #
    # Iterate through all the text nodes
    foreach my $text_node ($root->findnodes('//text()')) {

        #
        # Skip if our parent is in the exclude list
        my $parent = $text_node->parentNode;
        if ($skip_elements{$parent->namespaceURI}->{$parent->localname}) {
            #warn "Skipping " . $text_node->data . " due to parent " . $parent->nodeName . "\n";
            next;
        }

        my @nodes = ();
        my $pre_text = undef;
        my $changed = 0;

        #
        # Loop through the words in this text ndoe
        foreach my $word (split(/\b/, $text_node->data)) {

            #
            # Skip empty strings and non-spellable words
            next unless defined $word;
            unless ($word =~ /^\p{L}+$/i) {
                $pre_text .= $word;
                next;
            }

            #
            # Check the word against the spellchecker
            if ($spell->check($word)) {
                $pre_text .= $word;
            }
            
            #
            # The word isn't spelled right, add elements
            else {
                $changed++;

                #
                # Add an initial text node if the unspelled word is somewhere in the middle
                push @nodes, XML::LibXML::Text->new($pre_text) if (length($pre_text));
                $pre_text = undef;

                #
                # Add the root element for this spelling block
                my $element = $dom->createElementNS($NS, "incorrect");

                #
                # Iterate and add our suggestions
                my $counter = 0;
                if ($max_suggestion) {



( run in 0.474 second using v1.01-cache-2.11-cpan-5b529ec07f3 )