AI-Prolog

 view release on metacpan or  search on metacpan

lib/AI/Prolog/Parser.pm  view on Meta::CPAN

package AI::Prolog::Parser;
$REVISION = '$Id: Parser.pm,v 1.9 2005/08/06 23:28:40 ovid Exp $';

$VERSION = '0.10';
use strict;
use warnings;
use Carp qw( confess croak );
use Regexp::Common;
use Hash::Util 'lock_keys';

# debugging stuff
use Clone;
use Text::Balanced qw/extract_quotelike extract_delimited/;

use aliased 'AI::Prolog::Engine';
use aliased 'AI::Prolog::KnowledgeBase';
use aliased 'AI::Prolog::Parser::PreProcessor';
use aliased 'AI::Prolog::Term';
use aliased 'AI::Prolog::Term::Number';
use aliased 'AI::Prolog::TermList';
use aliased 'AI::Prolog::TermList::Clause';
use aliased 'AI::Prolog::TermList::Primitive';

my $ATOM = qr/[[:alpha:]][[:alnum:]_]*/;

use constant NULL => 'null';

sub new {
    my ( $class, $string ) = @_;
    my $self = bless {
        _str      => PreProcessor->process($string),
        _posn     => 0,
        _start    => 0,
        _varnum   => 0,
        _internal => 0,
        _vardict  => {},
    } => $class;
    lock_keys %$self;
    return $self;
}

sub _vardict_to_string {
    my $self = shift;
    return "{"
        . (
        join ', ' => map { join '=' => $_->[0], $_->[1] }
            sort { $a->[2] <=> $b->[2] }
            map { [ $_, $self->_sortable_term( $self->{_vardict}{$_} ) ] }
            keys %{ $self->{_vardict} }
        ) . "}";
}

sub _sortable_term {
    my ( $self, $term ) = @_;
    my $string = $term->to_string;
    my $number = substr $string => 1;
    return $string, $number;
}

sub to_string {
    my $self   = shift;
    my $output = Clone::clone($self);
    $output->{_vardict} = $self->_vardict_to_string;
    return "{"
        . substr( $self->{_str}, 0, $self->{_posn} ) . " ^ "
        . substr( $self->{_str}, $self->{_posn} ) . " | "
        . $self->_vardict_to_string . " }";
}

sub _posn    { shift->{_posn} }
sub _str     { shift->{_str} }
sub _start   { shift->{_start} }
sub _varnum  { shift->{_varnum} }
sub _vardict { shift->{_vardict} }

sub _internal {
    my $self = shift;
    if (@_) {
        $self->{_internal} = shift;
        return $self;
    }
    return $self->{_internal};
}

# get the current character
sub current {
    my $self = shift;
    return '#' if $self->empty;
    return substr $self->{_str} => $self->{_posn}, 1;
}

# peek at the next character
sub peek {
    my $self = shift;
    return '#' if $self->empty;
    return substr( $self->{_str} => ( $self->{_posn} + 1 ), 1 ) || '#';
}

# is the parsestring empty?
sub empty {
    my $self = shift;
    return $self->{_posn} >= length $self->{_str};
}

my $LINENUM = 1;

sub linenum {
    my $self = shift;
    if (@_) {
        $LINENUM = shift;
        return $self;
    }
    $LINENUM;
}

sub advance_linenum {
    my $self = shift;
    $LINENUM++;
}

# Move a character forward
sub advance {
    my $self = shift;

    # print $self->current; # XXX
    $self->{_posn}++ unless $self->{_posn} >= length $self->{_str};
    $self->advance_linenum if $self->current =~ /[\r\n]/;
}

# all three get methods must be called before advance
# recognize a name (sequence of alphanumerics)
# XXX the java methods do not directly translate, so
#     we need to revisit this if it breaks
# XXX Update:  There was a subtle bug.  I think
#     I've nailed it, though.  The string index was off by one
sub getname {
    my $self = shift;

    $self->{_start} = $self->{_posn};
    my $getname;
    if ( $self->current =~ /['"]/ ) {

     # Normally, Prolog distinguishes between single and double quoted strings
        my $string = substr $self->{_str} => $self->{_start};
        $getname = extract_delimited($string);
        $self->{_posn} += length $getname;
        return substr $getname => 1, length($getname) - 2;  # strip the quotes
    }
    else {
        my $string = substr $self->{_str} => $self->{_start};
        ($getname) = $string =~ /^($ATOM)/;
        $self->{_posn} += length $getname;
        return $getname;
    }
}

# recognize a number
# XXX same issues as getname
sub getnum {
    my $self = shift;

    $self->{_start} = $self->{_posn};
    my $string = substr $self->{_str} => $self->{_start};
    my ($getnum) = $string =~ /^($RE{num}{real})/;
    if ( '.' eq substr $getnum => -1, 1 ) {
        $getnum = substr $getnum => 0, length($getnum) - 1;
    }
    $self->{_posn} += length $getnum;
    return $getnum;
}

# get the term corresponding to a name.
# if the name is new, create a new variable
sub getvar {
    my $self   = shift;
    my $string = $self->getname;
    my $term   = $self->{_vardict}{$string};
    unless ($term) {
        $term = Term->new( $self->{_varnum}++ );    # XXX wrong _varnum?
        $self->{_vardict}{$string} = $term;
    }
    return ( $term, $string );
}

my $ANON = 'a';

sub get_anon {
    my $self = shift;

    # HACK!!!
    my $string = '___' . $ANON++;
    $self->advance;
    my $term = $self->{_vardict}{$string};
    unless ($term) {
        $term = Term->new( $self->{_varnum}++ );    # XXX wrong _varnum?
        $self->{_vardict}{$string} = $term;
    }
    return ( $term, $string );
}

# handle errors in one place
sub parseerror {
    my ( $self, $character ) = @_;
    my $linenum = $self->linenum;
    croak "Unexpected character: ($character) at line number $linenum";
}

# skips whitespace and prolog comments
sub skipspace {
    my $self = shift;
    $self->advance while $self->current =~ /[[:space:]]/;
    _skipcomment($self);
}

# XXX Other subtle differences
sub _skipcomment {
    my $self = shift;
    if ( $self->current eq '%' ) {
        while ( $self->current ne "\n" && $self->current ne "#" ) {
            $self->advance;
        }
        $self->skipspace;
    }
    if ( $self->current eq "/" ) {
        $self->advance;
        if ( $self->current ne "*" ) {
            $self->parseerror("Expecting '*' after '/'");
        }
        $self->advance;
        while ( $self->current ne "*" && $self->current ne "#" ) {
            $self->advance;
        }
        $self->advance;
        if ( $self->current ne "/" ) {
            $self->parseerror("Expecting terminating '/' on comment");
        }
        $self->advance;
        $self->skipspace;
    }
}

# reset the variable dictionary
sub nextclause {
    my $self = shift;
    $self->{_vardict} = {};
    $self->{_varnum}  = 0;
}

# takes a hash and extends it with the clauses in the string
# $program is a string representing a prolog program
# $db is an initial program that will be augmented with the
# clauses parsed.
# class method, not an instance method
sub consult {
    my ( $class, $program, $db ) = @_;
    $db ||= KnowledgeBase->new;
    my $self = $class->new($program);
    $self->linenum(1);
    $self->skipspace;

    until ( $self->empty ) {
        my $termlist = $self->_termlist;

        my $head = $termlist->term;
        my $body = $termlist->next;

        my $is_primitive = $body && $body->isa(Primitive);
        unless ($is_primitive) {
            my $predicate = $head->predicate;
            $is_primitive = exists $db->{primitives}{$predicate};
        }
        my $add = $is_primitive ? 'add_primitive' : 'add_clause';
        my $clause = Clause->new( $head, $body );
        my $adding_builtins = Engine->_adding_builtins;
        $clause->is_builtin(1) if $adding_builtins;
        $db->$add( $clause, $adding_builtins );
        $self->skipspace;
        $self->nextclause;    # new set of vars
    }
    return $db;
}

sub resolve {
    my ( $class, $db ) = @_;
    foreach my $termlist ( values %{ $db->ht } ) {
        $termlist->resolve($db);
    }
}

sub _termlist {
    my ($self)   = @_;
    my $termlist = TermList->new;
    my @ts       = $self->_term;
    $self->skipspace;

    if ( $self->current eq ':' ) {
        $self->advance;

        if ( $self->current eq '=' ) {

            # we're parsing a primitive
            $self->advance;
            $self->skipspace;
            my $id = $self->getnum;
            $self->skipspace;
            $termlist->{term} = $ts[0];
            $termlist->{next} = Primitive->new($id);
        }
        elsif ( $self->current ne '-' ) {
            $self->parseerror("Expected '-' after ':'");
        }
        else {
            $self->advance;
            $self->skipspace;

            push @ts => $self->_term;
            $self->skipspace;

            while ( $self->current eq ',' ) {
                $self->advance;
                $self->skipspace;
                push @ts => $self->_term;
                $self->skipspace;
            }

            my @tsl;
            for my $j ( reverse 1 .. $#ts ) {
                $tsl[$j] = $termlist->new( $ts[$j], $tsl[ $j + 1 ] );
            }

            $termlist->{term} = $ts[0];
            $termlist->{next} = $tsl[1];
        }
    }
    else {
        $termlist->{term} = $ts[0];
        $termlist->{next} = undef;
    }

    if ( $self->current ne '.' ) {
        $self->parseerror("Expected '.' Got '@{[$self->current]}'");
    }
    $self->advance;
    return $termlist;
}

# This constructor is the simplest way to construct a term.  The term is given
# in standard notation.
# Example: my $term = Term->new(Parser->new("p(1,a(X,b))"));
sub _term {
    my ($self) = @_;
    my $term = Term->new( undef, 0 );
    my $ts   = [];
    my $i    = 0;

    $self->skipspace;    # otherwise we crash when we hit leading
                         # spaces
    if ( $self->current =~ /^[[:lower:]'"]$/ ) {
        $term->{functor} = $self->getname;
        $term->{bound}   = 1;
        $term->{deref}   = 0;

        if ( '(' eq $self->current ) {
            $self->advance;
            $self->skipspace;
            $ts->[ $i++ ] = $self->_term;
            $self->skipspace;

            while ( ',' eq $self->current ) {
                $self->advance;
                $self->skipspace;
                $ts->[ $i++ ] = $self->_term;
                $self->skipspace;
            }

            if ( ')' ne $self->current ) {
                $self->parseerror(
                    "Expecting: ')'.  Got (@{[$self->current]})");
            }

            $self->advance;
            $term->{args} = [];

            $term->{args}[$_] = $ts->[$_] for 0 .. ( $i - 1 );
            $term->{arity} = $i;
        }
        else {
            $term->{arity} = 0;
        }
    }
    elsif ( $self->current =~ /^[[:upper:]]$/ ) {
        $term->{bound} = 1;
        $term->{deref} = 1;
        my ( $ref, $string ) = $self->getvar;
        $term->{ref}     = $ref;
        $term->{varname} = $string;
    }
    elsif ( '_' eq $self->current && $self->peek =~ /^[\]\|\.;\s\,\)]$/ ) {

        # temporary hack to allow anonymous variables
        # this should really be cleaned up
        $term->{bound} = 1;
        $term->{deref} = 1;
        my ( $ref, $string ) = $self->get_anon;
        $term->{ref}     = $ref;
        $term->{varname} = $string;
    }
    elsif ( $self->current =~ /^[-.[:digit:]]$/ ) {
        return Number->new( $self->getnum );
    }



( run in 1.301 second using v1.01-cache-2.11-cpan-39bf76dae61 )