AI-Prolog

 view release on metacpan or  search on metacpan

lib/AI/Prolog/Parser.pm  view on Meta::CPAN

package AI::Prolog::Parser;
$REVISION = '$Id: Parser.pm,v 1.9 2005/08/06 23:28:40 ovid Exp $';

$VERSION = '0.10';
use strict;
use warnings;
use Carp qw( confess croak );
use Regexp::Common;
use Hash::Util 'lock_keys';

# debugging stuff
use Clone;
use Text::Balanced qw/extract_quotelike extract_delimited/;

use aliased 'AI::Prolog::Engine';
use aliased 'AI::Prolog::KnowledgeBase';
use aliased 'AI::Prolog::Parser::PreProcessor';
use aliased 'AI::Prolog::Term';
use aliased 'AI::Prolog::Term::Number';
use aliased 'AI::Prolog::TermList';
use aliased 'AI::Prolog::TermList::Clause';
use aliased 'AI::Prolog::TermList::Primitive';

# Pattern for an unquoted name: one letter followed by any number of
# letters, digits or underscores.  getname() anchors it at the current
# parse position.
my $ATOM = qr/[[:alpha:]][[:alnum:]_]*/;

# NOTE(review): string constant 'null'; nothing in the visible part of this
# file uses it -- confirm where it is consumed before changing it.
use constant NULL => 'null';

# Constructor.  Hands the raw program text to the PreProcessor and wraps
# the result in a parser positioned at the first character.
#
#   $string - source text to parse
#
# Returns the new parser object.  Its keys are locked, so accessing a
# misspelled field dies instead of silently creating it.
sub new {
    my $class  = shift;
    my $string = shift;

    my %state = (
        _str      => PreProcessor->process($string),
        _posn     => 0,     # index of the current character in _str
        _start    => 0,     # where the most recent getname/getnum began
        _varnum   => 0,     # counter handed to Term->new for new variables
        _internal => 0,     # read and written through _internal()
        _vardict  => {},    # variable name => term
    );

    my $self = bless \%state, $class;
    lock_keys %$self;
    return $self;
}

# Renders the variable dictionary as "{Name=term, Name=term}".
# Entries are ordered numerically by the second value that
# _sortable_term() returns for each term.
sub _vardict_to_string {
    my $self    = shift;
    my $vardict = $self->{_vardict};

    # Each row is [ variable name, term string, numeric sort key ].
    my @rows;
    for my $name ( keys %{$vardict} ) {
        push @rows, [ $name, $self->_sortable_term( $vardict->{$name} ) ];
    }

    my @pairs;
    for my $row ( sort { $a->[2] <=> $b->[2] } @rows ) {
        push @pairs, join( '=', $row->[0], $row->[1] );
    }

    return "{" . join( ', ', @pairs ) . "}";
}

# Helper for _vardict_to_string().
#
#   $term - object responding to to_string
#
# Returns a two-element list: the term's string form, and that same string
# with its first character removed (used by the caller as a numeric sort
# key; presumably the variable's number -- depends on Term's to_string).
sub _sortable_term {
    my $self = shift;
    my $term = shift;

    my $text = $term->to_string;
    return ( $text, substr( $text, 1 ) );
}

# Debugging view of the parser state, formatted as
#
#   {<text before the current position> ^ <text from it onward> | <vardict> }
#
# The "^" marks the current parse position and <vardict> is the output of
# _vardict_to_string().  The parser itself is not modified.
sub to_string {
    my $self = shift;

    my $posn      = $self->{_posn};
    my $consumed  = substr $self->{_str}, 0, $posn;
    my $remaining = substr $self->{_str}, $posn;

    return "{"
        . $consumed . " ^ "
        . $remaining . " | "
        . $self->_vardict_to_string . " }";
}

# Read-only accessors for the parser's fields.

# Index of the current character within the parse string.
sub _posn {
    my $self = shift;
    return $self->{_posn};
}

# The string being parsed.
sub _str {
    my $self = shift;
    return $self->{_str};
}

# Position at which the most recent getname/getnum started.
sub _start {
    my $self = shift;
    return $self->{_start};
}

# Counter used to number newly created variable terms.
sub _varnum {
    my $self = shift;
    return $self->{_varnum};
}

# Hash reference mapping variable names to their terms.
sub _vardict {
    my $self = shift;
    return $self->{_vardict};
}

# Combined getter/setter for the _internal field.
#
# Called without an argument it returns the current value.  Called with
# one it stores that value and returns the parser, so calls can be chained.
sub _internal {
    my ( $self, @args ) = @_;

    return $self->{_internal} unless @args;

    $self->{_internal} = $args[0];
    return $self;
}

# Returns the character at the current parse position, or the sentinel
# '#' once the whole string has been consumed.
sub current {
    my ($self) = @_;

    return $self->empty
        ? '#'
        : substr( $self->{_str}, $self->{_posn}, 1 );
}

# Returns the character immediately after the current one without moving
# the parse position, or the sentinel '#' when there is no such character
# (the string is exhausted or the current character is the last one).
sub peek {
    my $self = shift;

    my $next = $self->{_posn} + 1;

    # Test the bounds explicitly rather than the truth of the character:
    # the digit '0' is a false value in Perl but is a perfectly valid
    # character to peek at.
    return '#' if $next >= length $self->{_str};
    return substr $self->{_str}, $next, 1;
}

# True once the parse position has reached (or passed) the end of the
# parse string, i.e. there is nothing left to read.
sub empty {
    my ($self) = @_;
    return length( $self->{_str} ) <= $self->{_posn};
}

# Current line number, reported by parseerror().  This is a file-scoped
# lexical, so the counter is shared by every parser object.
my $LINENUM = 1;

# Combined getter/setter for the shared line counter.
#
# Without an argument returns the current line number.  With one, stores
# it as the new line number and returns the invocant for chaining.
sub linenum {
    my ( $self, @args ) = @_;

    return $LINENUM unless @args;

    $LINENUM = $args[0];
    return $self;
}

# Moves the shared line counter on by one.  Returns the line number as it
# was before the increment.
sub advance_linenum {
    my ($self) = @_;
    return $LINENUM++;
}

# Moves the parse position forward by one character (it never moves past
# the end of the string) and bumps the line counter when the character
# landed on ends a line.
#
# A line end is "\n", or a "\r" that is not immediately followed by "\n".
# A "\r\n" pair is therefore counted once, when the "\n" is reached,
# instead of once for each of its two characters.
sub advance {
    my $self = shift;

    $self->{_posn}++ if $self->{_posn} < length $self->{_str};

    my $char = $self->current;
    if ( $char eq "\n" ) {
        $self->advance_linenum;
    }
    elsif ( $char eq "\r" ) {

        # current() only yields "\r" while inside the string, so looking
        # one character ahead is at worst the empty string.
        my $following = substr $self->{_str}, $self->{_posn} + 1, 1;
        $self->advance_linenum unless $following eq "\n";
    }
    return;
}

# all three get methods must be called before advance
#
# Recognizes a name at the current parse position and consumes it.
#
# If the current character is a single or double quote, the name is the
# quoted string and is returned without its surrounding quotes.  Otherwise
# the name is an unquoted atom: a letter followed by letters, digits or
# underscores.
#
# Records the starting position in _start and moves _posn just past the
# name.  If no name can be read here (an unterminated quote, or a character
# that cannot start a name) the error is reported through parseerror()
# and the position is left untouched.
sub getname {
    my $self = shift;

    $self->{_start} = $self->{_posn};
    my $first = $self->current;
    my $rest  = substr $self->{_str}, $self->{_start};

    if ( $first =~ /['"]/ ) {

        # Normally, Prolog distinguishes between single and double quoted
        # strings; here both are treated alike.
        my $quoted = extract_delimited($rest);

        # extract_delimited yields undef when the closing quote is missing
        $self->parseerror($first) unless defined $quoted;

        $self->{_posn} += length $quoted;
        return substr $quoted, 1, -1;    # strip the quotes
    }

    my ($name) = $rest =~ /^($ATOM)/;
    $self->parseerror($first) unless defined $name;

    $self->{_posn} += length $name;
    return $name;
}

# Recognizes a number at the current parse position and consumes it.
#
# The number is matched with Regexp::Common's $RE{num}{real}.  A trailing
# '.' is not treated as part of the number: it is dropped from the result
# and left unconsumed in the parse string.
#
# Records the starting position in _start, moves _posn past the number and
# returns the number's text.  When no number is found here the result is
# undef.
sub getnum {
    my $self = shift;

    my $begin = $self->{_start} = $self->{_posn};
    my $rest = substr $self->{_str}, $begin;

    my ($number) = $rest =~ /^($RE{num}{real})/;

    # give back a trailing dot
    chop $number if '.' eq substr( $number, -1, 1 );

    $self->{_posn} += length $number;
    return $number;
}

# Reads a name with getname() and returns the variable term registered
# under it.  The first time a name is seen, a new Term numbered with the
# next _varnum is created and remembered in the variable dictionary.
#
# Returns the list ( $term, $name ).
sub getvar {
    my ($self) = @_;

    my $name = $self->getname;
    my $dict = $self->{_vardict};

    $dict->{$name} ||= Term->new( $self->{_varnum}++ );   # XXX wrong _varnum?

    return ( $dict->{$name}, $name );
}

# Suffix for the next generated anonymous-variable name.  It is advanced
# with Perl's string increment ('a', 'b', ... 'z', 'aa', ...) and, being a
# file-scoped lexical, is shared by all parser objects.
my $ANON = 'a';

# Produces a term for an anonymous variable: generates a new '___<suffix>'
# name, steps over the current character and registers a new Term under
# the generated name unless one is already in the dictionary.
#
# Returns the list ( $term, $name ).
sub get_anon {
    my ($self) = @_;

    # HACK!!!
    my $name = '___' . $ANON++;
    $self->advance;

    my $dict = $self->{_vardict};
    my $term = $dict->{$name};
    if ( !$term ) {
        $term = Term->new( $self->{_varnum}++ );    # XXX wrong _varnum?
        $dict->{$name} = $term;
    }

    return ( $term, $name );
}

# Single place where parse errors are raised.
#
#   $character - the offending character, quoted in the message
#
# Always dies (via croak) with a message that names the character and the
# current line number.
sub parseerror {
    my $self      = shift;
    my $character = shift;

    my $linenum = $self->linenum;
    croak "Unexpected character: ($character) at line number $linenum";
}

# Steps over any run of whitespace at the current position, then hands
# over to _skipcomment() to deal with a Prolog comment that may follow.
sub skipspace {
    my ($self) = @_;

    while ( $self->current =~ /[[:space:]]/ ) {
        $self->advance;
    }

    return _skipcomment($self);
}

# XXX Other subtle differences
sub _skipcomment {
    my $self = shift;
    if ( $self->current eq '%' ) {
        while ( $self->current ne "\n" && $self->current ne "#" ) {
            $self->advance;
        }
        $self->skipspace;
    }



( run in 1.017 second using v1.01-cache-2.11-cpan-39bf76dae61 )