AI-Prolog
view release on metacpan or search on metacpan
lib/AI/Prolog/Parser.pm view on Meta::CPAN
package AI::Prolog::Parser;
$REVISION = '$Id: Parser.pm,v 1.9 2005/08/06 23:28:40 ovid Exp $';
$VERSION = '0.10';
use strict;
use warnings;
use Carp qw( confess croak );
use Regexp::Common;
use Hash::Util 'lock_keys';
# debugging stuff
use Clone;
use Text::Balanced qw/extract_quotelike extract_delimited/;
use aliased 'AI::Prolog::Engine';
use aliased 'AI::Prolog::KnowledgeBase';
use aliased 'AI::Prolog::Parser::PreProcessor';
use aliased 'AI::Prolog::Term';
use aliased 'AI::Prolog::Term::Number';
use aliased 'AI::Prolog::TermList';
use aliased 'AI::Prolog::TermList::Clause';
use aliased 'AI::Prolog::TermList::Primitive';
my $ATOM = qr/[[:alpha:]][[:alnum:]_]*/;
use constant NULL => 'null';
# Constructor.  Runs $string through PreProcessor->process and wraps the
# result in a parser object whose position, start offset, variable counter
# and internal flag all begin at 0 and whose variable dictionary is empty.
# The object's keys are locked so a misspelled key is caught at runtime.
sub new {
    my $class  = shift;
    my $string = shift;

    my %state = (
        _str      => PreProcessor->process($string),
        _posn     => 0,
        _start    => 0,
        _varnum   => 0,
        _internal => 0,
        _vardict  => {},
    );

    my $self = bless \%state, $class;
    lock_keys %$self;
    return $self;
}
# Render the variable dictionary as "{name=term, name=term, ...}".
# Each entry is [ name, <values returned by _sortable_term> ]; entries are
# ordered numerically by the third element (the sort key that
# _sortable_term supplies).
sub _vardict_to_string {
    my ($self) = @_;

    my @entries;
    for my $name ( keys %{ $self->{_vardict} } ) {
        push @entries,
            [ $name, $self->_sortable_term( $self->{_vardict}{$name} ) ];
    }

    my @ordered = sort { $a->[2] <=> $b->[2] } @entries;
    my @pairs   = map { join '=' => $_->[0], $_->[1] } @ordered;

    return "{" . join( ', ', @pairs ) . "}";
}
# Return two values for $term: its to_string text, and that same text with
# the first character removed.  _vardict_to_string sorts numerically on the
# second value (presumably the first character is a non-numeric prefix --
# confirm against Term's to_string).
sub _sortable_term {
    my $self = shift;
    my $term = shift;

    my $text = $term->to_string;
    return ( $text, substr( $text, 1 ) );
}
# Debugging view of the parser state, in the form
#   {<consumed text> ^ <remaining text> | <variable dictionary> }
# where "^" marks the current position.
#
# The previous version deep-cloned the whole parser into a variable that was
# never read; that dead work has been removed.  The output is unchanged.
sub to_string {
    my $self = shift;

    my $text = $self->{_str};
    my $posn = $self->{_posn};

    return "{"
        . substr( $text, 0, $posn ) . " ^ "
        . substr( $text, $posn ) . " | "
        . $self->_vardict_to_string . " }";
}
# Read-only accessors for the parser's internal state.

# Current offset into the parse string.
sub _posn {
    my ($self) = @_;
    return $self->{_posn};
}

# The (preprocessed) string being parsed.
sub _str {
    my ($self) = @_;
    return $self->{_str};
}

# Offset at which the most recent getname/getnum began.
sub _start {
    my ($self) = @_;
    return $self->{_start};
}

# Counter used to number newly created variables.
sub _varnum {
    my ($self) = @_;
    return $self->{_varnum};
}

# Hash reference mapping variable names to their terms.
sub _vardict {
    my ($self) = @_;
    return $self->{_vardict};
}
# Combined getter/setter for the _internal flag.
# With no argument: returns the current value.
# With an argument: stores it and returns the parser itself.
sub _internal {
    my $self = shift;

    return $self->{_internal} unless @_;

    $self->{_internal} = shift;
    return $self;
}
# Return the character at the current position, or the sentinel '#' once
# the parse string has been exhausted.
sub current {
    my ($self) = @_;

    return $self->empty
        ? '#'
        : substr( $self->{_str}, $self->{_posn}, 1 );
}
# Peek at the character after the current one without moving.
# Returns the sentinel '#' when there is no such character (the parser is
# already at, or one short of, the end of the string).
#
# The previous implementation used "substr(...) || '#'", which mistook a
# literal '0' for "nothing there" because '0' is false in Perl.  An explicit
# bounds check is used instead; it also covers the exhausted-input case,
# since a position at or past the end always has its successor past the end.
sub peek {
    my $self = shift;

    my $next_posn = $self->{_posn} + 1;
    return '#' if $next_posn >= length $self->{_str};
    return substr( $self->{_str}, $next_posn, 1 );
}
# True when the current position is at or beyond the end of the parse
# string, i.e. nothing is left to read.
sub empty {
    my ($self) = @_;
    return length( $self->{_str} ) <= $self->{_posn};
}
# Line counter used in error messages.  It is a single file-scoped variable,
# so it is shared by every parser object rather than stored per instance.
my $LINENUM = 1;

# Getter/setter for the line counter.
# With no argument: returns the current line number.
# With an argument: stores it and returns the parser itself.
sub linenum {
    my $self = shift;

    return $LINENUM unless @_;

    $LINENUM = shift;
    return $self;
}
# Bump the shared file-level line counter by one.  The value returned is
# the counter as it was before the increment.
sub advance_linenum {
    my ($self) = @_;
    return $LINENUM++;
}
# Step one character forward (never past the end of the parse string).
# If the character now under the cursor is a carriage return or a newline,
# the line counter is advanced as well.
sub advance {
    my ($self) = @_;

    # print $self->current; # XXX
    if ( $self->{_posn} < length $self->{_str} ) {
        $self->{_posn} += 1;
    }
    $self->advance_linenum if $self->current =~ /[\r\n]/;
}
# NOTE: getname, getnum and getvar all read from the current position, so
# they must be called before advance.
#
# XXX the java methods do not directly translate, so we need to revisit
# this if it breaks.
# XXX Update: there was a subtle off-by-one bug in the string index; it is
# believed to be fixed.
#
# Recognize a name starting at the current position, move the position past
# it, and return it.  Records the starting offset in _start.
#   - If the current character is a quote, the whole quoted run is consumed
#     and returned with the surrounding quotes stripped.
#   - Otherwise an atom (a letter followed by letters, digits or
#     underscores) is consumed and returned as-is.
# If no name can be read (missing closing quote, or no atom at this
# position) the problem is reported through parseerror, which croaks.
# Previously this case fell through to length/substr on undef, leaving the
# position unchanged and returning undef with warnings.
sub getname {
    my $self = shift;
    $self->{_start} = $self->{_posn};
    my $remaining = substr $self->{_str} => $self->{_start};

    if ( $self->current =~ /['"]/ ) {

        # Normally, Prolog distinguishes between single and double quoted
        # strings; here both kinds are read the same way.
        my $quoted = extract_delimited($remaining);

        # extract_delimited yields undef when it cannot find the closing
        # delimiter.
        $self->parseerror( $self->current ) unless defined $quoted;

        $self->{_posn} += length $quoted;
        return substr $quoted => 1, length($quoted) - 2;  # strip the quotes
    }

    my ($atom) = $remaining =~ /^($ATOM)/;
    $self->parseerror( $self->current ) unless defined $atom;

    $self->{_posn} += length $atom;
    return $atom;
}
# Recognize a number at the current position, move the position past it,
# and return its text.  Records the starting offset in _start.
# A trailing '.' picked up by the real-number pattern is dropped from the
# result and is not consumed.
# XXX same issues as getname
sub getnum {
    my ($self) = @_;

    $self->{_start} = $self->{_posn};
    my $remaining = substr( $self->{_str}, $self->{_start} );

    my ($number) = $remaining =~ /^($RE{num}{real})/;
    $number =~ s/\.\z//;

    $self->{_posn} += length $number;
    return $number;
}
# Read a name with getname and return the pair (term, name) for it.
# The term comes from the variable dictionary; when the name has not been
# seen before, a new Term numbered with the next _varnum is created and
# remembered under that name.
sub getvar {
    my ($self) = @_;

    my $name = $self->getname;

    # XXX wrong _varnum?
    $self->{_vardict}{$name} ||= Term->new( $self->{_varnum}++ );

    return ( $self->{_vardict}{$name}, $name );
}
# Suffix for generated anonymous-variable names.  It is file-scoped and is
# stepped with Perl's string increment ('a', 'b', ... 'z', 'aa', ...).
my $ANON = 'a';

# Produce a variable for an anonymous placeholder: invent a name of the
# form '___<suffix>', step past the current character, and return the pair
# (term, name), creating and recording a new Term numbered with the next
# _varnum when the name is not yet in the variable dictionary.
sub get_anon {
    my ($self) = @_;

    # HACK!!!
    my $name = '___' . $ANON++;
    $self->advance;

    # XXX wrong _varnum?
    my $term = ( $self->{_vardict}{$name}
            ||= Term->new( $self->{_varnum}++ ) );

    return ( $term, $name );
}
# Central error reporter: croaks with the offending character and the
# current line number.  Never returns.
sub parseerror {
    my $self      = $_[0];
    my $character = $_[1];

    my $linenum = $self->linenum;
    croak "Unexpected character: ($character) at line number $linenum";
}
# Move past any run of whitespace at the current position, then hand over
# to _skipcomment to deal with a Prolog comment that may follow.
sub skipspace {
    my ($self) = @_;

    while ( $self->current =~ /[[:space:]]/ ) {
        $self->advance;
    }

    return _skipcomment($self);
}
# XXX Other subtle differences
sub _skipcomment {
my $self = shift;
if ( $self->current eq '%' ) {
while ( $self->current ne "\n" && $self->current ne "#" ) {
$self->advance;
}
$self->skipspace;
}
( run in 1.017 second using v1.01-cache-2.11-cpan-39bf76dae61 )