Marpa-R2
view release on metacpan or search on metacpan
html/lib/Marpa/R2/HTML/Config/Compile.pm view on Meta::CPAN
is_included_statement ::= element kw_is kw_included kw_in <group>
action => do_is_included_statement
element ::= start_tag
is_a_included_statement ::= element kw_is kw_a flow kw_included kw_in <group>
action => do_is_a_included_statement
is_statement ::= element kw_is flow
action => do_is_statement
contains_statement ::= element kw_contains contents
action => do_contains_statement
contents ::= content_item*
action => do_array
list_assignment ::= list op_assign list_members
action => do_array_assignment
list_members ::= list_member*
action => do_array
list_member ::= ruby_symbol
list_member ::= list
content_item ::= element | <group> | kw_PCDATA | kw_CDATA
ruby_statement ::= ruby_symbol op_ruby ruby_symbol_list
action => do_ruby_statement
ruby_symbol_list ::= ruby_symbol*
action => do_array
ruby_symbol ::= kw_PCDATA | kw_CDATA
| start_tag | group_start_tag | wildcard_start_tag
| end_tag | group_end_tag | wildcard_end_tag
| list
END_OF_GRAMMAR
my $grammar = Marpa::R2::Grammar->new(
{ start => 'translation_unit',
action_object => __PACKAGE__,
rules =>$source,
default_action => 'do_what_I_mean'
}
);
$grammar->precompute();
return $grammar;
}
sub source_by_location_range {
my ( $self, $start, $end ) = @_;
my $positions = $self->{positions};
my $start_pos = $start > 0 ? $positions->[$start] : 0;
my $end_pos = $positions->[$end];
return substr ${ $self->{source_ref} }, $start_pos, $end_pos - $start_pos;
} ## end sub source_by_location_range
sub compile {
my ($source_ref) = @_;
# A quasi-object, not used outside this routine
my $self = bless {}, __PACKAGE__;
my %species_handler = (
cruft => 'SPE_CRUFT',
comment => 'SPE_COMMENT',
pi => 'SPE_PI',
decl => 'SPE_DECL',
document => 'SPE_TOP',
whitespace => 'SPE_WHITESPACE',
pcdata => 'SPE_PCDATA',
cdata => 'SPE_CDATA',
prolog => 'SPE_PROLOG',
trailer => 'SPE_TRAILER',
);
my @core_rules = ();
my %runtime_tag = ();
my %primary_group_by_tag = ();
$self->{primary_group_by_tag} = \%primary_group_by_tag;
{
LINE:
for my $line ( split /\n/xms,
$Marpa::R2::HTML::Internal::Core::CORE_BNF )
{
my $definition = $line;
chomp $definition;
$definition =~ s/ [#] .* //xms; # Remove comments
next LINE
if not $definition =~ / \S /xms; # ignore all-whitespace line
my $sequence = ( $definition =~ s/ [*] \s* $//xms );
if ( $definition =~ s/ \s* [:][:][=] \s* / /xms ) {
# Production is Ordinary BNF rule
my @symbols = ( split q{ }, $definition );
my $lhs = shift @symbols;
my %rule_descriptor = (
lhs => $lhs,
rhs => \@symbols,
);
if ($sequence) {
$rule_descriptor{min} = 0;
}
if ( my $handler = $species_handler{$lhs} ) {
$rule_descriptor{action} = $handler;
}
elsif ( $lhs =~ /^ELE_/xms ) {
$rule_descriptor{action} = "$lhs";
}
push @core_rules, \%rule_descriptor;
next LINE;
} ## end if ( $definition =~ s/ \s* [:][:][=] \s* / /xms )
die "Badly formed line in grammar description: $line";
} ## end LINE: for my $line ( split /\n/xms, ...)
}
my @core_symbols = map { ( $_->{lhs}, @{ $_->{rhs} } ) } @core_rules;
# Start out by closing the context and contents of everything
my %symbol_table = map {
$_ =>
[ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
} @core_symbols;
$self->{symbol_table} = \%symbol_table;
# A few token symbols are allowed as contents -- most non-element
# tokens are included via the SGML group
for my $token_symbol (qw(cdata pcdata)) {
$symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
}
# Many groups are defined to to be used
for my $group_symbol (
qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
)
{
$symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
} ## end for my $group_symbol ( ...)
# Flow symbols are almost all allowed as contents
FLOW_SYMBOL:
for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {
# The SGML flow is included automatically as needed
# and should not be explicity specified
next FLOW_SYMBOL if $flow_symbol eq 'FLO_SGML';
$symbol_table{$flow_symbol}->[CONTEXT_CLOSED] = 0;
} ## end for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } ...)
# A few groups are also extensible
for my $group_symbol (qw( GRP_anywhere GRP_block GRP_head GRP_inline )) {
$symbol_table{$group_symbol}->[CONTENTS_CLOSED] = 0;
}
# As very special cases the contents of the <head> and <body>
# elements can be changed
for my $element_symbol (qw( ELE_head ELE_body )) {
$symbol_table{$element_symbol}->[CONTENTS_CLOSED] = 0;
}
{
# Make sure everything for which we have a handler was defined in
# the core grammar
my @species_not_defined = grep { not defined $symbol_table{$_} }
keys %species_handler;
if ( scalar @species_not_defined ) {
die
'Definitions for the following required text components are missing: ',
join q{ }, @species_not_defined;
}
}
my %ruby_config = ();
my %lists = ();
$self->{ruby_config} = \%ruby_config;
$self->{lists} = \%lists;
$self->{source_ref} = $source_ref;
my @positions = (0);
$self->{positions} = \@positions;
state $grammar = create_grammar();
my $recce = Marpa::R2::Recognizer->new({ grammar => $grammar});
my $string = ${$source_ref};
my $length = length $string;
pos $string = 0;
TOKEN: while ( pos $string < $length ) {
# skip comment
next TOKEN if $string =~ m/\G \s* [#] [^\n]* \n/gcxms;
# skip whitespace
next TOKEN if $string =~ m/\G\s+/gcxms;
# read other tokens
( run in 1.159 second using v1.01-cache-2.11-cpan-99c4e6809bf )