Marpa-R2

 view release on metacpan or  search on metacpan

html/lib/Marpa/R2/HTML/Config/Compile.pm  view on Meta::CPAN

is_included_statement ::= element kw_is kw_included kw_in <group>
    action => do_is_included_statement
element ::= start_tag
is_a_included_statement ::= element kw_is kw_a flow kw_included kw_in <group>
    action => do_is_a_included_statement
is_statement ::= element kw_is flow
    action => do_is_statement
contains_statement ::= element kw_contains contents
    action => do_contains_statement
contents ::= content_item*
    action => do_array
list_assignment ::= list op_assign list_members
    action => do_array_assignment
list_members ::= list_member*
    action => do_array
list_member ::= ruby_symbol
list_member ::= list
content_item ::= element | <group> | kw_PCDATA | kw_CDATA
ruby_statement ::= ruby_symbol op_ruby ruby_symbol_list
    action => do_ruby_statement
ruby_symbol_list ::= ruby_symbol*
    action => do_array
ruby_symbol ::= kw_PCDATA | kw_CDATA
  | start_tag | group_start_tag | wildcard_start_tag
  | end_tag | group_end_tag | wildcard_end_tag
  | list
END_OF_GRAMMAR
 
    my $grammar = Marpa::R2::Grammar->new(
       { start => 'translation_unit',
       action_object => __PACKAGE__,
       rules =>$source,
       default_action => 'do_what_I_mean'
       }
    );
    $grammar->precompute();
   return $grammar;
}

sub source_by_location_range {
    my ( $self, $start, $end ) = @_;
    my $positions = $self->{positions};
    my $start_pos = $start > 0 ? $positions->[$start] : 0;
    my $end_pos   = $positions->[$end];
    return substr ${ $self->{source_ref} }, $start_pos, $end_pos - $start_pos;
} ## end sub source_by_location_range

sub compile {
    my ($source_ref) = @_;

    # A quasi-object, not used outside this routine
    my $self = bless {}, __PACKAGE__;

    my %species_handler = (
        cruft      => 'SPE_CRUFT',
        comment    => 'SPE_COMMENT',
        pi         => 'SPE_PI',
        decl       => 'SPE_DECL',
        document   => 'SPE_TOP',
        whitespace => 'SPE_WHITESPACE',
        pcdata     => 'SPE_PCDATA',
        cdata      => 'SPE_CDATA',
        prolog     => 'SPE_PROLOG',
        trailer    => 'SPE_TRAILER',
    );

    my @core_rules           = ();
    my %runtime_tag          = ();
    my %primary_group_by_tag = ();
    $self->{primary_group_by_tag} = \%primary_group_by_tag;

    {
        LINE:
        for my $line ( split /\n/xms,
            $Marpa::R2::HTML::Internal::Core::CORE_BNF )
        {
            my $definition = $line;
            chomp $definition;
            $definition =~ s/ [#] .* //xms;    # Remove comments
            next LINE
                if not $definition =~ / \S /xms;  # ignore all-whitespace line
            my $sequence = ( $definition =~ s/ [*] \s* $//xms );
            if ( $definition =~ s/ \s* [:][:][=] \s* / /xms ) {

                # Production is Ordinary BNF rule
                my @symbols         = ( split q{ }, $definition );
                my $lhs             = shift @symbols;
                my %rule_descriptor = (
                    lhs => $lhs,
                    rhs => \@symbols,
                );
                if ($sequence) {
                    $rule_descriptor{min} = 0;
                }
                if ( my $handler = $species_handler{$lhs} ) {
                    $rule_descriptor{action} = $handler;
                }
                elsif ( $lhs =~ /^ELE_/xms ) {
                    $rule_descriptor{action} = "$lhs";
                }
                push @core_rules, \%rule_descriptor;
                next LINE;
            } ## end if ( $definition =~ s/ \s* [:][:][=] \s* / /xms )
            die "Badly formed line in grammar description: $line";
        } ## end LINE: for my $line ( split /\n/xms, ...)
    }

    my @core_symbols = map { ( $_->{lhs}, @{ $_->{rhs} } ) } @core_rules;

    # Start out by closing the context and contents of everything
    my %symbol_table = map {
        $_ =>
            [ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
    } @core_symbols;
    $self->{symbol_table} = \%symbol_table;

    # A few token symbols are allowed as contents -- most non-element
    # tokens are included via the SGML group
    for my $token_symbol (qw(cdata pcdata)) {
        $symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
    }

    # Many groups are defined to to be used
    for my $group_symbol (
        qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
        )
    {
        $symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
    } ## end for my $group_symbol ( ...)

    # Flow symbols are almost all allowed as contents
    FLOW_SYMBOL:
    for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {

        # The SGML flow is included automatically as needed
        # and should not be explicity specified
        next FLOW_SYMBOL if $flow_symbol eq 'FLO_SGML';
        $symbol_table{$flow_symbol}->[CONTEXT_CLOSED] = 0;
    } ## end for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } ...)

    # A few groups are also extensible
    for my $group_symbol (qw( GRP_anywhere GRP_block GRP_head GRP_inline )) {
        $symbol_table{$group_symbol}->[CONTENTS_CLOSED] = 0;
    }

    # As very special cases the contents of the <head> and <body>
    # elements can be changed
    for my $element_symbol (qw( ELE_head ELE_body )) {
        $symbol_table{$element_symbol}->[CONTENTS_CLOSED] = 0;
    }

    {
        # Make sure everything for which we have a handler was defined in
        # the core grammar
        my @species_not_defined = grep { not defined $symbol_table{$_} }
            keys %species_handler;
        if ( scalar @species_not_defined ) {
            die
                'Definitions for the following required text components are missing: ',
                join q{ }, @species_not_defined;
        }
    }

    my %ruby_config = ();
    my %lists       = ();
    $self->{ruby_config} = \%ruby_config;
    $self->{lists} = \%lists;
    $self->{source_ref} = $source_ref;
    my @positions = (0);
    $self->{positions} = \@positions;

    state $grammar = create_grammar();
    my $recce = Marpa::R2::Recognizer->new({ grammar => $grammar});
    my $string = ${$source_ref};
    my $length = length $string;
    pos $string = 0;
    TOKEN: while ( pos $string < $length ) {

        # skip comment
        next TOKEN if $string =~ m/\G \s* [#] [^\n]* \n/gcxms;

        # skip whitespace
        next TOKEN if $string =~ m/\G\s+/gcxms;

        # read other tokens



( run in 1.159 second using v1.01-cache-2.11-cpan-99c4e6809bf )