Marpa-R2

 view release on metacpan or  search on metacpan

g/config/default.txt  view on Meta::CPAN

<meta> is a *empty included in %head
<meter> is a *inline included in %inline
<nobr> is a *inline included in %inline
<noframes> is a *mixed included in %block
<noscript> is a *mixed included in %block
<object> contains <param> %mixed
<object> is included in %anywhere
<ol> contains <li>
<ol> is included in %block
<optgroup> contains <option>
<option> is *pcdata
<output> is a *inline included in %inline
<p> is a *inline included in %block
<param> is *empty
<plaintext> is a *cdata included in %block
<pre> is a *inline included in %block
<progress> is a *inline included in %inline
<q> is a *inline included in %inline
<rb> is a *inline included in %inline
<rbc> is a *inline included in %inline
<rp> is a *inline included in %inline
<rt> is a *inline included in %inline
<rtc> is a *inline included in %inline
<ruby> is a *inline included in %inline
<s> is a *inline included in %inline
<samp> is a *inline included in %inline
<script> is a *cdata included in %anywhere
<select> contains <optgroup> <option>
<select> is included in %inline
<small> is a *inline included in %inline
<span> is a *inline included in %inline
<strike> is a *inline included in %inline
<strong> is a *inline included in %inline
<style> is a *cdata included in %head
<sub> is a *inline included in %inline
<sup> is a *inline included in %inline
<table> contains <caption> <col> <colgroup>
<table> contains <tbody> <tfoot> <thead>
<table> is included in %block
<tbody> contains <tr>
<td> is *mixed
<textarea> is a *cdata included in %anywhere
<tfoot> contains <tr>
<th> is *mixed
<thead> contains <tr>
<time> is a *inline included in %inline
<title> is a *pcdata included in %head
<tr> contains <th> <td>
<tt> is a *inline included in %inline
<u> is a *inline included in %inline
<ul> contains <li>
<ul> is included in %block
<var> is a *inline included in %inline
<video> is a *inline included in %inline
<wbr> is a *inline included in %inline
<xmp> is a *cdata included in %block

@head_rubies   = <html> <head>
@block_rubies  = <html> <head> <body>
@inline_rubies = @block_rubies <tbody> <tr> <td> <p>

<html> ->
<head> -> <html>
<body> -> <html> <head>
CDATA -> @inline_rubies
PCDATA -> @inline_rubies

html/lib/Marpa/R2/HTML.pm  view on Meta::CPAN

    my $SYMID_WHITESPACE = $tracer->symbol_by_name('WHITESPACE');
    my $SYMID_PI = $tracer->symbol_by_name('PI');
    my $SYMID_C = $tracer->symbol_by_name('C');
    my $SYMID_D = $tracer->symbol_by_name('D');
    my $SYMID_EOF = $tracer->symbol_by_name('EOF');

    my @raw_tokens = ();
    my $p          = HTML::Parser->new(
        api_version => 3,
        start_h     => [
            \@raw_tokens, q{tagname,'S',line,column,offset,offset_end,is_cdata,attr}
        ],
        end_h =>
            [ \@raw_tokens, q{tagname,'E',line,column,offset,offset_end,is_cdata} ],
        text_h => [
            \@raw_tokens,
            qq{'$SYMID_WHITESPACE','T',line,column,offset,offset_end,is_cdata}
        ],
        comment_h =>
            [ \@raw_tokens, qq{'$SYMID_C','C',line,column,offset,offset_end,is_cdata} ],
        declaration_h =>
            [ \@raw_tokens, qq{'$SYMID_D','D',line,column,offset,offset_end,is_cdata} ],
        process_h =>
            [ \@raw_tokens, qq{'$SYMID_PI','PI',line,column,offset,offset_end,is_cdata} ],
        unbroken_text => 1
    );

    $p->parse( ${$document} );
    $p->eof;

    my @html_parser_tokens = ();
    HTML_PARSER_TOKEN:
    for my $raw_token (@raw_tokens) {
        my ( undef, $token_type, $line, $column, $offset, $offset_end, $is_cdata, $attr ) =
            @{$raw_token};

        PROCESS_TOKEN_TYPE: {
            if ($is_cdata) {
                $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] =
                    $SYMID_CDATA;
                last PROCESS_TOKEN_TYPE;
            }
            if ( $token_type eq 'T' ) {

                # White space as defined in HTML 4.01
                # space (x20); ASCII tab (x09); ASCII form feed (x0C); Zero-width space (x200B)
                # and the two characters which appear in line breaks:
                # carriage return (x0D) and line feed (x0A)

html/lib/Marpa/R2/HTML/Config/Compile.pm  view on Meta::CPAN

    # A quasi-object, not used outside this routine
    my $self = bless {}, __PACKAGE__;

    my %species_handler = (
        cruft      => 'SPE_CRUFT',
        comment    => 'SPE_COMMENT',
        pi         => 'SPE_PI',
        decl       => 'SPE_DECL',
        document   => 'SPE_TOP',
        whitespace => 'SPE_WHITESPACE',
        pcdata     => 'SPE_PCDATA',
        cdata      => 'SPE_CDATA',
        prolog     => 'SPE_PROLOG',
        trailer    => 'SPE_TRAILER',
    );

    my @core_rules           = ();
    my %runtime_tag          = ();
    my %primary_group_by_tag = ();
    $self->{primary_group_by_tag} = \%primary_group_by_tag;

    {

html/lib/Marpa/R2/HTML/Config/Compile.pm  view on Meta::CPAN


    # Start out by closing the context and contents of everything
    my %symbol_table = map {
        $_ =>
            [ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
    } @core_symbols;
    $self->{symbol_table} = \%symbol_table;

    # A few token symbols are allowed as contents -- most non-element
    # tokens are included via the SGML group
    for my $token_symbol (qw(cdata pcdata)) {
        $symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
    }

    # Many groups are defined to be used
    for my $group_symbol (
        qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
        )
    {
        $symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
    } ## end for my $group_symbol ( ...)

    # Flow symbols are almost all allowed as contents
    FLOW_SYMBOL:
    for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {

        # The SGML flow is included automatically as needed

html/lib/Marpa/R2/HTML/Config/Core.pm  view on Meta::CPAN


our $CORE_BNF = <<'END_OF_CORE_BNF';
# The tokens are not used directly
# because, in order to have handlers
# deal with them individually, I need
# a rule with which to associate the
# handler.
comment ::= C
pi ::= PI
decl ::= D
pcdata ::= PCDATA
cdata ::= CDATA
whitespace ::= WHITESPACE
cruft ::= CRUFT

FLO_SGML ::= GRP_SGML*
GRP_SGML ::= comment
GRP_SGML ::= pi
GRP_SGML ::= decl
GRP_SGML ::= whitespace
GRP_SGML ::= cruft

html/lib/Marpa/R2/HTML/Config/Core.pm  view on Meta::CPAN

# E_x is end tag
#   The contents of many elements consists of zero or more items

# Top-level structure
document ::= prolog ELE_html trailer EOF
prolog ::= FLO_SGML
trailer ::= FLO_SGML
ELE_html ::= S_html Contents_html E_html
Contents_html ::= FLO_SGML ELE_head FLO_SGML ELE_body FLO_SGML

# FLO_empty and FLO_cdata
# do NOT allow SGML items as part of
# their flow
FLO_empty ::=

# In FLO_cdata, disallow all SGML components,
# but include cruft.
FLO_cdata ::= GRP_cdata*
GRP_cdata ::= CRUFT
GRP_cdata ::= cdata

FLO_mixed ::= GRP_mixed*
GRP_mixed ::= GRP_block
GRP_mixed ::= GRP_inline

FLO_block ::= GRP_block*
GRP_block ::= GRP_SGML
GRP_block ::= GRP_anywhere

FLO_head ::= GRP_head*
GRP_head ::= GRP_SGML
GRP_head ::= GRP_anywhere

FLO_inline ::= GRP_inline*
GRP_inline ::= GRP_SGML
GRP_inline ::= pcdata
GRP_inline ::= cdata
GRP_inline ::= GRP_anywhere

FLO_pcdata ::= GRP_pcdata*
GRP_pcdata ::= GRP_SGML
GRP_pcdata ::= pcdata
GRP_pcdata ::= cdata

END_OF_CORE_BNF

1;

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                             'S_optgroup',
                             'Contents_ELE_optgroup',
                             'E_optgroup'
                           ]
                },
                {
                  'action' => 'ELE_option',
                  'lhs' => 'ELE_option',
                  'rhs' => [
                             'S_option',
                             'FLO_pcdata',
                             'E_option'
                           ]
                },
                {
                  'action' => 'ELE_p',
                  'lhs' => 'ELE_p',
                  'rhs' => [
                             'S_p',
                             'FLO_inline',
                             'E_p'

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                           ]
                },
                {
                  'lhs' => 'FLO_block',
                  'min' => 0,
                  'rhs' => [
                             'GRP_block'
                           ]
                },
                {
                  'lhs' => 'FLO_cdata',
                  'min' => 0,
                  'rhs' => [
                             'GRP_cdata'
                           ]
                },
                {
                  'lhs' => 'FLO_empty',
                  'rhs' => []
                },
                {
                  'lhs' => 'FLO_head',
                  'min' => 0,
                  'rhs' => [

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                           ]
                },
                {
                  'lhs' => 'FLO_mixed',
                  'min' => 0,
                  'rhs' => [
                             'GRP_mixed'
                           ]
                },
                {
                  'lhs' => 'FLO_pcdata',
                  'min' => 0,
                  'rhs' => [
                             'GRP_pcdata'
                           ]
                },
                {
                  'lhs' => 'GRP_ELE_applet',
                  'rhs' => [
                             'ELE_param'
                           ]
                },
                {
                  'lhs' => 'GRP_ELE_applet',

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                             'GRP_SGML'
                           ]
                },
                {
                  'lhs' => 'GRP_block',
                  'rhs' => [
                             'GRP_anywhere'
                           ]
                },
                {
                  'lhs' => 'GRP_cdata',
                  'rhs' => [
                             'CRUFT'
                           ]
                },
                {
                  'lhs' => 'GRP_cdata',
                  'rhs' => [
                             'cdata'
                           ]
                },
                {
                  'lhs' => 'GRP_head',
                  'rhs' => [
                             'GRP_SGML'
                           ]
                },
                {
                  'lhs' => 'GRP_head',

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                },
                {
                  'lhs' => 'GRP_inline',
                  'rhs' => [
                             'GRP_anywhere'
                           ]
                },
                {
                  'lhs' => 'GRP_inline',
                  'rhs' => [
                             'cdata'
                           ]
                },
                {
                  'lhs' => 'GRP_inline',
                  'rhs' => [
                             'pcdata'
                           ]
                },
                {
                  'lhs' => 'GRP_mixed',
                  'rhs' => [
                             'GRP_block'
                           ]
                },
                {
                  'lhs' => 'GRP_mixed',
                  'rhs' => [
                             'GRP_inline'
                           ]
                },
                {
                  'lhs' => 'GRP_pcdata',
                  'rhs' => [
                             'GRP_SGML'
                           ]
                },
                {
                  'lhs' => 'GRP_pcdata',
                  'rhs' => [
                             'cdata'
                           ]
                },
                {
                  'lhs' => 'GRP_pcdata',
                  'rhs' => [
                             'pcdata'
                           ]
                },
                {
                  'action' => 'SPE_CDATA',
                  'lhs' => 'cdata',
                  'rhs' => [
                             'CDATA'
                           ]
                },
                {
                  'action' => 'SPE_COMMENT',
                  'lhs' => 'comment',
                  'rhs' => [
                             'C'
                           ]

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                  'lhs' => 'document',
                  'rhs' => [
                             'prolog',
                             'ELE_html',
                             'trailer',
                             'EOF'
                           ]
                },
                {
                  'action' => 'SPE_PCDATA',
                  'lhs' => 'pcdata',
                  'rhs' => [
                             'PCDATA'
                           ]
                },
                {
                  'action' => 'SPE_PI',
                  'lhs' => 'pi',
                  'rhs' => [
                             'PI'
                           ]

html/lib/Marpa/R2/HTML/Config/Default.pm  view on Meta::CPAN

                 'keygen' => 'FLO_inline',
                 'label' => 'FLO_inline',
                 'link' => 'FLO_empty',
                 'mark' => 'FLO_inline',
                 'meta' => 'FLO_empty',
                 'meter' => 'FLO_inline',
                 'nobr' => 'FLO_inline',
                 'noframes' => 'FLO_mixed',
                 'noscript' => 'FLO_mixed',
                 'output' => 'FLO_inline',
                 'plaintext' => 'FLO_cdata',
                 'pre' => 'FLO_inline',
                 'progress' => 'FLO_inline',
                 'q' => 'FLO_inline',
                 'rb' => 'FLO_inline',
                 'rbc' => 'FLO_inline',
                 'rp' => 'FLO_inline',
                 'rt' => 'FLO_inline',
                 'rtc' => 'FLO_inline',
                 'ruby' => 'FLO_inline',
                 's' => 'FLO_inline',
                 'samp' => 'FLO_inline',
                 'script' => 'FLO_cdata',
                 'small' => 'FLO_inline',
                 'span' => 'FLO_inline',
                 'strike' => 'FLO_inline',
                 'strong' => 'FLO_inline',
                 'style' => 'FLO_cdata',
                 'sub' => 'FLO_inline',
                 'sup' => 'FLO_inline',
                 'textarea' => 'FLO_cdata',
                 'time' => 'FLO_inline',
                 'title' => 'FLO_pcdata',
                 'tt' => 'FLO_inline',
                 'u' => 'FLO_inline',
                 'var' => 'FLO_inline',
                 'video' => 'FLO_inline',
                 'wbr' => 'FLO_inline',
                 'xmp' => 'FLO_cdata'
               };
$RUBY_SLIPPERS_RANK_BY_NAME = {
                                '<!element>' => {
                                                  '</*>' => 1
                                                },
                                '<%head>' => {
                                               '</*>' => 1,
                                               'S_head' => 2,
                                               'S_html' => 3
                                             },

html/pod/HTML.pod  view on Meta::CPAN


The C<:CDATA> pseudoclass specifies the action for
CDATA terminals.
Its action is called once for each non-whitespace raw C<text> event
that is not reclassed as cruft.
(B<Raw text>
is text in which any markup and entities should be left as is.)

More precisely,
a C<:CDATA> terminal is created from any
L<HTML::Parser> C<text> event that has the C<is_cdata> flag on;
that contains a non-whitespace character
as defined in the HTML 4.01 specification
(L<http://www.w3.org/TR/html4/struct/text.html#h-9.1>);
and that is not reclassed as cruft.

=head2 :COMMENT

The C<:COMMENT> pseudoclass specifies the action for HTML comments.
Its action is called once for every C<HTML::Parser> C<comment> event that
is not reclassed as cruft.

html/pod/HTML.pod  view on Meta::CPAN


=head2 :PCDATA

The C<:PCDATA> pseudoclass specifies the action for
PCDATA terminals.
Its action is called once for each non-whitespace non-raw C<text> event
that is not reclassed as cruft.

More precisely,
a C<:PCDATA> terminal is created from any
L<HTML::Parser> C<text> event that has the C<is_cdata> flag B<off>;
that contains a non-whitespace character
as defined in the HTML 4.01 specification
(L<http://www.w3.org/TR/html4/struct/text.html#h-9.1>);
and that is not reclassed as cruft.

Markup and entities in
C<:PCDATA> text
are expected to be interpreted eventually,
but it can be counter-productive to do this
during parsing.

html/t/cfg_fmt.t  view on Meta::CPAN

        -during-<span>
          -more inline stuff-</span></p><p>
        -new block-
      </p></acme></body>
</html>
END_OF_EXPECTED_OUTPUT
run_one_test( $test_name, $test_html, \$test_config, \$expected_output );

$test_name = 'Block element containing PCDATA';
$test_config =
    ${$default_config} . '<acme> is a *pcdata included in %block';
# $test_html is same as in previous test
$expected_output = <<'END_OF_EXPECTED_OUTPUT';
<html>
  <head>
  </head>
  <body>
    <acme>
      -during-</acme><p>
      <span>
        -more inline stuff-</span></p><p>



( run in 0.407 second using v1.01-cache-2.11-cpan-454fe037f31 )