view release on metacpan or search on metacpan
g/config/default.txt view on Meta::CPAN
<meta> is a *empty included in %head
<meter> is a *inline included in %inline
<nobr> is a *inline included in %inline
<noframes> is a *mixed included in %block
<noscript> is a *mixed included in %block
<object> contains <param> %mixed
<object> is included in %anywhere
<ol> contains <li>
<ol> is included in %block
<optgroup> contains <option>
<option> is *pcdata
<output> is a *inline included in %inline
<p> is a *inline included in %block
<param> is *empty
<plaintext> is a *cdata included in %block
<pre> is a *inline included in %block
<progress> is a *inline included in %inline
<q> is a *inline included in %inline
<rb> is a *inline included in %inline
<rbc> is a *inline included in %inline
<rp> is a *inline included in %inline
<rt> is a *inline included in %inline
<rtc> is a *inline included in %inline
<ruby> is a *inline included in %inline
<s> is a *inline included in %inline
<samp> is a *inline included in %inline
<script> is a *cdata included in %anywhere
<select> contains <optgroup> <option>
<select> is included in %inline
<small> is a *inline included in %inline
<span> is a *inline included in %inline
<strike> is a *inline included in %inline
<strong> is a *inline included in %inline
<style> is a *cdata included in %head
<sub> is a *inline included in %inline
<sup> is a *inline included in %inline
<table> contains <caption> <col> <colgroup>
<table> contains <tbody> <tfoot> <thead>
<table> is included in %block
<tbody> contains <tr>
<td> is *mixed
<textarea> is a *cdata included in %anywhere
<tfoot> contains <tr>
<th> is *mixed
<thead> contains <tr>
<time> is a *inline included in %inline
<title> is a *pcdata included in %head
<tr> contains <th> <td>
<tt> is a *inline included in %inline
<u> is a *inline included in %inline
<ul> contains <li>
<ul> is included in %block
<var> is a *inline included in %inline
<video> is a *inline included in %inline
<wbr> is a *inline included in %inline
<xmp> is a *cdata included in %block
@head_rubies = <html> <head>
@block_rubies = <html> <head> <body>
@inline_rubies = @block_rubies <tbody> <tr> <td> <p>
<html> ->
<head> -> <html>
<body> -> <html> <head>
CDATA -> @inline_rubies
PCDATA -> @inline_rubies
html/lib/Marpa/R2/HTML.pm view on Meta::CPAN
my $SYMID_WHITESPACE = $tracer->symbol_by_name('WHITESPACE');
my $SYMID_PI = $tracer->symbol_by_name('PI');
my $SYMID_C = $tracer->symbol_by_name('C');
my $SYMID_D = $tracer->symbol_by_name('D');
my $SYMID_EOF = $tracer->symbol_by_name('EOF');
# Tokenize the document with HTML::Parser, collecting the events in
# @raw_tokens.  Each handler is given a reference to that array in place
# of a callback, together with an argspec string describing the fields
# to record for the event.
#
# All argspecs share the same leading field layout:
#   id-or-tagname, type code, line, column, offset, offset_end, is_cdata
# The type codes are 'S' (start tag), 'E' (end tag), 'T' (text),
# 'C' (comment), 'D' (declaration) and 'PI' (processing instruction).
# Only start tags carry an extra trailing field (attr).
my @raw_tokens = ();
my $p = HTML::Parser->new(
api_version => 3,
# Tag events record the tag name in the first field.
start_h => [
\@raw_tokens, q{tagname,'S',line,column,offset,offset_end,is_cdata,attr}
],
end_h =>
[ \@raw_tokens, q{tagname,'E',line,column,offset,offset_end,is_cdata} ],
# For the remaining events, qq{} interpolates the value of a $SYMID_*
# variable into the argspec as a quoted literal, so the first field
# holds that symbol's id.  Text events are recorded with the WHITESPACE
# symbol id here; the token loop that follows this setup overwrites the
# id for tokens whose is_cdata flag is set.
text_h => [
\@raw_tokens,
qq{'$SYMID_WHITESPACE','T',line,column,offset,offset_end,is_cdata}
],
comment_h =>
[ \@raw_tokens, qq{'$SYMID_C','C',line,column,offset,offset_end,is_cdata} ],
declaration_h =>
[ \@raw_tokens, qq{'$SYMID_D','D',line,column,offset,offset_end,is_cdata} ],
process_h =>
[ \@raw_tokens, qq{'$SYMID_PI','PI',line,column,offset,offset_end,is_cdata} ],
# NOTE(review): per the HTML::Parser documentation this option makes the
# parser report a run of text as one event instead of several chunks --
# confirm against the installed HTML::Parser version.
unbroken_text => 1
);
# $document is dereferenced as a scalar reference to the HTML text.
$p->parse( ${$document} );
# Signal end of input to the parser.
$p->eof;
my @html_parser_tokens = ();
HTML_PARSER_TOKEN:
for my $raw_token (@raw_tokens) {
my ( undef, $token_type, $line, $column, $offset, $offset_end, $is_cdata, $attr ) =
@{$raw_token};
PROCESS_TOKEN_TYPE: {
if ($is_cdata) {
$raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] =
$SYMID_CDATA;
last PROCESS_TOKEN_TYPE;
}
if ( $token_type eq 'T' ) {
# White space as defined in HTML 4.01
# space (x20); ASCII tab (x09); ASCII form feed (x0C); Zero-width space (x200B)
# and the two characters which appear in line breaks:
# carriage return (x0D) and line feed (x0A)
html/lib/Marpa/R2/HTML/Config/Compile.pm view on Meta::CPAN
# A quasi-object, not used outside this routine
# It is an empty hash blessed into the current package.
my $self = bless {}, __PACKAGE__;
# Maps the name of each "species" symbol to the name of its SPE_*
# handler.  In the compiled Config/Default.pm these handler names appear
# as the 'action' of the rule whose 'lhs' is the species name (for
# example lhs 'cdata' has action 'SPE_CDATA').
my %species_handler = (
cruft => 'SPE_CRUFT',
comment => 'SPE_COMMENT',
pi => 'SPE_PI',
decl => 'SPE_DECL',
document => 'SPE_TOP',
whitespace => 'SPE_WHITESPACE',
pcdata => 'SPE_PCDATA',
cdata => 'SPE_CDATA',
prolog => 'SPE_PROLOG',
trailer => 'SPE_TRAILER',
);
# Working state for the compilation, all initially empty.
my @core_rules = ();
my %runtime_tag = ();
my %primary_group_by_tag = ();
# Stored by reference, so later additions to %primary_group_by_tag are
# visible through $self as well.
$self->{primary_group_by_tag} = \%primary_group_by_tag;
{
html/lib/Marpa/R2/HTML/Config/Compile.pm view on Meta::CPAN
# Start out by closing the context and contents of everything
my %symbol_table = map {
$_ =>
[ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
} @core_symbols;
$self->{symbol_table} = \%symbol_table;
# A few token symbols are allowed as contents -- most non-element
# tokens are included via the SGML group
for my $token_symbol (qw(cdata pcdata)) {
$symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
}
# Many groups are defined to be used
for my $group_symbol (
qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
)
{
$symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
} ## end for my $group_symbol ( ...)
# Flow symbols are almost all allowed as contents
FLOW_SYMBOL:
for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {
# The SGML flow is included automatically as needed
html/lib/Marpa/R2/HTML/Config/Core.pm view on Meta::CPAN
our $CORE_BNF = <<'END_OF_CORE_BNF';
# The tokens are not used directly
# because, in order to have handlers
# deal with them individually, I need
# a rule with which to associate the
# handler.
comment ::= C
pi ::= PI
decl ::= D
pcdata ::= PCDATA
cdata ::= CDATA
whitespace ::= WHITESPACE
cruft ::= CRUFT
FLO_SGML ::= GRP_SGML*
GRP_SGML ::= comment
GRP_SGML ::= pi
GRP_SGML ::= decl
GRP_SGML ::= whitespace
GRP_SGML ::= cruft
html/lib/Marpa/R2/HTML/Config/Core.pm view on Meta::CPAN
# E_x is end tag
# The contents of many elements consists of zero or more items
# Top-level structure
document ::= prolog ELE_html trailer EOF
prolog ::= FLO_SGML
trailer ::= FLO_SGML
ELE_html ::= S_html Contents_html E_html
Contents_html ::= FLO_SGML ELE_head FLO_SGML ELE_body FLO_SGML
# FLO_empty and FLO_cdata
# do NOT allow SGML items as part of
# their flow
FLO_empty ::=
# In FLO_cdata, disallow all SGML components,
# but include cruft.
FLO_cdata ::= GRP_cdata*
GRP_cdata ::= CRUFT
GRP_cdata ::= cdata
FLO_mixed ::= GRP_mixed*
GRP_mixed ::= GRP_block
GRP_mixed ::= GRP_inline
FLO_block ::= GRP_block*
GRP_block ::= GRP_SGML
GRP_block ::= GRP_anywhere
FLO_head ::= GRP_head*
GRP_head ::= GRP_SGML
GRP_head ::= GRP_anywhere
FLO_inline ::= GRP_inline*
GRP_inline ::= GRP_SGML
GRP_inline ::= pcdata
GRP_inline ::= cdata
GRP_inline ::= GRP_anywhere
FLO_pcdata ::= GRP_pcdata*
GRP_pcdata ::= GRP_SGML
GRP_pcdata ::= pcdata
GRP_pcdata ::= cdata
END_OF_CORE_BNF
1;
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
'S_optgroup',
'Contents_ELE_optgroup',
'E_optgroup'
]
},
{
'action' => 'ELE_option',
'lhs' => 'ELE_option',
'rhs' => [
'S_option',
'FLO_pcdata',
'E_option'
]
},
{
'action' => 'ELE_p',
'lhs' => 'ELE_p',
'rhs' => [
'S_p',
'FLO_inline',
'E_p'
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
]
},
{
'lhs' => 'FLO_block',
'min' => 0,
'rhs' => [
'GRP_block'
]
},
{
'lhs' => 'FLO_cdata',
'min' => 0,
'rhs' => [
'GRP_cdata'
]
},
{
'lhs' => 'FLO_empty',
'rhs' => []
},
{
'lhs' => 'FLO_head',
'min' => 0,
'rhs' => [
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
]
},
{
'lhs' => 'FLO_mixed',
'min' => 0,
'rhs' => [
'GRP_mixed'
]
},
{
'lhs' => 'FLO_pcdata',
'min' => 0,
'rhs' => [
'GRP_pcdata'
]
},
{
'lhs' => 'GRP_ELE_applet',
'rhs' => [
'ELE_param'
]
},
{
'lhs' => 'GRP_ELE_applet',
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
'GRP_SGML'
]
},
{
'lhs' => 'GRP_block',
'rhs' => [
'GRP_anywhere'
]
},
{
'lhs' => 'GRP_cdata',
'rhs' => [
'CRUFT'
]
},
{
'lhs' => 'GRP_cdata',
'rhs' => [
'cdata'
]
},
{
'lhs' => 'GRP_head',
'rhs' => [
'GRP_SGML'
]
},
{
'lhs' => 'GRP_head',
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
},
{
'lhs' => 'GRP_inline',
'rhs' => [
'GRP_anywhere'
]
},
{
'lhs' => 'GRP_inline',
'rhs' => [
'cdata'
]
},
{
'lhs' => 'GRP_inline',
'rhs' => [
'pcdata'
]
},
{
'lhs' => 'GRP_mixed',
'rhs' => [
'GRP_block'
]
},
{
'lhs' => 'GRP_mixed',
'rhs' => [
'GRP_inline'
]
},
{
'lhs' => 'GRP_pcdata',
'rhs' => [
'GRP_SGML'
]
},
{
'lhs' => 'GRP_pcdata',
'rhs' => [
'cdata'
]
},
{
'lhs' => 'GRP_pcdata',
'rhs' => [
'pcdata'
]
},
{
'action' => 'SPE_CDATA',
'lhs' => 'cdata',
'rhs' => [
'CDATA'
]
},
{
'action' => 'SPE_COMMENT',
'lhs' => 'comment',
'rhs' => [
'C'
]
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
'lhs' => 'document',
'rhs' => [
'prolog',
'ELE_html',
'trailer',
'EOF'
]
},
{
'action' => 'SPE_PCDATA',
'lhs' => 'pcdata',
'rhs' => [
'PCDATA'
]
},
{
'action' => 'SPE_PI',
'lhs' => 'pi',
'rhs' => [
'PI'
]
html/lib/Marpa/R2/HTML/Config/Default.pm view on Meta::CPAN
'keygen' => 'FLO_inline',
'label' => 'FLO_inline',
'link' => 'FLO_empty',
'mark' => 'FLO_inline',
'meta' => 'FLO_empty',
'meter' => 'FLO_inline',
'nobr' => 'FLO_inline',
'noframes' => 'FLO_mixed',
'noscript' => 'FLO_mixed',
'output' => 'FLO_inline',
'plaintext' => 'FLO_cdata',
'pre' => 'FLO_inline',
'progress' => 'FLO_inline',
'q' => 'FLO_inline',
'rb' => 'FLO_inline',
'rbc' => 'FLO_inline',
'rp' => 'FLO_inline',
'rt' => 'FLO_inline',
'rtc' => 'FLO_inline',
'ruby' => 'FLO_inline',
's' => 'FLO_inline',
'samp' => 'FLO_inline',
'script' => 'FLO_cdata',
'small' => 'FLO_inline',
'span' => 'FLO_inline',
'strike' => 'FLO_inline',
'strong' => 'FLO_inline',
'style' => 'FLO_cdata',
'sub' => 'FLO_inline',
'sup' => 'FLO_inline',
'textarea' => 'FLO_cdata',
'time' => 'FLO_inline',
'title' => 'FLO_pcdata',
'tt' => 'FLO_inline',
'u' => 'FLO_inline',
'var' => 'FLO_inline',
'video' => 'FLO_inline',
'wbr' => 'FLO_inline',
'xmp' => 'FLO_cdata'
};
$RUBY_SLIPPERS_RANK_BY_NAME = {
'<!element>' => {
'</*>' => 1
},
'<%head>' => {
'</*>' => 1,
'S_head' => 2,
'S_html' => 3
},
html/pod/HTML.pod view on Meta::CPAN
The C<:CDATA> pseudoclass specifies the action for
CDATA terminals.
Its action is called once for each non-whitespace raw C<text> event
that is not reclassed as cruft.
(B<Raw text>
is text in which any markup and entities should be left as is.)
More precisely,
a C<:CDATA> terminal is created from any
L<HTML::Parser> C<text> event that has the C<is_cdata> flag on;
that contains a non-whitespace character
as defined in the HTML 4.01 specification
(L<http://www.w3.org/TR/html4/struct/text.html#h-9.1>);
and that is not reclassed as cruft.
=head2 :COMMENT
The C<:COMMENT> pseudoclass specifies the action for HTML comments.
Its action is called once for every C<HTML::Parser> C<comment> event that
is not reclassed as cruft.
html/pod/HTML.pod view on Meta::CPAN
=head2 :PCDATA
The C<:PCDATA> pseudoclass specifies the action for
PCDATA terminals.
Its action is called once for each non-whitespace non-raw C<text> event
that is not reclassed as cruft.
More precisely,
a C<:PCDATA> terminal is created from any
L<HTML::Parser> C<text> event that has the C<is_cdata> flag B<off>;
that contains a non-whitespace character
as defined in the HTML 4.01 specification
(L<http://www.w3.org/TR/html4/struct/text.html#h-9.1>);
and that is not reclassed as cruft.
Markup and entities in
C<:PCDATA> text
are expected to be interpreted eventually,
but it can be counter-productive to do this
during parsing.
html/t/cfg_fmt.t view on Meta::CPAN
-during-<span>
-more inline stuff-</span></p><p>
-new block-
</p></acme></body>
</html>
END_OF_EXPECTED_OUTPUT
run_one_test( $test_name, $test_html, \$test_config, \$expected_output );
$test_name = 'Block element containing PCDATA';
$test_config =
${$default_config} . '<acme> is a *pcdata included in %block';
# $test_html is same as in previous test
$expected_output = <<'END_OF_EXPECTED_OUTPUT';
<html>
<head>
</head>
<body>
<acme>
-during-</acme><p>
<span>
-more inline stuff-</span></p><p>