HTML-HTML5-Writer
view release on metacpan or search on metacpan
lib/HTML/HTML5/Writer.pm view on Meta::CPAN
my ($self) = @_;
return ($self->{'markup'} =~ m'^(xml|xhtml|application/xml|text/xml|application/xhtml\+xml)$'i);
}
# Accessor for the writer's 'polyglot' option.
# Returns whatever value is stored under that key (used by callers as a
# boolean), or undef when the key is absent.
sub is_polyglot
{
    my $self = shift;
    return $self->{polyglot};
}
# Decides whether attribute values must be quoted.
# An explicit 'quote_attributes' option (even a false one) always wins;
# without it, quoting is required for XHTML or polyglot output.
sub should_quote_attributes
{
    my $self = shift;
    if (exists $self->{quote_attributes})
    {
        return $self->{quote_attributes};
    }
    return $self->is_xhtml || $self->is_polyglot;
}
# Decides whether void elements are written with a trailing ' />'.
# An explicit 'voids' option (even a false one) always wins; without it,
# the XHTML-style terminator is used for XHTML or polyglot output.
sub should_slash_voids
{
    my $self = shift;
    if (exists $self->{voids})
    {
        return $self->{voids};
    }
    return $self->is_xhtml || $self->is_polyglot;
}
# Decides whether optional end tags must always be written.
# An explicit 'end_tags' option (even a false one) always wins; without
# it, end tags are forced for XHTML or polyglot output.
sub should_force_end_tags
{
    my $self = shift;
    if (exists $self->{end_tags})
    {
        return $self->{end_tags};
    }
    return $self->is_xhtml || $self->is_polyglot;
}
# Decides whether optional start tags must always be written.
# An explicit 'start_tags' option (even a false one) always wins; without
# it, start tags are forced for XHTML or polyglot output.
sub should_force_start_tags
{
    my $self = shift;
    if (exists $self->{start_tags})
    {
        return $self->{start_tags};
    }
    return $self->is_xhtml || $self->is_polyglot;
}
# Serialises a whole document.
#   $document - object whose childNodes method yields the top-level nodes.
# Returns the writer's DOCTYPE followed by the serialisation of every
# top-level child node, in document order, as one string.
sub document
{
    my $self     = shift;
    my $document = shift;

    my @top_level = $document->childNodes;
    my $prologue  = $self->doctype;
    my @pieces    = map { $self->_element_etc($_) } @top_level;

    return $prologue . join('', @pieces);
}
# Accessor for the writer's 'doctype' option.
# Returns the stored value unchanged (undef when the key is absent).
sub doctype
{
    my $self = shift;
    return $self->{doctype};
}
# Routes a node to the writer method that knows how to serialise it.
#   $etc - a node object offering nodeName and isa.
# Text, comment and CDATA nodes are recognised by their nodeName; any
# other node is handed to pi() if it is an XML::LibXML::PI, and to
# element() otherwise. Returns whatever the chosen method returns.
sub _element_etc
{
    my ($self, $etc) = @_;

    my %handler_for = (
        '#text'          => 'text',
        '#comment'       => 'comment',
        '#cdata-section' => 'cdata',
    );

    my $handler = $handler_for{ $etc->nodeName };
    if (!defined $handler)
    {
        $handler = $etc->isa('XML::LibXML::PI') ? 'pi' : 'element';
    }

    return $self->$handler($etc);
}
sub element
{
my ($self, $element) = @_;
return $element->toString
unless $element->namespaceURI eq 'http://www.w3.org/1999/xhtml';
my $rv = '';
my $tagname = $element->nodeName;
my %attrs = map { $_->nodeName => $_ } $element->attributes;
my @kids = $element->childNodes;
if ($tagname eq 'html' && !$self->is_xhtml && !$self->is_polyglot)
{
delete $attrs{'xmlns'};
}
my $omitstart = 0;
if (!%attrs and !$self->should_force_start_tags and grep { $tagname eq $_ } @OptionalStart)
{
$omitstart += eval "return \$self->_check_omit_start_${tagname}(\$element);";
}
my $omitend = 0;
if (!$self->should_force_end_tags and grep { $tagname eq $_ } @OptionalEnd)
{
$omitend += eval "return \$self->_check_omit_end_${tagname}(\$element);";
}
unless ($omitstart)
{
$rv .= '<'.$tagname;
foreach my $a (sort keys %attrs)
{
$rv .= ' '.$self->attribute($attrs{$a}, $element);
}
}
if (!@kids and grep { $tagname eq $_ } @VoidElements and !$omitstart)
{
$rv .= $self->should_slash_voids ? ' />' : '>';
return $rv;
}
$rv .= '>' unless $omitstart;
foreach my $kid (@kids)
{
$rv .= $self->_element_etc($kid);
}
unless ($omitend)
{
$rv .= '</'.$tagname.'>';
lib/HTML/HTML5/Writer.pm view on Meta::CPAN
my ($self, $attr, $element) = @_;
my $minimize = 0;
my $quote = 1;
my $quotechar = '"';
my $attrname = $attr->nodeName;
my $elemname = $element ? $element->nodeName : '*';
unless ($self->should_quote_attributes)
{
if (($attr->value eq $attrname or $attr->value eq '')
and grep { $_ eq $attrname or $_ eq sprintf('%s@%s',$elemname,$attrname) } @BooleanAttributes)
{
return $attrname;
}
if ($attr->value =~ /^[A-Za-z0-9\._:-]+$/)
{
return sprintf('%s=%s', $attrname, $attr->value);
}
}
my $encoded_value;
if ($attr->value !~ /\"/)
{
$quotechar = '"';
$encoded_value = $self->encode_entities($attr->value);
}
elsif ($attr->value !~ /\'/)
{
$quotechar = "'";
$encoded_value = $self->encode_entities($attr->value);
}
else
{
$quotechar = '"';
$encoded_value = $self->encode_entities($attr->value,
characters => "\"");
}
return sprintf('%s=%s%s%s', $attrname, $quotechar, $encoded_value, $quotechar);
}
# Serialises a comment node.
#   $text - node object whose nodeValue is the comment content.
# Returns the content, passed through the writer's encode_entities with
# no extra characters, wrapped in '<!--' and '-->'.
sub comment
{
    my ($self, $text) = @_;
    my $body = $self->encode_entities($text->nodeValue);
    return '<!--' . $body . '-->';
}
# Serialises a processing instruction.
#   $pi - node object offering nodeName, textContent and toString.
# A PI named 'decode' is replaced by its text content run through
# HTML::HTML5::Entities::decode; every other PI is emitted via its own
# toString method.
sub pi
{
    my ($self, $pi) = @_;

    return $pi->toString
        unless $pi->nodeName eq 'decode';

    return HTML::HTML5::Entities::decode($pi->textContent);
}
# Serialises a CDATA section node.
#   $text - node object offering nodeValue and parentNode.
# Output depends on the writer mode and on whether the parent element is
# a script or style element (matched case-insensitively):
#   * polyglot, inside script/style : content wrapped in a CDATA section
#     whose delimiters sit inside /* ... */ comments;
#   * not XHTML, inside script/style: raw content;
#   * not XHTML, elsewhere          : delegated to text();
#   * otherwise                     : a plain '<![CDATA[...]]>' section.
sub cdata
{
    my ($self, $text) = @_;

    # Evaluated lazily: the parent is only inspected on the paths that
    # actually need to know about it.
    my $inside_raw_element = sub {
        $text->parentNode->nodeName =~ /^(script|style)$/i;
    };

    if ($self->is_polyglot && $inside_raw_element->())
    {
        return '/* <![CDATA[ */' . $text->nodeValue . '/* ]]> */';
    }

    unless ($self->is_xhtml)
    {
        return $text->nodeValue if $inside_raw_element->();
        return $self->text($text);
    }

    return '<![CDATA[' . $text->nodeValue . ']]>';
}
# Serialises a text node.
#   $text - node object offering nodeValue and parentNode.
# Text outside script/style elements is entity-encoded with '<' and '>'
# added to the characters treated as special. Text inside a script or
# style element (parent name matched case-insensitively) is emitted
# without entity encoding:
#   * polyglot : wrapped in a CDATA section whose delimiters sit inside
#     /* ... */ comments;
#   * not XHTML: raw;
#   * XHTML    : wrapped in a plain '<![CDATA[...]]>' section.
sub text
{
    my ($self, $text) = @_;

    my $inside_raw_element =
        ($text->parentNode->nodeName =~ /^(script|style)$/i);

    unless ($inside_raw_element)
    {
        return $self->encode_entities($text->nodeValue,
            characters => "<>");
    }

    if ($self->is_polyglot)
    {
        return '/* <![CDATA[ */' . $text->nodeValue . '/* ]]> */';
    }

    return $text->nodeValue
        unless $self->is_xhtml;

    return '<![CDATA[' . $text->nodeValue . ']]>';
}
# Entity-encodes the special characters in a string.
#   $string  - the text to encode.
#   %options - 'characters': a string of additional characters to treat
#              as special. Every character in it is taken literally, so
#              regex metacharacters such as '^', ']', '-' and '\' are
#              safe to pass.
# Always encoded: '&', and the control characters U+0000-U+0008, U+000B,
# U+000C, U+000E-U+001F and U+007F. Characters from U+0080 upwards are
# also encoded unless the writer's 'charset' option names UTF-8.
# Numeric references are hexadecimal unless the 'refs' option contains
# 'dec'. Returns the result of HTML::HTML5::Entities::encode_entities.
sub encode_entities
{
    my ($self, $string, %options) = @_;

    # The caller's characters end up inside a bracketed character class.
    # Escape them first: otherwise a leading '^' would negate the whole
    # class and a ']' would terminate it early.
    my $extra = defined $options{'characters'} ? $options{'characters'} : '';
    my $characters = quotemeta($extra);

    $characters .= '&';
    # Single-quoted on purpose: the \x{...} escapes are interpreted by the
    # regex engine when the class is compiled below.
    $characters .= '\x{0}-\x{8}\x{B}\x{C}\x{E}-\x{1F}\x{26}\x{7F}';
    $characters .= '\x{80}-\x{FFFFFF}' unless $self->{'charset'} =~ /^utf[_-]?8$/i;
    my $regexp = qr/[$characters]/;

    local $HTML::HTML5::Entities::hex = ($self->{'refs'} !~ /dec/i);
    return HTML::HTML5::Entities::encode_entities($string, $regexp);
}
sub encode_entity
{
my ($self, $char) = @_;
local $HTML::HTML5::Entities::hex = ($self->{'refs'} !~ /dec/i);
return HTML::HTML5::Entities::encode_entities($char, qr/./);
lib/HTML/HTML5/Writer.pm view on Meta::CPAN
B<DOCTYPE_XHTML_RDFA10>,
B<DOCTYPE_XHTML_RDFA11>.
Defaults to DOCTYPE_HTML5 for HTML and DOCTYPE_LEGACY for XHTML.
=item * B<charset>
This module always returns strings in Perl's internal utf8 encoding, but
you can set the 'charset' option to 'ascii' to create output that would
be suitable for re-encoding to ASCII (e.g. it will entity-encode characters
which do not exist in ASCII).
=item * B<quote_attributes>
Set this to true to force attributes to be quoted. If not explicitly
set, the writer will automatically detect when attributes need quoting.
=item * B<voids>
Set this to true to force void elements to always be terminated with '/>'.
If not explicitly set, they'll only be terminated that way in polyglot or
XHTML documents.
=item * B<start_tags> and B<end_tags>
Except in polyglot and XHTML documents, some elements allow their
start and/or end tags to be omitted in certain circumstances. By
setting these to true, you can prevent them from being omitted.
=item * B<refs>
Special characters that can't be encoded as named entities need
to be encoded as numeric character references instead. These
can be expressed in decimal or hexadecimal. Setting this option to
'dec' or 'hex' allows you to choose. The default is 'hex'.
=back
=back
=head2 Public Methods
=over 4
=item C<< $writer->document($node) >>
Outputs (i.e. returns a string that is) an XML::LibXML::Document as HTML.
=item C<< $writer->element($node) >>
Outputs an XML::LibXML::Element as HTML.
=item C<< $writer->attribute($node) >>
Outputs an XML::LibXML::Attr as HTML.
=item C<< $writer->text($node) >>
Outputs an XML::LibXML::Text as HTML.
=item C<< $writer->cdata($node) >>
Outputs an XML::LibXML::CDATASection as HTML.
=item C<< $writer->comment($node) >>
Outputs an XML::LibXML::Comment as HTML.
=item C<< $writer->pi($node) >>
Outputs an XML::LibXML::PI as HTML.
=item C<< $writer->doctype >>
Outputs the writer's DOCTYPE.
=item C<< $writer->encode_entities($string, characters=>$more) >>
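Takes a string and returns the same string with some special characters
replaced. The ampersand and most control characters are always treated as
special, as are non-ASCII characters unless the writer's charset is UTF-8.
The characters '<', '>' and '"' are not special by default, but you can
provide a string of additional characters to treat as special: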
$encoded = $writer->encode_entities($raw, characters=>'&<>"');
=item C<< $writer->encode_entity($char) >>
Returns $char entity-encoded. Encoding is done regardless of whether
$char is "special" or not.
=item C<< $writer->is_xhtml >>
Boolean indicating if $writer is configured to output XHTML.
=item C<< $writer->is_polyglot >>
Boolean indicating if $writer is configured to output polyglot HTML.
=item C<< $writer->should_force_start_tags >>
=item C<< $writer->should_force_end_tags >>
Booleans indicating whether optional start and end tags should be forced.
=item C<< $writer->should_quote_attributes >>
Boolean indicating whether attributes need to be quoted.
=item C<< $writer->should_slash_voids >>
Boolean indicating whether void elements should be closed in the XHTML style.
=back
=head1 BUGS AND LIMITATIONS
Certain DOM constructs cannot be output in non-XML HTML. e.g.
my $xhtml = <<XHTML;
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Test</title></head>
( run in 0.917 second using v1.01-cache-2.11-cpan-6b5c3043376 )