HTML-Doctype
view release on metacpan or search on metacpan
lib/HTML/Doctype.pm view on Meta::CPAN
# < of <!DOCTYPE ...
my $ltpos = $location->{EntityOffset} - 9;
$gtpos = index $document, ">", $ltpos;
# malformed doctype, missing >
return if $gtpos < 0;
$gtskip += $doctyped->{ExternalId}{PublicId} =~ />/g
if defined $doctyped and exists $doctyped->{ExternalId}{PublicId};
$gtskip += $doctyped->{ExternalId}{SystemId} =~ />/g
if defined $doctyped and exists $doctyped->{ExternalId}{SystemId};
$gtpos = index $document, ">", $gtpos # +1 ?
while $gtskip--;
# malformed doctype, missing proper >
return if $gtpos < 0;
# extract possible doctype
my $text = substr $document, $ltpos, $gtpos - $ltpos + 1;
# look for ]> if suspected internal subset
if (index($text, "[") >= 0)
{
my $gtpos2 = index $document, "]>", $ltpos;
$gtpos = $gtpos2 + 1 if $gtpos2 >= 0;
}
return $gtpos - $ltpos + 1
}
sub start_dtd
{
my $self = shift;
my $doct = shift;
# ignore specified document type declarations without
# public or system identifier and implied document type
# declarations (which have just a GeneratedSystemId key)
return unless exists $doct->{ExternalId}{PublicId} or
exists $doct->{ExternalId}{SystemId};
my $puid = $doct->{ExternalId}{PublicId};
# no public identifier means HTML
return $self->_type("HTML", $doct) unless defined $puid;
# split public identifier at //
my @comp = split(/\/\//, $puid);
# malformed public identifiers mean HTML
return $self->_type("HTML", $doct) unless @comp > 2;
# we might want something different than \s and \S here
# but it is not clear to me what exactly we should expect
return $self->_type("HTML", $doct) unless $comp[2] =~ /^DTD\s+(\S+)/;
# the first token of the public text description must include
# the string "XHTML", see XHTML M12N section 3.1, and see also
# http://w3.org/mid/41584c61.156809450@smtp.bjoern.hoehrmann.de
return $self->_type("HTML", $doct) unless $1 =~ /XHTML/;
# otherwise considers this document XHTML
return $self->_type("XHTML", $doct)
}
sub start_element
{
my $self = shift;
my $elem = shift;
# no xmlns attribute means HTML
return $self->_type("HTML") unless exists $elem->{Attributes}{XMLNS};
my $xmlns = $elem->{Attributes}{XMLNS};
# this should use the corresponding helper function to deal
# with some potential edge cases but it is not in CVS yet
return $self->_type("HTML") unless $xmlns->{Defaulted} eq "specified";
# see above
# return $self->_type("HTML") unless "http://www.w3.org/1999/xhtml" eq
# join '', map { $_->{Data} } @{$xmlns->{CdataChunks}};
return $self->_type("XHTML")
}
1;
__END__
=pod
=head1 NAME
HTML::Doctype - HTML/XHTML/XML Doctype Operations
=head1 SYNOPSIS
use HTML::Doctype;
...
=head1 DESCRIPTION
Experimental module to perform some document type declaration
related operations. It currently depends on SGML::Parser::OpenSP
for which it provides a handler HTML::Doctype::Detector which can
be used to detect document type declarations.
my $p = SGML::Parser::OpenSP->new;
my $h = HTML::Doctype::Detector->new($p);
$p->handler($h)
# ...
$p->parse_string("...");
if ($h->is_xhtml)
{
# ...
( run in 0.629 second using v1.01-cache-2.11-cpan-411bb0df24b )