HTML-Parser
view release on metacpan or search on metacpan
lib/HTML/HeadParser.pm view on Meta::CPAN
} elsif ($tag =~ /^(?:title|noscript|object|command)$/) {
# Just remember tag. Initialize header when we see the end tag.
# The tag name is stored on the parser object under the 'tag' key.
$self->{'tag'} = $tag;
} elsif ($tag eq 'link') {
# A <link> element without an href attribute produces no header at all.
return unless exists $attr->{href};
# <link href="http:..." rel="xxx" rev="xxx" title="xxx">
# href is removed from the attribute hash so that the loop below only
# sees the remaining attributes.
my $href = delete($attr->{href});
# Strip leading and trailing whitespace from the URL.
$href =~ s/^\s+//; $href =~ s/\s+$//; # HTML5
# Header value layout: <URL> followed by one '; name="value"' parameter
# per remaining attribute, in sorted attribute-name order.
my $h_val = "<$href>";
for (sort keys %{$attr}) {
# A self-closing tag (<link ... />) can leave a "/" pseudo-attribute
# behind; it is not emitted as a parameter.
next if $_ eq "/"; # XHTML junk
# NOTE(review): the attribute value is interpolated verbatim; a value
# containing a double quote would produce an unbalanced parameter.
# Confirm whether consumers of the Link header depend on this.
$h_val .= qq(; $_="$attr->{$_}");
}
# Each <link> element adds its own Link header via push_header.
$self->{'header'}->push_header(Link => $h_val);
} elsif ($tag eq 'head' || $tag eq 'html') {
# ignore
} else {
# stop parsing
# Any other start tag falls through to here and ends the parse via eof.
$self->eof;
}
}
t/declaration.t view on Meta::CPAN
<"<!-- foo -->">]
[<Entity>
<foo>
<"<!-- foo -->">]
<!row --> foo
EOT
# Case: a DOCTYPE declaration carrying a PUBLIC keyword, two quoted
# identifiers and an empty internal subset ("[]").
# $res is reset here and compared below; the code that fills it lives outside
# this excerpt (presumably a declaration handler -- confirm against the top of
# the test file).
$res = "";
$p->parse(<<EOT)->eof;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"[]>
EOT
# Expected: every token of the declaration reported separately, quoted strings
# keeping their quotes, and the empty subset reported as its own "[]" token.
is($res, <<EOT);
[<DOCTYPE>
<html>
<PUBLIC>
<"-//W3C//DTD XHTML 1.0 Strict//EN">
<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<[]>]
EOT
t/headparser.t view on Meta::CPAN
# Case: the document starts with U+FEFF followed by a newline before <title>.
# The title must still be picked up, and the "<foo>" inside it must be kept as
# literal title text.
# H is a helper class defined outside this excerpt; its instance is handed to
# the HeadParser constructor.
$p = HTML::HeadParser->new(H->new);
ok($p->parse("\x{FEFF}\n<title>Hi <foo></title>"));
$p->eof;
is($p->header("title"), "Hi <foo>");
# Case from RT#27522 (URL in the trailing comment below): a fresh parser with
# utf8_mode switched on is fed a document that begins with the bytes
# EF BB BF, then an XHTML doctype and a <head> containing a title, a meta
# element and two self-closing <link> elements.
$p = HTML::HeadParser->new(H->new);
$p->utf8_mode(1);
$p->parse(
<<"EOT"); # example from http://rt.cpan.org/Ticket/Display.html?id=27522
\xEF\xBB\xBF<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title>
Parkinson's disease</title>
<meta name="Keywords" content="brain,disease,dopamine,drug,levodopa,parkinson,patients,symptoms,,Medications, Medications">
</meta>
\t
\t<link href="../../css/ummAdam.css" rel="stylesheet" type="text/css" />
\t<link rel="stylesheet" rev="stylesheet" href="../../css/ummprint.css" media="print" />
\t
t/headparser.t view on Meta::CPAN
EOT
# Finish the parse, then check what was extracted from the document above.
$p->eof;
# The source title has a line break between <title> and the text; the
# expected header value carries no surrounding whitespace.
is($p->header("title"), "Parkinson's disease");
# header("link") is indexed as an array reference here. Its first entry is the
# first <link>: the href in angle brackets, then the other attributes in
# alphabetical order, with no parameter for the trailing "/".
is($p->header("link")->[0],
'<../../css/ummAdam.css>; rel="stylesheet"; type="text/css"');
# Case taken from a live site (URL in the trailing comment below): a fresh
# parser with utf8_mode on is fed an XHTML document whose lines end in "\r"
# before the newline and whose title contains multi-byte UTF-8 sequences
# written as \x escapes.
# The assertions for this case are outside this excerpt.
$p = HTML::HeadParser->new(H->new);
$p->utf8_mode(1);
$p->parse(<<"EOT"); # example from http://www.mjw.com.pl/
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\r
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="pl" lang="pl"> \r
\r
<head profile="http://gmpg.org/xfn/11">\r
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\r
\r
<title> ko\xC5\x84c\xC3\xB3wki kolekcji, outlet, hurtownia odzie\xC5\xBCy Warszawa – MJW</title>\r
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon" />\r
EOT
$p->eof;
( run in 1.561 second using v1.01-cache-2.11-cpan-49f99fa48dc )