HTML-HTML5-Parser
view release on metacpan or search on metacpan
lib/HTML/HTML5/Parser.pm view on Meta::CPAN
{ $file = URI->new($file); }
else
{ $file = URI::file->new_abs($file); }
}
# Fetch the document through the parser's UA wrapper, passing along any
# caller-supplied user agent from $opts.
my $response = HTML::HTML5::Parser::UA->get($file, $opts->{user_agent});
# A response whose `success` flag is false is fatal unless the caller opted
# out with $opts->{ignore_http_response_code}.
croak "HTTP response code was not 200 OK. (Set \$opts{ignore_http_response_code} to ignore this error.)"
unless ($response->{success} || $opts->{ignore_http_response_code});
# Body as provided by the UA wrapper under `decoded_content`, plus the
# declared media type used below to choose a parser.
my $content = $response->{decoded_content};
my $c_type = $response->{headers}{'content-type'};
# Expose the raw response to the caller through the options hash.
$opts->{'response'} = $response;
# Anything whose content type mentions "xml" is handed to XML::LibXML
# instead of the HTML5 parser, unless the caller set force_html.
# NOTE(review): $c_type is matched without a defined-check; a response with
# no content-type header would match against undef here - confirm upstream.
if ($c_type =~ /xml/i and not $opts->{'force_html'})
{
# Record which parser handled the document.
$opts->{'parser_used'} = 'XML::LibXML::Parser';
my $xml_parser = XML::LibXML->new;
# Validation is switched off and recovery mode is set to 2.
$xml_parser->validation(0);
$xml_parser->recover(2);
# NOTE(review): $response is accessed as a plain hash everywhere else in
# this span; ->base only works if it is a blessed object providing a
# `base` method - confirm that every UA code path returns one.
$xml_parser->base_uri($response->base);
lib/HTML/HTML5/Parser/UA.pm view on Meta::CPAN
text/html
application/xhtml+xml;q=0.9
application/xml;q=0.1
text/xml;q=0.1
)),
},
);
my $response = $ua->get($uri);

# Decode the body using the charset parameter of the Content-Type header,
# when one is present.
#
# * The header may be missing altogether, so check definedness before
#   matching.
# * Media-type parameter names are case-insensitive, hence /i.
# * The value stops at whitespace, a quote or ';' so that a header such as
#   "text/html; charset=utf-8;foo=bar" yields "utf-8" rather than
#   "utf-8;foo=bar" (which decode() would reject as an unknown encoding).
# * An optional opening quote is skipped, so charset="utf-8" still works.
my $content_type_header = $response->{headers}{'content-type'};
if (defined $content_type_header
    and $content_type_header =~ /charset=["']?([^\s;"']+)/i)
{
    my $encoding = $1;
    # decode() dies on an unknown encoding; eval turns that into undef so
    # the raw-content fallback below applies.
    $response->{decoded_content} = eval {
        decode($encoding, $response->{content})
    };
}

# No charset, or decoding failed: hand back the body unchanged.
$response->{decoded_content} = $response->{content}
    unless defined $response->{decoded_content};
return $response;
lib/HTML/HTML5/Parser/UA.pm view on Meta::CPAN
# Guess the media type from the file name's extension. Each `||=` assigns
# only while $content_type is still false, so a value set earlier wins and
# at most one of these lines takes effect.
$content_type ||= 'text/xml' if $file =~ /\.xml$/i;
$content_type ||= 'application/xhtml+xml' if $file =~ /\.xht(ml)?$/i;
$content_type ||= 'text/html' if $file =~ /\.html?$/i;
# Last resort when no extension matched.
$content_type ||= 'application/octet-stream';
# Build a response hash with the keys success, status, reason, headers,
# content and decoded_content.
return +{
# True only when $status is exactly 200.
success => ($status == 200),
status => $status,
reason => $reason,
headers => +{
'content-type' => $content_type,
'content-length' => length($content),
},
content => $content,
# Same value as `content`: no charset decoding happens in this span.
decoded_content => $content,
};
}
1;
=head1 NAME
t/html5lib-pass/tests19.dat view on Meta::CPAN
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <meta>
| charset="ascii"
| <body>
#data
<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <meta>
| content="text/html;charset=ascii"
| http-equiv="content-type"
| <body>
#data
<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...
( run in 1.446 second using v1.01-cache-2.11-cpan-d7f47b0818f )