HTML-HTML5-Parser

 view release on metacpan or  search on metacpan

lib/HTML/HTML5/Parser.pm  view on Meta::CPAN

			{ $file = URI->new($file); }
		else
			{ $file = URI::file->new_abs($file); }
	}
	
	# Fetch the document. The response is used below as a plain hash with
	# at least the keys: success, headers, decoded_content.
	my $response = HTML::HTML5::Parser::UA->get($file, $opts->{user_agent});
	# A failed fetch is fatal unless the caller explicitly opted out via
	# $opts->{ignore_http_response_code}.
	croak "HTTP response code was not 200 OK. (Set \$opts{ignore_http_response_code} to ignore this error.)"
		unless ($response->{success} || $opts->{ignore_http_response_code});
	
	# Body as provided by the UA layer, plus the declared media type.
	# NOTE(review): $c_type is undef when the response carries no
	# content-type header; the pattern match on it further down would then
	# emit an "uninitialized" warning -- confirm whether that can happen.
	my $content = $response->{decoded_content};
	my $c_type  = $response->{headers}{'content-type'};
	
	# Stash the whole response in the options hash.
	$opts->{'response'} = $response;
	
	if ($c_type =~ /xml/i and not $opts->{'force_html'})
	{
		$opts->{'parser_used'} = 'XML::LibXML::Parser';
		my $xml_parser = XML::LibXML->new;
		$xml_parser->validation(0);
		$xml_parser->recover(2);
		$xml_parser->base_uri($response->base);

lib/HTML/HTML5/Parser/UA.pm  view on Meta::CPAN

				text/html
				application/xhtml+xml;q=0.9
				application/xml;q=0.1
				text/xml;q=0.1
			)),
		},
	);
	
	my $response = $ua->get($uri);
	
	# Decode the body according to the charset parameter of the
	# Content-Type header, when one is present.
	#
	# * The header may be missing altogether, so guard against undef
	#   before matching.
	# * Parameter names are case-insensitive ("Charset=UTF-8" is valid).
	# * The value may be quoted and may be followed by further parameters
	#   without intervening whitespace ("charset=utf-8;foo=bar"), so stop
	#   capturing at a quote, semicolon or whitespace instead of grabbing
	#   every non-space character.
	my $ctype_header = $response->{headers}{'content-type'};
	if (defined $ctype_header
	and $ctype_header =~ /\bcharset\s*=\s*["']?([^\s;"']+)/i)
	{
		my $encoding = $1;
		# decode() dies on an unknown encoding name; that is deliberately
		# treated as "could not decode" and handled by the fallback below.
		$response->{decoded_content} = eval {
			decode($encoding, $response->{content})
		};
	}
	
	# Best-effort fallback: with no (usable) charset, hand back the raw
	# content so callers can always rely on decoded_content being set
	# whenever content is.
	$response->{decoded_content} = $response->{content}
		unless defined $response->{decoded_content};
	return $response;

lib/HTML/HTML5/Parser/UA.pm  view on Meta::CPAN

	# When no media type has been determined yet, guess one from the file
	# name's extension; anything unrecognised is reported as opaque binary.
	unless ($content_type)
	{
		if ($file =~ /\.xml$/i)
			{ $content_type = 'text/xml'; }
		elsif ($file =~ /\.xht(ml)?$/i)
			{ $content_type = 'application/xhtml+xml'; }
		elsif ($file =~ /\.html?$/i)
			{ $content_type = 'text/html'; }
		else
			{ $content_type = 'application/octet-stream'; }
	}
	
	# Assemble a response hash: success flag, status line parts, a minimal
	# header set, and the content under both the raw and decoded keys.
	my %header_fields = (
		'content-type'   => $content_type,
		'content-length' => length($content),
	);
	my %reply = (
		success         => ($status == 200),
		status          => $status,
		reason          => $reason,
		headers         => \%header_fields,
		content         => $content,
		decoded_content => $content,
	);
	return \%reply;
}

1;

=head1 NAME

t/html5lib-pass/tests19.dat  view on Meta::CPAN

#errors
#document
| <!DOCTYPE html>
| <html>
|   <head>
|     <meta>
|       charset="ascii"
|   <body>

#data
<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
#errors
#document
| <!DOCTYPE html>
| <html>
|   <head>
|     <meta>
|       content="text/html;charset=ascii"
|       http-equiv="content-type"
|   <body>

#data
<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...
#errors
#document
| <!DOCTYPE html>
| <html>
|   <head>
|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...



( run in 1.446 second using v1.01-cache-2.11-cpan-d7f47b0818f )