Apache2-HTML-Detergent
view release on metacpan or search on metacpan
lib/Apache2/HTML/Detergent.pm view on Meta::CPAN
return Apache2::Const::OK;
}
Apache2::Const::DECLINED;
}
sub _filter_content {
my ($f, $config, $ctx) = @_;
my $r = $f->r;
my $c = $r->connection;
my ($type, $content) = @$ctx;
# this is where we hack the content
# set up the input callbacks with subreq voodoo
my $icb = $config->callback;
$icb->register_callbacks([
sub {
# MATCH
return $_[0] =~ m!^/!;
},
sub {
# OPEN
my $uri = shift;
$r->log->debug("opening XML at $uri");
my $subr = $r->lookup_uri($uri);
my $data = '';
$subr->run_trapped(\$data);
my $io = IO::Scalar->new(\$data);
# HACK: the callback infrastructure doesn't like the globref
\$io;
},
sub {
# READ
my ($io, $len) = @_;
# HACK once again
my $fh = $$io;
my $buf;
$fh->read($buf, $len);
$buf;
},
sub {
# CLOSE
1;
},
]);
my $scrubber = HTML::Detergent->new($config);
# $r->headers_in->get('Host') || $r->get_server_name;
my $host = $r->hostname || $r->get_server_name;
my $scheme = $c->is_https ? 'https' : 'http';
my $port = $r->get_server_port;
my $uri = URI->new
(sprintf '%s://%s:%d%s', $scheme,
$host, $port, $r->unparsed_uri)->canonical;
$r->log->debug($uri);
my $utf8 = Encode::decode(Detect => $content);
$content = $utf8 if defined $utf8 and ($content ne '' and $utf8 ne '');
undef $utf8;
if ($type =~ m!/.*xml!i) {
$r->log->debug("Attempting to use XML parser for $uri");
$content = eval {
XML::LibXML->load_xml
(string => $content, recover => 1, no_network => 1) };
if ($@) {
$r->log->error("Loading $uri failed: $@");
return Apache2::Const::HTTP_BAD_GATEWAY;
}
}
# note $content might be an XML::LibXML::Document
my $doc = $scrubber->process($content, $uri);
$doc->setEncoding('utf-8');
my $root = $doc->documentElement;
if ($root and lc $root->localName eq 'html') {
# XML_DTD_NODE
# $and not grep { $_->nodeType == 14 } $doc->childNodes) {
$doc->removeInternalSubset;
$doc->removeExternalSubset;
$doc->createExternalSubset('html', undef, undef);
$doc->createInternalSubset('html', undef, undef);
}
if (defined $config->xslt) {
# check for existing xslt
my $found;
for my $child ($doc->childNodes) {
if ($child->nodeType == 7
&& lc($child->nodeName) eq 'xml-stylesheet'
&& lc($child->getData) =~ /xsl/) {
$found = $child;
last;
}
}
# TODO: config directive to override existing XSLT PI?
unless ($found) {
my $pi = $doc->createProcessingInstruction
('xml-stylesheet', sprintf 'type="text/xsl" href="%s"',
$config->xslt);
if ($root) {
$doc->insertBefore($pi, $root);
}
}
}
else {
$r->content_type(sprintf '%s; charset=utf-8', $type);
}
#$r->log->debug($r->content_encoding || 'identity');
#$r->log->debug($r->headers_in->get('Content-Encoding'));
# reuse content
$content = $doc->toString(1);
# explicitly get rid of these big objects
undef $scrubber;
( run in 1.423 second using v1.01-cache-2.11-cpan-39bf76dae61 )