Apache2-HTML-Detergent
view release on metacpan or search on metacpan
lib/Apache2/HTML/Detergent.pm view on Meta::CPAN
unless ($config->type_matches($type)) {
$r->log->debug("$type doesn't match");
return Apache2::Const::DECLINED;
}
#$r->headers_out->set('Transfer-Encoding', 'chunked');
# application/xml is the most reliable content type to
# deliver to browsers that use XSLT.
if ($config->xslt) {
$r->log->debug("forcing $type -> application/xml");
$r->content_type('application/xml; charset=utf-8');
}
# XXX will we need to restore $r->status == 200 to this condition?
if ($r->is_initial_req) {
# BEGIN BUCKET
until ($bb->is_empty) {
my $b = $bb->first;
if ($b->is_eos) {
# no further processing if the brigade only contains EOS
return Apache2::Const::DECLINED if $ctx->[1] eq '';
# nuke the brigade
$bb->destroy;
# this is where that xml code goes
return _filter_content($f, $config, $ctx);
}
if ($b->read(my $data)) {
if ($ctx->[1] eq '') {
# XXX here is where we would double-check the mime type
}
$ctx->[1] .= $data;
}
# remove this bucket only if it isn't EOS
$b->remove;
}
# destroy the brigade only after exiting the loop
$bb->destroy;
# END BUCKET
return Apache2::Const::OK;
}
Apache2::Const::DECLINED;
}
sub _filter_content {
my ($f, $config, $ctx) = @_;
my $r = $f->r;
my $c = $r->connection;
my ($type, $content) = @$ctx;
# this is where we hack the content
# set up the input callbacks with subreq voodoo
my $icb = $config->callback;
$icb->register_callbacks([
sub {
# MATCH
return $_[0] =~ m!^/!;
},
sub {
# OPEN
my $uri = shift;
$r->log->debug("opening XML at $uri");
my $subr = $r->lookup_uri($uri);
my $data = '';
$subr->run_trapped(\$data);
my $io = IO::Scalar->new(\$data);
# HACK: the callback infrastructure doesn't like the globref
\$io;
},
sub {
# READ
my ($io, $len) = @_;
# HACK once again
my $fh = $$io;
my $buf;
$fh->read($buf, $len);
$buf;
},
sub {
# CLOSE
1;
},
]);
my $scrubber = HTML::Detergent->new($config);
# $r->headers_in->get('Host') || $r->get_server_name;
my $host = $r->hostname || $r->get_server_name;
my $scheme = $c->is_https ? 'https' : 'http';
my $port = $r->get_server_port;
my $uri = URI->new
(sprintf '%s://%s:%d%s', $scheme,
$host, $port, $r->unparsed_uri)->canonical;
$r->log->debug($uri);
my $utf8 = Encode::decode(Detect => $content);
$content = $utf8 if defined $utf8 and ($content ne '' and $utf8 ne '');
undef $utf8;
if ($type =~ m!/.*xml!i) {
$r->log->debug("Attempting to use XML parser for $uri");
$content = eval {
XML::LibXML->load_xml
(string => $content, recover => 1, no_network => 1) };
if ($@) {
$r->log->error("Loading $uri failed: $@");
return Apache2::Const::HTTP_BAD_GATEWAY;
}
}
# note $content might be an XML::LibXML::Document
my $doc = $scrubber->process($content, $uri);
$doc->setEncoding('utf-8');
( run in 0.644 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )