Apache2-ModProxyPerlHtml
view release on metacpan or search on metacpan
ModProxyPerlHtml.pm view on Meta::CPAN
#------------------------------------------------------------------------------
# Project : Reverse Proxy HTML link rewriter
# Name : ModProxyPerlHtml.pm
# Language : perl 5
# Authors : Gilles Darold, gilles at darold dot net
# Copyright: Copyright (c) 2005-2022, Gilles Darold - All rights reserved -
# Description : This mod_perl module is a replacement for mod_proxy_html.c
# with far better URL HTML rewriting.
# Usage : See documentation in this file with perldoc.
#------------------------------------------------------------------------------
# This program is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself.
#------------------------------------------------------------------------------
package Apache2::ModProxyPerlHtml;
use strict qw(vars);
use warnings;
require mod_perl2;
use Apache2::Connection ();
use Apache2::RequestRec;
use Apache2::RequestUtil;
use APR::Table;
use APR::URI;
use base qw(Apache2::Filter);
use Apache2::Const -compile => qw(OK DECLINED :conn_keepalive);
use constant BUFF_LEN => 8000;
use Apache2::ServerRec;
use Apache2::URI;
$Apache2::ModProxyPerlHtml::VERSION = '4.1';
%Apache2::ModProxyPerlHtml::linkElements = (
'a' => ['href'],
'applet' => ['archive', 'codebase', 'code'],
'area' => ['href'],
'bgsound' => ['src'],
'blockquote' => ['cite'],
'body' => ['background'],
'del' => ['cite'],
'embed' => ['pluginspage', 'src'],
'form' => ['action'],
'frame' => ['src', 'longdesc'],
'iframe' => ['src', 'longdesc'],
'ilayer' => ['background'],
'img' => ['src', 'lowsrc', 'longdesc', 'usemap'],
'input' => ['src', 'usemap','formaction'],
'ins' => ['cite'],
'isindex' => ['action'],
'head' => ['profile'],
'layer' => ['background', 'src'],
'link' => ['href'],
'object' => ['classid', 'codebase', 'data', 'archive', 'usemap'],
'q' => ['cite'],
'script' => ['src', 'for'],
'table' => ['background'],
'td' => ['background'],
'th' => ['background'],
'tr' => ['background'],
'xmp' => ['href'],
'button' => ['formaction'],
);
sub handler
{
my $f = shift;
my $debug = $f->r->dir_config->get('ProxyHTMLVerbose');
if ($debug && $debug =~ /(on|1)/i) {
$debug = 1;
} else {
$debug = 0;
}
# Thing we do at the first chunk
my $content_type = $f->r->content_type() || '';
unless ($f->ctx) {
$f->r->headers_out->unset('Content-Length');
my @pattern = $f->r->dir_config->get('ProxyHTMLURLMap');
my @rewrite = $f->r->dir_config->get('ProxyHTMLRewrite');
my $contenttype = $f->r->dir_config->get('ProxyHTMLContentType');
$contenttype ||= '(text\/javascript|text\/html|text\/css|text\/xml|application\/.*javascript|application\/.*xml)';
my $badcontenttype = $f->r->dir_config->get('ProxyHTMLExcludeContentType');
$badcontenttype ||= '(application\/vnd\.openxml)';
my @exclude = $f->r->dir_config->get('ProxyHTMLExcludeUri');
my @obfuscation = $f->r->dir_config->get('ProxyHTMLRot13Links');
my $ct = $f->ctx;
$ct->{data} = '';
foreach my $p (@pattern) {
push(@{$ct->{pattern}}, $p);
}
foreach my $p (@rewrite) {
push(@{$ct->{rewrite}}, $p);
}
$ct->{contenttype} = $contenttype;
$ct->{badcontenttype} = $badcontenttype;
foreach my $u (@exclude) {
push(@{$ct->{excluded}}, $u);
}
foreach my $o (@obfuscation) {
my ($elt, $attr) = split(/:/, $o);
if (uc($elt) eq 'ALL') {
$ct->{rot13elements} = 'All';
last;
} else {
$ct->{rot13elements}->{$elt} = $attr;
}
}
$f->ctx($ct);
}
# Thing we do on all invocations
my $ctx = $f->ctx;
while ($f->read(my $buffer, BUFF_LEN)) {
$ctx->{data} .= $buffer;
$ctx->{keepalives} = $f->c->keepalives;
$f->ctx($ctx);
}
# Thing we do at end
if ($f->seen_eos) {
my $parsed_uri = $f->r->construct_url();
my $a_encoding = $f->r->headers_in->{'Accept-Encoding'} || '';
my $c_encoding = $f->r->headers_out->{'Content-Encoding'} || '';
my $ct = $f->r->headers_out->{'Content-type'} || '';
# Only proceed URLs that are not excluded from rewritter
if ( ($#{$ctx->{excluded}} == -1) || !grep($parsed_uri =~ /$_/i, @{$ctx->{excluded}}) ) {
# if Accept-Encoding: gzip,deflate try to uncompress
if ( ($c_encoding =~ /gzip|deflate/) && ($ct =~ /$ctx->{contenttype}/is) && ($ct !~ /$ctx->{badcontenttype}/is) ) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Uncompressing $ct, Content-Encoding: $c_encoding");
}
use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;
my $output = '';
anyinflate \$ctx->{data} => \$output or print STDERR "anyinflate failed: $AnyInflateError\n";
if ($ctx->{data} ne $output) {
$ctx->{data} = $output;
} else {
$c_encoding = '';
}
} else {
$c_encoding = '';
}
# Rewrite refresh command in header
my $refresh = $f->r->headers_out->{'Refresh'};
if ($refresh) {
foreach my $p (@{$ctx->{pattern}}) {
my ($match, $substitute) = split(/[\s\t]+/, $p, 2);
if ($refresh =~ s#([^\/:])$match#$1$substitute#) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Refresh header match '$match', substituted by: /$substitute/");
}
}
}
$f->r->headers_out->set('Refresh' => $refresh);
}
# Rewrite referer in header
my $referer = $f->r->headers_out->{'Referer'};
if ($referer) {
foreach my $p (@{$ctx->{pattern}}) {
my ($match, $substitute) = split(/[\s\t]+/, $p, 2);
if ($referer =~ s#([^\/:])$match#$1$substitute#) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Referer header match '$match', substituted by: /$substitute/");
}
}
}
$f->r->headers_out->set('Referer' => $referer);
}
# Only parse content that should have hyperlinks to rewrite
if ( ($content_type =~ /$ctx->{contenttype}/is) && ($content_type !~ /$ctx->{badcontenttype}/is) ) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Content-type '$content_type' match: /$ctx->{contenttype}/is");
}
# Replace links if pattern match
foreach my $p (@{$ctx->{pattern}}) {
my ($match, $substitute) = split(/[\s\t]+/, $p, 2);
&link_replacement(\$ctx->{data}, $match, $substitute, $parsed_uri, $ctx->{rot13elements});
}
# Rewrite code if rewrite pattern match
foreach my $p (@{$ctx->{rewrite}}) {
my ($match, $substitute) = split(/[\s\t]+/, $p, 2);
&rewrite_content(\$ctx->{data}, $match, $substitute, $parsed_uri);
}
}
# Compress again data if require
if (($a_encoding =~ /gzip|deflate/) && ($c_encoding =~ /gzip|deflate/)) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Compressing output as Content-Encoding: $c_encoding");
}
if ($c_encoding =~ /gzip/) {
use IO::Compress::Gzip qw(gzip $GzipError) ;
my $output = '';
my $status = gzip \$ctx->{data} => \$output or die "gzip failed: $GzipError\n";
$ctx->{data} = $output;
} elsif ($c_encoding =~ /deflate/) {
use IO::Compress::Deflate qw(deflate $DeflateError) ;
my $output = '';
my $status = deflate \$ctx->{data} => \$output or die "deflate failed: $DeflateError\n";
$ctx->{data} = $output;
}
}
}
# Apply any change
$f->ctx($ctx);
# Dump datas out
$f->print($f->ctx->{data});
my $c = $f->c;
if ($c->keepalive == Apache2::Const::CONN_KEEPALIVE && $ctx->{data} && $c->keepalives > $ctx->{keepalives}) {
if ($debug) {
Apache2::ServerRec::warn("[ModProxyPerlHtml] Cleaning context for keep alive request");
}
$ctx->{data} = '';
$ctx->{pattern} = ();
$ctx->{rewrite} = ();
$ctx->{excluded} = ();
$ctx->{rot13elements} = ();
$ctx->{contenttype} = '';
$ctx->{badcontenttype} = '';
$ctx->{keepalives} = $c->keepalives;
}
}
return Apache2::Const::OK;
}
sub link_replacement
{
my ($data, $pattern, $replacement, $uri, $rot13elements) = @_;
return if (!$$data);
my $old_terminator = $/;
$/ = '';
my %TODOS = ();
my %ROT13TODOS = ();
my $i = 0;
# Detect parts that need to be deobfuscated before replacement
if (defined $rot13elements)
{
if ($rot13elements ne 'All') {
foreach my $tag (keys %{$rot13elements}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$rot13elements->{$tag}=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
} elsif ($rot13elements eq 'All') {
foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
next if ($$data !~ /<$tag/i);
foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$attr=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
}
}
}
# Decode ROT13 links now
foreach my $k (keys %ROT13TODOS) {
my $repl = rot13_decode($ROT13TODOS{$k});
$$data =~ s/ROT13REPLACE_$k\$\$/$repl/;
}
# Replace standard link into attributes of any element
foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
next if ($$data !~ /<$tag/i);
foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
while ($$data =~ s/(<$tag[\t\s]+[^>]*\b$attr=['"]*)($replacement|$pattern)([^'"\s>]+)/\$\$NEEDREPLACE$i\$\$/i) {
$TODOS{$i} = "$1$replacement$3";
$i++;
}
}
}
# Replace all links in javascript code after hiding javascript replacement pattern
my %replace_fct = ();
( run in 0.952 second using v1.01-cache-2.11-cpan-df04353d9ac )