Apache2-ModProxyPerlHtml
view release on metacpan or search on metacpan
ModProxyPerlHtml.pm view on Meta::CPAN
my ($data, $pattern, $replacement, $uri, $rot13elements) = @_;
return if (!$$data);
my $old_terminator = $/;
$/ = '';
my %TODOS = ();
my %ROT13TODOS = ();
my $i = 0;
# Detect parts that need to be deobfuscated before replacement
if (defined $rot13elements)
{
if ($rot13elements ne 'All') {
foreach my $tag (keys %{$rot13elements}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$rot13elements->{$tag}=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
} elsif ($rot13elements eq 'All') {
foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
next if ($$data !~ /<$tag/i);
foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$attr=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
}
}
}
# Decode ROT13 links now
foreach my $k (keys %ROT13TODOS) {
my $repl = rot13_decode($ROT13TODOS{$k});
$$data =~ s/ROT13REPLACE_$k\$\$/$repl/;
}
# Replace standard link into attributes of any element
foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
next if ($$data !~ /<$tag/i);
foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
while ($$data =~ s/(<$tag[\t\s]+[^>]*\b$attr=['"]*)($replacement|$pattern)([^'"\s>]+)/\$\$NEEDREPLACE$i\$\$/i) {
$TODOS{$i} = "$1$replacement$3";
$i++;
}
}
}
# Replace all links in javascript code after hiding javascript replacement pattern
my %replace_fct = ();
while ($$data =~ s/(\.replace\([^,]+,[^\)]+\))/\%\%REPLACE$i\%\%/) {
$replace_fct{$i} = $1;
$i++;
}
$$data =~ s/([^\\\/]['"])($replacement|$pattern)([^'"]*['"])/$1$replacement$3/ig;
$$data =~ s/\%\%REPLACE(\d+)\%\%/$replace_fct{$1}/g;
# Some use escaped quote - Do you have better regexp ?
$$data =~ s/(\")($replacement|$pattern)(.*\")/$1$replacement$3/ig;
# Try to set a fully qualified URI
$uri =~ s/$replacement.*//;
# Replace meta refresh URLs
$$data =~ s/(<meta\b[^>]+content=['"]*.*url=)($replacement|$pattern)([^>]+)/$1$uri$replacement$3/i;
# Replace base URI
$$data =~ s/(<base\b[^>]+href=['"]*)($replacement|$pattern)([^>]+)/$1$uri$replacement$3/i;
# CSS have url import call, most of the time not quoted
$$data =~ s/(url\(['"]*)($replacement|$pattern)(.*['"]*\))/$1$replacement$3/ig;
# Javascript have image object or other with a src method.
$$data =~ s/(\.src[\s\t]*=[\s\t]*['"]*)($replacement|$pattern)(.*['"]*)/$1$replacement$3/ig;
# The single ended tag broke mod_proxy parsing
$$data =~ s/($replacement|$pattern)>/\/>/ig;
# Replace todos now
$$data =~ s/\$\$NEEDREPLACE(\d+)\$\$/$TODOS{$1}/g;
# Detect parts that need to be obfuscated after replacement
if (defined $rot13elements)
{
if ($rot13elements ne 'All') {
foreach my $tag (keys %{$rot13elements}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$rot13elements->{$tag}=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
} elsif ($rot13elements eq 'All') {
foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
next if ($$data !~ /<$tag/i);
foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
while ($$data =~ s/(<$tag\s+[^>]*\b$attr=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
$i++;
}
}
}
}
}
# Encode ROT13 links now
foreach my $k (keys %ROT13TODOS) {
my $repl = rot13_encode($ROT13TODOS{$k});
$$data =~ s/ROT13REPLACE_$k\$\$/$repl/;
}
$/ = $old_terminator;
}
sub rewrite_content
{
my ($data, $pattern, $replacement, $uri) = @_;
return if (!$$data);
my $old_terminator = $/;
ModProxyPerlHtml.pm view on Meta::CPAN
% perl Makefile.PL
% make && make install
=head1 APACHE CONFIGURATION
On Debian/Ubuntu set the following configuration into the VirtualHost section
of files /etc/apache2/sites-available/default-ssl.conf and /etc/apache2/sites-available/000-default.conf.
On CentOS/RedHat add it to /etc/httpd/conf.d/vhost.conf.
ProxyRequests Off
ProxyPreserveHost Off
ProxyPass /webcal/ http://webcal.domain.com/
PerlInputFilterHandler Apache2::ModProxyPerlHtml
PerlOutputFilterHandler Apache2::ModProxyPerlHtml
SetHandler perl-script
# Use line below and comment line above if you experience error:
# "Attempt to serve directory". The reason is that with SetHandler
# DirectoryIndex is not working
# AddHandler perl-script *
PerlSetVar ProxyHTMLVerbose "On"
LogLevel Info
<Location /webcal/>
ProxyPassReverse /
PerlAddVar ProxyHTMLURLMap "/ /webcal/"
PerlAddVar ProxyHTMLURLMap "http://webcal.domain.com /webcal"
</Location>
Note that here the FilterHandlers are set globally; you can also set them in any
<Location> section to apply them locally and avoid calling this Apache module globally.
If you want to rewrite some code on the fly, such as changing image filenames, you
can use the Perl variable ProxyHTMLRewrite under the Location directive as
follows:
<Location /webcal/>
...
PerlAddVar ProxyHTMLRewrite "/logo/image1.png /images/logo1.png"
# Or more complicated to handle space in the code as space is the
# pattern / substitution separator character internally in ModProxyPerlHtml
PerlAddVar ProxyHTMLRewrite "ajaxurl[\s\t]*=[\s\t]*'/blog' ajaxurl = '/www2.mydom.org/blog'"
...
</Location>
This will replace each occurrence of '/logo/image1.png' with '/images/logo1.png' in
the entire stream (HTML, JavaScript or CSS). Note that this kind of substitution
is done after all other proxy-related replacements.
In some conditions JavaScript code can be rewritten by mistake, for example:
imgUp.src = '/images/' + varPath + '/' + 'up.png';
will be rewritten like this:
imgUp.src = '/URL/images/' + varPath + '/URL/' + 'up.png';
To avoid the second replacement, write your JS code like that:
imgUp.src = '/images/' + varPath + unescape('%2F') + 'up.png';
ModProxyPerlHTML replacement is activated only for certain HTTP Content-Types. If you
find that replacement is not activated for your file type, you can use the
ProxyHTMLContentType configuration directive to redefine the HTTP Content-Types
that should be parsed by ModProxyPerlHTML. The default value is the following
Perl regular expression:
PerlAddVar ProxyHTMLContentType (text\/javascript|text\/html|text\/css|text\/xml|application\/.*javascript|application\/.*xml)
If you know exactly what you are doing when editing this regexp, feel free to add
the missing Content-Type that must be parsed by ModProxyPerlHTML. Otherwise drop
me a line with the content type and I will give you the right expression. If you don't
know the content type, in Firefox simply press Ctrl+i on the web page.
Some MS Office files, such as .docx or .xlsx files, may match the above ProxyHTMLContentType regex.
As a result, their content may be rewritten and the file will be corrupted.
To prevent this, use the ProxyHTMLExcludeContentType configuration directive to exclude certain
content types. Here is the default value:
PerlAddVar ProxyHTMLExcludeContentType (application\/vnd\.openxml)
If you have problems with other content types, use this directive. For example:
PerlAddVar ProxyHTMLExcludeContentType (application\/vnd\.openxml|application\/vnd\..*text)
This regex will prevent any MS Office XML or text document from being parsed.
Some JavaScript libraries like jQuery are wrongly rewritten by ModProxyPerlHtml.
The problem is that these libraries include code and regexes that are
detected as links and rewritten. The only way to fix that is to exclude those
files from the URL rewriter by using the "ProxyHTMLExcludeUri" configuration
directive. For example:
PerlAddVar ProxyHTMLExcludeUri jquery.min.js$
PerlAddVar ProxyHTMLExcludeUri ^.*\/jquery-lib\/.*$
Any downloaded URI that matches the given regex will be returned as is, without
rewriting. You can use this directive multiple times, as above, to match different
cases.
=head1 LIVE EXAMPLE
Here is the reverse proxy configuration I use to give access to Internet users
to internal applications:
ProxyRequests Off
ProxyPreserveHost Off
ProxyPass /webmail/ http://webmail.domain.com/
ProxyPass /webcal/ http://webcal.domain.com/
ProxyPass /intranet/ http://intranet.domain.com/
PerlInputFilterHandler Apache2::ModProxyPerlHtml
PerlOutputFilterHandler Apache2::ModProxyPerlHtml
SetHandler perl-script
# Use line below and comment line above if you experience error:
# "Attempt to serve directory". The reason is that with SetHandler
# DirectoryIndex is not working
# AddHandler perl-script *
PerlSetVar ProxyHTMLVerbose "On"
( run in 1.397 second using v1.01-cache-2.11-cpan-39bf76dae61 )