Apache2-ModProxyPerlHtml

 view release on metacpan or  search on metacpan

ModProxyPerlHtml.pm  view on Meta::CPAN

					&link_replacement(\$ctx->{data}, $match, $substitute, $parsed_uri, $ctx->{rot13elements});
				}
				# Rewrite code if rewrite pattern match
				foreach my $p (@{$ctx->{rewrite}}) {
					my ($match, $substitute) = split(/[\s\t]+/, $p, 2);
					&rewrite_content(\$ctx->{data}, $match, $substitute, $parsed_uri);
				}
			}

			# Compress again data if require
			if (($a_encoding =~ /gzip|deflate/) && ($c_encoding =~ /gzip|deflate/)) {
				if ($debug) {
					Apache2::ServerRec::warn("[ModProxyPerlHtml] Compressing output as Content-Encoding: $c_encoding");
				}
				if ($c_encoding =~ /gzip/) {
					use IO::Compress::Gzip qw(gzip $GzipError) ;
					my $output = '';
					my $status = gzip \$ctx->{data} => \$output or die "gzip failed: $GzipError\n";
					$ctx->{data} = $output;
				} elsif ($c_encoding =~ /deflate/) {
					use IO::Compress::Deflate qw(deflate $DeflateError) ;
					my $output = '';
					my $status = deflate \$ctx->{data} => \$output or die "deflate failed: $DeflateError\n";
					$ctx->{data} = $output;
				}
			}
		}

		# Apply any change
		$f->ctx($ctx);

		# Dump datas out
		$f->print($f->ctx->{data});
		my $c = $f->c;
		if ($c->keepalive == Apache2::Const::CONN_KEEPALIVE && $ctx->{data} && $c->keepalives > $ctx->{keepalives}) {
			if ($debug) {
				Apache2::ServerRec::warn("[ModProxyPerlHtml] Cleaning context for keep alive request");
			}
			$ctx->{data} = '';
			$ctx->{pattern} = ();
			$ctx->{rewrite} = ();
			$ctx->{excluded} = ();
			$ctx->{rot13elements} = ();
			$ctx->{contenttype} = '';
			$ctx->{badcontenttype} = '';
			$ctx->{keepalives} = $c->keepalives;
		}
			
	}

	return Apache2::Const::OK;
}

sub link_replacement
{
	my ($data, $pattern, $replacement, $uri, $rot13elements) = @_;

	return if (!$$data);

	my $old_terminator = $/;
	$/ = '';
	my %TODOS = ();
	my %ROT13TODOS = ();
	my $i = 0;

	# Detect parts that need to be deobfuscated before replacement
	if (defined $rot13elements)
	{
		if ($rot13elements ne 'All') {
			foreach my $tag (keys %{$rot13elements}) {
				while ($$data =~ s/(<$tag\s+[^>]*\b$rot13elements->{$tag}=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
					$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
					$i++;
				}
			}
		} elsif ($rot13elements eq 'All') {
			foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
				next if ($$data !~ /<$tag/i);
				foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
					while ($$data =~ s/(<$tag\s+[^>]*\b$attr=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
						$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
						$i++;
					}
				}
			}
		}
	}

	# Decode ROT13 links now
	foreach my $k (keys %ROT13TODOS) {
		my $repl = rot13_decode($ROT13TODOS{$k});
		$$data =~ s/ROT13REPLACE_$k\$\$/$repl/;
	}

	# Replace standard link into attributes of any element
	foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
		next if ($$data !~ /<$tag/i);
		foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
			while ($$data =~ s/(<$tag[\t\s]+[^>]*\b$attr=['"]*)($replacement|$pattern)([^'"\s>]+)/\$\$NEEDREPLACE$i\$\$/i) {
				$TODOS{$i} = "$1$replacement$3";
				$i++;
			}
		}
	}
	# Replace all links in javascript code after hiding javascript replacement pattern
	my %replace_fct = ();
	while ($$data =~ s/(\.replace\([^,]+,[^\)]+\))/\%\%REPLACE$i\%\%/) {
		$replace_fct{$i} = $1;
		$i++;
	}

	$$data =~ s/([^\\\/]['"])($replacement|$pattern)([^'"]*['"])/$1$replacement$3/ig;

	$$data =~ s/\%\%REPLACE(\d+)\%\%/$replace_fct{$1}/g;

	# Some use escaped quote - Do you have better regexp ?
	$$data =~ s/(\&quot;)($replacement|$pattern)(.*\&quot;)/$1$replacement$3/ig;

	# Try to set a fully qualified URI
	$uri =~ s/$replacement.*//;
        # Replace meta refresh URLs
	$$data =~ s/(<meta\b[^>]+content=['"]*.*url=)($replacement|$pattern)([^>]+)/$1$uri$replacement$3/i;
	# Replace base URI
	$$data =~ s/(<base\b[^>]+href=['"]*)($replacement|$pattern)([^>]+)/$1$uri$replacement$3/i;

	# CSS have url import call, most of the time not quoted
	$$data =~ s/(url\(['"]*)($replacement|$pattern)(.*['"]*\))/$1$replacement$3/ig;

	# Javascript have image object or other with a src method.
	$$data =~ s/(\.src[\s\t]*=[\s\t]*['"]*)($replacement|$pattern)(.*['"]*)/$1$replacement$3/ig;
	
	# The single ended tag broke mod_proxy parsing
	$$data =~ s/($replacement|$pattern)>/\/>/ig;
	
	# Replace todos now
	$$data =~ s/\$\$NEEDREPLACE(\d+)\$\$/$TODOS{$1}/g;

	# Detect parts that need to be obfuscated after replacement
	if (defined $rot13elements)
	{
		if ($rot13elements ne 'All') {
			foreach my $tag (keys %{$rot13elements}) {
				while ($$data =~ s/(<$tag\s+[^>]*\b$rot13elements->{$tag}=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
					$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
					$i++;
				}
			}
		} elsif ($rot13elements eq 'All') {
			foreach my $tag (keys %Apache2::ModProxyPerlHtml::linkElements) {
				next if ($$data !~ /<$tag/i);
				foreach my $attr (@{$Apache2::ModProxyPerlHtml::linkElements{$tag}}) {
					while ($$data =~ s/(<$tag\s+[^>]*\b$attr=['"\s]*)([^'"\s>]+)([^>]*>)/ROT13REPLACE_$i\$\$/i) {
						$ROT13TODOS{$i} = "$1ROT13$2ROT13$3";
						$i++;
					}
				}
			}
		}
	}

	# Encode ROT13 links now
	foreach my $k (keys %ROT13TODOS) {
		my $repl = rot13_encode($ROT13TODOS{$k});
		$$data =~ s/ROT13REPLACE_$k\$\$/$repl/;
	}

	$/ = $old_terminator;
}

sub rewrite_content
{
	my ($data, $pattern, $replacement, $uri) = @_;

	return if (!$$data);

	my $old_terminator = $/;
	$/ = '';

	# Rewrite things in code (case sensitive)
	$replacement = '"' . $replacement . '"';
	$$data =~ s/$pattern/$replacement/eeg;

	$/ = $old_terminator;

}

sub rot13_decode
{
	my $str = shift;

	my @parts = split(/ROT13/, $str);
        $parts[1] =~ tr/nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/;

	return join('', @parts);
}

sub rot13_encode
{
	my $str = shift;

	my @parts = split(/ROT13/, $str);
        $parts[1] =~ tr/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM/;

	return join('', @parts);
}


1;

__END__

=head1 NAME

Apache2::ModProxyPerlHtml - rewrite HTTP headers and HTML links for reverse proxy usage

=head1 DESCRIPTION

Apache2::ModProxyPerlHtml is the most advanced Apache output filter to rewrite
HTTP headers and HTML links for reverse proxy usage. It is written in Perl and
exceeds all mod_proxy_html.c limitations without performance lost.

Apache2::ModProxyPerlHtml is very simple and has far better parsing/replacement
of URL than the original C code. It also supports meta tag, CSS, and javascript
URL rewriting and can be used with compressed HTTP. You can now replace any code
by other, like changing image names or anything else. mod_proxy_html can't do
all of that. Since release 3.x ModProxyPerlHtml is also able to rewrite HTTP
headers with Refresh url redirection and Referer. 

The replacement capability concern only the following HTTP content type:

	text/javascript
	text/html
	text/css
	text/xml
	application/.*javascript
	application/.*xml

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.839 second using v1.00-cache-2.02-grep-82fe00e-cpan-d29e8ade9f55 )