Apache-FilteringProxy

 view release on metacpan or  search on metacpan

lib/Apache/FilteringProxy.pm  view on Meta::CPAN

		# only update our cached configuration if the config file has been modififed
		if (!defined($Apache::FilteringProxy::config_modification) or 
		   ($mtime > $Apache::FilteringProxy::config_modification)) 
		{
			$r->warn("DEBUG: updating XML configuration");

			# get our XML from the config file
			open(CONFIG, "<$config_file") || $r->warn("couldn't open configuration file '$config_file'");
			undef $/;
			my $xml_source = <CONFIG>;
			$/ = "\n";
			close(CONFIG);

			# create parser object and parse configuration from our string
			my $config = new XML::EasyOBJ(-type => 'string', -param => $xml_source);
			#my $config = my $doc = new XML::EasyOBJ(-type => 'file', -param => $config_file);

			# we have just modified our file, so let's set our modification
			# date to the new mod time so we don't keep causing ourself to
			# reread the config
			@stat = stat($config_file);
			$mtime = $stat[9];

			# log the configuration file modification stats 
			if (defined($Apache::FilteringProxy::config_modification)) {
				$r->warn("config: modification time: current=$mtime,last=$Apache::FilteringProxy::config_modification");
			} else {
				$r->warn("config: modification time: current=$mtime,last=none");
			}

			$Apache::FilteringProxy::config_modification = $mtime;

			my @resources = $config->resource;
			my @filter_types = $config->getElement("filter-type")->type();
			my @strip_headers = $config->getElement("strip-headers")->name();
			my @strip_cookies = $config->getElement("strip-cookies")->name();
			my @type_translations = $config->getElement("type-translations")->item();
			my @translations = $config->translations()->item();
			my $default_url = $config->getElement("default-url")->getAttr("value");
			my $proxy_url = $config->getElement("proxy-url")->getAttr("value");
			my $logging = $config->logging()->getAttr("value");

			# see if we have logging enabled
			#
			# logging levels
			# 	0 - critical
			#	1 - verbose
			#	2 - debug with headers
			#	3 - debug with headers & source
			if ($logging) {
				$Apache::FilteringProxy::logging = $logging;
				$r->warn("config: logging level set to '".$logging."'");
			} else {
				$Apache::FilteringProxy::logging = 0;
			}

			# get admin database configuration
			$Apache::FilteringProxy::db_hostname = $config->getElement("admin-database")->hostname->getString() || "localhost";
			$Apache::FilteringProxy::db_hostport = $config->getElement("admin-database")->hostport->getString() || "5432";
			$Apache::FilteringProxy::db_username = $config->getElement("admin-database")->username->getString() || "user";
			$Apache::FilteringProxy::db_password = $config->getElement("admin-database")->password->getString() || "pass";
			$Apache::FilteringProxy::db_database = $config->getElement("admin-database")->database->getString() || "default";
			$Apache::FilteringProxy::db_driver   = $config->getElement("admin-database")->getElement("dbi-dbd")->getString() || "Pg";
			$r->warn("config: admin db hostname: " . $Apache::FilteringProxy::db_hostname) unless ($Apache::FilteringProxy::logging < 1);
			$r->warn("config: admin db hostport: " . $Apache::FilteringProxy::db_hostport) unless ($Apache::FilteringProxy::logging < 1);
			$r->warn("config: admin db username: " . $Apache::FilteringProxy::db_username) unless ($Apache::FilteringProxy::logging < 1);
			$r->warn("config: admin db password: " . (($Apache::FilteringProxy::db_password) ? "*not empty*" : "*empty*")) unless ($Apache::FilteringProxy::logging < 1);
			$r->warn("config: admin db database: " . $Apache::FilteringProxy::db_database) unless ($Apache::FilteringProxy::logging < 1);
			$r->warn("config: admin db driver: " . $Apache::FilteringProxy::db_driver) unless ($Apache::FilteringProxy::logging < 1);

			# the proxy that will be used in all requests made by LWP to
			# retrieve content from a remote server
			if ($proxy_url) {
				$Apache::FilteringProxy::proxy_url = $proxy_url;
				$r->warn("config: proxy url: $proxy_url");
			} else {
				$Apache::FilteringProxy::proxy_url = "";
			}

			# the url that a user will be sent to when they try to access
			# an unconfigured resource - set in the configuration file
			if ($default_url) {
				$Apache::FilteringProxy::default_url = $default_url;
				$r->warn("config: default url: $default_url");
			} else {
				$Apache::FilteringProxy::default_url = "http://www.slashdot.org/";
			}

			# get all of the content types that we want to filter
			undef $Apache::FilteringProxy::filter_types;
			my $filter_type;
			foreach $filter_type (@filter_types) {
				$Apache::FilteringProxy::filter_types{$filter_type->getString()} = 1;
				$r->warn("config: filter type: ".$filter_type->getString()) unless ($Apache::FilteringProxy::logging < 1);
			}

			# get all of the headers that we want to strip
			undef $Apache::FilteringProxy::strip_headers;
			my $strip_header;
			foreach $strip_header (@strip_headers) {
				$Apache::FilteringProxy::strip_headers{$strip_header->getString()} = 1;
				$r->warn("config: strip header: ".$strip_header->getString()) unless ($Apache::FilteringProxy::logging < 1);
			}

			# get all of the cookies types that we want strip
			undef $Apache::FilteringProxy::strip_cookies;
			my $strip_cookie;
			foreach $strip_cookie (@strip_cookies) {
				$Apache::FilteringProxy::strip_cookies{$strip_cookie->getString()} = 1;
				$r->warn("config: strip cookie: ".$strip_cookie->getString()) unless ($Apache::FilteringProxy::logging < 1);
			}

			# get all of the content-type translations we need to perform
			# e.g. translating "text-html" to "text/html"
			undef %Apache::FilteringProxy::type_translations;
			my $type_translation;
			foreach $type_translation (@type_translations) {
				if ($type_translation->match()->getString() && $type_translation->replace()->getString()) {
					my $match = $type_translation->match()->getString();
					my $replace = $type_translation->replace()->getString();

					$Apache::FilteringProxy::type_translations{$match} = $replace;

					$r->warn("config: type-translation: $match=>$replace") unless ($Apache::FilteringProxy::logging < 1);
				}
			}

lib/Apache/FilteringProxy.pm  view on Meta::CPAN

			$remote_servername = $1;
			$remote_port = $2;
			$resource_id = $3;
		} else {
			$remote_string =~ s/(^|\.)([^.]+)\.$//;
			$resource_id = $2;
			$remote_servername = $remote_string;
			$remote_port = 80;
		}

		$r->warn("DEBUG: resource id '$resource_id' found") unless ($Apache::FilteringProxy::logging < 3);
	} else {
		$r->warn("no server name: local hostname: $local_hostname, local servername: $local_servername");
		# could print out some pretty HTML message here
		return FORBIDDEN;
	}

	# this will be used when only the resource name is specified with the
	# resource subdomain.  We will default redirect the user to the url
	# specified for the resource.
	if ($resource_id and !$remote_servername) {
		if (defined($Apache::FilteringProxy::url{$resource_id}) and 
		    ($Apache::FilteringProxy::url{$resource_id} ne ""))
		{
			my $url = $Apache::FilteringProxy::url{$resource_id};

			if ($url =~ 
			m@
				(http:\/\/)					# http://
				([A-Za-z0-9\.\-]+)	 		# hostname
				(:([0-9]{1,5}))? 			# :port
				(\/.*|$)					# the path
			@ix) {
				my $http_string = $1;
				my $hostname = $2;
				my $port = $4 || "80";
				my $path = $5 || "";

				$r->warn("DEBUG: redirecting user to '$http_string$hostname.port$port.$resource_id.$local_servername$path'") unless ($Apache::FilteringProxy::logging < 2);
				$r->header_out("Location" => "$http_string$hostname.port$port.$resource_id.$local_servername$path");
				return REDIRECT;
			} else {
				$r->warn("DEBUG: no valid url (specified: '$url') to redirect user to for resource '$resource_id'") unless ($Apache::FilteringProxy::logging < 2);
				# we could do something like print some nice HTML error here
				return FORBIDDEN;
			}
		} else {
			$r->warn("DEBUG: no url to redirect user to for resource '$resource_id'") unless ($Apache::FilteringProxy::logging < 2);
			# we could do something like print some nice HTML error here
			return FORBIDDEN;
		}
	}

	# in admin mode, we want to rewrite every host we encounter
	if ($mode eq "admin") {
		# let's add the remote server to our list of hosts/domains we want to
		# configure for proxying
		$r->warn("ADMIN: adding hostname for resource") unless ($Apache::FilteringProxy::logging < 2);

		use DBI;
		my $dbh = DBI->connect("dbi:$Apache::FilteringProxy::db_driver:dbname=$Apache::FilteringProxy::db_database;host=$Apache::FilteringProxy::db_hostname;port=$Apache::FilteringProxy::db_hostport",$Apache::FilteringProxy::db_username,$Apache::FilteringP...

		# get all current hosts in admin to make sure we dont add the
		# hostname a second time.  The admin tool clears old entries
		# out before starting, so we know all entries in the db are valid
		my $sth = $dbh->prepare("SELECT hostname from admin;");
		$sth->execute();

		# make list of hosts
		my @hostname_list;
		my $hostname;
		$sth->bind_columns(\$hostname);
		while ($sth->fetch()) {
			push(@hostname_list, $hostname);
		}
		if (!grep(/^$remote_servername$/,@hostname_list)) {
			my $sth = $dbh->prepare("INSERT INTO admin (id, hostname) VALUES (nextval('admin_id_seq'), '$remote_servername');");
			$sth->execute();
		}
		$sth->finish();
		$dbh->disconnect();
	}

	# grab the uri that was requested and prepare it to be used to request
	# the document on the remote server
	my $uri = $r->parsed_uri;
	my $path = $uri->path();
	my $unparsed = $uri->unparse();
	my $query = $uri->query || "";
	if ($query) {
		$path .= "?$query";
	} elsif ($unparsed =~ /\?$/) {
		$path .= "?";
	}

	# no remote servername and no resource was caught above, so we redirect
	# user to the default_url page
	if (!$remote_servername) {
		$r->warn("DEBUG: no servername, redirecting to default url '$Apache::FilteringProxy::default_url'") unless ($Apache::FilteringProxy::logging < 2);
		$r->header_out("Location" => $Apache::FilteringProxy::default_url);
		return REDIRECT;
	}

	# in admin mode we want all hosts, not just configured ones to be proxied
	if ($mode ne "admin") {
		# used to make sure all kinds of other domains can't be directly accessed
		# through this script unless they are in the list of configured host or
		# domains
		my $found = 0;
		if (!grep(/^$remote_servername$/, @{$Apache::FilteringProxy::proxy_host_include_list{$resource_id}})) {
			foreach (@{$Apache::FilteringProxy::proxy_domain_include_list{$resource_id}}) {
				$r->warn("DEBUG: testing '$remote_servername' against domain include '$_'") unless ($Apache::FilteringProxy::logging < 2);
				if ($remote_servername =~ m/([A-Za-z0-9\.\-]+\.)*$_$/i) {
					$found = 1;
					last;
				}
			}

			# we didn't find the host in the host include list or domain
			# include list for this resource
			if (!$found) {



( run in 1.246 second using v1.01-cache-2.11-cpan-e1769b4cff6 )