App-DuckPAN

 view release on metacpan or  search on metacpan

lib/App/DuckPAN/Cmd/Server.pm  view on Meta::CPAN

# Rewrite root-relative asset links in CSS so they are requested from the
# configured hostname.
# Captures the (optional) leading quote, inserts the hostname, keeps the path.
# E.g. url("/assets/background.png") => url("http://duckduckgo.com/assets/background.png")
#
# Only url(...) values that directly follow a ':' are rewritten, and any
# whitespace between the ':' and 'url' is dropped in the output.
# Protocol-relative links (url(//cdn.example.com/x.png)) already name a host
# and are left untouched, as are absolute and document-relative links.
#
# Params:  $css - the CSS (or HTML containing inline CSS) text to rewrite
# Returns: the rewritten text; undef is passed through unchanged
sub change_css {
	my ( $self, $css ) = @_;
	return $css unless defined $css;

	my $hostname = $self->hostname;

	# (["']?) always defines $1 (possibly as the empty string), so unquoted
	# url(/...) values do not trigger an "uninitialized value" warning.
	# (?!/) refuses to match when the path starts with '//'.
	$css =~ s{:\s*url\((["']?)/(?!/)}{:url(${1}http://${hostname}/}g;

	return $css;
}

# Rewrite an HTML page so that DuckPAN can serve it locally.
#
#  * stylesheet references (<a>/<link> with type="text/css") are pointed at
#    the locally served CSS bundle,
#  * well-known script bundles are pointed at the locally served JS,
#    templates and locales,
#  * every other root-relative href/src ("/path") on <a>, <link>, <script>
#    and <img> is made absolute against $self->hostname.
#
# URLs that are already absolute, protocol-relative ("//host/path"),
# document-relative or data: URIs are left untouched.
#
# Params:  $html - the HTML source to rewrite
# Returns: the serialized, rewritten HTML after it has additionally been
#          passed through change_css and change_js
sub change_html {
	my ( $self, $html ) = @_;

	my $hostname = $self->hostname;

	# Make a root-relative attribute value absolute against $hostname.
	# The (?!/) lookahead skips protocol-relative URLs, which already
	# carry their own host.
	my $absolutize_attr = sub {
		my ( $element, $attr_name ) = @_;
		my $value = $element->attr($attr_name);
		return unless defined $value && $value =~ m{\A/(?!/)};
		$element->attr( $attr_name, 'http://' . $hostname . $value );
		return;
	};

	my $root = HTML::TreeBuilder->new;
	$root->parse($html);
	# parse() does not finalize the tree; eof() must be called before the
	# tree is inspected or modified.
	$root->eof;

	my @anchors = $root->look_down( "_tag", "a" );
	my @links   = $root->look_down( "_tag", "link" );

	# Make sure DuckPAN serves DDG CSS (already pulled down at startup)
	# ie <link href="/s123.css"> becomes <link href="/?duckduckhack_css=1">
	# Also rewrite root-relative links to hostname
	my $has_css = 0;
	for my $element ( @anchors, @links ) {
		my $type = $element->attr('type');
		if ( $type && $type eq 'text/css' ) {
			# /?duckduckhack_css=1 already contains all of the CSS in a
			# single response, so only the first stylesheet loads it and
			# every later one is neutralized.
			if ($has_css) {
				$element->attr( 'href', '/?duckduckhack_ignore=1' );
			}
			else {
				$element->attr( 'href', '/?duckduckhack_css=1' );
				$has_css = 1;
			}
		}
		else {
			$absolutize_attr->( $element, 'href' );
		}
	}

	my @scripts = $root->look_down( "_tag", "script" );

	# Make sure DuckPAN serves DDG JS (already pulled down at startup)
	# ie <script src="/d123.js"> becomes <script src="/?duckduckhack_js=1">
	# Also rewrite root-relative links to hostname

	# Temp Fix: Force ignore of d.js & duckduck.
	# This logic needs to be improved!

	my $has_ddh = 0;
	for my $script (@scripts) {
		my $src = $script->attr('src');
		next unless $src;

		# Already updated, no need to do again
		next if $src =~ m{\A/\?duckduckhack_};

		if ( $src =~ m{\A/(?:dpan\d+|duckpan)\.js} ) {
			if ($has_ddh) {
				$script->attr( 'src', '/?duckduckhack_ignore=1' );
			}
			else {
				$script->attr( 'src', '/?duckduckhack_js=1' );
				$has_ddh = 1;
			}
		}
		elsif ( $src =~ m{\A/(?:g\d+|serp)\.js} ) {
			$script->attr( 'src', '/?duckduckhack_templates=1' );
		}
		elsif ( $src =~ m{\A/(?:d\d+|base)\.js} ) {
			# If dpan.js is not present (ie. homepage)
			# make sure we serve the js rather than blocking
			# the call to d.js
			if ($has_ddh) {
				$script->attr( 'src', '/?duckduckhack_ignore=1' );
			}
			else {
				$script->attr( 'src', '/?duckduckhack_js=1' );
			}
		}
		elsif ( $src =~ /locales/ ) {
			$script->attr( 'src', '/?duckduckhack_locales=1' );
		}
		else {
			$absolutize_attr->( $script, 'src' );
		}
	}

	# Rewrite root-relative img links to be requested from hostname.
	# Other sources (absolute URLs, data: URIs, ...) must not get the
	# hostname prepended, as that would produce a broken URL.
	for my $img ( $root->look_down( "_tag", "img" ) ) {
		$absolutize_attr->( $img, 'src' );
	}

	my $newhtml = $root->as_HTML;

	# Explicitly free the tree; older HTML::Element versions hold circular
	# parent/child references that are never garbage collected otherwise.
	$root->delete;

	return $self->change_js( $self->change_css($newhtml) );
}

# This is where we cache and check for newer versions
# of DDG JS and CSS by parsing the HTML requested from
# DuckDuckGo. If new files exist, we grab them, rewrite
# any links and store them in the cache. Otherwise we
# serve the current versions from the cache.

sub get_sub_assets {
	my ($self, $from) = @_;

	my $html = $from->{internal}->slurp;
	my $root = HTML::TreeBuilder->new;



( run in 2.736 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )