Amethyst

 view release on metacpan or  search on metacpan

Amethyst/Brain/Infobot/Module/Google.pm  view on Meta::CPAN


	my %states = map { $_ => "handler_$_" } qw(
					_start response
						);

	print STDERR "Creating child session for google\n";

	POE::Session->create(
		package_states	=> [ ref($self) => \%states ],
		args			=> [ $self, $message, $query ],
			);

	return 1;
}


sub parse_response {
	my ($self, $response) = @_;

	$self->{_debug} = 10;

	print STDERR "Self is $self\n";

	# parse the output
	my ($HEADER, $HITS, $TRAILER, $POST_NEXT) = (1..10);

	my @hits = ();
	my $hit = undef;

	my $approx_count = -1;

	my $state = $HEADER;

	my @lines = split(/\n/, $response->content);

	foreach (@lines) {
		next unless /\S/; # short circuit for blank lines

		# print STDERR substr($_, 0, 70) . "\n" if $self->{_debug};
		print STDERR $_ . "\n" if $self->{_debug};

		if ($state == $HEADER && m/about <b>([\d,]+)<\/b>/) {
			$approx_count = $1;
			print STDERR "Found Total: $approx_count\n" ;
			$state = $HITS;
		}
		elsif ($state == $HITS &&
			m|<p><a href=[^\s]*(http[^&>]*)[^>]*>(.*?)</a>|i) {
			my ($url, $title) = ($1, $2);
			$hit = new WWW::SearchResult();
			push(@hits, $hit);

			print STDERR "**Found HIT Line**\n" if ($self->{_debug});
			$url =~ s/(>.*)//g;
			$hit->add_url(WWW::Search::strip_tags($url));
			$title = "No Title" if ($title =~ /^\s+/);

			$hit->title(WWW::Search::strip_tags($title));
			$state = $HITS;
		} 
		elsif ($state == $HITS && m|Description:</font></span>\s*(.*)<br>|i) {
			print STDERR "**Parsing Description Line**\n" if ($self->{_debug});
			if ($hit) {
				my $desc = $1;
				$desc =~ s/<.*?>//g;
				$desc =~ s/Category.*//;
				$hit->description($desc);
				$state = $HITS;
			}
			else {
				print STDERR "ERROR: No hit when parsing description\n";
			}
		} 
		elsif ($state == $HITS && m@<div class=nav>@i) {
			print STDERR "**Found Last Line**\n" if ($self->{_debug});
			# end of hits
			$state = $TRAILER;
		}
		else {
			print STDERR "**No match**\n" if ($self->{_debug});
		}
	}

	return @hits;
}

sub handler_response {
	my ($kernel, $heap, $session, $pbargs) =
					@_[KERNEL, HEAP, SESSION, ARG1];
	my ($request, $response, $entry) = @$pbargs;

	unless ($response->is_success) {
		my $reply = $heap->{Module}->reply_to($heap->{Message},
						"HTTP Request failed");
		$reply->send;
		print STDERR $response->error_as_HTML;
		return;
	}

	if (0) {
		local *LOGFILE;
		open(LOGFILE, ">google.log") or die "Can't open file: $!";
		print LOGFILE $response->content;
		print LOGFILE "\n\n\n";
		close(LOGFILE);
	}

	# What we want here is a WWW::Search with a _separate_ parser
	# for pages which we have alrady retrieved.

	my @hits = parse_response($heap->{Module}, $response);

	my $module = $heap->{Module};

	# print STDERR Dumper(\@hits);

	if (@hits) {
		@hits = @hits[0..3] if @hits > 4;
		foreach my $hit (@hits) {
			my $url = $hit->url;
			my $title = $hit->title;



( run in 1.565 second using v1.01-cache-2.11-cpan-97f6503c9c8 )