Search-ContextGraph

 view release on metacpan or  search on metacpan

ContextGraph.pm  view on Meta::CPAN

		$ref = $params{parse}->( $content );
		croak "did not get an appropriate reference back after parsing"
			unless ref $ref and ref $ref =~ /(HASH|ARRAY)/;
		
		
	} else {
	
		my $code = sub { 
			my $txt  = shift; 
			$txt =~ s/\W/ /g;
			my @toks = split m/\s+/, $txt;
			\@toks;
		};
		$ref = $code->($content);
	}
	
	return unless $ref;
	$self->add( $title, $ref );
	
}

ContextGraph.pm  view on Meta::CPAN

	#warn "Found ", scalar @shared, " nodes shared between $n1 and $n2\n";
	
	my $node1 = _nodeify( $type, $n1 );
	my $node2 = _nodeify( $type, $n2 );
	# formula is w(t1,d1)/deg(d1) + w(t1,d2)/deg(d2) ... ) /deg( t1 )
	
	#warn "Calculating distance\n";
	my $sum1 = 0;
	my $sum2 = 0;
	foreach my $next ( @shared ) {
		my ( undef, $lcount1) =  split m/,/, $self->{neighbors}{$node1}{$next};
		my ( undef, $lcount2) =  split m/,/, $self->{neighbors}{$node2}{$next};

		my $degree = $self->degree( $next );
		#warn "\t degree of $next is $degree\n";
		my $elem1 = $lcount1 / $degree;
		$sum1 += $elem1;
		my $elem2 = $lcount2 / $degree;
		$sum2 += $elem2;
	}
	#warn "sum is $sum1, $sum2\n";
	my $final = ($sum1 / $self->degree( $node1 )) + ( $sum2 / $self->degree( $node2 ));

ContextGraph.pm  view on Meta::CPAN

	my %seen;

	foreach my $term ( keys %{$words} ) {

		my $t = _nodeify( 'T', $term );

		if ( exists $n->{$t} ){

			# Update the local count, if necessary
			my $curr_val = $n->{$t};
			my ( undef, $loc ) = split m/,/, $curr_val;

			unless ( $loc == $words->{$term} ) {
				$n->{$t} = join ',', 1, $words->{$term};
				$must_reweight++;
			}	
			}

		else {

			$n->{$t} = 

ContextGraph.pm  view on Meta::CPAN

		*fh = *STDOUT;
	}
	foreach my $doc ( @docs ) {
		my $n = $self->{neighbors}{$doc};

		my $row_count = scalar keys %{$n};
		print $fh $row_count;

		foreach my $t ( sort keys %{$doc} ) {
			my $index = $lookup{$t};
			my ( $weight, undef ) = split m/,/, $n->{$t};
			print $fh ' ', $index, ' ', $weight;
		}
		print $fh "\n";
	}
}



=item near_neighbors [NODE] 

ContextGraph.pm  view on Meta::CPAN

=item simple_search QUERY

This is the DWIM method - takes a query string as its argument, and returns
a list of documents, sorted by relevance (highest score first).

The query is split on whitespace; each word has its non-word characters
removed and is lowercased before being turned into a term node.  Words that
are empty after this cleanup (stray punctuation, or the empty leading field
produced when the query starts with whitespace) are ignored.  An undefined
query is treated as an empty string.

=cut

sub simple_search {
	my ( $self, $query ) = @_;
	$query = '' unless defined $query;

	# Normalise each whitespace-separated word, then drop the ones that
	# ended up empty so that no bogus empty term node is searched for.
	my @words = grep { length }
				map  { s/\W+//g; lc($_) }
				split m/\s+/, $query;

	my @nodes   = _nodeify( 'T', @words );
	my $results = $self->raw_search( @nodes );

	# Only the first value returned by _partition (the per-document
	# scores) is needed here.
	my ( $docs ) = _partition( $results );

	# Keep the result in an array so that a scalar-context caller still
	# gets the element count, as before.
	my @sorted_docs = sort { $docs->{$b} <=> $docs->{$a} } keys %{$docs};
	return @sorted_docs;
}

=item find_by_title @TITLES

Given a list of patterns, searches for documents with matching titles

ContextGraph.pm  view on Meta::CPAN

		my %neighbors = %{$self->{neighbors}{$bad_node}};
		
		foreach my $n ( keys %neighbors ) {
			
			#print "\t $target ($bad_node) neighbor $n\n";
			if ( exists  $self->{neighbors}{$target}{$n} ) {
				#print "\t\t$n has link to $bad_node\n";
				# combine the local counts for the term members of the edge
				my $curr_val = $tnode->{$n};
				my $aug_val  = $self->{neighbors}{$bad_node}{$n};
				my ($w1, $c1) = split m/,/, $curr_val;
				my ($w2, $c2) = split m/,/, $aug_val;
				my $new_count = $c1 + $c2;
				$curr_val =~ s/,\d+$/,$new_count/;
				$tnode->{$n} = $curr_val;
				
				
			} else {
				
				die "sanity check failed for existence test"
					if exists $self->{neighbors}{$target}{$n};
				



( run in 0.841 second using v1.01-cache-2.11-cpan-71847e10f99 )