Search-ContextGraph
view release on metacpan or search on metacpan
ContextGraph.pm view on Meta::CPAN
$ref = $params{parse}->( $content );
croak "did not get an appropriate reference back after parsing"
unless ref $ref and ref $ref =~ /(HASH|ARRAY)/;
} else {
my $code = sub {
my $txt = shift;
$txt =~ s/\W/ /g;
my @toks = split m/\s+/, $txt;
\@toks;
};
$ref = $code->($content);
}
return unless $ref;
$self->add( $title, $ref );
}
ContextGraph.pm view on Meta::CPAN
#warn "Found ", scalar @shared, " nodes shared between $n1 and $n2\n";
my $node1 = _nodeify( $type, $n1 );
my $node2 = _nodeify( $type, $n2 );
# formula is w(t1,d1)/deg(d1) + w(t1,d2)/deg(d2) ... ) /deg( t1 )
#warn "Calculating distance\n";
my $sum1 = 0;
my $sum2 = 0;
foreach my $next ( @shared ) {
my ( undef, $lcount1) = split m/,/, $self->{neighbors}{$node1}{$next};
my ( undef, $lcount2) = split m/,/, $self->{neighbors}{$node2}{$next};
my $degree = $self->degree( $next );
#warn "\t degree of $next is $degree\n";
my $elem1 = $lcount1 / $degree;
$sum1 += $elem1;
my $elem2 = $lcount2 / $degree;
$sum2 += $elem2;
}
#warn "sum is $sum1, $sum2\n";
my $final = ($sum1 / $self->degree( $node1 )) + ( $sum2 / $self->degree( $node2 ));
ContextGraph.pm view on Meta::CPAN
my %seen;
foreach my $term ( keys %{$words} ) {
my $t = _nodeify( 'T', $term );
if ( exists $n->{$t} ){
# Update the local count, if necessary
my $curr_val = $n->{$t};
my ( undef, $loc ) = split m/,/, $curr_val;
unless ( $loc == $words->{$term} ) {
$n->{$t} = join ',', 1, $words->{$term};
$must_reweight++;
}
}
else {
$n->{$t} =
ContextGraph.pm view on Meta::CPAN
*fh = *STDOUT;
}
foreach my $doc ( @docs ) {
my $n = $self->{neighbors}{$doc};
my $row_count = scalar keys %{$n};
print $fh $row_count;
foreach my $t ( sort keys %{$doc} ) {
my $index = $lookup{$t};
my ( $weight, undef ) = split m/,/, $n->{$t};
print $fh ' ', $index, ' ', $weight;
}
print $fh "\n";
}
}
=item near_neighbors [NODE]
ContextGraph.pm view on Meta::CPAN
=item simple_search QUERY
This is the DWIM method - takes a query string as its argument, and returns an array
of documents, sorted by relevance.
=cut
# simple_search QUERY
#
# Convenience front end to raw_search: turns a free-text query string into
# term nodes, runs the search, and returns the matching document keys
# ordered from highest to lowest score.
#
# Parameters:
#   $self  - the graph object (must provide raw_search)
#   $query - query string; an undefined value is treated as an empty query
#
# Returns: the list of document keys from the search results, sorted by
# descending numeric score.
sub simple_search {
    my ( $self, $query ) = @_;
    $query = '' unless defined $query;

    # Tokenise on whitespace.  The special ' ' pattern skips leading
    # whitespace, so a query such as "  foo" no longer yields an empty
    # first token.  Each token is lower-cased and stripped of non-word
    # characters; tokens that end up empty (e.g. "--" or "?") are dropped
    # so that no empty term is ever turned into a node.
    my @words =
        grep { length }
        map  { my $word = lc; $word =~ s/\W+//g; $word }
        split ' ', $query;

    my @nodes   = _nodeify( 'T', @words );
    my $results = $self->raw_search( @nodes );

    # _partition returns a second value as well; only the first (the
    # document => score hash reference) is needed here.
    my ( $docs ) = _partition( $results );

    my @sorted_docs = sort { $docs->{$b} <=> $docs->{$a} } keys %{$docs};
    return @sorted_docs;
}
=item find_by_title @TITLES
Given a list of patterns, searches for documents with matching titles
ContextGraph.pm view on Meta::CPAN
my %neighbors = %{$self->{neighbors}{$bad_node}};
foreach my $n ( keys %neighbors ) {
#print "\t $target ($bad_node) neighbor $n\n";
if ( exists $self->{neighbors}{$target}{$n} ) {
#print "\t\t$n has link to $bad_node\n";
# combine the local counts for the term members of the edge
my $curr_val = $tnode->{$n};
my $aug_val = $self->{neighbors}{$bad_node}{$n};
my ($w1, $c1) = split m/,/, $curr_val;
my ($w2, $c2) = split m/,/, $aug_val;
my $new_count = $c1 + $c2;
$curr_val =~ s/,\d+$/,$new_count/;
$tnode->{$n} = $curr_val;
} else {
die "sanity check failed for existence test"
if exists $self->{neighbors}{$target}{$n};
( run in 0.841 second using v1.01-cache-2.11-cpan-71847e10f99 )