DBIx-FullTextSearch

 view release on metacpan or  search on metacpan

lib/DBIx/FullTextSearch/Phrase.pm  view on Meta::CPAN

			id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}} unsigned not null auto_increment,
			primary key (id),
			unique (word)
			)
EOF

	my $dbh = $fts->{'dbh'};
	$dbh->do($CREATE_DATA) or return $dbh->errstr;
	push @{$fts->{'created_tables'}}, $fts->{'data_table'};
	$dbh->do($CREATE_WORD_ID) or return $dbh->errstr;
	push @{$fts->{'created_tables'}}, $fts->{'word_id_table'};
	return;
}

# Store the word positions of one document.
#
# Arguments:
#   $self  - backend object; reads $self->{'fts'} and keeps a prepared
#            statement in $self->{'select_wordid_sth'} and a word => id
#            cache in $self->{'wordids'}
#   $id    - document id; it is interpolated as-is into the insert
#            statement (presumably always numeric -- verify against callers)
#   $words - hash reference mapping each word to an array reference of
#            its positions in the document
#
# Returns the number of words written for the document, or 0 when the
# new word ids could not be stored (in which case nothing is written to
# the data table).  Exceptions raised by the database handle are
# re-thrown after the table lock has been released.
sub add_document {
  my ($self, $id, $words) = @_;
  my $fts = $self->{'fts'};
  my $dbh = $fts->{'dbh'};
  my $word_id_table = $fts->{'word_id_table'};
  if (not defined $self->{'select_wordid_sth'}) {
    $self->{'select_wordid_sth'} = $dbh->prepare("
       select id from $word_id_table where word = ?
       ");
  }
  my $data_table = $fts->{'data_table'};
  my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}};
  my $num_words = 0;
  my (@wids, @data, @widshandler, @datahandler);
  # Ids allocated during this call.  They are copied into the long-lived
  # cache only after the matching rows have really been inserted.
  my %new_wordids;
  my $wids_stored = 1;

  # The word id table is locked so that max(id) stays valid while new
  # ids are being handed out.
  $dbh->do("lock tables $word_id_table write");
  my $completed = eval {
    my ($maxid) = $dbh->selectrow_array("select max(id) from $word_id_table");
    foreach my $word (keys %$words) {
      my $wordid = $self->{'wordids'}->{$word};
      if (!defined $wordid) {
        $self->{'select_wordid_sth'}->execute($word);
        ($wordid) = $self->{'select_wordid_sth'}->fetchrow_array();
        if ($wordid) {
          # the word already exists in the table, safe to remember
          $self->{'wordids'}->{$word} = $wordid;
        }
        else {
          # unknown word: reserve the next id (undef++ gives 1 for an
          # empty table)
          $maxid++;
          push @widshandler, "(?,$maxid)";
          push @wids, $word;
          $wordid = $maxid;
          $new_wordids{$word} = $wordid;
        }
      }
      push @datahandler, "($wordid,$id,?)";
      # the value in the %$words hash is an array of word positions
      push @data, pack $packstring.'*', @{$words->{$word}};
      $num_words++;
    }
    if (@wids) {
      $wids_stored = $dbh->do("insert into $word_id_table values " .
                              join(',', @widshandler), undef, @wids);
    }
    1;
  };
  my $error = $@;
  if (!$completed) {
    # Never leave the lock behind; the unlock is guarded so that it
    # cannot mask the original error.
    eval { $dbh->do("unlock tables") };
    die $error;
  }
  $dbh->do("unlock tables");

  # The insert of the new word ids reported a failure without throwing:
  # do not cache ids that do not exist and do not write rows that would
  # refer to them.
  return 0 unless $wids_stored;

  $self->{'wordids'}->{$_} = $new_wordids{$_} for keys %new_wordids;

  $dbh->do("insert into $data_table values " .
           join(',', @datahandler), undef, @data) if @data;
  return $num_words;
}

# Replace the stored data of one document: delete every row of the data
# table whose doc_id equals $id, then store $words again through
# add_document.  The result of add_document is handed back to the caller.
sub update_document {
	my ($self, $id, $words) = @_;

	my $settings = $self->{'fts'};
	my $table    = $settings->{'data_table'};

	# remove the old entries first so they are not kept next to the new ones
	$settings->{'dbh'}->do("delete from $table where doc_id = ?", {}, $id);

	return $self->add_document($id, $words);
}

sub contains_hashref {
	my $self = shift;
	my $fts = $self->{'fts'};
	my $dbh = $fts->{'dbh'};
	my $data_table = $fts->{'data_table'};
	my $word_id_table = $fts->{'word_id_table'};

	my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}};

	my $SQL = qq{
		select doc_id, idx
		from $data_table, $word_id_table
		where word like ?
			and id = word_id
};
	my $out = {};

	for my $phrase (@_){

		my @words = split(' ', $phrase);

		my @sths;
		for (my $i = 0; $i < @words; $i++) {
			$sths[$i] = $dbh->prepare($SQL);
			$sths[$i]->execute($words[$i]);
		}

		my %prev_pos = ();
		my %cur_pos = ();

		# iterate through words in phrase
		for (my $i = 0; $i < @words; $i++){
			if($i > 0){
				%prev_pos = %cur_pos;
				%cur_pos = ();
			}
			# get docs that have this word
			while (my ($doc, $data) = $sths[$i]->fetchrow_array){
				# get positions of words in doc
				my @positions = unpack $packstring.'*', $data;
				map { $cur_pos{$doc}->{$_} = 1 } @positions;
			}
			if($i > 0){
				# check to see if word $i comes after word $i-1
				for my $doc (keys %cur_pos){
					my $isPhrase = 0;
					for my $position (keys %{$cur_pos{$doc}}){
						if ($position > 0 && exists $prev_pos{$doc}{$position - 1}){
							$isPhrase = 1;
						} else { 
							delete $cur_pos{$doc}{$position};



( run in 0.866 second using v1.01-cache-2.11-cpan-5837b0d9d2c )