Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Services/SearchParser.pm  view on Meta::CPAN

sub parse {
	my ($self, $phrase, @options) = @_;

	#remove leading bogus operators
	$phrase =~ s/^\s*(AND|OR|DIFF)\s*//;

	#change leading NOTs to negative searches
	#WARNING: This is only for compatibility with search engines which
	#understand that a leading '-' in a search term means "return every
	#document that does not match this term".  Apache::Wyrds::Services::Index
	#is one such engine.
	$phrase =~ s/^\s*(NOT)\s*/-/;

	my $result = $self->recursive_parse($phrase, @options);
	return @{$self->{'hash'}->{$result}};
}

sub recursive_parse {
	my ($self, $phrase, @options) = @_;
	my ($matched) = (1);
	while ($matched) {#first deal with parentheticals using a non-greedy regexp.
		$matched = $phrase =~ s/\(([^\(]*?)\)/$self->recursive_parse($1)/e;
	}
	$matched = 1;
	while ($matched) {#then deal with ands
		$matched = $phrase =~ s/(\S+)\s+AND\s+(\S+)/$self->intersection($self->recursive_parse($1),$self->recursive_parse($2))/e;
	}
	$matched = 1;
	while ($matched) {#then deal with ors
		$matched = $phrase =~ s/(\S+)\s+OR\s+(\S+)/$self->union($self->recursive_parse($1),$self->recursive_parse($2))/e;
	}
	$matched = 1;
	while ($matched) {#then deal with nots
		$matched = $phrase =~ s/(\S+)\s+NOT\s+(\S+)/$self->negation($self->recursive_parse($1),$self->recursive_parse($2))/e;
	}
	$matched = 1;
	while ($matched) {#then deal with diffs
		$matched = $phrase =~ s/(\S+)\s+DIFF\s+(\S+)/$self->difference($self->recursive_parse($1),$self->recursive_parse($2))/e;
	}
	return $self->get_results($phrase, @options);
}

sub get_results {
	my ($self, $item, @options) = @_;
	return $item if ($item =~ /__RESULT_\d+__/);
	my $id = $self->new_id;
	$self->{'hash'}->{$id} = [$self->{'creator'}->search($item, @options)];
	#print "item $item is $id\n";
	return $id;
}

sub new_id {
	my $self=shift;
	return '__RESULT_' . $self->{'counter'}++ . '__';
}

sub union {
	my ($self, $a, $b) = @_;
	my $id = $self->new_id;
	$self->{'hash'}->{$id} = $self->join_sets('u', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
	#use Data::Dumper;
	#warn Dumper($self->{'hash'}->{$a}) . ' union ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
	return $id;
}

sub intersection {
	my ($self, $a, $b) = @_;
	my $id = $self->new_id;
	$self->{'hash'}->{$id} = $self->join_sets('i', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
	#use Data::Dumper;
	#warn Dumper($self->{'hash'}->{$a}) . ' intersection ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
	return $id;
}

sub negation {
	my ($self, $a, $b) = @_;
	my $id = $self->new_id;
	$self->{'hash'}->{$id} = $self->join_sets('n', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
	#use Data::Dumper;
	#warn Dumper($self->{'hash'}->{$a}) . ' negation ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
	return $id;
}

sub difference {
	my ($self, $a, $b) = @_;
	my $id = $self->new_id;
	$self->{'hash'}->{$id} = $self->join_sets('d', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
	#use Data::Dumper;
	#warn Dumper($self->{'hash'}->{$a}) . ' difference ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
	return $id;
}

sub join_sets {
	my ($self, $type, $index, $a, $b) = @_;
	my (@intersection, @difference) = ();
	my (%count, %objects) = ();
	foreach my $e (@$a, @$b) {
		#WARNING: this assumes the arrays @$a and @$b are made of UNIQUE items.
		#Apache::Wyrd::Services::Index returns unique results from the search method.
		$count{$e->{$index}}++;
		$objects{$e->{$index}} = $e;
	}
	return [values %objects] if ($type eq 'u');
	if ($type eq 'n') {
		foreach my $e (@$b) {
			delete $objects{$e->{$index}};
		}
		return [values %objects];
	}
	foreach my $e (keys %count) {
		if ($count{$e} == 2) {
			push @intersection, $objects{$e};
		} else {
			push @difference, $objects{$e};
		}
	}
	if ($type eq 'i') {
		return \@intersection
	}
	return \@difference;
}


=pod

=back

=head1 BUGS/CAVEATS/RESERVED METHODS

UNKNOWN

=head1 AUTHOR

Barry King E<lt>wyrd@nospam.wyrdwright.comE<gt>

=head1 SEE ALSO

=over

=item Apache::Wyrd

General-purpose HTML-embeddable perl object

=back

=head1 LICENSE

Copyright 2002-2007 Wyrdwright, Inc. and licensed under the GNU GPL.



( run in 2.577 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )