Apache-Wyrd
view release on metacpan or search on metacpan
Wyrd/Services/SearchParser.pm view on Meta::CPAN
sub parse {
my ($self, $phrase, @options) = @_;
#remove leading bogus operators
$phrase =~ s/^\s*(AND|OR|DIFF)\s*//;
#change leading NOTs to negative searches
#WARNING: This is only for compatibility with search engines which
#understand that a leading '-' in a search term means "return every
#document that does not match this term". Apache::Wyrds::Services::Index
#is one such engine.
$phrase =~ s/^\s*(NOT)\s*/-/;
my $result = $self->recursive_parse($phrase, @options);
return @{$self->{'hash'}->{$result}};
}
sub recursive_parse {
my ($self, $phrase, @options) = @_;
my ($matched) = (1);
while ($matched) {#first deal with parentheticals using a non-greedy regexp.
$matched = $phrase =~ s/\(([^\(]*?)\)/$self->recursive_parse($1)/e;
}
$matched = 1;
while ($matched) {#then deal with ands
$matched = $phrase =~ s/(\S+)\s+AND\s+(\S+)/$self->intersection($self->recursive_parse($1),$self->recursive_parse($2))/e;
}
$matched = 1;
while ($matched) {#then deal with ors
$matched = $phrase =~ s/(\S+)\s+OR\s+(\S+)/$self->union($self->recursive_parse($1),$self->recursive_parse($2))/e;
}
$matched = 1;
while ($matched) {#then deal with nots
$matched = $phrase =~ s/(\S+)\s+NOT\s+(\S+)/$self->negation($self->recursive_parse($1),$self->recursive_parse($2))/e;
}
$matched = 1;
while ($matched) {#then deal with diffs
$matched = $phrase =~ s/(\S+)\s+DIFF\s+(\S+)/$self->difference($self->recursive_parse($1),$self->recursive_parse($2))/e;
}
return $self->get_results($phrase, @options);
}
sub get_results {
my ($self, $item, @options) = @_;
return $item if ($item =~ /__RESULT_\d+__/);
my $id = $self->new_id;
$self->{'hash'}->{$id} = [$self->{'creator'}->search($item, @options)];
#print "item $item is $id\n";
return $id;
}
sub new_id {
my $self=shift;
return '__RESULT_' . $self->{'counter'}++ . '__';
}
sub union {
my ($self, $a, $b) = @_;
my $id = $self->new_id;
$self->{'hash'}->{$id} = $self->join_sets('u', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
#use Data::Dumper;
#warn Dumper($self->{'hash'}->{$a}) . ' union ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
return $id;
}
sub intersection {
my ($self, $a, $b) = @_;
my $id = $self->new_id;
$self->{'hash'}->{$id} = $self->join_sets('i', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
#use Data::Dumper;
#warn Dumper($self->{'hash'}->{$a}) . ' intersection ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
return $id;
}
sub negation {
my ($self, $a, $b) = @_;
my $id = $self->new_id;
$self->{'hash'}->{$id} = $self->join_sets('n', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
#use Data::Dumper;
#warn Dumper($self->{'hash'}->{$a}) . ' negation ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
return $id;
}
sub difference {
my ($self, $a, $b) = @_;
my $id = $self->new_id;
$self->{'hash'}->{$id} = $self->join_sets('d', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b});
#use Data::Dumper;
#warn Dumper($self->{'hash'}->{$a}) . ' difference ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n";
return $id;
}
sub join_sets {
my ($self, $type, $index, $a, $b) = @_;
my (@intersection, @difference) = ();
my (%count, %objects) = ();
foreach my $e (@$a, @$b) {
#WARNING: this assumes the arrays @$a and @$b are made of UNIQUE items.
#Apache::Wyrd::Services::Index returns unique results from the search method.
$count{$e->{$index}}++;
$objects{$e->{$index}} = $e;
}
return [values %objects] if ($type eq 'u');
if ($type eq 'n') {
foreach my $e (@$b) {
delete $objects{$e->{$index}};
}
return [values %objects];
}
foreach my $e (keys %count) {
if ($count{$e} == 2) {
push @intersection, $objects{$e};
} else {
push @difference, $objects{$e};
}
}
if ($type eq 'i') {
return \@intersection
}
return \@difference;
}
=pod
=back
=head1 BUGS/CAVEATS/RESERVED METHODS
UNKNOWN
=head1 AUTHOR
Barry King E<lt>wyrd@nospam.wyrdwright.comE<gt>
=head1 SEE ALSO
=over
=item Apache::Wyrd
General-purpose HTML-embeddable perl object
=back
=head1 LICENSE
Copyright 2002-2007 Wyrdwright, Inc. and licensed under the GNU GPL.
( run in 2.577 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )