HTML-EP-Glimpse
view release on metacpan or search on metacpan
lib/HTML/EP/Glimpse.pm view on Meta::CPAN
# -*- perl -*-
#
# HTML::EP::Glimpse - A simple search engine using Glimpse
#
#
# Copyright (C) 1998 Jochen Wiedmann
# Am Eisteich 9
# 72555 Metzingen
# Germany
#
# Phone: +49 7123 14887
# Email: joe@ispsoft.de
#
# All rights reserved.
#
# You may distribute this module under the terms of either
# the GNU General Public License or the Artistic License, as
# specified in the Perl README file.
#
############################################################################
require 5.005;
use strict;
use HTML::EP ();
use HTML::EP::Locale ();
use HTML::EP::Glimpse::Config ();
package HTML::EP::Glimpse;
$HTML::EP::Glimpse::VERSION = '0.05';
@HTML::EP::Glimpse::ISA = qw(HTML::EP::Locale HTML::EP);
sub _prefs {
my $self = shift; my $attr = shift; my $prefs = shift;
$self->{'glimpse_config'} ||= $HTML::EP::Glimpse::Config::config;
my $config = $self->{'glimpse_config'};
my $vardir = $config->{'vardir'};
die "A directory $vardir does not exist. Please create it, with write "
. " permissions for the web server, or modify the value of "
. " vardir in $INC{'HTML/EP/Glimpse/Config.pm'}."
unless -d $vardir;
my $prefs_file = "$vardir/prefs";
if (!$prefs) {
# Load Prefs
require Safe;
my $cpt = Safe->new();
$prefs = $self->{'prefs'} = $cpt->rdo($prefs_file) || {};
$prefs->{'rootdir'} = $ENV{'DOCUMENT_ROOT'}
unless exists($prefs->{'rootdir'});
$prefs->{'dirs'} = "/"
unless exists($prefs->{'dirs'});
$prefs->{'dirs_ignored'} =
(($ENV{'PATH_INFO'} =~ /(.*)\//) ? $1 : "")
unless exists($prefs->{'dirs_ignored'});
$prefs->{'suffix'} = ".html .htm"
unless exists($prefs->{'suffix'});
} else {
# Save Prefs
require Data::Dumper;
my $d = Data::Dumper->new([$prefs])->Indent(1)->Terse(1)->Dump();
require Symbol;
my $fh = Symbol::gensym();
if ($self->{'debug'}) {
print "Saving Preferences to $prefs_file.\n";
$self->print("Saving data:\n$d\n");
}
die "Could not save data into $prefs_file: $!. Please verify whether"
. " the web server has write permissions in $vardir and on"
. " $prefs_file."
unless open($fh, ">$prefs_file") and (print $fh "$d\n")
and close($fh);
}
$self->{'glimpse_prefs'} = $prefs;
}
sub _ep_glimpse_load {
my $self = shift; my $attr = shift;
my $cgi = $self->{'cgi'};
my $prefs = $self->_prefs($attr);
if ($cgi->param('modify')) {
my $modified = 0;
foreach my $p ($cgi->param()) {
if ($p =~ /^glimpse_prefs_(.*)/) {
my $sp = $1;
my $old = $prefs->{$sp};
my $new = $cgi->param($p);
if (!defined($old)) {
if (defined($new)) {
$modified = 1;
$prefs->{$sp} = $new;
}
} elsif (!defined($new)) {
$modified = 1;
$prefs->{$sp} = $new;
} else {
$modified = ($new ne $old);
$prefs->{$sp} = $new;
}
}
}
if ($self->{'debug'}) {
$self->print("Modifications detected.\n");
}
$self->_prefs($attr, $prefs);
}
'';
}
sub _ep_glimpse_create {
my $self = shift; my $attr = shift;
my $prefs = $self->_prefs($attr);
my $vardir = $self->{'glimpse_config'}->{'vardir'};
my $debug = $self->{'debug'};
my $cfg = $self->{'glimpse_config'};
my $rootdir = $prefs->{'rootdir'};
my $dirlist = $prefs->{'dirs'};
$dirlist =~ s/\s+/ /sg;
$dirlist =~ s/^\s+//;
$dirlist =~ s/\s+$//;
my @dirs = map { "$rootdir/$_" } split(/ /, $dirlist);
$dirlist = $prefs->{'dirs_ignored'};
$dirlist =~ s/\s+/ /sg;
$dirlist =~ s/^\s+//;
$dirlist =~ s/\s+$//;
my @dirs_ignored = map { "$rootdir/$_" } split(/ /, $dirlist);
my $matchesDirsIgnored;
if (@dirs_ignored) {
my $dirsIgnoredRe = join("|", map { "\\Q$_\\E" } @dirs_ignored);
my $func = "sub { shift() =~ m[^(?:$dirsIgnoredRe)] }";
$matchesDirsIgnored = eval $func;
$self->print("Making function for directory match: $func",
" ($matchesDirsIgnored))\n") if $debug;
} else {
$matchesDirsIgnored = sub { 0 }
}
my $suffixList = $prefs->{'suffix'};
$suffixList =~ s/\s+/ /sg;
$suffixList =~ s/^\s+//;
$suffixList =~ s/\s+$//;
my @suffix = split(/ /, $suffixList);
my $matchesSuffix;
if (@suffix) {
my $suffixRe = join("|", map { "\\Q$_\\E" } @suffix);
my $func = "sub { shift() =~ m[(?:$suffixRe)\$] }";
$matchesSuffix = eval $func;
$self->print("Making function for suffix match: $func",
"($matchesSuffix)\n") if $debug;
} else {
$matchesSuffix = sub { 1 }
}
my $fileList = '';
require File::Find;
File::Find::find
(sub {
if (&$matchesDirsIgnored($File::Find::dir)) {
$self->print("Skipping directory $File::Find::dir.\n")
if $debug;
$File::Find::prune = 1;
} else {
my $f = $File::Find::name;
my $ok = ((-f $f) and &$matchesSuffix($f));
$self->print(" $f: $ok\n") if $debug;
$fileList .= "$f\n" if $ok;
}
}, @dirs);
die "No files found" unless $fileList;
my $fh = Symbol::gensym();
my $cmd = "$cfg->{'glimpseindex_path'} -b -F -H $vardir -X";
$self->print("Creating pipe to command $cmd\n") if $debug;
die "Error while creating index: $!"
unless (open($fh, "| $cmd >$vardir/.glimpse_output 2>&1") and
(print $fh $fileList) and close($fh));
$fileList;
}
sub _ep_glimpse_matchline {
my $self = shift; my $attr = shift;
my $template = defined($attr->{'template'}) ?
$attr->{'template'} : return undef;
$self->print("Setting matchline template to $template\n")
if $self->{'debug'};
$self->{'line_template'} = $template;
'';
}
sub _format_MATCHLINE {
my $self = shift; my $f = shift;
my $debug = $self->{'debug'};
my $template = $self->{'line_template'};
my $lines = $f->{'lines'};
$self->print("MATCHLINE: f = $f, lines = $lines (", @$lines, ")\n",
"line_template = $template\n") if $debug;
my $output = $self->_ep_list({'items' => $lines,
'item' => 'l',
'template' => $template});
$self->print("output = ", (defined($output) ? $output : "undef"), "\n")
if $debug;
$output;
}
sub _ep_glimpse_search {
my $self = shift; my $attr = shift;
my $prefs = $self->_prefs($attr);
my $vardir = $self->{'glimpse_config'}->{'vardir'};
my $cgi = $self->{'cgi'};
my $debug = $self->{'debug'};
my $start = ($cgi->param('start') or 0);
my $max = ($cgi->param('max') or $attr->{'max'} or 20);
my @opts = ($self->{'glimpse_config'}->{'glimpse_path'}, '-UOnbqy', '-L',
"0:" . ($start+$max), '-H', $vardir);
my $case_sensitive = $cgi->param('opt_case_sensitive') ? 1 : 0;
push(@opts, '-i') unless $case_sensitive;
my $word_boundary = $cgi->param('word_boundary') ? 1 : 0;
push(@opts, '-w') if $word_boundary;
my $whole_file = $cgi->param('opt_whole_file') ? 1 : 0;
push(@opts, '-W') unless $whole_file;
my $opt_regex = $cgi->param('opt_regex') ? 1 : 0;
push(@opts, $opt_regex ? '-e' : '-k');
my $opt_or = $cgi->param('opt_or') ? 1 : 0;
# Now for the hard part: Split the search string into words
my $search = $cgi->param('search');
$self->{'link_opts'} = $self->{'env'}->{'PATH_INFO'} . "?"
. join("&", "search=" . CGI->escape($search),
"max=$max", "opt_case_sensitive=$case_sensitive",
"word_boundary=$word_boundary", "opt_whole_file=$whole_file",
"opt_regex=$opt_regex", "opt_or=$opt_or");
my @words;
while (length($search)) {
$search =~ s/^\s+//s;
if ($search =~ /^"/s) {
if ($search =~ /"(.*?)"\s+(.*)/s) {
push(@words, $1);
$search = $2;
} else {
$search =~ s/^"//s;
$search =~ s/"$//s;
push(@words, $search);
last;
}
} else {
$search =~ s/^(\S+)//s;
push(@words, $1) if $1;
}
}
if (!@words) {
my $language = $self->{'_ep_language'};
my $msg;
if ($language eq 'de') {
$msg = "Keine Suchbegriffe gefunden";
} else {
$msg = "No search strings found";
}
$self->_ep_error({'type' => 'user', 'msg' => $msg});
}
my $sep = $opt_or ? ';' : ',';
push(@opts, join($sep, @words));
# First try using fork() and system() for security reasons.
my $ok;
my $tmpnam;
my $fh = eval {
lib/HTML/EP/Glimpse.pm view on Meta::CPAN
if ($ignoreFiles) {
--$ignoreFiles
} else {
push(@files, {'file' => $file,
'fileNum' => ++$fileNum,
'title' => $title,
'lines' => [@lines]})
}
}
undef $file;
undef $lineNum;
@lines = ();
#$self->print("Blank line detected\n") if $debug;
} elsif ($blank_seen) {
$blank_seen = 0;
if ($line =~ /^(\S+)\s+(\S.*?)\s+$/) {
$file = $1;
$title = $2;
#$self->print("New file detected: $file, $title\n")
# if $debug;
} elsif ($line =~ /^(\S+)\:\s*$/) {
$file = $title = $1;
} else {
$self->print("Cannot parse file line: $line") if $debug;
}
} elsif ($file) {
if ($lineNum) {
push(@lines, {'line' => $line,
'lineNum' => $lineNum,
'byteOffset' => $byteOffset,
'offsetStart' => $offsetStart,
'offsetEnd' => $offsetEnd});
#$self->print("Match line detected: $lineNum, $line\n")
# if $debug;
undef $lineNum;
} elsif ($line =~ /^(\d+)\:\s+(\d+)\=\s+\@(\d+)\{(\d+)\}/) {
$lineNum = $1;
$byteOffset = $2;
$offsetStart = $3;
$offsetEnd = $4;
} else {
$self->print("Cannot parse line: $line\n") if $debug;
}
} else {
$self->print("Unexpected line: $line\n") if $debug;
}
}
if ($file) {
if ($ignoreFiles) {
--$ignoreFiles
} else {
push(@files, {'file' => $file,
'fileNum' => ++$fileNum,
'title' => $title,
'lines' => [@lines]})
}
}
$self->print("Found " . scalar(@files) . " files\n") if $debug;
foreach my $file (@files) {
my $url = $file->{'file'};
$url =~ s/^\Q$prefs->{'rootdir'}\E//;
$url =~ s/^\/+/\//;
$file->{'url'} = $url;
}
$self->{'files'} = \@files;
if (@files == $max) {
$self->{'next'} = $start + $max;
}
$self->{'prev'} = $start ? $start - $max : -1;
} unless $@;
close $fh if $fh;
undef $fh;
unlink $tmpnam if $tmpnam;
'';
}
1;
__END__
=pod
=head1 NAME
HTML::EP::Glimpse - A simple search engine using Glimpse
=head1 SYNOPSIS
<!-- Put the following in your EP page: -->
<!-- Load the Glimpse package: -->
<ep-package name="HTML::EP::Glimpse">
<!-- Run glimpse: -->
<ep-glimpse-search>
<!-- List the hits: -->
<ep-list items=files item=f>
<tr><td><a href="$f->url$">$f->title$</a></td>
</ep-list>
=head1 DESCRIPTION
This is a simple search engine I wrote for the movie pages of a friend,
Anne Haasis.
It is based on HTML::EP, my embedded Perl system and Glimpse, the well
known indexing system, as a backend.
=head1 INSTALLATION
First of all, you have to install the latest version of HTML::EP, 0.20
or later, and it's prerequisites. Next you have to install this package,
HTML::EP::Glimpse. If you don't know how to install Perl packages, it's
fairly simple: Fetch the required archives from any CPAN mirror, for
example
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module/HTML
( run in 1.489 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )