Catalog

 view release on metacpan or  search on metacpan

lib/Catalog/dmoz.pm  view on Meta::CPAN

#   along with this program; if not, write to the Free Software
#   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
#
# $Header: /cvsroot/Catalog/Catalog/lib/Catalog/dmoz.pm,v 1.10 2000/01/27 18:15:14 loic Exp $
#
package Catalog::dmoz;

use strict;
use vars qw(@ISA @tablelist_theme %default_templates $head);

use Catalog;
use Catalog::tools::tools;

# Catalog::dmoz specializes the generic Catalog class.
@ISA = ('Catalog');

# Base names of the additional tables managed by this package:
# cbuild_theme() fetches a schema for each of them and executes it,
# cdestroy_real() looks for the matching "<base>_<catalog name>" table.
@tablelist_theme = ('catalog_related', 'catalog_newsgroup');

# Opening markup interpolated at the top of the built in templates.
$head = "\n<body bgcolor=#ffffff>\n";

#
# Built in templates
#
# Maps a template file name to the value returned by template_parse()
# for the inline template text. initialize() merges these entries into
# the object's 'templates' table.
#
# cimport.html is a static help page explaining how to convert and
# load the DMOZ dumps with convert_dmoz. _COMMENT_ appears inside the
# text as a literal marker; NOTE(review): presumably substituted when
# the template is rendered -- confirm against the template code.
#
# The text opens no <form> element ($head only emits <body>), so it
# must not close one either.
#
%default_templates
    = (
       'cimport.html' => template_parse('inline cimport', "$head
<title>Load a DMOZ catalog</title>

<center><h1>Load a DMOZ catalog</h1></center>

<center><h3><font color=red>_COMMENT_</font></h3></center>

Follow the instructions below to build your own DMOZ catalog. We do
not use the XML loader for two reasons : the dmoz data is not really
XML and needs checking and directly loading into the database using
the <i>load data infile</i> is much faster.

<p>
<ul>
<li> Load files content.rdf.gz and structure.rdf.gz from <a href=http://dmoz.org/rdf.html>http://dmoz.org/rdf.html</a>
and make sure they are in the same directory (let's say ~/dmoz).
<li> cd ~/dmoz
<li> convert_dmoz -exclude '^/Adult' -what content content.rdf.gz
<li> It prints a dot from time to time to show that it does not hang.
<li> It creates the following files:
     <ul>
     <li> category.txt (table catalog_category_dmoz)
     <li> entry2category.txt (table catalog_entry2category_dmoz)
     <li> category2category.txt (table catalog_category2category_dmoz)
     <li> dmozrecords.txt (table dmozrecords)
     </ul>
<li> Load the files into the database using the following command
<pre>
convert_dmoz -load all ~/dmoz
</pre>
<li> Click on <b>browse</b> link in the Control Panel and check that
     the catalog displays well. <b>Warning</b> the first time you click
     on <b>browse</b> Catalog will rebuild some internal tables and it
     will take some time to display. While working Catalog sends white
     space characters to keep the connection busy and prevent timeouts.
     These characters also tell you that Catalog is working and not hanging.
     One character is printed for each category. If you have 200 000 categories
     you should expect to download 200KB.
<li> Click on the <b>count</b> link in the Control Panel to calculate
     how many entries each category contains. It takes about the same time
     to complete.
     One character is printed for each category. If you have 200 000 categories
     you should expect to download 200KB.
<li> Check the FAQ in the documentation if you have problems and search 
     the <a href=http://www.egroups.com/group/sengacatalog/info.html>Catalog mailing list</a>
     for discussion on similar problems.
</ul>
"),
);

#
# Run the parent class initialization, then add what this package
# needs on top of it: the built in templates and the dmoz schema
# resource.
#
sub initialize {
    my $self = shift;

    $self->SUPER::initialize();

    #
    # Merge in place: the hash stored under 'templates' is rewritten
    # with its current entries followed by the built in ones, so a
    # built in template wins when both define the same key.
    #
    my $known = $self->{'templates'};
    %{$known} = ( %{$known}, %default_templates );

    #
    # Register the schema resource under the name that cbuild_theme
    # passes to schema().
    #
    $self->{'db'}->resources_load('dmoz_schema', 'Catalog::dmoz::schema');
}

#
# Build a theme catalog: delegate to the parent class, then execute
# the schema of every table listed in @tablelist_theme for the
# catalog $name.
#
# Returns the first value returned by the parent cbuild_theme.
#
sub cbuild_theme {
    my($self, $name, $rowid) = @_;

    my($parent_result) = $self->SUPER::cbuild_theme($name, $rowid);

    for my $theme_table (@tablelist_theme) {
        # Each schema is a statement template in which the literal
        # NAME stands for the catalog name.
        my($statement) = $self->db()->schema('dmoz_schema', $theme_table);
        $statement =~ s/NAME/$name/g;
        $self->db()->exec($statement);
    }

    return $parent_result;
}

sub cdestroy_real {
    my($self, $name) = @_;

    my($ret) = $self->SUPER::cdestroy_real($name);

    my($tables) = $self->db()->tables();

    my($table);
    foreach $table (@tablelist_theme) {
	my($real) = "${table}_$name";
	if(grep(/^$real$/, @$tables)) {
	    $self->db()->exec("drop table $real");
	}
    }



( run in 1.576 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )