Mediawiki-Spider
view release on metacpan or search on metacpan
lib/Mediawiki/Spider.pm view on Meta::CPAN
sub printmenu{
# also get it to put %extras in -- extras should be a hash similar to %inverted
my ($self, $page, $extratitle,@extras)=@_;
my %sortedindex=$self->sortedwikiindex();
open (FILE2,"<header.html");
my @rawheader=<FILE2>;
my $header=join('',@rawheader);
close(FILE2);
open(FILEHANDLE, ">$page") || die("($page): cannot open file: ". $!);
print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
print FILEHANDLE "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n";
print FILEHANDLE "<title>Index</title>\n<link rel=stylesheet href=\"style.css\" type=\"text/css\"> \n</head>\n<body>\n";
print FILEHANDLE "<?php include('header.inc');?>";
print FILEHANDLE "$header\n";
print FILEHANDLE "<div id=\"column-content\">";
my $incremental=0;
for my $key (sort keys %sortedindex) {
$incremental++;
# put in categories you wish to exclude
if($key=~/Exclude/){
lib/Mediawiki/Spider.pm view on Meta::CPAN
} else {
$text=~s/\[<a href=(.*?)\W+>edit<\/a>\]//g;
#$text=~s/\<table class="wikitable"(.*?)\<\/table\>//;
$text=~s/<div id="catlinks"(.*?)\<\/div\>//;
$text=~s/<div id="jump-to-nav">(.*?)\<\/div\>//;
open(FILEHANDLE, ">$folder/".$self->urldecode($word).".".$self->extension()) || die("($word): cannot open file: ". $!);
open (FILE2,"<header.html");
my @rawheader=<FILE2>;
my $header=join('',@rawheader);
close(FILE2);
print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
print FILEHANDLE "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<title>$word</title>\n<link rel=stylesheet href=\"style.css\" type=\"text/css\"/>\n </head>\n<body>\n";
print FILEHANDLE "\n<?php include('header.inc'); ?>\n";
print FILEHANDLE "$header\n$text\n";
print FILEHANDLE "\n<?php include('footer.inc'); ?>\n";
print FILEHANDLE "</body></html>"; #sleep 7; #don't go mad if not using this on own site!
close(FILEHANDLE);
}
}
}
lib/Mediawiki/Spider.pm view on Meta::CPAN
# squelch the '[edit]' links
$text=~s/\[<a href=(.*?)\W+>edit<\/a>\]//g;
$text=~s/<div id="catlinks"(.*?)\<\/div\>//;
$text=~s/<div id="jump-to-nav">(.*?)\<\/div\>//;
open(FILEHANDLE, ">$folder/$word.".$self->extension()) || die("cannot open file: ". $!);
open (FILE2,"<header.html");
my @rawheader=<FILE2>;
my $header=join('',@rawheader);
close(FILE2);
#print FILEHANDLE "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />";
print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
print FILEHANDLE "<html xmlns=\"http://www.w3.org/1999/xhtml\"><head><title>$word</title><link rel=stylesheet href=\"style.css\" type=\"text/css\"> </head><body>";
print FILEHANDLE "\n<?php include('header.inc'); ?>\n";
print FILEHANDLE "$header\n$text";
print FILEHANDLE "\n<?php include('footer.inc'); ?>\n";
print FILEHANDLE "</body></html>";
close (FILEHANDLE);
#sleep 7; #don't go mad, eh?
}
}
my %saw;
lib/Mediawiki/Wikicopy.pm view on Meta::CPAN
print "Not printing $word (excluded)\n";
} else {
# Rewrite 'Category:' link targets to 'Category-': a ':' in a file name is bad
$text=~s/href=\"Category:([0-9A-z\-\_\%\&\.\,\;\+\#]+)/href=\"Category-$1/g;
# point Confluence context links at $contexturi ($uri with 'details' replaced by 'context')
my $contexturi=$uri;
$contexturi=~s/details/context/;
$text=~s/\/confluence\/display\/context\//$contexturi/g;
my $cleanword=$self->urldecode($word);
open(FILEHANDLE, ">$folder/$cleanword.".$self->extension()) || die("cannot open file: ($folder/$word.$self->extension()) ". $!);
print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
print FILEHANDLE "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n";
print FILEHANDLE "<title>$word</title><link rel=stylesheet href=\"style.css\" type=\"text/css\"> </head><body>\n
<?php include('header.inc'); ?>";
open (FILE2,"<header.html");
my @rawheader=<FILE2>;
my $header=join('',@rawheader);
close(FILE2);
print FILEHANDLE "$header\n<div id=\"column-content\">\n<h1 class=\"firstHeading\">".$self->urldecode($word)."</h1>";
print FILEHANDLE "\n$text\n";
( run in 1.254 seconds using v1.01-cache-2.11-cpan-49f99fa48dc )