Mediawiki-Spider

 view release on metacpan or search on metacpan

lib/Mediawiki/Spider.pm  view on Meta::CPAN

sub printmenu{
	# also get it to put %extras in -- extras should be a hash similar to %inverted
	my ($self, $page, $extratitle,@extras)=@_;
	my %sortedindex=$self->sortedwikiindex();
    open (FILE2,"<header.html");
    my @rawheader=<FILE2>;
    my $header=join('',@rawheader);
    close(FILE2);

    open(FILEHANDLE, ">$page") || die("($page): cannot open file: ". $!);
	print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
	print FILEHANDLE "<html  xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n";
	print FILEHANDLE "<title>Index</title>\n<link rel=stylesheet href=\"style.css\" type=\"text/css\"> \n</head>\n<body>\n";
	print FILEHANDLE "<?php include('header.inc');?>";
	print FILEHANDLE "$header\n";
	print FILEHANDLE "<div id=\"column-content\">";
	my $incremental=0;
	for my $key (sort keys %sortedindex) {
		$incremental++;
		# put in categories you wish to exclude
		if($key=~/Exclude/){

lib/Mediawiki/Spider.pm  view on Meta::CPAN

				} else {
					$text=~s/\[<a href=(.*?)\W+>edit<\/a>\]//g;
					#$text=~s/\<table class="wikitable"(.*?)\<\/table\>//;
					$text=~s/<div id="catlinks"(.*?)\<\/div\>//;
					$text=~s/<div id="jump-to-nav">(.*?)\<\/div\>//;
					open(FILEHANDLE, ">$folder/".$self->urldecode($word).".".$self->extension()) || die("($word): cannot open file: ". $!);
                    open (FILE2,"<header.html");
                    my @rawheader=<FILE2>;
					my $header=join('',@rawheader);
                    close(FILE2);
					print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";

					print FILEHANDLE "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<title>$word</title>\n<link rel=stylesheet href=\"style.css\" type=\"text/css\"/>\n </head>\n<body>\n";
					print FILEHANDLE "\n<?php include('header.inc'); ?>\n";
					print FILEHANDLE "$header\n$text\n";
					print FILEHANDLE "\n<?php include('footer.inc'); ?>\n";
					print FILEHANDLE "</body></html>";	#sleep 7; #don't go mad if not using this on own site!
					close(FILEHANDLE);
				}
			}
		}

lib/Mediawiki/Spider.pm  view on Meta::CPAN

					# squelch the '[edit]' links
					$text=~s/\[<a href=(.*?)\W+>edit<\/a>\]//g;
					$text=~s/<div id="catlinks"(.*?)\<\/div\>//;
					$text=~s/<div id="jump-to-nav">(.*?)\<\/div\>//;
					open(FILEHANDLE, ">$folder/$word.".$self->extension()) || die("cannot open file: ". $!);
                    open (FILE2,"<header.html");
                    my @rawheader=<FILE2>;
					my $header=join('',@rawheader);
                    close(FILE2);
					#print FILEHANDLE "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />";
					print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
					print FILEHANDLE "<html  xmlns=\"http://www.w3.org/1999/xhtml\"><head><title>$word</title><link rel=stylesheet href=\"style.css\" type=\"text/css\"> </head><body>";
					print FILEHANDLE "\n<?php include('header.inc'); ?>\n";
		   			print FILEHANDLE "$header\n$text";
					print FILEHANDLE "\n<?php include('footer.inc'); ?>\n";
		   			print FILEHANDLE "</body></html>";
					close (FILEHANDLE);
					#sleep 7; #don't go mad, eh?
				}
			}
			 my %saw;

lib/Mediawiki/Wikicopy.pm  view on Meta::CPAN

					print "Not printing $word (excluded)\n";
				} else {
					# Do not have category: files... : in files is bad
					$text=~s/href=\"Category:([0-9A-z\-\_\%\&\.\,\;\+\#]+)/href=\"Category-$1/g;
					# squelch the '[edit]' links
					my $contexturi=$uri;
					$contexturi=~s/details/context/;
					$text=~s/\/confluence\/display\/context\//$contexturi/g;
					my $cleanword=$self->urldecode($word);
					open(FILEHANDLE,  ">$folder/$cleanword.".$self->extension()) || die("cannot open file: ($folder/$word.$self->extension()) ". $!);
					print FILEHANDLE "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
					print FILEHANDLE "<html  xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n";

					print FILEHANDLE "<title>$word</title><link rel=stylesheet href=\"style.css\" type=\"text/css\"> </head><body>\n
					<?php include('header.inc'); ?>";
					open (FILE2,"<header.html");
					my @rawheader=<FILE2>;
					my $header=join('',@rawheader);
					close(FILE2);
					print FILEHANDLE "$header\n<div id=\"column-content\">\n<h1 class=\"firstHeading\">".$self->urldecode($word)."</h1>";
		   			print FILEHANDLE "\n$text\n";



( run in 1.254 seconds using v1.01-cache-2.11-cpan-49f99fa48dc )