App-phoebe
view release on metacpan or search on metacpan
lib/App/Phoebe/Wikipedia.pm view on Meta::CPAN
# -*- mode: perl -*-
# Copyright (C) 2017â2021 Alex Schroeder <alex@gnu.org>
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
=head1 NAME
App::Phoebe::Wikipedia - act as Wikipedia proxy from Phoebe
=head1 DESCRIPTION
This extension turns one of your hosts into a Wikipedia proxy.
In your F<config> file, you need to specify which of your hosts it is:
package App::Phoebe::Wikipedia;
our $host = "vault.transjovian.org";
use App::Phoebe::Wikipedia;
You can also use L<App::Phoebe::Web> in which case web requests will get
redirected to the actual Wikipedia.
=cut
package App::Phoebe::Wikipedia;
use App::Phoebe qw($log @extensions $full_url_regex success result gemini_link);
use URI::Escape;
use Modern::Perl;
use MediaWiki::API;
use Text::SpanningTable;
use List::Util qw(sum min max);
use Encode;
our $host;
# Wikipedia
push(@extensions, \&wikipedia);
my $link_regex = "([-,.()'%&!?;<> _1-9A-Za-z\x{0080}-\x{fffd}]|[-,.()'%&!?;<> _0-9A-Za-z\x{0080}-\x{fffd}][-,.()'#%&!?;<> _0-9A-Za-z\x{0080}-\x{fffd}]+)"; # disallow "0" and must match HTML and plain text (ie. > and >)
sub wikipedia {
my $stream = shift;
my $url = shift;
my $headers = shift;
my $port = App::Phoebe::port($stream);
if ($url =~ m!^gemini://$host(?::$port)?/search/([a-z]+)/([^?;]+)!) {
wikipedia_serve_search($stream, $1, decode_utf8(uri_unescape($2)));
} elsif ($url =~ m!^gemini://$host(?::$port)?/text/([a-z]+)/([^?;]+)!) {
wikipedia_serve_text($stream, $1, decode_utf8(uri_unescape($2)));
} elsif ($url =~ m!^gemini://$host(?::$port)?/full/([a-z]+)/([^?;]+)!) {
wikipedia_serve_full($stream, $1, decode_utf8(uri_unescape($2)));
} elsif ($url =~ m!^gemini://$host(?::$port)?/raw/([a-z]+)/([^?;]+)!) {
wikipedia_serve_raw($stream, $1, decode_utf8(uri_unescape($2)));
} elsif ($url =~ m!^gemini://$host(?::$port)?/?$!) {
$log->info("Asking for a language");
result($stream, "10", "Search in which language? (ar, cn, en, fr, ru, es, etc.)");
} elsif ($url =~ m!^gemini://$host(?::$port)?/?\?([a-z]+)$!) {
$log->info("Redirecting to ask for a term");
my $lang = $1;
result($stream, "30", "gemini://$host:$port/$lang");
} elsif ($url =~ m!^gemini://$host(?::$port)?/([a-z][a-z][a-z]?)$!) {
$log->info("Asking for a term");
my $lang = $1;
result($stream, "10", "Search term");
} elsif ($url =~ m!^gemini://$host(?::$port)?/([a-z]+)\?([^?;]+)!) {
$log->info("Redirecting to text");
my $lang = $1;
my $term = $2;
result($stream, "30", "gemini://$host:$port/search/$lang/$term");
} elsif ($url =~ m!^gemini://$host(?::$port)?/robots\.txt$!) {
$log->info("Serving robots.txt");
success($stream, "text/plain");
$stream->write("User-agent: *\n");
$stream->write("Disallow: /\n");
} elsif (my ($lang, $term) = $url =~ m!^GET /(?:search/|text/|full/)?(?:([a-z][a-z][a-z]?)/)?(.*) HTTP/1\.[01]$!
and $headers->{host} and $headers->{host} =~ m!^$host(?::$port)?$!) {
$lang ||= "www";
my $url = "https://$lang.wikipedia.org/wiki/$term";
$log->info("Redirecting to $url");
$stream->write("HTTP/1.1 301 Back to Wikipedia!\r\n");
$stream->write("Location: $url\r\n");
$stream->write("Content-Type: text/plain\r\n");
$stream->write("\r\n");
$stream->write("See $url\n");
} else {
return 0;
}
( run in 0.357 second using v1.01-cache-2.11-cpan-96521ef73a4 )