App-phoebe

 view release on metacpan or  search on metacpan

lib/App/Phoebe/Wikipedia.pm  view on Meta::CPAN

# -*- mode: perl -*-
# Copyright (C) 2017–2021  Alex Schroeder <alex@gnu.org>

# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.

=head1 NAME

App::Phoebe::Wikipedia - act as Wikipedia proxy from Phoebe

=head1 DESCRIPTION

This extension turns one of your hosts into a Wikipedia proxy.

In your F<config> file, you need to specify which of your hosts it is:

    package App::Phoebe::Wikipedia;
    our $host = "vault.transjovian.org";
    use App::Phoebe::Wikipedia;

You can also use L<App::Phoebe::Web> in which case web requests will get
redirected to the actual Wikipedia.

=cut

package App::Phoebe::Wikipedia;
use App::Phoebe qw($log @extensions $full_url_regex success result gemini_link);
use URI::Escape;
use Modern::Perl;
use MediaWiki::API;
use Text::SpanningTable;
use List::Util qw(sum min max);
use Encode;

our $host;

# Wikipedia

push(@extensions, \&wikipedia);

my $link_regex = "([-,.()'%&!?;<> _1-9A-Za-z\x{0080}-\x{fffd}]|[-,.()'%&!?;<> _0-9A-Za-z\x{0080}-\x{fffd}][-,.()'#%&!?;<> _0-9A-Za-z\x{0080}-\x{fffd}]+)"; # disallow "0" and must match HTML and plain text (ie. > and &gt;)

sub wikipedia {
  my $stream = shift;
  my $url = shift;
  my $headers = shift;
  my $port = App::Phoebe::port($stream);
  if ($url =~ m!^gemini://$host(?::$port)?/search/([a-z]+)/([^?;]+)!) {
    wikipedia_serve_search($stream, $1, decode_utf8(uri_unescape($2)));
  } elsif ($url =~ m!^gemini://$host(?::$port)?/text/([a-z]+)/([^?;]+)!) {
    wikipedia_serve_text($stream, $1, decode_utf8(uri_unescape($2)));
  } elsif ($url =~ m!^gemini://$host(?::$port)?/full/([a-z]+)/([^?;]+)!) {
    wikipedia_serve_full($stream, $1, decode_utf8(uri_unescape($2)));
  } elsif ($url =~ m!^gemini://$host(?::$port)?/raw/([a-z]+)/([^?;]+)!) {
    wikipedia_serve_raw($stream, $1, decode_utf8(uri_unescape($2)));
  } elsif ($url =~ m!^gemini://$host(?::$port)?/?$!) {
    $log->info("Asking for a language");
    result($stream, "10", "Search in which language? (ar, cn, en, fr, ru, es, etc.)");
  } elsif ($url =~ m!^gemini://$host(?::$port)?/?\?([a-z]+)$!) {
    $log->info("Redirecting to ask for a term");
    my $lang = $1;
    result($stream, "30", "gemini://$host:$port/$lang");
  } elsif ($url =~ m!^gemini://$host(?::$port)?/([a-z][a-z][a-z]?)$!) {
    $log->info("Asking for a term");
    my $lang = $1;
    result($stream, "10", "Search term");
  } elsif ($url =~ m!^gemini://$host(?::$port)?/([a-z]+)\?([^?;]+)!) {
    $log->info("Redirecting to text");
    my $lang = $1;
    my $term = $2;
    result($stream, "30", "gemini://$host:$port/search/$lang/$term");
  } elsif ($url =~ m!^gemini://$host(?::$port)?/robots\.txt$!) {
    $log->info("Serving robots.txt");
    success($stream, "text/plain");
    $stream->write("User-agent: *\n");
    $stream->write("Disallow: /\n");
  } elsif (my ($lang, $term) = $url =~ m!^GET /(?:search/|text/|full/)?(?:([a-z][a-z][a-z]?)/)?(.*) HTTP/1\.[01]$!
	   and $headers->{host} and $headers->{host} =~ m!^$host(?::$port)?$!) {
    $lang ||= "www";
    my $url = "https://$lang.wikipedia.org/wiki/$term";
    $log->info("Redirecting to $url");
    $stream->write("HTTP/1.1 301 Back to Wikipedia!\r\n");
    $stream->write("Location: $url\r\n");
    $stream->write("Content-Type: text/plain\r\n");
    $stream->write("\r\n");
    $stream->write("See $url\n");
  } else {
    return 0;
  }



( run in 0.357 second using v1.01-cache-2.11-cpan-96521ef73a4 )