Net-Gemini

 view release on metacpan or  search on metacpan

lib/Net/Gemini.pm  view on Meta::CPAN

# -*- Perl -*-
#
# a Gemini protocol client
#
#   "The conjunction of Jupiter with one of the stars of Gemini, which
#   'we ourselves have seen' (1.6.343b30) has been dated in recent years
#   to December 337 BC."
#    -- Malcolm Wilson. Structure and Method in Aristotle's Meteorologica.

# NOTE this silently accepts URI with userinfo; those probably
# should be failed?
#
# KLUGE this may break if the URI module ever gets URI/gemini.pm
package URI::gemini {
    use URI;
    use parent 'URI::_server';
    sub default_port { 1965 }
    sub userinfo     { return undef }    # gemini has no userinfo
    sub secure       { 1 }

    sub canonical {
        my $self  = shift;
        my $other = $self->SUPER::canonical;
        $self->SUPER::userinfo(undef);    # gemini has no userinfo

        my $slash_path =
             defined( $other->authority )
          && !length( $other->path )
          && !defined( $other->query );

        if ($slash_path) {
            $other = $other->clone if $other == $self;
            $other->path("/");
        }
        $other;
    }
}

package Net::Gemini;
our $VERSION = '0.11';
use strict;
use warnings;
use Digest::SHA 'sha256_hex';
use Encode ();
use Exporter 'import';
use IO::Socket::IP;
use IO::Socket::SSL;
use Net::SSLeay;
use Parse::MIME 'parse_mime_type';

our @EXPORT_OK = qw(gemini_request);

sub _DEFAULT_BUFSIZE ()        { 4096 }
sub _DEFAULT_MAX_CONTENT ()    { 2097152 }
sub _DEFAULT_REDIRECTS ()      { 5 }
sub _DEFAULT_REDIRECT_SLEEP () { 1 }

sub code { $_[0]{_code} }    # 0..6 response code

sub content {
    $_[0]{_content};
}                            # NOTE only after certain calls and codes
sub error  { $_[0]{_error} } # error message for 0 code
sub host   { $_[0]{_host} }
sub ip     { $_[0]{_ip} }
sub meta   { $_[0]{_meta} }
sub mime   { $_[0]{_mime} }  # NOTE only after certain calls and codes
sub port   { $_[0]{_port} }
sub socket { $_[0]{_socket} }

sub status {
    $_[0]{_status};
}                            # two digit '1x', '2x', ... response code
sub uri { $_[0]{_uri} }

# see VERIFICATION below; the caller should supply a custom callback.
# the default is thus "Trust On Almost Any Use" (TOAAU) or similar to
# what gg(1) of gmid does
sub _verify_ssl { 1 }

# minimal method to get a resource (see also gemini_request)
sub get {
    my ( $class, $source, %param ) = @_;
    my %obj;
    unless ( defined $source ) {
        @obj{qw(_code _error)} = ( 0, "source is not defined" );
        goto BLESSING;
    }

    $obj{_uri} = URI->new($source);
    unless ( $obj{_uri}->scheme eq 'gemini' ) {
        @obj{qw(_code _error)} = ( 0, "could not parse '$source'" );
        goto BLESSING;
    }
    @obj{qw/_host _port/} = ( $obj{_uri}->host, $obj{_uri}->port );

    my $yuri = $obj{_uri}->canonical;
    if ( length $yuri > 1024 ) {
        @obj{qw(_code _error)} = ( 0, "URI is too long" );
        goto BLESSING;
    }

    # VERIFICATION is based on the following though much remains up to
    # the caller to manage
    # gemini://makeworld.space/gemlog/2020-07-03-tofu-rec.gmi
    # gemini://alexschroeder.ch/page/2020-07-20%20Does%20a%20Gemini%20certificate%20need%20a%20Common%20Name%20matching%20the%20domain%3F
    eval {
        $obj{_socket} = IO::Socket::IP->new(
            ( exists $param{family} ? ( Domain => $param{family} ) : () ),
            PeerAddr => $obj{_host},
            PeerPort => $obj{_port},
            Proto    => 'tcp'
        ) or die $!;
        $obj{_ip} = $obj{_socket}->peerhost;
        IO::Socket::SSL->start_SSL(
            $obj{_socket},
            SSL_hostname => $obj{_host},    # SNI
            ( $param{tofu} ? ( SSL_verifycn_scheme => 'none' ) : () ),
            SSL_verify_callback => sub {
                my ( $ok, $ctx_store, $certname, $error, $cert, $depth ) = @_;
                if ( $depth != 0 ) {
                    return 1 if $param{tofu};
                    return $ok;
                }
                my $digest = ( $param{verify_ssl} || \&_verify_ssl )->(
                    {   host   => $obj{_host},
                        port   => $obj{_port},
                        cert   => $cert,         # warning, memory address!
                                                 # compatible with certID function of amfora
                        digest =>
                          uc( sha256_hex( Net::SSLeay::X509_get_X509_PUBKEY($cert) ) ),
                        ip        => $obj{_ip},
                        notBefore => Net::SSLeay::P_ASN1_TIME_get_isotime(
                            Net::SSLeay::X509_get_notBefore($cert)
                        ),
                        notAfter => Net::SSLeay::P_ASN1_TIME_get_isotime(
                            Net::SSLeay::X509_get_notAfter($cert)
                        ),
                        okay => $ok,
                    }
                );
            },
            ( exists $param{ssl} ? %{ $param{ssl} } : () ),
        ) or die $!;
        1;
    } or do {
        @obj{qw(_code _error)} = ( 0, "IO::Socket::SSL failed: $@" );
        goto BLESSING;
    };

    binmode $obj{_socket}, ':raw';

    my $n = syswrite $obj{_socket}, "$yuri\r\n";
    unless ( defined $n ) {
        @obj{qw(_code _error)} = ( 0, "send URI failed: $!" );
        goto BLESSING;
    }

    # get the STATUS SPACE header response (and, probably, more)
    $obj{_buf} = '';
    while (1) {
        my $n = sysread $obj{_socket}, my $buf,
          $param{bufsize} || _DEFAULT_BUFSIZE;
        unless ( defined $n ) {
            @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
            goto BLESSING;
        }
        if ( $n == 0 ) {
            @obj{qw(_code _error)} = ( 0, "recv EOF" );
            goto BLESSING;
        }
        $obj{_buf} .= $buf;
        last if length $obj{_buf} >= 3;
    }
    # NOTE this is sloppy; there are fewer "full two digit status codes"
    # defined in the appendix, e.g. only 10, 11, 20, 30, 31, 40, ...
    # on the other hand, this supports any new extensions to the
    # existing numbers
    if ( $obj{_buf} =~ m/^(([1-6])[0-9])[ ]/ ) {
        @obj{qw(_status _code)} = ( $1, $2 );
        substr $obj{_buf}, 0, 3, '';
    } else {
        @obj{qw(_code _error)} = (
            0,
            "invalid response " . sprintf "%vx",
            substr $obj{_buf},
            0, 3
        );
        goto BLESSING;
    }

    # META -- at most 1024 characters, followed by \r\n. the loop is in
    # the event the server is being naughty and trickling bytes in one
    # by one (probably you will want a timeout somewhere, or an async
    # version of this code)
    my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
    while (1) {
        if ( $obj{_buf} =~ m/^(.{0,1024}?)\r\n/ ) {
            $obj{_meta} = $1;
            my $len = length $obj{_meta};
            if ( $len == 0 ) {
                # special case mentioned in the specification
                $obj{_meta} = 'text/gemini;charset=utf-8' if $obj{_code} == 2;
            } else {
                eval {
                    $obj{_meta} =
                      Encode::decode( 'UTF-8', $obj{_meta}, Encode::FB_CROAK );

lib/Net/Gemini.pm  view on Meta::CPAN

    return 1;
  });

Processing will stop if the callback returns a false value.

=item B<max_size> => I<strictly-positive-integer>

Maximum content size to collect into B<content>. Ignored if a custom
callback is provided. The code will be zero and the error will be
C<max_size> and the status will start with C<2> and the content will be
truncated if the response is larger than permitted.

=item I<max_redirects> => I<strictly-positive-integer>

How many redirections should be followed. C<5> is the default.

=item I<redirect_delay> => I<floating-point>

How long to delay between redirects, by default C<1> second. There is a
delay by default because gemini servers or firewalls may rate limit
requests, or the gemini server simply may not have much CPU available.

=item I<param> => I<hash-reference>

Parameters that will be passed to the B<get> method.

=back

=back

=head1 METHODS

=over 4

=item B<get> I<URI> [ parameters ... ]

Tries to obtain the given gemini I<URI>.

Returns an object and a result code. The socket is set to use the
C<:raw> B<binmode>. The result code will be C<0> if there was a problem
with the request--that the URI failed to parse, or the connection
failed--or otherwise a gemini code in the range of C<1> to C<6>
inclusive, which will indicate the next steps any subsequent code
should take.

For code C<2> responses the response body may be split between I<_buf>
and whatever remains unread in the socket, if anything, hence the
B<getmore> method or the B<gemini_request> utility function.

Parameters include:

=over 4

=item B<bufsize> => I<strictly-positive-integer>

Size of buffer to use for requests, 4096 by default. Note that a naughty
server may return data in far smaller increments than this.

=item B<ssl> => { params }

Passes the given parameters to the L<IO::Socket::SSL> constructor. These
could be used to configure e.g. the C<SSL_verify_mode> or to set a
verification callback, or to specify a custom SNI host via
C<SSL_hostname>.

C<Timeout> can be used to set a connect timeout on the socket. However,
a server could wedge at any point following, so it may be necessary to
wrap a B<get> request with the C<alarm> function or similar.

=item B<tofu> => I<boolean>

If true, only the leaf certificate will be checked. Otherwise, the full
certificate chain will be verified by default, which is probably not
what you want when trusting the very first leaf certificate seen.

Also with this flag set hostname verification is turned off; the caller
can manage C<SSL_verifycn_scheme> and possibly C<SSL_verifycn_name> via
the B<ssl> param if this needs to be customized.

=item B<verify_ssl> => code-reference

Custom callback function to handle SSL verification. The default is to
accept the connection (Trust On All Uses), which is perhaps not ideal.
The callback is passed a hash reference containing various information
about the certificate and connection.

  ...->get( $url, ..., verify_ssl => sub {
    my ($param) = @_;
    return 1 if int rand 2; # certificate is OK
    return 0;
  } );

Note that some have argued that under TOFU one should not verify the
hostname nor the dates (notBefore, notAfter) of the certificate, only to
accept the first certificate presented as-is, like SSH does, and to use
that certificate thereafter. This has plusses and minuses.

See C<bin/gmitool> for how C<verify_ssl> might be used in a client.

In module version 0.08 the format of the digest (fingerprint) changed to
be compatible with the amfora gemini client.

=back

=item B<getmore> I<callback> [ bufsize => n ]

A callback interface is provided to consume the response body, if
any. Generally this should only be present for response code C<2>.
The B<meta> line should be consulted for details on the MIME type
and encoding of the bytes; C<$body> in the following code may need
to be decoded.

  my $body = '';
  $gem->getmore(
      sub {
          my ( $buffer, $length ) = @_;
          $body .= $buffer;
          return 1;
      }
  );



( run in 0.480 second using v1.01-cache-2.11-cpan-39bf76dae61 )