Horris

 view release on metacpan or  search on metacpan

lib/Horris/Connection/Plugin/PeekURL.pm  view on Meta::CPAN

package Horris::Connection::Plugin::PeekURL;
# ABSTRACT: Fetches Links And Display Some Data On It


use Moose;
use AnyEvent::HTTP;
use Encode qw(encode_utf8 decode FB_CROAK);
use File::Temp;
use HTML::TreeBuilder;
use Image::Size;
use URI;
use WWW::Shorten 'TinyURL';
extends 'Horris::Connection::Plugin';
with 'MooseX::Role::Pluggable::Plugin';

sub irc_privmsg {
    my ($self, $msg) = @_;

    my $message = $msg->message;
    while ( $message =~ m{((!)?(?:https?:)(?://[^\s/?#]*)[^\s?#]*(?:\?[^\s#]*)?(?:#.*)?)}g ) {
        my $do_peek = defined($2) ? 0 : 1;
        next unless $do_peek;

		my $shorten_url;
        my $uri = URI->new($1);
        next unless $uri->scheme && $uri->scheme =~ /^http/i;
        next unless $uri->authority;

        if (length "$uri" > 50 && $uri->authority !~ /tinyurl|bit\.ly/) {
			$shorten_url = makeashorterlink($uri);
            $uri = URI->new($shorten_url);
#            $self->connection->irc_notice({
#                channel => $msg->channel,
#                message => "short url: $uri"
#            });
        }
		$shorten_url = $shorten_url ? " - $shorten_url" : '';

        my @ct;
        my $ct = 0; # 0 - text, 1 - image, 2, other
        my $file;

        my $guard; $guard = http_get $uri, 
            timeout   => 30,
            recurse   => 10,
            on_header => sub {
                my ($headers) = @_;

                if ($headers->{Status} ne '200') {
                    undef $guard;
                    $self->connection->irc_notice({
                        channel => $msg->channel,
                        message => "Request failed: $headers->{Reason} ($headers->{Status})",
                    });
                    return;
                }
                @ct = split(/\s*,\s*/, $headers->{'content-type'});
                if (grep { /^image\/.+$/i } @ct) {
                    $ct = 1;
                } elsif ( grep { !/^text\/.+$/i } @ct) {
                    # otherwise it's something we don't know about.
                    # don't spend the time and memory to load this guy
                    undef $guard;
                    $ct = 2;
                    $self->connection->irc_notice({
                        channel => $msg->channel, 
                        message => sprintf( "%s [%s]%s", $uri, $ct[0], $shorten_url)
                    });
                    return;
                }
                return 1;
            },
            on_body => sub {
                # off load to the file system.
                $file ||= File::Temp->new(UNLINK => 1);

                print $file $_[0];
                return 1;
            },
            sub {
                undef $guard;
                return unless $file;
                seek($file, 0, 0);
                if ($ct == 1) {
                    my($width, $height) = Image::Size::imgsize($file);
                    $self->connection->irc_notice({
                        channel => $msg->channel, 
                        message => sprintf( "%s [%s, w=%d, h=%d]%s", $uri, $ct[0], $width, $height, $shorten_url )
                    });
                } else {
                    my $p;
                    my $data = do { local $/; <$file> };
                    eval { 
                        $p = HTML::TreeBuilder->new(
                            implicit_tags => 1,
                            ignore_unknoown => 1,
                            ignore_text => 0
                        );
                        $p->strict_comment(1);
        
                        my $charset;
        
                        if ($data =~ /charset=(?:'([^']+?)'|"([^"]+?)"|([a-zA-Z0-9_-]+)\b)/) {
                            my $cs = lc($1 || $2 || $3);
                            if ($cs =~ /^Shift[-_]?JIS$/i) {
                                $charset = 'cp949';
                            } else {
                                $charset = $cs;
                            }
                        }

                        if (! $charset) {
                            foreach my $ct (@ct) {
                                if ($ct =~ s/charset=Shift_JIS/charset=cp949/i) {
                                    $charset = 'cp949';
                                } elsif ($ct =~ /charset=([a-zA-Z0-9_-]+)/) {
                                    $charset = $1;



( run in 1.000 second using v1.01-cache-2.11-cpan-524268b4103 )