Fancazzista-Scrap

 view release on metacpan or  search on metacpan

lib/Fancazzista/Scrap/DevtoScrapper.pm  view on Meta::CPAN

    my $url  = $base . "?tag=" . $devto->{tag} . "&per_page=" . ( $devto->{limit} || 5 );

    my $r  = HTTP::Request->new( 'GET', $url );
    my $ua = LWP::UserAgent->new();
    $ua->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:86.0) Gecko/20100101 Firefox/86.0');
    my $response = $ua->request($r);

    my @posts = ();

    if ( $response->is_success ) {
        my $responseContent = decode_json $response->decoded_content;
        my @children        = @{$responseContent};

        foreach (@children) {
            my $text = $_->{title};
            $text =~ s/^\s+|\s+$//g;

            push @posts,
              {
                text => encode( 'utf8', $text ),
                link => $_->{url}

lib/Fancazzista/Scrap/RedditScrapper.pm  view on Meta::CPAN


    my $base     = "https://www.reddit.com/r/";
    my $url      = $base . $subreddit->{name} . "/new.json?limit=" . ( $subreddit->{limit} || 5 );
    my $r        = HTTP::Request->new( 'GET', $url );
    my $ua       = LWP::UserAgent->new();
    my $response = $ua->request($r);

    my @subreddits = ();

    if ( $response->is_success ) {
        my $responseContent = decode_json $response->decoded_content;
        my @children        = @{ $responseContent->{data}->{children} };

        foreach (@children) {
            my $text = $_->{data}->{title};
            $text =~ s/^\s+|\s+$//g;

            push @subreddits,
              {
                text => encode( 'utf8', $text ),
                link => $_->{data}->{url}

lib/Fancazzista/Scrap/WebsiteScrapper.pm  view on Meta::CPAN

    my $url  = shift;

    my $ua = new LWP::UserAgent;
    $ua->agent( "$0/0.1 " . $ua->agent );

    my $req = new HTTP::Request 'GET' => $url;
    $req->header( 'Accept' => 'text/html' );

    my $res = $ua->request($req);

    return $res->decoded_content;
}

sub extractArticles {
    my $self     = shift;
    my $resource = shift;
    my $content  = $self->getWebsiteHtml( $resource->{url} );
    my $dom      = Mojo::DOM->new($content);
    my $found    = $dom->find( $resource->{selector} );

    my @articles = ();

t/RedditScrapper.t  view on Meta::CPAN

BEGIN { use_ok('Fancazzista::Scrap::RedditScrapper'); }

use Fancazzista::Scrap::RedditScrapper;

subtest 'test_parsing_reddit_api' => sub {
    my $control = qtakeover(
        'LWP::UserAgent' => (
            request => sub {
                my $response = qobj(
                    is_success      => 1,
                    decoded_content => '{ "data": { "children": [ { "data" : { "title": "Example JS", "url": "http://example.com/js" } } ] } }'
                );
                return $response;
            }
        )
    );
    my %subreddit = ( name => 'js', limit => 10 );
    my $scrapper  = new Fancazzista::Scrap::RedditScrapper();
    my @posts     = $scrapper->getPosts( \%subreddit );
    my %post      = %{ $posts[0] };



( run in 0.245 seconds using v1.01-cache-2.11-cpan-26ccb49234f )