Fancazzista-Scrap
view release on metacpan or search on metacpan
lib/Fancazzista/Scrap/DevtoScrapper.pm view on Meta::CPAN
my $url = $base . "?tag=" . $devto->{tag} . "&per_page=" . ( $devto->{limit} || 5 );
my $r = HTTP::Request->new( 'GET', $url );
my $ua = LWP::UserAgent->new();
$ua->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:86.0) Gecko/20100101 Firefox/86.0');
my $response = $ua->request($r);
my @posts = ();
if ( $response->is_success ) {
my $responseContent = decode_json $response->decoded_content;
my @children = @{$responseContent};
foreach (@children) {
my $text = $_->{title};
$text =~ s/^\s+|\s+$//g;
push @posts,
{
text => encode( 'utf8', $text ),
link => $_->{url}
lib/Fancazzista/Scrap/RedditScrapper.pm view on Meta::CPAN
my $base = "https://www.reddit.com/r/";
my $url = $base . $subreddit->{name} . "/new.json?limit=" . ( $subreddit->{limit} || 5 );
my $r = HTTP::Request->new( 'GET', $url );
my $ua = LWP::UserAgent->new();
my $response = $ua->request($r);
my @subreddits = ();
if ( $response->is_success ) {
my $responseContent = decode_json $response->decoded_content;
my @children = @{ $responseContent->{data}->{children} };
foreach (@children) {
my $text = $_->{data}->{title};
$text =~ s/^\s+|\s+$//g;
push @subreddits,
{
text => encode( 'utf8', $text ),
link => $_->{data}->{url}
lib/Fancazzista/Scrap/WebsiteScrapper.pm view on Meta::CPAN
my $url = shift;
my $ua = new LWP::UserAgent;
$ua->agent( "$0/0.1 " . $ua->agent );
my $req = new HTTP::Request 'GET' => $url;
$req->header( 'Accept' => 'text/html' );
my $res = $ua->request($req);
return $res->decoded_content;
}
sub extractArticles {
my $self = shift;
my $resource = shift;
my $content = $self->getWebsiteHtml( $resource->{url} );
my $dom = Mojo::DOM->new($content);
my $found = $dom->find( $resource->{selector} );
my @articles = ();
t/RedditScrapper.t view on Meta::CPAN
BEGIN { use_ok('Fancazzista::Scrap::RedditScrapper'); }
use Fancazzista::Scrap::RedditScrapper;
subtest 'test_parsing_reddit_api' => sub {
my $control = qtakeover(
'LWP::UserAgent' => (
request => sub {
my $response = qobj(
is_success => 1,
decoded_content => '{ "data": { "children": [ { "data" : { "title": "Example JS", "url": "http://example.com/js" } } ] } }'
);
return $response;
}
)
);
my %subreddit = ( name => 'js', limit => 10 );
my $scrapper = new Fancazzista::Scrap::RedditScrapper();
my @posts = $scrapper->getPosts( \%subreddit );
my %post = %{ $posts[0] };
( run in 0.245 second using v1.01-cache-2.11-cpan-26ccb49234f )