BusyBird-Input-Feed
view release on metacpan or search on metacpan
t/samples.t view on Meta::CPAN
use strict;
use warnings;
use Test::More;
use utf8;
use BusyBird::Input::Feed;
use Test::Deep 0.084 qw(cmp_deeply superhashof);
use File::Spec;
## Shorthand for building an expected status: the given key/value pairs
## are wrapped in a Test::Deep superhashof() matcher, so only the listed
## fields have to match.
sub sh {
    my %expected_fields = @_;
    return superhashof(\%expected_fields);
}
## Verify the statuses obtained from one sample feed against a test case.
##
##   $label        - prefix used in every test name emitted here
##   $got_statuses - array-ref of statuses returned by the parser
##   $case         - hash-ref with "exp_num" (expected number of statuses)
##                   and "exp_partial" (array-ref of expected statuses,
##                   matched against the head of $got_statuses)
sub check_case {
    my ($label, $got_statuses, $case) = @_;

    is(scalar(@$got_statuses), $case->{exp_num}, "$label: num of statuses OK");

    ## Only as many statuses as are listed in exp_partial get inspected.
    my $last_index = $#{ $case->{exp_partial} };
    for my $i (0 .. $last_index) {
        my $got = $got_statuses->[$i];
        my $exp = $case->{exp_partial}[$i];

        ## Dump the actual status on mismatch to make failures debuggable.
        unless (cmp_deeply($got, $exp, "$label: status $i OK")) {
            diag(explain $got);
        }

        is($got->{user}{profile_image_url}, undef,
           "$label: status $i: user.profile_image_url is not set");
    }
}
## Parser instance shared by every test case below.
## NOTE(review): use_favicon => 0 presumably turns off favicon lookup, which
## would explain why check_case expects user.profile_image_url to be undef
## -- confirm against the BusyBird::Input::Feed documentation.
my $input = BusyBird::Input::Feed->new(use_favicon => 0);
## Only the statuses at the head of each feed are checked, and within each
## status only the fields listed in the expected status are compared.
my @testcases = (
{ filename => 'rtcpan.rdf',
exp_num => 15,
exp_partial => [
## If <guid> is not present, use <link> for item_id.
## "id" field is (timestamp | item_id)
sh(id => '1363869367|https://rt.cpan.org/Ticket/Display.html?id=84118',
text => 'I really beg you to take back the exception catching feature in Future 0.11',
busybird => sh( status_permalink => 'https://rt.cpan.org/Ticket/Display.html?id=84118' ),
created_at => 'Thu Mar 21 12:36:07 +0000 2013',
user => sh( screen_name => q{rt.cpan.org: Search Queue = 'future'} )),
sh( id => '1364188145|https://rt.cpan.org/Ticket/Display.html?id=84187',
text => 'needs_all() throws an exception when immediate failed subfutures are given',
busybird => sh( status_permalink => 'https://rt.cpan.org/Ticket/Display.html?id=84187' ),
created_at => 'Mon Mar 25 05:09:05 +0000 2013',
user => sh( screen_name => q{rt.cpan.org: Search Queue = 'future'} )),
sh( id => '1364188230|https://rt.cpan.org/Ticket/Display.html?id=84188',
text => 'Error message is not user-friendly for followed_by(), and_then(), or_else() and repeat()',
busybird => sh( status_permalink => 'https://rt.cpan.org/Ticket/Display.html?id=84188' ),
created_at => 'Mon Mar 25 05:10:30 +0000 2013',
user => sh( screen_name => q{rt.cpan.org: Search Queue = 'future'} )),
sh( id => '1364188340|https://rt.cpan.org/Ticket/Display.html?id=84189',
text => 'Behavior of repeat {...} foreach => [] may be counter-intuitive',
busybird => sh( status_permalink => 'https://rt.cpan.org/Ticket/Display.html?id=84189' ),
created_at => 'Mon Mar 25 05:12:20 +0000 2013',
user => sh( screen_name => q{rt.cpan.org: Search Queue = 'future'}))
]},
{ filename => 'slashdot.rss',
exp_num => 25,
exp_partial => [
## Use <guid> for item_id. In this case, busybird.original.id should retain the <guid>.
sh( id => '1404616500|http://slashdot.feedsportal.com/c/35028/f/647410/s/3c35f940/sc/38/l/0Lhardware0Bslashdot0Borg0Cstory0C140C0A70C0A60C0A0A392340Cby0E20A450Ethe0Etop0Especies0Ewill0Eno0Elonger0Ebe0Ehumans0Eand0Ethat0Ecould0Ebe0Ea0Eproble...
text => q{By 2045 'The Top Species Will No Longer Be Humans,' and That Could Be a Problem},
busybird => sh( status_permalink => 'http://rss.slashdot.org/~r/Slashdot/slashdot/~3/HdnfMBYoOr4/story01.htm',
original => sh( id => 'http://slashdot.feedsportal.com/c/35028/f/647410/s/3c35f940/sc/38/l/0Lhardware0Bslashdot0Borg0Cstory0C140C0A70C0A60C0A0A392340Cby0E20A450Ethe0Etop0Especies0Ewill0Eno0Elonger0Ebe0Ehumans0Eand0Ethat0...
created_at => 'Sun Jul 06 03:15:00 +0000 2014',
user => sh( screen_name => 'Slashdot' ),
## extract <img>s from HTML content. Up to 3 images by default.
extended_entities => sh(media => [
sh(media_url => 'http://a.fsdn.com/sd/twitter_icon_large.png'),
sh(media_url => 'http://a.fsdn.com/sd/facebook_icon_large.png'),
sh(media_url => 'http://www.gstatic.com/images/icons/gplus-16.png'),
])),
sh( id => '1404606780|http://slashdot.feedsportal.com/c/35028/f/647410/s/3c35c953/sc/32/l/0Lscience0Bslashdot0Borg0Cstory0C140C0A70C0A60C0A0A42540Ctwo0Eearth0Elike0Eexoplanets0Edont0Eactually0Eexist0Dutm0Isource0Frss10B0Amainlinkanon0Gutm0I...
text => q{Two Earth-Like Exoplanets Don't Actually Exist},
busybird => sh( status_permalink => 'http://rss.slashdot.org/~r/Slashdot/slashdot/~3/NcsdVQtQOQQ/story01.htm',
original => sh( id => 'http://slashdot.feedsportal.com/c/35028/f/647410/s/3c35c953/sc/32/l/0Lscience0Bslashdot0Borg0Cstory0C140C0A70C0A60C0A0A42540Ctwo0Eearth0Elike0Eexoplanets0Edont0Eactually0Eexist0Dutm0Isource0Frss10B...
created_at => 'Sun Jul 06 00:33:00 +0000 2014',
user => sh( screen_name => 'Slashdot' ),
extended_entities => sh(media => [
sh(media_url => 'http://a.fsdn.com/sd/twitter_icon_large.png'),
sh(media_url => 'http://a.fsdn.com/sd/facebook_icon_large.png'),
sh(media_url => 'http://www.gstatic.com/images/icons/gplus-16.png'),
])),
]},
{ filename => 'stackoverflow.atom',
exp_num => 30,
exp_partial => [
sh( id => '1404624785|http://stackoverflow.com/q/24593005',
text => 'How to write Unit Test for IValidatableObject Model',
busybird => sh( status_permalink => 'http://stackoverflow.com/questions/24593005/how-to-write-unit-test-for-ivalidatableobject-model',
original => sh( id => 'http://stackoverflow.com/q/24593005' )),
## use <updated> date
created_at => 'Sun Jul 06 05:33:05 +0000 2014',
user => sh( screen_name => 'Recent Questions - Stack Overflow' )),
sh( id => '1404624716|http://stackoverflow.com/q/24593002',
text => 'hide softkeyboard when it is called from menuitem',
busybird => sh( status_permalink => 'http://stackoverflow.com/questions/24593002/hide-softkeyboard-when-it-is-called-from-menuitem',
original => sh( id => 'http://stackoverflow.com/q/24593002' )),
created_at => 'Sun Jul 06 05:31:56 +0000 2014',
user => sh( screen_name => 'Recent Questions - Stack Overflow' )),
]},
{ filename => 'googlejp.atom',
exp_num => 25,
exp_partial => [
sh( id => '1404701402|tag:blogger.com,1999:blog-20042392.post-2515664455683743324',
## status text should be decoded.
text => 'ãããããããã¡ãããã©ããã§ãããªããã©ããªé£éããã§ãã¯ãã¾ãããã',
## if there are multiple <link>s, use rel="alternate".
busybird => sh( status_permalink => 'http://feedproxy.google.com/~r/GoogleJapanBlog/~3/RP_M-WXr_6I/blog-post.html',
original => sh( id => 'tag:blogger.com,1999:blog-20042392.post-2515664455683743324' )),
## <updated> is used instead of <published>
created_at => 'Mon Jul 07 11:50:02 +0900 2014',
user => sh( screen_name => 'Google Japan Blog' ),
extended_entities => sh( media => [
sh(media_url => 'http://1.bp.blogspot.com/-eYSw5ZyZ7Ec/U7YgVYLF3TI/AAAAAAAAM_8/FPpTqUyesk0/s450/gochiphototop1.png'),
sh(media_url => 'http://1.bp.blogspot.com/-bp_kUa_Z8uQ/U7Yip34vN-I/AAAAAAAANAU/ktJQhMvf3BQ/s500/gochiprofile.png'),
sh(media_url => 'http://4.bp.blogspot.com/-pJkRMfPc2m4/U7Yi-Vm4pvI/AAAAAAAANAc/EbXv8oPCyBM/s100/genre_0011.png'),
] )),
sh( id => '1403245680|tag:blogger.com,1999:blog-20042392.post-4467811587369881889',
text => 'ææ°ã® Chrome Experiment ã§ããã¯ãããªãã«ãã·ã¥ã¼ããæ¥½ããã!',
busybird => sh( status_permalink => 'http://feedproxy.google.com/~r/GoogleJapanBlog/~3/qztQgCPoisw/chrome-experiment.html',
original => sh( id => 'tag:blogger.com,1999:blog-20042392.post-4467811587369881889' )),
## <published> is used when <updated> is missing
created_at => 'Fri Jun 20 15:28:00 +0900 2014',
user => sh( screen_name => 'Google Japan Blog' ),
extended_entities => sh(media => [
sh(media_url => 'http://feeds.feedburner.com/~r/GoogleJapanBlog/~4/qztQgCPoisw')
])),
]},
{ filename => 'slashdotjp.rdf',
exp_num => 13,
exp_partial => [
sh( id => '1404899040|http://linux.slashdot.jp/story/14/07/09/097242/',
text => 'ãã©ã¯ã«ã»ãªããã¯ã¹ãã½ãããã³ã¯ã»ãã¯ããã¸ã¼ã«è²·åããã',
busybird => sh( status_permalink => 'http://linux.slashdot.jp/story/14/07/09/097242/' ),
created_at => 'Wed Jul 09 09:44:00 +0000 2014',
user => sh( screen_name => 'ã¹ã©ãã·ã¥ãããã»ã¸ã£ãã³' )),
sh( id => '1404896100|http://yro.slashdot.jp/story/14/07/09/0533213/',
text => 'ãã¤ãªãã¶ã¼ããæãããä¸ä¸ç叿°ã®æ°ä½ãã©ã¼ã²ã¼ã ã¯DLCã§CERO Zç¸å½ã«ãªã',
busybird => sh( status_permalink => 'http://yro.slashdot.jp/story/14/07/09/0533213/' ),
created_at => 'Wed Jul 09 08:55:00 +0000 2014',
user => sh( screen_name => 'ã¹ã©ãã·ã¥ãããã»ã¸ã£ãã³' )),
]},
{ filename => 'pukiwiki_rss09.rss',
exp_num => 15,
exp_partial => [
## Both the ID and the timestamp are missing from the feed: item_id falls back to <link>, and created_at stays undef.
sh( id => 'http://debugitos.main.jp/index.php?Ubuntu%2FTrusty%A5%A4%A5%F3%A5%B9%A5%C8%A1%BC%A5%EB%A5%E1%A5%E2',
text => 'Ubuntu/Trustyã¤ã³ã¹ãã¼ã«ã¡ã¢',
busybird => sh( status_permalink => 'http://debugitos.main.jp/index.php?Ubuntu%2FTrusty%A5%A4%A5%F3%A5%B9%A5%C8%A1%BC%A5%EB%A5%E1%A5%E2' ),
created_at => undef,
user => sh( screen_name => q{DebugIto's} )),
]},
{ filename => 'nick.rss',
exp_num => 20,
exp_partial => [
sh( id => '1405617373|http://www.nickandmore.com/?p=24392',
## decode XML Entities (like ’)
text => q{Disney XDâs âThe 7Dâ Launches With Solid Ratings, App Hits 1.3M+ Downloads},
busybird => sh( status_permalink => 'http://www.nickandmore.com/2014/07/17/disney-xds-the-7d-launches-with-solid-ratings-app-hits-1-3m-downloads/',
original => sh( id => 'http://www.nickandmore.com/?p=24392' )),
created_at => 'Thu Jul 17 17:16:13 +0000 2014',
user => sh( screen_name => 'NICKandMORE' )),
sh( id => '1405613508|http://www.nickandmore.com/?p=24371',
## XML Entities with &
text => q{Disney Television Animation Announces âHaunted Mansionâ Special, Three Pilots & Short-Form Series},
busybird => sh( status_permalink => 'http://www.nickandmore.com/2014/07/17/disney-television-animation-announces-haunted-mansion-special-three-pilots-short-form-series/',
original => sh( id => 'http://www.nickandmore.com/?p=24371' )),
created_at => 'Thu Jul 17 16:11:48 +0000 2014',
user => sh( screen_name => 'NICKandMORE' ))
]},
{ filename => 'turner_press.rss',
exp_num => 10,
exp_partial => [
sh( id => '1410386063|7606 at https://pressroom.turner.com',
text => 'Mike Tyson Mysteries',
busybird => sh( status_permalink => 'https://pressroom.turner.com/us/adult-swim/mike-tyson-mysteries-1',
original => sh( id => '7606 at https://pressroom.turner.com' )),
created_at => 'Wed Sep 10 21:54:23 +0000 2014',
user => sh( screen_name => 'Turner Press Site' )),
sh( id => '1410385988|7605 at https://pressroom.turner.com',
text => 'Mike Tyson Mysteries',
busybird => sh( status_permalink => 'https://pressroom.turner.com/us/adult-swim/mike-tyson-mysteries-0',
original => sh( id => '7605 at https://pressroom.turner.com' )),
created_at => 'Wed Sep 10 21:53:08 +0000 2014',
user => sh( screen_name => 'Turner Press Site' ),
extended_entities => sh(media => [
## In the original feed data, only the path is in the src attr in <img> tag. In this case,
## the permalink's scheme and host should complement the link.
sh(media_url => 'https://pressroom.turner.com/modules/file/icons/image-x-generic.png')
]))
]},
{ filename => 'img_paths.rss',
exp_num => 2,
## test extraction of media_urls from <img> tags with path-only src attributes.
exp_partial => [
sh( id => '1410688800|img_paths:02',
text => 'link ends with non-slash',
busybird => sh( status_permalink => 'http://example.com/foo/bar/buzz.html',
original => sh( id => 'img_paths:02' )),
created_at => 'Sun Sep 14 10:00:00 +0000 2014',
user => sh( screen_name => 'Feed for testing img paths' ),
extended_entities => sh(media => [
sh(media_url => 'http://example.com/foo/bar/relative/path.png'),
sh(media_url => 'http://example.com/absolute/path.png')
])),
sh( id => '1410685200|img_paths:01',
text => 'link ends with slash',
busybird => sh( status_permalink => 'http://example.com/foo/bar/',
original => sh( id => 'img_paths:01' )),
created_at => 'Sun Sep 14 09:00:00 +0000 2014',
user => sh( screen_name => 'Feed for testing img paths' ),
extended_entities => sh(media => [
sh(media_url => 'http://example.com/foo/bar/relative/path.png'),
sh(media_url => 'http://example.com/absolute/path.png')
]))
]},
);
## Feed every sample file through each parser entry point and check all
## results against the same expectations.
for my $case (@testcases) {
    my $name     = $case->{filename};
    my $filepath = File::Spec->catfile(".", "t", "samples", $name);

    ## Entry point 1: let the parser read the file itself.
    check_case("$name parse_file()", $input->parse_file($filepath), $case);

    ## Entry points 2 and 3: hand over the slurped file content.
    open my $file, "<", $filepath or die "Cannot open $filepath: $!";
    my $data = do { local $/; <$file> };
    for my $method (qw(parse parse_string)) {
        check_case("$name $method()", $input->$method($data), $case);
    }
    close $file;
}

done_testing;
( run in 2.067 seconds using v1.01-cache-2.11-cpan-437f7b0c052 )