view release on metacpan or search on metacpan
}
},
"configure" : {
"requires" : {
"ExtUtils::MakeMaker" : "6.52",
"File::ShareDir::Install" : "0"
}
},
"runtime" : {
"requires" : {
"DateTime" : "0",
"DateTime::Format::ISO8601" : "0",
"DateTime::Format::Mail" : "0",
"Encode::Locale" : "0",
"File::ShareDir" : "0",
"File::Slurper" : "0",
"Modern::Perl" : "1.20180701",
"Mojo::JSON" : "0",
"Mojo::UserAgent::Role::Queued" : "0",
"Mojolicious" : "0",
"XML::LibXML" : "0",
"perl" : "5.026000",
"strict" : "0",
license: open_source
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: App-jupiter
no_index:
directory:
- t
- inc
requires:
DateTime: '0'
DateTime::Format::ISO8601: '0'
DateTime::Format::Mail: '0'
Encode::Locale: '0'
File::ShareDir: '0'
File::Slurper: '0'
Modern::Perl: '1.20180701'
Mojo::JSON: '0'
Mojo::UserAgent::Role::Queued: '0'
Mojolicious: '0'
XML::LibXML: '0'
perl: '5.026000'
strict: '0'
Makefile.PL view on Meta::CPAN
'strict' => 0,
'warnings' => 0,
'Modern::Perl' => 1.20180701, # for '2018'
'Mojolicious' => 0, # for Mojo::Template and Mojo::UserAgent
'Mojo::UserAgent::Role::Queued' => 0,
'XML::LibXML' => 0,
'File::Slurper' => 0,
'File::ShareDir' => 0,
'Mojo::JSON' => 0,
'Encode::Locale' => 0,
'DateTime' => 0,
'DateTime::Format::Mail' => 0,
'DateTime::Format::ISO8601' => 0,
},
CONFIGURE_REQUIRES => {
'ExtUtils::MakeMaker' => '6.52',
'File::ShareDir::Install' => 0,
},
META_MERGE => {
'meta-spec' => { version => 2 },
resources => {
repository => {
type => 'git',
`libmodern-perl-perl` for [Modern::Perl](https://metacpan.org/pod/Modern%3A%3APerl)
`libmojolicious-perl` for [Mojo::Template](https://metacpan.org/pod/Mojo%3A%3ATemplate), [Mojo::UserAgent](https://metacpan.org/pod/Mojo%3A%3AUserAgent), [Mojo::Log](https://metacpan.org/pod/Mojo%3A%3ALog),
[Mojo::JSON](https://metacpan.org/pod/Mojo%3A%3AJSON), and [Mojo::Util](https://metacpan.org/pod/Mojo%3A%3AUtil)
`libxml-libxml-perl` for [XML::LibXML](https://metacpan.org/pod/XML%3A%3ALibXML)
`libfile-slurper-perl` for [File::Slurper](https://metacpan.org/pod/File%3A%3ASlurper)
`libdatetime-perl` for [DateTime](https://metacpan.org/pod/DateTime)
`libdatetime-format-mail-perl` for [DateTime::Format::Mail](https://metacpan.org/pod/DateTime%3A%3AFormat%3A%3AMail)
`libdatetime-format-iso8601-perl` for [DateTime::Format::ISO8601](https://metacpan.org/pod/DateTime%3A%3AFormat%3A%3AISO8601)
Unfortunately, [Mojo::UserAgent::Role::Queued](https://metacpan.org/pod/Mojo%3A%3AUserAgent%3A%3ARole%3A%3AQueued) isn't packaged for Debian.
Therefore, let's build it and install it as a Debian package.
sudo apt-get install libmodule-build-tiny-perl
sudo apt-get install dh-make-perl
sudo dh-make-perl --build --cpan Mojo::UserAgent::Role::Queued
dpkg --install libmojo-useragent-role-queued-perl_1.15-1_all.deb
To generate the `README.md` from the source file, you need `pod2markdown`
**link** is the URL to the post on the web (probably a HTML page).
**blog\_title** is the title of the site.
**blog\_link** is the URL for the site on the web (probably a HTML page).
**blog\_url** is the URL for the site's feed (RSS or Atom).
**authors** are the authors (or the Dublin Core contributor), a list of strings.
**date** is the publication date, as a DateTime object.
**day** is the publication date, in ISO date format: YYYY-MM-DD, for the UTC
timezone. The UTC timezone is picked so that the day doesn't jump back and forth
when sorting entries by date.
**content** is the full post content, as string or encoded HTML.
**excerpt** is the post content, limited to 500 characters, with paragraph
separators instead of HTML elements, as HTML. It is not encoded because the idea
is that it only gets added to the HTML and not to the feed, and the HTML it
script/jupiter view on Meta::CPAN
C<libmodern-perl-perl> for L<Modern::Perl>
C<libmojolicious-perl> for L<Mojo::Template>, L<Mojo::UserAgent>, L<Mojo::Log>,
L<Mojo::JSON>, and L<Mojo::Util>
C<libxml-libxml-perl> for L<XML::LibXML>
C<libfile-slurper-perl> for L<File::Slurper>
C<libdatetime-perl> for L<DateTime>
C<libdatetime-format-mail-perl> for L<DateTime::Format::Mail>
C<libdatetime-format-iso8601-perl> for L<DateTime::Format::ISO8601>
Unfortunately, L<Mojo::UserAgent::Role::Queued> isn't packaged for Debian.
Therefore, let's build it and install it as a Debian package.
sudo apt-get install libmodule-build-tiny-perl
sudo apt-get install dh-make-perl
sudo dh-make-perl --build --cpan Mojo::UserAgent::Role::Queued
dpkg --install libmojo-useragent-role-queued-perl_1.15-1_all.deb
To generate the C<README.md> from the source file, you need F<pod2markdown>
script/jupiter view on Meta::CPAN
feed to generate, the second is the template to use:
B<jupiter html> I<atom.xml template.xml planet.html template.html feed.opml>
In the above case, Planet Jupiter will write a feed called F<atom.xml> based on
F<template.xml> and a HTML file called F<planet.html> based on F<template.html>,
using the cached entries matching the feeds in F<feed.opml>.
=cut
use DateTime;
use DateTime::Format::Mail;
use DateTime::Format::ISO8601;
use File::Basename;
use File::Slurper qw(read_binary write_binary read_text write_text);
use List::Util qw(uniq min shuffle);
use Modern::Perl;
use Mojo::Log;
use Mojo::JSON qw(decode_json encode_json);
use Mojo::Template;
use Mojo::UserAgent;
use Pod::Simple::Text;
use XML::LibXML;
script/jupiter view on Meta::CPAN
# Package-level logger object.
our $log = Mojo::Log->new;

# XPath context shared by the XPath queries in this file, with the namespace
# prefixes those queries rely on.
my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs('atom', 'http://www.w3.org/2005/Atom');
$xpc->registerNs('html', 'http://www.w3.org/1999/xhtml');
$xpc->registerNs('dc', 'http://purl.org/dc/elements/1.1/');
$xpc->registerNs('itunes', 'http://www.itunes.com/dtds/podcast-1.0.dtd');

# Stand-in date (the epoch) for entries where no date could be determined.
my $undefined_date = DateTime->from_epoch( epoch => 0 );

# French day and month abbreviations mapped to their English counterparts, plus
# the alternations matching any of the French forms. Used by french() to rewrite
# a date before it is handed to DateTime::Format::Mail a second time.
my %wday = qw (lun. Mon mar. Tue mer. Wed jeu. Thu ven. Fri sam. Sat dim. Sun);
my %month = qw (janv. Jan févr. Feb mars Mar avr. Apr mai May juin Jun
juil. Jul août Aug sept. Sep oct. Oct nov. Nov déc. Dec);
my $wday_re = join('|', map { quotemeta } keys %wday);
my $month_re = join('|', map { quotemeta } keys %month);

# Our tests don't want to call main
__PACKAGE__->main unless caller;
script/jupiter view on Meta::CPAN
B<date> is the publication date of the HTML page, in ISO date format:
YYYY-MM-DD.
B<files> is the list of OPML files used.
=cut
# Build the hash of global values handed to the templates.
#
# Takes the list of OPML files (passed through unchanged) and returns a hash
# reference with two keys:
#   date  - the current date as formatted by DateTime's ymd (YYYY-MM-DD)
#   files - the argument as given
sub globals {
  my $files = shift;
  my $today = DateTime->now->ymd;
  return {date => $today, files => $files};
}
=head2 Writing templates for feeds
Feeds have the following keys available:
B<title> is the title of the feed.
B<url> is the URL of the feed (RSS or Atom). This is not the link to the site!
script/jupiter view on Meta::CPAN
warn "No feeds found in the OPML file $file\n" unless @nodes;
push @files, { file => $file, path => $path, name => $name };
}
@feeds = shuffle @feeds;
return \@feeds, \@files;
}
sub entries {
my $feeds = shift;
my $limit = shift;
my $date = DateTime->now(time_zone => 'UTC')->subtract( days => 90 ); # compute once
my $now = DateTime->now(time_zone => 'UTC');
my @entries;
for my $feed (@$feeds) {
next unless -r $feed->{cache_file};
my $doc = eval { XML::LibXML->load_xml(recover => 2, location => $feed->{cache_file} )};
if (not $doc) {
$feed->{message} = xml_escape "Parsing error: $@";
$feed->{code} = 422; # unprocessable
next;
}
$feed->{doc} = $doc;
script/jupiter view on Meta::CPAN
next;
}
# if this is an Atom feed, we need to sort the entries ourselves (older entries at the end)
my @candidates = map {
my $entry = {};
$entry->{element} = $_;
$entry->{id} = id($_);
$entry->{date} = updated($_) || $undefined_date;
$entry;
} @nodes;
@candidates = grep { DateTime->compare($_->{date}, $now) <= 0 } @candidates;
@candidates = unique(sort { DateTime->compare( $b->{date}, $a->{date} ) } @candidates);
@candidates = @candidates[0 .. min($#candidates, $limit - 1)];
# now that we have limited the candidates, let's add more metadata from the feed
for my $entry (@candidates) {
$entry->{feed} = $feed;
# these two are already escaped
$entry->{blog_title} = $feed->{title};
$entry->{blog_url} = $feed->{url};
}
add_age_warning($feed, \@candidates, $date);
push @entries, @candidates;
script/jupiter view on Meta::CPAN
return \@entries;
}
# Flag a feed as stale by setting $feed->{message} and $feed->{code} (206).
#
# Arguments: the feed hash (its {doc} is the parsed feed document), an array
# reference of entries (each with a {date}), and the cutoff date.
#
# The feed is flagged when its own date, as found by updated() on the channel
# or feed element, is older than the cutoff. Otherwise it is flagged when none
# of the entries has a date at or after the cutoff.
sub add_age_warning {
  my ($feed, $entries, $date) = @_;
  # first check: the modification date of the feed itself
  my ($root) = $xpc->findnodes("/rss/channel | /atom:feed", $feed->{doc});
  my $feed_date = updated($root);
  if ($feed_date and DateTime->compare($feed_date, $date) == -1) {
    $feed->{message} = "No feed updates in 90 days";
    $feed->{code} = 206; # partial content
    return;
  }
  # second check: a single entry at or after the cutoff means no warning
  for my $candidate (@$entries) {
    next if DateTime->compare($candidate->{date}, $date) < 0;
    return;
  }
  $feed->{message} = "No entry newer than 90 days";
  $feed->{code} = 206; # partial content
}
# Extract a date from the given node (a feed, channel, item or entry element).
#
# Looks for pubDate, atom:published or atom:updated children and parses the
# text of the first match. Three parsers are tried in turn, each inside an eval
# so that a parse failure simply falls through to the next one: the mail date
# format, ISO 8601, and the mail date format again after french() has
# translated French day and month abbreviations.
#
# Returns nothing when there is no node or no date element; otherwise returns
# the result of the first parser that produced a true value, or the (false)
# result of the last attempt.
sub updated {
  my ($node) = @_;
  return unless $node;
  my @matches = $xpc->findnodes('pubDate | atom:published | atom:updated', $node);
  return unless @matches;
  my $text = $matches[0]->textContent;
  my $parsed = eval { DateTime::Format::Mail->parse_datetime($text) };
  $parsed ||= eval { DateTime::Format::ISO8601->parse_datetime($text) };
  $parsed ||= eval { DateTime::Format::Mail->parse_datetime(french($text)) };
  return $parsed;
}
# Translate a date string using French abbreviations into the English ones.
#
# A French day abbreviation at the very start of the string and the first
# French month abbreviation found at a word boundary are replaced using the
# %wday and %month tables. Returns the rewritten copy; the caller's string is
# left alone.
sub french {
  my ($text) = @_;
  for ($text) {
    s/^($wday_re)/$wday{$1}/;
    s/\b($month_re)/$month{$1}/;
  }
  return $text;
}
script/jupiter view on Meta::CPAN
$seen{$node->{id}} = 1;
push(@unique, $node);
}
return @unique;
}
# Reduce the entries to the $limit most recent ones.
#
# Takes an array reference of entries and the maximum number wanted. The
# referenced array is replaced in place by its de-duplicated content (see
# unique()), sorted newest first. The return value is a new array reference
# holding at most $limit entries from the front of that list.
sub limit {
  my ($entries, $limit) = @_;
  # we want the most recent entries overall
  my @newest_first = sort { DateTime->compare( $b->{date}, $a->{date} ) } unique(@$entries);
  @$entries = @newest_first;
  my $last = min($#newest_first, $limit - 1);
  return [ @newest_first[0 .. $last] ];
}
=head2 Writing templates for entries
Entries have the following keys available:
B<title> is the title of the post.
B<link> is the URL to the post on the web (probably a HTML page).
B<blog_title> is the title of the site.
B<blog_link> is the URL for the site on the web (probably a HTML page).
B<blog_url> is the URL for the site's feed (RSS or Atom).
B<authors> are the authors (or the Dublin Core contributor), a list of strings.
B<date> is the publication date, as a DateTime object.
B<day> is the publication date, in ISO date format: YYYY-MM-DD, for the UTC
timezone. The UTC timezone is picked so that the day doesn't jump back and forth
when sorting entries by date.
B<content> is the full post content, as string or encoded HTML.
B<excerpt> is the post content, limited to 500 characters, with paragraph
separators instead of HTML elements, as HTML. It is not encoded because the idea
is that it only gets added to the HTML and not to the feed, and the HTML it
script/jupiter view on Meta::CPAN
$entry->{link} = shift(@links) || "";
my @authors = map { without_email(xml_escape strip_html($_->to_literal)) } $xpc->findnodes(
'author | atom:author/atom:name | atom:contributor/atom:name | dc:creator | dc:contributor', $element);
@authors = map { without_email(xml_escape strip_html($_->to_literal)) } $xpc->findnodes(
'/atom:feed/atom:author/atom:name | '
. '/atom:feed/atom:contributor/atom:name | '
. '/rss/channel/dc:creator | '
. '/rss/channel/dc:contributor | '
. '/rss/channel/webMaster ', $element) unless @authors;
$entry->{authors} = @authors ? \@authors : undef; # key must exist in the hash
if (DateTime->compare($entry->{date}, $undefined_date) == 0) {
$entry->{day} = "(no date found)";
} else {
$entry->{day} = $entry->{date}->clone->set_time_zone('UTC')->ymd; # operate on a clone
}
my @categories = map { xml_escape strip_html($_->to_literal) } $xpc->findnodes('category | atom:category/@term', $element);
$entry->{categories} = @categories ? \@categories : undef; # key must exist in the hash
$entry->{excerpt} = '';
$entry->{content} = '';
my @nodes = $xpc->findnodes('description[text()!=""] | atom:content[text()!=""]', $element);
@nodes = $xpc->findnodes('summary[text()!=""] | atom:summary[text()!=""] | itunes:summary[text()!=""]', $element) unless @nodes;
share/feed.rss view on Meta::CPAN
% my ($globals, $feeds, $entries) = @_;
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Planet</title>
<description>This is an aggregate of multiple feeds.</description>
<link>https://alexschroeder.ch/cgit/planet-jupiter/about</link>
<pubDate><%= DateTime::Format::Mail->format_datetime(DateTime->now) %></pubDate>
% for my $entry (@$entries) {
<item>
<title><%= $entry->{title} %></title>
<link><%= $entry->{link} %></link>
% if ($entry->{date}) {
<pubDate><%= DateTime::Format::Mail->format_datetime($entry->{date}) %></pubDate>
% }
% for my $author (@{$entry->{authors}}) {
<dc:creator><%= $author %></dc:creator>
% }
% for my $category (@{$entry->{categories}}) {
<category><%= $category %></category>
% }
<source url="<%= $entry->{blog_url} %>"><%= $entry->{blog_title} %></source>
<description>
%= $entry->{content}
t/elements.t view on Meta::CPAN
stop_daemon();
Jupiter::make_html("test-$id/rss2sample.html", "test-$id/rss2sample.xml", "test-$id/rss2sample.opml");
ok(-f "test-$id/rss2sample.html", "HTML was generated");
my $doc = XML::LibXML->load_html(location => "test-$id/rss2sample.html");
is($doc->findvalue('//li/a[position()=2]'), "Elements", "Elements feed title matches");
is($doc->findvalue('//div[@class="content"]'), "I love the fediverse!", "Encoded content extracted");
use DateTime;
my $now = DateTime->now;
my $atom = <<"EOT";
<?xml version="1.0" encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom'>
<updated>$now</updated>
<title type='text'>Textual</title>
<entry>
<updated>$now</updated>
<title type='text'>Current</title>
<summary type='text'>
t/encoding-atom.t view on Meta::CPAN
use utf8;
use Encode;
use Modern::Perl;
use Test::More;
use File::Slurper qw(write_binary read_binary write_text);
do './t/test.pl';
my ($id, $port) = init();
save_opml('rss2sample.opml');
use DateTime;
my $now = DateTime->now;
my $atom = <<'EOT';
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom'>
<updated>$now</updated>
<title type='text'>Schröderâs Blog</title>
<author><name>Alex Schröder</name><email>noreply@blogger.com</email></author>
<entry>
<published>$now</published>
<updated>$now</updated>