Data-Downloader
view release on metacpan or search on metacpan
lib/Data/Downloader/Feed.pm view on Meta::CPAN
=head1 NAME
Data::Downloader::Feed
=head1 DESCRIPTION
Represents an RSS feed.
=cut
package Data::Downloader::Feed;
use Log::Log4perl qw/:easy/;
use String::Template qw/expand_string missing_values/;
use XML::LibXML;
use XML::LibXML::XPathContext;
use File::Temp;
use File::Copy qw/copy/;
use Time::HiRes qw/gettimeofday/;
use Params::Validate qw/validate validate_with/;
use Pod::Usage qw/pod2usage/;
use Data::Downloader::Utils qw/do_system_call/;
use if $Data::Downloader::useProgressBars, "Smart::Comments";
use strict;
use warnings;
our $defaultNamespaceURI; # set once per execution; default namespace for items in feeds.
# "http://purl.org/rss/1.0/"
# Evaluate an XPath expression relative to a node and return the string value.
#
# Arguments (after the invocant):
#   $xp      - accepted to keep the positional call signature; not used here
#   $context - node that the XPath context is created for
#   $xpath   - XPath expression handed to findvalue()
#
# If the package variable $defaultNamespaceURI is set, it is registered under
# the prefix "default" before the expression is evaluated.
# Dies (via LOGDIE) when the value found is exactly "Bad credentials".
sub _get_from_xpath {
    my ($self, undef, $node, $expr) = @_;

    my $ctx = XML::LibXML::XPathContext->new($node);
    if ($defaultNamespaceURI) {
        $ctx->registerNs(default => $defaultNamespaceURI);
    }

    my $found = $ctx->findvalue($expr);
    TRACE "got $found from $expr";

    if ($found && $found eq "Bad credentials") {
        LOGDIE "Got 'Bad credentials' for feed ".$self->name;
    }

    return $found;
}
# Build a numeric name from the current time, the process id and a random
# number. Fields, in order, each zero-padded to a minimum width:
#   seconds since the epoch (10), microseconds (6), $$ (8),
#   random integer below 1_000_000 (6).
sub _make_unique_filename {
    my ($self) = @_;

    # List context yields (seconds, microseconds).
    my ($seconds, $microseconds) = gettimeofday();
    my $random_part = int rand 1_000_000;

    return sprintf("%010d%06d%08d%06d", $seconds, $microseconds, $$, $random_part);
}
=head1 METHODS
=over
=item refresh
Refresh the data stored from this feed.
Parameters:
- download : download the files, too?
- fake : do a fake download?
- from_file : use this file instead of the live feed?
- any variables in the feed_template for this feed
Refreshing a feed may also :
- remove files which are now obsolete (because the feed has a urn for a different file)
- update the symlinks for files whose metadata has changed
Also if both "user" and "password" are passed, they are treated specially
and sent as HTTP Basic auth credentials for the rss feed.
=cut
sub refresh {
my $self = shift;
my %args_tmp = @_;
my $args;
our $defaultNamespaceURI;
$self->load unless ($self->repository && $self->repository_obj);
DEBUG "refreshing feed ".$self->name.", repository is ".$self->repository_obj->name;
# TODO store last_updated, skip already stored items.
#
# Get the xml
#
my $tmp = File::Temp->new;
if (my $file = $args_tmp{from_file}) {
DEBUG "using file $file";
$args = validate(@_, { from_file => 1, download => 0, fake => 0,
map { $_ => 0} missing_values($self->feed_template) } );
copy "$file", "$tmp" or die "Copying $file to $tmp failed : $!";
} else {
my @defaults = map { $_->name => $_->default_value } $self->feed_parameters;
my %args = (@defaults, @_);
# Handle these explicitly
my ($username,$password);
if ($args{user} && $args{password}) {
$username = delete($args{'user'});
$password = delete($args{'password'});
}
my @args = %args;
$args = validate_with(
params => \@args,
spec => { download => 0, fake => 0, map { $_ => 1} missing_values($self->feed_template) },
on_fail => sub {
my $msg = shift;
print qq|\n$msg\n|;
my %default_params = map { $_->name => 1} $self->feed_parameters;
my %all_params = map { $_ => 1 } missing_values($self->feed_template);
my %mandatory_params = map { $_ => 1 } grep(!defined $default_params{$_}, keys %all_params);
my %defaults = map { $_->name => $_->default_value } $self->feed_parameters;
my $default_params_str = join("\n", map { $defaults{$_} ? "$_ ($defaults{$_})" : "$_ (optional)" } keys %defaults);
my $mandatory_params_str = join("\n", map{ "$_ (mandatory)" } keys %mandatory_params);
my $index;
my %args = map{ $_ => 1 } grep{!($index++ % 2)} @args;
my @non_valid = grep(! (defined $all_params{$_}), keys %args);
my $non_valid_str;
if (@non_valid) {
$non_valid_str = join("\n", @non_valid);
( run in 0.683 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )