Acme-Jungle-CrawlerExample
view release on metacpan or search on metacpan
lib/Data/News.pm view on Meta::CPAN
package Data::News;
use Moose;
use Text::CSV_XS;
use DateTime;
use Digest::SHA1 qw(sha1_hex);
use HTML::Entities;
# Base name of the CSV file that save() appends to; save() adds the
# './data/NEWS-<site_name>-' prefix. Unless a value is supplied, it defaults
# to a "DD-MM-YYYY_HH-MM-SS.csv" timestamp in the local time zone, computed
# when the default is evaluated.
has filename_csv => (
    is      => 'rw',
    isa     => 'Str',
    default => sub {
        my $now = DateTime->now( time_zone => 'local' );

        # Return the value and let Moose store it. Calling the writer from
        # inside the default would set (and type-check) the attribute twice
        # on an object that is still being built.
        return $now->dmy('-') . '_' . $now->hms('-') . '.csv';
    },
);
# Name of the site being crawled; save() embeds it in the output file name.
has site_name => (
    is      => 'rw',
    isa     => 'Str',
    default => q{},
);

# After every accessor call that carries a true value, rewrite '::' as '-'
# and store the result again. The re-store passes a true third argument so
# the modifier returns early on that nested call instead of looping.
# NOTE(review): this only runs for accessor calls; a site_name handed to the
# constructor is not rewritten -- confirm whether callers rely on that.
after site_name => sub {
    my ( $self, $name, $already_rewritten ) = @_;

    return unless $name;
    return if $already_rewritten;

    $name =~ s{::}{-}g;
    $self->site_name( $name, 1 );
};
# Plain read/write article fields with no type restriction. save() writes
# each of them to the CSV row.
for my $field (qw(title author content webpage meta_keywords meta_description)) {
    has $field => (
        is  => 'rw',
        isa => 'Any',
    );
}
# Array reference of image entries; save() joins them with '|' into a single
# CSV column. Each object gets its own fresh empty array by default.
has images => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
# Object from which save() reads title, author, content and the meta fields.
# Defaults to the object itself, so $obj->data->title is $obj->title unless
# another Data::News instance is assigned.
# NOTE(review): the default makes the object hold a strong reference to
# itself, which looks like a reference cycle that keeps it alive until
# 'data' is reassigned -- confirm whether that matters for long crawls.
has data => (
    is      => 'rw',
    isa     => 'Data::News',
    default => sub { $_[0] },
);
# CSV writer used by save(). Rows are terminated with CRLF.
# Dies with the Text::CSV_XS diagnostic if the writer cannot be created.
has csv => (
    is      => 'ro',
    isa     => 'Text::CSV_XS',
    default => sub {

        # binary => 1 allows fields with embedded line breaks and other
        # binary characters (common in scraped article text). Without it
        # Text::CSV_XS refuses to combine such fields and print() fails for
        # the whole row.
        my $csv = Text::CSV_XS->new( { binary => 1, eol => "\r\n" } )
            or die "Cannot use CSV: " . Text::CSV_XS->error_diag();
        return $csv;
    },
);
# Appends the current article as one CSV row to
# ./data/NEWS-<site_name>-<filename_csv> (the ./data directory must exist).
#
# Columns, in order: SHA-1 hex digest of webpage, webpage, title, author,
# content, meta_keywords, meta_description (these five are read from
# $self->data with HTML entities decoded), and the images joined with '|'.
#
# Dies if the file cannot be opened or closed. Warns, instead of silently
# dropping the row, if the CSV writer rejects the row.
# Returns 1.
sub save {
    my ($self) = @_;

    my $data = $self->data;
    my @row  = (
        sha1_hex( $self->webpage ),
        $self->webpage,
        decode_entities( $data->title ),
        decode_entities( $data->author ),
        decode_entities( $data->content ),
        decode_entities( $data->meta_keywords ),
        decode_entities( $data->meta_description ),
        join( '|', @{ $self->images } ),
    );

    my $file = './data/NEWS-' . $self->site_name . '-' . $self->filename_csv;

    # Strict UTF-8 output layer; append so successive calls add rows.
    open my $fh, '>>:encoding(UTF-8)', $file or die "$file: $!";

    # print() returns false when the row cannot be serialised. Report it,
    # but keep going so one bad article does not abort a whole crawl.
    $self->csv->print( $fh, \@row )
        or warn "Could not write CSV row to $file: "
        . $self->csv->error_diag();

    close $fh or die "Error on file $file: $!";
    return 1;
}
1;
( run in 0.823 second using v1.01-cache-2.11-cpan-39bf76dae61 )