Acme-Jungle-CrawlerExample

 view release on metacpan or  search on metacpan

lib/Data/News.pm  view on Meta::CPAN

package Data::News;
use Moose;
use Text::CSV_XS;
use DateTime;
use Digest::SHA1 qw(sha1_hex);
use HTML::Entities;

# Base name of the CSV file that save() appends to (save() adds the
# "./data/NEWS-<site_name>-" prefix). Unless a value is supplied, the name
# is built once, at construction time, from the local date and time as
# "<dmy>_<hms>.csv" with '-' as the separator inside both parts.
has filename_csv => (
    is      => 'rw',
    isa     => 'Str',
    default => sub {
        my $now = DateTime->now( time_zone => 'local' );

        # Hand the value back and let Moose store it: a default builder
        # should not call the writer on the object it is still building.
        return $now->dmy('-') . '_' . $now->hms('-') . '.csv';
    },
);

# Name of the site being crawled; save() embeds it in the output file name.
# Read-write string that starts out empty.
has site_name => (
    is      => 'rw',
    isa     => 'Str',
    default => q{},
);

# Runs after every call to the site_name accessor.
#
# When the accessor was called with a true value and without a true third
# argument, every '::' in that value is replaced by '-' and the result is
# stored back through the accessor. The second call passes 1 as the third
# argument so that this modifier does nothing on the nested call.
#
# NOTE(review): a value given to the constructor does not pass through the
# accessor, so it is presumably not rewritten by this modifier -- confirm
# against callers.
after 'site_name' => sub {
    my ( $self, $new_name, $already_normalized ) = @_;

    # Plain reads (no value) and empty/false values need no rewriting.
    return unless $new_name;

    # Nested call made by this modifier itself: stop here.
    return if $already_normalized;

    ( my $normalized = $new_name ) =~ s{::}{-}g;
    $self->site_name( $normalized, 1 );
    return;
};

# Fields describing one scraped page. Each is a read-write attribute that
# accepts any value; save() writes them out as CSV columns.
has [
    qw(
        title
        author
        content
        webpage
        meta_keywords
        meta_description
    )
] => (
    is  => 'rw',
    isa => 'Any',
);

# Image entries collected for the page; save() joins them with '|'.
# Every object starts with its own empty array reference.
has images => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# Object that save() reads title, author, content and the meta fields from.
# Unless another Data::News instance is assigned, it is the object itself.
#
# NOTE(review): the default stores a strong reference to the object inside
# the object, i.e. a reference cycle; consider whether that is acceptable
# for long-running crawls.
has data => (
    is      => 'rw',
    isa     => 'Data::News',
    default => sub { return $_[0] },
);

# CSV writer used by save(). Rows are terminated with CRLF.
#
# binary => 1 is required because the fields hold scraped page text: without
# it Text::CSV_XS refuses fields containing embedded newlines or characters
# outside printable ASCII, and the row is not written.
#
# Dies with the Text::CSV_XS diagnostic if the writer cannot be created.
has csv => (
    is      => 'ro',
    isa     => 'Text::CSV_XS',
    default => sub {
        my $csv = Text::CSV_XS->new( { binary => 1, eol => "\r\n" } )
          or die 'Cannot use CSV: ' . Text::CSV_XS->error_diag();
        return $csv;
    },
);

# Appends the current article as one row to the site's CSV file.
#
# Columns, in order: SHA-1 hex digest of the webpage attribute, the webpage
# attribute itself, then title, author, content, meta_keywords and
# meta_description (each with HTML entities decoded), and finally the
# images list joined with '|'.
#
# The file is ./data/NEWS-<site_name>-<filename_csv>, relative to the
# current working directory, opened in append mode as UTF-8.
#
# Returns a true value on success. Dies if the file cannot be opened, if the
# row cannot be written, or if closing the handle fails.
sub save {
    my ($self) = @_;

    # webpage and images are read from $self; the remaining fields are read
    # through the data attribute.
    my $article = $self->data;
    my @row     = (
        sha1_hex( $self->webpage ),
        $self->webpage,
        (
            map { scalar decode_entities( $article->$_ ) }
              qw(title author content meta_keywords meta_description)
        ),
        join( '|', @{ $self->images } ),
    );

    my $file = './data/NEWS-' . $self->site_name . '-' . $self->filename_csv;

    open my $fh, '>>:encoding(UTF-8)', $file or die "$file: $!";

    # print() returns false when the row could not be combined or written;
    # ignoring that would silently lose the article.
    $self->csv->print( $fh, \@row )
      or die "Cannot write CSV row to $file: " . $self->csv->error_diag();

    # Buffered write errors only surface when the handle is closed.
    close $fh or die "Error on file $file: $!";

    return 1;
}

1;




( run in 0.823 second using v1.01-cache-2.11-cpan-39bf76dae61 )