Statistics-Covid

 view release on metacpan or  search on metacpan

lib/Statistics/Covid/DataProvider/Base.pm  view on Meta::CPAN

	die "abstract method you need to implement it"
	# return \@datum_objs # return an array of Datum objects created
}

# saves the input datas (a perl arrayref) to a local file (2nd parameter)
# returns 0 on failure, 1 on success
# '$datas' is an arrayref of
# [ [url, data_received_string, data_as_perlvar] ... ] (where ... denotes optionally more of that first array)
# some data providers send data and metadata, in which cases $datas will contain 2
# such sub-arrays (metadata, followed by data)
# others send only data, so they have 1 such array.
# Some future providers may send more data items...
# About [url, data_received_string, data_as_perlvar] :
# url is where data was fetched
# data_received_string is the json string fetched (or whatever the provider sent)
# data_as_perlvar is the data received as a perlvar (if it's json we received, then JSON::decode_json()
# will give the perlvar).
sub save_fetched_data_to_localfile {
	# Abstract: every data provider must supply its own implementation,
	# this base version unconditionally dies.
	# Parameters:
	#   $datas    : arrayref of [url, data_received_string, data_as_perlvar] items
	#   $outfiles : where the data should be saved
	# An implementation is expected to return 1 on success, 0 on failure.
	my ($self, $datas, $outfiles) = @_;

	die "abstract method you need to implement it";
}
sub load_fetched_data_from_localfile {
	# Abstract: every data provider must supply its own implementation,
	# this base version unconditionally dies.
	#
	# $inbasename is the basename of one particular downloaded batch.
	# How a batch is laid out on disk depends on the provider:
	#  * some keep the data in just one file, saved as a perl variable
	#    (Data::Dump, extension .pl) and also as a json file (verbatim
	#    from the data provider, extension .json). Ideally only the
	#    .pl file is needed.
	#  * others keep 2 files per batch, one with the data and one with
	#    the metadata (for example the dates!).
	# So an implementation either appends the extension to the basename
	# and evals the file contents, or does some more work in order to
	# read the metadata as well.
	# NOTE(review): the earlier note here said to append '.pm', presumably
	# '.pl' (the Data::Dump file mentioned above) was meant -- confirm.
	my ($self, $inbasename) = @_;

	die "abstract method you need to implement it";
}

##### methods below are implemented and do not generally need to be overridden

# creates an obj and returns it, or returns undef on failure.
# The only input parameter is an optional hashref of parameters:
#  * any key which already exists in the default object built below
#    ('urls', 'name', 'fileparams', 'debug') overwrites the default,
#    all other keys are silently ignored,
#  * 'config-file' or 'config-hash' (the former is checked first and
#    wins if both are present) is handed over to config_file() or
#    config_hash() respectively; if that call returns false a warning
#    is issued and undef is returned.
# Subclasses need to call init() after new(), once params are set.
sub new {
	my ($class, $params) = @_;
	$params = {} unless defined $params;

	my $self = {
		# urls is an arrayref of [url, headers] items, where headers
		# is an optional arrayref (this is how fetch() walks over it)
		'urls' => undef,
		'name' => undef, # this is the name for each provider, e.g. JHU or BBC
		'fileparams' => {
			# where downloaded data files go
			'datafiles-dir' => undef,
		},
		'debug' => 0,
	};
	bless $self => $class;

	# only known keys are accepted, a caller-supplied 'fileparams'
	# replaces the default sub-hash as a whole
	for my $k (keys %$params){
		$self->{$k} = $params->{$k} if exists $self->{$k}
	}

	# we accept config-file or config-hash, see t/example-config.json for an example
	if( exists $params->{'config-file'} ){
		if( ! $self->config_file($params->{'config-file'}) ){
			warn "error, call to config_file() has failed.";
			# explicit undef (and not a bare return) so that callers
			# in list context keep receiving exactly one element
			return undef
		}
	} elsif( exists $params->{'config-hash'} ){
		if( ! $self->config_hash($params->{'config-hash'}) ){
			warn "error, call to config_hash() has failed.";
			return undef
		}
	}

	# you need to call init() from subclasses after new() and set
	# params
	return $self
}
# Checks that the object has been given all it needs and makes
# sure the dir for the data files exists.
# Dies if 'urls' or the datafiles dir have not been defined.
# Returns 1 on success, 0 if the datafiles dir could not be created.
sub init {
	my ($self) = @_;

	my $verbosity = $self->debug();

	# these stay as die: whoever gets here without them is doing something wrong
	if( ! defined $self->{'urls'} ){
		die "'urls' has not been defined, set it via the parameters.";
	}
	if( ! defined $self->datafilesdir() ){
		die "'datafiles-dir' has not been defined, set it via the parameters or specify a configuration file via 'config-file'.";
	}

	# create the dir where data files will be written
	unless( Statistics::Covid::Utils::make_path($self->datafilesdir()) ){
		warn "error, failed to create data dir '".$self->datafilesdir()."'.";
		return 0;
	}
	warn "check and/or made dir for datafiles '".$self->datafilesdir()."'."
		if $verbosity > 0;

	return 1; # success
}
# returns undef on failure
# or an arrayref of [$aurl, $pv] on success
sub	fetch {
	my $self = $_[0];

	my $parent = ( caller(1) )[3] || "N/A";
	my $whoami = ( caller(0) )[3];

	my $DEBUG = $self->debug();
	my $jar = HTTP::CookieJar::LWP->new;
	my $ua = LWP::UserAgent->new(
		cookie_jar => $jar,
		timeout => 50, # seconds
	);
	# the return array will be [url, perlvar] for each url
	my @retJsonPerlVars = ();
	my ($response, $aurl, $headers);
	for my $anentry (@{$self->{'urls'}}){
		$aurl = $anentry->[0];
		$headers = $anentry->[1];
		# add a default useragent string before headers if any which can overwrite it
		if( $DEBUG > 0 ){ print STDOUT "$whoami : fetching url '$aurl' ...\n" }
		$ua->agent('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0');
		if( defined $headers ){
			$response = $ua->get($aurl, @$headers);
		} else {
			$response = $ua->get($aurl);
		}
		if( ! $response->is_success ){
			warn "failed to get url '".$aurl."': ".$response->status_line;
			return undef;
		}
		my $json_str = $response->decoded_content;
		if( ! defined $json_str or $json_str eq '' ){

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.557 second using v1.00-cache-2.02-grep-82fe00e-cpan-2c419f77a38b )