Weather-GHCN-Fetch

 view release on metacpan or  search on metacpan

lib/Weather/GHCN/StationTable.pm  view on Meta::CPAN


    if ( $content =~ m{<title>(.*?)</title>}xms ) {
        croak '*E* unable to fetch data from ' . $GHCN_STN_LIST_URL . ': ' . $1;
    }

    ## no critic [InputOutput::RequireBriefOpen]
    open my $stn_fh, '<', \$content
        or croak '*E* unable to open stations_content string';

    $_tstats->start('Parse_stn');

    my $is_stnid_filter;
    $is_stnid_filter = keys %{ $_stnid_filter_href }
        if $_stnid_filter_href;

    my %stnidx;

    # Scan the station table
    # - filtering on country, state, location and GIS distance according to options
    while ( my $line = <$stn_fh> ) {
        $_stn_count++;  # increment the stn count in the object

        # |---  0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50---|--- 60---
        # |123456789|123456789|123456789|123456789|123456789|123456789|123456789
        # (stationid).(latitu).(longitu).(elev).st.--name-----------------------
        # ACW00011604  17.1167  -61.7833   10.1    ST JOHNS COOLIDGE FLD

        ## no critic [ProhibitMagicNumbers]
        my $id = substr $line, 0, 11;

        next if $is_stnid_filter and not $_stnid_filter_href->{$id};

        my $lat   = 0 + substr $line, 12, 8;    # coerce to number
        my $long  = 0 + substr $line, 21, 9;    # coerce to number
        my $elev  = 0 + substr $line, 31, 6;    # coerce to number
        my $state =     substr $line, 38, 2;
        my $name  =     substr $line, 41, 30;
        my $gsn_flag  = substr $line, 72, 3;
        # my $hcr_crn_flag  = substr $line, 76, 3;
        # my $wmo_id        = substr $line, 80, 5;

        my $country = substr $id, 0, 2;
        $name =~ s{ \s+ \Z }{}xms;

        my $gsn = $gsn_flag eq 'GSN' ? 'GSN' : $EMPTY;

        ## use critic [ProhibitMagicNumbers]

        if (not $is_stnid_filter) {
            ## no critic [RequireExtendedFormatting]
            my $opt_country = $Opt->country;
            next if $Opt->country and $country !~ m{\A$opt_country}msi;

            my $opt_state = $Opt->state;
            next if $Opt->state and $state !~ m{\A$opt_state}msi;

            ## use critic [RequireExtendedFormatting]
            next if $Opt->location and not _match_location($id, $name, $Opt->location);

            if ( $Opt->gps ) {
                my ($opt_lat, $opt_long) = split m{[,;\s]}xms, $Opt->gps;
                my $distance = _gis_distance($opt_lat, $opt_long, $lat, $long);
                next if $distance > $Opt->radius;
            }

            next if $Opt->gsn and not $gsn;
        }

        $_station{$id} = Weather::GHCN::Station->new(
            id      => $id,
            country => $country,
            state   => $state,
            active  => $EMPTY,
            lat     => $lat,
            long    => $long,
            elev    => $elev,
            name    => $name,
            gsn     => $gsn
        );

        $stnidx{$_station{$id}->coordinates}++;
    }
    close $stn_fh or croak '*E* unable to close stations_content string';

    $_tstats->stop('Parse_stn');

    $_stn_selected_count = keys %_station;

    # assign a unique index to each station with matching coordinates
    my $ii = 0;
    foreach my $coord (sort keys %stnidx) {
        $stnidx{$coord} = ++$ii;
    }

    foreach my $stnid ( sort keys %_station ) {
        my $stn = $_station{$stnid};
        $stn->idx = $stnidx{$stn->coordinates};
    }

    $_stn_filtered_count = $self->_load_station_inventories();

    return \%_station;
}

=head2 report_kml( list => 0 )

Output the coordinates of the station collection in KML format, for
import into Google Earth as placemarks.  The active range of each
station will be included as timespans so that you can view the
placemarks across time.

=over 4

=item argument: list

If the argument list contains the 'list' keyword and a true value,
then a perl list is returned.  Otherwise, a string consisting of lines
of text is returned.

=item option: kml

Print KML on stdout.

=item option: kmlcolor <str>

A color name, one of blue, green, azure, purple, red, white or yellow.
Only the first character is recognized, so 'b' and 'bob' both result
in blue.  All colors are given an opacity of 50 (the range is 00 to ff).

=back

=cut

method report_kml ( %arg ) {
    my $return_list = $arg{list} // $_return_list;

    my $kml_color = _get_kml_color( $Opt->kmlcolor );
    my @output;

    push @output, '<?xml version="1.0" encoding="UTF-8"?>';
    push @output, '<kml xmlns="http://www.opengis.net/kml/2.2">';
    push @output, '<Document>';
    push @output, '  <Style id="mypin">';
    push @output, '  <IconStyle>';
    push @output, '  <color>' . $kml_color . '</color>';
    push @output, '  <Icon>';
    # push @output, '    <href>http://maps.google.com/mapfiles/kml/pushpin/' . $kmlcolor . '-pushpin.png</href>';
    push @output, '    <href>http://maps.google.com/mapfiles/kml/shapes/donut.png</href>';
    push @output, '  </Icon>';
    push @output, '  </IconStyle>';
    push @output, '  </Style>';

    foreach my $stn ( values %_station ) {
        next if $stn->error_count;
        # TODO:  use ->sets to get a list of spans and use the first span instead of splitting run_list
        my ($start, $end) = split m{ [-] }xms, $stn->active;
        $end //= $start;

        my $desc = $stn->description();

        push @output,         '  <Placemark>';
        push @output,         '    <styleUrl>#mypin</styleUrl>';
        push @output, sprintf '    <name>%s</name>', encode_entities($stn->name);
        push @output, sprintf '    <description>%s</description>', encode_entities($desc);
        push @output,         '    <TimeSpan>';
        push @output, sprintf '      <begin>%s-01-01T00:00:00Z</begin>', $start;
        push @output, sprintf '        <end>%s-12-31T23:59:59Z</end>', $end;
        push @output,         '    </TimeSpan>';
        push @output, sprintf '    <Point><coordinates>%f, %f, %f</coordinates></Point>', $stn->long, $stn->lat, $stn->elev;
        push @output,         '  </Placemark>';
    }

    push @output, '</Document>';
    push @output, '</kml>';

    return $return_list ? @output : tsv(\@output);
}

=head2 report_urls( list => 0, curl => 0 )

Output the URL of the .dly (daily weather data) file for each of the
stations that meet the selection criteria.

=over 4

=item argument: list

If the argument list contains the 'list' keyword and a true value,
then a perl list is returned.  Otherwise, a string consisting of lines
of text is returned.

=item argument: curl

If the argument list contains the 'curl' keyword and a true value,
then the output will be a set of lines that can be saved in a file
for subsequent input to the B<curl> program using the B<-K> option.
This facilitates bulk fetching of .dly files into the cache.

=back

=cut

method report_urls ( %arg ) {
    my $return_list = $arg{list} // $_return_list;

    my @output;

    push @output, '# Use curl -K <this_file> to download these URL\'s'
        if $arg{curl};

    foreach my $stn ( values %_station ) {
        next if $stn->error_count;
        # TODO:  use ->sets to get a list of spans and use the first span instead of splitting run_list
        my ($start, $end) = split m{ [-] }xms, $stn->active;

        if ( $arg{curl} ) {
            my @parts = split m{ [/] }xms, $stn->url;
            push @output, 'output = ' . $parts[-1];
            push @output, 'url = ' . $stn->url;
        } else {
            push @output, $stn->url;
        }
    }

    return $return_list ? @output : tsv(\@output);
}

=head2 ($opt, @errors) = set_options ( %args )

Set various options for this StationTable instance.  These options
will affect the processing and output by subsequent method calls.

Returns an Option object and a list of errors.  It is advised you
check @errors after calling set_options to report the errors and to
cease processing if there are any; e.g. I<die @errors if @errors>.

You may want to set up a file-scoped lexical variable to hold the
options object.  That way it is accessible throughout your code.
The typical calling pattern would look like this:

    my $Opt;  # a file-scope lexical

    sub run (@ARGV) {
        my $ghcn = Weather::GHCN::StationTable->new;

        my @errors;
        ($Opt, @errors) = set_options(...);
        die @errors if @errors;
        ...
}

=over 4

=item timing_stats => $TimingStats_obj

This optional argument should point to a TimingStats object that was
created by the caller and will be used to collect timing statistics.

=item hash_stats => \%hash_stats

This optional argument should be a reference to a hash that was
created by the caller and will be used to collect performance and
memory statistics.

=back

=cut

method set_options (%user_options) {

    if ( $user_options{'profile'} ) {
        # save the expanded profile file path in the object
        $_profile_file = path( $user_options{'profile'} )->absolute()->stringify;
        $_profile_href = Weather::GHCN::Options->get_profile_options($_profile_file);
    }

    $_ghcn_opt_obj //= Weather::GHCN::Options->new();
    # combine user-specified options with the defaults
    ($_opt_href, $_opt_obj) = $_ghcn_opt_obj->combine_options(\%user_options, $_profile_href);

    if ( $_opt_href->{'cachedir'} ) {
        $_cachedir = path( $_opt_href->{'cachedir'} )->absolute()->stringify;
        $_cache_obj = Weather::GHCN::CacheURI->new($_cachedir, $_opt_obj->refresh);
    } else {
        $_cache_obj = Weather::GHCN::CacheURI->new($EMPTY, $_opt_obj->refresh);
    }


    my @errors = $_ghcn_opt_obj->validate();

    if ( defined $_opt_href->{'aliases'} and defined $_opt_href->{'location'} ) {
        # if the location matches an aliases, then pull the list of
        # stations from the alias definition and assign it to the
        # $_stnid_filter
        my $stnid_string = $_opt_href->{'aliases'}->{ $_opt_href->{'location'} } // $EMPTY;
        if ($stnid_string) {
            my @stns = split m{ [,;\s] }xms, $stnid_string;
            my %stnid_filter;
            foreach my $stnid (@stns) {
                $stnid_filter{$stnid} = $TRUE;
            }
            $_stnid_filter_href = \%stnid_filter;
            $_opt_href->{'location'} = undef;
        }
    }

    # update the combined options hash in the Options object
    $_ghcn_opt_obj->opt_href = $_opt_href;
    # update the combined options object in the Options object
    $_ghcn_opt_obj->opt_obj = $_opt_obj;
    # save the combined option object in a file-scoped lexical for use throughout this code
    $Opt = $_opt_obj;

    $_measures_obj = Weather::GHCN::Measures->new($_opt_href);

    return ($_opt_obj, @errors);
}

=head2 summarize_data ()

Aggregate the daily weather data for the stations that were loaded,
according to the report option.

=over 4

=item option: report => 'daily|monthly|yearly'

When the report option is 'detail', no summarization is needed and the
method immediately returns undef.

=back


=cut

method summarize_data () {

    # when an 'detail' report is requested, we generate detail data only
    # so there is no need to summarize data.
    return if $Opt->report eq 'detail';

    # We'll be replacing $_aggregate_href with this hash after we're
    # done, but we can't loop over $_aggregate_href and be changing
    # it within the loop.  Hence the need for another hash.
    my %summary;

    $_tstats->start('Summarize_data');

    while ( my ($k,$href) = each $_aggregate_href->%* ) {
        ## no critic [ProhibitMagicNumbers]
        my $year    = substr $k, 0, 4;
        my $month   = substr $k, 4, 2;
        my $day     = substr $k, 6, 2;
        ## use critic [ProhibitMagicNumbers]

        my $key = $year;
        $key .= $month  if $Opt->report eq 'monthly' or $Opt->report eq 'daily';

lib/Weather/GHCN/StationTable.pm  view on Meta::CPAN


    my %kml_colors = (
        b => [ 'blue',  'ff780000' ],
        g => [ 'grn',   'ff147800' ],
        a => [ 'ltblu', 'ffF06414' ], # 'a' for azure
        p => [ 'purple','ff780078' ],
        r => [ 'red',   'ff1400FF' ],
        w => [ 'wht',   'ffFFFFFF' ],
        y => [ 'ylw',   'ff14F0FF' ],
    );

    # just use the first character of whatever string we're given
    my $c = substr $color_opt, 0, 1;

    return unless $kml_colors{$c};

    return $kml_colors{$c}->[1];
}

#----------------------------------------------------------------------
# -gps Helper Functions
#----------------------------------------------------------------------

# Calculate geographic distances in kilometers between coordinates in
# geodetic WGS84 format using the Haversine formula.

sub _gis_distance ($lat1, $lon1, $lat2, $lon2) {
    $lon1 = deg2rad($lon1);
    $lat1 = deg2rad($lat1);
    $lon2 = deg2rad($lon2);
    $lat2 = deg2rad($lat2);

    ## no critic [ProhibitParensWithBuiltins]
    ## no critic [ProhibitMagicNumbers]

    my $dlon = $lon2 - $lon1;
    my $dlat = $lat2 - $lat1;
    my $a = (sin($dlat/2)) ** 2 + cos($lat1) * cos($lat2) * (sin($dlon/2)) ** 2;
    my $c = 2 * atan2(sqrt($a), sqrt(1-$a));

    return 6_371_640 * $c / 1000.0;
}

#----------------------------------------------------------------------
# -location Helper Functions
#----------------------------------------------------------------------

# Match -location <pattern> to the station id or name provided in the call.
# If the pattern looks like a station id (e.g. 'CA006105887') then it matches
# to the stn_id parameter; if it looks like a comma-separated list of station
# id's, it returns success if any of them match the stn_id parameter.  Otherwise,
# it matches the <pattern> to the start of the stn_name parameter.
sub _match_location ($stn_id, $stn_name, $pattern) {

    my $result = $FALSE;

    if ($pattern =~ m{ \A $STN_ID_RE \Z }xms) {
        $result = $stn_id eq $pattern;
    }
    elsif ($pattern =~ m{ \A $STN_ID_RE ( [,] $STN_ID_RE )+ \Z }xms) {
        my @patterns = split m{ [,] }xms, $pattern;
        my $multi_pattern = '\A(' . join(q(|), @patterns) . ')\Z';

        $result = $stn_id =~ $multi_pattern;
    }
    else {
        ## no critic [RequireExtendedFormatting]
        $result = $stn_name =~ m{\A$pattern}msi;
    }

    return $result;
}

#----------------------------------------------------------------------
# -nogaps Helper Functions
#----------------------------------------------------------------------

# parse the missing values text, which looks like this:
#   month names:   Jan Feb Mar Apr May Jun Jul Aug Sep Oct
#   or day ranges: May[2] Oct[3,11] Nov[1] Dec[2,5]

sub _parse_missing_text ( $s ) {
    my @months;
    my @mmdd;
    my @f = split m{ \s }xms, $s;

    my $mmm_re    = qr{ [[:upper:]][[:lower:]][[:lower:]] }xms;
    my $nbr_rng   = qr{ \d+ ( [-] \d+)? }xms;
    my $rng_list  = qr{ $nbr_rng (, $nbr_rng)* }xms;

    foreach my $tok (@f) {
        if ( $tok =~ m{ \A $mmm_re \Z }xms ) {
            push @months, $MMM_TO_MM{$tok};
        }
        if ( $tok =~ m{ \A ($mmm_re) \[ ($rng_list) \] \Z }xms ) {
            my $mm = $MMM_TO_MM{$1};
            my @days = rng_new($2)->as_array;
            foreach my $day (@days) {
                push @mmdd, [$mm, $day];
            }
        }
    }
    return \@months, \@mmdd;
}

#----------------------------------------------------------------------
# Misc Functions
#----------------------------------------------------------------------

# format qflags like this:  I:1, N:9, S:4
sub _qflags_as_string ( $qflags_href ) {
    return $EMPTY if not $qflags_href;

    my @r;
    foreach my $qflag ( sort keys $qflags_href->%* ) {
        push @r, sprintf '%s:%d', $qflag, $qflags_href->{$qflag};
    }

    return join ', ', @r;
}
#----------------------------------------------------------------------
# Undef-safe Functions
#----------------------------------------------------------------------

# defined max - return the maximum of the two arguments,
#   or the defined argument if one of the arguments is undef
sub _dmax ($x, $y) {

    return if not defined $x and not defined $y;
    return $y if not defined $x;
    return $x if not defined $y;

    return $x > $y ? $x : $y;
}

# defined min - return the minimum of the two arguments,
#   or the defined argument if one of the arguments is undef
sub _dmin ($x, $y) {

    return if not defined $x and not defined $y;
    return $y if not defined $x;
    return $x if not defined $y;

    return $x < $y ? $x : $y;
}



( run in 1.683 second using v1.01-cache-2.11-cpan-71847e10f99 )