App-Chart

 view release on metacpan or  search on metacpan

lib/App/Chart/Suffix/LME.pm  view on Meta::CPAN

    ([sub {
        my ($elem) = @_;
        if ($elem->tag ne 'table') { return 1; }
        my $table = $elem;

        # possible tbody within
        my $tbody = List::Util::first {ref $_ && $_->tag eq 'tbody'}
          $table->content_list;
        if (! $tbody) { $tbody = $table; }

        my @rows = grep {ref $_ && $_->tag eq 'tr'} $tbody->content_list;
        if (@rows != 1) { return 1; }
        my $row = $rows[0];

        my @cols = grep {ref $_ && $_->tag eq 'td'
                           && ! html_element_contains_only_img($_) }
          $row->content_list;
        if (@cols != 1) { return 1; }
        my $col = $cols[0];

        $table->replace_with ($col->content_list);
        $changed = 1;
        return 0; # prune
      }
     ],
     1); # pre-order, no text
  if (DEBUG) { print "mung_1x1 changed $changed\n"; }
  if ($changed) {
    return $top->as_HTML;
  } else {
    return $str;
  }
}

# Return true if $elem has exactly one child and that child is an element
# whose tag is 'img'; false otherwise.  A lone child which is not a
# reference (eg. a plain text string) gives false.
sub html_element_contains_only_img {
  my ($elem) = @_;
  my ($first, @others) = $elem->content_list;
  return (! @others
          && ref $first
          && $first->tag eq 'img');
}

# Convert month name $str to a date string for the 1st of that month, in
# the year chosen by App::Chart::Download::month_to_nearest_year(), with
# the result formatted by App::Chart::ymd_to_iso().
# Dies if Date::Calc::Decode_Month() doesn't recognise $str.
sub month_str_to_nearest_iso {
  my ($str) = @_;
  my $month = Date::Calc::Decode_Month ($str);
  if (! $month) {
    die "LME parse: unrecognised month: '$str'";
  }
  my $nearest_year = App::Chart::Download::month_to_nearest_year ($month);
  return App::Chart::ymd_to_iso ($nearest_year, $month, 1);
}


#-----------------------------------------------------------------------------
# historical download page
#
# This uses the historical data at
#
use constant { HISTORICAL_XLS_URL =>
                 'http://www.lme.co.uk/dataprices_historical.asp' };
#
# That page is downloaded to get urls of XLS files for prices and volumes
# for each calendar month.  A price file is like
#
#     http://www.lme.co.uk/downloads/January_2007.xls
#
# and a volumes file
#
#     http://www.lme.co.uk/downloads/volumes_September_2007.xls
#
# Sometimes there's a revision number, in parens or after an underscore, like
#
#     http://www.lme.co.uk/downloads/historic_data/May_2008(1).xls
#     http://www.lme.co.uk/downloads/historic_data/December_2008_3.xls

# Return the list of file elements from the LME historical downloads page.
#
# The page at HISTORICAL_XLS_URL is obtained through
# App::Chart::Pagebits::get(), as a POST with "disclaimer=agreed" as the
# data, and with historical_xls_parse() given as the parse function.  The
# return is the contents of the 'files' arrayref in the resulting hashref,
# or an empty list if there's no such entry.
#
# NOTE(review): freq_days and key look like re-download interval and cache
# key for Pagebits -- confirm against App::Chart::Pagebits.
#
sub historical_xls_files {
  require App::Chart::Pagebits;
  my @request = (name      => __('LME historical downloads page'),
                 url       => HISTORICAL_XLS_URL,
                 method    => 'POST',
                 data      => 'disclaimer=agreed',
                 key       => 'lme-historical-xls',
                 freq_days => 2,
                 timezone  => App::Chart::TZ->london,
                 parse     => \&historical_xls_parse);
  my $page = App::Chart::Pagebits::get (@request);
  return @{ $page->{'files'} || [] };
}

# $content is the "dataprices_historical.asp" page.
# Return a hashref like { 'files' => [ {elem}, {elem}, ...] }
#
# At the start of the year there can be nothing available (the previous
# year's files being made chargeable items) so it's possible for 'files' to
# be empty.
#
# There's a size in the text following each link, but since there's no
# overlapping files to choose between there's no need to pick that out.
#
sub historical_xls_parse {
  my ($content) = @_;

  my %urls;
  require HTML::LinkExtor;
  my $p = HTML::LinkExtor->new
    (sub {
       my($tag, %links) = @_;
       $tag eq 'a' or return;
       my $link = $links{'href'} or return;

       # only the .xls files
       $link =~ /\.xls$/i or return;

       # exclude warehouse stocks
       if ($link =~ /stocks/i) { return; }

       $urls{$link} = 1;
     }, HISTORICAL_XLS_URL);
  $p->parse($content);

  my @files;
  foreach my $url (keys %urls) {
    if (DEBUG) { print "url $url\n"; }



( run in 1.167 second using v1.01-cache-2.11-cpan-39bf76dae61 )