App-Chart
view release on metacpan or search on metacpan
lib/App/Chart/Suffix/LME.pm view on Meta::CPAN
([sub {
my ($elem) = @_;
if ($elem->tag ne 'table') { return 1; }
my $table = $elem;
# possible tbody within
my $tbody = List::Util::first {ref $_ && $_->tag eq 'tbody'}
$table->content_list;
if (! $tbody) { $tbody = $table; }
my @rows = grep {ref $_ && $_->tag eq 'tr'} $tbody->content_list;
if (@rows != 1) { return 1; }
my $row = $rows[0];
my @cols = grep {ref $_ && $_->tag eq 'td'
&& ! html_element_contains_only_img($_) }
$row->content_list;
if (@cols != 1) { return 1; }
my $col = $cols[0];
$table->replace_with ($col->content_list);
$changed = 1;
return 0; # prune
}
],
1); # pre-order, no text
if (DEBUG) { print "mung_1x1 changed $changed\n"; }
if ($changed) {
return $top->as_HTML;
} else {
return $str;
}
}
# Return true if $elem has exactly one child and that child is an element
# (a reference, as opposed to a plain text string) whose tag is 'img'.
# Otherwise return false.
#
# $elem can be any object with a content_list() method returning its
# children, each element child in turn having a tag() method.
#
sub html_element_contains_only_img {
  my ($elem) = @_;
  my @children = $elem->content_list;

  # "!1" is perl's ordinary false value, the same as a failed "==" gives
  return !1 if @children != 1;

  my ($only_child) = @children;
  # a non-reference child is a text segment, not an element
  return !1 unless ref $only_child;

  return $only_child->tag eq 'img';
}
# $str is a month name in a form accepted by Date::Calc::Decode_Month().
# Return the result of App::Chart::ymd_to_iso() for day 1 of that month, in
# the year chosen by App::Chart::Download::month_to_nearest_year()
# (presumably an ISO date string for the nearest occurrence of that month).
# Die if $str is not recognised as a month.
#
sub month_str_to_nearest_iso {
  my ($str) = @_;

  # Decode_Month() gives a false value for an unrecognised name
  my $month = Date::Calc::Decode_Month ($str);
  if (! $month) {
    die "LME parse: unrecognised month: '$str'";
  }

  my $year = App::Chart::Download::month_to_nearest_year ($month);
  return App::Chart::ymd_to_iso ($year, $month, 1);
}
#-----------------------------------------------------------------------------
# historical download page
#
# This uses the historical data at
#
use constant {
  HISTORICAL_XLS_URL => 'http://www.lme.co.uk/dataprices_historical.asp',
};
#
# That page is downloaded to get urls of XLS files for prices and volumes
# for each calendar month. A price file is like
#
# http://www.lme.co.uk/downloads/January_2007.xls
#
# and a volumes file
#
# http://www.lme.co.uk/downloads/volumes_September_2007.xls
#
# Sometimes there's a rev num like
#
# http://www.lme.co.uk/downloads/historic_data/May_2008(1).xls
# http://www.lme.co.uk/downloads/historic_data/December_2008_3.xls

# Return a list of the elements in the 'files' arrayref of the parsed
# historical downloads page, or an empty list if there's no such entry.
#
# The page at HISTORICAL_XLS_URL is obtained through
# App::Chart::Pagebits::get() as a POST with "disclaimer=agreed", and is
# parsed by historical_xls_parse().  See App::Chart::Pagebits for the meaning
# of the key, freq_days and timezone options.
#
sub historical_xls_files {
  require App::Chart::Pagebits;

  # kept as an ordered list so get() sees the options in this sequence
  my @page_options = (name      => __('LME historical downloads page'),
                      url       => HISTORICAL_XLS_URL,
                      method    => 'POST',
                      data      => 'disclaimer=agreed',
                      key       => 'lme-historical-xls',
                      freq_days => 2,
                      timezone  => App::Chart::TZ->london,
                      parse     => \&historical_xls_parse);
  my $page = App::Chart::Pagebits::get (@page_options);

  # a missing or false 'files' entry is treated as no files
  return @{ $page->{'files'} || [] };
}
# $content is the "dataprices_historical.asp" page.
# Return a hashref like { 'files' => [ {elem}, {elem}, ...] }
#
# At the start of the year there can be nothing available (the previous year's
# files being made chargeable items) so it's possible for 'files' to be empty.
#
# There's a size in the text following each link, but since there are no
# overlapping files to choose between there's no need to pick that out.
#
sub historical_xls_parse {
my ($content) = @_;
my %urls;
require HTML::LinkExtor;
my $p = HTML::LinkExtor->new
(sub {
my($tag, %links) = @_;
$tag eq 'a' or return;
my $link = $links{'href'} or return;
# only the .xls files
$link =~ /\.xls$/i or return;
# exclude warehouse stocks
if ($link =~ /stocks/i) { return; }
$urls{$link} = 1;
}, HISTORICAL_XLS_URL);
$p->parse($content);
my @files;
foreach my $url (keys %urls) {
if (DEBUG) { print "url $url\n"; }
( run in 1.167 second using v1.01-cache-2.11-cpan-39bf76dae61 )