App-Chart
view release on metacpan or search on metacpan
unused/Yahoo-v7.pm view on Meta::CPAN
# Copyright 2007, 2008, 2009, 2010, 2011, 2015, 2016, 2017, 2019, 2020, 2023, 2024 Kevin Ryde
# This file is part of Chart.
#
# Chart is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3, or (at your option) any later version.
#
# Chart is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Chart. If not, see <http://www.gnu.org/licenses/>.
# Protocols:
#
# Don't know the full state of what Yahoo intends to offer.
# But there's a range of "v7", "v8", "v11" URL forms. These might be
# meant as data sources for Yahoo client-side viewing software.
#
# Here currently using the v8 method, as it seems to be without
# protocol level hoops. Presumably it's for personal use and possibly
# after creating a Yahoo account -- even if it works without doing so
# or logging in.
#
#
# Data Format:
#
# Each of these methods can apparently return results in either CSV or
# JSON format.
#
# The JSON form, maybe CSV too, has prices put through
# single-precision floats, ie. 24 bit mantissa, which causes some
# price strings like 123.44999999 instead of 123.45.
#
# The parse here tries to massage that back to an apparent intended
# number of decimals. Eg. 2 decimals for dollars and cents, but
# allowing for trading in fractions of a cent which is 3 decimals.
#
# In daily data download, current day trading in progress shows in
# today's date and changes through the course of trading until being
# fixed maybe at market close, or maybe the next day. Don't know
# how pre-market or post-market trading is applied, or what happens
# if a few futures or similar might even be 24 hour trading.
#
#
# Cookies and Crumb:
#
# There's been some protocol hoops to jump through in recent times.
# It seems to be sometimes on the v7 form, maybe always on v11.
# The v7 had seemed fine asking for the latest few days daily data,
# and it's possible lately needs nothing for any amount.
#
# The hoops consist of
#
# - Fetch one of the finance.yahoo.com web pages to get a
# HTTP Set-Cookie header.
# - Maybe answer the ridiculous EU cookie consent on the page.
# Maybe that depends where your IP looks like it's from.
# - Look deep within script in that page for a "crumb" string.
# Or maybe a further "getcrumb" web fetch, but seems result
# is embedded in the page.
# - On each data download, HTTP Cookie header, and URL crumb
# field.
#
# Presumably this is designed as a level of difficulty, to stop the
# past quotes and data that could be had from a single URL (and which
# Yahoo apparently found was widely abused beyond personal use).
#
package App::Chart::Yahoo;
use 5.010;
use strict;
use warnings;
use Carp;
use Date::Calc;
use Date::Parse;
use List::Util qw (min max);
use POSIX ();
use Time::Local;
use URI::Escape;
use Locale::TextDomain ('App-Chart');
use Tie::TZ;
use App::Chart;
use App::Chart::Database;
use App::Chart::Download;
use App::Chart::DownloadHandler;
use App::Chart::DownloadHandler::IndivChunks;
use App::Chart::IntradayHandler;
use App::Chart::Latest;
use App::Chart::Sympred;
use App::Chart::TZ;
use App::Chart::Weblink;
# uncomment this to run the ### lines
use Smart::Comments;
use constant DEBUG => 0;
# Symbol predicates for this module.
#
# $yahoo_pred accepts a symbol which either ends in "." or "=" followed by
# one or two characters (.X or .XY), or which contains no "." at all (no
# suffix).  Symbols ending in .FQ or .LJ are always rejected.
our $yahoo_pred = App::Chart::Sympred::Proc->new
(sub {
my ($symbol) = @_;
return ($symbol !~ /\.(FQ|LJ)$/
&& $symbol =~ /[.=]..?$|^[^.]+$/);
});
# The download and latest-quote predicates are each built from $yahoo_pred.
# NOTE(review): presumably Sympred::Any is an extensible "any of these" set
# so other code can add to them -- confirm in App::Chart::Sympred.
my $download_pred = App::Chart::Sympred::Any->new ($yahoo_pred);
our $latest_pred = App::Chart::Sympred::Any->new ($yahoo_pred);
# Index symbols: a leading "^", or a leading "0" together with a .SS suffix.
our $index_pred = App::Chart::Sympred::Regexp->new (qr/^\^|^0.*\.SS$/);
# Futures predicate, created here with no members.
my $futures_pred = App::Chart::Sympred::Any->new;
# Help node for everything matching $yahoo_pred;
# overridden by specific nodes
App::Chart::setup_source_help
($yahoo_pred, __p('manual-node','Yahoo Finance'));
#-----------------------------------------------------------------------------
# Weblink - basic quote page
#
unused/Yahoo-v7.pm view on Meta::CPAN
if (@$timestamps) {
# timestamps are time of last trade, as can be seen by looking at
# something with low enough volume, eg. RMX.AX
#
if (defined (my $timet = $timestamps->[-1])) {
($record{'last_date'}, $record{'last_time'})
= $symbol_timezone->iso_date_time($timet);
}
if (my $indicators = $result->{'indicators'}->{'quote'}->[0]) {
foreach my $key ('open','high','low') {
if (my $aref = $indicators->{$key}) {
$record{$key} = crunch_trailing_nines($aref->[$#$timestamps]);
}
}
if (my $aref = $indicators->{'volume'}) {
$record{'volume'} = $aref->[$#$timestamps];
}
if (my $aref = $indicators->{'close'}) {
my $last = $record{'last'}
= crunch_trailing_nines($aref->[$#$timestamps]);
# "change" from second last timestamp, if there is one.
# As of Nov 2017, XAUUSD=X only ever gives a single latest
# quote from v7, no previous day to compare.
#
if (defined $last
&& scalar(@$timestamps) >= 2
&& defined(my $prev = $aref->[$#$timestamps - 1])) {
$record{'change'}
= App::Chart::decimal_sub($last, crunch_trailing_nines($prev));
}
}
}
}
if (defined $record{'last_date'}
&& (my $splits = $result->{'events'}->{'splits'})) {
while (my ($timet, $href) = each %$splits) {
my $split_date = $symbol_timezone->iso_date($timet);
if ($split_date eq $record{'last_date'}) {
__x('Split {ratio}', ratio => $href->{'splitRatio'})
}
}
}
}
return $h;
}
#-----------------------------------------------------------------------------
# Download Data
#
# This uses the historical prices page like
#
# https://finance.yahoo.com/quote/AMP.AX/history?p=AMP.AX
#
# which puts a cookie like
#
# Set-Cookie: B=fab5sl9cqn2rd&b=3&s=i3; expires=Sun, 03-Sep-2018 04:56:13 GMT; path=/; domain=.yahoo.com
#
# and contains buried within 1.5 mbytes of hideous script
#
# <script type="application/json" data-sveltekit-fetched data-url="https://query1.finance.yahoo.com/v1/test/getcrumb?lang=en-US&region=US" data-ttl="59">{"status":200,"statusText":"OK","headers":{},"body":"DKVWQE/ggh4"}</script>
#
# Any \u002F or similar is escaped "/" character or similar.
# The crumb is included in a CSV download query like the following
# (alas can't use http, it redirects to https)
#
# https://query1.finance.yahoo.com/v7/finance/download/AMP.AX?period1=1503810440&period2=1504415240&interval=1d&events=history&crumb=hdDX/HGsZ0Q
#
# period1 is the start time, period2 the end time, both as Unix seconds
# since 1 Jan 1970. Not sure of the timezone needed. Some experiments
# suggest it depends on the timezone of the symbol. http works as well as
# https. The result is like
#
# Date,Open,High,Low,Close,Adj Close,Volume
# 2017-09-07,30.299999,30.379999,30.000000,30.170000,30.170000,3451099
#
# The "9999s" are some bad rounding off to what would be usually at most
# 3 (maybe 4?) decimal places.
#
# Response is 404 if no such symbol, 401 unauthorized if no cookie or crumb.
#
# "events=div" gives dividends like
#
# Date,Dividends
# 2017-08-11,0.161556
#
# "events=split" gives splits like, for a consolidation (GXY.AX)
#
# Date,Stock Splits
# 2017-05-22,1/5
#
#----------------
# For reference, there's a "v8" which is json format (%7C = "|")
#
# https://query2.finance.yahoo.com/v8/finance/chart/IBM?formatted=true&lang=en-US&region=US&period1=1504028419&period2=1504428419&interval=1d&events=div%7Csplit&corsDomain=finance.yahoo.com
#
# This doesn't require a cookie and crumb, has some info like symbol
# timezone. The numbers look like they're rounded through 32-bit single
# precision floating point, for example "142.55999755859375" which is 142.55
# in a 23-bit mantissa. log(14255000)/log(2) = 23.76 bits
# Are they about the same precision as the CSV ?
#
# FIXME: All prices look like they're split-adjusted, which is ok if that's
# what you want and are downloading a full data set, but bad for incremental
# since you don't know when a change is applied.
#
# Daily data download handler, applying to symbols matched by $download_pred.
# daily_available_tdate(), daily_url_and_cookiejar() and daily_download() are
# subs of this package, passed by reference as callbacks.
# NOTE(review): the object is created in void context, so presumably the
# constructor registers it itself; the meaning and units of
# available_tdate_extra and chunk_size are defined by
# App::Chart::DownloadHandler -- confirm there.
App::Chart::DownloadHandler->new
(name => __('Yahoo'),
pred => $download_pred,
available_tdate_by_symbol => \&daily_available_tdate,
available_tdate_extra => 2,
url_and_cookiejar_func => \&daily_url_and_cookiejar,
proc => \&daily_download,
chunk_size => 150);
sub daily_available_tdate {
unused/Yahoo-v7.pm view on Meta::CPAN
# $str is a decimal number string, or undef.
# Return $str with apparent floating point rounding artifacts removed,
# like 30.299999 or 30.170001.  undef is returned unchanged.
#
# Steps, in order:
#   - cut off anything after a "99" or "00" which follows 3 decimal places
#   - a run of trailing 9s is rounded up by one in the last place, or a
#     trailing "01" rounded down by one in the last place
#   - round to a number of decimals chosen from the length of the integer
#     part: 4 or more characters gives 2 decimals, 3 characters gives 3
#     decimals, otherwise 4 decimals
#   - pad with zeros to at least 2 decimals
#
sub crunch_trailing_nines {
  my ($str) = @_;
  return $str unless defined $str;

  # trailing garbage
  $str =~ s/(\....(99|00)).*/$1/;

  if ($str =~ /(.*)\.(....9+)$/) {
    $str = decimal_add_low($str,1);
  }
  elsif ($str =~ /(.*)\.(....*01)$/) {
    $str = decimal_add_low($str,-1);
  }

  if ($str =~ /(.*)\./) {
    my $int_len = length($1);
    my $places = 4;
    if ($int_len >= 4) {
      $places = 2;
    }
    elsif ($int_len == 3) {
      $places = 3;
    }
    $str = round_decimals($str, $places);
  }

  $str = pad_decimals($str, 2);
  return $str;
}
# $str is a decimal number string like "30.2999" and $add is an integer.
# Return a string which is $str with $add added at its lowest decimal
# place, keeping the same number of decimal places.  For example
# "1.99" plus 1 is "2.00".
#
# If $str has no "." followed by at least one character then the return is
# simply the numeric sum $str+$add.
#
sub decimal_add_low {
  my ($str, $add) = @_;
  ### decimal_add_low(): "$str add $add"
  unless ($str =~ /(.*)\.(.+)$/) {
    return $str+$add;
  }
  my ($pre, $post) = ($1, $2);
  ### $pre
  ### $post

  # work in integer units of the last decimal place
  my $places = length($post);
  my $digits = $pre * 10**$places + $post + $add;

  # leading zeros so there is at least one digit before the point
  $digits = '0'.$digits while ($places >= length($digits));

  substr($digits, -$places, 0, '.');
  return $digits;
}
# $str is a decimal number string, or undef.
# Return $str cut to $decimals many decimal places, rounded according to
# the digit following those places (5 or more rounds up by one in the last
# kept place).  Digits beyond that one are ignored.
#
# $str is returned unchanged if undef or if it doesn't have more than
# $decimals digits after the point.
#
sub round_decimals {
  my ($str, $decimals) = @_;
  return $str unless defined $str;

  my ($kept, $next_digit) = ($str =~ /(.*\.[0-9]{$decimals})([0-9])/)
    or return $str;

  if ($next_digit >= 5) {
    $kept = decimal_add_low($kept, 1);
  }
  return $kept;
}
# $str is a decimal number string.
# Return $str with zeros appended so it has at least $decimals many
# characters after the decimal point.  A "." is appended first if $str
# doesn't have one.  A $str already having $decimals or more is unchanged.
#
sub pad_decimals {
  my ($str, $decimals) = @_;
  ### pad_decimals(): "$str $decimals"
  my $have = 0;
  if ($str =~ /\.(.*)/) {
    $have = length($1);
  } else {
    $str .= '.';
  }
  my $short = $decimals - $have;
  return ($short > 0 ? $str . ('0' x $short) : $str);
}
# Return a hashref
#     { cookies   => string,  # in format HTTP::Cookies ->as_string()
#       crumb     => string,
#       timestamp => value from App::Chart::Download::timestamp_now()
#     }
#
# The hashref is cached in the database under extra key
# 'yahoo-cookie-and-crumb' and is re-used while
# App::Chart::Download::timestamp_within() accepts its timestamp for
# 3*86400 (3 days).  Otherwise a new cookie and crumb are fetched and the
# cache is rewritten.
#
# $symbol is currently unused.  Any symbol which exists is good enough to
# get a crumb for all later use, and the IBM history page is hard-coded for
# that.  crumb_parse() dies if no crumb is found in the page.
#
# As of April 2024, some User-Agent strings result in 503 Service Unavailable.
# Doesn't seem to affect other download parts, just this cookie/crumb
# getting part. "Mozilla/5.0" works.
#
sub cookie_and_crumb_data {
  my ($symbol) = @_;
  my $key = 'yahoo-cookie-and-crumb';

  # The cached value is Perl code as written by Data_Dumper_str() below.
  # NOTE(review): this is a string eval of database content, which is only
  # safe while nothing else can write to that database key.
  my $cached = App::Chart::Database->read_extra ('', $key);
  my $h = eval ($cached || '{}');

  # An unparsable or unexpected cached value is treated as no cache.
  # (Previously $h was reset to {} unconditionally here, which threw away
  # the cache and forced a fresh fetch on every call.)
  if (ref($h) ne 'HASH') {
    $h = {};
  }

  unless (App::Chart::Download::timestamp_within
          ($h->{'timestamp'}, 3 * 86400)) {
    App::Chart::Download::status ('Yahoo cookie');
    require HTTP::Cookies;
    my $jar = HTTP::Cookies->new;
    my $user_agent = 'Mozilla/5.0';

    # first to get a cookie, the response content itself is not needed
    App::Chart::Download->get
        ('https://www.yahoo.com/',
         user_agent => $user_agent,
         cookie_jar => $jar);
    ### jar: $jar->as_string
    App::Chart::Download::verbose_message ("Yahoo cookies: "
                                           . $jar->as_string);

    # then a finance page, with that cookie, which has the crumb in it
    App::Chart::Download::status ('Yahoo auth crumb');
    my $resp = App::Chart::Download->get
      ('https://finance.yahoo.com/quote/IBM/history/?p=IBM',
       user_agent => $user_agent,
       cookie_jar => $jar);
    my $crumb = crumb_parse($resp);
    App::Chart::Download::verbose_message ("Yahoo crumb: $crumb");

    $h = { crumb     => $crumb,
           cookies   => $jar->as_string,
           timestamp => App::Chart::Download::timestamp_now(),
         };
    App::Chart::Database->write_extra ('', $key, Data_Dumper_str($h));
  }
  return $h;
}
# $resp is a response object with a decoded_content() method, holding a
# Yahoo finance web page.  Return the crumb string found in it, with
# javascript backslash escapes like \u002F undone.
# Dies if no crumb is found.
#
# The crumb is the "body" field following the first "getcrumb" in the
# page, in script like
#     data-url="https://query1.finance.yahoo.com/v1/test/getcrumb?..." ...>{...,"body":"DKVWQE/ggh4"}
#
# Past forms, no longer matched here, were
#   prior to about May 2024
#     "RequestPlugin":{"user":{"age":0,"crumb":"8OyCBPyO4ZS"
#   prior to about July 2023
#     "user":{"crumb":"hdDX\u002FHGsZ0Q",
#   prior to about January 2023
#     "CrumbStore":{"crumb":"hdDX\u002FHGsZ0Q"}
#
sub crumb_parse {
  my ($resp) = @_;
  my $content = $resp->decoded_content (raise_error => 1);
  if ($content =~ /getcrumb.*?"body":"([^"]*)"/) {
    return App::Chart::Yahoo::javascript_string_unquote($1);
  }
  die "Yahoo getcrumb not found in parse";
}
# $h is a data structure, here the cookie and crumb hashref.
# Return a string of Perl code which evaluates to a copy of $h.
#
# The dump is "Terse" so it's just the value with no "$var =" assignment,
# and hash keys are sorted so the same data always gives the same string.
#
sub Data_Dumper_str {
  my ($h) = @_;
  # Loaded here since nothing else in this file is certain to have loaded
  # Data::Dumper.
  require Data::Dumper;
  my $dumper = Data::Dumper->new ([$h], ['var']);
  $dumper->Indent(1);
  $dumper->Terse(1);
  $dumper->Sortkeys(1);
  return $dumper->Dump;
}
# $str is an ISO date string like 2017-11-05
# It is date GMT of 9:30am in the timezone of $symbol.
# Return the date in the symbol timezone.
#
# $str is returned unchanged unless timezone_gmtoffset_at_ymd() for the
# day after $str is at or below minus 10 hours 20 minutes.  In that case
# the return is the day after $str, but no later than today's date in the
# symbol timezone.
#
sub daily_date_fixup {
  my ($symbol, $str) = @_;
  ### daily_date_fixup: "$symbol $str"
  my ($year, $month, $day) = App::Chart::iso_to_ymd ($str);
  my $timezone = App::Chart::TZ->for_symbol($symbol);

  my $offset = timezone_gmtoffset_at_ymd($timezone, $year, $month, $day+1);
  if ($offset > - (10*60+20)*60) {
    return $str;
  }

  my $adate = App::Chart::ymd_to_adate ($year, $month, $day);
  my $next = App::Chart::adate_to_iso ($adate+1);
  my $today = $timezone->iso_date();
  return ($next gt $today ? $today : $next);
}
# $timezone is an App::Chart::TZ object (anything with a call() method
# taking a function and its arguments will do).
# $year,$month,$day is a date, with $month 1 to 12.  $day can be past the
# end of the month, eg. 32 in January meaning 1 February, the same as
# mktime() accepts.
#
# Return the number of seconds from 00:00 GMT on that date to 00:00 in
# $timezone on that date.  This is negative for a timezone ahead (east) of
# GMT, for example -36000 when the timezone is 10 hours ahead, and
# positive for a timezone behind GMT.
#
# NOTE(review): presumably $timezone->call() runs POSIX::mktime with that
# timezone in force -- confirm in App::Chart::TZ.
#
sub timezone_gmtoffset_at_ymd {
  my ($timezone, $year, $month, $day) = @_;

  # time_t of midnight local time in $timezone
  my $timet = $timezone->call(\&POSIX::mktime,
                              0, 0, 0, $day, $month-1, $year-1900);

  # time_t of midnight GMT.  timegm() is applied to the first of the month
  # and the days added on, since timegm() croaks on a $day past the end of
  # the month.  The full 4-digit $year is given so it's taken as-is.
  #
  # This is a whole time_t difference.  Subtracting day-of-month numbers
  # (as was previously done) is wrong when the local and GMT dates are
  # either side of a month boundary, or when $day is past the month end.
  my $gmt_midnight = Time::Local::timegm(0, 0, 0, 1, $month-1, $year)
    + ($day-1)*86400;

  return $timet - $gmt_midnight;
}
# $tdate is a Chart tdate number.
# Return seconds since 00:00:00, 1 Jan 1970 GMT.
# This is the adate for $tdate, plus 4, as a count of 86400 second days.
sub tdate_to_unix {
  my ($tdate) = @_;
  my $days = App::Chart::tdate_to_adate ($tdate) + 4;
  return $days * 86400;
}
# $str is a string from previous HTTP::Cookies ->as_string()
# Return a new HTTP::Cookies object with that content.
#
# HTTP::Cookies is loaded from a file, so $str is written to a temporary
# file with a "#LWP-Cookies-1.0" header line in front of it.  Dies if that
# file can't be written.
#
sub http_cookies_from_string {
  my ($str) = @_;
  require File::Temp;
  my $tempfile = File::Temp->new (TEMPLATE => 'chart-XXXXXX',
                                  TMPDIR => 1);
  print {$tempfile} "#LWP-Cookies-1.0\n", $str or die;
  close $tempfile or die;

  # $tempfile is still in scope here, so the file still exists for load()
  require HTTP::Cookies;
  my $jar = HTTP::Cookies->new;
  $jar->load($tempfile->filename);
  return $jar;
}
#------------------------------------------------------------------------------
# undo javascript string backslash quoting in STR, per
#
# https://developer.mozilla.org/en/JavaScript/Guide/Values,_Variables,_and_Literals#String_Literals
#
# Encode::JavaScript::UCS does \u, but not the rest
#
# cf Java as such not quite the same:
# unicode: http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#100850
# strings: http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089
#
# Single-letter backslash escapes and the characters they stand for.
# Any other escaped character stands for itself.
my %javascript_backslash = ('b' => "\b", # backspace
                            'f' => "\f", # formfeed
                            'n' => "\n", # newline
                            'r' => "\r",
                            't' => "\t", # tab
                            'v' => "\013", # vertical tab
                           );

# $str is the content of a javascript string literal, without its
# surrounding quotes.  Return $str with backslash escapes replaced by the
# characters they represent.
#
# The forms recognised are octal \377, hex \xFF, unicode hex \uFFFF, the
# letters in %javascript_backslash above, and otherwise backslash and any
# character (other than newline) is just that character.
#
sub javascript_string_unquote {
  my ($str) = @_;
  $str =~ s{\\(?:
              ((?:[0-3]?[0-7])?[0-7]) # $1 \377 octal latin-1
            |x([0-9a-fA-F]{2})        # $2 \xFF hex latin-1
            |u([0-9a-fA-F]{4})        # $3 \uFFFF hex unicode
            |(.)                      # $4 \n etc escapes
            )
         }{
           my ($octal, $hex2, $hex4, $other) = ($1, $2, $3, $4);
           my $char;
           if (defined $octal) {
             $char = chr(oct($octal));
           } elsif (defined $other) {
             $char = ($javascript_backslash{$other} || $other);
           } else {
             $char = chr(hex($hex2||$hex4));   # \x,\u hex
           }
           $char
         }egx;
  return $str;
}
#------------------------------------------------------------------------------
1;
__END__
( run in 0.510 second using v1.01-cache-2.11-cpan-39bf76dae61 )