Finance-QuoteHist
view release on metacpan or search on metacpan
lib/Finance/QuoteHist/Generic.pm view on Meta::CPAN
### Parser methods
sub pre_parser {
my($self, %parms) = @_;
my $parse_mode = $parms{parse_mode} || $self->parse_mode;
my $method = "${parse_mode}_pre_parser";
return unless $self->can($method);
$self->$method(%parms, parse_mode => $parse_mode);
}
sub parser {
my($self, %parms) = @_;
my $parse_mode = $parms{parse_mode} || $self->parse_mode;
my $make_parser = "${parse_mode}_parser";
$self->$make_parser(%parms, parse_mode => $parse_mode);
}
sub html_parser {
# HTML::TableExtract supports automatic column reordering.
my $self = shift;
my $class = HTML_CLASS;
my @labels = $self->labels(@_);
my @patterns = $self->patterns(@_);
my(%pat_map, %label_map);
$pat_map{$patterns[$_]} = $_ foreach 0 .. $#patterns;
$label_map{$labels[$_]} = $_ foreach 0 .. $#labels;
$self->pattern_map(\%pat_map);
$self->label_map(\%label_map);
sub {
my $data = shift;
my $html_string;
if (ref $data) {
local($/);
$html_string = <$data>;
}
else {
$html_string = $data;
}
my %te_parms = (
headers => \@patterns,
automap => 1,
);
$te_parms{debug} = $self->{debug} if $self->{debug} > 2;
my $te = $class->new(%te_parms) or croak "Problem creating $class\n";
$te->parse($html_string);
$te->eof;
my $ts = $te->first_table_found;
[ $ts ? $ts->rows() : ()];
}
}
sub csv_parser {
# Text::CSV_XS doesn't column slice or re-order, so we do.
my $self = shift;
my @patterns = $self->patterns(@_);
sub {
my $data = shift;
return [] unless defined $data;
my @csv_lines = ref $data ? <$data> : split("\n", $data);
# BOM squad (byte order mark, as csv from google tends to be)
if ($csv_lines[0] =~ s/^\xEF\xBB\xBF//) {
for my $i (0 .. $#csv_lines) {
utf8::decode($csv_lines[$i]);
}
}
# might be unix, windows, or mac style newlines
s/\s+$// foreach @csv_lines;
return [] if !@csv_lines || $csv_lines[0] =~ /(no data)|error/i;
# attempt to get rid of comments at front of csv data
while (@csv_lines) {
last if $csv_lines[0] =~ /date/i || $csv_lines[0] =~ /\d+$/;
print STDERR "CSV reject line: $csv_lines[0]\n" if $self->{verbose};
shift @csv_lines;
}
my $first_line = $csv_lines[0];
my $sep_char = $first_line =~ /date\s*(\S)/i ? $1 : ',';
my $cp = $CSV_Class->new({sep_char => $sep_char, binary => 1})
or croak "Problem creating $CSV_Class\n";
my @pat_slice;
if ($first_line =~ /date/i) {
# derive column detection and ordering
$cp->parse($first_line) or croak ("Problem parsing (" .
$cp->error_input . ") : " . $cp->error_diag . "\n");
my @headers = $cp->fields;
my @pats = @patterns;
my @labels = map($self->pattern_label(pattern => $_), @patterns);
my(%pat_map, %label_map);
HEADER: for my $i (0 .. $#headers) {
last unless @pats;
my $header = $headers[$i];
for my $pi (0 .. $#pats) {
my $pat = $pats[$pi];
if ($header =~ /$pat/) {
my $label = $labels[$pi];
splice(@pats, $pi, 1);
splice(@labels, $pi, 1);
$pat_map{$pat} = $i;
$label_map{$label} = $i;
next HEADER;
}
}
}
shift @csv_lines;
@pat_slice = map($pat_map{$_}, @patterns);
}
else {
# no header row, trust natural order and presence
@pat_slice = 0 .. $#patterns;
}
my @rows;
foreach my $line (@csv_lines) {
$cp->parse($line) or next;
my @fields = $cp->fields;
push(@rows, [@fields[@pat_slice]]);
}
\@rows;
};
}
### Accessors, generators
( run in 2.512 seconds using v1.01-cache-2.11-cpan-98e64b0badf )