Finance-QuoteHist

 view release on metacpan or  search on metacpan

lib/Finance/QuoteHist/Generic.pm  view on Meta::CPAN


### Parser methods

sub pre_parser {
  my($self, %parms) = @_;
  my $parse_mode = $parms{parse_mode} || $self->parse_mode;
  my $method = "${parse_mode}_pre_parser";
  return unless $self->can($method);
  $self->$method(%parms, parse_mode => $parse_mode);
}

sub parser {
  my($self, %parms) = @_;
  my $parse_mode = $parms{parse_mode} || $self->parse_mode;
  my $make_parser = "${parse_mode}_parser";
  $self->$make_parser(%parms, parse_mode => $parse_mode);
}

sub html_parser {
  # HTML::TableExtract supports automatic column reordering.
  my $self = shift;
  my $class = HTML_CLASS;
  my @labels = $self->labels(@_);
  my @patterns = $self->patterns(@_);
  my(%pat_map, %label_map);
  $pat_map{$patterns[$_]} = $_ foreach 0 .. $#patterns;
  $label_map{$labels[$_]} = $_ foreach 0 .. $#labels;
  $self->pattern_map(\%pat_map);
  $self->label_map(\%label_map);
  sub {
    my $data = shift;
    my $html_string;
    if (ref $data) {
      local($/);
      $html_string = <$data>;
    }
    else {
      $html_string = $data;
    }
    my %te_parms = (
      headers => \@patterns,
      automap => 1,
    );
    $te_parms{debug} = $self->{debug} if $self->{debug} > 2;
    my $te = $class->new(%te_parms) or croak "Problem creating $class\n";
    $te->parse($html_string);
    $te->eof;
    my $ts = $te->first_table_found;
    [ $ts ? $ts->rows() : ()];
  }
}

sub csv_parser {
  # Text::CSV_XS doesn't column slice or re-order, so we do.
  my $self = shift;
  my @patterns = $self->patterns(@_);
  sub {
    my $data = shift;
    return [] unless defined $data;
    my @csv_lines = ref $data ? <$data> : split("\n", $data);
    # BOM squad (byte order mark, as csv from google tends to be)
    if ($csv_lines[0] =~ s/^\xEF\xBB\xBF//) {
      for my $i (0 .. $#csv_lines) {
        utf8::decode($csv_lines[$i]);
      }
    }
    # might be unix, windows, or mac style newlines
    s/\s+$// foreach @csv_lines;
    return [] if !@csv_lines || $csv_lines[0] =~ /(no data)|error/i;
    # attempt to get rid of comments at front of csv data
    while (@csv_lines) {
      last if $csv_lines[0] =~ /date/i || $csv_lines[0] =~ /\d+$/;
      print STDERR "CSV reject line: $csv_lines[0]\n" if $self->{verbose};
      shift @csv_lines;
    }
    my $first_line = $csv_lines[0];
    my $sep_char = $first_line =~ /date\s*(\S)/i ? $1 : ',';
    my $cp = $CSV_Class->new({sep_char => $sep_char, binary => 1})
      or croak "Problem creating $CSV_Class\n";
    my @pat_slice;
    if ($first_line =~ /date/i) {
      # derive column detection and ordering
      $cp->parse($first_line) or croak ("Problem parsing (" .
        $cp->error_input . ") : " . $cp->error_diag . "\n");
      my @headers = $cp->fields;
      my @pats = @patterns;
      my @labels = map($self->pattern_label(pattern => $_), @patterns);
      my(%pat_map, %label_map);
      HEADER: for my $i (0 .. $#headers) {
        last unless @pats;
        my $header = $headers[$i];
        for my $pi (0 .. $#pats) {
          my $pat = $pats[$pi];
          if ($header =~ /$pat/) {
            my $label = $labels[$pi];
            splice(@pats,   $pi, 1);
            splice(@labels, $pi, 1);
            $pat_map{$pat} = $i;
            $label_map{$label} = $i;
            next HEADER;
          }
        }
      }
      shift @csv_lines;
      @pat_slice = map($pat_map{$_}, @patterns);
    }
    else {
      # no header row, trust natural order and presence
      @pat_slice = 0 .. $#patterns;
    }
    my @rows;
    foreach my $line (@csv_lines) {
      $cp->parse($line) or next;
      my @fields = $cp->fields;
      push(@rows, [@fields[@pat_slice]]);
    }
    \@rows;
  };
}

### Accessors, generators



( run in 2.512 seconds using v1.01-cache-2.11-cpan-98e64b0badf )