Excel-PowerPivot-Utils

 view release on metacpan or  search on metacpan

lib/Excel/PowerPivot/Utils.pm  view on Meta::CPAN

  # if requested, delete remaining measures in model
  if ($options{delete_others}) {
    $self->log->info("deleting measure $_->{Name}"), $_->Delete
      foreach values %existing_measures;
  }

  # reactivate refresh in pivot caches
  $self->log->debug("reactivate refresh in pivot caches") if @refreshable_pivots;
  $_->{EnableRefresh} = 1 foreach @refreshable_pivots;

  $self->log->info("done injecting measures");
}


sub _build_model_format {
  my ($self, $measure) = @_;

  # get format information as specified in the $measure hash
  my $format_info = $measure->{FormatInformation} // [General => ()];

  # invoke proper OLE method to build the ModelFormat object
  my ($short_classname, @args) = @$format_info;
  my $model_method             = "ModelFormat$short_classname";
  my $model_format             = $self->workbook->Model->$model_method(@args);

  return $model_format;
}


#======================================================================
# UTILS FOR POWER QUERY
#======================================================================

sub queries {
  my ($self) = @_;

  my @wb_queries = in $self->workbook->Queries;
  my @queries    = map { { %$_{qw/Name Formula Description/} } } @wb_queries; # hash slice

  return @queries;
}


sub queries_as_YAML {
  my ($self, $col_name, $col_type) = @_;

  # build code for nice reformatting of type arguments to the TransformColumnTypes() function
  $col_name //= 8;
  $col_type //= 40;
  my $reformat_types = sub {
    my ($types) = @_;
    $types =~ s[{]["\n" . (' ' x $col_name) . '{']eg;
    $types =~ s[("\w+",) ][$1 . (' 'x($col_type-$col_name-length($1)))]eg;
    return $types;
  };

  my $yaml = "";

  foreach my $query ($self->queries) {

    # get the Power Query formula and reformat it if type arguments are on a single-line
    my $formula = $query->{Formula};
    $formula =~ s[(TransformColumnTypes.*?{)({.*})}][$1 . $reformat_types->($2) . '}']eg
      if $col_name;

    # build the YAML entry for that query
    $yaml .= "\n\n$YAML_separator_line\n"
          .  "- Name        : $query->{Name}\n"
          .  "$YAML_separator_line\n"
          .  "  Description : $query->{Description}\n"
          .  "  Formula     : |-\n"
          .  $formula =~ s/^/    /gmr; # indentation of the formula code
  }

  return $yaml;
}


sub inject_queries {
  my ($self, $queries_to_inject, %options) = @_;

  # check options
  warn "->inject_queries(..) : option '$_' is invalid"
    foreach invalid_options(\%options, qw/delete_others fast_combine handle_connections/);

  # default options
  $options{fast_combine}       //= True;
  $options{handle_connections} //= True;

  # check well-formedness of $queries_to_inject
  does $queries_to_inject, 'ARRAY'
    or die "parameter to inject_measures() is not an arrayref";
  all {has_nonempty_keys(qw/Name Formula/)->($_)} @$queries_to_inject
    or die "missing mandatory properties in parameter to ->inject_queries()";

  # gather queries already existing in the workbook
  $self->log->info("gathering existing queries in workbook");
  my %existing_queries = map {($_->Name => $_)} grep {$_} in $self->workbook->Queries;

  # this is supposed to accelerate operations -- not sure it makes a difference, though
  # see https://learn.microsoft.com/en-us/office/vba/api/excel.queries.fastcombine
  $self->workbook->Queries->{FastCombine} = True if $options{fast_combine} && $self->workbook->Queries;

  # handle each query to inject
  foreach my $query (@$queries_to_inject) {

    my $q_name = $query->{Name};

    # if that query is alreay in the workbook, update it
    if (my $existing = delete $existing_queries{$q_name}) {
      $self->log->info("updating query $q_name");
      $existing->{Description} = $query->{Description};
      $existing->{Formula}     = $query->{Formula};
    }

    # otherwise, create a new query
    else {
      $self->log->info("creating query $q_name");
      $self->workbook->Queries->Add(@{$query}{qw/Name Formula Description/});

      if ($options{handle_connections}) {
        $self->log->info("creating connection to query $q_name");
        # see https://learn.microsoft.com/en-us/office/vba/api/excel.connections.add &
        # https://learn.microsoft.com/en-gb/dotnet/api/microsoft.office.interop.excel.connections.add2?view=excel-pia
        $self->workbook->Connections->Add2({
          Name                  => "Query - $q_name",
          Description           => "Connection to query '$q_name' in the workbook",
          ConnectionString      => "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=\$Workbook\$;Location=$q_name",
          CommandText           => $q_name,
          lCmdType              => xlCmdTableCollection,
          CreateModelConnection => True,
          ImportRelationships   => False,

lib/Excel/PowerPivot/Utils.pm  view on Meta::CPAN

  $self->workbook->Model->Refresh();
}



#======================================================================
# HELPER FUNCTIONS
#======================================================================


sub has_nonempty_keys {
  my @keys = @_;
  return sub {my $hash = shift; all {$hash->{$_}} @keys};
}

sub invalid_options {
  my ($options, @valid_keys) = @_;
  my %is_valid = map {($_ => 1)} @valid_keys;
  return grep {!$is_valid{$_}} keys %$options;
}


sub flatten_format_information {
  my ($wb_measure) = @_;

  my @format_information; # of shape: ($classname, $property1, ...)

  if (my $format_obj  = $wb_measure->FormatInformation) {
    my $format_class = Win32::OLE->QueryObjectType($format_obj) =~ s/^ModelFormat//r;
    push @format_information, $format_class,
                              map {$format_obj->{$_}} $ModelFormat_properties{$format_class}->@*;
  }

  return \@format_information
}


1;

__END__

=encoding utf-8

=head1 NAME

Excel::PowerPivot::Utils - utilities for scripting Power Pivot models within Excel workbooks

=head1 SYNOPSIS

  use Excel::PowerPivot::Utils;
  my $ppu = Excel::PowerPivot::Utils->new(UTF8 => 1);

  # operations on the whole model ...
  print $ppu->whole_model_as_YAML;
  $ppu->inject_whole_model({QUERIES       => ...,
                            RELATIONSHIPS => ...
                            MEASURES      => ...});

  # .. or specific operations on queries, relationships or measures
  print $ppu->queries_as_YAML;
  $ppu->inject_queries ([{Name => 'New_table', Formula => $M_formula_for_new_table}]);
  
  print $ppu->relationships_as_YAML;
  $ppu->inject_relationships([ {ForeignKey  => 'Album.ArtistId',
                                PrimaryKey  => 'Artist.ArtistId',
                                Active      => 1},
                               ...
                             ]);
  
  print $ppu->measures_as_YAML;
  $ppu->inject_measures([{Name              => 'Invoice Lines Total Amount',
                          AssociatedTable   => 'InvoiceLine',
                          Description       => 'sum of quantities multiplied by unit price',
                          FormatInformation => [qw/Currency  USD 2/],
                          Formula           => 'SUMX(InvoiceLine, InvoiceLine[UnitPrice] * InvoiceLine[Quantity]'
                         },
                         ...
                        ]);



=head1 DESCRIPTION

This module uses OLE automation to interact with an Excel Power Pivot model.
It can be used for example for

=over

=item * 

documenting existing models

=item *

scripting series of updates or inserts on measures or queries as batch operations -- useful
for propagating similar changes to a series of models.

=item *

use a version control system on textual exports of the model

=back


Obviously, this module only works on a Windows platform with a local installation of
Microsoft Office 2016 or greater.

The exposed interface hides details about the interaction with the Excel object model as documented in
L<https://learn.microsoft.com/en-us/office/vba/excel/concepts/about-the-powerpivot-model-object-in-excel>;
nevertheless, some knowledge of that model and of the L<Win32::OLE> module is recommended to fully understand
what is going on.

=head1 CONSTRUCTOR

  my $ppu = Excel::PowerPivot::Utils->new(%options);

Creates a new instance. Options are :

=over

=item workbook

A C<Win32::OLE> object representing an Excel workbook.
If none is supplied, it will connect to the currently running Excel instance and take
the active workbook as default.

=item log

A logger object equipped with C<debug>, C<info> and C<warning> methods.
If none is supplied, a simple logger is automatically created from L<Log::Dispatch>.

=item UTF8

A boolean flag for setting the L<Win32::OLE> codepoint option to UTF8, so that
strings are properly encoded/decoded between Perl and the OLE server.
It is highly recommended to I<systematically set this option to true>, since
this module is mostly used together with L<YAML>, which uses UTF8 encoding.

This option will automatically trigger C<< Win32::OLE->Option(CP => CP_UTF8) >>
at object construction time, and will set it back to the previous value at object
destruction time. Beware however that this is a change in global state, so if
your program performs other operations through L<Win32::OLE> during the lifetime
of the C<Excel::PowerPivot::Utils> object, string handling might be affected.
This is the reason why we require this option to be set explicitly instead of
being enabled automatically by default.

=back



=head1 METHODS

=head2 Utilities for Power Query

=head3 queries

Returns information about queries in the Excel workbook. This is a list of hashrefs containing

=over

=item Name

the name of the query

=item Formula

the M formula (Power Query language)

=item Description

optional description text

=back


=head3 queries_as_YAML

Content of the L</queries> method as a L<YAML> string, nicely formatted
so that it is easily readable by humans.

=head3 inject_queries

  $ppu->inject_queries($queries, %options);

Takes an arrayref or query specifications. Each specification must be a hashref
with keys C<Name>, C<Formula> and optionally C<Description>.
For names corresponding to queries already in the workbook, this is an update operation;
other queries in the list are added to the workbook.

Options are :

=over

=item delete_others

If true, queries not mentioned in the list are deleted from the workbook. False by default.

=item handle_connections

If true, queries are automatically associated with workbook connections.
This is equivalent to manually checking "Add this data to the Data Model" in the
"Close and Load To" dialog of Power Query.
True by default.
When adding or deleting queries in the model, Excel recomputes the whole
model, so this operation may be quite slow.

=item fast_combine

Activates the C<FastCombine> property -- see
L<https://learn.microsoft.com/en-us/office/vba/api/excel.queries.fastcombine>.

=back


=head3 delete_connection_for_query

Deletes the OLEDB connection associated with the given query name.


=head2 Utilities for relationships

=head3 relationships

Returns information about relationships in the Excel model.
Due to the inner constraints of Power Pivot, all relationships are many-to-one.
The returned structure is a list of hashrefs containing :

=over

=item ForeignKey

A single string of form C<$table.$column>, describing the "many" side of the relationship.

=item PrimaryKey

A single string of form C<$table.$column>, describing the "one" side of the relationship.

=item Active

A boolean stating if the relationship is active or not.

=back

=head3 relationships_as_YAML

Content of the L</relationships> method as a L<YAML> string, nicely formatted
so that it is easily readable by humans.

=head3 inject_relationships

  $ppu->inject_relationships($relationships, %options);

Takes an arrayref or relationship specifications. Each specification must be a hashref
with keys C<ForeignKey>, C<PrimaryKey> and C<Active>.
For pairs (foreign key, primary key) corresponding to relationships already in the model,
this is an update operation on the C<Active> property; otherwise the relationships are added to the model.

Options are :

=over

=item delete_others

If true, relationships not mentioned in the list are deleted from the model. False by default.

=back


=head2 Utilities for DAX measures

=head3 measures

Returns information about measures in the Excel model. This is a list of hashrefs containing

=over

=item Name

the name of the measure

=item AssociatedTable

the name of the table to which this measure is associated

=item Formula

the DAX formula

=item Description

optional description text

=item FormatInformation

an arrayref describing the format for displaying that measure. Formats are
documented in L<https://learn.microsoft.com/en-us/office/vba/api/excel.model>.
The first member of the array is the name of the ModelFormat object, followed by the values
of its properties. The properties for each format are listed in the table below :

  ModelFormat          Property 1           Property 2
  ===========          ==========           ==========
  Currency             Symbol               DecimalPlaces
  Date                 FormatString
  DecimalNumber        UseThousandSeparator DecimalPlaces
  General
  PercentageNumber     UseThousandSeparator DecimalPlaces
  ScientificNumber     DecimalPlaces
  WholeNumber          UseThousandSeparator

=back

=head3 measures_as_YAML

Content of the L</measures> method as a L<YAML> string, nicely formatted
so that it is easily readable by humans.

=head3 inject_measures

  $ppu->inject_measures($measures, %options);

Takes an arrayref or measure specifications. Each specification must be a hashref
with keys C<Name>, C<AssociatedTable>, C<Formula> and optionally C<Description> and C<FormatInformation>.
Values for those keys are strings, except for C<FormatInformation> which takes an arrayref according to the
table above.

For names corresponding to measures already in the model, this is an update operation;
other measures in the list are added to the model.

Options are :

=over

=item delete_others

If true, measures not mentioned in the list are deleted from the model. False by default.

=back


=head2 Methods on the whole model

=head3 whole_model_as_YAML

Returns a single YAML string containing descriptions for queries, relationships and measures.

=head3 inject_whole_model

lib/Excel/PowerPivot/Utils.pm  view on Meta::CPAN

Power Pivot relationships between tables loaded into the model.
Here is an example of the YAML description :

  #======================================================================
  - ForeignKey  : Album.ArtistId
    PrimaryKey  : Artist.ArtistId
    Active      : 1
  #======================================================================

=item c)

Power Pivot measures.Since this is just for demonstration purposes, only 3 measures are defined.
Here is the YAML description :


  #======================================================================
  - Name              : Invoice Lines Total Amount
  #======================================================================
    AssociatedTable   : InvoiceLine
    Description       : 
    FormatInformation : [Currency, USD, 2]
    Formula           : |-
      SUMX(InvoiceLine, InvoiceLine[UnitPrice] * InvoiceLine[Quantity])
  
  #======================================================================
  - Name              : Invoice Total Amount
  #======================================================================
    AssociatedTable   : Invoice
    Description       : 
    FormatInformation : [Currency, USD, 2]
    Formula           : |-
      SUM(Invoice[Total])


  #======================================================================
  - Name              : Invoice Lines Percentage Sales
  #======================================================================
    AssociatedTable   : InvoiceLine
    Description       : 
    FormatInformation : [PercentageNumber, 1, 0]
    Formula           : |-
      DIVIDE([Invoice Lines Total Amount], [Invoice Total Amount])

=item d)

Once the Power Pivot model is in place, we can start building pivot tables
based on the DAX measures. For this task the Perl module has no added
value, it is done through standard OLE automation :

  # create a Pivot Table (percentage of sales per genre, for each customer country)
  my $pcache = $workbook->PivotCaches->Create(xlExternal,
                                              $workbook->Connections("ThisWorkbookDataModel"));
  my $ptable = $pcache->CreatePivotTable("ComputedPivot!R5C1",
                                         'Sales_by_genre_and_country');
  $ptable->CubeFields("[Measures].[Invoice Lines Percentage Sales]")->{Orientation} = xlDataField;
  $ptable->CubeFields("[Genre].[Name]")                             ->{Orientation} = xlColumnField;
  $ptable->CubeFields("[Customer].[Country]")                       ->{Orientation} = xlRowField;

=item e)

Then it is possible to write Excel formulas that extract values from the pivot cache.
So for example here is the formula that retrieves the percentage of sales for the "Classical"
genre in Austria :

  = CUBEVALUE("ThisWorkbookDataModel", 
             "[Measures].[Invoice Lines Percentage Sales]",
             "[Genre].[Name].[Classical]",
             "[Customer].[Country].[Austria]")

=back

=back


=head1 AUTHOR

Laurent Dami, E<lt>dami at cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2023 by Laurent Dami.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.



( run in 0.540 second using v1.01-cache-2.11-cpan-39bf76dae61 )