Attean
view release on metacpan or search on metacpan
lib/Attean/API/QueryPlanner.pm view on Meta::CPAN
foreach my $c (@{ $plan->children }) {
$cost += $self->cost_for_plan($c, $model);
}
}
$plan->cost($cost);
if ($self->log->is_trace) {
$self->log->trace("Cost $cost estimated for\n".$plan->as_string);
} else {
$self->log->debug('Estimated cost for \''.ref($plan).'\' is '.$cost);
}
return $cost;
}
}
}
package Attean::API::IDPJoinPlanner 0.035 {
use Encode qw(encode);
use Attean::RDF;
use LWP::UserAgent;
use Scalar::Util qw(blessed reftype);
use List::Util qw(reduce);
use List::Util qw(all any);
use Types::Standard qw(Int ConsumerOf InstanceOf);
use URI::Escape;
use Algorithm::Combinatorics qw(subsets);
use List::Util qw(min);
use Math::Cartesian::Product;
use Moo::Role;
with 'Attean::API::JoinPlanner';
with 'Attean::API::SimpleCostPlanner';
sub joins_for_plan_alternatives {
my $self = shift;
my $model = shift;
my $active_graphs = shift;
my $default_graphs = shift;
my $interesting = shift;
my @args = @_; # each $args[$i] here is an array reference containing alternate plans for element $i
my $k = 3; # this is the batch size over which to do full dynamic programming
# initialize $optPlan{$i} to be a set of alternate plans for evaluating element $i
my %optPlan;
foreach my $i (0 .. $#args) {
$optPlan{$i} = [$self->prune_plans($model, $interesting, $args[$i])];
}
my @todo = (0 .. $#args); # initialize the todo list to all elements
my $next_symbol = 'a'; # when we start batching together sub-plans, we'll rename them with letters (e.g. elements 1, 2, and 4 might become 'a', and then 3, 5, and 'a' become 'b')
# until we've joined all the elements in todo and are left with a set of plans for the join of all elements
while (scalar(@todo) > 1) {
$k = ($k < scalar(@todo)) ? $k : scalar(@todo); # in case we're joining fewer than the batch size
foreach my $i (2 .. $k) { # we've already initialized plans for evaluating single elements; now consider plans for groups of elements (with group sizes 2, 3, ..., $k)
foreach my $s (subsets(\@todo, $i)) { # pick a subset of size $i of the elements that need to be planned
my $s_key = join('.', sort @$s);
$optPlan{$s_key} = [];
foreach my $o (subsets($s)) { # partition the subset s into two (o and not_o)
next if (scalar(@$o) == 0); # only consider proper, non-empty subsets
next if (scalar(@$o) == scalar(@$s)); # only consider proper, non-empty subsets
my $o_key = join('.', sort @$o);
my %o = map { $_ => 1 } @$o;
my $not_o_key = join('.', sort grep { not exists $o{$_} } @$s);
my $lhs = $optPlan{$o_key}; # get the plans for evaluating o
my $rhs = $optPlan{$not_o_key}; # get the plans for evaluating not_o
# compute and store all the possible ways to evaluate s (o â not_o)
push(@{ $optPlan{$s_key} }, $self->join_plans($model, $active_graphs, $default_graphs, $lhs, $rhs, 'inner'));
$optPlan{$s_key} = [$self->prune_plans($model, $interesting, $optPlan{$s_key})];
}
}
}
# find the minimum cost plan $p that computes the join over $k elements (the elements end up in @v)
my %min_plans;
foreach my $w (subsets(\@todo, $k)) {
my $w_key = join('.', sort @$w);
my $plans = $optPlan{$w_key};
my @costs = map { $self->cost_for_plan($_, $model) => [$_, $w] } @$plans;
my %costs = @costs;
my $min = min keys %costs;
my @min_plans;
while (my ($cost, $data) = splice(@costs, 0, 2)) {
if ($cost == $min) {
push(@min_plans, $data);
}
}
$min_plans{ $min } = \@min_plans;
}
my $min_cost = min keys %min_plans;
my $min_plans = $min_plans{$min_cost};
my @min_plans;
my $min_key;
foreach my $d (@$min_plans) {
my ($p, $v) = @$d;
my $v_key = join('.', sort @$v);
if (not(defined($min_key)) or $min_key eq $v_key) {
push(@min_plans, $p);
$min_key = $v_key;
}
}
# my ($p, $v) = @$min_plan;
# my $v_key = join('.', sort @$v);
# warn "Choosing join for $v_key\n";
# generate a new symbol $t to stand in for $p, the join over the elements in @v
my $t = $next_symbol++;
# remove elements in @v from the todo list, and replace them by the new composite element $t
$optPlan{$t} = [@min_plans];
my %v = map { $_ => 1 } split(/[.]/, $min_key);
push(@todo, $t);
@todo = grep { not exists $v{$_} } @todo;
# also remove subsets of @v from the optPlan hash as they are now covered by $optPlan{$t}
foreach my $o (subsets([keys %v])) {
my $o_key = join('.', sort @$o);
# warn "deleting $o_key\n";
delete $optPlan{$o_key};
}
}
my $final_key = join('.', sort @todo);
# use Data::Dumper;
# warn Dumper($optPlan{$final_key});
return $self->prune_plans($model, $interesting, $optPlan{$final_key});
}
( run in 1.844 second using v1.01-cache-2.11-cpan-39bf76dae61 )