Attean

 view release on metacpan or  search on metacpan

lib/Attean/API/QueryPlanner.pm  view on Meta::CPAN

				foreach my $c (@{ $plan->children }) {
					$cost	+= $self->cost_for_plan($c, $model);
				}
			}
			
			$plan->cost($cost);
			if ($self->log->is_trace) {
				$self->log->trace("Cost $cost estimated for\n".$plan->as_string);
			} else {
				$self->log->debug('Estimated cost for \''.ref($plan).'\' is '.$cost);
			}
			return $cost;
		}
	}
}

package Attean::API::IDPJoinPlanner 0.035 {
	use Encode qw(encode);
	use Attean::RDF;
	use LWP::UserAgent;
	use Scalar::Util qw(blessed reftype);
	use List::Util qw(reduce);
	use List::Util qw(all any);
	use Types::Standard qw(Int ConsumerOf InstanceOf);
	use URI::Escape;
	use Algorithm::Combinatorics qw(subsets);
	use List::Util qw(min);
	use Math::Cartesian::Product;

	use Moo::Role;

	with 'Attean::API::JoinPlanner';
	with 'Attean::API::SimpleCostPlanner';

	sub joins_for_plan_alternatives {
		my $self			= shift;
		my $model			= shift;
		my $active_graphs	= shift;
		my $default_graphs	= shift;
		my $interesting		= shift;
		my @args			= @_; # each $args[$i] here is an array reference containing alternate plans for element $i

		my $k				= 3; # this is the batch size over which to do full dynamic programming
		
		# initialize $optPlan{$i} to be a set of alternate plans for evaluating element $i
		my %optPlan;
		foreach my $i (0 .. $#args) {
			$optPlan{$i}	= [$self->prune_plans($model, $interesting, $args[$i])];
		}
		
		my @todo	= (0 .. $#args); # initialize the todo list to all elements
		my $next_symbol	= 'a'; # when we start batching together sub-plans, we'll rename them with letters (e.g. elements 1, 2, and 4 might become 'a', and then 3, 5, and 'a' become 'b')
		
		# until we've joined all the elements in todo and are left with a set of plans for the join of all elements
		while (scalar(@todo) > 1) {
			$k	= ($k < scalar(@todo)) ? $k : scalar(@todo); # in case we're joining fewer than the batch size
			foreach my $i (2 .. $k) { # we've already initialized plans for evaluating single elements; now consider plans for groups of elements (with group sizes 2, 3, ..., $k)
				foreach my $s (subsets(\@todo, $i)) { # pick a subset of size $i of the elements that need to be planned
					my $s_key	= join('.', sort @$s);
					$optPlan{$s_key}	= [];
					foreach my $o (subsets($s)) { # partition the subset s into two (o and not_o)
						next if (scalar(@$o) == 0); # only consider proper, non-empty subsets
						next if (scalar(@$o) == scalar(@$s)); # only consider proper, non-empty subsets
						my $o_key	= join('.', sort @$o);
						my %o		= map { $_ => 1 } @$o;
						my $not_o_key	= join('.', sort grep { not exists $o{$_} } @$s);
						
						my $lhs		= $optPlan{$o_key}; # get the plans for evaluating o
						my $rhs		= $optPlan{$not_o_key}; # get the plans for evaluating not_o
						
						# compute and store all the possible ways to evaluate s (o ⋈ not_o)
						push(@{ $optPlan{$s_key} }, $self->join_plans($model, $active_graphs, $default_graphs, $lhs, $rhs, 'inner'));
						$optPlan{$s_key}	= [$self->prune_plans($model, $interesting, $optPlan{$s_key})];
					}
				}
			}
			
			# find the minimum cost plan $p that computes the join over $k elements (the elements end up in @v)
			my %min_plans;
			foreach my $w (subsets(\@todo, $k)) {
				my $w_key		= join('.', sort @$w);
				my $plans		= $optPlan{$w_key};
				my @costs		= map { $self->cost_for_plan($_, $model) => [$_, $w] } @$plans;
				my %costs		= @costs;
				my $min			= min keys %costs;
				my @min_plans;
				while (my ($cost, $data) = splice(@costs, 0, 2)) {
					if ($cost == $min) {
						push(@min_plans, $data);
					}
				}
				$min_plans{ $min }	= \@min_plans;
			}
			my $min_cost	= min keys %min_plans;
			my $min_plans	= $min_plans{$min_cost};
			my @min_plans;
			my $min_key;
			foreach my $d (@$min_plans) {
				my ($p, $v)		= @$d;
				my $v_key		= join('.', sort @$v);
				if (not(defined($min_key)) or $min_key eq $v_key) {
					push(@min_plans, $p);
					$min_key	= $v_key;
				}
			}
# 			my ($p, $v)		= @$min_plan;
# 			my $v_key		= join('.', sort @$v);
# 			warn "Choosing join for $v_key\n";
			
			# generate a new symbol $t to stand in for $p, the join over the elements in @v
			my $t	= $next_symbol++;
			
			# remove elements in @v from the todo list, and replace them by the new composite element $t
			$optPlan{$t}	= [@min_plans];
			my %v	= map { $_ => 1 } split(/[.]/, $min_key);
			push(@todo, $t);
			@todo	= grep { not exists $v{$_} } @todo;
			
			# also remove subsets of @v from the optPlan hash as they are now covered by $optPlan{$t}
			foreach my $o (subsets([keys %v])) {
				my $o_key	= join('.', sort @$o);
# 				warn "deleting $o_key\n";
				delete $optPlan{$o_key};
			}
		}
		
		my $final_key	= join('.', sort @todo);
# 		use Data::Dumper;
# 		warn Dumper($optPlan{$final_key});
		return $self->prune_plans($model, $interesting, $optPlan{$final_key});
	}



( run in 1.844 second using v1.01-cache-2.11-cpan-39bf76dae61 )