Data-Mining-Apriori
view release on metacpan or search on metacpan
lib/Data/Mining/Apriori.pm view on Meta::CPAN
$self->insert_key_items_transaction(\@items);
}
}
close(FILE);
}
sub quantity_possible_rules{
$self->validate_data;
return ((3**scalar(keys(%{$self->{keyItemsDescription}})))-(2**(scalar(keys(%{$self->{keyItemsDescription}}))+1))+1);
}
sub generate_rules{
$self->validate_data;
if($self->{messages}){
print "\n${\scalar(keys(%{$self->{keyItemsDescription}}))} items, ${\$self->quantity_possible_rules} possible rules";
}
my @largeItemsetLengthOne = grep{(($self->{keyItemsTransactions}{$_}/$self->{totalTransactions})*100)>=$self->{metrics}{minSupport}}keys(%{$self->{keyItemsDescription}});
$self->association_rules(\@largeItemsetLengthOne);
}
sub association_rules{
my @largeItemset = @{$_[1]};
my @variations = variations(\@largeItemset,$self->{largeItemsetLength});
my @frequentItemset;
if($self->{messages}){
print "\nLarge itemset of length $self->{largeItemsetLength}, ${\scalar(@largeItemset)} items ";
print "\nProcessing ...";
}
VARIATIONS:
foreach my$variation(@variations){
my@candidateItemset=@{$variation};
my@antecedent;
my@consequent;
for(my$antecedentLength=0;$antecedentLength<$#candidateItemset;$antecedentLength++){
push@antecedent,$candidateItemset[$antecedentLength];
@consequent=();
for(my$consequentLength=($antecedentLength+1);$consequentLength<=$#candidateItemset;$consequentLength++){
push@consequent,$candidateItemset[$consequentLength];
}
@antecedent=sort@antecedent;
@consequent=sort@consequent;
next if("@consequent"~~@{$self->{implications}{"@antecedent"}});
last VARIATIONS if(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets});
$self->{numberSubsets}++;
push @{$self->{implications}{"@antecedent"}},"@consequent";
my@implication;
push@implication,@antecedent,@consequent;
@implication=sort(@implication);
next unless $self->{keyItemsTransactions}{"@antecedent"};
my$supportAntecedent=($self->{keyItemsTransactions}{"@antecedent"}/$self->{totalTransactions});
next unless $self->{keyItemsTransactions}{"@implication"};
my$supportConsequent=($self->{keyItemsTransactions}{"@implication"}/$self->{totalTransactions});
my $support = $supportConsequent;
next if($support < $self->{metrics}{minSupport});
my $confidence = ($supportConsequent/$supportAntecedent);
next if(defined $self->{metrics}{minConfidence} && $confidence < $self->{metrics}{minConfidence});
my $lift = ($support/($supportAntecedent*$supportConsequent));
next if(defined $self->{metrics}{minLift} && $lift < $self->{metrics}{minLift});
my $leverage = ($support-($supportAntecedent*$supportConsequent));
next if(defined $self->{metrics}{minLeverage} && $leverage < $self->{metrics}{minLeverage});
my $conviction = ((1-$supportConsequent)==0)?"NaN":((1-$confidence)==0)?"NaN":((1-$supportConsequent)/(1-$confidence));
next if(defined $self->{metrics}{minConviction} && $conviction < $self->{metrics}{minConviction});
my $coverage = $supportAntecedent;
next if(defined $self->{metrics}{minCoverage} && $coverage < $self->{metrics}{minCoverage});
my $correlation = (($support-($supportAntecedent*$supportConsequent))/sqrt($supportAntecedent*(1-$supportAntecedent)*$supportConsequent*(1-$supportConsequent)));
next if(defined $self->{metrics}{minCorrelation} && $correlation < $self->{metrics}{minCorrelation});
my $cosine = ($support/sqrt($supportAntecedent*$supportConsequent));
next if(defined $self->{metrics}{minCosine} && $cosine < $self->{metrics}{minCosine});
my $laplace = (($support+1)/($supportAntecedent+2));
next if(defined $self->{metrics}{minLaplace} && $laplace < $self->{metrics}{minLaplace});
my $jaccard = ($support/($supportAntecedent+$supportConsequent-$support));
next if(defined $self->{metrics}{minJaccard} && $jaccard < $self->{metrics}{minJaccard});
$self->{rule}++;
$support = sprintf("%.$self->{precision}f", $support);
$confidence = sprintf("%.$self->{precision}f", $confidence);
$lift = sprintf("%.$self->{precision}f", $lift);
$leverage = sprintf("%.$self->{precision}f", $leverage);
$conviction = sprintf("%.$self->{precision}f", $conviction)if($conviction ne "NaN");
$coverage = sprintf("%.$self->{precision}f", $coverage);
$correlation = sprintf("%.$self->{precision}f", $correlation);
$cosine = sprintf("%.$self->{precision}f", $cosine);
$laplace = sprintf("%.$self->{precision}f", $laplace);
$jaccard = sprintf("%.$self->{precision}f", $jaccard);
$self->{associationRules}{$self->{rule}} = {
implication => "{ @antecedent } => { @consequent }",
support => $support,
confidence => $confidence,
lift => $lift,
leverage => $leverage,
conviction => $conviction,
coverage => $coverage,
correlation => $correlation,
cosine => $cosine,
laplace => $laplace,
jaccard => $jaccard,
items => [@antecedent, @consequent]
};
my@items=grep{!($_~~@frequentItemset)}@implication;
push@frequentItemset,@items;
last VARIATIONS if(defined $self->{limitRules} && $self->{rule} == $self->{limitRules});
}
}
if($self->{messages}){
print "\nFrequent itemset: { @frequentItemset }, ${\scalar(@frequentItemset)} items ";
}
if(defined $self->{associationRules}){
@{$self->{frequentItemset}}=@frequentItemset;
$self->output;
}
return if((defined $self->{limitRules} && $self->{rule} == $self->{limitRules})
||(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets}));
if(scalar(@frequentItemset)>=($self->{largeItemsetLength}+1)){
$self->{largeItemsetLength}++;
$self->{associationRules} = undef;
$self->association_rules(\@frequentItemset);
}
}
sub stop{
if($self->{messages}){
print "\nStopping ...";
$self->output if $self->{associationRules};
print "\nExit? (Y/N): ";
my $answer = <STDIN>;
chomp($answer);
if($answer =~ /^y$/i){
exit;
}
else{
print "Processing ...";
}
}
else{
$self->output if $self->{associationRules};
exit;
}
}
( run in 1.993 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )