Data-Mining-Apriori

 view release on metacpan or  search on metacpan

lib/Data/Mining/Apriori.pm  view on Meta::CPAN

			$self->insert_key_items_transaction(\@items);
		}
	}
	close(FILE);
}

sub quantity_possible_rules{
	$self->validate_data;
	return ((3**scalar(keys(%{$self->{keyItemsDescription}})))-(2**(scalar(keys(%{$self->{keyItemsDescription}}))+1))+1);
}

sub generate_rules{
	$self->validate_data;
	if($self->{messages}){
		print "\n${\scalar(keys(%{$self->{keyItemsDescription}}))} items, ${\$self->quantity_possible_rules} possible rules";
	}
	my @largeItemsetLengthOne = grep{(($self->{keyItemsTransactions}{$_}/$self->{totalTransactions})*100)>=$self->{metrics}{minSupport}}keys(%{$self->{keyItemsDescription}});
	$self->association_rules(\@largeItemsetLengthOne);
}

sub association_rules{
	my @largeItemset = @{$_[1]};
	my @variations = variations(\@largeItemset,$self->{largeItemsetLength});
	my @frequentItemset;
	if($self->{messages}){
		print "\nLarge itemset of length $self->{largeItemsetLength}, ${\scalar(@largeItemset)} items ";
		print "\nProcessing ...";
	}
	VARIATIONS:
	foreach my$variation(@variations){
		my@candidateItemset=@{$variation};
		my@antecedent;
		my@consequent;
		for(my$antecedentLength=0;$antecedentLength<$#candidateItemset;$antecedentLength++){
			push@antecedent,$candidateItemset[$antecedentLength];
			@consequent=();
			for(my$consequentLength=($antecedentLength+1);$consequentLength<=$#candidateItemset;$consequentLength++){
				push@consequent,$candidateItemset[$consequentLength];
			}
			@antecedent=sort@antecedent;
			@consequent=sort@consequent;
			next if("@consequent"~~@{$self->{implications}{"@antecedent"}});
			last VARIATIONS if(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets});
			$self->{numberSubsets}++;
			push @{$self->{implications}{"@antecedent"}},"@consequent";
			my@implication;
			push@implication,@antecedent,@consequent;
			@implication=sort(@implication);
			next unless $self->{keyItemsTransactions}{"@antecedent"};
			my$supportAntecedent=($self->{keyItemsTransactions}{"@antecedent"}/$self->{totalTransactions});
			next unless $self->{keyItemsTransactions}{"@implication"};
			my$supportConsequent=($self->{keyItemsTransactions}{"@implication"}/$self->{totalTransactions});
			my $support = $supportConsequent;
			next if($support < $self->{metrics}{minSupport});
			my $confidence = ($supportConsequent/$supportAntecedent);
			next if(defined $self->{metrics}{minConfidence} && $confidence < $self->{metrics}{minConfidence});
			my $lift = ($support/($supportAntecedent*$supportConsequent));
			next if(defined $self->{metrics}{minLift} && $lift < $self->{metrics}{minLift});
			my $leverage = ($support-($supportAntecedent*$supportConsequent));
			next if(defined $self->{metrics}{minLeverage} && $leverage < $self->{metrics}{minLeverage});
			my $conviction = ((1-$supportConsequent)==0)?"NaN":((1-$confidence)==0)?"NaN":((1-$supportConsequent)/(1-$confidence));
			next if(defined $self->{metrics}{minConviction} && $conviction < $self->{metrics}{minConviction});
			my $coverage = $supportAntecedent;
			next if(defined $self->{metrics}{minCoverage} && $coverage < $self->{metrics}{minCoverage});
			my $correlation = (($support-($supportAntecedent*$supportConsequent))/sqrt($supportAntecedent*(1-$supportAntecedent)*$supportConsequent*(1-$supportConsequent)));
			next if(defined $self->{metrics}{minCorrelation} && $correlation < $self->{metrics}{minCorrelation});
			my $cosine = ($support/sqrt($supportAntecedent*$supportConsequent));
			next if(defined $self->{metrics}{minCosine} && $cosine < $self->{metrics}{minCosine});
			my $laplace = (($support+1)/($supportAntecedent+2));
			next if(defined $self->{metrics}{minLaplace} && $laplace < $self->{metrics}{minLaplace});
			my $jaccard = ($support/($supportAntecedent+$supportConsequent-$support));
			next if(defined $self->{metrics}{minJaccard} && $jaccard < $self->{metrics}{minJaccard});
			$self->{rule}++;
			$support = sprintf("%.$self->{precision}f", $support);
			$confidence = sprintf("%.$self->{precision}f", $confidence);
			$lift = sprintf("%.$self->{precision}f", $lift);
			$leverage = sprintf("%.$self->{precision}f", $leverage);
			$conviction = sprintf("%.$self->{precision}f", $conviction)if($conviction ne "NaN");
			$coverage = sprintf("%.$self->{precision}f", $coverage);
			$correlation = sprintf("%.$self->{precision}f", $correlation);
			$cosine = sprintf("%.$self->{precision}f", $cosine);
			$laplace = sprintf("%.$self->{precision}f", $laplace);
			$jaccard = sprintf("%.$self->{precision}f", $jaccard);
			$self->{associationRules}{$self->{rule}} = {
				implication => "{ @antecedent } => { @consequent }",
				support => $support, 
				confidence => $confidence,
				lift => $lift,
				leverage => $leverage,
				conviction => $conviction,
				coverage => $coverage,
				correlation => $correlation,
				cosine => $cosine,
				laplace => $laplace,
				jaccard => $jaccard,
				items => [@antecedent, @consequent]
			};
			my@items=grep{!($_~~@frequentItemset)}@implication;
			push@frequentItemset,@items;
			last VARIATIONS if(defined $self->{limitRules} && $self->{rule} == $self->{limitRules});
		}
	}
	if($self->{messages}){
		print "\nFrequent itemset: { @frequentItemset }, ${\scalar(@frequentItemset)} items ";
	}
	if(defined $self->{associationRules}){
		@{$self->{frequentItemset}}=@frequentItemset;
		$self->output;
	}
	return if((defined $self->{limitRules} && $self->{rule} == $self->{limitRules})
				||(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets}));
	if(scalar(@frequentItemset)>=($self->{largeItemsetLength}+1)){
		$self->{largeItemsetLength}++;
		$self->{associationRules} = undef;
		$self->association_rules(\@frequentItemset);
	}
}

sub stop{
	if($self->{messages}){
		print "\nStopping ...";
		$self->output if $self->{associationRules};
		print "\nExit? (Y/N): ";
		my $answer = <STDIN>;
		chomp($answer);
		if($answer =~ /^y$/i){
			exit;
		}
		else{
			print "Processing ...";
		}
	}
	else{
		$self->output if $self->{associationRules};
		exit;
	}
}



( run in 1.993 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )