view release on metacpan or search on metacpan
fallback/const-c.inc view on Meta::CPAN
static int
constant_16 (pTHX_ const char *name, IV *iv_return) {
/* When generated this function returned values for the list of names given
here. However, subsequent manual editing may have added or removed some.
CS_ERR_NO_MEMORY IZ_VERSION_MAJOR IZ_VERSION_MINOR IZ_VERSION_PATCH */
/* Offset 13 gives the best switch position. */
switch (name[13]) {
case 'J':
if (memEQ(name, "IZ_VERSION_MAJOR", 16)) {
/* ^ */
#ifdef IZ_VERSION_MAJOR
fallback/const-c.inc view on Meta::CPAN
}
break;
case 15:
/* Names all of length 15. */
/* CS_ERR_GETVALUE CS_ERR_OVERFLOW */
/* Offset 10 gives the best switch position. */
switch (name[10]) {
case 'R':
if (memEQ(name, "CS_ERR_OVERFLOW", 15)) {
/* ^ */
#ifdef CS_ERR_OVERFLOW
view all matches for this distribution
view release on metacpan or search on metacpan
t/test.data view on Meta::CPAN
said VBD O
1989 CD B
will MD O
be VB O
the DT B
best JJS I
year NN I
in IN O
its PRP$ B
history NN I
, , O
t/test.data view on Meta::CPAN
friends NNS B
that WDT O
patience NN B
is VBZ O
the DT B
best JJS I
weapon NN I
against IN O
the DT B
gringos NNS I
, , O
t/test.data view on Meta::CPAN
for IN O
5 NN B
% NN I
, , O
at IN O
best JJS O
, , O
of IN O
the DT B
station NN I
's POS B
t/test.data view on Meta::CPAN
patient NN I
not RB O
only RB O
the DT B
very RB I
best JJS I
therapy NN I
which WDT B
we PRP B
have VBP O
established VBN O
t/test.data view on Meta::CPAN
, , O
personal JJ B
finance NN I
, , O
the DT B
best JJS I
colleges NNS I
, , O
and CC O
investments NNS B
. . O
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Closest/NetworkAddress.pm view on Meta::CPAN
Creates an object containing the list of addresses to compare against
=head2 $self->compare($network_address)
Will find the best match in the network_address_list for the network_address specified.
Returns the network address that best matches.
=cut
sub compare {
    my ($self, $target) = @_;
    carp "Must specify a target" unless defined $target;
    # Scan every known network address, keeping whichever one the
    # measure() metric scores highest against the target.
    my ($winner, $top_score) = (undef, 0);
    for my $candidate (@{ $self->network_address_list }) {
        my $score = $self->measure($candidate, $target);
        next unless $score > $top_score;
        ($top_score, $winner) = ($score, $candidate);
    }
    # Fall back to 0 when nothing scored above zero (no match at all).
    return $winner || 0;
}
=head1 AUTHOR
Ton Voon C<ton.voon@altinity.com>
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
src/cluster.c view on Meta::CPAN
index[i] = ix;
}
/* Start the iteration */
for (iter = 0; iter < niter; iter++) {
int ixbest = 0;
int iybest = 0;
int iobject = iter % nelements;
iobject = index[iobject];
if (transpose == 0) {
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, iobject, iybest,
transpose);
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, iobject, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[iobject][i] == 0) continue;
celldata[ix][iy][i] +=
src/cluster.c view on Meta::CPAN
double** celldatavector = malloc(ndata*sizeof(double*));
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, iobject, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, iobject, 0,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
free(celldatavector);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[i][iobject] == 0) continue;
celldata[ix][iy][i] +=
src/cluster.c view on Meta::CPAN
for (i = 0; i < nygrid; i++) {
dummymask[i] = malloc(ncolumns*sizeof(int));
for (j = 0; j < ncolumns; j++) dummymask[i][j] = 1;
}
for (i = 0; i < nrows; i++) {
int ixbest = 0;
int iybest = 0;
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, i, iybest, transpose);
int ix, iy;
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, i, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
for (i = 0; i < nygrid; i++) free(dummymask[i]);
free(dummymask);
}
else {
double** celldatavector = malloc(ndata*sizeof(double*));
int** dummymask = malloc(nrows*sizeof(int*));
int ixbest = 0;
int iybest = 0;
for (i = 0; i < nrows; i++) {
dummymask[i] = malloc(sizeof(int));
dummymask[i][0] = 1;
}
for (i = 0; i < ncolumns; i++) {
double closest;
int ix, iy;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ixbest][iybest][j]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, i, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ix][iy][j]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, i, 0, transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
free(celldatavector);
for (i = 0; i < nrows; i++) free(dummymask[i]);
free(dummymask);
}
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
JumpHash.xs view on Meta::CPAN
* regardless of keys size.
*
* It is 64 bit only.
*/
/* Find best way to ROTL32/ROTL64 */
#ifndef ROTL64
#if defined(_MSC_VER)
#include <stdlib.h> /* Microsoft put _rotl declaration in here */
#define ROTL64(x,r) _rotl64(x,r)
#else
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/ContextVector.pm view on Meta::CPAN
return $features;
}
=head2 $self->train
Keeps the best features (top N) and norms the vectors.
=cut
sub train {
my $self = shift;
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(fit %STATS_H);
}
# fit() - only public function for this distribution
# Given at least parameter "xy", generate a best-fit curve within a time limit.
# Output: max deviation, avg deviation, implementation source string (perl or C, for now).
# Optional parameters and their defaults:
# terms => 3 # number of terms in formula, max is 10
# time_limit => 3 # number of seconds to try for better fit
# inv => 1 # invert sense of curve-fit, from x->y to y->x
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
=item * Support more programming languages for formula implementation: R, MATLAB, python
=item * Calculate the actual term sigfigs and set precision appropriately in the formula implementation instead of just "%.11f".
=item * Support trying a range of terms and returning whatever gives the best fit.
=item * Support piecewise output formulas.
=item * Work around L<Algorithm::CurveFit>'s occasional hang problem when using ten-term polynomials.
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/CurveFit.pm view on Meta::CPAN
name 'x' is default. (Hence 'xdata'.)
=item params
The parameters are the symbols in the formula whose value is varied by the
algorithm to find the best fit of the curve to the data. There may be
one or more parameters, but please keep in mind that the number of parameters
not only increases processing time, but also decreases the quality of the fit.
The value of this options should be an anonymous array. This array should
hold one anonymous array for each parameter. That array should hold (in order)
lib/Algorithm/CurveFit.pm view on Meta::CPAN
In order to prevent looping forever, you are strongly encouraged to make use of
the accuracy measure (see also: maximum_iterations).
The final set of parameters is B<not> returned from the subroutine but the
parameters are modified in-place. That means the original data structure will
hold the best estimate of the parameters.
=item xdata
This should be an array reference to an array holding the data for the
variable of the function. (Which defaults to 'x'.)
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
return \%answer;
}
###################################### Decision Tree Construction ####################################
## At the root node, we find the best feature that yields the greatest reduction in
## class entropy from the entropy based on just the class priors. The logic for
## finding this feature is different for symbolic features and for numeric features.
## That logic is built into the method shown later for best feature calculations.
sub construct_decision_tree_classifier {
print "\nConstructing the decision tree ...\n";
my $self = shift;
if ($self->{_debug3}) {
$self->determine_data_condition();
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
if ($existing_node_entropy < $self->{_entropy_threshold}) {
print "\nRD5 returning because existing node entropy is below threshold\n" if $self->{_debug3};
return;
}
my @copy_of_path_attributes = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my ($best_feature, $best_feature_entropy, $best_feature_val_entropies, $decision_val) =
$self->best_feature_calculator(\@copy_of_path_attributes, $existing_node_entropy);
$node->set_feature($best_feature);
$node->display_node() if $self->{_debug3};
if (defined($self->{_max_depth_desired}) &&
(@features_and_values_or_thresholds_on_branch >= $self->{_max_depth_desired})) {
print "\nRD6 REACHED LEAF NODE AT MAXIMUM DEPTH ALLOWED\n" if $self->{_debug3};
return;
}
return if ! defined $best_feature;
if ($self->{_debug3}) {
print "\nRD7 Existing entropy at node: $existing_node_entropy\n";
print "\nRD8 Calculated best feature is $best_feature and its value $decision_val\n";
print "\nRD9 Best feature entropy: $best_feature_entropy\n";
print "\nRD10 Calculated entropies for different values of best feature: @$best_feature_val_entropies\n";
}
my $entropy_gain = $existing_node_entropy - $best_feature_entropy;
print "\nRD11 Expected entropy gain at this node: $entropy_gain\n" if $self->{_debug3};
if ($entropy_gain > $self->{_entropy_threshold}) {
if (exists $self->{_numeric_features_valuerange_hash}->{$best_feature} &&
$self->{_feature_values_how_many_uniques_hash}->{$best_feature} >
$self->{_symbolic_to_numeric_cardinality_threshold}) {
my $best_threshold = $decision_val; # as returned by best feature calculator
my ($best_entropy_for_less, $best_entropy_for_greater) = @$best_feature_val_entropies;
my @extended_branch_features_and_values_or_thresholds_for_lessthan_child =
@{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my @extended_branch_features_and_values_or_thresholds_for_greaterthan_child =
@{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my $feature_threshold_combo_for_less_than = "$best_feature" . '<' . "$best_threshold";
my $feature_threshold_combo_for_greater_than = "$best_feature" . '>' . "$best_threshold";
push @extended_branch_features_and_values_or_thresholds_for_lessthan_child,
$feature_threshold_combo_for_less_than;
push @extended_branch_features_and_values_or_thresholds_for_greaterthan_child,
$feature_threshold_combo_for_greater_than;
if ($self->{_debug3}) {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
\@extended_branch_features_and_values_or_thresholds_for_lessthan_child)} @{$self->{_class_names}};
my @class_probabilities_for_greaterthan_child_node =
map {$self->probability_of_a_class_given_sequence_of_features_and_values_or_thresholds($_,
\@extended_branch_features_and_values_or_thresholds_for_greaterthan_child)} @{$self->{_class_names}};
if ($self->{_debug3}) {
print "\nRD14 class entropy for going down lessthan child: $best_entropy_for_less\n";
print "\nRD15 class_entropy_for_going_down_greaterthan_child: $best_entropy_for_greater\n";
}
if ($best_entropy_for_less < $existing_node_entropy - $self->{_entropy_threshold}) {
my $left_child_node = DTNode->new(undef, $best_entropy_for_less,
\@class_probabilities_for_lessthan_child_node,
\@extended_branch_features_and_values_or_thresholds_for_lessthan_child, $self);
$node->add_child_link($left_child_node);
$self->recursive_descent($left_child_node);
}
if ($best_entropy_for_greater < $existing_node_entropy - $self->{_entropy_threshold}) {
my $right_child_node = DTNode->new(undef, $best_entropy_for_greater,
\@class_probabilities_for_greaterthan_child_node,
\@extended_branch_features_and_values_or_thresholds_for_greaterthan_child, $self);
$node->add_child_link($right_child_node);
$self->recursive_descent($right_child_node);
}
} else {
print "\nRD16 RECURSIVE DESCENT: In section for symbolic features for creating children"
if $self->{_debug3};
my @values_for_feature = @{$self->{_features_and_unique_values_hash}->{$best_feature}};
print "\nRD17 Values for feature $best_feature are @values_for_feature\n" if $self->{_debug3};
my @feature_value_combos = sort map {"$best_feature" . '=' . $_} @values_for_feature;
my @class_entropies_for_children = ();
foreach my $feature_and_value_index (0..@feature_value_combos-1) {
print "\nRD18 Creating a child node for: $feature_value_combos[$feature_and_value_index]\n"
if $self->{_debug3};
my @extended_branch_features_and_values_or_thresholds;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
return;
}
}
## This is the heart of the decision tree constructor. Its main job is to figure
## out the best feature to use for partitioning the training data samples that
## correspond to the current node. The search for the best feature is carried out
## differently for symbolic features and for numeric features. For a symbolic
## feature, the method estimates the entropy for each value of the feature and then
## averages out these entropies as a measure of the discriminatory power of that
## feature. For a numeric feature, on the other hand, it estimates the entropy
## reduction that can be achieved if we were to partition the set of training samples
## for each possible threshold. For a numeric feature, all possible sampling points
## relevant to the node in question are considered as candidates for thresholds.
sub best_feature_calculator {
my $self = shift;
my $features_and_values_or_thresholds_on_branch = shift;
my $existing_node_entropy = shift;
my @features_and_values_or_thresholds_on_branch = @$features_and_values_or_thresholds_on_branch;
my $pattern1 = '(.+)=(.+)';
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
@true_numeric_types_feature_names = grep {$_ if !$seen{$_}++} @true_numeric_types_feature_names;
%seen = ();
@symbolic_types_feature_names = grep {$_ if !$seen{$_}++} @symbolic_types_feature_names;
my @bounded_intervals_numeric_types =
@{$self->find_bounded_intervals_for_numeric_features(\@true_numeric_types)};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature (@true_numeric_types_feature_names) {
$upperbound{$feature} = undef;
$lowerbound{$feature} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
} elsif (defined($lowerbound{$feature_name})) {
foreach my $x (@values) {
push @newvalues, $x if $x > $lowerbound{$feature_name};
}
} else {
die "Error is bound specifications in best feature calculator";
}
} else {
@newvalues = @{deep_copy_array(\@values)};
}
next if @newvalues == 0;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
$self->probability_of_a_sequence_of_features_and_values_or_thresholds(\@for_right_child);
push @partitioning_entropies, $partitioning_entropy;
$partitioning_point_child_entropies_hash{$feature_name}{$value} = [$entropy1, $entropy2];
}
my ($min_entropy, $best_partition_point_index) = minimum(\@partitioning_entropies);
if ($min_entropy < $existing_node_entropy) {
$partitioning_point_threshold{$feature_name} = $newvalues[$best_partition_point_index];
$entropy_values_for_different_features{$feature_name} = $min_entropy;
}
} else {
print "\nBFC2: Entering section reserved for symbolic features\n" if $self->{_debug3};
print "\nBFC3 Feature name: $feature_name\n" if $self->{_debug3};
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
if ($entropy < $existing_node_entropy) {
$entropy_values_for_different_features{$feature_name} = $entropy;
}
}
}
my $min_entropy_for_best_feature;
my $best_feature_name;
foreach my $feature_nom (keys %entropy_values_for_different_features) {
if (!defined($best_feature_name)) {
$best_feature_name = $feature_nom;
$min_entropy_for_best_feature = $entropy_values_for_different_features{$feature_nom};
} else {
if ($entropy_values_for_different_features{$feature_nom} < $min_entropy_for_best_feature) {
$best_feature_name = $feature_nom;
$min_entropy_for_best_feature = $entropy_values_for_different_features{$feature_nom};
}
}
}
my $threshold_for_best_feature;
if (exists $partitioning_point_threshold{$best_feature_name}) {
$threshold_for_best_feature = $partitioning_point_threshold{$best_feature_name};
} else {
$threshold_for_best_feature = undef;
}
my $best_feature_entropy = $min_entropy_for_best_feature;
my @val_based_entropies_to_be_returned;
my $decision_val_to_be_returned;
if (exists $self->{_numeric_features_valuerange_hash}->{$best_feature_name} &&
$self->{_feature_values_how_many_uniques_hash}->{$best_feature_name} >
$self->{_symbolic_to_numeric_cardinality_threshold}) {
@val_based_entropies_to_be_returned =
@{$partitioning_point_child_entropies_hash{$best_feature_name}{$threshold_for_best_feature}};
} else {
@val_based_entropies_to_be_returned = ();
}
if (exists $partitioning_point_threshold{$best_feature_name}) {
$decision_val_to_be_returned = $partitioning_point_threshold{$best_feature_name};
} else {
$decision_val_to_be_returned = undef;
}
print "\nBFC6 Val based entropies to be returned for feature $best_feature_name are " .
"@val_based_entropies_to_be_returned\n" if $self->{_debug3};
return ($best_feature_name, $best_feature_entropy, \@val_based_entropies_to_be_returned,
$decision_val_to_be_returned);
}
######################################### Entropy Calculators #####################################
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my %seen2 = ();
@symbolic_types_feature_names = grep {$_ if !$seen2{$_}++} @symbolic_types_feature_names;
my $bounded_intervals_numeric_types = $self->find_bounded_intervals_for_numeric_features(\@true_numeric_types);
print_array_with_msg("POS: Answer returned by find_bounded: ",
$bounded_intervals_numeric_types) if $self->{_debug2};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature_name (@true_numeric_types_feature_names) {
$upperbound{$feature_name} = undef;
$lowerbound{$feature_name} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my %seen2 = ();
@symbolic_types_feature_names = grep {$_ if !$seen2{$_}++} @symbolic_types_feature_names;
my $bounded_intervals_numeric_types = $self->find_bounded_intervals_for_numeric_features(\@true_numeric_types);
print_array_with_msg("POSC: Answer returned by find_bounded: ",
$bounded_intervals_numeric_types) if $self->{_debug2};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature_name (@true_numeric_types_feature_names) {
$upperbound{$feature_name} = undef;
$lowerbound{$feature_name} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## training data by running a 10-fold cross-validation test on it. This test divides
## all of the training data into ten parts, with nine parts used for training a
## decision tree and one part used for testing its ability to classify correctly.
## This selection of nine parts for training and one part for testing is carried out
## in all of the ten different possible ways. This testing functionality can also
## be used to find the best values to use for the constructor parameters
## entropy_threshold, max_depth_desired, and
## symbolic_to_numeric_cardinality_threshold.
## Only the CSV training files can be evaluated in this manner (because only CSV
training files are allowed to have numeric features --- which is the more interesting
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
cross-validation test on the data. This test divides all of the training data into
ten parts, with nine parts used for training a decision tree and one part used for
testing its ability to classify correctly. This selection of nine parts for training
and one part for testing is carried out in all of the ten different ways that are
possible. This testing functionality in Version 2.1 can also be used to find the
best values to use for the constructor parameters C<entropy_threshold>,
C<max_depth_desired>, and C<symbolic_to_numeric_cardinality_threshold>.
B<Version 2.0 is a major rewrite of this module.> Now you can use both numeric and
symbolic features for constructing a decision tree. A feature is numeric if it can
take any floating-point value over an interval.
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
together to the data as partitioned by the feature test. You then drop from the root
node a set of child nodes, one for each partition of the training data created by the
feature test at the root node. When your features are purely symbolic, you'll have
one child node for each value of the feature chosen for the feature test at the root.
When the test at the root involves a numeric feature, you find the decision threshold
for the feature that best bipartitions the data and you drop from the root node two
child nodes, one for each partition. Now at each child node you pose the same
question that you posed when you found the best feature to use at the root: Which
feature at the child node in question would maximally disambiguate the class labels
associated with the training data corresponding to that child node?
As the reader would expect, the two key steps in any approach to decision-tree based
classification are the construction of the decision tree itself from a file
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
test set of data is a good way to develop greater proficiency with decision trees.
=head1 WHAT PRACTICAL PROBLEM IS SOLVED BY THIS MODULE
If you are new to the concept of a decision tree, their practical utility is best
understood with an example that only involves symbolic features. However, as
mentioned earlier, versions of the module higher than 2.0 allow you to use both
symbolic and numeric features.
Consider the following scenario: Let's say you are running a small investment company
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my $root_node = $dt->construct_decision_tree_classifier();
Now you and your company (with practically no employees) are ready to service the
customers again. Suppose your computer needs to make a buy/sell decision about an
investment prospect that is best described by:
price_to_earnings_ratio = low
price_to_sales_ratio = very_low
return_on_equity = none
market_share = medium
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
The last statement above prints out a Confusion Matrix and the value of Training Data
Quality Index on a scale of 0 to 100, with 100 designating perfect training data.
The Confusion Matrix shows how the different classes were mislabeled in the 10-fold
cross-validation test.
This testing functionality can also be used to find the best values to use for the
constructor parameters C<entropy_threshold>, C<max_depth_desired>, and
C<symbolic_to_numeric_cardinality_threshold>.
The following two scripts in the C<Examples> directory illustrate the use of the
C<EvalTrainingData> class for testing the quality of your data:
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
explicitly giving a value to the 'C<number_of_histogram_bins>' parameter.
=back
You can choose the best values to use for the last three constructor parameters by
running a 10-fold cross-validation test on your training data through the class
C<EvalTrainingData> that comes with Versions 2.1 and higher of this module. See the
section "TESTING THE QUALITY OF YOUR TRAINING DATA" of this document page.
=over
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
why the decision tree classifier may associate significant probabilities with
multiple class labels is that you used inadequate number of training samples to
induce the decision tree. The good thing is that the classifier does not lie to you
(unlike, say, a hard classification rule that would return a single class label
corresponding to the partitioning of the underlying feature space). The decision
tree classifier gives you the best classification that can be made given the training
data you fed into it.
=head1 USING BAGGING
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
calculate the regression coefficients. When C<jacobian_choice> is set to 1, you get
a weak version of gradient descent in which the Jacobian is set to the "design
matrix" itself. Choosing 2 for C<jacobian_choice> results in a more reasonable
approximation to the Jacobian. That, however, is at a cost of much longer
computation time. B<NOTE:> For most cases, using 0 for C<jacobian_choice> is the
best choice. See my tutorial "I<Linear Regression and Regression Trees>" for why
that is the case.
=back
=head2 B<Methods defined for C<RegressionTree> class>
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Diff/Any.pm view on Meta::CPAN
}
}
=head1 DESCRIPTION
This is a simple module to select the best available implementation of the
standard C<diff> algorithm, which works by effectively trying to solve the
Longest Common Subsequence (LCS) problem. This algorithm is described in:
I<A Fast Algorithm for Computing Longest Common Subsequences>, CACM, vol.20,
no.5, pp.350-353, May 1977.
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Diff/JSON.pm view on Meta::CPAN
=back
=head1 FEEDBACK
I welcome feedback about my code, including constructive criticism, bug
reports, documentation improvements, and feature requests. The best bug reports
include files that I can add to the test suite, which fail with the current
code in my git repo and will pass once I've fixed the bug.
Feature requests are far more likely to get implemented if you submit a patch
yourself.
view all matches for this distribution
view release on metacpan or search on metacpan
#ifndef IVSIZE
# ifdef LONGSIZE
# define IVSIZE LONGSIZE
# else
# define IVSIZE 4 /* A bold guess, but the best we can make. */
# endif
#endif
#ifndef UVTYPE
# define UVTYPE unsigned IVTYPE
#endif
#ifndef PERL_MAGIC_ext
# define PERL_MAGIC_ext '~'
#endif
/* That's the best we can do... */
#ifndef SvPV_force_nomg
# define SvPV_force_nomg SvPV_force
#endif
#ifndef SvPV_nomg
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
2011-02-20 Juan J. Merelo Guervós <jjmerelo@gmail.com>
* lib/Algorithm/Evolutionary/Op/Breeder_Diverser.pm (apply): Made
even more diverse by not inserting the new individual if it is the
same as the parent; refactored also to best practices.
* lib/Algorithm/Evolutionary/Op/Uniform_Crossover_Diff.pm (apply):
Changed to leave at least one difference without change
2011-02-19 Juan J. Merelo Guervós <jjmerelo@gmail.com>
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
my $total_fitness = shift;
if ( !$total_fitness ) {
map( $total_fitness += $fitness_of->{$_}, @$population);
}
my $population_size = @{$population};
my @best = rnkeytop { $fitness_of->{$_} } 2 => @$population; # Extract elite
my @reproductive_pool = get_pool_roulette_wheel( $population, $fitness_of,
$population_size, $total_fitness ); # Reproduce
my @offspring = produce_offspring( \@reproductive_pool, $population_size - 2 ); #Obtain offspring
unshift( @offspring, @best ); #Insert elite at the beginning
@offspring; # return
}
"010101"; # Magic true value required at end of module
__END__
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
for (my $i = 0; $i < $number_of_strings; $i++) {
$population[$i] = random_chromosome( $length);
$fitness_of{$population[$i]} = max_ones( $population[$i] );
}
my @best;
my $generations=0;
do {
my @pool;
if ( $generations % 2 == 1 ) {
get_pool_roulette_wheel( \@population, \%fitness_of, $number_of_strings );
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
for my $p ( @new_pop ) {
if ( !$fitness_of{$p} ) {
$fitness_of{$p} = max_ones( $p );
}
}
@best = rnkeytop { $fitness_of{$_} } $number_of_strings/2 => @population;
@population = (@best, @new_pop);
print "Best so far $best[0] with fitness $fitness_of{$best[0]}\n";
} while ( ( $generations++ < $number_of_generations ) and ($fitness_of{$best[0]} != $length ));
=head1 DESCRIPTION
Assorted functions needed by an evolutionary algorithm, mainly for demos and simple clients.
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
$slots the number of individuals to return.
=head2 single_generation( $population_arrayref, $fitness_of_hashref )
Applies all steps to arrive at a new generation, except
evaluation. Keeps the two best for the next generation.
=head2 get_pool_roulette_wheel( $population_arrayref, $fitness_of_hashref, $how_many_I_need )
Obtains a pool of new chromosomes using fitness_proportional selection
view all matches for this distribution
view release on metacpan or search on metacpan
2011-02-20 Juan J. Merelo Guervós <jjmerelo@gmail.com>
* lib/Algorithm/Evolutionary/Op/Breeder_Diverser.pm (apply): Made
even more diverse by not inserting the new individual if it is the
same as the parent; refactored also to best practices.
* lib/Algorithm/Evolutionary/Op/Uniform_Crossover_Diff.pm (apply):
Changed to leave at least one difference without change
2011-02-19 Juan J. Merelo Guervós <jjmerelo@gmail.com>
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Evolutionary/Op/Breeder.pm view on Meta::CPAN
my $generation =
new Algorithm::Evolutionary::Op::Breeder( $selector, [$m, $c] );
my @sortPop = sort { $b->Fitness() <=> $a->Fitness() } @pop;
my $bestIndi = $sortPop[0];
my $previous_average = average( \@sortPop );
$generation->apply( \@sortPop );
=head1 Base Class
view all matches for this distribution
view release on metacpan or search on metacpan
examples/breeding_perls.pl view on Meta::CPAN
)->start;
sub callback {
my $p = shift;
if ($p->best_fit->fitness == $TARGET) {
$p->suspend;
printf "Solution found after %d generations:\n%s\n",
$p->generations, $p->best_fit->as_perl_code;
}
if ($p->generations == 100_000) {
$p->suspend;
print "Timed out after 100,000 generations.. try a smaller target\n";
view all matches for this distribution
view release on metacpan or search on metacpan
examples/data_generator.pl view on Meta::CPAN
# How the synthetic data is generated for clustering is
# controlled entirely by the input_parameter_file keyword in
# the function call shown below. The class prior
# probabilities, the mean vectors and covariance matrix
# entries in file must be according to the syntax shown in
# the example param.txt file. It is best to edit that file
# as needed for the purpose of data generation.
#my $parameter_file = "param1.txt"; #2D
#my $parameter_file = "param2.txt"; #2D
#my $parameter_file = "param3.txt"; #2D
view all matches for this distribution
view release on metacpan or search on metacpan
/* Compatibility shims (Devel::PPPort style): supply definitions that
 * older perls' headers lack, so the XS code below compiles unchanged. */
#ifndef IVSIZE
# ifdef LONGSIZE
# define IVSIZE LONGSIZE
# else
# define IVSIZE 4 /* A bold guess, but the best we can make. */
# endif
#endif
/* UVTYPE is the unsigned counterpart of IVTYPE. */
#ifndef UVTYPE
# define UVTYPE unsigned IVTYPE
#endif
/* '~' is the magic type reserved for extension-private data (perlguts). */
#ifndef PERL_MAGIC_ext
# define PERL_MAGIC_ext '~'
#endif
/* That's the best we can do... */
/* Old perls have no magic-free variant; fall back to the normal form,
 * which may invoke get/set magic as a side effect. */
#ifndef sv_catpvn_nomg
# define sv_catpvn_nomg sv_catpvn
#endif
#ifndef sv_catsv_nomg
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution