view release on metacpan or search on metacpan
fallback/const-c.inc view on Meta::CPAN
static int
constant_16 (pTHX_ const char *name, IV *iv_return) {
/* When generated this function returned values for the list of names given
here. However, subsequent manual editing may have added or removed some.
CS_ERR_NO_MEMORY IZ_VERSION_MAJOR IZ_VERSION_MINOR IZ_VERSION_PATCH */
/* Offset 13 gives the best switch position. */
switch (name[13]) {
case 'J':
if (memEQ(name, "IZ_VERSION_MAJOR", 16)) {
/* ^ */
#ifdef IZ_VERSION_MAJOR
fallback/const-c.inc view on Meta::CPAN
}
break;
case 15:
/* Names all of length 15. */
/* CS_ERR_GETVALUE CS_ERR_OVERFLOW */
/* Offset 10 gives the best switch position. */
switch (name[10]) {
case 'R':
if (memEQ(name, "CS_ERR_OVERFLOW", 15)) {
/* ^ */
#ifdef CS_ERR_OVERFLOW
view all matches for this distribution
view release on metacpan or search on metacpan
t/test.data view on Meta::CPAN
said VBD O
1989 CD B
will MD O
be VB O
the DT B
best JJS I
year NN I
in IN O
its PRP$ B
history NN I
, , O
t/test.data view on Meta::CPAN
friends NNS B
that WDT O
patience NN B
is VBZ O
the DT B
best JJS I
weapon NN I
against IN O
the DT B
gringos NNS I
, , O
t/test.data view on Meta::CPAN
for IN O
5 NN B
% NN I
, , O
at IN O
best JJS O
, , O
of IN O
the DT B
station NN I
's POS B
t/test.data view on Meta::CPAN
patient NN I
not RB O
only RB O
the DT B
very RB I
best JJS I
therapy NN I
which WDT B
we PRP B
have VBP O
established VBN O
t/test.data view on Meta::CPAN
, , O
personal JJ B
finance NN I
, , O
the DT B
best JJS I
colleges NNS I
, , O
and CC O
investments NNS B
. . O
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Closest/NetworkAddress.pm view on Meta::CPAN
Creates an object containing the list of addresses to compare against
=head2 $self->compare($network_address)
Will find the best match in the network_address_list for the network_address specified.
Returns the network address that best matches.
=cut
sub compare {
    my ($self, $target) = @_;
    carp "Must specify a target" unless defined $target;
    # Scan every known network address, keeping whichever one the
    # measure() metric scores highest against the target.
    my ($winner, $top_score) = (undef, 0);
    for my $candidate (@{ $self->network_address_list }) {
        my $score = $self->measure($candidate, $target);
        next unless $score > $top_score;
        ($top_score, $winner) = ($score, $candidate);
    }
    # Fall back to 0 when nothing scored above zero (no match at all).
    return $winner || 0;
}
=head1 AUTHOR
Ton Voon C<ton.voon@altinity.com>
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
src/cluster.c view on Meta::CPAN
index[i] = ix;
}
/* Start the iteration */
for (iter = 0; iter < niter; iter++) {
int ixbest = 0;
int iybest = 0;
int iobject = iter % nelements;
iobject = index[iobject];
if (transpose == 0) {
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, iobject, iybest,
transpose);
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, iobject, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[iobject][i] == 0) continue;
celldata[ix][iy][i] +=
src/cluster.c view on Meta::CPAN
double** celldatavector = malloc(ndata*sizeof(double*));
double radius = maxradius * (1. - ((double)iter)/((double)niter));
double tau = inittau * (1. - ((double)iter)/((double)niter));
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, iobject, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (i = 0; i < ndata; i++)
celldatavector[i] = &(celldata[ixbest][iybest][i]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, iobject, 0,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
free(celldatavector);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) <
radius) {
double sum = 0.;
for (i = 0; i < ndata; i++) {
if (mask[i][iobject] == 0) continue;
celldata[ix][iy][i] +=
src/cluster.c view on Meta::CPAN
for (i = 0; i < nygrid; i++) {
dummymask[i] = malloc(ncolumns*sizeof(int));
for (j = 0; j < ncolumns; j++) dummymask[i][j] = 1;
}
for (i = 0; i < nrows; i++) {
int ixbest = 0;
int iybest = 0;
double closest = metric(ndata, data, celldata[ixbest], mask,
dummymask, weights, i, iybest, transpose);
int ix, iy;
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance = metric(ndata, data, celldata[ix], mask,
dummymask, weights, i, iy,
transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
for (i = 0; i < nygrid; i++) free(dummymask[i]);
free(dummymask);
}
else {
double** celldatavector = malloc(ndata*sizeof(double*));
int** dummymask = malloc(nrows*sizeof(int*));
int ixbest = 0;
int iybest = 0;
for (i = 0; i < nrows; i++) {
dummymask[i] = malloc(sizeof(int));
dummymask[i][0] = 1;
}
for (i = 0; i < ncolumns; i++) {
double closest;
int ix, iy;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ixbest][iybest][j]);
closest = metric(ndata, data, celldatavector, mask, dummymask,
weights, i, 0, transpose);
for (ix = 0; ix < nxgrid; ix++) {
for (iy = 0; iy < nygrid; iy++) {
double distance;
for (j = 0; j < ndata; j++)
celldatavector[j] = &(celldata[ix][iy][j]);
distance = metric(ndata, data, celldatavector, mask,
dummymask, weights, i, 0, transpose);
if (distance < closest) {
ixbest = ix;
iybest = iy;
closest = distance;
}
}
}
clusterid[i][0] = ixbest;
clusterid[i][1] = iybest;
}
free(celldatavector);
for (i = 0; i < nrows; i++) free(dummymask[i]);
free(dummymask);
}
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
JumpHash.xs view on Meta::CPAN
* regardless of keys size.
*
* It is 64 bit only.
*/
/* Find best way to ROTL32/ROTL64 */
#ifndef ROTL64
#if defined(_MSC_VER)
#include <stdlib.h> /* Microsoft put _rotl declaration in here */
#define ROTL64(x,r) _rotl64(x,r)
#else
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/ContextVector.pm view on Meta::CPAN
return $features;
}
=head2 $self->train
Keeps the best features (top N) and norms the vectors.
=cut
sub train {
my $self = shift;
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(fit %STATS_H);
}
# fit() - only public function for this distribution
# Given at least parameter "xy", generate a best-fit curve within a time limit.
# Output: max deviation, avg deviation, implementation source string (perl or C, for now).
# Optional parameters and their defaults:
# terms => 3 # number of terms in formula, max is 10
# time_limit => 3 # number of seconds to try for better fit
# inv => 1 # invert sense of curve-fit, from x->y to y->x
lib/Algorithm/CurveFit/Simple.pm view on Meta::CPAN
=item * Support more programming languages for formula implementation: R, MATLAB, python
=item * Calculate the actual term sigfigs and set precision appropriately in the formula implementation instead of just "%.11f".
=item * Support trying a range of terms and returning whatever gives the best fit.
=item * Support piecewise output formulas.
=item * Work around L<Algorithm::CurveFit>'s occasional hang problem when using ten-term polynomials.
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/CurveFit.pm view on Meta::CPAN
name 'x' is default. (Hence 'xdata'.)
=item params
The parameters are the symbols in the formula whose value is varied by the
algorithm to find the best fit of the curve to the data. There may be
one or more parameters, but please keep in mind that the number of parameters
not only increases processing time, but also decreases the quality of the fit.
The value of this options should be an anonymous array. This array should
hold one anonymous array for each parameter. That array should hold (in order)
lib/Algorithm/CurveFit.pm view on Meta::CPAN
In order to prevent looping forever, you are strongly encouraged to make use of
the accuracy measure (see also: maximum_iterations).
The final set of parameters is B<not> returned from the subroutine but the
parameters are modified in-place. That means the original data structure will
hold the best estimate of the parameters.
=item xdata
This should be an array reference to an array holding the data for the
variable of the function. (Which defaults to 'x'.)
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
return \%answer;
}
###################################### Decision Tree Construction ####################################
## At the root node, we find the best feature that yields the greatest reduction in
## class entropy from the entropy based on just the class priors. The logic for
## finding this feature is different for symbolic features and for numeric features.
## That logic is built into the method shown later for best feature calculations.
sub construct_decision_tree_classifier {
print "\nConstructing the decision tree ...\n";
my $self = shift;
if ($self->{_debug3}) {
$self->determine_data_condition();
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
if ($existing_node_entropy < $self->{_entropy_threshold}) {
print "\nRD5 returning because existing node entropy is below threshold\n" if $self->{_debug3};
return;
}
my @copy_of_path_attributes = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my ($best_feature, $best_feature_entropy, $best_feature_val_entropies, $decision_val) =
$self->best_feature_calculator(\@copy_of_path_attributes, $existing_node_entropy);
$node->set_feature($best_feature);
$node->display_node() if $self->{_debug3};
if (defined($self->{_max_depth_desired}) &&
(@features_and_values_or_thresholds_on_branch >= $self->{_max_depth_desired})) {
print "\nRD6 REACHED LEAF NODE AT MAXIMUM DEPTH ALLOWED\n" if $self->{_debug3};
return;
}
return if ! defined $best_feature;
if ($self->{_debug3}) {
print "\nRD7 Existing entropy at node: $existing_node_entropy\n";
print "\nRD8 Calculated best feature is $best_feature and its value $decision_val\n";
print "\nRD9 Best feature entropy: $best_feature_entropy\n";
print "\nRD10 Calculated entropies for different values of best feature: @$best_feature_val_entropies\n";
}
my $entropy_gain = $existing_node_entropy - $best_feature_entropy;
print "\nRD11 Expected entropy gain at this node: $entropy_gain\n" if $self->{_debug3};
if ($entropy_gain > $self->{_entropy_threshold}) {
if (exists $self->{_numeric_features_valuerange_hash}->{$best_feature} &&
$self->{_feature_values_how_many_uniques_hash}->{$best_feature} >
$self->{_symbolic_to_numeric_cardinality_threshold}) {
my $best_threshold = $decision_val; # as returned by best feature calculator
my ($best_entropy_for_less, $best_entropy_for_greater) = @$best_feature_val_entropies;
my @extended_branch_features_and_values_or_thresholds_for_lessthan_child =
@{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my @extended_branch_features_and_values_or_thresholds_for_greaterthan_child =
@{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
my $feature_threshold_combo_for_less_than = "$best_feature" . '<' . "$best_threshold";
my $feature_threshold_combo_for_greater_than = "$best_feature" . '>' . "$best_threshold";
push @extended_branch_features_and_values_or_thresholds_for_lessthan_child,
$feature_threshold_combo_for_less_than;
push @extended_branch_features_and_values_or_thresholds_for_greaterthan_child,
$feature_threshold_combo_for_greater_than;
if ($self->{_debug3}) {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
\@extended_branch_features_and_values_or_thresholds_for_lessthan_child)} @{$self->{_class_names}};
my @class_probabilities_for_greaterthan_child_node =
map {$self->probability_of_a_class_given_sequence_of_features_and_values_or_thresholds($_,
\@extended_branch_features_and_values_or_thresholds_for_greaterthan_child)} @{$self->{_class_names}};
if ($self->{_debug3}) {
print "\nRD14 class entropy for going down lessthan child: $best_entropy_for_less\n";
print "\nRD15 class_entropy_for_going_down_greaterthan_child: $best_entropy_for_greater\n";
}
if ($best_entropy_for_less < $existing_node_entropy - $self->{_entropy_threshold}) {
my $left_child_node = DTNode->new(undef, $best_entropy_for_less,
\@class_probabilities_for_lessthan_child_node,
\@extended_branch_features_and_values_or_thresholds_for_lessthan_child, $self);
$node->add_child_link($left_child_node);
$self->recursive_descent($left_child_node);
}
if ($best_entropy_for_greater < $existing_node_entropy - $self->{_entropy_threshold}) {
my $right_child_node = DTNode->new(undef, $best_entropy_for_greater,
\@class_probabilities_for_greaterthan_child_node,
\@extended_branch_features_and_values_or_thresholds_for_greaterthan_child, $self);
$node->add_child_link($right_child_node);
$self->recursive_descent($right_child_node);
}
} else {
print "\nRD16 RECURSIVE DESCENT: In section for symbolic features for creating children"
if $self->{_debug3};
my @values_for_feature = @{$self->{_features_and_unique_values_hash}->{$best_feature}};
print "\nRD17 Values for feature $best_feature are @values_for_feature\n" if $self->{_debug3};
my @feature_value_combos = sort map {"$best_feature" . '=' . $_} @values_for_feature;
my @class_entropies_for_children = ();
foreach my $feature_and_value_index (0..@feature_value_combos-1) {
print "\nRD18 Creating a child node for: $feature_value_combos[$feature_and_value_index]\n"
if $self->{_debug3};
my @extended_branch_features_and_values_or_thresholds;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
return;
}
}
## This is the heart of the decision tree constructor. Its main job is to figure
## out the best feature to use for partitioning the training data samples that
## correspond to the current node. The search for the best feature is carried out
## differently for symbolic features and for numeric features. For a symbolic
## feature, the method estimates the entropy for each value of the feature and then
## averages out these entropies as a measure of the discriminatory power of that
## feature. For a numeric feature, on the other hand, it estimates the entropy
## reduction that can be achieved if we were to partition the set of training samples
## for each possible threshold. For a numeric feature, all possible sampling points
## relevant to the node in question are considered as candidates for thresholds.
sub best_feature_calculator {
my $self = shift;
my $features_and_values_or_thresholds_on_branch = shift;
my $existing_node_entropy = shift;
my @features_and_values_or_thresholds_on_branch = @$features_and_values_or_thresholds_on_branch;
my $pattern1 = '(.+)=(.+)';
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
@true_numeric_types_feature_names = grep {$_ if !$seen{$_}++} @true_numeric_types_feature_names;
%seen = ();
@symbolic_types_feature_names = grep {$_ if !$seen{$_}++} @symbolic_types_feature_names;
my @bounded_intervals_numeric_types =
@{$self->find_bounded_intervals_for_numeric_features(\@true_numeric_types)};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature (@true_numeric_types_feature_names) {
$upperbound{$feature} = undef;
$lowerbound{$feature} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
} elsif (defined($lowerbound{$feature_name})) {
foreach my $x (@values) {
push @newvalues, $x if $x > $lowerbound{$feature_name};
}
} else {
die "Error is bound specifications in best feature calculator";
}
} else {
@newvalues = @{deep_copy_array(\@values)};
}
next if @newvalues == 0;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
$self->probability_of_a_sequence_of_features_and_values_or_thresholds(\@for_right_child);
push @partitioning_entropies, $partitioning_entropy;
$partitioning_point_child_entropies_hash{$feature_name}{$value} = [$entropy1, $entropy2];
}
my ($min_entropy, $best_partition_point_index) = minimum(\@partitioning_entropies);
if ($min_entropy < $existing_node_entropy) {
$partitioning_point_threshold{$feature_name} = $newvalues[$best_partition_point_index];
$entropy_values_for_different_features{$feature_name} = $min_entropy;
}
} else {
print "\nBFC2: Entering section reserved for symbolic features\n" if $self->{_debug3};
print "\nBFC3 Feature name: $feature_name\n" if $self->{_debug3};
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
if ($entropy < $existing_node_entropy) {
$entropy_values_for_different_features{$feature_name} = $entropy;
}
}
}
my $min_entropy_for_best_feature;
my $best_feature_name;
foreach my $feature_nom (keys %entropy_values_for_different_features) {
if (!defined($best_feature_name)) {
$best_feature_name = $feature_nom;
$min_entropy_for_best_feature = $entropy_values_for_different_features{$feature_nom};
} else {
if ($entropy_values_for_different_features{$feature_nom} < $min_entropy_for_best_feature) {
$best_feature_name = $feature_nom;
$min_entropy_for_best_feature = $entropy_values_for_different_features{$feature_nom};
}
}
}
my $threshold_for_best_feature;
if (exists $partitioning_point_threshold{$best_feature_name}) {
$threshold_for_best_feature = $partitioning_point_threshold{$best_feature_name};
} else {
$threshold_for_best_feature = undef;
}
my $best_feature_entropy = $min_entropy_for_best_feature;
my @val_based_entropies_to_be_returned;
my $decision_val_to_be_returned;
if (exists $self->{_numeric_features_valuerange_hash}->{$best_feature_name} &&
$self->{_feature_values_how_many_uniques_hash}->{$best_feature_name} >
$self->{_symbolic_to_numeric_cardinality_threshold}) {
@val_based_entropies_to_be_returned =
@{$partitioning_point_child_entropies_hash{$best_feature_name}{$threshold_for_best_feature}};
} else {
@val_based_entropies_to_be_returned = ();
}
if (exists $partitioning_point_threshold{$best_feature_name}) {
$decision_val_to_be_returned = $partitioning_point_threshold{$best_feature_name};
} else {
$decision_val_to_be_returned = undef;
}
print "\nBFC6 Val based entropies to be returned for feature $best_feature_name are " .
"@val_based_entropies_to_be_returned\n" if $self->{_debug3};
return ($best_feature_name, $best_feature_entropy, \@val_based_entropies_to_be_returned,
$decision_val_to_be_returned);
}
######################################### Entropy Calculators #####################################
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my %seen2 = ();
@symbolic_types_feature_names = grep {$_ if !$seen2{$_}++} @symbolic_types_feature_names;
my $bounded_intervals_numeric_types = $self->find_bounded_intervals_for_numeric_features(\@true_numeric_types);
print_array_with_msg("POS: Answer returned by find_bounded: ",
$bounded_intervals_numeric_types) if $self->{_debug2};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature_name (@true_numeric_types_feature_names) {
$upperbound{$feature_name} = undef;
$lowerbound{$feature_name} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my %seen2 = ();
@symbolic_types_feature_names = grep {$_ if !$seen2{$_}++} @symbolic_types_feature_names;
my $bounded_intervals_numeric_types = $self->find_bounded_intervals_for_numeric_features(\@true_numeric_types);
print_array_with_msg("POSC: Answer returned by find_bounded: ",
$bounded_intervals_numeric_types) if $self->{_debug2};
# Calculate the upper and the lower bounds to be used when searching for the best
# threshold for each of the numeric features that are in play at the current node:
my (%upperbound, %lowerbound);
foreach my $feature_name (@true_numeric_types_feature_names) {
$upperbound{$feature_name} = undef;
$lowerbound{$feature_name} = undef;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
## training data by running a 10-fold cross-validation test on it. This test divides
## all of the training data into ten parts, with nine parts used for training a
## decision tree and one part used for testing its ability to classify correctly.
## This selection of nine parts for training and one part for testing is carried out
## in all of the ten different possible ways. This testing functionality can also
## be used to find the best values to use for the constructor parameters
## entropy_threshold, max_depth_desired, and
## symbolic_to_numeric_cardinality_threshold.
## Only the CSV training files can be evaluated in this manner (because only CSV
training files are allowed to have numeric features --- which is the more interesting
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
cross-validation test on the data. This test divides all of the training data into
ten parts, with nine parts used for training a decision tree and one part used for
testing its ability to classify correctly. This selection of nine parts for training
and one part for testing is carried out in all of the ten different ways that are
possible. This testing functionality in Version 2.1 can also be used to find the
best values to use for the constructor parameters C<entropy_threshold>,
C<max_depth_desired>, and C<symbolic_to_numeric_cardinality_threshold>.
B<Version 2.0 is a major rewrite of this module.> Now you can use both numeric and
symbolic features for constructing a decision tree. A feature is numeric if it can
take any floating-point value over an interval.
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
together to the data as partitioned by the feature test. You then drop from the root
node a set of child nodes, one for each partition of the training data created by the
feature test at the root node. When your features are purely symbolic, you'll have
one child node for each value of the feature chosen for the feature test at the root.
When the test at the root involves a numeric feature, you find the decision threshold
for the feature that best bipartitions the data and you drop from the root node two
child nodes, one for each partition. Now at each child node you pose the same
question that you posed when you found the best feature to use at the root: Which
feature at the child node in question would maximally disambiguate the class labels
associated with the training data corresponding to that child node?
As the reader would expect, the two key steps in any approach to decision-tree based
classification are the construction of the decision tree itself from a file
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
test set of data is a good way to develop greater proficiency with decision trees.
=head1 WHAT PRACTICAL PROBLEM IS SOLVED BY THIS MODULE
If you are new to the concept of a decision tree, their practical utility is best
understood with an example that only involves symbolic features. However, as
mentioned earlier, versions of the module higher than 2.0 allow you to use both
symbolic and numeric features.
Consider the following scenario: Let's say you are running a small investment company
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
my $root_node = $dt->construct_decision_tree_classifier();
Now you and your company (with practically no employees) are ready to service the
customers again. Suppose your computer needs to make a buy/sell decision about an
investment prospect that is best described by:
price_to_earnings_ratio = low
price_to_sales_ratio = very_low
return_on_equity = none
market_share = medium
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
The last statement above prints out a Confusion Matrix and the value of Training Data
Quality Index on a scale of 0 to 100, with 100 designating perfect training data.
The Confusion Matrix shows how the different classes were mislabeled in the 10-fold
cross-validation test.
This testing functionality can also be used to find the best values to use for the
constructor parameters C<entropy_threshold>, C<max_depth_desired>, and
C<symbolic_to_numeric_cardinality_threshold>.
The following two scripts in the C<Examples> directory illustrate the use of the
C<EvalTrainingData> class for testing the quality of your data:
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
explicitly giving a value to the 'C<number_of_histogram_bins>' parameter.
=back
You can choose the best values to use for the last three constructor parameters by
running a 10-fold cross-validation test on your training data through the class
C<EvalTrainingData> that comes with Versions 2.1 and higher of this module. See the
section "TESTING THE QUALITY OF YOUR TRAINING DATA" of this document page.
=over
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
why the decision tree classifier may associate significant probabilities with
multiple class labels is that you used inadequate number of training samples to
induce the decision tree. The good thing is that the classifier does not lie to you
(unlike, say, a hard classification rule that would return a single class label
corresponding to the partitioning of the underlying feature space). The decision
tree classifier gives you the best classification that can be made given the training
data you fed into it.
=head1 USING BAGGING
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
calculate the regression coefficients. When C<jacobian_choice> is set to 1, you get
a weak version of gradient descent in which the Jacobian is set to the "design
matrix" itself. Choosing 2 for C<jacobian_choice> results in a more reasonable
approximation to the Jacobian. That, however, is at a cost of much longer
computation time. B<NOTE:> For most cases, using 0 for C<jacobian_choice> is the
best choice. See my tutorial "I<Linear Regression and Regression Trees>" for why
that is the case.
=back
=head2 B<Methods defined for C<RegressionTree> class>
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Diff/Any.pm view on Meta::CPAN
}
}
=head1 DESCRIPTION
This is a simple module to select the best available implementation of the
standard C<diff> algorithm, which works by effectively trying to solve the
Longest Common Subsequence (LCS) problem. This algorithm is described in:
I<A Fast Algorithm for Computing Longest Common Subsequences>, CACM, vol.20,
no.5, pp.350-353, May 1977.
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Diff/JSON.pm view on Meta::CPAN
=back
=head1 FEEDBACK
I welcome feedback about my code, including constructive criticism, bug
reports, documentation improvements, and feature requests. The best bug reports
include files that I can add to the test suite, which fail with the current
code in my git repo and will pass once I've fixed the bug.
Feature requests are far more likely to get implemented if you submit a patch
yourself.
view all matches for this distribution
view release on metacpan or search on metacpan
#ifndef IVSIZE
# ifdef LONGSIZE
# define IVSIZE LONGSIZE
# else
# define IVSIZE 4 /* A bold guess, but the best we can make. */
# endif
#endif
#ifndef UVTYPE
# define UVTYPE unsigned IVTYPE
#endif
#ifndef PERL_MAGIC_ext
# define PERL_MAGIC_ext '~'
#endif
/* That's the best we can do... */
#ifndef SvPV_force_nomg
# define SvPV_force_nomg SvPV_force
#endif
#ifndef SvPV_nomg
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution
view release on metacpan or search on metacpan
2011-02-20 Juan J. Merelo Guervós <jjmerelo@gmail.com>
* lib/Algorithm/Evolutionary/Op/Breeder_Diverser.pm (apply): Made
even more diverse by not inserting the new individual if it is the
same as the parent; refactored also to best practices.
* lib/Algorithm/Evolutionary/Op/Uniform_Crossover_Diff.pm (apply):
Changed to leave at least one difference without change
2011-02-19 Juan J. Merelo Guervós <jjmerelo@gmail.com>
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
my $total_fitness = shift;
if ( !$total_fitness ) {
map( $total_fitness += $fitness_of->{$_}, @$population);
}
my $population_size = @{$population};
my @best = rnkeytop { $fitness_of->{$_} } 2 => @$population; # Extract elite
my @reproductive_pool = get_pool_roulette_wheel( $population, $fitness_of,
$population_size, $total_fitness ); # Reproduce
my @offspring = produce_offspring( \@reproductive_pool, $population_size - 2 ); #Obtain offspring
unshift( @offspring, @best ); #Insert elite at the beginning
@offspring; # return
}
"010101"; # Magic true value required at end of module
__END__
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
for (my $i = 0; $i < $number_of_strings; $i++) {
$population[$i] = random_chromosome( $length);
$fitness_of{$population[$i]} = max_ones( $population[$i] );
}
my @best;
my $generations=0;
do {
my @pool;
if ( $generations % 2 == 1 ) {
get_pool_roulette_wheel( \@population, \%fitness_of, $number_of_strings );
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
for my $p ( @new_pop ) {
if ( !$fitness_of{$p} ) {
$fitness_of{$p} = max_ones( $p );
}
}
@best = rnkeytop { $fitness_of{$_} } $number_of_strings/2 => @population;
@population = (@best, @new_pop);
print "Best so far $best[0] with fitness $fitness_of{$best[0]}\n";
} while ( ( $generations++ < $number_of_generations ) and ($fitness_of{$best[0]} != $length ));
=head1 DESCRIPTION
Assorted functions needed by an evolutionary algorithm, mainly for demos and simple clients.
lib/Algorithm/Evolutionary/Simple.pm view on Meta::CPAN
$slots the number of individuals to return.
=head2 single_generation( $population_arrayref, $fitness_of_hashref )
Applies all steps to arrive at a new generation, except
evaluation. Keeps the two best for the next generation.
=head2 get_pool_roulette_wheel( $population_arrayref, $fitness_of_hashref, $how_many_I_need )
Obtains a pool of new chromosomes using fitness_proportional selection
view all matches for this distribution
view release on metacpan or search on metacpan
2011-02-20 Juan J. Merelo Guervós <jjmerelo@gmail.com>
* lib/Algorithm/Evolutionary/Op/Breeder_Diverser.pm (apply): Made
even more diverse by not inserting the new individual if it is the
same as the parent; refactored also to best practices.
* lib/Algorithm/Evolutionary/Op/Uniform_Crossover_Diff.pm (apply):
Changed to leave at least one difference without change
2011-02-19 Juan J. Merelo Guervós <jjmerelo@gmail.com>
view all matches for this distribution
view release on metacpan or search on metacpan
lib/Algorithm/Evolutionary/Op/Breeder.pm view on Meta::CPAN
my $generation =
new Algorithm::Evolutionary::Op::Breeder( $selector, [$m, $c] );
my @sortPop = sort { $b->Fitness() <=> $a->Fitness() } @pop;
my $bestIndi = $sortPop[0];
my $previous_average = average( \@sortPop );
$generation->apply( \@sortPop );
=head1 Base Class
view all matches for this distribution
view release on metacpan or search on metacpan
examples/breeding_perls.pl view on Meta::CPAN
)->start;
sub callback {
my $p = shift;
if ($p->best_fit->fitness == $TARGET) {
$p->suspend;
printf "Solution found after %d generations:\n%s\n",
$p->generations, $p->best_fit->as_perl_code;
}
if ($p->generations == 100_000) {
$p->suspend;
print "Timed out after 100,000 generations.. try a smaller target\n";
view all matches for this distribution
view release on metacpan or search on metacpan
examples/data_generator.pl view on Meta::CPAN
# How the synthetic data is generated for clustering is
# controlled entirely by the input_parameter_file keyword in
# the function call shown below. The class prior
# probabilities, the mean vectors and covariance matrix
# entries in file must be according to the syntax shown in
# the example param.txt file. It is best to edit that file
# as needed for the purpose of data generation.
#my $parameter_file = "param1.txt"; #2D
#my $parameter_file = "param2.txt"; #2D
#my $parameter_file = "param3.txt"; #2D
view all matches for this distribution
view release on metacpan or search on metacpan
/* Compatibility shims (Devel::PPPort style): supply definitions that
 * older perls' headers lack, so the XS code below compiles unchanged. */
#ifndef IVSIZE
# ifdef LONGSIZE
# define IVSIZE LONGSIZE
# else
# define IVSIZE 4 /* A bold guess, but the best we can make. */
# endif
#endif
/* UVTYPE is the unsigned counterpart of IVTYPE. */
#ifndef UVTYPE
# define UVTYPE unsigned IVTYPE
#endif
/* '~' is the magic type reserved for extension-private data (perlguts). */
#ifndef PERL_MAGIC_ext
# define PERL_MAGIC_ext '~'
#endif
/* That's the best we can do... */
/* Old perls have no magic-free variant; fall back to the normal form,
 * which may invoke get/set magic as a side effect. */
#ifndef sv_catpvn_nomg
# define sv_catpvn_nomg sv_catpvn
#endif
#ifndef sv_catsv_nomg
view all matches for this distribution
view release on metacpan or search on metacpan
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
view all matches for this distribution