view release on metacpan or search on metacpan
Examples/classify_test_data_in_a_file.pl view on Meta::CPAN
170171172173174175176177178179180181182183184185186187188189190191192@all_class_names
=
grep
{
$_
if
!
$seen
{
$_
}++}
values
%class_for_sample_hash
;
"\n All class names: @all_class_names\n"
if
$debug
;
%numeric_features_valuerange_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
%features_and_unique_values_hash
= ();
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen1
= ();
my
@unique_values_for_feature
=
sort
grep
{
$_
if
$_
ne
'NA'
&& !
$seen1
{
$_
}++}
@{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^\d*\.\d+$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
if
(
$debug
) {
"\nAll class names: @all_class_names\n"
;
"\nEach sample data record:\n"
;
foreach
my
$sample
(
sort
{sample_index(
$a
) <=> sample_index(
$b
)}
keys
%feature_values_for_samples_hash
) {
"$sample => @{$feature_values_for_samples_hash{$sample}}\n"
;
ExamplesBagging/bagging_for_bulk_classification.pl view on Meta::CPAN
150151152153154155156157158159160161162163164165166167168169170171172@all_class_names
=
grep
{
$_
if
!
$seen
{
$_
}++}
values
%class_for_sample_hash
;
"\n All class names: @all_class_names\n"
if
$debug
;
%numeric_features_valuerange_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
%features_and_unique_values_hash
= ();
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen1
= ();
my
@unique_values_for_feature
=
sort
grep
{
$_
if
$_
ne
'NA'
&& !
$seen1
{
$_
}++}
@{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^\d*\.\d+$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
if
(
$debug
) {
"\nAll class names: @all_class_names\n"
;
"\nEach sample data record:\n"
;
foreach
my
$sample
(
sort
{sample_index(
$a
) <=> sample_index(
$b
)}
keys
%feature_values_for_samples_hash
) {
"$sample => @{$feature_values_for_samples_hash{$sample}}\n"
;
ExamplesBoosting/boosting_for_bulk_classification.pl view on Meta::CPAN
157158159160161162163164165166167168169170171172173174175176177178179@all_class_names
=
grep
{
$_
if
!
$seen
{
$_
}++}
values
%class_for_sample_hash
;
"\n All class names: @all_class_names\n"
if
$debug
;
%numeric_features_valuerange_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
%features_and_unique_values_hash
= ();
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen1
= ();
my
@unique_values_for_feature
=
sort
grep
{
$_
if
$_
ne
'NA'
&& !
$seen1
{
$_
}++}
@{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^\d*\.\d+$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
if
(
$debug
) {
"\nAll class names: @all_class_names\n"
;
"\nEach sample data record:\n"
;
foreach
my
$sample
(
sort
{sample_index(
$a
) <=> sample_index(
$b
)}
keys
%feature_values_for_samples_hash
) {
"$sample => @{$feature_values_for_samples_hash{$sample}}\n"
;
lib/Algorithm/BoostedDecisionTree.pm view on Meta::CPAN
96979899100101102103104105106107108109110111112113114115116117118
}
}
my
%features_and_unique_values_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
my
%numeric_features_valuerange_hash
= ();
my
$numregex
=
'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
;
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen
= ();
my
@unique_values_for_feature
=
grep
{
$_
if
$_
ne
'NA'
&& !
$seen
{
$_
}++} @{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
$self
->{_all_trees}->{0}->{_class_names} = \
@all_class_names
;
$self
->{_all_trees}->{0}->{_feature_names} = \
@feature_names
;
$self
->{_all_trees}->{0}->{_samples_class_label_hash} = \
%class_for_sample_hash
;
$self
->{_all_trees}->{0}->{_training_data_hash} = \
%feature_values_for_samples_hash
;
$self
->{_all_trees}->{0}->{_features_and_values_hash} = \
%features_and_values_hash
;
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
8384858687888990919293949596979899100101102103104105106die
"\n\nError in the names you have used for features and/or values. "
.
"Try using the csv_cleanup_needed option in the constructor call."
unless
$self
->check_names_used(\
@features_and_values
);
my
@new_features_and_values
= ();
my
$pattern
=
'(\S+)\s*=\s*(\S+)'
;
foreach
my
$feature_and_value
(
@features_and_values
) {
$feature_and_value
=~ /
$pattern
/;
my
(
$feature
,
$value
) = ($1, $2);
my
$newvalue
=
$value
;
my
@unique_values_for_feature
= @{
$self
->{_features_and_unique_values_hash}->{
$feature
}};
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature
;
if
(! contained_in(
$feature
,
keys
%{
$self
->{_prob_distribution_numeric_features_hash}}) &&
$not_all_values_float
== 0) {
$newvalue
= closest_sampling_point(
$value
, \
@unique_values_for_feature
);
}
push
@new_features_and_values
,
"$feature"
.
'='
.
"$newvalue"
;
}
@features_and_values
=
@new_features_and_values
;
"\nCL1 New feature and values: @features_and_values\n"
if
$self
->{_debug3};
my
%answer
= ();
foreach
my
$class_name
(@{
$self
->{_class_names}}) {
$answer
{
$class_name
} =
undef
;
}
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
551552553554555556557558559560561562563564565566567568569570571572573my
$pattern3
=
'(.+)>(.+)'
;
my
@all_symbolic_features
= ();
foreach
my
$feature_name
(@{
$self
->{_feature_names}}) {
push
@all_symbolic_features
,
$feature_name
if
!
exists
$self
->{_prob_distribution_numeric_features_hash}->{
$feature_name
};
}
my
@symbolic_features_already_used
= ();
foreach
my
$feature_and_value_or_threshold
(
@features_and_values_or_thresholds_on_branch
) {
push
@symbolic_features_already_used
, $1
if
$feature_and_value_or_threshold
=~ /
$pattern1
/;
}
my
@symbolic_features_not_yet_used
;
foreach
my
$x
(
@all_symbolic_features
) {
push
@symbolic_features_not_yet_used
,
$x
unless
contained_in(
$x
,
@symbolic_features_already_used
);
}
my
@true_numeric_types
= ();
my
@symbolic_types
= ();
my
@true_numeric_types_feature_names
= ();
my
@symbolic_types_feature_names
= ();
foreach
my
$item
(
@features_and_values_or_thresholds_on_branch
) {
if
(
$item
=~ /
$pattern2
/) {
push
@true_numeric_types
,
$item
;
push
@true_numeric_types_feature_names
, $1;
}
elsif
(
$item
=~ /
$pattern3
/) {
lib/Algorithm/DecisionTree.pm view on Meta::CPAN
17311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753my
%features_and_values_hash
=
map
{
my
$a
=
$_
; {
$all_feature_names
[
$a
] => [
map
{
my
$b
=
$_
;
$b
=~ /^\d+$/ ?
sprintf
(
"%.1f"
,
$b
) :
$b
}
map
{
$data_hash
{
$_
}->[
$a
-1]}
keys
%data_hash
]} } @{
$self
->{_csv_columns_for_features}};
my
%numeric_features_valuerange_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
my
%features_and_unique_values_hash
= ();
my
$numregex
=
'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
;
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen1
= ();
my
@unique_values_for_feature
=
sort
grep
{
$_
if
$_
ne
'NA'
&& !
$seen1
{
$_
}++}
@{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
if
(
$self
->{_debug1}) {
"\nAll class names: @all_class_names\n"
;
"\nEach sample data record:\n"
;
foreach
my
$sample
(
sort
{sample_index(
$a
) <=> sample_index(
$b
)}
keys
%feature_values_for_samples_hash
) {
"$sample => @{$feature_values_for_samples_hash{$sample}}\n"
;
lib/Algorithm/DecisionTreeWithBagging.pm view on Meta::CPAN
159160161162163164165166167168169170171172173174175176177178179180181"all class names: @all_class_names\n"
if
$self
->{_debug2};
my
%numeric_features_valuerange_hash_bags
=
map
{
$_
=> {}} 0 ..
$self
->{_how_many_bags} - 1;
my
%feature_values_how_many_uniques_hash_bags
=
map
{
$_
=> {}} 0 ..
$self
->{_how_many_bags} - 1;
my
%features_and_unique_values_hash_bags
=
map
{
$_
=> {}} 0 ..
$self
->{_how_many_bags} - 1;
my
$numregex
=
'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
;
foreach
my
$i
(0 ..
$self
->{_how_many_bags} - 1) {
foreach
my
$feature
(
keys
%{
$features_and_values_hash_bags
{
$i
}}) {
my
%seen
= ();
my
@unique_values_for_feature_in_bag
=
grep
{
$_
if
$_
ne
'NA'
&& !
$seen
{
$_
}++} @{
$features_and_values_hash_bags
{
$i
}{
$feature
}};
$feature_values_how_many_uniques_hash_bags
{
$i
}->{
$feature
} =
scalar
@unique_values_for_feature_in_bag
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature_in_bag
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature_in_bag
);
$numeric_features_valuerange_hash_bags
{
$i
}->{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash_bags
{
$i
}->{
$feature
} = \
@unique_values_for_feature_in_bag
;
}
}
if
(
$self
->{_debug2}) {
"\nDisplaying value ranges for numeric features in each bag:\n\n"
;
foreach
my
$bag_index
(
keys
%numeric_features_valuerange_hash_bags
) {
my
%keyval
= %{
$numeric_features_valuerange_hash_bags
{
$bag_index
}};
lib/Algorithm/RandomizedTreesForBigData.pm view on Meta::CPAN
241242243244245246247248249250251252253254255256257258259260261262263}
my
$numeric_features_valuerange_all_trees
= {
map
{
my
$t
=
$_
;
$t
=> {}} 0 ..
$self
->{_how_many_trees} - 1};
my
$feature_values_how_many_uniques_all_trees
= {
map
{
my
$t
=
$_
;
$t
=> {}} 0 ..
$self
->{_how_many_trees} - 1};
my
$features_and_unique_values_all_trees
= {
map
{
my
$t
=
$_
;
$t
=> {}} 0 ..
$self
->{_how_many_trees} - 1};
my
$numregex
=
'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
;
foreach
my
$t
(0 ..
$self
->{_how_many_trees} - 1) {
foreach
my
$feature
(
sort
keys
%{
$features_and_values_all_trees
->{
$t
}}) {
my
%all_values_for_feature
=
map
{
$_
=> 1} @{
$features_and_values_all_trees
->{
$t
}->{
$feature
}};
my
@unique_values_for_feature
=
grep
{
$_
ne
'NA'
}
keys
%all_values_for_feature
;
$feature_values_how_many_uniques_all_trees
->{
$t
}->{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_all_trees
->{
$t
}->{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_all_trees
->{
$t
}->{
$feature
} = \
@unique_values_for_feature
;
}
}
if
(
$self
->{_debug1}) {
"\nDisplaying value ranges for numeric features for all trees:\n\n"
;
foreach
my
$tree_index
(
keys
%{
$numeric_features_valuerange_all_trees
}) {
my
%keyval
= %{
$numeric_features_valuerange_all_trees
->{
$tree_index
}};
lib/Algorithm/RegressionTree.pm view on Meta::CPAN
100101102103104105106107108109110111112113114115116117118119120121122my
%feature_values_for_samples_hash
=
map
{
my
$sampleID
=
$_
;
"sample_"
.
$sampleID
=> [
map
{
my
$fname
=
$all_feature_names
[
$_
-1];
$fname
.
"="
.
eval
{
$data_hash
{
$sampleID
}->[
$_
-1] =~ /^\d+$/ ?
sprintf
(
"%.1f"
,
$data_hash
{
$sampleID
}->[
$_
-1] ) : ...
my
%features_and_values_hash
=
map
{
my
$a
=
$_
; {
$all_feature_names
[
$a
-1] => [
map
{
my
$b
=
$_
;
$b
=~ /^\d+$/ ?
sprintf
(
"%.1f"
,
$b
) :
$b
}
map
{
$data_hash
{
$_
}->[
$a
-1]}
keys
%data_hash
]} } @{
$self
->{_predictor_columns}};
my
%numeric_features_valuerange_hash
= ();
my
%feature_values_how_many_uniques_hash
= ();
my
%features_and_unique_values_hash
= ();
my
$numregex
=
'[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
;
foreach
my
$feature
(
keys
%features_and_values_hash
) {
my
%seen
= ();
my
@unique_values_for_feature
=
grep
{
$_
if
$_
ne
'NA'
&& !
$seen
{
$_
}++} @{
$features_and_values_hash
{
$feature
}};
$feature_values_how_many_uniques_hash
{
$feature
} =
scalar
@unique_values_for_feature
;
my
$not_all_values_float
= 0;
map
{
$not_all_values_float
= 1
if
$_
!~ /^
$numregex
$/}
@unique_values_for_feature
;
if
(
$not_all_values_float
== 0) {
my
@minmaxvalues
= minmax(\
@unique_values_for_feature
);
$numeric_features_valuerange_hash
{
$feature
} = \
@minmaxvalues
;
}
$features_and_unique_values_hash
{
$feature
} = \
@unique_values_for_feature
;
}
if
(
$self
->{_debug1_r}) {
"\nDependent var values: @dependent_var_values\n"
;
"\nEach sample data record:\n"
;
foreach
my
$kee
(
sort
{sample_index(
$a
) <=> sample_index(
$b
)}
keys
%feature_values_for_samples_hash
) {
"$kee => @{$feature_values_for_samples_hash{$kee}}\n"
;