AI-MaxEntropy
view release on metacpan or search on metacpan
AI-MaxEntropy.xs view on Meta::CPAN
SV* self
PREINIT:
dTRACE("_neg_log_likelihood");
/* fetch the pre-cached samples and f_map */
SV* _c = *hvref_fetch(self, "_c");
struct samples_t* samples =
INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
struct f_map_t* f_map =
INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
int** lambda_idx = f_map->lambda_idx;
/* fetch other useful data */
SV* smoother = *hvref_fetch(self, "smoother");
int x_num = SvIV(*hvref_fetch(self, "x_num"));
int y_num = SvIV(*hvref_fetch(self, "y_num"));
int f_num = SvIV(*hvref_fetch(self, "f_num"));
/* intermediate variables */
AV* av_d_log_lh;
char* smoother_type;
int i, j, x, y, lambda_i;
double log_lh, sum_exp_lambda_f, sigma, fxy;
double* lambda_f = (double*)malloc(sizeof(double) * y_num);
AI-MaxEntropy.xs view on Meta::CPAN
PREINIT:
dSP;
dTRACE("_apply_gis");
/* fetch the pre-cached samples and f_map */
SV* _c = *hvref_fetch(self, "_c");
struct samples_t* samples =
INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
struct f_map_t* f_map =
INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
int** lambda_idx = f_map->lambda_idx;
/* fetch other useful data */
AV* f_freq = (AV*)SvRV(*hvref_fetch(self, "f_freq"));
int x_num = SvIV(*hvref_fetch(self, "x_num"));
int y_num = SvIV(*hvref_fetch(self, "y_num"));
int f_num = SvIV(*hvref_fetch(self, "f_num"));
int af_num = SvIV(*hvref_fetch(self, "af_num"));
/* intermediate variables */
SV *sv_r;
AV *av_lambda, *av_d_lambda;
int i, j, k, y, lambda_i, r;
double sum_exp_lambda_f, pxy;
AI-MaxEntropy.xs
Changes
inc/Module/AutoInstall.pm
inc/Module/Install.pm
inc/Module/Install/AutoInstall.pm
inc/Module/Install/Base.pm
inc/Module/Install/Include.pm
inc/Module/Install/Makefile.pm
inc/Module/Install/MakeMaker.pm
inc/Module/Install/Metadata.pm
inc/Test/Builder.pm
inc/Test/Builder/Module.pm
inc/Test/More.pm
inc/Test/Number/Delta.pm
lib/AI/MaxEntropy.pm
lib/AI/MaxEntropy/Model.pm
lib/AI/MaxEntropy/Util.pm
LICENSE
Makefile.PL
MANIFEST This list of files
$me->see(['rough', 'big'] => 'pomelo');
# the order of active features is not concerned, too
$me->see(['big', 'rough'] => 'pomelo');
# ...
# and, let it learn
my $model = $me->learn;
# then, we can make predictions on unseen data
# ask what a red thing is most likely to be
print $model->predict(['red'])."\n";
# the answer is apple, because all red things the learner have ever seen
# are apples
# ask what a smooth thing is most likely to be
print $model->predict(['smooth'])."\n";
# the answer is banana, because the learner have seen more smooth bananas
# (weighted 3) than smooth apples (weighted 2)
# load the model
$model->load('model_file');
CONCEPTS
What is a Maximum Entropy model?
Maximum Entropy (ME) model is a popular approach for machine learning.
From a user's view, it just behaves like a classifier which classify
things according to the previously learnt things.
Theorically, a ME learner try to recover the real probability
distribution of the data based on limited number of observations, by
applying the principle of maximum entropy.
You can find some good tutorials on Maximum Entropy model here:
<http://homepages.inf.ed.ac.uk/s0450736/maxent.html>
Features
Generally, a feature is a binary function answers a yes-no question on a
specified piece of data.
For examples,
"Is it a red apple?"
"Is it a yellow banana?"
If the answer is yes, we say this feature is active on that piece of
data.
In practise, a feature is usually represented as a tuple "<x, y>". For
examples, the above two features can be represented as
<red, apple>
<yellow, banana>
Samples
A sample is a set of active features, all of which share a common "y".
inc/Module/Install/Metadata.pm view on Meta::CPAN
#line 1
package Module::Install::Metadata;
use strict 'vars';
use Module::Install::Base;
use vars qw{$VERSION $ISCORE @ISA};
BEGIN {
$VERSION = '0.68';
$ISCORE = 1;
@ISA = qw{Module::Install::Base};
}
inc/Module/Install/Metadata.pm view on Meta::CPAN
my $type = shift;
push @{ $self->{values}{no_index}{$type} }, @_ if $type;
return $self->{values}{no_index};
}
sub read {
my $self = shift;
$self->include_deps( 'YAML', 0 );
require YAML;
my $data = YAML::LoadFile('META.yml');
# Call methods explicitly in case user has already set some values.
while ( my ( $key, $value ) = each %$data ) {
next unless $self->can($key);
if ( ref $value eq 'HASH' ) {
while ( my ( $module, $version ) = each %$value ) {
$self->can($key)->($self, $module => $version );
}
}
else {
$self->can($key)->($self, $value);
}
}
inc/Test/Builder.pm view on Meta::CPAN
# Make Test::Builder thread-safe for ithreads.
BEGIN {
use Config;
# Load threads::shared when threads are turned on.
# 5.8.0's threads are so busted we no longer support them.
if( $] >= 5.008001 && $Config{useithreads} && $INC{'threads.pm'}) {
require threads::shared;
# Hack around YET ANOTHER threads::shared bug. It would
# occassionally forget the contents of the variable when sharing it.
# So we first copy the data, then share, then put our copy back.
*share = sub (\[$@%]) {
my $type = ref $_[0];
my $data;
if( $type eq 'HASH' ) {
%$data = %{$_[0]};
}
elsif( $type eq 'ARRAY' ) {
@$data = @{$_[0]};
}
elsif( $type eq 'SCALAR' ) {
$$data = ${$_[0]};
}
else {
die("Unknown type: ".$type);
}
$_[0] = &threads::shared::share($_[0]);
if( $type eq 'HASH' ) {
%{$_[0]} = %$data;
}
elsif( $type eq 'ARRAY' ) {
@{$_[0]} = @$data;
}
elsif( $type eq 'SCALAR' ) {
${$_[0]} = $$data;
}
else {
die("Unknown type: ".$type);
}
return $_[0];
};
}
# 5.8.0's threads::shared is busted when threads are off
# and earlier Perls just don't have that module at all.
lib/AI/MaxEntropy.pm view on Meta::CPAN
$me->see(['rough', 'big'] => 'pomelo');
# the order of active features is not concerned, too
$me->see(['big', 'rough'] => 'pomelo');
# ...
# and, let it learn
my $model = $me->learn;
# then, we can make predictions on unseen data
# ask what a red thing is most likely to be
print $model->predict(['red'])."\n";
# the answer is apple, because all red things the learner have ever seen
# are apples
# ask what a smooth thing is most likely to be
print $model->predict(['smooth'])."\n";
# the answer is banana, because the learner have seen more smooth bananas
# (weighted 3) than smooth apples (weighted 2)
lib/AI/MaxEntropy.pm view on Meta::CPAN
=head1 CONCEPTS
=head2 What is a Maximum Entropy model?
Maximum Entropy (ME) model is a popular approach for machine learning.
From a user's view, it just behaves like a classifier which classify things
according to the previously learnt things.
Theorically, a ME learner try to recover the real probability distribution
of the data based on limited number of observations, by applying the
principle of maximum entropy.
You can find some good tutorials on Maximum Entropy model here:
L<http://homepages.inf.ed.ac.uk/s0450736/maxent.html>
=head2 Features
Generally, a feature is a binary function answers a yes-no question on a
specified piece of data.
For examples,
"Is it a red apple?"
"Is it a yellow banana?"
If the answer is yes,
we say this feature is active on that piece of data.
In practise, a feature is usually represented as
a tuple C<E<lt>x, yE<gt>>. For examples, the above two features can be
represented as
<red, apple>
<yellow, banana>
=head2 Samples
lib/AI/MaxEntropy/Model.pm view on Meta::CPAN
$self->{y_num} = scalar(@{$self->{y_list}});
$self->{f_num} = scalar(@{$self->{lambda}});
$self->{x_bucket}->{$self->{x_list}->[$_]} = $_
for (0 .. $self->{x_num} - 1);
$self->{y_bucket}->{$self->{y_list}->[$_]} = $_
for (0 .. $self->{y_num} - 1);
}
sub save {
my ($self, $file) = @_;
my $data = [
$self->{x_list},
$self->{y_list},
$self->{f_map},
$self->{lambda}
];
DumpFile($file, $data);
}
sub all_x { @{$_[0]->{x_list}} }
sub all_labels { @{$_[0]->{y_list}} }
sub score {
my $self = shift;
my ($x, $y) = @_;
# preprocess if $x is hashref
$x = [
lib/AI/MaxEntropy/Model.pm view on Meta::CPAN
use AI::MaxEntropy::Model;
# learn a model by AI::MaxEntropy
require AI::MaxEntropy;
my $me = AI::MaxEntropy->new;
$me->see(['round', 'smooth', 'red'] => 'apple' => 2);
$me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);
$me->see(['round', 'rough'] => 'orange' => 2);
my $model = $me->learn;
# make prediction on unseen data
# ask what a red round thing is most likely to be
my $y = $model->predict(['round', 'red']);
# the answer apple is expected
# print out scores of all possible labels
for ($model->all_labels) {
my $s = $model->score(['round', 'red'] => $_);
print "$_: $s\n";
}
PERL_MAGIC_glob|5.007002||p
PERL_MAGIC_isaelem|5.007002||p
PERL_MAGIC_isa|5.007002||p
PERL_MAGIC_mutex|5.007002||p
PERL_MAGIC_nkeys|5.007002||p
PERL_MAGIC_overload_elem|5.007002||p
PERL_MAGIC_overload_table|5.007002||p
PERL_MAGIC_overload|5.007002||p
PERL_MAGIC_pos|5.007002||p
PERL_MAGIC_qr|5.007002||p
PERL_MAGIC_regdata|5.007002||p
PERL_MAGIC_regdatum|5.007002||p
PERL_MAGIC_regex_global|5.007002||p
PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p
XS_VERSION|||
XS|||
ZeroD|5.009002||p
Zero|||
_aMY_CXT|5.007003||p
_pMY_CXT|5.007003||p
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHX_|5.006000||p
aTHX|5.006000||p
add_data|||
allocmy|||
amagic_call|||
any_dup|||
ao|||
append_elem|||
append_list|||
apply_attrs_my|||
apply_attrs_string||5.006001|
apply_attrs|||
apply|||
magic_getsubstr|||
magic_gettaint|||
magic_getuvar|||
magic_getvec|||
magic_get|||
magic_killbackrefs|||
magic_len|||
magic_methcall|||
magic_methpack|||
magic_nextpack|||
magic_regdata_cnt|||
magic_regdatum_get|||
magic_regdatum_set|||
magic_scalarpack|||
magic_set_all_env|||
magic_setamagic|||
magic_setarylen|||
magic_setbm|||
magic_setcollxfrm|||
magic_setdbline|||
magic_setdefelem|||
#endif
/* Older perls (<=5.003) lack AvFILLp */
#ifndef AvFILLp
# define AvFILLp AvFILL
#endif
#ifndef ERRSV
# define ERRSV get_sv("@",FALSE)
#endif
#ifndef newSVpvn
# define newSVpvn(data,len) ((data) \
? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
: newSV(0))
#endif
/* Hint: gv_stashpvn
* This function's backport doesn't support the length parameter, but
* rather ignores it. Portability can only be ensured if the length
* parameter is used for speed reasons, but the length can always be
* correctly computed from the string argument.
*/
#ifndef gv_stashpvn
PL_hints = oldhints;
PL_curcop->cop_stash = old_cop_stash;
PL_curstash = old_curstash;
PL_curcop->cop_line = oldline;
}
#endif
#endif
/*
* Boilerplate macros for initializing and accessing interpreter-local
* data from C. All statics in extensions should be reworked to use
* this, if you want to make the extension thread-safe. See ext/re/re.xs
* for an example of the use of these macros.
*
* Code that uses these macros is responsible for the following:
* 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts"
* 2. Declare a typedef named my_cxt_t that is a structure that contains
* all the data that needs to be interpreter-local.
* 3. Use the START_MY_CXT macro after the declaration of my_cxt_t.
* 4. Use the MY_CXT_INIT macro such that it is called exactly once
* (typically put in the BOOT: section).
* 5. Use the members of the my_cxt_t structure everywhere as
* MY_CXT.member.
* 6. Use the dMY_CXT macro (a declaration) in all the functions that
* access MY_CXT.
*/
#if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \
defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT)
#ifndef START_MY_CXT
/* This must appear in all extensions that define a my_cxt_t structure,
* right after the definition (i.e. at file scope). The non-threads
* case below uses it to declare the data as static. */
#define START_MY_CXT
#if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 ))
/* Fetches the SV that keeps the per-interpreter data. */
#define dMY_CXT_SV \
SV *my_cxt_sv = get_sv(MY_CXT_KEY, FALSE)
#else /* >= perl5.004_68 */
#define dMY_CXT_SV \
SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \
sizeof(MY_CXT_KEY)-1, TRUE)
#endif /* < perl5.004_68 */
/* This declaration should be used within all functions that use the
* interpreter-local data. */
#define dMY_CXT \
dMY_CXT_SV; \
my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv))
/* Creates and zeroes the per-interpreter data.
* (We allocate my_cxtp in a Perl SV so that it will be released when
* the interpreter goes away.) */
#define MY_CXT_INIT \
dMY_CXT_SV; \
/* newSV() allocates one more than needed */ \
my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
Zero(my_cxtp, 1, my_cxt_t); \
sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))
/* This macro must be used to access members of the my_cxt_t structure.
* e.g. MYCXT.some_data */
#define MY_CXT (*my_cxtp)
/* Judicious use of these macros can reduce the number of times dMY_CXT
* is used. Use is similar to pTHX, aTHX etc. */
#define pMY_CXT my_cxt_t *my_cxtp
#define pMY_CXT_ pMY_CXT,
#define _pMY_CXT ,pMY_CXT
#define aMY_CXT my_cxtp
#define aMY_CXT_ aMY_CXT,
#define _aMY_CXT ,aMY_CXT
#endif /* START_MY_CXT */
#ifndef MY_CXT_CLONE
/* Clones the per-interpreter data. */
#define MY_CXT_CLONE \
dMY_CXT_SV; \
my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
Copy(INT2PTR(my_cxt_t*, SvUV(my_cxt_sv)), my_cxtp, 1, my_cxt_t);\
sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))
#endif
#else /* single interpreter */
#ifndef START_MY_CXT
#endif
#ifndef PERL_MAGIC_overload_table
# define PERL_MAGIC_overload_table 'c'
#endif
#ifndef PERL_MAGIC_bm
# define PERL_MAGIC_bm 'B'
#endif
#ifndef PERL_MAGIC_regdata
# define PERL_MAGIC_regdata 'D'
#endif
#ifndef PERL_MAGIC_regdatum
# define PERL_MAGIC_regdatum 'd'
#endif
#ifndef PERL_MAGIC_env
# define PERL_MAGIC_env 'E'
#endif
if (radix && IN_LOCALE) {
STRLEN len = strlen(radix);
if (*sp + len <= send && memEQ(*sp, radix, len)) {
*sp += len;
return TRUE;
}
}
#endif /* PERL_VERSION */
#endif /* USE_LOCALE_NUMERIC */
/* always try "." if numeric radix didn't match because
* we may have data from different locales mixed */
if (*sp < send && **sp == '.') {
++*sp;
return TRUE;
}
return FALSE;
}
#endif
#endif
/* grok_number depends on grok_numeric_radix */
( run in 0.285 second using v1.01-cache-2.11-cpan-8d75d55dd25 )