AI-MaxEntropy

 view release on metacpan or  search on metacpan

AI-MaxEntropy.xs  view on Meta::CPAN

	SV*     self
    PREINIT:
	dTRACE("_neg_log_likelihood");
        /* fetch the pre-cached samples and f_map */
	SV* _c = *hvref_fetch(self, "_c");
	struct samples_t* samples =
	    INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
	struct f_map_t* f_map =
	    INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
	int** lambda_idx = f_map->lambda_idx;
	/* fetch other useful data */
	SV* smoother = *hvref_fetch(self, "smoother");
        int x_num = SvIV(*hvref_fetch(self, "x_num"));
	int y_num = SvIV(*hvref_fetch(self, "y_num"));
	int f_num = SvIV(*hvref_fetch(self, "f_num"));
	/* intermediate variables */
	AV* av_d_log_lh;
	char* smoother_type;
	int i, j, x, y, lambda_i;
        double log_lh, sum_exp_lambda_f, sigma, fxy;
	double* lambda_f = (double*)malloc(sizeof(double) * y_num);

AI-MaxEntropy.xs  view on Meta::CPAN

    PREINIT:
        dSP;
	dTRACE("_apply_gis");
        /* fetch the pre-cached samples and f_map */
        SV* _c = *hvref_fetch(self, "_c");
	struct samples_t* samples =
	    INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
	struct f_map_t* f_map =
	    INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));
	int** lambda_idx = f_map->lambda_idx;
	/* fetch other useful data */
	AV* f_freq = (AV*)SvRV(*hvref_fetch(self, "f_freq"));
        int x_num = SvIV(*hvref_fetch(self, "x_num"));
	int y_num = SvIV(*hvref_fetch(self, "y_num"));
	int f_num = SvIV(*hvref_fetch(self, "f_num"));
	int af_num = SvIV(*hvref_fetch(self, "af_num"));
	/* intermediate variables */
	SV *sv_r;
	AV *av_lambda, *av_d_lambda;
	int i, j, k, y, lambda_i, r;
	double sum_exp_lambda_f, pxy;

MANIFEST  view on Meta::CPAN

AI-MaxEntropy.xs
Changes
inc/Module/AutoInstall.pm
inc/Module/Install.pm
inc/Module/Install/AutoInstall.pm
inc/Module/Install/Base.pm
inc/Module/Install/Include.pm
inc/Module/Install/Makefile.pm
inc/Module/Install/MakeMaker.pm
inc/Module/Install/Metadata.pm
inc/Test/Builder.pm
inc/Test/Builder/Module.pm
inc/Test/More.pm
inc/Test/Number/Delta.pm
lib/AI/MaxEntropy.pm
lib/AI/MaxEntropy/Model.pm
lib/AI/MaxEntropy/Util.pm
LICENSE
Makefile.PL
MANIFEST			This list of files

README  view on Meta::CPAN

      $me->see(['rough', 'big'] => 'pomelo');

      # the order of active features is not concerned, too
      $me->see(['big', 'rough'] => 'pomelo');

      # ...

      # and, let it learn
      my $model = $me->learn;

      # then, we can make predictions on unseen data

      # ask what a red thing is most likely to be
      print $model->predict(['red'])."\n";
      # the answer is apple, because all red things the learner have ever seen
      # are apples
  
      # ask what a smooth thing is most likely to be
      print $model->predict(['smooth'])."\n";
      # the answer is banana, because the learner have seen more smooth bananas
      # (weighted 3) than smooth apples (weighted 2)

README  view on Meta::CPAN

      # load the model
      $model->load('model_file');

CONCEPTS
  What is a Maximum Entropy model?
    Maximum Entropy (ME) model is a popular approach for machine learning.
    From a user's view, it just behaves like a classifier which classify
    things according to the previously learnt things.

    Theorically, a ME learner try to recover the real probability
    distribution of the data based on limited number of observations, by
    applying the principle of maximum entropy.

    You can find some good tutorials on Maximum Entropy model here:

    <http://homepages.inf.ed.ac.uk/s0450736/maxent.html>

  Features
    Generally, a feature is a binary function answers a yes-no question on a
    specified piece of data.

    For examples,

      "Is it a red apple?"

      "Is it a yellow banana?"

    If the answer is yes, we say this feature is active on that piece of
    data.

    In practise, a feature is usually represented as a tuple "<x, y>". For
    examples, the above two features can be represented as

      <red, apple>

      <yellow, banana>

  Samples
    A sample is a set of active features, all of which share a common "y".

inc/Module/Install/Metadata.pm  view on Meta::CPAN

#line 1
package Module::Install::Metadata;

use strict 'vars';
use Module::Install::Base;

use vars qw{$VERSION $ISCORE @ISA};
BEGIN {
	$VERSION = '0.68';
	$ISCORE  = 1;
	@ISA     = qw{Module::Install::Base};
}

inc/Module/Install/Metadata.pm  view on Meta::CPAN

    my $type = shift;
    push @{ $self->{values}{no_index}{$type} }, @_ if $type;
    return $self->{values}{no_index};
}

sub read {
    my $self = shift;
    $self->include_deps( 'YAML', 0 );

    require YAML;
    my $data = YAML::LoadFile('META.yml');

    # Call methods explicitly in case user has already set some values.
    while ( my ( $key, $value ) = each %$data ) {
        next unless $self->can($key);
        if ( ref $value eq 'HASH' ) {
            while ( my ( $module, $version ) = each %$value ) {
                $self->can($key)->($self, $module => $version );
            }
        }
        else {
            $self->can($key)->($self, $value);
        }
    }

inc/Test/Builder.pm  view on Meta::CPAN

# Make Test::Builder thread-safe for ithreads.
BEGIN {
    use Config;
    # Load threads::shared when threads are turned on.
    # 5.8.0's threads are so busted we no longer support them.
    if( $] >= 5.008001 && $Config{useithreads} && $INC{'threads.pm'}) {
        require threads::shared;

        # Hack around YET ANOTHER threads::shared bug.  It would 
        # occassionally forget the contents of the variable when sharing it.
        # So we first copy the data, then share, then put our copy back.
        *share = sub (\[$@%]) {
            my $type = ref $_[0];
            my $data;

            if( $type eq 'HASH' ) {
                %$data = %{$_[0]};
            }
            elsif( $type eq 'ARRAY' ) {
                @$data = @{$_[0]};
            }
            elsif( $type eq 'SCALAR' ) {
                $$data = ${$_[0]};
            }
            else {
                die("Unknown type: ".$type);
            }

            $_[0] = &threads::shared::share($_[0]);

            if( $type eq 'HASH' ) {
                %{$_[0]} = %$data;
            }
            elsif( $type eq 'ARRAY' ) {
                @{$_[0]} = @$data;
            }
            elsif( $type eq 'SCALAR' ) {
                ${$_[0]} = $$data;
            }
            else {
                die("Unknown type: ".$type);
            }

            return $_[0];
        };
    }
    # 5.8.0's threads::shared is busted when threads are off
    # and earlier Perls just don't have that module at all.

lib/AI/MaxEntropy.pm  view on Meta::CPAN

  $me->see(['rough', 'big'] => 'pomelo');

  # the order of active features is not concerned, too
  $me->see(['big', 'rough'] => 'pomelo');

  # ...

  # and, let it learn
  my $model = $me->learn;

  # then, we can make predictions on unseen data

  # ask what a red thing is most likely to be
  print $model->predict(['red'])."\n";
  # the answer is apple, because all red things the learner have ever seen
  # are apples
  
  # ask what a smooth thing is most likely to be
  print $model->predict(['smooth'])."\n";
  # the answer is banana, because the learner have seen more smooth bananas
  # (weighted 3) than smooth apples (weighted 2)

lib/AI/MaxEntropy.pm  view on Meta::CPAN


=head1 CONCEPTS

=head2 What is a Maximum Entropy model?

Maximum Entropy (ME) model is a popular approach for machine learning.
From a user's view, it just behaves like a classifier which classify things
according to the previously learnt things.

Theorically, a ME learner try to recover the real probability distribution 
of the data based on limited number of observations, by applying the
principle of maximum entropy. 

You can find some good tutorials on Maximum Entropy model here:

L<http://homepages.inf.ed.ac.uk/s0450736/maxent.html>

=head2 Features

Generally, a feature is a binary function answers a yes-no question on a
specified piece of data. 

For examples, 

  "Is it a red apple?"

  "Is it a yellow banana?"

If the answer is yes,
we say this feature is active on that piece of data.

In practise, a feature is usually represented as
a tuple C<E<lt>x, yE<gt>>. For examples, the above two features can be
represented as

  <red, apple>

  <yellow, banana>

=head2 Samples

lib/AI/MaxEntropy/Model.pm  view on Meta::CPAN

    $self->{y_num} = scalar(@{$self->{y_list}});
    $self->{f_num} = scalar(@{$self->{lambda}});
    $self->{x_bucket}->{$self->{x_list}->[$_]} = $_
        for (0 .. $self->{x_num} - 1);
    $self->{y_bucket}->{$self->{y_list}->[$_]} = $_
        for (0 .. $self->{y_num} - 1);
}

sub save {
    my ($self, $file) = @_;
    my $data = [
        $self->{x_list},
	$self->{y_list},
	$self->{f_map},
	$self->{lambda}
    ];
    DumpFile($file, $data);    
}

sub all_x { @{$_[0]->{x_list}} }
sub all_labels { @{$_[0]->{y_list}} }

sub score {
    my $self = shift;
    my ($x, $y) = @_;
    # preprocess if $x is hashref
    $x = [

lib/AI/MaxEntropy/Model.pm  view on Meta::CPAN

  use AI::MaxEntropy::Model;

  # learn a model by AI::MaxEntropy
  require AI::MaxEntropy;
  my $me = AI::MaxEntropy->new;
  $me->see(['round', 'smooth', 'red'] => 'apple' => 2);
  $me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);
  $me->see(['round', 'rough'] => 'orange' => 2);
  my $model = $me->learn;

  # make prediction on unseen data
  # ask what a red round thing is most likely to be
  my $y = $model->predict(['round', 'red']);
  # the answer apple is expected

  # print out scores of all possible labels
  for ($model->all_labels) {
      my $s = $model->score(['round', 'red'] => $_);
      print "$_: $s\n";
  }
  

ppport.h  view on Meta::CPAN

PERL_MAGIC_glob|5.007002||p
PERL_MAGIC_isaelem|5.007002||p
PERL_MAGIC_isa|5.007002||p
PERL_MAGIC_mutex|5.007002||p
PERL_MAGIC_nkeys|5.007002||p
PERL_MAGIC_overload_elem|5.007002||p
PERL_MAGIC_overload_table|5.007002||p
PERL_MAGIC_overload|5.007002||p
PERL_MAGIC_pos|5.007002||p
PERL_MAGIC_qr|5.007002||p
PERL_MAGIC_regdata|5.007002||p
PERL_MAGIC_regdatum|5.007002||p
PERL_MAGIC_regex_global|5.007002||p
PERL_MAGIC_shared_scalar|5.007003||p
PERL_MAGIC_shared|5.007003||p
PERL_MAGIC_sigelem|5.007002||p
PERL_MAGIC_sig|5.007002||p
PERL_MAGIC_substr|5.007002||p
PERL_MAGIC_sv|5.007002||p
PERL_MAGIC_taint|5.007002||p
PERL_MAGIC_tiedelem|5.007002||p

ppport.h  view on Meta::CPAN

XS_VERSION|||
XS|||
ZeroD|5.009002||p
Zero|||
_aMY_CXT|5.007003||p
_pMY_CXT|5.007003||p
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
aTHX_|5.006000||p
aTHX|5.006000||p
add_data|||
allocmy|||
amagic_call|||
any_dup|||
ao|||
append_elem|||
append_list|||
apply_attrs_my|||
apply_attrs_string||5.006001|
apply_attrs|||
apply|||

ppport.h  view on Meta::CPAN

magic_getsubstr|||
magic_gettaint|||
magic_getuvar|||
magic_getvec|||
magic_get|||
magic_killbackrefs|||
magic_len|||
magic_methcall|||
magic_methpack|||
magic_nextpack|||
magic_regdata_cnt|||
magic_regdatum_get|||
magic_regdatum_set|||
magic_scalarpack|||
magic_set_all_env|||
magic_setamagic|||
magic_setarylen|||
magic_setbm|||
magic_setcollxfrm|||
magic_setdbline|||
magic_setdefelem|||

ppport.h  view on Meta::CPAN

#endif

/* Older perls (<=5.003) lack AvFILLp */
#ifndef AvFILLp
#  define AvFILLp                        AvFILL
#endif
#ifndef ERRSV
#  define ERRSV                          get_sv("@",FALSE)
#endif
#ifndef newSVpvn
#  define newSVpvn(data,len)             ((data)                                              \
                                    ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
                                    : newSV(0))
#endif

/* Hint: gv_stashpvn
 * This function's backport doesn't support the length parameter, but
 * rather ignores it. Portability can only be ensured if the length
 * parameter is used for speed reasons, but the length can always be
 * correctly computed from the string argument.
 */
#ifndef gv_stashpvn

ppport.h  view on Meta::CPAN

	PL_hints = oldhints;
	PL_curcop->cop_stash = old_cop_stash;
	PL_curstash = old_curstash;
	PL_curcop->cop_line = oldline;
}
#endif
#endif

/*
 * Boilerplate macros for initializing and accessing interpreter-local
 * data from C.  All statics in extensions should be reworked to use
 * this, if you want to make the extension thread-safe.  See ext/re/re.xs
 * for an example of the use of these macros.
 *
 * Code that uses these macros is responsible for the following:
 * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts"
 * 2. Declare a typedef named my_cxt_t that is a structure that contains
 *    all the data that needs to be interpreter-local.
 * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t.
 * 4. Use the MY_CXT_INIT macro such that it is called exactly once
 *    (typically put in the BOOT: section).
 * 5. Use the members of the my_cxt_t structure everywhere as
 *    MY_CXT.member.
 * 6. Use the dMY_CXT macro (a declaration) in all the functions that
 *    access MY_CXT.
 */

#if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \
    defined(PERL_CAPI)    || defined(PERL_IMPLICIT_CONTEXT)

#ifndef START_MY_CXT

/* This must appear in all extensions that define a my_cxt_t structure,
 * right after the definition (i.e. at file scope).  The non-threads
 * case below uses it to declare the data as static. */
#define START_MY_CXT

#if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 ))
/* Fetches the SV that keeps the per-interpreter data. */
#define dMY_CXT_SV \
	SV *my_cxt_sv = get_sv(MY_CXT_KEY, FALSE)
#else /* >= perl5.004_68 */
#define dMY_CXT_SV \
	SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY,		\
				  sizeof(MY_CXT_KEY)-1, TRUE)
#endif /* < perl5.004_68 */

/* This declaration should be used within all functions that use the
 * interpreter-local data. */
#define dMY_CXT	\
	dMY_CXT_SV;							\
	my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv))

/* Creates and zeroes the per-interpreter data.
 * (We allocate my_cxtp in a Perl SV so that it will be released when
 * the interpreter goes away.) */
#define MY_CXT_INIT \
	dMY_CXT_SV;							\
	/* newSV() allocates one more than needed */			\
	my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
	Zero(my_cxtp, 1, my_cxt_t);					\
	sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))

/* This macro must be used to access members of the my_cxt_t structure.
 * e.g. MYCXT.some_data */
#define MY_CXT		(*my_cxtp)

/* Judicious use of these macros can reduce the number of times dMY_CXT
 * is used.  Use is similar to pTHX, aTHX etc. */
#define pMY_CXT		my_cxt_t *my_cxtp
#define pMY_CXT_	pMY_CXT,
#define _pMY_CXT	,pMY_CXT
#define aMY_CXT		my_cxtp
#define aMY_CXT_	aMY_CXT,
#define _aMY_CXT	,aMY_CXT

#endif /* START_MY_CXT */

#ifndef MY_CXT_CLONE
/* Clones the per-interpreter data. */
#define MY_CXT_CLONE \
	dMY_CXT_SV;							\
	my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
	Copy(INT2PTR(my_cxt_t*, SvUV(my_cxt_sv)), my_cxtp, 1, my_cxt_t);\
	sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))
#endif

#else /* single interpreter */

#ifndef START_MY_CXT

ppport.h  view on Meta::CPAN

#endif

#ifndef PERL_MAGIC_overload_table
#  define PERL_MAGIC_overload_table      'c'
#endif

#ifndef PERL_MAGIC_bm
#  define PERL_MAGIC_bm                  'B'
#endif

#ifndef PERL_MAGIC_regdata
#  define PERL_MAGIC_regdata             'D'
#endif

#ifndef PERL_MAGIC_regdatum
#  define PERL_MAGIC_regdatum            'd'
#endif

#ifndef PERL_MAGIC_env
#  define PERL_MAGIC_env                 'E'
#endif

ppport.h  view on Meta::CPAN

    if (radix && IN_LOCALE) {
        STRLEN len = strlen(radix);
        if (*sp + len <= send && memEQ(*sp, radix, len)) {
            *sp += len;
            return TRUE;
        }
    }
#endif /* PERL_VERSION */
#endif /* USE_LOCALE_NUMERIC */
    /* always try "." if numeric radix didn't match because
     * we may have data from different locales mixed */
    if (*sp < send && **sp == '.') {
        ++*sp;
        return TRUE;
    }
    return FALSE;
}
#endif
#endif

/* grok_number depends on grok_numeric_radix */



( run in 0.366 second using v1.01-cache-2.11-cpan-8d75d55dd25 )