AI-MaxEntropy

 view release on metacpan or  search on metacpan

AI-MaxEntropy.xs  view on Meta::CPAN


#define hvref_fetch(hvref, key) \
    hv_fetch((HV*)SvRV(hvref), key, strlen(key), 0)
#define hvref_exists(hvref, key) \
    hv_exists((HV*)SvRV(hvref), key, strlen(key))
#define hvref_store(hvref, key, value) \
    hv_store((HV*)SvRV(hvref), key, strlen(key), value, 0)
#define hvref_delete(hvref, key) \
    hv_delete((HV*)SvRV(hvref), key, strlen(key), G_DISCARD)

/* internal structures */

struct samples_t {
    int s_num;
    int* x_len;
    int** x;
    int* y;
    double* w;
};

struct f_map_t {

AI-MaxEntropy.xs  view on Meta::CPAN

	f = sv_2mortal(newSVnv(log_lh));
	g = sv_2mortal(newRV_noinc((SV*)av_d_log_lh));
	TRACE("leave");
    CLEANUP:
	free(lambda_f);
	free(exp_lambda_f);
	free(d_log_lh);
	free(lambda);

SV*
_apply_gis(self, progress_cb, epsilon)
        SV*     self
	SV*     progress_cb
	double  epsilon
    PREINIT:
        dSP;
	dTRACE("_apply_gis");
        /* fetch the pre-cached samples and f_map */
        SV* _c = *hvref_fetch(self, "_c");
	struct samples_t* samples =
	    INT2PTR(struct samples_t*, SvIV(*hvref_fetch(_c, "samples")));
	struct f_map_t* f_map =
	    INT2PTR(struct f_map_t*, SvIV(*hvref_fetch(_c, "f_map")));

AI-MaxEntropy.xs  view on Meta::CPAN

	    d_lambda_norm = 0;
	    lambda_norm = 0;
	    for (i = 0; i < f_num; i++) {
	        d_lambda[i] = (1.0 / af_num) * log(p_f[i] / p1_f[i]);
		lambda[i] += d_lambda[i];
		d_lambda_norm += d_lambda[i] * d_lambda[i];
		lambda_norm += lambda[i] * lambda[i];
	    }
	    d_lambda_norm = sqrt(d_lambda_norm);
	    lambda_norm = sqrt(lambda_norm);
	    /* call progress_cb if defined */
	    if (SvOK(progress_cb) && SvROK(progress_cb) &&
	        SvTYPE(SvRV(progress_cb)) == SVt_PVCV) {
	        TRACE("call progress_cb");
	        av_lambda = newAV();
		av_d_lambda = newAV();
		av_extend(av_lambda, f_num - 1);
		av_extend(av_d_lambda, f_num - 1);
		for (i = 0; i < f_num; i++) {
		    av_store(av_lambda, i, newSVnv(lambda[i]));
		    av_store(av_d_lambda, i, newSVnv(d_lambda[i]));
		}
	        ENTER;
		SAVETMPS;
		PUSHMARK(SP);
		XPUSHs(sv_2mortal(newSViv(k)));
		XPUSHs(sv_2mortal(newRV_noinc((SV*)av_lambda)));
		XPUSHs(sv_2mortal(newRV_noinc((SV*)av_d_lambda)));
		XPUSHs(sv_2mortal(newSVnv(lambda_norm)));
		XPUSHs(sv_2mortal(newSVnv(d_lambda_norm)));
		PUTBACK;
		call_sv(progress_cb, G_ARRAY);
		SPAGAIN;
		sv_r = POPs;
		r = SvIV(sv_r);
	        PUTBACK;
		FREETMPS;
		LEAVE;
                while (SvREFCNT(sv_r) > 0) { SvREFCNT_dec(sv_r); }
		if (r != 0) break;
	    }
	    k++;

Changes  view on Meta::CPAN

Revision history for Perl extension AI::MaxEntropy.

0.20  Thu Mar  6 20:20:00 2008
	- Implemented GIS for ME model learning, now the client program can
	  any of the two algorithm (GIS and L-BFGS) for learning
	- More optimization for speed
	- $self->{optimizer} changed to $self->{algorithm}
	- all_features changed to all_x
	- Some modification on the documentation to avoid the ambiguity of
	  the concept 'feature'

0.11  Sat Feb 16 17:27:00 2008
	- Optimize the XS code, now the the function 'learn' should run 
	  at least twice faster than 0.10 version
	- New functions in AI::MaxEntropy::Util, which allows the client
	  program manipulate samples more flexibly
	- Replace Test::Differences with is_deeply in Test::More

LICENSE  view on Meta::CPAN

The MIT License

Copyright (c) 2008, Laye Suen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

META.yml  view on Meta::CPAN

generated_by: Module::Install version 0.68
license: MIT
meta-spec: 
  url: http://module-build.sourceforge.net/META-spec-v1.3.html
  version: 1.3
name: AI-MaxEntropy
no_index: 
  directory: 
    - inc
    - t
requires: 
  Algorithm::LBFGS: 0.16
  YAML::Syck: 0.87
version: 0.20

Makefile.PL  view on Meta::CPAN

use strict;
use warnings;

use inc::Module::Install;

name            'AI-MaxEntropy';
all_from        'lib/AI/MaxEntropy.pm';
license         'MIT';

requires        'Algorithm::LBFGS'      => '0.16';
requires        'YAML::Syck'            => '0.87';

include         'Test::Builder';
include         'Test::Builder::Module';
include         'Test::More';
include         'Test::Number::Delta';

auto_install;

WriteMakefile(
    LIBS              => ['-lm'],

README  view on Meta::CPAN

      my $me = AI::MaxEntropy->new; 
  
      # the learner see 2 red round smooth apples
      $me->see(['round', 'smooth', 'red'] => 'apple' => 2);
  
      # the learner see 3 yellow long smooth bananas
      $me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);

      # and more

      # samples needn't have the same numbers of active features
      $me->see(['rough', 'big'] => 'pomelo');

      # the order of active features is not concerned, too
      $me->see(['big', 'rough'] => 'pomelo');

      # ...

      # and, let it learn
      my $model = $me->learn;

      # then, we can make predictions on unseen data

      # ask what a red thing is most likely to be

README  view on Meta::CPAN

      # ask what a smooth thing is most likely to be
      print $model->predict(['smooth'])."\n";
      # the answer is banana, because the learner have seen more smooth bananas
      # (weighted 3) than smooth apples (weighted 2)

      # ask what a red, long thing is most likely to be
      print $model->predict(['red', 'long'])."\n";
      # the answer is banana, because the learner have seen more long bananas
      # (weighted 3) than red apples (weighted 2)

      # print out scores of all possible answers to the feature round and red
      for ($model->all_labels) {
          my $s = $model->score(['round', 'red'] => $_);
          print "$_: $s\n";
      }
  
      # save the model
      $model->save('model_file');

      # load the model
      $model->load('model_file');

README  view on Meta::CPAN

    things according to the previously learnt things.

    Theorically, a ME learner try to recover the real probability
    distribution of the data based on limited number of observations, by
    applying the principle of maximum entropy.

    You can find some good tutorials on Maximum Entropy model here:

    <http://homepages.inf.ed.ac.uk/s0450736/maxent.html>

  Features
    Generally, a feature is a binary function answers a yes-no question on a
    specified piece of data.

    For examples,

      "Is it a red apple?"

      "Is it a yellow banana?"

    If the answer is yes, we say this feature is active on that piece of
    data.

    In practise, a feature is usually represented as a tuple "<x, y>". For
    examples, the above two features can be represented as

      <red, apple>

      <yellow, banana>

  Samples
    A sample is a set of active features, all of which share a common "y".
    This common "y" is sometimes called label or tag. For example, we have a
    big round red apple, the correpsonding sample is

      {<big, apple>, <round, apple>, <red, apple>}

    In this module, a samples is denoted in Perl code as

      $xs => $y => $w

    $xs is an array ref holding all "x", $y is a scalar holding the label

README  view on Meta::CPAN


      ['big', 'round', 'red'] => 'apple' => 1.0

    The weight $w can be ommited when it equals to 1.0, so the above
    denotation can be shorten to

      ['big', 'round', 'red'] => 'apple'

  Models
    With a set of samples, a model can be learnt for future predictions. The
    model (the lambda vector essentailly) is a knowledge representation of
    the samples that it have seen before. By applying the model, we can
    calculate the probability of each possible label for a certain sample.
    And choose the most possible one according to these probabilities.

FUNCTIONS
    NOTE: This is still an alpha version, the APIs may be changed in future
    versions.

  new
    Create a Maximum Entropy learner. Optionally, initial values of

README  view on Meta::CPAN

    Let the Maximum Entropy learner see a sample.

      my $me = AI::MaxEntropy->new;

      # see a sample with default weight 1.0
      $me->see(['red', 'round'] => 'apple');
  
      # see a sample with specified weight 0.5
      $me->see(['yellow', 'long'] => 'banana' => 0.5);

    The sample can be also represented in the attribute-value form, which
    like

      $me->see({color => 'yellow', shape => 'long'} => 'banana');
      $me->see({color => ['red', 'green'], shape => 'round'} => 'apple');

    Actually, the two samples above are converted internally to,

      $me->see(['color:yellow', 'shape:long'] => 'banana');
      $me->see(['color:red', 'color:green', 'shape:round'] => 'apple');

  forget_all
    Forget all samples the learner have seen previously.

  cut
    Cut the features that occur less than the specified number.

    For example,

      ...
      $me->cut(1)

    will cut all features that occur less than one time.

  learn
    Learn a model from all the samples that the learner have seen so far,
    returns an AI::MaxEntropy::Model object, which can be used to make
    prediction on unlabeled samples.

      ...

      my $model = $me->learn;

README  view on Meta::CPAN

    This property enables client program to choose different algorithms for
    learning the ME model and set their parameters.

    There are mainly 3 algorithm for learning ME models, they are GIS, IIS
    and L-BFGS. This module implements 2 of them, namely, L-BFGS and GIS.
    L-BFGS provides full functionality, while GIS runs faster, but only
    applicable on limited scenarios.

    To use GIS, the following conditions must be satisified:

    1. All samples have same number of active features

    2. No feature has been cut

    3. No smoother is used (in fact, the property "smoother" is simplly
    ignored when the type of algorithm equal to 'gis').

    This property "algorithm" is supposed to be a hash ref, like

      {
        type => ...,
        progress_cb => ...,
        param_1 => ...,
        param_2 => ...,
        ...,
        param_n => ...
      }

   type
    The entry "type => ..." specifies which algorithm is used for the
    optimization. Valid values include:

      'lbfgs'       Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)
      'gis'         General Iterative Scaling (GIS)

    If ommited, 'lbfgs' is used by default.

   progress_cb
    The entry "progress_cb => ..." specifies the progress callback
    subroutine which is used to trace the process of the algorithm. The
    specified callback routine will be called at each iteration of the
    algorithm.

    For L-BFGS, "progress_cb" will be directly passed to "fmin" in
    Algorithm::LBFGS. f(x) is the negative log-likelihood of current lambda
    vector.

    For GIS, the "progress_cb" is supposed to have a prototype like

      progress_cb(i, lambda, d_lambda, lambda_norm, d_lambda_norm)

    "i" is the number of the iterations, "lambda" is an array ref containing
    the current lambda vector, "d_lambda" is an array ref containing the
    delta of the lambda vector in current iteration, "lambda_norm" and
    "d_lambda_norm" are Euclid norms of "lambda" and "d_lambda"
    respectively.

    For both L-BFGS and GIS, the client program can also pass a string
    'verbose' to "progress_cb" to use a default progress callback which
    simply print out the progress on the screen.

    "progress_cb" can also be omitted if the client program do not want to
    trace the progress.

   parameters
    The rest entries are parameters for the specified algorithm. Each
    parameter will be assigned with its default value when it is not given
    explicitly.

    For L-BFGS, the parameters will be directly passed to Algorithm::LBFGS
    object, please refer to "Parameters" in Algorithm::LBFGS for details.

    For GIS, there is only one parameter "epsilon", which controls the
    precision of the algorithm (similar to the "epsilon" in
    Algorithm::LBFGS). Generally speaking, a smaller "epsilon" produces a
    more precise result. The default value of "epsilon" is 1e-3.

  smoother
    The smoother is a solution to the over-fitting problem. This property
    chooses which type of smoother the client program want to apply and sets
    the smoothing parameters.

    Only one smoother have been implemented in this version of the module,
    the Gaussian smoother.

    One can apply the Gaussian smoother as following,

README  view on Meta::CPAN

AUTHOR
    Laye Suen, <laye@cpan.org>

COPYRIGHT AND LICENSE
    The MIT License

    Copyright (C) 2008, Laye Suen

    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:

    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

inc/Module/AutoInstall.pm  view on Meta::CPAN

use Cwd                 ();
use ExtUtils::MakeMaker ();

use vars qw{$VERSION};
BEGIN {
	$VERSION = '1.03';
}

# special map on pre-defined feature sets
my %FeatureMap = (
    ''      => 'Core Features',    # XXX: deprecated
    '-core' => 'Core Features',
);

# various lexical flags
my ( @Missing, @Existing,  %DisabledTests, $UnderCPAN,     $HasCPANPLUS );
my ( $Config,  $CheckOnly, $SkipInstall,   $AcceptDefault, $TestOnly );
my ( $PostambleActions, $PostambleUsed );

# See if it's a testing or non-interactive session
_accept_default( $ENV{AUTOMATED_TESTING} or ! -t STDIN ); 
_init();

inc/Module/AutoInstall.pm  view on Meta::CPAN


        if ( $feature =~ m/^-(\w+)$/ ) {
            my $option = lc($1);

            # check for a newer version of myself
            _update_to( $modules, @_ ) and return if $option eq 'version';

            # sets CPAN configuration options
            $Config = $modules if $option eq 'config';

            # promote every features to core status
            $core_all = ( $modules =~ /^all$/i ) and next
              if $option eq 'core';

            next unless $option eq 'core';
        }

        print "[" . ( $FeatureMap{ lc($feature) } || $feature ) . "]\n";

        $modules = [ %{$modules} ] if UNIVERSAL::isa( $modules, 'HASH' );

inc/Module/AutoInstall.pm  view on Meta::CPAN

.
}

# check if we're connected to some host, using inet_aton
sub _connected_to {
    my $site = shift;

    return (
        ( _load('Socket') and Socket::inet_aton($site) ) or _prompt(
            qq(
*** Your host cannot resolve the domain name '$site', which
    probably means the Internet connections are unavailable.
==> Should we try to install the required module(s) anyway?), 'n'
          ) =~ /^[Yy]/
    );
}

# check if a directory is writable; may create it on demand
sub _can_write {
    my $path = shift;
    mkdir( $path, 0755 ) unless -e $path;

inc/Module/AutoInstall.pm  view on Meta::CPAN


    no strict 'refs';

    $PostambleUsed = 0;
    local *MY::postamble = \&postamble unless defined &MY::postamble;
    ExtUtils::MakeMaker::WriteMakefile(%args);

    print << "." unless $PostambleUsed;
*** WARNING: Makefile written with customized MY::postamble() without
    including contents from Module::AutoInstall::postamble() --
    auto installation features disabled.  Please contact the author.
.

    return 1;
}

sub postamble {
    $PostambleUsed = 1;

    return << ".";

inc/Module/Install.pm  view on Meta::CPAN

    # releases once we can make sure it won't clash with custom
    # Module::Install extensions.
    $VERSION = '0.68';
}

# Whether or not inc::Module::Install is actually loaded, the
# $INC{inc/Module/Install.pm} is what will still get set as long as
# the caller loaded module this in the documented manner.
# If not set, the caller may NOT have loaded the bundled version, and thus
# they may not have a MI version that works with the Makefile.PL. This would
# result in false errors or unexpected behaviour. And we don't want that.
my $file = join( '/', 'inc', split /::/, __PACKAGE__ ) . '.pm';
unless ( $INC{$file} ) {
    die <<"END_DIE";
Please invoke ${\__PACKAGE__} with:

    use inc::${\__PACKAGE__};

not:

    use ${\__PACKAGE__};

END_DIE
}

# If the script that is loading Module::Install is from the future,
# then make will detect this and cause it to re-run over and over
# again. This is bad. Rather than taking action to touch it (which
# is unreliable on some platforms and requires write permissions)
# for now we should catch this and refuse to run.
if ( -f $0 and (stat($0))[9] > time ) {
	die << "END_DIE";
Your installer $0 has a modification time in the future.

This is known to create infinite loops in make.

Please correct this, then run $0 again.

END_DIE

inc/Module/Install.pm  view on Meta::CPAN

    File::Find::find( sub {
        my $file = $File::Find::name;
        return unless $file =~ m!^\Q$path\E/(.+)\.pm\Z!is;
        my $subpath = $1;
        return if lc($subpath) eq lc($self->{dispatch});

        $file = "$self->{path}/$subpath.pm";
        my $pkg = "$self->{name}::$subpath";
        $pkg =~ s!/!::!g;

        # If we have a mixed-case package name, assume case has been preserved
        # correctly.  Otherwise, root through the file to locate the case-preserved
        # version of the package name.
        if ( $subpath eq lc($subpath) || $subpath eq uc($subpath) ) {
            open PKGFILE, "<$subpath.pm" or die "find_extensions: Can't open $subpath.pm: $!";
            my $in_pod = 0;
            while ( <PKGFILE> ) {
                $in_pod = 1 if /^=\w/;
                $in_pod = 0 if /^=cut/;
                next if ($in_pod || /^=cut/);  # skip pod text
                next if /^\s*#/;               # and comments
                if ( m/^\s*package\s+($pkg)\s*;/i ) {

inc/Module/Install/AutoInstall.pm  view on Meta::CPAN

    my $self = shift;
    $self->auto_install(@_);
}

sub auto_install {
    my $self = shift;
    return if $self->{done}++;

    # Flatten array of arrays into a single array
    my @core = map @$_, map @$_, grep ref,
               $self->build_requires, $self->requires;

    my @config = @_;

    # We'll need Module::AutoInstall
    $self->include('Module::AutoInstall');
    require Module::AutoInstall;

    Module::AutoInstall->import(
        (@config ? (-config => \@config) : ()),
        (@core   ? (-core   => \@core)   : ()),
        $self->features,
    );

    $self->makemaker_args( Module::AutoInstall::_make_args() );

    my $class = ref($self);
    $self->postamble(
        "# --- $class section:\n" .
        Module::AutoInstall::postamble()
    );
}

inc/Module/Install/MakeMaker.pm  view on Meta::CPAN

        $args{NAME} =~ s/::/-/g;
    }

    foreach my $key (qw(name module_name version version_from abstract author installdirs)) {
        my $value = delete($args{uc($key)}) or next;
        $self->$key($value);
    }

    if (my $prereq = delete($args{PREREQ_PM})) {
        while (my($k,$v) = each %$prereq) {
            $self->requires($k,$v);
        }
    }

    # put the remaining args to makemaker_args
    $self->makemaker_args(%args);
}

END {
    if ( $makefile ) {
        $makefile->write;

inc/Module/Install/Makefile.pm  view on Meta::CPAN

	if ( eval($ExtUtils::MakeMaker::VERSION) >= 6.10 ) {
		$args->{NO_META} = 1;
	}
	if ( eval($ExtUtils::MakeMaker::VERSION) > 6.17 and $self->sign ) {
		$args->{SIGN} = 1;
	}
	unless ( $self->is_admin ) {
		delete $args->{SIGN};
	}

	# merge both kinds of requires into prereq_pm
	my $prereq = ($args->{PREREQ_PM} ||= {});
	%$prereq = ( %$prereq,
		map { @$_ }
		map { @$_ }
		grep $_,
		($self->build_requires, $self->requires)
	);

	# merge both kinds of requires into prereq_pm
	my $subdirs = ($args->{DIR} ||= []);
	if ($self->bundles) {
		foreach my $bundle (@{ $self->bundles }) {
			my ($file, $dir) = @$bundle;
			push @$subdirs, $dir if -d $dir;
			delete $prereq->{$file};
		}
	}

	if ( my $perl_version = $self->perl_version ) {

inc/Module/Install/Metadata.pm  view on Meta::CPAN

	$ISCORE  = 1;
	@ISA     = qw{Module::Install::Base};
}

my @scalar_keys = qw{
    name module_name abstract author version license
    distribution_type perl_version tests installdirs
};

my @tuple_keys = qw{
    build_requires requires recommends bundles
};

sub Meta            { shift        }
sub Meta_ScalarKeys { @scalar_keys }
sub Meta_TupleKeys  { @tuple_keys  }

foreach my $key (@scalar_keys) {
    *$key = sub {
        my $self = shift;
        return $self->{values}{$key} if defined wantarray and !@_;

inc/Module/Install/Metadata.pm  view on Meta::CPAN

                next;
            }
            my $rv = [ $module, $version ];
            push @rv, $rv;
        }
        push @{ $self->{values}{$key} }, @rv;
        @rv;
    };
}

# configure_requires is currently a null-op
sub configure_requires { 1 }

# Aliases for build_requires that will have alternative
# meanings in some future version of META.yml.
sub test_requires      { shift->build_requires(@_)  }
sub install_requires   { shift->build_requires(@_)  }

# Aliases for installdirs options
sub install_as_core    { $_[0]->installdirs('perl')   }
sub install_as_cpan    { $_[0]->installdirs('site')   }
sub install_as_site    { $_[0]->installdirs('site')   }
sub install_as_vendor  { $_[0]->installdirs('vendor') }

sub sign {
    my $self = shift;
    return $self->{'values'}{'sign'} if defined wantarray and ! @_;

inc/Module/Install/Metadata.pm  view on Meta::CPAN

        dist_name    => $self->name,
        dist_version => $self->version,
        license      => $self->license,
    );
    $self->provides(%{ $build->find_dist_packages || {} });
}

sub feature {
    my $self     = shift;
    my $name     = shift;
    my $features = ( $self->{values}{features} ||= [] );

    my $mods;

    if ( @_ == 1 and ref( $_[0] ) ) {
        # The user used ->feature like ->features by passing in the second
        # argument as a reference.  Accomodate for that.
        $mods = $_[0];
    } else {
        $mods = \@_;
    }

    my $count = 0;
    push @$features, (
        $name => [
            map {
                ref($_) ? ( ref($_) eq 'HASH' ) ? %$_
                                                : @$_
                        : $_
            } @$mods
        ]
    );

    return @$features;
}

sub features {
    my $self = shift;
    while ( my ( $name, $mods ) = splice( @_, 0, 2 ) ) {
        $self->feature( $name, @$mods );
    }
    return $self->{values}->{features}
    	? @{ $self->{values}->{features} }
    	: ();
}

sub no_index {
    my $self = shift;
    my $type = shift;
    push @{ $self->{values}{no_index}{$type} }, @_ if $type;
    return $self->{values}{no_index};
}

inc/Module/Install/Metadata.pm  view on Meta::CPAN

            'GPL'                                             => 'gpl',         1,
            'LGPL'                                            => 'lgpl',        1,
            'BSD'                                             => 'bsd',         1,
            'Artistic'                                        => 'artistic',    1,
            'MIT'                                             => 'mit',         1,
            'proprietary'                                     => 'proprietary', 0,
        );
        while ( my ($pattern, $license, $osi) = splice(@phrases, 0, 3) ) {
            $pattern =~ s{\s+}{\\s+}g;
            if ( $license_text =~ /\b$pattern\b/i ) {
                if ( $osi and $license_text =~ /All rights reserved/i ) {
                        warn "LEGAL WARNING: 'All rights reserved' may invalidate Open Source licenses. Consider removing it.";
		}
                $self->license($license);
                return 1;
            }
        }
    }

    warn "Cannot determine license info from $file\n";
    return 'unknown';
}

inc/Test/Builder.pm  view on Meta::CPAN

    return $Test;
}


#line 150

sub create {
    my $class = shift;

    my $self = bless {}, $class;
    $self->reset;

    return $self;
}

#line 169

use vars qw($Level);

sub reset {
    my ($self) = @_;

    # We leave this a global because it has to be localized and localizing
    # hash keys is just asking for pain.  Also, it was documented.
    $Level = 1;

    $self->{Test_Died}    = 0;
    $self->{Have_Plan}    = 0;
    $self->{No_Plan}      = 0;
    $self->{Original_Pid} = $$;

inc/Test/Builder.pm  view on Meta::CPAN

    You named your test '$name'.  You shouldn't use numbers for your test names.
    Very confusing.
ERR

    my($pack, $file, $line) = $self->caller;

    my $todo = $self->todo($pack);
    $self->_unoverload_str(\$todo);

    my $out;
    my $result = &share({});

    unless( $test ) {
        $out .= "not ";
        @$result{ 'ok', 'actual_ok' } = ( ( $todo ? 1 : 0 ), 0 );
    }
    else {
        @$result{ 'ok', 'actual_ok' } = ( 1, $test );
    }

    $out .= "ok";
    $out .= " $self->{Curr_Test}" if $self->use_numbers;

    if( defined $name ) {
        $name =~ s|#|\\#|g;     # # in a name can confuse Test::Harness.
        $out   .= " - $name";
        $result->{name} = $name;
    }
    else {
        $result->{name} = '';
    }

    if( $todo ) {
        $out   .= " # TODO $todo";
        $result->{reason} = $todo;
        $result->{type}   = 'todo';
    }
    else {
        $result->{reason} = '';
        $result->{type}   = '';
    }

    $self->{Test_Results}[$self->{Curr_Test}-1] = $result;
    $out .= "\n";

    $self->_print($out);

    unless( $test ) {
        my $msg = $todo ? "Failed (TODO)" : "Failed";
        $self->_print_diag("\n") if $ENV{HARNESS_ACTIVE};

	if( defined $name ) {
	    $self->diag(qq[  $msg test '$name'\n]);

inc/Test/Builder.pm  view on Meta::CPAN


    lock($self->{Curr_Test});
    if( defined $num ) {
        unless( $self->{Have_Plan} ) {
            $self->croak("Can't change the current test number without a plan!");
        }

        $self->{Curr_Test} = $num;

        # If the test counter is being pushed forward fill in the details.
        my $test_results = $self->{Test_Results};
        if( $num > @$test_results ) {
            my $start = @$test_results ? @$test_results : 0;
            for ($start..$num-1) {
                $test_results->[$_] = &share({
                    'ok'      => 1, 
                    actual_ok => undef, 
                    reason    => 'incrementing test number', 
                    type      => 'unknown', 
                    name      => undef 
                });
            }
        }
        # If backward, wipe history.  Its their funeral.
        elsif( $num < @$test_results ) {
            $#{$test_results} = $num - 1;
        }
    }
    return $self->{Curr_Test};
}


#line 1489

sub summary {
    my($self) = shift;

inc/Test/Builder.pm  view on Meta::CPAN

#line 1616

#'#
sub _sanity_check {
    my $self = shift;

    $self->_whoa($self->{Curr_Test} < 0,  'Says here you ran a negative number of tests!');
    $self->_whoa(!$self->{Have_Plan} and $self->{Curr_Test}, 
          'Somehow your tests ran without a plan!');
    $self->_whoa($self->{Curr_Test} != @{ $self->{Test_Results} },
          'Somehow you got a different number of results than tests ran!');
}

#line 1637

sub _whoa {
    my($self, $check, $desc) = @_;
    if( $check ) {
        local $Level = $Level + 1;
        $self->croak(<<"WHOA");
WHOA!  $desc

inc/Test/Builder.pm  view on Meta::CPAN

    if( ($self->{Original_Pid} != $$) 			or
	(!$self->{Have_Plan} && !$self->{Test_Died}) 	or
	$self->{Bailed_Out}
      )
    {
	_my_exit($?);
	return;
    }

    # Figure out if we passed or failed and print helpful messages.
    my $test_results = $self->{Test_Results};
    if( @$test_results ) {
        # The plan?  We have no plan.
        if( $self->{No_Plan} ) {
            $self->_print("1..$self->{Curr_Test}\n") unless $self->no_header;
            $self->{Expected_Tests} = $self->{Curr_Test};
        }

        # Auto-extended arrays and elements which aren't explicitly
        # filled in with a shared reference will puke under 5.8.0
        # ithreads.  So we have to fill them in by hand. :(
        my $empty_result = &share({});
        for my $idx ( 0..$self->{Expected_Tests}-1 ) {
            $test_results->[$idx] = $empty_result
              unless defined $test_results->[$idx];
        }

        my $num_failed = grep !$_->{'ok'}, 
                              @{$test_results}[0..$self->{Curr_Test}-1];

        my $num_extra = $self->{Curr_Test} - $self->{Expected_Tests};

        if( $num_extra < 0 ) {
            my $s = $self->{Expected_Tests} == 1 ? '' : 's';
            $self->diag(<<"FAIL");
Looks like you planned $self->{Expected_Tests} test$s but only ran $self->{Curr_Test}.
FAIL
        }
        elsif( $num_extra > 0 ) {

inc/Test/More.pm  view on Meta::CPAN


    return $ok;
}


sub _is_module_name {
    my $module = shift;

    # Module names start with a letter.
    # End with an alphanumeric.
    # The rest is an alphanumeric or ::
    $module =~ s/\b::\b//g;
    $module =~ /^[a-zA-Z]\w*$/;
}

#line 779

use vars qw(@Data_Stack %Refs_Seen);
my $DNE = bless [], 'Does::Not::Exist';

sub _dne {

inc/Test/More.pm  view on Meta::CPAN

        elsif( $type eq 'REF' ) {
            $var = "\${$var}";
        }
    }

    my @vals = @{$Stack[-1]{vals}}[0,1];
    my @vars = ();
    ($vars[0] = $var) =~ s/\$FOO/     \$got/;
    ($vars[1] = $var) =~ s/\$FOO/\$expected/;

    my $out = "Structures begin differing at:\n";
    foreach my $idx (0..$#vals) {
        my $val = $vals[$idx];
        $vals[$idx] = !defined $val ? 'undef'          :
                      _dne($val)    ? "Does not exist" :
                      ref $val      ? "$val"           :
                                      "'$val'";
    }

    $out .= "$vars[0] = $vals[0]\n";
    $out .= "$vars[1] = $vals[1]\n";

lib/AI/MaxEntropy.pm  view on Meta::CPAN

    # cut 0 for default
    $self->cut(0) if $self->{last_cut} == -1;
    # initialize
    $self->{lambda} = [map { 0 } (1 .. $self->{f_num})];
    $self->_cache;
    # optimize
    my $type = $self->{algorithm}->{type} || 'lbfgs';
    if ($type eq 'lbfgs') {
        my $o = Algorithm::LBFGS->new(%{$self->{algorithm}});
        $o->fmin(\&_neg_log_likelihood, $self->{lambda},
            $self->{algorithm}->{progress_cb}, $self);
    }
    elsif ($type eq 'gis') {
        die 'GIS is not applicable'
	    if $self->{af_num} == -1 or $self->{last_cut} != 0;
	my $progress_cb = $self->{algorithm}->{progress_cb};
	$progress_cb = sub {
	    print "$_[0]: |lambda| = $_[3], |d_lambda| = $_[4]\n"; 0;
        } if defined($progress_cb) and $progress_cb eq 'verbose';
        my $epsilon = $self->{algorithm}->{epsilon} || 1e-3;
        $self->{lambda} = $self->_apply_gis($progress_cb, $epsilon);
    }
    else { die "$type is not a valid algorithm type" }
    # finish
    $self->_free_cache;
    return $self->_create_model;
}

sub _create_model {
    my $self = shift;
    my $model = AI::MaxEntropy::Model->new;    

lib/AI/MaxEntropy.pm  view on Meta::CPAN

  my $me = AI::MaxEntropy->new; 
  
  # the learner see 2 red round smooth apples
  $me->see(['round', 'smooth', 'red'] => 'apple' => 2);
  
  # the learner see 3 yellow long smooth bananas
  $me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);

  # and more

  # samples needn't have the same numbers of active features
  $me->see(['rough', 'big'] => 'pomelo');

  # the order of active features is not concerned, too
  $me->see(['big', 'rough'] => 'pomelo');

  # ...

  # and, let it learn
  my $model = $me->learn;

  # then, we can make predictions on unseen data

  # ask what a red thing is most likely to be

lib/AI/MaxEntropy.pm  view on Meta::CPAN

  # ask what a smooth thing is most likely to be
  print $model->predict(['smooth'])."\n";
  # the answer is banana, because the learner have seen more smooth bananas
  # (weighted 3) than smooth apples (weighted 2)

  # ask what a red, long thing is most likely to be
  print $model->predict(['red', 'long'])."\n";
  # the answer is banana, because the learner have seen more long bananas
  # (weighted 3) than red apples (weighted 2)

  # print out scores of all possible answers to the feature round and red
  for ($model->all_labels) {
      my $s = $model->score(['round', 'red'] => $_);
      print "$_: $s\n";
  }
  
  # save the model
  $model->save('model_file');

  # load the model
  $model->load('model_file');

lib/AI/MaxEntropy.pm  view on Meta::CPAN

according to the previously learnt things.

Theorically, a ME learner try to recover the real probability distribution 
of the data based on limited number of observations, by applying the
principle of maximum entropy. 

You can find some good tutorials on Maximum Entropy model here:

L<http://homepages.inf.ed.ac.uk/s0450736/maxent.html>

=head2 Features

Generally, a feature is a binary function answers a yes-no question on a
specified piece of data. 

For examples, 

  "Is it a red apple?"

  "Is it a yellow banana?"

If the answer is yes,
we say this feature is active on that piece of data.

In practise, a feature is usually represented as
a tuple C<E<lt>x, yE<gt>>. For examples, the above two features can be
represented as

  <red, apple>

  <yellow, banana>

=head2 Samples

A sample is a set of active features, all of which share a common C<y>.
This common C<y> is sometimes called label or tag.
For example, we have a big round red apple, the correpsonding sample is 

  {<big, apple>, <round, apple>, <red, apple>}

In this module, a samples is denoted in Perl code as

  $xs => $y => $w

C<$xs> is an array ref holding all C<x>,

lib/AI/MaxEntropy.pm  view on Meta::CPAN

  ['big', 'round', 'red'] => 'apple' => 1.0

The weight C<$w> can be ommited when it equals to 1.0,
so the above denotation can be shorten to

  ['big', 'round', 'red'] => 'apple'

=head2 Models

With a set of samples, a model can be learnt for future predictions.
The model (the lambda vector essentailly) is a knowledge representation
of the samples that it have seen before.
By applying the model, we can calculate the probability of each possible
label for a certain sample. And choose the most possible one
according to these probabilities.

=head1 FUNCTIONS

NOTE: This is still an alpha version, the APIs may be changed
in future versions.

lib/AI/MaxEntropy.pm  view on Meta::CPAN

Let the Maximum Entropy learner see a sample.

  my $me = AI::MaxEntropy->new;

  # see a sample with default weight 1.0
  $me->see(['red', 'round'] => 'apple');
  
  # see a sample with specified weight 0.5
  $me->see(['yellow', 'long'] => 'banana' => 0.5);

The sample can be also represented in the attribute-value form, which like

  $me->see({color => 'yellow', shape => 'long'} => 'banana');
  $me->see({color => ['red', 'green'], shape => 'round'} => 'apple');

Actually, the two samples above are converted internally to,

  $me->see(['color:yellow', 'shape:long'] => 'banana');
  $me->see(['color:red', 'color:green', 'shape:round'] => 'apple');

=head2 forget_all

Forget all samples the learner have seen previously.

=head2 cut

Cut the features that occur less than the specified number.

For example, 

  ...
  $me->cut(1)

will cut all features that occur less than one time.

=head2 learn 

Learn a model from all the samples that the learner have seen so far,
returns an L<AI::MaxEntropy::Model> object, which can be used to make
prediction on unlabeled samples.

  ...

  my $model = $me->learn;

lib/AI/MaxEntropy.pm  view on Meta::CPAN

This property enables client program to choose different algorithms for
learning the ME model and set their parameters.

There are mainly 3 algorithm for learning ME models, they are GIS, IIS and
L-BFGS. This module implements 2 of them, namely,  L-BFGS and GIS.
L-BFGS provides full functionality, while GIS runs faster, but only 
applicable on limited scenarios.

To use GIS, the following conditions must be satisified:

1. All samples have same number of active features

2. No feature has been cut

3. No smoother is used (in fact, the property L</smoother> is simplly
ignored when the type of algorithm equal to 'gis').

This property C<algorithm> is supposed to be a hash ref, like

  {
    type => ...,
    progress_cb => ...,
    param_1 => ...,
    param_2 => ...,
    ...,
    param_n => ...
  }

=head3 type

The entry C<type =E<gt> ...> specifies which algorithm is used for the 
optimization. Valid values include:

  'lbfgs'       Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)
  'gis'         General Iterative Scaling (GIS)

If ommited, C<'lbfgs'> is used by default.

=head3 progress_cb

The entry C<progress_cb =E<gt> ...> specifies the progress callback
subroutine which is used to trace the process of the algorithm. 
The specified callback routine will be called at each iteration of the
algorithm.

For L-BFGS, C<progress_cb> will be directly passed to
L<Algorithm::LBFGS/fmin>. C<f(x)> is the negative log-likelihood of current
lambda vector.

For GIS, the C<progress_cb> is supposed to have a prototype like

  progress_cb(i, lambda, d_lambda, lambda_norm, d_lambda_norm)

C<i> is the number of the iterations, C<lambda> is an array ref containing
the current lambda vector, C<d_lambda> is an array ref containing the
delta of the lambda vector in current iteration, C<lambda_norm> and
C<d_lambda_norm> are Euclid norms of C<lambda> and C<d_lambda> respectively.

For both L-BFGS and GIS, the client program can also pass a string
C<'verbose'> to C<progress_cb> to use a default progress callback
which simply print out the progress on the screen.

C<progress_cb> can also be omitted if the client program
do not want to trace the progress.

=head3 parameters

The rest entries are parameters for the specified algorithm.
Each parameter will be assigned with its default value when it is not
given explicitly.

For L-BFGS, the parameters will be directly passed to
L<Algorithm::LBFGS> object, please refer to L<Algorithm::LBFGS/Parameters>
for details.

For GIS, there is only one parameter C<epsilon>, which controls the
precision of the algorithm (similar to the C<epsilon> in
L<Algorithm::LBFGS>). Generally speaking, a smaller C<epsilon> produces
a more precise result. The default value of C<epsilon> is 1e-3.

=head2 smoother

The smoother is a solution to the over-fitting problem. 
This property chooses which type of smoother the client program want to
apply and sets the smoothing parameters. 

Only one smoother have been implemented in this version of the module, 
the Gaussian smoother.

lib/AI/MaxEntropy.pm  view on Meta::CPAN

Laye Suen, E<lt>laye@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

The MIT License

Copyright (C) 2008, Laye Suen

Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

lib/AI/MaxEntropy/Model.pm  view on Meta::CPAN

  $me->see(['round', 'smooth', 'red'] => 'apple' => 2);
  $me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);
  $me->see(['round', 'rough'] => 'orange' => 2);
  my $model = $me->learn;

  # make prediction on unseen data
  # ask what a red round thing is most likely to be
  my $y = $model->predict(['round', 'red']);
  # the answer apple is expected

  # print out scores of all possible labels
  for ($model->all_labels) {
      my $s = $model->score(['round', 'red'] => $_);
      print "$_: $s\n";
  }
  
  # save the model to file
  $model->save('model_file');

  # load the model from file
  $model->load('model_file');

lib/AI/MaxEntropy/Model.pm  view on Meta::CPAN

=head2 predict

Get the most possible label for a unlabeled sample

  ...
  
  $y = $model->predict(['round', 'red']);

=head2 score

Get scores for every possible label for a unlabeled sample

  ...

  $s = $model->score(['round', 'red'] => 'apple');

=head2 save

Dumps the model to a file.

  ...

lib/AI/MaxEntropy/Model.pm  view on Meta::CPAN

Laye Suen, E<lt>laye@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

The MIT License

Copyright (C) 2008, Laye Suen

Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

lib/AI/MaxEntropy/Util.pm  view on Meta::CPAN

  use AI::MaxEntropy::Util qw/:all/;

  my $me = AI::MaxEntropy->new;
  
  my $samples = [
      [['a', 'b', 'c'] => 'x'],
      [['e', 'f'] => 'y' => 1.5],
      ...
  ];

  my ($result, $model) = train_and_test($me, $samples, 'xxxo');

  print precision($result)."\n";
  print recall($result, 'x')."\n";

=head1 DESCRIPTION

This module makes doing experiments with Maximum Entropy learner easier.

Generally, an experiment involves a training set and a testing set
(sometimes also a parameter adjusting set). The learner is trained with
samples in the training set and tested with samples in the testing set.
Usually, 2 measures of performance are concerned.
One is precision, indicating the percentage of samples which are correctly
predicted in the testing set. The other one is recall, indicating the 
precision of samples with a certain label.

=head1 FUNCTIONS

=head2 train_and_test

This function automated the process of training and testing.

    my $me = AI::MaxEntropy->new;
    my $sample = [
        [ ['a', 'b'] => 'x' => 1.5 ],
	...
    ];
    my ($result, $model) = train_and_test($me, $sample, 'xxxo');

First, the whole samples set will be divided into a training set and a
testing set according to the specified pattern. A pattern is a string,
in which each character stands for a part of the samples set.
If the character is C<'x'>, the corresponding part is used for training.
If the character is C<'o'>, the corresponding part is used for testing.
Otherwise, the corresponding part is simply ignored.

For example, the pattern 'xxxo' means the first three forth of the samples
set are used for training while the last one forth is used for testing.

The function returns two values. The first one is an array ref describe
the result of testing, in which each element follows a structure like
C<[sample =E<gt> result]>. The second one is the model learnt from the
training set, which is an L<AI::MaxEntropy::Model> object.

=head2 traverse_partially

This function is the core implementation of L</train_and_test>. It traverse
through some of the elements in an array according to a pattern,
and does some specified actions with each of these elements.
  
  my $arr = [1, 2, 3, 4, 5];

lib/AI/MaxEntropy/Util.pm  view on Meta::CPAN

array ref in which all elements in the original array is mapped according
to the code snippet's return value.

  my $arr = [1, 2, 3, 4, 5];
  
  # increase the last one third of the elements by 1
  $arr = map_partially { $_ + 1 } $arr, '--x';

=head2 precision

Calculates the precision based on the result returned by
L</train_and_test>.

  ...
  my ($result, $model) = train_and_test(...);
  print precision($result)."\n";

Note that the weights of samples are taken into consideration.

=head2 recall

Calculates the recall of a certain label based on the result returned by
L</train_and_test>.

  ...
  my ($result, $model) = train_and_test(...);
  print recall($result, 'label')."\n";

Note that the weights of samples are taken into consideration.

=head1 SEE ALSO

L<AI::MaxEntropy>, L<AI::MaxEntropy::Model>

=head1 AUTHOR

Laye Suen, E<lt>laye@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

The MIT License

Copyright (C) 2008, Laye Suen

Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

ppport.h  view on Meta::CPAN


=head1 OPTIONS

=head2 --help

Display a brief usage summary.

=head2 --patch=I<file>

If this option is given, a single patch file will be created if
any changes are suggested. This requires a working diff program
to be installed on your system.

=head2 --copy=I<suffix>

If this option is given, a copy of each file will be saved with
the given suffix that contains the suggested changes. This does
not require any external programs.

If neither C<--patch> or C<--copy> are given, the default is to
simply print the diffs for each file. This requires either
C<Text::Diff> or a C<diff> program to be installed.

=head2 --diff=I<program>

Manually set the diff program and options to use. The default
is to use C<Text::Diff>, when installed, and output unified
context diffs.

=head2 --compat-version=I<version>

ppport.h  view on Meta::CPAN

=head2 --list-unsupported

Lists the API elements that are known not to be supported by
F<ppport.h> and below which version of Perl they probably
won't be available or work.

=head2 --api-info=I<name>

Show portability information for API elements matching I<name>.
If I<name> is surrounded by slashes, it is interpreted as a regular
expression.

=head1 DESCRIPTION

In order for a Perl extension (XS) module to be as portable as possible
across differing versions of Perl itself, certain steps need to be taken.

=over 4

=item *

ppport.h  view on Meta::CPAN

=item *

You should avoid using deprecated parts of the API. For example, using
global Perl variables without the C<PL_> prefix is deprecated. Also,
some API functions used to have a C<perl_> prefix. Using this form is
also deprecated. You can safely use the supported API, as F<ppport.h>
will provide wrappers for older Perl versions.

=item *

If you use one of a few functions that were not present in earlier
versions of Perl, and that can't be provided using a macro, you have
to explicitly request support for these functions by adding one or
more C<#define>s in your source code before the inclusion of F<ppport.h>.

These functions will be marked C<explicit> in the list shown by
C<--list-provided>.

Depending on whether you module has a single or multiple files that
use such functions, you want either C<static> or global variants.

ppport.h  view on Meta::CPAN


=head1 EXAMPLES

To verify whether F<ppport.h> is needed for your module, whether you
should make any changes to your code, and whether any special defines
should be used, F<ppport.h> can be run as a Perl script to check your
source code. Simply say:

    perl ppport.h

The result will usually be a list of patches suggesting changes
that should at least be acceptable, if not necessarily the most
efficient solution, or a fix for all possible problems.

If you know that your XS module uses features only available in
newer Perl releases, if you're aware that it uses C++ comments,
and if you want all suggestions as a single patch file, you could
use something like this:

    perl ppport.h --compat-version=5.6.0 --cplusplus --patch=test.diff

If you only want your code to be scanned without any suggestions
for changes, use:

    perl ppport.h --nochanges

ppport.h  view on Meta::CPAN


    perl ppport.h --diff='diff -C 10'

This would output context diffs with 10 lines of context.

To display portability information for the C<newSVpvn> function,
use:

    perl ppport.h --api-info=newSVpvn

Since the argument to C<--api-info> can be a regular expression,
you can use

    perl ppport.h --api-info=/_nomg$/

to display portability information for all C<_nomg> functions or

    perl ppport.h --api-info=/./

to display information for all known API elements.

ppport.h  view on Meta::CPAN

mg_length||5.005000|
mg_localize|||
mg_magical|||
mg_set|||
mg_size||5.005000|
mini_mktime||5.007002|
missingterm|||
mode_from_discipline|||
modkids|||
mod|||
moreswitches|||
mul128|||
mulexp10|||n
my_atof2||5.007002|
my_atof||5.006000|
my_attrs|||
my_bcopy|||n
my_betoh16|||n
my_betoh32|||n
my_betoh64|||n
my_betohi|||n

ppport.h  view on Meta::CPAN

pad_block_start|||
pad_check_dup|||
pad_compname_type|||
pad_findlex|||
pad_findmy|||
pad_fixup_inner_anons|||
pad_free|||
pad_leavemy|||
pad_new|||
pad_push|||
pad_reset|||
pad_setsv|||
pad_sv|||
pad_swipe|||
pad_tidy|||
pad_undef|||
parse_body|||
parse_unicode_opts|||
path_is_absolute|||
peep|||
pending_ident|||

ppport.h  view on Meta::CPAN

regtry|||
reguni|||
regwhite|||
reg|||
repeatcpy|||
report_evil_fh|||
report_uninit|||
require_errno|||
require_pv||5.006000|
rninstr|||
rsignal_restore|||
rsignal_save|||
rsignal_state||5.004000|
rsignal||5.004000|
run_body|||
runops_debug||5.005000|
runops_standard||5.005000|
rvpv_dup|||
rxres_free|||
rxres_restore|||
rxres_save|||
safesyscalloc||5.006000|n
safesysfree||5.006000|n
safesysmalloc||5.006000|n
safesysrealloc||5.006000|n
same_dirent|||
save_I16||5.004000|
save_I32|||
save_I8||5.006000|
save_aelem||5.004050|
save_alloc||5.006000|

ppport.h  view on Meta::CPAN

strnEQ|||
strnNE|||
study_chunk|||
sub_crush_depth|||
sublex_done|||
sublex_push|||
sublex_start|||
sv_2bool|||
sv_2cv|||
sv_2io|||
sv_2iuv_non_preserve|||
sv_2iv_flags||5.009001|
sv_2iv|||
sv_2mortal|||
sv_2nv|||
sv_2pv_flags||5.007002|
sv_2pv_nolen|5.006000||p
sv_2pvbyte_nolen|||
sv_2pvbyte|5.006000||p
sv_2pvutf8_nolen||5.006000|
sv_2pvutf8||5.006000|

ppport.h  view on Meta::CPAN

sv_pvutf8n_force||5.006000|
sv_pvutf8n||5.006000|
sv_pvutf8||5.006000|
sv_pv||5.006000|
sv_recode_to_utf8||5.007003|
sv_reftype|||
sv_release_COW|||
sv_release_IVX|||
sv_replace|||
sv_report_used|||
sv_reset|||
sv_rvweaken||5.006000|
sv_setiv_mg|5.006000||p
sv_setiv|||
sv_setnv_mg|5.006000||p
sv_setnv|||
sv_setpv_mg|5.006000||p
sv_setpvf_mg_nocontext|||pvn
sv_setpvf_mg|5.006000|5.004000|pv
sv_setpvf_nocontext|||vn
sv_setpvf||5.004000|v

ppport.h  view on Meta::CPAN


# Scan for possible replacement candidates

my(%replace, %need, %hints, %depends);
my $replace = 0;
my $hint = '';

while (<DATA>) {
  if ($hint) {
    if (m{^\s*\*\s(.*?)\s*$}) {
      $hints{$hint} ||= '';  # suppress warning with older perls
      $hints{$hint} .= "$1\n";
    }
    else {
      $hint = '';
    }
  }
  $hint = $1 if m{^\s*$rccs\sHint:\s+(\w+)\s*$};

  $replace     = $1 if m{^\s*$rccs\s+Replace:\s+(\d+)\s+$rcce\s*$};
  $replace{$2} = $1 if $replace and m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(\w+)};

ppport.h  view on Meta::CPAN


  for (qw(uses needs uses_todo needed_global needed_static)) {
    for $func (keys %{$file{$_}}) {
      push @{$global{$_}{$func}}, $filename;
    }
  }

  $files{$filename} = \%file;
}

# Globally resolve NEED_'s
my $need;
for $need (keys %{$global{needs}}) {
  if (@{$global{needs}{$need}} > 1) {
    my @targets = @{$global{needs}{$need}};
    my @t = grep $files{$_}{needed_global}{$need}, @targets;
    @targets = @t if @t;
    @t = grep /\.xs$/i, @targets;
    @targets = @t if @t;
    my $target = shift @targets;
    $files{$target}{needs}{$need} = 'global';

ppport.h  view on Meta::CPAN

#  define ERRSV                          get_sv("@",FALSE)
#endif
#ifndef newSVpvn
#  define newSVpvn(data,len)             ((data)                                              \
                                    ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \
                                    : newSV(0))
#endif

/* Hint: gv_stashpvn
 * This function's backport doesn't support the length parameter, but
 * rather ignores it. Portability can only be ensured if the length
 * parameter is used for speed reasons, but the length can always be
 * correctly computed from the string argument.
 */
#ifndef gv_stashpvn
#  define gv_stashpvn(str,len,create)    gv_stashpv(str,create)
#endif

/* Replace: 1 */
#ifndef get_cv
#  define get_cv                         perl_get_cv

ppport.h  view on Meta::CPAN

}
#endif
#endif

/*
 * Boilerplate macros for initializing and accessing interpreter-local
 * data from C.  All statics in extensions should be reworked to use
 * this, if you want to make the extension thread-safe.  See ext/re/re.xs
 * for an example of the use of these macros.
 *
 * Code that uses these macros is responsible for the following:
 * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts"
 * 2. Declare a typedef named my_cxt_t that is a structure that contains
 *    all the data that needs to be interpreter-local.
 * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t.
 * 4. Use the MY_CXT_INIT macro such that it is called exactly once
 *    (typically put in the BOOT: section).
 * 5. Use the members of the my_cxt_t structure everywhere as
 *    MY_CXT.member.
 * 6. Use the dMY_CXT macro (a declaration) in all the functions that
 *    access MY_CXT.

ppport.h  view on Meta::CPAN

#endif

/*
 * The grok_* routines have been modified to use warn() instead of
 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
 * which is why the stack variable has been renamed to 'xdigit'.
 */

#ifndef grok_bin
#if defined(NEED_grok_bin)
static UV DPPP_(my_grok_bin)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
static
#else
extern UV DPPP_(my_grok_bin)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
#endif

#ifdef grok_bin
#  undef grok_bin
#endif
#define grok_bin(a,b,c,d) DPPP_(my_grok_bin)(aTHX_ a,b,c,d)
#define Perl_grok_bin DPPP_(my_grok_bin)

#if defined(NEED_grok_bin) || defined(NEED_grok_bin_GLOBAL)
UV
DPPP_(my_grok_bin)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_2 = UV_MAX / 2;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
        /* strip off leading b or 0b.
           for compatibility silently suffer "b" and "0b" as valid binary
           numbers. */
        if (len >= 1) {
            if (s[0] == 'b') {
                s++;
                len--;

ppport.h  view on Meta::CPAN

                    value = (value << 1) | (bit - '0');
                    continue;
                }
                /* Bah. We're just overflowed.  */
                warn("Integer overflow in binary number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 2.0;
	    /* If an NV has not enough bits in its mantissa to
	     * represent a UV this summing of small low-order numbers
	     * is a waste of time (because the NV cannot preserve
	     * the low-order bits anyway): we could just remember when
	     * did we overflow and in the end just multiply value_nv by the
	     * right amount. */
            value_nv += (NV)(bit - '0');
            continue;
        }
        if (bit == '_' && len && allow_underscores && (bit = s[1])
            && (bit == '0' || bit == '1'))
	    {
		--len;
		++s;
                goto redo;
	    }
        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
            warn("Illegal binary digit '%c' ignored", *s);
        break;
    }

ppport.h  view on Meta::CPAN

#endif
	) {
	warn("Binary number > 0b11111111111111111111111111111111 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_hex
#if defined(NEED_grok_hex)
static UV DPPP_(my_grok_hex)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
static
#else
extern UV DPPP_(my_grok_hex)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
#endif

#ifdef grok_hex
#  undef grok_hex
#endif
#define grok_hex(a,b,c,d) DPPP_(my_grok_hex)(aTHX_ a,b,c,d)
#define Perl_grok_hex DPPP_(my_grok_hex)

#if defined(NEED_grok_hex) || defined(NEED_grok_hex_GLOBAL)
UV
DPPP_(my_grok_hex)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_16 = UV_MAX / 16;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;
    const char *xdigit;

    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
        /* strip off leading x or 0x.
           for compatibility silently suffer "x" and "0x" as valid hex numbers.
        */
        if (len >= 1) {
            if (s[0] == 'x') {
                s++;

ppport.h  view on Meta::CPAN

                if (value <= max_div_16) {
                    value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
                    continue;
                }
                warn("Integer overflow in hexadecimal number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 16.0;
	    /* If an NV has not enough bits in its mantissa to
	     * represent a UV this summing of small low-order numbers
	     * is a waste of time (because the NV cannot preserve
	     * the low-order bits anyway): we could just remember when
	     * did we overflow and in the end just multiply value_nv by the
	     * right amount of 16-tuples. */
            value_nv += (NV)((xdigit - PL_hexdigit) & 15);
            continue;
        }
        if (*s == '_' && len && allow_underscores && s[1]
		&& (xdigit = strchr((char *) PL_hexdigit, s[1])))
	    {
		--len;
		++s;
                goto redo;
	    }
        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
            warn("Illegal hexadecimal digit '%c' ignored", *s);
        break;
    }

ppport.h  view on Meta::CPAN

#endif
	) {
	warn("Hexadecimal number > 0xffffffff non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifndef grok_oct
#if defined(NEED_grok_oct)
static UV DPPP_(my_grok_oct)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
static
#else
extern UV DPPP_(my_grok_oct)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result);
#endif

#ifdef grok_oct
#  undef grok_oct
#endif
#define grok_oct(a,b,c,d) DPPP_(my_grok_oct)(aTHX_ a,b,c,d)
#define Perl_grok_oct DPPP_(my_grok_oct)

#if defined(NEED_grok_oct) || defined(NEED_grok_oct_GLOBAL)
UV
DPPP_(my_grok_oct)(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
{
    const char *s = start;
    STRLEN len = *len_p;
    UV value = 0;
    NV value_nv = 0;

    const UV max_div_8 = UV_MAX / 8;
    bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
    bool overflowed = FALSE;

    for (; len-- && *s; s++) {
         /* gcc 2.95 optimiser not smart enough to figure that this subtraction
            out front allows slicker code.  */
        int digit = *s - '0';
        if (digit >= 0 && digit <= 7) {
            /* Write it in this wonky order with a goto to attempt to get the
               compiler to make the common case integer-only loop pretty tight.
            */

ppport.h  view on Meta::CPAN

                    value = (value << 3) | digit;
                    continue;
                }
                /* Bah. We're just overflowed.  */
                warn("Integer overflow in octal number");
                overflowed = TRUE;
                value_nv = (NV) value;
            }
            value_nv *= 8.0;
	    /* If an NV has not enough bits in its mantissa to
	     * represent a UV this summing of small low-order numbers
	     * is a waste of time (because the NV cannot preserve
	     * the low-order bits anyway): we could just remember when
	     * did we overflow and in the end just multiply value_nv by the
	     * right amount of 8-tuples. */
            value_nv += (NV)digit;
            continue;
        }
        if (digit == ('_' - '0') && len && allow_underscores
            && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
	    {
		--len;
		++s;
                goto redo;
	    }
        /* Allow \octal to work the DWIM way (that is, stop scanning
         * as soon as non-octal characters are seen, complain only iff
         * someone seems to want to use the digits eight and nine). */
        if (digit == 8 || digit == 9) {

ppport.h  view on Meta::CPAN

#endif
	) {
	warn("Octal number > 037777777777 non-portable");
    }
    *len_p = s - start;
    if (!overflowed) {
        *flags = 0;
        return value;
    }
    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
    if (result)
        *result = value_nv;
    return UV_MAX;
}
#endif
#endif

#ifdef NO_XSLOCKS
#  ifdef dJMPENV
#    define dXCPT             dJMPENV; int rEtV = 0
#    define XCPT_TRY_START    JMPENV_PUSH(rEtV); if (rEtV == 0)
#    define XCPT_TRY_END      JMPENV_POP;

t/03-learn_by_gis.t  view on Meta::CPAN


my ($lambda, $d_lambda, $p1_f, $n);
my $zero = 1e-5;
my $me = AI::MaxEntropy->new(); 
$me->see(['round', 'smooth', 'red'] => 'apple' => 2);
$me->see(['long', 'smooth', 'yellow'] => 'banana' => 3);
$me->{algorithm}->{type} = 'gis';

###
NAME 'The first iteration';
$me->{algorithm}->{progress_cb} =
    sub { ($lambda, $d_lambda) = ($_[1], $_[2]); 1 };
$me->learn;
$p1_f = [
    2 * (exp(0) / (exp(0) + exp(0))),
    2 * (exp(0) / (exp(0) + exp(0))) + 3 * (exp(0) / (exp(0) + exp(0))),
    2 * (exp(0) / (exp(0) + exp(0))),
    3 * (exp(0) / (exp(0) + exp(0))),
    3 * (exp(0) / (exp(0) + exp(0))),
    2 * (exp(0) / (exp(0) + exp(0))),
    2 * (exp(0) / (exp(0) + exp(0))) + 3 * (exp(0) / (exp(0) + exp(0))),

t/03-learn_by_gis.t  view on Meta::CPAN

	(1.0 / 3) * log($zero / $p1_f->[7]),
	(1.0 / 3) * log(3 / $p1_f->[8]),
	(1.0 / 3) * log(3 / $p1_f->[9])
    ]
],
$__;

###
NAME 'The second iteration';
my @l = @$lambda;
$me->{algorithm}->{progress_cb} =
    sub { ($lambda, $d_lambda) = ($_[1], $_[2]); $n++; $n >= 2 ? 1 : 0 };
$me->learn;
my $p0 = exp($l[0] + $l[1] + $l[2]) + exp($l[5] + $l[6] + $l[7]);
my $p0_0 = exp($l[0] + $l[1] + $l[2]) / $p0;
my $p0_1 = exp($l[5] + $l[6] + $l[7]) / $p0;
my $p1 = exp($l[6] + $l[8] + $l[9]) + exp($l[1] + $l[3] + $l[4]);
my $p1_0 = exp($l[1] + $l[3] + $l[4]) / $p1;
my $p1_1 = exp($l[6] + $l[8] + $l[9]) / $p1;
$p1_f = [
    2 * $p0_0,

t/05-util.t  view on Meta::CPAN

###
NAME 'map_partially o-o => o';
$a = [1, 2, 3, 4, 5, 6];
$b = map_partially { $_ + 1 } $a, 'o-o' => 'o';
is_deeply $b, [2, 3, 6, 7],
$__;

###
NAME 'train_and_test xxo';
require AI::MaxEntropy;
my ($me, $samples, $result, $model);
$me = AI::MaxEntropy->new;
$samples = [
    [['a', 'b', 'c'] => 'x'],
    [['e', 'f'] => 'z'],
    [['e'] => 'z']
];
($result, $model) = train_and_test($me, $samples, 'xxo');
is_deeply
$result,
[
    [[['e'] => 'z'] => 'z']
],
$__;

###
NAME 'train_and_test xxxxo';
$me->forget_all;
$samples = [
    [['a', 'b'] => 'x'],
    [['c', 'd'] => 'y'],
    [['i', 'j'] => 'z'],
    [['p', 'q'] => 'xx'],
    [['a'] => 'x'],
    [['c'] => 'x' => 2]
];
($result, $model) = train_and_test($me, $samples, 'xxxxo');
is_deeply
$result,
[
    [[['a'] => 'x'] => 'x'],
    [[['c'] => 'x' => 2] => 'y']
],
$__;

###
NAME 'precision';
delta_ok precision($result), 1 / 3,
$__;

###
NAME 'recall of x';
delta_ok recall($result, 'x'), 1 / 3,
$__;



( run in 0.762 second using v1.01-cache-2.11-cpan-49f99fa48dc )