AI-XGBoost


Build.PL  view on Meta::CPAN

    "Module::Build" => "0.28"
  },
  "dist_abstract" => "Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>",
  "dist_author" => [
    "Pablo Rodr\x{ed}guez Gonz\x{e1}lez <pablo.rodriguez.gonzalez\@gmail.com>"
  ],
  "dist_name" => "AI-XGBoost",
  "dist_version" => "0.11",
  "license" => "apache",
  "module_name" => "AI::XGBoost",
  "recursive_test_files" => 1,
  "requires" => {
    "Alien::XGBoost" => 0,
    "Carp" => 0,
    "Exception::Class" => 0,
    "Exporter::Easy" => 0,
    "FFI::Platypus" => 0,
    "Moose" => 0,
    "NativeCall" => 0,
    "namespace::autoclean" => 0,
    "perl" => "5.010",
    "strict" => 0,
    "utf8" => 0,
    "warnings" => 0
  },
  "test_requires" => {
    "Test::More" => 0,
    "Test::Most" => 0
  }
);


my %fallback_build_requires = (
  "Module::Build" => "0.28",
  "Test::More" => 0,
  "Test::Most" => 0
);


unless ( eval { Module::Build->VERSION(0.4004) } ) {
  delete $module_build_args{test_requires};
  $module_build_args{build_requires} = \%fallback_build_requires;
}

my $build = Module::Build->new(%module_build_args);


$build->create_build_script;

MANIFEST  view on Meta::CPAN

# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.008.
Build.PL
Changes
LICENSE
MANIFEST
META.json
META.yml
README
dist.ini
examples/agaricus.txt.test
examples/agaricus.txt.train
examples/basic.pl
examples/capi.pl
examples/capi_dump_model.pl
examples/capi_raw.pl
examples/featmap.txt
examples/iris.pl
lib/AI/XGBoost.pm
lib/AI/XGBoost/Booster.pm
lib/AI/XGBoost/CAPI.pm

META.json  view on Meta::CPAN

            "FFI::Platypus" : "0",
            "Moose" : "0",
            "NativeCall" : "0",
            "namespace::autoclean" : "0",
            "perl" : "5.010",
            "strict" : "0",
            "utf8" : "0",
            "warnings" : "0"
         }
      },
      "test" : {
         "requires" : {
            "Test::More" : "0",
            "Test::Most" : "0"
         }
      }
   },
   "release_status" : "stable",
   "resources" : {
      "bugtracker" : {
         "web" : "https://github.com/pablrod/p5-AI-XGBoost/issues"

dist.ini  view on Meta::CPAN

[PodSyntaxTests] 
[PodCoverageTests]
[AutoPrereqs]
[PerlTidy]
perltidyrc = .perltidyrc
[Test::Perl::Critic]

[Prereqs]
NativeCall = 0
[Prereqs / TestRequires]
Test::Most = 0 ; for done_testing
[Prereqs / DevelopRequires]
Pod::Weaver = 0
Pod::Weaver::Section::Contributors = 0

examples/basic.pl  view on Meta::CPAN

use 5.010;
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);

# We are going to solve a binary classification problem:
#  Mushroom poisonous or not

my $train_data = DMatrix->From(file => 'agaricus.txt.train');
my $test_data = DMatrix->From(file => 'agaricus.txt.test');

# With XGBoost we can solve this problem using the 'gbtree' booster
#  (gradient boosted regression trees) with the logistic regression
#  objective 'binary:logistic' as the loss function
# XGBoost Tree Booster has a lot of parameters that we can tune
# (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)

my $booster = train(data => $train_data, number_of_rounds => 10, params => {
        objective => 'binary:logistic',
        eta => 1.0,
        max_depth => 2,
        silent => 1
    });

# For binary classification, predictions are probability scores in [0, 1]
#  indicating the confidence that the label is positive (1 in the first column of agaricus.txt.test)
my $predictions = $booster->predict(data => $test_data);

say join "\n", @$predictions[0 .. 10];

examples/capi.pl  view on Meta::CPAN

use 5.010;
use AI::XGBoost::CAPI qw(:all);

my $dtrain = XGDMatrixCreateFromFile('agaricus.txt.train');
my $dtest = XGDMatrixCreateFromFile('agaricus.txt.test');

my ($rows, $cols) = (XGDMatrixNumRow($dtrain), XGDMatrixNumCol($dtrain));
say "Train dimensions: $rows, $cols";

my $booster = XGBoosterCreate([$dtrain]);

for my $iter (0 .. 10) {
    XGBoosterUpdateOneIter($booster, $iter, $dtrain);
}

my $predictions = XGBoosterPredict($booster, $dtest);
# say join "\n", @$predictions;

XGBoosterFree($booster);
XGDMatrixFree($dtrain);
XGDMatrixFree($dtest);




examples/capi_dump_model.pl  view on Meta::CPAN

use strict;
use warnings;
use 5.010;
use Data::Dumper;
use AI::XGBoost::CAPI qw(:all);

my $dtrain = XGDMatrixCreateFromFile('agaricus.txt.train');
my $dtest = XGDMatrixCreateFromFile('agaricus.txt.test');

my $booster = XGBoosterCreate([$dtrain]);
XGBoosterUpdateOneIter($booster, 1, $dtrain);

my $json_model_with_stats = XGBoosterDumpModelEx($booster, "featmap.txt", 1, "json");

say Dumper $json_model_with_stats;

XGBoosterFree($booster);
XGDMatrixFree($dtrain);
XGDMatrixFree($dtest);





examples/capi_raw.pl  view on Meta::CPAN

use 5.010;
use AI::XGBoost::CAPI::RAW;
use FFI::Platypus;

my $silent = 0;
my ($dtrain, $dtest) = (0, 0);

AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile('agaricus.txt.test', $silent, \$dtest);
AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile('agaricus.txt.train', $silent, \$dtrain);

my ($rows, $cols) = (0, 0);
AI::XGBoost::CAPI::RAW::XGDMatrixNumRow($dtrain, \$rows);
AI::XGBoost::CAPI::RAW::XGDMatrixNumCol($dtrain, \$cols);
say "Dimensions: $rows, $cols";

my $booster = 0;

AI::XGBoost::CAPI::RAW::XGBoosterCreate( [$dtrain] , 1, \$booster);

for my $iter (0 .. 10) {
    AI::XGBoost::CAPI::RAW::XGBoosterUpdateOneIter($booster, $iter, $dtrain);
}

my $out_len = 0;
my $out_result = 0;

AI::XGBoost::CAPI::RAW::XGBoosterPredict($booster, $dtest, 0, 0, \$out_len, \$out_result);
my $ffi = FFI::Platypus->new();
my $predictions = $ffi->cast(opaque => "float[$out_len]", $out_result);

#say join "\n", @$predictions;

AI::XGBoost::CAPI::RAW::XGBoosterFree($booster);
AI::XGBoost::CAPI::RAW::XGDMatrixFree($dtrain);
AI::XGBoost::CAPI::RAW::XGDMatrixFree($dtest);




examples/iris.pl  view on Meta::CPAN


# XGBoost expects numeric class labels, so we encode the species names as integers
my %class = (
    setosa => 0,
    versicolor => 1,
    virginica => 2
);

my $iris = Data::Dataset::Classic::Iris::get();

# Separate features from labels (this example uses the full dataset for both training and testing)
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];

sub transpose {
# Transposing without using PDL, Data::Table, Data::Frame or other modules
# to keep minimal dependencies
    my $array = shift;
    my @aux = ();
    for my $row (@$array) {
        for my $column (0 .. scalar @$row - 1) {
            push @{$aux[$column]}, $row->[$column];
        }
    }
    return \@aux;
}

$train_dataset = transpose($train_dataset);
$test_dataset = transpose($test_dataset);

my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
my $test_label = [map {$class{$_}} @{$iris->{'species'}}];

my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);

# Multiclass problems need a different objective function and the number
#  of classes; here we use 'multi:softprob' with num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
        max_depth => 3,
        eta => 0.3,
        silent => 1,
        objective => 'multi:softprob',
        num_class => 3
    });

my $predictions = $booster->predict(data => $test_data);



lib/AI/XGBoost.pm  view on Meta::CPAN

=head1 SYNOPSIS

 use 5.010;
 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);
 
 # We are going to solve a binary classification problem:
 #  Mushroom poisonous or not
 
 my $train_data = DMatrix->From(file => 'agaricus.txt.train');
 my $test_data = DMatrix->From(file => 'agaricus.txt.test');
 
 # With XGBoost we can solve this problem using the 'gbtree' booster
 #  (gradient boosted regression trees) with the logistic regression
 #  objective 'binary:logistic' as the loss function
 # XGBoost Tree Booster has a lot of parameters that we can tune
 # (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
 
 my $booster = train(data => $train_data, number_of_rounds => 10, params => {
         objective => 'binary:logistic',
         eta => 1.0,
         max_depth => 2,
         silent => 1
     });
 
 # For binary classification, predictions are probability scores in [0, 1]
 #  indicating the confidence that the label is positive (1 in the first column of agaricus.txt.test)
 my $predictions = $booster->predict(data => $test_data);
 
 say join "\n", @$predictions[0 .. 10];

 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);
 use Data::Dataset::Classic::Iris;
 
 # We are going to solve a multiclass classification problem:
 #  determining the plant species from a set of flower measurements
 
 # XGBoost expects numeric class labels, so we encode the species names as integers
 my %class = (
     setosa => 0,
     versicolor => 1,
     virginica => 2
 );
 
 my $iris = Data::Dataset::Classic::Iris::get();
 
 # Separate features from labels (this example uses the full dataset for both training and testing)
 my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 
 sub transpose {
 # Transposing without using PDL, Data::Table, Data::Frame or other modules
 # to keep minimal dependencies
     my $array = shift;
     my @aux = ();
     for my $row (@$array) {
         for my $column (0 .. scalar @$row - 1) {
             push @{$aux[$column]}, $row->[$column];
         }
     }
     return \@aux;
 }
 
 $train_dataset = transpose($train_dataset);
 $test_dataset = transpose($test_dataset);
 
 my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
 my $test_label = [map {$class{$_}} @{$iris->{'species'}}];
 
 my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
 my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);
 
 # Multiclass problems need a different objective function and the number
 #  of classes; here we use 'multi:softprob' with num_class => 3
 my $booster = train(data => $train_data, number_of_rounds => 20, params => {
         max_depth => 3,
         eta => 0.3,
         silent => 1,
         objective => 'multi:softprob',
         num_class => 3
     });
 
 my $predictions = $booster->predict(data => $test_data);

=head1 DESCRIPTION

Perl wrapper for XGBoost library. 

The easiest way to use the wrapper is via C<train>, but first you need your
data wrapped in a C<DMatrix> object.
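
A minimal sketch of that workflow, condensed from the SYNOPSIS above (the
agaricus files are the sample data shipped in XGBoost's demo/data directory):

 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);

 my $train_data  = DMatrix->From(file => 'agaricus.txt.train');
 my $booster     = train(data => $train_data, number_of_rounds => 10,
                         params => { objective => 'binary:logistic' });
 my $predictions = $booster->predict(data => $train_data);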

This is a work in progress; feedback, comments, issues, suggestions and
pull requests are welcome!

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN

=head1 SYNOPSIS

 use 5.010;
 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);
 
 # We are going to solve a binary classification problem:
 #  Mushroom poisonous or not
 
 my $train_data = DMatrix->From(file => 'agaricus.txt.train');
 my $test_data = DMatrix->From(file => 'agaricus.txt.test');
 
 # With XGBoost we can solve this problem using the 'gbtree' booster
 #  (gradient boosted regression trees) with the logistic regression
 #  objective 'binary:logistic' as the loss function
 # XGBoost Tree Booster has a lot of parameters that we can tune
 # (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
 
 my $booster = train(data => $train_data, number_of_rounds => 10, params => {
         objective => 'binary:logistic',
         eta => 1.0,
         max_depth => 2,
         silent => 1
     });
 
 # For binary classification, predictions are probability scores in [0, 1]
 #  indicating the confidence that the label is positive (1 in the first column of agaricus.txt.test)
 my $predictions = $booster->predict(data => $test_data);
 
 say join "\n", @$predictions[0 .. 10];

=head1 DESCRIPTION

Booster objects control training, prediction and evaluation

This is a work in progress; the API may change. Comments and suggestions are welcome!

=head1 METHODS

lib/AI/XGBoost/CAPI.pm  view on Meta::CPAN

=head1 VERSION

version 0.11

=head1 SYNOPSIS

 use 5.010;
 use AI::XGBoost::CAPI qw(:all);
 
 my $dtrain = XGDMatrixCreateFromFile('agaricus.txt.train');
 my $dtest = XGDMatrixCreateFromFile('agaricus.txt.test');
 
 my ($rows, $cols) = (XGDMatrixNumRow($dtrain), XGDMatrixNumCol($dtrain));
 say "Train dimensions: $rows, $cols";
 
 my $booster = XGBoosterCreate([$dtrain]);
 
 for my $iter (0 .. 10) {
     XGBoosterUpdateOneIter($booster, $iter, $dtrain);
 }
 
 my $predictions = XGBoosterPredict($booster, $dtest);
 # say join "\n", @$predictions;
 
 XGBoosterFree($booster);
 XGDMatrixFree($dtrain);
 XGDMatrixFree($dtest);

=head1 DESCRIPTION

Perlified wrapper for the C API

=head2 Error handling

XGBoost C API functions return an integer status code to signal the presence or absence of an error.
In this module errors are instead reported as exceptions built with L<Exception::Class>.
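
For example, a failing call can be trapped with a plain C<eval>. This is just a
sketch: the specific exception classes thrown are not listed here, so the error
object is only inspected generically (Exception::Class objects stringify to
their error message):

 use AI::XGBoost::CAPI qw(:all);

 my $dtrain = eval { XGDMatrixCreateFromFile('no_such_file.txt') };
 if (my $error = $@) {
     warn "XGBoost error: $error";
 }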

lib/AI/XGBoost/CAPI/RAW.pm  view on Meta::CPAN


version 0.11

=head1 SYNOPSIS

 use 5.010;
 use AI::XGBoost::CAPI::RAW;
 use FFI::Platypus;
 
 my $silent = 0;
 my ($dtrain, $dtest) = (0, 0);
 
 AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile('agaricus.txt.test', $silent, \$dtest);
 AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile('agaricus.txt.train', $silent, \$dtrain);
 
 my ($rows, $cols) = (0, 0);
 AI::XGBoost::CAPI::RAW::XGDMatrixNumRow($dtrain, \$rows);
 AI::XGBoost::CAPI::RAW::XGDMatrixNumCol($dtrain, \$cols);
 say "Dimensions: $rows, $cols";
 
 my $booster = 0;
 
 AI::XGBoost::CAPI::RAW::XGBoosterCreate( [$dtrain] , 1, \$booster);
 
 for my $iter (0 .. 10) {
     AI::XGBoost::CAPI::RAW::XGBoosterUpdateOneIter($booster, $iter, $dtrain);
 }
 
 my $out_len = 0;
 my $out_result = 0;
 
 AI::XGBoost::CAPI::RAW::XGBoosterPredict($booster, $dtest, 0, 0, \$out_len, \$out_result);
 my $ffi = FFI::Platypus->new();
 my $predictions = $ffi->cast(opaque => "float[$out_len]", $out_result);
 
 #say join "\n", @$predictions;
 
 AI::XGBoost::CAPI::RAW::XGBoosterFree($booster);
 AI::XGBoost::CAPI::RAW::XGDMatrixFree($dtrain);
 AI::XGBoost::CAPI::RAW::XGDMatrixFree($dtest);

=head1 DESCRIPTION

Wrapper for the C API.

The documentation for these functions is extracted from the doxygen comments in https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h

=head1 FUNCTIONS

=head2 XGBGetLastError
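
The C API signals errors through the integer return value of each function
(0 on success) and stores the message for later retrieval. A minimal sketch of
that pattern at the raw level, assuming the binding passes the status code
through unchanged and exposes the message as a Perl string via
C<XGBGetLastError>:

 use AI::XGBoost::CAPI::RAW;

 my $dtrain = 0;
 my $status = AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile('no_such_file.txt', 0, \$dtrain);
 if ($status != 0) {
     die 'XGBoost error: ' . AI::XGBoost::CAPI::RAW::XGBGetLastError() . "\n";
 }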

misc/using_capi.c  view on Meta::CPAN

#include <stdio.h>
#include <xgboost/c_api.h>

int main() {
    
    DMatrixHandle dtrain;
    DMatrixHandle dtest;
    // Agaricus files can be found in XGBoost demo/data directory
    // Original source: http://archive.ics.uci.edu/ml/datasets/mushroom
    XGDMatrixCreateFromFile("agaricus.txt.test", 0, &dtest);
    XGDMatrixCreateFromFile("agaricus.txt.train", 0, &dtrain);
    DMatrixHandle cache[] = {dtrain};
    BoosterHandle booster;
    XGBoosterCreate(cache, 1, &booster);
    for (int iter = 0; iter < 11; iter++) {
        XGBoosterUpdateOneIter(booster, iter, dtrain);
    }

    bst_ulong out_len;
    const float *out_result;
    XGBoosterPredict(booster, dtest, 0, 0, &out_len, &out_result);

    printf("Length: %lu\n", (unsigned long) out_len);
    for (bst_ulong output = 0; output < out_len; output++) {
        printf("%f\n", out_result[output]);
    }

    // Free the booster and both DMatrix handles before exiting
    XGBoosterFree(booster);
    XGDMatrixFree(dtrain);
    XGDMatrixFree(dtest);

    return 0;
}

t/00-load.t  view on Meta::CPAN

#!/usr/bin/env perl -w

use strict;
use warnings;
use utf8;

use Test::More tests => 1;

BEGIN {
    use_ok('AI::XGBoost');
}

t/10-cast_arguments.t  view on Meta::CPAN

#!/usr/bin/env perl -w

use strict;
use warnings;
use utf8;

use Test::Most tests => 1;

=encoding utf8

=head1 NAME

Cast from native types to Perl types and back and forth

=head1 TESTS

=cut

t/20-dmatrix.t  view on Meta::CPAN

#!/usr/bin/env perl -w

use strict;
use warnings;
use utf8;

use Test::More tests => 6;

BEGIN {
    use_ok('AI::XGBoost::DMatrix');
}

{
    my $matrix = [ [ 1, 1 ] ];
    my $data = AI::XGBoost::DMatrix->FromMat( matrix => $matrix );
    is( $data->num_row, scalar @$matrix,          'DMatrix constructed has the right number of rows' );
    is( $data->num_col, scalar @{ $matrix->[0] }, 'DMatrix constructed has the right number of cols' );

t/author-critic.t  view on Meta::CPAN

#!perl

BEGIN {
    unless ( $ENV{AUTHOR_TESTING} ) {
        print qq{1..0 # SKIP these tests are for testing by the author\n};
        exit;
    }
}

use strict;
use warnings;

use Test::More;
use English qw(-no_match_vars);

t/author-pod-coverage.t  view on Meta::CPAN

#!perl

BEGIN {
    unless ( $ENV{AUTHOR_TESTING} ) {
        print qq{1..0 # SKIP these tests are for testing by the author\n};
        exit;
    }
}

# This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests.

use Test::Pod::Coverage 1.08;
use Pod::Coverage::TrustPod;

all_pod_coverage_ok( { coverage_class => 'Pod::Coverage::TrustPod' } );

t/author-pod-syntax.t  view on Meta::CPAN

#!perl

BEGIN {
    unless ( $ENV{AUTHOR_TESTING} ) {
        print qq{1..0 # SKIP these tests are for testing by the author\n};
        exit;
    }
}

# This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests.
use strict;
use warnings;
use Test::More;
use Test::Pod 1.41;

t/author-synopsis.t  view on Meta::CPAN

#!perl

BEGIN {
    unless ( $ENV{AUTHOR_TESTING} ) {
        print qq{1..0 # SKIP these tests are for testing by the author\n};
        exit;
    }
}

use Test::Synopsis;

all_synopsis_ok();


