view release on metacpan or search on metacpan
examples/basic.pl view on Meta::CPAN
use 5.010;
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
# We are going to solve a binary classification problem:
# Mushroom poisonous or not
my $train_data = DMatrix->From(file => 'agaricus.txt.train');
my $test_data = DMatrix->From(file => 'agaricus.txt.test');
# With XGBoost we can solve this problem using 'gbtree' booster
# and as loss function a logistic regression 'binary:logistic'
# (Gradient Boosting Regression Tree)
# XGBoost Tree Booster has a lot of parameters that we can tune
# (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
my $booster = train(data => $train_data, number_of_rounds => 10, params => {
objective => 'binary:logistic',
eta => 1.0,
max_depth => 2,
silent => 1
});
# For binary classification predictions are probability confidence scores in [0, 1]
# indicating that the label is positive (1 in the first column of agaricus.txt.test)
my $predictions = $booster->predict(data => $test_data);
say join "\n", @$predictions[0 .. 10];
examples/iris.pl view on Meta::CPAN
# XGBoost uses number for "class" so we are going to codify classes
my %class = (
setosa => 0,
versicolor => 1,
virginica => 2
);
my $iris = Data::Dataset::Classic::Iris::get();
# Split train and test, label and features
# NOTE(review): no actual train/test split is performed here — both
# datasets are built from the same full Iris table; a real example
# should partition the rows before training.
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
sub transpose {
    # Swap the rows and columns of a 2D structure (arrayref of arrayrefs).
    # Implemented by hand instead of using PDL, Data::Table, Data::Frame
    # or similar modules, to keep the example's dependencies minimal.
    my ($matrix) = @_;
    my @transposed;
    for my $row (@$matrix) {
        my $col_index = 0;
        for my $cell (@$row) {
            push @{ $transposed[$col_index] }, $cell;
            $col_index++;
        }
    }
    return \@transposed;
}
$train_dataset = transpose($train_dataset);
$test_dataset = transpose($test_dataset);
# Encode the species names as the numeric class ids declared above
my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
my $test_label = [map {$class{$_}} @{$iris->{'species'}}];
my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);
# Multiclass problems need a different objective function and the number
# of classes, in this case we are using 'multi:softprob' and
# num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
max_depth => 3,
eta => 0.3,
silent => 1,
objective => 'multi:softprob',
num_class => 3
});
my $predictions = $booster->predict(data => $test_data);
lib/AI/XGBoost.pm view on Meta::CPAN
use AI::XGBoost::Booster;
use Exporter::Easy ( OK => ['train'] );
our $VERSION = '0.11'; # VERSION
# ABSTRACT: Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>
sub train {
    # Train a gradient boosting model and return the booster.
    #
    # Named parameters:
    #   data             - AI::XGBoost::DMatrix holding the training set
    #   number_of_rounds - number of boosting iterations to run
    #   params           - optional hashref of booster parameters
    #
    # Returns the trained AI::XGBoost::Booster.
    my %args   = @_;
    my $params = $args{params};
    my $data   = $args{data};
    my $rounds = $args{number_of_rounds};

    my $booster = AI::XGBoost::Booster->new( cache => [$data] );

    # Apply user-supplied booster parameters, if any were given.
    if ( defined $params ) {
        for my $name ( keys %$params ) {
            $booster->set_param( $name, $params->{$name} );
        }
    }

    $booster->update( dtrain => $data, iteration => $_ ) for 0 .. $rounds - 1;
    return $booster;
}
1;
__END__
=pod
lib/AI/XGBoost.pm view on Meta::CPAN
=head1 SYNOPSIS
use 5.010;
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
# We are going to solve a binary classification problem:
# Mushroom poisonous or not
my $train_data = DMatrix->From(file => 'agaricus.txt.train');
my $test_data = DMatrix->From(file => 'agaricus.txt.test');
# With XGBoost we can solve this problem using 'gbtree' booster
# and as loss function a logistic regression 'binary:logistic'
# (Gradient Boosting Regression Tree)
# XGBoost Tree Booster has a lot of parameters that we can tune
# (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
my $booster = train(data => $train_data, number_of_rounds => 10, params => {
objective => 'binary:logistic',
eta => 1.0,
max_depth => 2,
silent => 1
});
# For binary classification predictions are probability confidence scores in [0, 1]
# indicating that the label is positive (1 in the first column of agaricus.txt.test)
my $predictions = $booster->predict(data => $test_data);
say join "\n", @$predictions[0 .. 10];
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
use Data::Dataset::Classic::Iris;
# We are going to solve a multiple classification problem:
# determining plant species using a set of flower's measures
# XGBoost uses number for "class" so we are going to codify classes
my %class = (
setosa => 0,
versicolor => 1,
virginica => 2
);
my $iris = Data::Dataset::Classic::Iris::get();
# Split train and test, label and features
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
sub transpose {
# Transposing without using PDL, Data::Table, Data::Frame or other modules
# to keep minimal dependencies
my $array = shift;
my @aux = ();
for my $row (@$array) {
for my $column (0 .. scalar @$row - 1) {
push @{$aux[$column]}, $row->[$column];
}
}
return \@aux;
}
$train_dataset = transpose($train_dataset);
$test_dataset = transpose($test_dataset);
my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
my $test_label = [map {$class{$_}} @{$iris->{'species'}}];
my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);
# Multiclass problems need a different objective function and the number
# of classes, in this case we are using 'multi:softprob' and
# num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
max_depth => 3,
eta => 0.3,
silent => 1,
objective => 'multi:softprob',
num_class => 3
});
my $predictions = $booster->predict(data => $test_data);
=head1 DESCRIPTION
Perl wrapper for XGBoost library.
The easiest way to use the wrapper is using C<train>, but beforehand
you need the data to be used contained in a C<DMatrix> object
This is a work in progress, feedback, comments, issues, suggestion and
pull requests are welcome!!
XGBoost library is used via L<Alien::XGBoost>. That means downloading,
compiling and installing if it's not available in your system.
=head1 FUNCTIONS
=head2 train
Performs gradient boosting using the data and parameters passed
Returns a trained AI::XGBoost::Booster used
=head3 Parameters
=over 4
=item params
Parameters for the booster object.
Full list available: https://github.com/dmlc/xgboost/blob/master/doc/parameter.md
=item data
AI::XGBoost::DMatrix object used for training
=item number_of_rounds
Number of boosting iterations
=back
=head1 ROADMAP
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
my $self = shift;
my %args = @_;
my ( $dtrain, $grad, $hess ) = @args{qw(dtrain grad hess)};
XGBoosterBoostOneIter( $self->_handle, $dtrain, $grad, $hess );
return $self;
}
sub predict {
    # Run the trained model over the rows of a DMatrix.
    #
    # Named parameters:
    #   data - AI::XGBoost::DMatrix with the rows to predict
    #
    # Returns an arrayref with one entry per row. When the model emits
    # several values per row (e.g. multi:softprob), each entry is itself
    # an arrayref of per-class values.
    my ( $self, %args ) = @_;
    my $matrix = $args{data};

    my $raw    = XGBoosterPredict( $self->_handle, $matrix->handle );
    my $values = scalar @$raw;
    my $rows   = $matrix->num_row;

    # More than one value per row and an exact multiple: reshape the
    # flat prediction vector into a row-major list of rows.
    if ( $values != $rows && $values % $rows == 0 ) {
        my $per_row = $values / $rows;
        my @shaped;
        for my $row ( 0 .. $rows - 1 ) {
            my $start = $row * $per_row;
            push @shaped, [ @$raw[ $start .. $start + $per_row - 1 ] ];
        }
        return \@shaped;
    }
    return $raw;
}
sub set_param {
my $self = shift;
my ( $name, $value ) = @_;
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
=head1 SYNOPSIS
use 5.010;
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
# We are going to solve a binary classification problem:
# Mushroom poisonous or not
my $train_data = DMatrix->From(file => 'agaricus.txt.train');
my $test_data = DMatrix->From(file => 'agaricus.txt.test');
# With XGBoost we can solve this problem using 'gbtree' booster
# and as loss function a logistic regression 'binary:logistic'
# (Gradient Boosting Regression Tree)
# XGBoost Tree Booster has a lot of parameters that we can tune
# (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
my $booster = train(data => $train_data, number_of_rounds => 10, params => {
objective => 'binary:logistic',
eta => 1.0,
max_depth => 2,
silent => 1
});
# For binary classification predictions are probability confidence scores in [0, 1]
# indicating that the label is positive (1 in the first column of agaricus.txt.test)
my $predictions = $booster->predict(data => $test_data);
say join "\n", @$predictions[0 .. 10];
=head1 DESCRIPTION
Booster objects control training, prediction and evaluation
Work In Progress, the API may change. Comments and suggestions are welcome!
=head1 METHODS
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
=head3 Parameters
=over 4
=item iteration
Current iteration number
=item dtrain
Training data (AI::XGBoost::DMatrix)
=back
=head2 boost
Boost one iteration using your own gradient
=head3 Parameters
=over 4
=item dtrain
Training data (AI::XGBoost::DMatrix)
=item grad
Gradient of your objective function (Reference to an array)
=item hess
Hessian of your objective function, that is, second order gradient (Reference to an array)
=back
=head2 predict
Predict data using the trained model
=head3 Parameters
=over 4
=item data
Data to predict
=back
=head2 set_param
Set booster parameter
=head3 Example
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
=over 4
=item importance_type
Type of importance. Valid values:
=over 4
=item weight
Number of times a feature is used to split the data across all trees
=item gain
Average gain of the feature when it is used in trees
=item cover
Average coverage of the feature when it is used in trees
=back
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
sub XGDMatrixCreateFromFile {
    # Load a DMatrix from a file and return its opaque handle.
    # $silent (default 1) suppresses messages while loading.
    # Dies (via _CheckCall) if the C API reports an error.
    my ( $filename, $silent ) = @_;
    $silent //= 1;
    my $matrix = 0;
    my $error = AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile( $filename, $silent, \$matrix );
    _CheckCall($error);
    return $matrix;
}
sub XGDMatrixCreateFromMat {
    # Build a DMatrix handle from a dense matrix (arrayref of arrayrefs).
    # $missing is the sentinel value marking missing cells; defaults to NaN.
    my ( $rows, $missing ) = @_;
    $missing = "NaN" unless defined $missing;

    # TODO Support simple arrays
    # TODO Support PDL
    # TODO Adapters?

    # The C API expects a flat, row-major buffer plus the dimensions.
    my @flat;
    push @flat, @$_ for @$rows;
    my $row_count = scalar @$rows;
    my $col_count = scalar @{ $rows->[0] };

    my $handle = 0;
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromMat( \@flat, $row_count, $col_count, $missing, \$handle ) );
    return $handle;
}
sub XGDMatrixNumRow {
    # Return the number of rows in the DMatrix referenced by $matrix.
    my ($matrix) = @_;
    my $rows = 0;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixNumRow( $matrix, \$rows ) );
    return $rows;
}
sub XGDMatrixNumCol {
    # Return the number of columns in the DMatrix referenced by $matrix.
    my ($matrix) = @_;
    my $cols = 0;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixNumCol( $matrix, \$cols ) );
    return $cols;
}
sub XGDMatrixSetFloatInfo {
    # Attach a float meta-info vector (field $info, e.g. 'label' or
    # 'weight') to a DMatrix. $data is an arrayref; its length is
    # forwarded to the C API.
    my ( $matrix, $info, $data ) = @_;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixSetFloatInfo( $matrix, $info, $data, scalar @$data ) );
}
sub XGDMatrixGetFloatInfo {
    # Fetch a float meta-info vector (field $field, e.g. 'label' or
    # 'weight') from a DMatrix. Returns an arrayref of floats.
    my ( $handle, $field ) = @_;
    my ( $length, $pointer ) = ( 0, 0 );
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixGetFloatInfo( $handle, $field, \$length, \$pointer ) );
    # The C API hands back a raw pointer; cast it into a Perl float array.
    return FFI::Platypus->new()->cast( opaque => "float[$length]", $pointer );
}
sub XGDMatrixSetUintInfo {
    # Attach a uint32 meta-info vector (field $info) to a DMatrix.
    # $data is an arrayref; its length is forwarded to the C API.
    my ( $matrix, $info, $data ) = @_;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixSetUintInfo( $matrix, $info, $data, scalar @$data ) );
}
sub XGDMatrixGetUintInfo {
    # Fetch a uint32 meta-info vector (field $info) from a DMatrix.
    # Returns an arrayref of unsigned integers.
    my ( $matrix, $info ) = @_;
    my $out_len = 0;
    my $out_result = 0;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixGetUintInfo( $matrix, $info, \$out_len, \$out_result ) );
    # Cast the raw C pointer returned by the API into a Perl array.
    my $ffi = FFI::Platypus->new();
    return $ffi->cast( opaque => "uint32[$out_len]", $out_result );
}
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
AI::XGBoost::CAPI::RAW::XGBoosterEvalOneIter(
$booster, $iter, $matrices, $array_of_opaque_matrices_names,
$number_of_matrices, \$out_result
)
);
$out_result = $ffi->cast( opaque => "opaque[$number_of_matrices]", $out_result );
return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}
sub XGBoosterPredict {
    # Run prediction for every row of $data_matrix using $booster.
    #
    # Parameters:
    #   booster     - booster handle
    #   data_matrix - DMatrix handle with the rows to predict
    #   option_mask - prediction option bit-mask (default 0, normal prediction)
    #   ntree_limit - cap on trees used; 0 (default) means all trees
    #
    # Returns an arrayref of floats with the raw predictions.
    my ( $booster, $data_matrix, $option_mask, $ntree_limit ) = @_;
    $option_mask = 0 unless defined $option_mask;
    $ntree_limit = 0 unless defined $ntree_limit;

    my ( $prediction_count, $prediction_ptr ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterPredict(
        $booster, $data_matrix, $option_mask, $ntree_limit,
        \$prediction_count, \$prediction_ptr
    );
    _CheckCall($status);

    # Cast the raw C float pointer into a Perl array of that length.
    return FFI::Platypus->new()->cast( opaque => "float[$prediction_count]", $prediction_ptr );
}
sub XGBoosterDumpModel {
my ( $booster, $feature_map, $with_stats ) = @_;
$feature_map //= "";
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=head2 Error handling
XGBoost C API functions return an int to signal the presence/absence of error.
In this module that is achieved using Exceptions from L<Exception::Class>
=head1 FUNCTIONS
=head2 XGDMatrixCreateFromFile
Load a data matrix
Parameters:
=over 4
=item filename
the name of the file
=item silent
whether print messages during loading
=back
Returns a loaded data matrix
=head2 XGDMatrixCreateFromMat
Create from dense matrix
Parameters:
=over 4
=item matrix
matrix data
=item missing
value indicating missing data (optional)
=back
Returns a loaded data matrix
=head2 XGDMatrixNumRow
Get number of rows
Parameters:
=over 4
=item matrix
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=head2 XGDMatrixSetUintInfo
=head2 XGDMatrixGetUintInfo
=head2 XGDMatrixSaveBinary
=head2 XGDMatrixSliceDMatrix
=head2 XGDMatrixFree
Free space in data matrix
Parameters:
=over 4
=item matrix
DMatrix to be freed
=back
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=item booster
XGBoost learner to train
=item iter
current iteration rounds
=item train_matrix
training data
=back
=head2 XGBoosterBoostOneIter
=head2 XGBoosterEvalOneIter
=head2 XGBoosterPredict
Make prediction based on train matrix
Parameters:
=over 4
=item booster
XGBoost learner
=item data_matrix
Data matrix with the elements to predict
=item option_mask
bit-mask of options taken in prediction, possible values
=over 4
=item
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=back
=item ntree_limit
limit number of trees used for prediction, this is only valid for boosted trees
when the parameter is set to 0, we will use all the trees
=back
Returns an arrayref with the predictions corresponding to the rows of data matrix
=head2 XGBoosterDumpModel
=head2 XGBoosterDumpModelEx
=head2 XGBoosterDumpModelWithFeatures
=head2 XGBoosterDumpModelExWithFeatures
=head2 XGBoosterFree
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
All functions in this file will return 0 when success
and -1 when an error occurred,
XGBGetLastError can be called to retrieve the error
This function is thread safe and can be called by different thread
Returns string error information
=head2 XGDMatrixCreateFromFile
Load a data matrix
Parameters:
=over 4
=item filename
the name of the file
=item silent
whether print messages during loading
=item out
a loaded data matrix
=back
=head2 XGDMatrixCreateFromCSREx
Create a matrix content from CSR format
Parameters:
=over 4
=item indptr
pointer to row headers
=item indices
findex
=item data
fvalue
=item nindptr
number of rows in the matrix + 1
=item nelem
number of nonzero elements in the matrix
=item num_col
number of columns; when it's set to 0, then guess from data
=item out
created dmatrix
=back
=head2 XGDMatrixCreateFromCSCEx
Create a matrix content from CSC format
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=over 4
=item col_ptr
pointer to col headers
=item indices
findex
=item data
fvalue
=item nindptr
number of rows in the matrix + 1
=item nelem
number of nonzero elements in the matrix
=item num_row
number of rows; when it's set to 0, then guess from data
=back
=head2 XGDMatrixCreateFromMat
Create matrix content from dense matrix
Parameters:
=over 4
=item data
pointer to the data space
=item nrow
number of rows
=item ncol
number columns
=item missing
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=back
=head2 XGDMatrixCreateFromMat_omp
Create matrix content from dense matrix
Parameters:
=over 4
=item data
pointer to the data space
=item nrow
number of rows
=item ncol
number columns
=item missing
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixSliceDMatrix
Create a new dmatrix from sliced content of existing matrix
Parameters:
=over 4
=item handle
instance of data matrix to be sliced
=item idxset
index set
=item len
length of index set
=item out
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
the handle to the DMatrix
=item out
The address to hold number of cols.
=back
=head2 XGDMatrixSaveBinary
Save a data matrix into a binary file
Parameters:
=over 4
=item handle
a instance of data matrix
=item fname
file name
=item silent
print statistics when saving
=back
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixSetFloatInfo
Set float vector to a content in info
Parameters:
=over 4
=item handle
a instance of data matrix
=item field
field name, can be label, weight
=item array
pointer to float vector
=item len
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixSetUIntInfo
Set uint32 vector to a content in info
Parameters:
=over 4
=item handle
a instance of data matrix
=item field
field name, can be label, weight
=item array
pointer to unsigned int vector
=item len
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixSetGroup
Set label of the training matrix
Parameters:
=over 4
=item handle
a instance of data matrix
=item group
pointer to group size
=item len
length of the array
=back
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixGetFloatInfo
Get float info vector from matrix
Parameters:
=over 4
=item handle
a instance of data matrix
=item field
field name
=item out_len
used to set result length
=item out_dptr
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGDMatrixGetUIntInfo
Get uint32 info vector from matrix
Parameters:
=over 4
=item handle
a instance of data matrix
=item field
field name
=item out_len
The length of the field
=item out_dptr
pointer to the result
=back
=head2 XGDMatrixFree
Free space in data matrix
=head2 XGBoosterCreate
Create xgboost learner
Parameters:
=over 4
=item dmats
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
Parameters:
=over 4
=item handle
handle
=item dtrain
training data
=item grad
gradient statistics
=item hess
second order gradient statistics
=item len
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=item handle
handle
=item iter
current iteration rounds
=item dtrain
training data
=back
=head2 XGBoosterEvalOneIter
=head2 XGBoosterPredict
Make prediction based on dmat
Parameters:
=over 4
=item handle
handle
=item dmat
data matrix
=item option_mask
bit-mask of options taken in prediction, possible values
=over 4
=item
0: normal prediction
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
package AI::XGBoost::DMatrix;
use strict;
use warnings;
use utf8;
our $VERSION = '0.11'; # VERSION
# ABSTRACT: XGBoost class for data
use Moose;
use AI::XGBoost::CAPI qw(:all);
use Carp;
use namespace::autoclean;
has handle => ( is => 'ro', );
sub From {
    # Polymorphic constructor: dispatch on the kind of data supplied.
    #
    # Named parameters (one of `file` or `matrix` is required):
    #   file    - path of a file in a format XGBoost understands
    #   silent  - suppress messages while loading a file (optional)
    #   matrix  - arrayref of arrayrefs with a dense matrix
    #   missing - value marking missing cells (optional)
    #   label   - arrayref with per-row labels (optional)
    #
    # Returns a new DMatrix, or warns with a stack trace (Carp::cluck)
    # and returns nothing when no supported data source was given.
    my ( $package, %args ) = @_;
    return __PACKAGE__->FromFile( filename => $args{file}, silent => $args{silent} )
      if defined $args{file};
    # Forward only the arguments that were actually provided. The old
    # check (`defined $_`) tested the key *name*, which is always
    # defined, so undef `missing`/`label` values leaked into FromMat.
    return __PACKAGE__->FromMat( map { defined $args{$_} ? ( $_ => $args{$_} ) : () } qw(matrix missing label) )
      if defined $args{matrix};
    Carp::cluck( "I don't know how to build a " . __PACKAGE__ . " with this data: " . join( ", ", %args ) );
}
sub FromFile {
    # Build a DMatrix from a file in a format XGBoost understands.
    # Named parameters: filename (required), silent (optional).
    my ( $package, %args ) = @_;
    my $handle = XGDMatrixCreateFromFile( @args{qw(filename silent)} );
    return __PACKAGE__->new( handle => $handle );
}
sub FromMat {
my ( $package, %args ) = @_;
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
1;
__END__
=pod
=encoding utf-8
=head1 NAME
AI::XGBoost::DMatrix - XGBoost class for data
=head1 VERSION
version 0.11
=head1 SYNOPSIS
use aliased 'AI::XGBoost::DMatrix';
my $train_data = DMatrix->FromFile(filename => 'agaricus.txt.train');
=head1 DESCRIPTION
XGBoost DMatrix perl model
Work In Progress, the API may change. Comments and suggestions are welcome!
=head1 METHODS
=head2 From
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
=back
=head2 set_label
Set label of DMatrix. This label is the "classes" in classification problems
=head3 Parameters
=over 4
=item data
Array with the labels
=back
=head2 set_label_pdl
Set label of DMatrix. This label is the "classes" in classification problems
=head3 Parameters
=over 4
=item data
Piddle with the labels
=back
=head2 get_label
Get label of DMatrix. This label is the "classes" in classification problems
=head2 set_weight
misc/using_capi.c view on Meta::CPAN
#include <stdio.h>
#include <xgboost/c_api.h>
int main() {
DMatrixHandle dtrain;
DMatrixHandle dtest;
// Agaricus files can be found in XGBoost demo/data directory
// Original source: http://archive.ics.uci.edu/ml/datasets/mushroom
XGDMatrixCreateFromFile("agaricus.txt.test", 0, &dtest);
XGDMatrixCreateFromFile("agaricus.txt.train", 0, &dtrain);
DMatrixHandle cache[] = {dtrain};
BoosterHandle booster;
XGBoosterCreate(cache, 1, &booster);
for (int iter = 0; iter < 11; iter++) {
XGBoosterUpdateOneIter(booster, iter, dtrain);
}
bst_ulong out_len;
t/20-dmatrix.t view on Meta::CPAN
use utf8;
use Test::More tests => 6;
BEGIN {
use_ok('AI::XGBoost::DMatrix');
}
{
# A 1x2 dense matrix exercises the constructor and size accessors
my $matrix = [ [ 1, 1 ] ];
my $data = AI::XGBoost::DMatrix->FromMat( matrix => $matrix );
is( $data->num_row, scalar @$matrix, 'DMatrix constructed has the right number of rows' );
is( $data->num_col, scalar @{ $matrix->[0] }, 'DMatrix constructed has the right number of cols' );
is_deeply( [ $data->dims ], [ 1, 2 ], 'DMatrix dim method returns correct dimensions' );
}
{
# Slicing by the odd indices should keep half the rows and all columns
my $matrix = [ map { [$_] } 0 .. 9 ];
my $dmatrix = AI::XGBoost::DMatrix->FromMat( matrix => $matrix );
my $sliced_matrix = $dmatrix->slice( [ map { $_ % 2 ? $_ : () } 0 .. 9 ] );
is( $sliced_matrix->num_row, ( scalar @$matrix ) / 2, 'Sliced DMatrix has right number of rows' );
is( $sliced_matrix->num_col, scalar @{ $matrix->[0] }, 'Sliced DMatrix has right number of cols' );
}