examples/iris.pl view on Meta::CPAN
use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
use Data::Dataset::Classic::Iris;
# We are going to solve a multiclass classification problem:
# determining the plant species from a set of flower measurements
# XGBoost uses numbers for the class, so we encode the species as integers
my %class = (
setosa => 0,
versicolor => 1,
virginica => 2
);
my $iris = Data::Dataset::Classic::Iris::get();
# Prepare train and test sets: separate the features from the label
# (for simplicity this example uses the full dataset for both)
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
sub transpose {
# Transposing without using PDL, Data::Table, Data::Frame or other modules
# to keep minimal dependencies
my $array = shift;
my @aux = ();
for my $row (@$array) {
for my $column (0 .. scalar @$row - 1) {
push @{$aux[$column]}, $row->[$column];
}
}
return \@aux;
}
$train_dataset = transpose($train_dataset);
$test_dataset = transpose($test_dataset);
my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
my $test_label = [map {$class{$_}} @{$iris->{'species'}}];
my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);
# Multiclass problems need a different objective function and the number
# of classes; in this case we use 'multi:softprob' and
# num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
max_depth => 3,
eta => 0.3,
silent => 1,
objective => 'multi:softprob',
num_class => 3
});
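# Illustrative sketch, not part of the original example: with the
# 'multi:softprob' objective the booster returns one arrayref of per-class
# probabilities for each row of the test DMatrix, so we can map the highest
# probability back to a species name.
my %species = reverse %class;
my $predictions = $booster->predict(data => $test_data);
for my $row (@$predictions[0 .. 4]) {
# Index of the class with the highest probability
my ($best) = sort { $row->[$b] <=> $row->[$a] } 0 .. $#$row;
print "Predicted species: $species{$best}\n";
}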
lib/AI/XGBoost.pm view on Meta::CPAN
# ABSTRACT: Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>
sub train {
my %args = @_;
my ( $params, $data, $number_of_rounds ) = @args{qw(params data number_of_rounds)};
my $booster = AI::XGBoost::Booster->new( cache => [$data] );
if ( defined $params ) {
while ( my ( $name, $value ) = each %$params ) {
$booster->set_param( $name, $value );
}
}
for my $iteration ( 0 .. $number_of_rounds - 1 ) {
$booster->update( dtrain => $data, iteration => $iteration );
}
return $booster;
}
1;
lib/AI/XGBoost.pm view on Meta::CPAN
});
# For binary classification, predictions are confidence scores in [0, 1]
# indicating the probability that the label is positive (1 in the first column of agaricus.txt.test)
my $predictions = $booster->predict(data => $test_data);
say join "\n", @$predictions[0 .. 10];
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
my $result = XGBoosterPredict( $self->_handle, $data->handle );
my $result_size = scalar @$result;
my $matrix_rows = $data->num_row;
# Multiclass objectives return several values per row; reshape the flat
# result into one arrayref of per-class values per row
if ( $result_size != $matrix_rows && $result_size % $matrix_rows == 0 ) {
my $col_size = $result_size / $matrix_rows;
return [ map { [ @$result[ $_ * $col_size .. $_ * $col_size + $col_size - 1 ] ] } 0 .. $matrix_rows - 1 ];
}
return $result;
}
sub set_param {
my $self = shift;
my ( $name, $value ) = @_;
XGBoosterSetParam( $self->_handle, $name, $value );
return $self;
}
sub set_attr {
my $self = shift;
my ( $name, $value ) = @_;
XGBoosterSetAttr( $self->_handle, $name, $value );
return $self;
}
sub get_attr {
my $self = shift;
my ($name) = @_;
XGBoosterGetAttr( $self->_handle, $name );
lib/AI/XGBoost/Booster.pm view on Meta::CPAN
=head3 Parameters
=over 4
=item data
Data to predict
=back
=head2 set_param
Set booster parameter
=head3 Example
$booster->set_param('objective', 'binary:logistic');
=head2 set_attr
Set a string attribute
=head2 get_attr
Get a string attribute
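=head3 Example
An illustrative sketch (the attribute name is arbitrary; booster attributes are user-defined key/value strings):
$booster->set_attr('trained_on', 'agaricus');
my $value = $booster->get_attr('trained_on');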
=head2 get_score
Get importance of each feature
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=head2 XGBoosterCreate
Create XGBoost learner
Parameters:
=over 4
=item matrices
matrices that are set to be cached
=back
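For example, a minimal sketch of creating a booster with this perlified API, assuming the out parameter of the C function is turned into a return value (the import list and file name are illustrative):
use AI::XGBoost::CAPI qw(:all);
my $dtrain = XGDMatrixCreateFromFile('agaricus.txt.train', 0);
my $booster = XGBoosterCreate([$dtrain]);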
=head2 XGBoosterSetParam
=head2 XGBoosterSetAttr
=head2 XGBoosterGetAttr
=head2 XGBoosterGetAttrNames
lib/AI/XGBoost/CAPI.pm view on Meta::CPAN
=item
4: output feature contributions to individual predictions
=back
=item ntree_limit
limit the number of trees used for prediction; this is only valid for boosted trees.
When the parameter is set to 0, all the trees are used
=back
Returns an arrayref with the predictions corresponding to the rows of the data matrix
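For example, a minimal sketch using the same two-argument form seen in AI::XGBoost::Booster::predict, leaving option_mask and ntree_limit at their defaults ($booster and $dtest are handles created as above):
my $predictions = XGBoosterPredict($booster, $dtest);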
=head2 XGBoosterDumpModel
=head2 XGBoosterDumpModelEx
=head2 XGBoosterDumpModelWithFeatures
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=item nindptr
number of rows in the matrix + 1
=item nelem
number of nonzero elements in the matrix
=item num_col
number of columns; when set to 0, it is guessed from the data
=item out
created dmatrix
=back
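To make these parameters concrete, here is how a tiny dense matrix maps onto compressed sparse row (CSR) arrays; this is purely illustrative Perl data and does not call the binding itself:
# Dense matrix with 2 rows and 3 columns:
#   [ 1 0 2 ]
#   [ 0 0 3 ]
my $indptr  = [0, 2, 3];   # row pointers, nindptr = number of rows + 1 = 3
my $indices = [0, 2, 2];   # column index of every stored value
my $data    = [1, 2, 3];   # nonzero values, nelem = 3
# num_col = 3 (columns of the dense matrix)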
=head2 XGDMatrixCreateFromCSCEx
Create a matrix content from CSC format
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=item nindptr
number of columns in the matrix + 1
=item nelem
number of nonzero elements in the matrix
=item num_row
number of rows; when set to 0, it is guessed from the data
=back
=head2 XGDMatrixCreateFromMat
Create matrix content from dense matrix
Parameters:
=over 4
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
Create a new dmatrix from the sliced content of an existing matrix
Parameters:
=over 4
=item handle
instance of data matrix to be sliced
=item idxset
index set
=item len
length of index set
=item out
a sliced new matrix
=back
=head2 XGDMatrixNumRow
Get number of rows.
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=item handle
an instance of a data matrix
=item field
field name
=item out_len
used to set result length
=item out_dptr
pointer to the result
=back
=head2 XGDMatrixGetUIntInfo
Get uint32 info vector from matrix
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=head2 XGBoosterCreate
Create an XGBoost learner
Parameters:
=over 4
=item dmats
matrices that are set to be cached
=item len
length of dmats
=item out
handle to the result booster
=back
lib/AI/XGBoost/CAPI/RAW.pm view on Meta::CPAN
=item
4: output feature contributions to individual predictions
=back
=item ntree_limit
limit the number of trees used for prediction; this is only valid for boosted trees.
When the parameter is set to 0, all the trees are used
=item out_len
used to store the length of the returned result
=item out_result
used to set a pointer to the result array
=back
=head2 XGBoosterLoadModel
Load model from an existing file
Parameters:
=over 4
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
my ( $package, %args ) = @_;
my $handle = XGDMatrixCreateFromFile( @args{qw(filename silent)} );
return __PACKAGE__->new( handle => $handle );
}
sub FromMat {
my ( $package, %args ) = @_;
my $handle = XGDMatrixCreateFromMat( @args{qw(matrix missing)} );
my $matrix = __PACKAGE__->new( handle => $handle );
if ( defined $args{label} ) {
$matrix->set_label( $args{label} );
}
return $matrix;
}
sub set_float_info {
my $self = shift();
my ( $field, $info ) = @_;
XGDMatrixSetFloatInfo( $self->handle, $field, $info );
return $self;
}
sub set_float_info_pdl {
my $self = shift();
my ( $field, $info ) = @_;
XGDMatrixSetFloatInfo( $self->handle, $field, $info->flat()->unpdl() );
return $self;
}
sub get_float_info {
my $self = shift();
my $field = shift();
XGDMatrixGetFloatInfo( $self->handle, $field );
}
sub set_uint_info {
my $self = shift();
my ( $field, $info ) = @_;
XGDMatrixSetUintInfo( $self->handle, $field, $info );
return $self;
}
sub get_uint_info {
my $self = shift();
my $field = shift();
XGDMatrixGetUintInfo( $self->handle, $field );
}
sub save_binary {
my $self = shift();
my ( $filename, $silent ) = @_;
$silent //= 1;
XGDMatrixSaveBinary( $self->handle, $filename, $silent );
return $self;
}
sub set_label {
my $self = shift();
my $label = shift();
$self->set_float_info( 'label', $label );
}
sub set_label_pdl {
my $self = shift();
my $label = shift();
$self->set_float_info_pdl( 'label', $label );
}
sub get_label {
my $self = shift();
$self->get_float_info('label');
}
sub set_weight {
my $self = shift();
my $weight = shift();
$self->set_float_info( 'weight', $weight );
return $self;
}
sub set_weight_pdl {
my $self = shift();
my $weight = shift();
$self->set_float_info( 'weight', $weight->flat()->unpdl() );
return $self;
}
sub get_weight {
my $self = shift();
$self->get_float_info('weight');
}
sub set_base_margin {
my $self = shift();
my $margin = shift();
$self->set_float_info( 'base_margin', $margin );
return $self;
}
sub get_base_margin {
my $self = shift();
$self->get_float_info('base_margin');
}
sub set_group {
my $self = shift();
my $group = shift();
XGDMatrixSetGroup( $self->handle, $group );
return $self;
}
sub num_row {
my $self = shift();
XGDMatrixNumRow( $self->handle );
}
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
=item label
Array with the labels of the rows of the matrix. Optional
=item missing
Value to identify missing values. Optional, default `NaN`
=back
=head2 set_float_info
Set float type property
=head3 Parameters
=over 4
=item field
Field name of the information
=item info
array with the information
=back
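=head3 Example
An illustrative sketch setting per-row weights through the generic float interface ('weight' is one of the float fields this class uses, see set_weight below):
$matrix->set_float_info('weight', [1.0, 0.5, 2.0]);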
=head2 set_float_info_pdl
Set float type property
=head3 Parameters
=over 4
=item field
Field name of the information
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
=head3 Parameters
=over 4
=item field
Field name of the information
=back
=head2 set_uint_info
Set uint type property
=head3 Parameters
=over 4
=item field
Field name of the information
lib/AI/XGBoost/DMatrix.pm view on Meta::CPAN
=item filename
Filename and path
=item silent
Don't show information messages, optional, default 1
=back
=head2 set_label
Set the label of the DMatrix. In classification problems the label encodes the class of each row
=head3 Parameters
=over 4
=item data
Array with the labels
=back
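=head3 Example
An illustrative sketch for a three-class problem, one numeric class per row (the encoding is arbitrary):
$matrix->set_label([0, 2, 1, 0]);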
=head2 set_label_pdl
Set the label of the DMatrix. In classification problems the label encodes the class of each row
=head3 Parameters
=over 4
=item data
Piddle with the labels
=back
=head2 get_label
Get the label of the DMatrix. In classification problems the label encodes the class of each row
=head2 set_weight
Set weight of each instance
=head3 Parameters
=over 4
=item weight
Array with the weights
=back
=head2 set_weight_pdl
Set weight of each instance
=head3 Parameters
=over 4
=item weight
Piddle with the weights
=back
=head2 get_weight
Get the weight of each instance
=head2 set_base_margin
Set base margin of booster to start from
=head3 Parameters
=over 4
=item margin
Array with the margins
=back
=head2 get_base_margin
Get the base margin
=head2 set_group
Set group size
=head3 Parameters
=over 4
=item group
Array with the size of each group
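=head3 Example
An illustrative sketch for a ranking DMatrix with three query groups of 10, 20 and 30 consecutive rows (the sizes are made up):
$matrix->set_group([10, 20, 30]);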
misc/using_capi.c view on Meta::CPAN
#include <stdio.h>
#include <xgboost/c_api.h>
int main() {
DMatrixHandle dtrain;
DMatrixHandle dtest;
// Agaricus files can be found in XGBoost demo/data directory
// Original source: http://archive.ics.uci.edu/ml/datasets/mushroom
XGDMatrixCreateFromFile("agaricus.txt.test", 0, &dtest);
XGDMatrixCreateFromFile("agaricus.txt.train", 0, &dtrain);
DMatrixHandle cache[] = {dtrain};
BoosterHandle booster;
XGBoosterCreate(cache, 1, &booster);
for (int iter = 0; iter < 11; iter++) {
XGBoosterUpdateOneIter(booster, iter, dtrain);
}
bst_ulong out_len;