AI-XGBoost


examples/iris.pl

use aliased 'AI::XGBoost::DMatrix';
use AI::XGBoost qw(train);
use Data::Dataset::Classic::Iris;

# We are going to solve a multiclass classification problem:
#  determining the plant species from a set of flower measurements

# XGBoost expects numeric classes, so we encode the species names as numbers
my %class = (
    setosa => 0,
    versicolor => 1,
    virginica => 2
);

my $iris = Data::Dataset::Classic::Iris::get();

# Separate the features from the label (species). To keep the example short,
#  we use the full dataset for both training and testing instead of a real split
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];

sub transpose {
# Transposing without using PDL, Data::Table, Data::Frame or other modules
# to keep minimal dependencies
    my $array = shift;
    my @aux = ();
    for my $row (@$array) {
        for my $column (0 .. scalar @$row - 1) {
            push @{$aux[$column]}, $row->[$column];
        }
    }
    return \@aux;
}
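
# For example (a quick sketch of what transpose() does):
#   transpose([[1, 2, 3], [4, 5, 6]]) returns [[1, 4], [2, 5], [3, 6]]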

# The dataset built above is column-major (one arrayref per feature);
#  transpose it so each element is one observation (row)
$train_dataset = transpose($train_dataset);
$test_dataset = transpose($test_dataset);

my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
my $test_label = [map {$class{$_}} @{$iris->{'species'}}];

my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);

# Multiclass problems need a different objective function and the number
#  of classes; in this case we use 'multi:softprob' and
#  num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
        max_depth => 3,
        eta => 0.3,
        silent => 1,
        objective => 'multi:softprob',
        num_class => 3
});
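
# Sketch of using the trained booster: with 'multi:softprob', predict()
#  returns one arrayref of class probabilities per row of the test matrix
#  (see AI::XGBoost::Booster)
my $predictions = $booster->predict(data => $test_data);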

lib/AI/XGBoost.pm


# ABSTRACT: Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>

sub train {
    my %args = @_;
    my ( $params, $data, $number_of_rounds ) = @args{qw(params data number_of_rounds)};

    # Build a booster that caches the training matrix, apply any
    #  user-supplied parameters, then run the boosting iterations
    my $booster = AI::XGBoost::Booster->new( cache => [$data] );
    if ( defined $params ) {
        while ( my ( $name, $value ) = each %$params ) {
            $booster->set_param( $name, $value );
        }
    }
    for my $iteration ( 0 .. $number_of_rounds - 1 ) {
        $booster->update( dtrain => $data, iteration => $iteration );
    }
    return $booster;
}

1;

lib/AI/XGBoost.pm

     });
 
 # For binary classification the predictions are probability scores in [0, 1],
 #  the confidence that the label is positive (1 in the first column of agaricus.txt.test)
 my $predictions = $booster->predict(data => $test_data);
 
 say join "\n", @$predictions[0 .. 10];

lib/AI/XGBoost/Booster.pm

    my $result      = XGBoosterPredict( $self->_handle, $data->handle );
    my $result_size = scalar @$result;
    my $matrix_rows = $data->num_row;

    # When the library returns several values per row (for example
    #  'multi:softprob' returns one probability per class), reshape the
    #  flat result into one arrayref per row of the data matrix
    if ( $result_size != $matrix_rows && $result_size % $matrix_rows == 0 ) {
        my $col_size = $result_size / $matrix_rows;
        return [ map { [ @$result[ $_ * $col_size .. $_ * $col_size + $col_size - 1 ] ] } 0 .. $matrix_rows - 1 ];
    }
    return $result;
}

sub set_param {
    my $self = shift;
    my ( $name, $value ) = @_;
    XGBoosterSetParam( $self->_handle, $name, $value );
    return $self;
}

sub set_attr {
    my $self = shift;
    my ( $name, $value ) = @_;
    XGBoosterSetAttr( $self->_handle, $name, $value );
    return $self;
}

sub get_attr {
    my $self = shift;
    my ($name) = @_;
    XGBoosterGetAttr( $self->_handle, $name );

lib/AI/XGBoost/Booster.pm

=head3 Parameters

=over 4

=item data

Data to predict

=back
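
=head3 Example

A minimal sketch, assuming C<$test_data> is an already built L<AI::XGBoost::DMatrix>:

    my $predictions = $booster->predict(data => $test_data);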

=head2 set_param

Set booster parameter

=head3 Example

    $booster->set_param('objective', 'binary:logistic');

=head2 set_attr

Set a string attribute

=head2 get_attr

Get a string attribute
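
=head3 Example

A minimal sketch; the attribute name and value are only illustrative, and it
assumes C<get_attr> returns the stored string:

    $booster->set_attr('run_id', 'experiment-1');
    my $run_id = $booster->get_attr('run_id');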

=head2 get_score

Get importance of each feature

lib/AI/XGBoost/CAPI.pm

=head2 XGBoosterCreate

Create XGBoost learner

Parameters:

=over 4

=item matrices

matrices that are set to be cached

=back
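
=head3 Example

A rough sketch only, under these assumptions (not shown above): the Perl-ish
wrapper takes an arrayref of DMatrix handles, returns the new booster handle
directly, and can be imported:

    use AI::XGBoost::CAPI qw(XGBoosterCreate);

    my $booster_handle = XGBoosterCreate( [$dmatrix_handle] );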

=head2 XGBoosterSetParam

=head2 XGBoosterSetAttr

=head2 XGBoosterGetAttr

=head2 XGBoosterGetAttrNames

lib/AI/XGBoost/CAPI.pm


=item

4: output feature contributions to individual predictions

=back

=item ntree_limit

Limit on the number of trees used for prediction (only valid for boosted trees).
When set to 0, all trees are used.

=back

Returns an arrayref with the predictions corresponding to the rows of the data matrix

=head2 XGBoosterDumpModel

=head2 XGBoosterDumpModelEx

=head2 XGBoosterDumpModelWithFeatures

lib/AI/XGBoost/CAPI/RAW.pm

=item nindptr

number of rows in the matrix + 1

=item nelem

number of nonzero elements in the matrix

=item num_col

number of columns; when set to 0, it is guessed from the data

=item out

created dmatrix

=back

=head2 XGDMatrixCreateFromCSCEx

Create a matrix content from CSC format

lib/AI/XGBoost/CAPI/RAW.pm

=item nindptr

number of columns in the matrix + 1 (length of the column pointer array)

=item nelem

number of nonzero elements in the matrix

=item num_row

number of rows; when set to 0, it is guessed from the data

=back

=head2 XGDMatrixCreateFromMat

Create matrix content from dense matrix

Parameters:

=over 4

lib/AI/XGBoost/CAPI/RAW.pm

Create a new dmatrix from sliced content of existing matrix

Parameters:

=over 4

=item handle

instance of data matrix to be sliced

=item idxset

index set

=item len

length of index set

=item out

a sliced new matrix

=back

=head2 XGDMatrixNumRow

Get number of rows.

lib/AI/XGBoost/CAPI/RAW.pm

=item handle

an instance of a data matrix

=item field

field name

=item out_len

used to return the length of the result

=item out_dptr

pointer to the result

=back

=head2 XGDMatrixGetUIntInfo

Get uint32 info vector from matrix

lib/AI/XGBoost/CAPI/RAW.pm

=head2 XGBoosterCreate

Create xgboost learner

Parameters:

=over 4

=item dmats 

matrices that are set to be cached

=item len 

length of dmats

=item out 

handle to the result booster

=back

lib/AI/XGBoost/CAPI/RAW.pm


=item

4: output feature contributions to individual predictions

=back

=item ntree_limit 

Limit on the number of trees used for prediction (only valid for boosted trees).
When set to 0, all trees are used.

=item out_len

used to store the length of the returned result

=item out_result

used to return a pointer to the result array

=back

=head2 XGBoosterLoadModel

Load a model from an existing file

Parameters:

=over 4

lib/AI/XGBoost/DMatrix.pm

    my ( $package, %args ) = @_;
    my $handle = XGDMatrixCreateFromFile( @args{qw(filename silent)} );
    return __PACKAGE__->new( handle => $handle );
}

sub FromMat {
    my ( $package, %args ) = @_;
    my $handle = XGDMatrixCreateFromMat( @args{qw(matrix missing)} );
    my $matrix = __PACKAGE__->new( handle => $handle );
    if ( defined $args{label} ) {
        $matrix->set_label( $args{label} );
    }
    return $matrix;
}

sub set_float_info {
    my $self = shift();
    my ( $field, $info ) = @_;
    XGDMatrixSetFloatInfo( $self->handle, $field, $info );
    return $self;
}

sub set_float_info_pdl {
    my $self = shift();
    my ( $field, $info ) = @_;
    XGDMatrixSetFloatInfo( $self->handle, $field, $info->flat()->unpdl() );
    return $self;
}

sub get_float_info {
    my $self  = shift();
    my $field = shift();
    XGDMatrixGetFloatInfo( $self->handle, $field );
}

sub set_uint_info {
    my $self = shift();
    my ( $field, $info ) = @_;
    XGDMatrixSetUintInfo( $self->handle, $field, $info );
    return $self;
}

sub get_uint_info {
    my $self  = shift();
    my $field = shift();
    XGDMatrixGetUintInfo( $self->handle, $field );
}

sub save_binary {
    my $self = shift();
    my ( $filename, $silent ) = @_;
    $silent //= 1;
    XGDMatrixSaveBinary( $self->handle, $filename, $silent );
    return $self;
}

sub set_label {
    my $self  = shift();
    my $label = shift();
    $self->set_float_info( 'label', $label );
}

sub set_label_pdl {
    my $self  = shift();
    my $label = shift();
    $self->set_float_info_pdl( 'label', $label->flat()->unpdl() );
}

sub get_label {
    my $self = shift();
    $self->get_float_info('label');
}

sub set_weight {
    my $self   = shift();
    my $weight = shift();
    $self->set_float_info( 'weight', $weight );
    return $self;
}

sub set_weight_pdl {
    my $self   = shift();
    my $weight = shift();
    $self->set_float_info( 'weight', $weight->flat()->unpdl() );
    return $self;
}

sub get_weight {
    my $self = shift();
    $self->get_float_info('weight');
}

sub set_base_margin {
    my $self   = shift();
    my $margin = shift();
    $self->set_float_info( 'base_margin', $margin );
    return $self;
}

sub get_base_margin {
    my $self = shift();
    $self->get_float_info('base_margin');
}

sub set_group {
    my $self  = shift();
    my $group = shift();
    XGDMatrixSetGroup( $self->handle, $group );
    return $self;
}

sub num_row {
    my $self = shift();
    XGDMatrixNumRow( $self->handle );
}

lib/AI/XGBoost/DMatrix.pm

=item label

Array with the labels of the rows of matrix. Optional

=item missing

Value to identify missing values. Optional, default `NaN`

=back
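
=head3 Example

A minimal sketch building a DMatrix from a plain Perl matrix (two rows, two
features) with one label per row:

    my $matrix = AI::XGBoost::DMatrix->FromMat(
        matrix => [ [ 1, 2 ], [ 3, 4 ] ],
        label  => [ 0, 1 ]
    );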

=head2 set_float_info

Set float type property

=head3 Parameters

=over 4

=item field

Field name of the information

=item info

array with the information

=back
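
=head3 Example

A minimal sketch; C<label> is one of the float fields understood by the
underlying library:

    $matrix->set_float_info('label', [0, 1, 1, 0]);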

=head2 set_float_info_pdl

Set float type property

=head3 Parameters

=over 4

=item field

Field name of the information

lib/AI/XGBoost/DMatrix.pm

=head3 Parameters

=over 4

=item field

Field name of the information

=back

=head2 set_uint_info

Set uint type property

=head3 Parameters

=over 4

=item field

Field name of the information

lib/AI/XGBoost/DMatrix.pm

=item filename

Filename and path

=item silent

Don't show information messages, optional, default 1

=back

=head2 set_label

Set the label of the DMatrix. In classification problems the label holds the class of each row

=head3 Parameters

=over 4

=item data

Array with the labels

=back
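
=head3 Example

A minimal sketch with one numeric class per row:

    $matrix->set_label([0, 1, 2, 1]);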

=head2 set_label_pdl

Set the label of the DMatrix from a piddle. In classification problems the label holds the class of each row

=head3 Parameters

=over 4

=item data

Piddle with the labels

=back

=head2 get_label

Get the label of the DMatrix. In classification problems the label holds the class of each row

=head2 set_weight

Set weight of each instance

=head3 Parameters

=over 4

=item weight

Array with the weights

=back
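
=head3 Example

A minimal sketch giving each row its own weight:

    $matrix->set_weight([1, 1, 0.5, 2]);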

=head2 set_weight_pdl

Set weight of each instance

=head3 Parameters

=over 4

=item weight

Piddle with the weights

=back

=head2 get_weight

Get the weight of each instance

=head2 set_base_margin

Set base margin of booster to start from

=head3 Parameters

=over 4

=item margin

Array with the margins

=back
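
=head3 Example

A minimal sketch; the margin values are only illustrative:

    $matrix->set_base_margin([0.5, 0.5, 0.5]);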

=head2 get_base_margin

Get the base margin

=head2 set_group

Set group size

=head3 Parameters

=over 4

=item group

Array with the size of each group

misc/using_capi.c

#include <stdio.h>
#include <xgboost/c_api.h>

int main() {
    
    DMatrixHandle dtrain;
    DMatrixHandle dtest;
    // Agaricus files can be found in XGBoost demo/data directory
    // Original source: http://archive.ics.uci.edu/ml/datasets/mushroom
    XGDMatrixCreateFromFile("agaricus.txt.test", 0, &dtest);
    XGDMatrixCreateFromFile("agaricus.txt.train", 0, &dtrain);
    DMatrixHandle cache[] = {dtrain};
    BoosterHandle booster;
    XGBoosterCreate(cache, 1, &booster);
    for (int iter = 0; iter < 11; iter++) {
        XGBoosterUpdateOneIter(booster, iter, dtrain);
    }

    bst_ulong out_len;


