AI-XGBoost

 view release on metacpan or  search on metacpan

LICENSE  view on Meta::CPAN

      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You

LICENSE  view on Meta::CPAN

          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

examples/iris.pl  view on Meta::CPAN

    versicolor => 1,
    virginica => 2
);

my $iris = Data::Dataset::Classic::Iris::get();

# Split train and test, label and features
my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];

sub transpose {
# Transposing without using PDL, Data::Table, Data::Frame or other modules
# to keep minimal dependencies
    my $array = shift;
    my @aux = ();
    for my $row (@$array) {
        for my $column (0 .. scalar @$row - 1) {
            push @{$aux[$column]}, $row->[$column];
        }
    }
    return \@aux;

lib/AI/XGBoost.pm  view on Meta::CPAN

use strict;
use warnings;

use AI::XGBoost::Booster;
use Exporter::Easy ( OK => ['train'] );

our $VERSION = '0.11';    # VERSION

# ABSTRACT: Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>

sub train {
    my %args = @_;
    my ( $params, $data, $number_of_rounds ) = @args{qw(params data number_of_rounds)};

    my $booster = AI::XGBoost::Booster->new( cache => [$data] );
    if ( defined $params ) {
        while ( my ( $name, $value ) = each %$params ) {
            $booster->set_param( $name, $value );
        }
    }
    for my $iteration ( 0 .. $number_of_rounds - 1 ) {

lib/AI/XGBoost.pm  view on Meta::CPAN

     versicolor => 1,
     virginica => 2
 );
 
 my $iris = Data::Dataset::Classic::Iris::get();
 
 # Split train and test, label and features
 my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 
 sub transpose {
 # Transposing without using PDL, Data::Table, Data::Frame or other modules
 # to keep minimal dependencies
     my $array = shift;
     my @aux = ();
     for my $row (@$array) {
         for my $column (0 .. scalar @$row - 1) {
             push @{$aux[$column]}, $row->[$column];
         }
     }
     return \@aux;

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN


# ABSTRACT: XGBoost main class for training, prediction and evaluation

use Moose;
use AI::XGBoost::CAPI qw(:all);
use namespace::autoclean;

# Native booster handle. It cannot be passed to the constructor
# (init_arg => undef): BUILD stores the value returned by XGBoosterCreate and
# DEMOLISH hands it to XGBoosterFree.
has _handle => ( is       => 'rw',
                 init_arg => undef, );

# Runs one training iteration on this booster.
#
# Named arguments:
#   iteration - iteration number forwarded to XGBoosterUpdateOneIter
#   dtrain    - training data object; its handle() is what gets forwarded
#
# Returns the booster itself so calls can be chained.
sub update {
    my ( $self, %named ) = @_;
    my $round    = $named{iteration};
    my $training = $named{dtrain};
    XGBoosterUpdateOneIter( $self->_handle, $round, $training->handle );
    return $self;
}

# Boosts the model one iteration from caller-supplied gradient statistics.
#
# Named arguments:
#   dtrain - training data: either an object exposing handle() (the same kind
#            of object update() takes) or an already unwrapped native handle
#   grad   - ARRAYREF of gradient values (the CAPI wrapper takes its length)
#   hess   - hessian values, forwarded unchanged
#
# Returns the booster itself so calls can be chained.
sub boost {
    my $self = shift;
    my %args = @_;
    my ( $dtrain, $grad, $hess ) = @args{qw(dtrain grad hess)};

    # update() gives the native layer $dtrain->handle rather than the wrapper
    # object; do the same here. Unblessed values are forwarded untouched so a
    # caller that already passes a raw handle keeps working.
    require Scalar::Util;
    my $dtrain_handle = Scalar::Util::blessed($dtrain) ? $dtrain->handle : $dtrain;

    XGBoosterBoostOneIter( $self->_handle, $dtrain_handle, $grad, $hess );
    return $self;
}

# Predicts with the current model.
#
# Named arguments:
#   data - data object providing handle() and num_row()
#
# Returns the flat ARRAYREF produced by XGBoosterPredict. When the number of
# values is a multiple of (but not equal to) the number of rows, the values
# are regrouped into one ARRAYREF per row instead.
sub predict {
    my ( $self, %opts ) = @_;
    my $dmatrix = $opts{'data'};
    my $flat    = XGBoosterPredict( $self->_handle, $dmatrix->handle );
    my $total   = scalar @$flat;
    my $rows    = $dmatrix->num_row;

    # One value per row: nothing to regroup.
    return $flat if $total == $rows;

    # Values cannot be split evenly between rows: leave them flat.
    return $flat if $total % $rows != 0;

    my $width = $total / $rows;
    my @grid;
    for my $row ( 0 .. $rows - 1 ) {
        my $first = $row * $width;
        push @grid, [ @{$flat}[ $first .. $first + $width - 1 ] ];
    }
    return \@grid;
}

# Sets one booster parameter through XGBoosterSetParam.
# Takes the parameter name and its value; returns the booster for chaining.
sub set_param {
    my ( $self, $name, $value ) = @_;
    XGBoosterSetParam( $self->_handle, $name, $value );
    return $self;
}

# Sets one booster attribute through XGBoosterSetAttr.
# Takes the attribute name and its value; returns the booster for chaining.
sub set_attr {
    my ( $self, $name, $value ) = @_;
    XGBoosterSetAttr( $self->_handle, $name, $value );
    return $self;
}

# Looks up one booster attribute by name.
# Returns whatever XGBoosterGetAttr returns for this booster's handle.
sub get_attr {
    my ( $self, $name ) = @_;
    return XGBoosterGetAttr( $self->_handle, $name );
}

# Feature importance scores -- NOT IMPLEMENTED YET.
#
# Named arguments read: fmap, importance_type.
#
# NOTE(review): this is a stub. Nothing is computed from the dump, $fmap is
# never used, and there is no explicit return, so callers get whatever the
# last evaluated expression happens to be. Treat the result as undefined
# until the method is finished.
sub get_score {
    my $self = shift;
    my %args = @_;
    my ( $fmap, $importance_type ) = @args{qw(fmap importance_type)};

    # NOTE(review): importance_type has no default, so omitting it makes this
    # comparison run against undef (an "uninitialized value" warning).
    if ( $importance_type eq "weight" ) {
        # get_dump returns the ARRAYREF built by XGBoosterDumpModelEx, so
        # @trees ends up holding that single reference, not the dumped trees.
        my @trees = $self->get_dump;
    } else {

        # TODO: other importance types are not handled.
    }

}

# Dumps the model through XGBoosterDumpModelEx with an empty feature map,
# statistics enabled and the "text" format. Returns that call's result.
sub get_dump {
    my ($self) = @_;
    my ( $feature_map, $with_stats, $format ) = ( "", 1, "text" );
    return XGBoosterDumpModelEx( $self->_handle, $feature_map, $with_stats, $format );
}

# Returns a HASHREF mapping every attribute name reported by
# XGBoosterGetAttrNames to the value get_attr() gives for it.
sub attributes {
    my ($self) = @_;
    my $names = XGBoosterGetAttrNames( $self->_handle );
    my %value_of = map { ( $_, $self->get_attr($_) ) } @$names;
    return \%value_of;
}

# Serializes the booster: dumps the model in "json" format (empty feature
# map, statistics enabled) and joins the dumped entries with commas inside
# square brackets. Returns that string.
sub TO_JSON {
    my ($self) = @_;
    my $dumped = XGBoosterDumpModelEx( $self->_handle, "", 1, "json" );
    my $body = join ',', @$dumped;
    return '[' . $body . ']';
}

# Moose construction hook: creates the native booster from the handles of
# the objects listed under the 'cache' constructor argument and stores the
# resulting handle in _handle.
sub BUILD {
    my ( $self, $args ) = @_;
    my @cached_handles = map { $_->handle } @{ $args->{'cache'} };
    $self->_handle( XGBoosterCreate( \@cached_handles ) );
}

# Moose destruction hook: releases the native booster.
#
# _handle is only filled in by BUILD, and XGBoosterCreate can throw before
# that happens; in that case there is nothing to release, so skip the native
# call instead of freeing an undefined handle from inside a destructor.
sub DEMOLISH {
    my $self   = shift();
    my $handle = $self->_handle;
    return unless defined $handle;
    XGBoosterFree($handle);
}

__PACKAGE__->meta->make_immutable();

1;

__END__

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN


Returns all attributes of the booster as a HASHREF

=head2 TO_JSON

Serialize the booster to JSON.

This method is to be used with the option C<convert_blessed> from L<JSON>.
(See L<https://metacpan.org/pod/JSON#OBJECT-SERIALISATION>)

Warning: this API is subject to change

=head2 BUILD

Use new, this method is just an internal helper

=head2 DEMOLISH

Internal destructor. This method is called automatically

=head1 AUTHOR

lib/AI/XGBoost/CAPI.pm  view on Meta::CPAN

    ]
);
use AI::XGBoost::CAPI::RAW;
use FFI::Platypus;
use Exception::Class ( 'XGBoostException' );

our $VERSION = '0.11';    # VERSION

# ABSTRACT: Perl wrapper for XGBoost C API https://github.com/dmlc/xgboost

# Creates a DMatrix from a file.
#
#   $filename - path handed to the native call
#   $silent   - defaults to 1 when undefined
#
# Returns the new matrix handle; throws (via _CheckCall) when the native
# call reports a non-zero status.
sub XGDMatrixCreateFromFile {
    my ( $filename, $silent ) = @_;
    $silent = 1 unless defined $silent;
    my $handle = 0;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromFile( $filename, $silent, \$handle ) );
    return $handle;
}

# Creates a DMatrix from an ARRAYREF of row ARRAYREFs.
#
#   $data    - the rows; every row must have the same number of columns
#   $missing - value marking missing entries, defaults to "NaN"
#
# Returns the new matrix handle. Throws XGBoostException when the matrix is
# empty or ragged, or (via _CheckCall) when the native call fails.
sub XGDMatrixCreateFromMat {
    my ( $data, $missing ) = @_;
    $missing //= "NaN";

    # TODO Support simple arrays
    # TODO Support PDL
    # TODO Adapters?
    my $nrows = scalar @$data;

    # The column count is read from the first row, so there must be one.
    XGBoostException->throw( error => "XGDMatrixCreateFromMat: the matrix has no rows" )
      if $nrows == 0;

    # The native call is told the buffer holds $nrows * $ncols values; a row
    # of a different width would make the flattened buffer disagree with that.
    my $ncols = scalar @{ $data->[0] };
    for my $index ( 1 .. $nrows - 1 ) {
        my $width = scalar @{ $data->[$index] };
        next if $width == $ncols;
        XGBoostException->throw(
                   error => "XGDMatrixCreateFromMat: row $index has $width columns, expected $ncols" );
    }

    my $data_adapter = [ map { @$_ } @$data ];
    my $matrix       = 0;
    my $error = AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromMat( $data_adapter, $nrows, $ncols, $missing, \$matrix );
    _CheckCall($error);
    return $matrix;
}

# Returns the number of rows the native library reports for $matrix.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixNumRow {
    my $matrix = shift;
    my $count  = 0;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixNumRow( $matrix, \$count );
    _CheckCall($status);
    return $count;
}

# Returns the number of columns the native library reports for $matrix.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixNumCol {
    my $matrix = shift;
    my $count  = 0;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixNumCol( $matrix, \$count );
    _CheckCall($status);
    return $count;
}

# Stores the values of the ARRAYREF $data under the field name $info of
# $matrix. The element count is passed along with the data.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixSetFloatInfo {
    my ( $matrix, $info, $data ) = @_;
    my $length = scalar @$data;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixSetFloatInfo( $matrix, $info, $data, $length );
    return _CheckCall($status);
}

# Reads the field named $info from $matrix.
# The native call reports a length and a pointer; the pointer is cast to a
# float array of that length and the cast result is returned.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixGetFloatInfo {
    my ( $matrix, $info ) = @_;
    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixGetFloatInfo( $matrix, $info, \$out_len, \$out_result );
    _CheckCall($status);
    my $array_type = "float[$out_len]";
    return FFI::Platypus->new()->cast( opaque => $array_type, $out_result );
}

# Stores the values of the ARRAYREF $data under the field name $info of
# $matrix. The element count is passed along with the data.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixSetUintInfo {
    my ( $matrix, $info, $data ) = @_;
    my $length = scalar @$data;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixSetUintInfo( $matrix, $info, $data, $length );
    return _CheckCall($status);
}

# Reads the field named $info from $matrix.
# The native call reports a length and a pointer; the pointer is cast to a
# uint32 array of that length and the cast result is returned.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixGetUintInfo {
    my ( $matrix, $info ) = @_;
    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixGetUintInfo( $matrix, $info, \$out_len, \$out_result );
    _CheckCall($status);
    my $array_type = "uint32[$out_len]";
    return FFI::Platypus->new()->cast( opaque => $array_type, $out_result );
}

# Saves $matrix to $filename through the native library.
# $silent defaults to 1 when undefined.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixSaveBinary {
    my ( $matrix, $filename, $silent ) = @_;
    $silent = 1 unless defined $silent;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixSaveBinary( $matrix, $filename, $silent );
    return _CheckCall($status);
}

# Builds a new matrix from $matrix using the ARRAYREF $list_of_indices
# (its length is passed along). Returns the new matrix handle.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixSliceDMatrix {
    my ( $matrix, $list_of_indices ) = @_;
    my $index_count = scalar @$list_of_indices;
    my $sliced      = 0;
    _CheckCall(
          AI::XGBoost::CAPI::RAW::XGDMatrixSliceDMatrix( $matrix, $list_of_indices, $index_count, \$sliced ) );
    return $sliced;
}

# Releases the native matrix behind $matrix. Returns an empty list.
# Throws (via _CheckCall) on a non-zero status.
sub XGDMatrixFree {
    my $matrix = shift;
    my $status = AI::XGBoost::CAPI::RAW::XGDMatrixFree($matrix);
    _CheckCall($status);
    return ();
}

# Creates a native booster from the ARRAYREF $matrices (its length is passed
# along). Returns the new booster handle.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterCreate {
    my $matrices     = shift;
    my $matrix_count = scalar @$matrices;
    my $handle       = 0;
    my $status       = AI::XGBoost::CAPI::RAW::XGBoosterCreate( $matrices, $matrix_count, \$handle );
    _CheckCall($status);
    return $handle;
}

# Sets parameter $name to $value on $booster.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterSetParam {
    my ( $booster, $name, $value ) = @_;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterSetParam( $booster, $name, $value );
    return _CheckCall($status);
}

# Sets attribute $name to $value on $booster.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterSetAttr {
    my ( $booster, $name, $value ) = @_;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterSetAttr( $booster, $name, $value );
    return _CheckCall($status);
}

# Reads attribute $name from $booster.
# Returns the attribute as a string when the native call sets its success
# flag, and an empty list otherwise.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterGetAttr {
    my ( $booster, $name ) = @_;
    my ( $value, $success ) = ( 0, -1 );
    _CheckCall( AI::XGBoost::CAPI::RAW::XGBoosterGetAttr( $booster, $name, \$value, \$success ) );
    return () unless $success;
    return FFI::Platypus->new()->cast( opaque => "string", $value );
}

# Lists the attribute names of $booster.
# The native call reports a count and a pointer; the pointer is cast to an
# array of that many pointers and each one is cast to a string.
# Returns an ARRAYREF of names. Throws (via _CheckCall) on a non-zero status.
sub XGBoosterGetAttrNames {
    my $booster = shift;
    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterGetAttrNames( $booster, \$out_len, \$out_result );
    _CheckCall($status);

    my $ffi      = FFI::Platypus->new();
    my $pointers = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    my @names;
    for my $pointer (@$pointers) {
        push @names, $ffi->cast( opaque => "string", $pointer );
    }
    return \@names;
}

# Runs training iteration $iter of $booster on $train_matrix.
# Returns an empty list. Throws (via _CheckCall) on a non-zero status.
sub XGBoosterUpdateOneIter {
    my ( $booster, $iter, $train_matrix ) = @_;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterUpdateOneIter( $booster, $iter, $train_matrix );
    _CheckCall($status);
    return ();
}

# Boosts $booster one iteration on $train_matrix from explicit gradient
# statistics.
#
#   $gradient - ARRAYREF of gradient values; its length is the only length
#               sent to the native call
#   $hessian  - hessian values, expected to have the same length
#
# Returns an empty list. Throws XGBoostException when both are ARRAYREFs of
# different lengths, or (via _CheckCall) when the native call fails.
sub XGBoosterBoostOneIter {
    my ( $booster, $train_matrix, $gradient, $hessian ) = @_;
    my $length = scalar @$gradient;

    # Only one length is passed down, so a shorter hessian would be read past
    # its end. Anything that is not a plain ARRAYREF is forwarded unchecked.
    if ( ref $hessian eq 'ARRAY' && scalar @$hessian != $length ) {
        XGBoostException->throw( error => "XGBoosterBoostOneIter: gradient has $length elements but hessian has "
                                 . scalar(@$hessian) );
    }

    _CheckCall(
           AI::XGBoost::CAPI::RAW::XGBoosterBoostOneIter( $booster, $train_matrix, $gradient, $hessian, $length ) );
    return ();
}

# Evaluates iteration $iter of $booster on the matrices in the ARRAYREF
# $matrices, labelled by the names in the ARRAYREF $matrices_names.
#
# Returns an ARRAYREF of strings, one per matrix, obtained by casting the
# native result to an array of pointers and each pointer to a string.
# Throws (via _CheckCall) on a non-zero status.
#
# NOTE(review): the upstream XGBoost C API documents out_result as a single
# string holding all the evaluation statistics, not an array of strings.
# Confirm the array cast below against AI::XGBoost::CAPI::RAW.
sub XGBoosterEvalOneIter {
    my ( $booster, $iter, $matrices, $matrices_names ) = @_;
    my $ffi          = FFI::Platypus->new();
    my $matrix_count = scalar @$matrices;

    # The native call takes the names as raw pointers.
    my @name_pointers;
    for my $name (@$matrices_names) {
        push @name_pointers, $ffi->cast( string => "opaque", $name );
    }

    my $out_result = 0;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterEvalOneIter( $booster, $iter, $matrices, \@name_pointers,
                                                               $matrix_count, \$out_result );
    _CheckCall($status);

    my $pointers = $ffi->cast( opaque => "opaque[$matrix_count]", $out_result );
    my @messages;
    for my $pointer (@$pointers) {
        push @messages, $ffi->cast( opaque => "string", $pointer );
    }
    return \@messages;
}

# Predicts with $booster on $data_matrix.
#
#   $option_mask - defaults to 0 when undefined
#   $ntree_limit - defaults to 0 when undefined
#
# The native call reports a length and a pointer; the pointer is cast to a
# float array of that length and the cast result is returned.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterPredict {
    my ( $booster, $data_matrix, $option_mask, $ntree_limit ) = @_;
    $option_mask = 0 unless defined $option_mask;
    $ntree_limit = 0 unless defined $ntree_limit;

    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterPredict( $booster, $data_matrix, $option_mask, $ntree_limit,
                                                           \$out_len, \$out_result );
    _CheckCall($status);

    my $array_type = "float[$out_len]";
    return FFI::Platypus->new()->cast( opaque => $array_type, $out_result );
}

# Dumps the model held by $booster.
#
#   $feature_map - defaults to "" when undefined
#   $with_stats  - defaults to 1 when undefined
#
# Returns an ARRAYREF with one string per dumped entry.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterDumpModel {
    my ( $booster, $feature_map, $with_stats ) = @_;
    $feature_map = "" unless defined $feature_map;
    $with_stats  = 1  unless defined $with_stats;

    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status =
      AI::XGBoost::CAPI::RAW::XGBoosterDumpModel( $booster, $feature_map, $with_stats, \$out_len, \$out_result );
    _CheckCall($status);

    # The result is an array of $out_len string pointers.
    my $ffi      = FFI::Platypus->new();
    my $pointers = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    my @dumps;
    for my $pointer (@$pointers) {
        push @dumps, $ffi->cast( opaque => "string", $pointer );
    }
    return \@dumps;
}

# Dumps the model held by $booster in the requested format.
#
#   $feature_map - defaults to "" when undefined
#   $with_stats  - defaults to 1 when undefined
#   $format      - dump format; defaults to "text" when undefined (the
#                  callers in this distribution pass "text" or "json")
#
# Returns an ARRAYREF with one string per dumped entry.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterDumpModelEx {
    my ( $booster, $feature_map, $with_stats, $format ) = @_;
    $feature_map //= "";
    $with_stats  //= 1;

    # Give $format a default like its sibling arguments, so an omitted format
    # is never forwarded to the native call as an undefined value.
    $format //= "text";

    my $out_len    = 0;
    my $out_result = 0;
    _CheckCall(
                AI::XGBoost::CAPI::RAW::XGBoosterDumpModelEx(
                                                   $booster, $feature_map, $with_stats, $format, \$out_len, \$out_result
                )
    );

    # The result is an array of $out_len string pointers.
    my $ffi = FFI::Platypus->new();
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# Dumps the model held by $booster using explicit feature descriptions.
#
#   $feature_names - ARRAYREF of names; its length is the feature count sent
#   $feature_types - ARRAYREF of types
#   $with_stats    - defaults to 1 when undefined
#
# Returns an ARRAYREF with one string per dumped entry.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterDumpModelWithFeatures {
    my ( $booster, $feature_names, $feature_types, $with_stats ) = @_;
    $with_stats = 1 unless defined $with_stats;

    my $ffi           = FFI::Platypus->new();
    my $feature_count = scalar @$feature_names;

    # Names and types are handed over as arrays of raw string pointers.
    my @name_pointers = map { $ffi->cast( string => "opaque", $_ ) } @$feature_names;
    my @type_pointers = map { $ffi->cast( string => "opaque", $_ ) } @$feature_types;

    my ( $out_len, $out_result ) = ( 0, 0 );
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterDumpModelWithFeatures( $booster, $feature_count, \@name_pointers,
                                                                         \@type_pointers, $with_stats, \$out_len,
                                                                         \$out_result );
    _CheckCall($status);

    my $pointers = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    my @dumps;
    for my $pointer (@$pointers) {
        push @dumps, $ffi->cast( opaque => "string", $pointer );
    }
    return \@dumps;
}

# Dumps the model held by $booster in the requested format, using explicit
# feature descriptions.
#
#   $feature_names - ARRAYREF of names; its length is the feature count sent
#   $feature_types - ARRAYREF of types
#   $with_stats    - forwarded as given (no default is applied here)
#   $format        - dump format; defaults to "text" when undefined
#
# Returns an ARRAYREF with one string per dumped entry.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterDumpModelExWithFeatures {
    my ( $booster, $feature_names, $feature_types, $with_stats, $format ) = @_;

    # An omitted format would otherwise reach the native call undefined.
    $format //= "text";

    my $out_len                       = 0;
    my $out_result                    = 0;
    my $ffi                           = FFI::Platypus->new();
    my $number_of_features            = scalar @$feature_names;

    # Names and types are handed over as arrays of raw string pointers.
    my $array_of_opaque_feature_names = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_names ];
    my $array_of_opaque_feature_types = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_types ];
    _CheckCall(
                AI::XGBoost::CAPI::RAW::XGBoosterDumpModelExWithFeatures( $booster, $number_of_features,
                                                                          $array_of_opaque_feature_names,
                                                                          $array_of_opaque_feature_types,
                                                                          $with_stats, $format, \$out_len, \$out_result
                )
    );

    # The result is an array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# Releases the native booster behind $booster. Returns an empty list.
# Throws (via _CheckCall) on a non-zero status.
sub XGBoosterFree {
    my $booster = shift;
    my $status  = AI::XGBoost::CAPI::RAW::XGBoosterFree($booster);
    _CheckCall($status);
    return ();
}

# _CheckCall
#
#  Checks the return code of a native call and, if necessary, throws an
#  XGBoostException carrying the message from XGBGetLastError.
#  A false return code means success and nothing is thrown.
#
sub _CheckCall {
    my ($status) = @_;
    XGBoostException->throw( error => AI::XGBoost::CAPI::RAW::XGBGetLastError() ) if $status;
}

1;

__END__

lib/AI/XGBoost/DMatrix.pm  view on Meta::CPAN


# ABSTRACT: XGBoost class for data

use Moose;
use AI::XGBoost::CAPI qw(:all);
use Carp;
use namespace::autoclean;

# Native DMatrix handle: passed to the XGDMatrix* calls made by this class
# and handed to XGDMatrixFree by DEMOLISH. Read-only accessor.
has handle => ( is => 'ro', );

# Generic constructor: picks the concrete constructor from the arguments.
#
# Named arguments:
#   file    - when defined, build with FromFile (together with 'silent')
#   matrix  - otherwise, when defined, build with FromMat (together with
#             'missing' and 'label', each only when defined)
#
# Returns the new object. When neither 'file' nor 'matrix' is given, a
# warning with a stack trace is emitted and nothing is returned.
sub From {
    my ( $package, %args ) = @_;
    return __PACKAGE__->FromFile( filename => $args{file}, silent => $args{silent} ) if ( defined $args{file} );

    if ( defined $args{matrix} ) {

        # Forward only the options the caller actually supplied. The test
        # must look at the value: the key names themselves are always defined.
        my @mat_args = map { ( $_ => $args{$_} ) } grep { defined $args{$_} } qw(matrix missing label);
        return __PACKAGE__->FromMat(@mat_args);
    }

    Carp::cluck( "I don't know how to build a " . __PACKAGE__ . " with this data: " . join( ", ", %args ) );

    # Without this the sub would hand back cluck's true return value, which a
    # caller could mistake for a successfully built object.
    return;
}

# Builds a matrix from a file.
# Named arguments: filename, silent (both forwarded to
# XGDMatrixCreateFromFile). Returns a new object wrapping the handle.
sub FromFile {
    my ( $package, %args ) = @_;
    my ( $filename, $silent ) = @args{qw(filename silent)};
    my $handle = XGDMatrixCreateFromFile( $filename, $silent );
    return __PACKAGE__->new( handle => $handle );
}

# Builds a matrix from in-memory data.
# Named arguments: matrix and missing (forwarded to XGDMatrixCreateFromMat)
# and label (applied with set_label when defined).
# Returns a new object wrapping the handle.
sub FromMat {
    my ( $package, %args ) = @_;
    my ( $rows, $missing ) = @args{qw(matrix missing)};
    my $handle = XGDMatrixCreateFromMat( $rows, $missing );
    my $object = __PACKAGE__->new( handle => $handle );
    $object->set_label( $args{label} ) if defined $args{label};
    return $object;
}

# Stores $info under the float field $field of this matrix.
# Returns the matrix itself so calls can be chained.
sub set_float_info {
    my ( $self, $field, $info ) = @_;
    XGDMatrixSetFloatInfo( $self->handle, $field, $info );
    return $self;
}

# Same as set_float_info, for an $info object that provides flat() and
# unpdl() (presumably a PDL piddle): the result of $info->flat->unpdl is
# what gets stored. Returns the matrix itself so calls can be chained.
sub set_float_info_pdl {
    my ( $self, $field, $info ) = @_;
    my $values = $info->flat()->unpdl();
    XGDMatrixSetFloatInfo( $self->handle, $field, $values );
    return $self;
}

# Reads the float field $field of this matrix.
# Returns whatever XGDMatrixGetFloatInfo returns.
sub get_float_info {
    my ( $self, $field ) = @_;
    return XGDMatrixGetFloatInfo( $self->handle, $field );
}

# Stores $info under the unsigned integer field $field of this matrix.
# Returns the matrix itself so calls can be chained.
sub set_uint_info {
    my ( $self, $field, $info ) = @_;
    XGDMatrixSetUintInfo( $self->handle, $field, $info );
    return $self;
}

# Reads the unsigned integer field $field of this matrix.
# Returns whatever XGDMatrixGetUintInfo returns.
sub get_uint_info {
    my ( $self, $field ) = @_;
    return XGDMatrixGetUintInfo( $self->handle, $field );
}

# Saves this matrix to $filename. $silent defaults to 1 when undefined.
# Returns the matrix itself so calls can be chained.
sub save_binary {
    my ( $self, $filename, $silent ) = @_;
    $silent = 1 unless defined $silent;
    XGDMatrixSaveBinary( $self->handle, $filename, $silent );
    return $self;
}

# Stores $label under the 'label' float field.
# Returns what set_float_info returns.
sub set_label {
    my ( $self, $label ) = @_;
    return $self->set_float_info( 'label', $label );
}

# Stores $label under the 'label' float field, for a $label object that
# set_float_info_pdl can flatten (it calls flat() and unpdl() on it).
# Returns what set_float_info_pdl returns.
sub set_label_pdl {
    my $self  = shift();
    my $label = shift();

    # Hand the object over as it is: set_float_info_pdl does the
    # flat()->unpdl() conversion itself, and converting here as well would
    # make it call flat() on a plain ARRAYREF.
    return $self->set_float_info_pdl( 'label', $label );
}

# Reads the 'label' float field. Returns what get_float_info returns.
sub get_label {
    my ($self) = @_;
    return $self->get_float_info('label');
}

# Stores $weight under the 'weight' float field.
# Returns the matrix itself so calls can be chained.
sub set_weight {
    my ( $self, $weight ) = @_;
    $self->set_float_info( weight => $weight );
    return $self;
}

# Stores the weights held by $weight, an object providing flat() and unpdl()
# (presumably a PDL piddle), under the 'weight' float field.
# Returns the matrix itself so calls can be chained.
sub set_weight_pdl {
    my ( $self, $weight ) = @_;
    my $values = $weight->flat()->unpdl();
    $self->set_float_info( weight => $values );
    return $self;
}

# Reads the 'weight' float field. Returns what get_float_info returns.
sub get_weight {
    my ($self) = @_;
    return $self->get_float_info('weight');
}

# Stores $margin under the 'base_margin' float field.
# Returns the matrix itself so calls can be chained.
sub set_base_margin {
    my ( $self, $margin ) = @_;
    $self->set_float_info( base_margin => $margin );
    return $self;
}

# Reads the 'base_margin' float field. Returns what get_float_info returns.
sub get_base_margin {
    my ($self) = @_;
    return $self->get_float_info('base_margin');
}

# Stores the group sizes in $group (an ARRAYREF of unsigned integers) under
# the 'group' field of this matrix.
# Returns the matrix itself so calls can be chained.
sub set_group {
    my $self  = shift();
    my $group = shift();

    # Go through the generic unsigned-int setter: the CAPI module defines
    # XGDMatrixSetUintInfo but no XGDMatrixSetGroup wrapper, so calling the
    # latter would fail at run time. Upstream XGBoost stores group sizes as
    # the "group" unsigned-int info field.
    $self->set_uint_info( 'group', $group );
    return $self;
}

# Number of rows of this matrix, as reported by XGDMatrixNumRow.
sub num_row {
    my ($self) = @_;
    return XGDMatrixNumRow( $self->handle );
}

# Number of columns of this matrix, as reported by XGDMatrixNumCol.
sub num_col {
    my ($self) = @_;
    return XGDMatrixNumCol( $self->handle );
}

# Dimensions of this matrix as the list (rows, columns).
sub dims {
    my ($self) = @_;
    my $rows   = $self->num_row();
    my $cols   = $self->num_col();
    return ( $rows, $cols );
}

# Builds a new matrix from this one using the ARRAYREF $list_of_indices,
# via XGDMatrixSliceDMatrix. Returns a new object wrapping the handle.
sub slice {
    my ( $self, $list_of_indices ) = @_;
    my $sliced_handle = XGDMatrixSliceDMatrix( $self->handle(), $list_of_indices );
    return __PACKAGE__->new( handle => $sliced_handle );
}

# Moose destruction hook: releases the native matrix.
#
# The handle attribute is not required, so an object can exist without one;
# in that case there is nothing to release and the native call is skipped
# instead of freeing an undefined handle from inside a destructor.
sub DEMOLISH {
    my $self   = shift();
    my $handle = $self->handle;
    return unless defined $handle;
    XGDMatrixFree($handle);
}

__PACKAGE__->meta->make_immutable();

1;

__END__



( run in 2.040 seconds using v1.01-cache-2.11-cpan-88abd93f124 )