AI-XGBoost

 view release on metacpan or  search on metacpan

MANIFEST  view on Meta::CPAN

META.json
META.yml
README
dist.ini
examples/agaricus.txt.test
examples/agaricus.txt.train
examples/basic.pl
examples/capi.pl
examples/capi_dump_model.pl
examples/capi_raw.pl
examples/featmap.txt
examples/iris.pl
lib/AI/XGBoost.pm
lib/AI/XGBoost/Booster.pm
lib/AI/XGBoost/CAPI.pm
lib/AI/XGBoost/CAPI/RAW.pm
lib/AI/XGBoost/DMatrix.pm
misc/using_capi.c
t/00-load.t
t/10-cast_arguments.t
t/20-dmatrix.t

examples/capi_dump_model.pl  view on Meta::CPAN

use 5.010;
use Data::Dumper;
use AI::XGBoost::CAPI qw(:all);

# Load the mushroom (agaricus) train and test sets shipped with the examples.
my $train_matrix = XGDMatrixCreateFromFile('agaricus.txt.train');
my $test_matrix  = XGDMatrixCreateFromFile('agaricus.txt.test');

# A booster caching the training matrix, trained for a single iteration.
my $model = XGBoosterCreate( [$train_matrix] );
XGBoosterUpdateOneIter( $model, 1, $train_matrix );

# Dump every tree as JSON (with statistics), naming features via featmap.txt.
my $json_trees = XGBoosterDumpModelEx( $model, "featmap.txt", 1, "json" );
say Dumper($json_trees);

# Release the native handles explicitly.
XGBoosterFree($model);
XGDMatrixFree($_) for $train_matrix, $test_matrix;




examples/iris.pl  view on Meta::CPAN

# XGBoost expects numeric class labels, so map each species name to a number
my %class;
@class{qw(setosa versicolor virginica)} = ( 0 .. 2 );

my $iris = Data::Dataset::Classic::Iris::get();

# Feature columns are every column except the 'species' label column.
# Train and test are built from the same data in this example.
my $train_dataset = [ @{$iris}{ grep { $_ ne 'species' } keys %{$iris} } ];
my $test_dataset  = [ @{$iris}{ grep { $_ ne 'species' } keys %{$iris} } ];

# transpose($rows)
#
# Takes an ARRAYREF of ARRAYREFs and returns a new ARRAYREF of ARRAYREFs in
# which element [i][j] of the input is appended to column list j of the
# output. Written by hand (no PDL, Data::Table, Data::Frame, ...) to keep
# the example's dependencies minimal.
sub transpose {
    my ($rows) = @_;
    my @columns;
    foreach my $row ( @{$rows} ) {
        my $index = 0;
        foreach my $value ( @{$row} ) {
            push @{ $columns[ $index++ ] }, $value;
        }
    }
    return \@columns;
}

# The datasets were collected column by column; flip them so that each
# inner array is one sample.
$_ = transpose($_) for $train_dataset, $test_dataset;

# Numeric label for every sample, looked up from its species name.
my $train_label = [ @class{ @{ $iris->{'species'} } } ];
my $test_label  = [ @class{ @{ $iris->{'species'} } } ];

my $train_data = DMatrix->From( label => $train_label, matrix => $train_dataset );
my $test_data  = DMatrix->From( label => $test_label,  matrix => $test_dataset );

# Multiclass problems need a different objective function and the number
#  of classes, in this case we are using 'multi:softprob' and
#  num_class => 3
my $booster = train(data => $train_data, number_of_rounds => 20, params => {
        max_depth => 3,
        eta => 0.3,

lib/AI/XGBoost.pm  view on Meta::CPAN

 # XGBoost uses number for "class" so we are going to codify classes
 my %class = (
     setosa => 0,
     versicolor => 1,
     virginica => 2
 );
 
 my $iris = Data::Dataset::Classic::Iris::get();
 
 # Split train and test, label and features
 my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 
 sub transpose {
 # Transposing without using PDL, Data::Table, Data::Frame or other modules
 # to keep minimal dependencies
     my $array = shift;
     my @aux = ();
     for my $row (@$array) {
         for my $column (0 .. scalar @$row - 1) {
             push @{$aux[$column]}, $row->[$column];
         }
     }
     return \@aux;
 }
 
 $train_dataset = transpose($train_dataset);
 $test_dataset = transpose($test_dataset);
 
 my $train_label = [map {$class{$_}} @{$iris->{'species'}}];
 my $test_label = [map {$class{$_}} @{$iris->{'species'}}];
 
 my $train_data = DMatrix->From(matrix => $train_dataset, label => $train_label);
 my $test_data = DMatrix->From(matrix => $test_dataset, label => $test_label);
 
 # Multiclass problems need a different objective function and the number
 #  of classes, in this case we are using 'multi:softprob' and
 #  num_class => 3
 my $booster = train(data => $train_data, number_of_rounds => 20, params => {
         max_depth => 3,
         eta => 0.3,

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN


# predict( data => $dmatrix )
#
# Runs XGBoosterPredict on the given matrix. When the number of returned
# values differs from the number of matrix rows but is an exact multiple of
# it, the flat result is regrouped into one ARRAYREF per row; otherwise the
# flat ARRAYREF is returned unchanged.
sub predict {
    my ( $self, %args ) = @_;
    my $matrix      = $args{'data'};
    my $predictions = XGBoosterPredict( $self->_handle, $matrix->handle );
    my $total       = scalar @{$predictions};
    my $rows        = $matrix->num_row;

    # One value per row, or a size that does not split evenly: keep it flat.
    return $predictions if $total == $rows || $total % $rows != 0;

    my $width = $total / $rows;
    my @per_row;
    for my $row ( 0 .. $rows - 1 ) {
        my $first = $row * $width;
        push @per_row, [ @{$predictions}[ $first .. $first + $width - 1 ] ];
    }
    return \@per_row;
}

# set_param( $name, $value )
#
# Forwards a single parameter to the underlying booster handle and returns
# the object itself so calls can be chained.
sub set_param {
    my ( $self, $name, $value ) = @_;
    XGBoosterSetParam( $self->_handle, $name, $value );
    return $self;
}

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN


# get_attr( $name )
#
# Returns whatever XGBoosterGetAttr yields for the named attribute of this
# booster's handle.
sub get_attr {
    my ( $self, $name ) = @_;
    return XGBoosterGetAttr( $self->_handle, $name );
}

# get_score( fmap => ..., importance_type => ... )
#
# Apparently meant to compute feature importance scores -- TODO confirm
# against the module's POD.
#
# NOTE(review): this is an unfinished stub. Both named arguments are
# unpacked, but $fmap is never used; the "weight" branch only stores the
# result of get_dump in a lexical that goes out of scope immediately; the
# other branch is empty; and there is no explicit return. Callers should not
# rely on the value this method currently yields.
# NOTE(review): if importance_type is not supplied, the `eq` comparison
# operates on undef -- consider defaulting it once this is implemented.
sub get_score {
    my $self = shift;
    my %args = @_;
    my ( $fmap, $importance_type ) = @args{qw(fmap importance_type)};

    if ( $importance_type eq "weight" ) {
        # Fetch the model dump; nothing is done with it yet.
        my @trees = $self->get_dump;
    } else {

    }

}

# get_dump()
#
# Returns the result of dumping the model in "text" format, with statistics
# enabled and no feature map file.
sub get_dump {
    my ($self) = @_;
    my $handle = $self->_handle;
    return XGBoosterDumpModelEx( $handle, "", 1, "text" );
}

# attributes()
#
# Returns a HASHREF mapping every attribute name reported by
# XGBoosterGetAttrNames to the value obtained through get_attr.
sub attributes {
    my ($self) = @_;
    my $names = XGBoosterGetAttrNames( $self->_handle );
    return { map { ( $_, $self->get_attr($_) ) } @{$names} };
}

# TO_JSON()
#
# Dumps the model in "json" format (with statistics, no feature map) and
# returns a single string: the per-tree dumps joined with commas and wrapped
# in square brackets.
sub TO_JSON {
    my ($self) = @_;
    my @tree_documents = @{ XGBoosterDumpModelEx( $self->_handle, "", 1, "json" ) };
    return '[' . join( ',', @tree_documents ) . ']';
}

# BUILD( \%args )
#
# Moose construction hook: creates the native booster from the handles of
# the objects listed under the 'cache' constructor argument and stores the
# resulting handle on the object.
sub BUILD {
    my ( $self, $args ) = @_;
    my @cache_handles = map { $_->handle } @{ $args->{'cache'} };
    $self->_handle( XGBoosterCreate( \@cache_handles ) );
}

# DEMOLISH()
#
# Moose destruction hook: frees the native booster handle held by the object.
sub DEMOLISH {
    my ($self) = @_;
    XGBoosterFree( $self->_handle );
}

__PACKAGE__->meta->make_immutable();

1;

lib/AI/XGBoost/Booster.pm  view on Meta::CPAN

=item gain

Average gain of the feature when it is used in trees

=item cover

Average coverage of the feature when it is used in trees

=back

=item fmap

Name of feature map file

=back

=head2 get_dump

=head2 attributes

Returns all attributes of the booster as a HASHREF

=head2 TO_JSON

lib/AI/XGBoost/CAPI.pm  view on Meta::CPAN

    return $matrix;
}

# XGDMatrixCreateFromMat( $data, $missing )
#
# Builds a DMatrix from an ARRAYREF of row ARRAYREFs.
#
#   $data    - ARRAYREF of rows; every row must have the same number of
#              columns
#   $missing - value that represents a missing entry (defaults to "NaN")
#
# Returns the matrix handle written back by the raw C API call. Dies if the
# rows do not all have the same length, and _CheckCall handles a non-zero
# status from the C API.
sub XGDMatrixCreateFromMat {
    my ( $data, $missing ) = @_;
    $missing //= "NaN";

    # TODO Support simple arrays
    # TODO Support PDL
    # TODO Adapters?
    my $nrows = scalar @$data;

    # An empty matrix has no first row to measure.
    my $ncols = $nrows ? scalar @{ $data->[0] } : 0;

    # The C side is told to read exactly $nrows * $ncols values, so a ragged
    # matrix would hand it a buffer whose size disagrees with those
    # dimensions. Refuse it up front instead.
    for my $row_index ( 0 .. $nrows - 1 ) {
        my $width = scalar @{ $data->[$row_index] };
        die "XGDMatrixCreateFromMat: row $row_index has $width columns, expected $ncols"
          if $width != $ncols;
    }

    # Flatten the rows into one row-major list.
    my $data_adapter = [ map { @$_ } @$data ];
    my $matrix       = 0;
    my $error = AI::XGBoost::CAPI::RAW::XGDMatrixCreateFromMat( $data_adapter, $nrows, $ncols, $missing, \$matrix );
    _CheckCall($error);
    return $matrix;
}

sub XGDMatrixNumRow {
    my ($matrix) = @_;

lib/AI/XGBoost/CAPI.pm  view on Meta::CPAN

    return ();
}

# XGBoosterGetAttrNames( $booster )
#
# Returns an ARRAYREF with the names of all attributes set on the booster.
# An empty ARRAYREF is returned when the C API reports no names.
sub XGBoosterGetAttrNames {
    my ($booster)  = @_;
    my $out_len    = 0;
    my $out_result = 0;
    _CheckCall( AI::XGBoost::CAPI::RAW::XGBoosterGetAttrNames( $booster, \$out_len, \$out_result ) );

    # Nothing to convert: avoid building an "opaque[0]" type or
    # dereferencing a NULL result pointer.
    # NOTE(review): FFI::Platypus appears to reject zero-length fixed
    # arrays -- confirm against the installed version.
    return [] unless $out_len && $out_result;

    my $ffi = FFI::Platypus->new();

    # The result is a C array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# XGBoosterUpdateOneIter( $booster, $iter, $train_matrix )
#
# Calls the raw C API to update the booster for iteration $iter using
# $train_matrix, passes the status to _CheckCall, and returns an empty list.
sub XGBoosterUpdateOneIter {
    my ( $booster, $iter, $train_matrix ) = @_;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterUpdateOneIter( $booster, $iter, $train_matrix );
    _CheckCall($status);
    return ();
}

sub XGBoosterBoostOneIter {
    my ( $booster, $train_matrix, $gradient, $hessian ) = @_;

lib/AI/XGBoost/CAPI.pm  view on Meta::CPAN

                )
    );
    return ();
}

# XGBoosterEvalOneIter( $booster, $iter, $matrices, $matrices_names )
#
# Evaluates the booster at iteration $iter on the given matrices.
#
#   $matrices       - ARRAYREF of DMatrix handles
#   $matrices_names - ARRAYREF of names, one per matrix
#
# Returns an ARRAYREF of strings (the container type is unchanged for
# callers). The XGBoost C API writes ONE C string holding the evaluation
# statistics for all matrices into out_result, so the ARRAYREF holds that
# single string, or is empty if nothing was returned. Dies if the number of
# names differs from the number of matrices.
sub XGBoosterEvalOneIter {
    my ( $booster, $iter, $matrices, $matrices_names ) = @_;
    my $out_result         = 0;
    my $number_of_matrices = scalar @$matrices;
    my $number_of_names    = scalar @$matrices_names;

    # The C side reads $number_of_matrices entries from both arrays.
    die "XGBoosterEvalOneIter: got $number_of_matrices matrices but $number_of_names names"
      if $number_of_matrices != $number_of_names;

    my $ffi = FFI::Platypus->new();
    my $array_of_opaque_matrices_names = [ map { $ffi->cast( string => "opaque", $_ ) } @$matrices_names ];
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterEvalOneIter(
            $booster, $iter, $matrices, $array_of_opaque_matrices_names,
            $number_of_matrices, \$out_result
        )
    );

    return [] unless $out_result;

    # out_result points at a single NUL-terminated string, not at an array
    # of string pointers, so it must be converted directly.
    return [ $ffi->cast( opaque => "string", $out_result ) ];
}

# XGBoosterPredict( $booster, $data_matrix, $option_mask, $ntree_limit )
#
# Runs a prediction with the booster over $data_matrix.
#
#   $option_mask - passed through to the C API (defaults to 0)
#   $ntree_limit - passed through to the C API (defaults to 0)
#
# Returns an ARRAYREF of floats with $out_len entries as reported by the C
# API, or an empty ARRAYREF when no predictions were produced.
sub XGBoosterPredict {
    my ( $booster, $data_matrix, $option_mask, $ntree_limit ) = @_;
    $option_mask //= 0;
    $ntree_limit //= 0;
    my $out_len    = 0;
    my $out_result = 0;
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterPredict(
            $booster,     $data_matrix, $option_mask,
            $ntree_limit, \$out_len,    \$out_result
        )
    );

    # No predictions: avoid building a "float[0]" type or casting a NULL
    # pointer, and keep the return value an ARRAYREF for callers.
    return [] unless $out_len && $out_result;

    my $ffi = FFI::Platypus->new();
    return $ffi->cast( opaque => "float[$out_len]", $out_result );
}

# XGBoosterDumpModel( $booster, $feature_map, $with_stats )
#
# Dumps the model as strings.
#
#   $feature_map - feature map file name (defaults to "")
#   $with_stats  - whether to include statistics (defaults to 1)
#
# Returns an ARRAYREF with $out_len strings as reported by the C API, or an
# empty ARRAYREF when the dump is empty.
sub XGBoosterDumpModel {
    my ( $booster, $feature_map, $with_stats ) = @_;
    $feature_map //= "";
    $with_stats  //= 1;
    my $out_len    = 0;
    my $out_result = 0;
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterDumpModel( $booster, $feature_map, $with_stats, \$out_len, \$out_result )
    );

    # Empty dump: avoid building an "opaque[0]" type or dereferencing a NULL
    # result pointer.
    return [] unless $out_len && $out_result;

    my $ffi = FFI::Platypus->new();

    # The result is a C array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# XGBoosterDumpModelEx( $booster, $feature_map, $with_stats, $format )
#
# Dumps the model as strings in the requested format.
#
#   $feature_map - feature map file name (defaults to "")
#   $with_stats  - whether to include statistics (defaults to 1)
#   $format      - dump format; callers in this distribution pass "text" or
#                  "json" (defaults to "text" so that undef is never handed
#                  to the C API as the format string)
#
# Returns an ARRAYREF with $out_len strings as reported by the C API, or an
# empty ARRAYREF when the dump is empty.
sub XGBoosterDumpModelEx {
    my ( $booster, $feature_map, $with_stats, $format ) = @_;
    $feature_map //= "";
    $with_stats  //= 1;
    $format      //= "text";
    my $out_len    = 0;
    my $out_result = 0;
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterDumpModelEx(
            $booster, $feature_map, $with_stats, $format, \$out_len, \$out_result
        )
    );

    # Empty dump: avoid building an "opaque[0]" type or dereferencing a NULL
    # result pointer.
    return [] unless $out_len && $out_result;

    my $ffi = FFI::Platypus->new();

    # The result is a C array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# XGBoosterDumpModelWithFeatures( $booster, $feature_names, $feature_types,
#                                 $with_stats )
#
# Dumps the model as strings, describing the features with in-memory names
# and types instead of a feature map file.
#
#   $feature_names - ARRAYREF of feature names
#   $feature_types - ARRAYREF of feature types, one per name
#   $with_stats    - whether to include statistics (defaults to 1)
#
# Returns an ARRAYREF with $out_len strings as reported by the C API, or an
# empty ARRAYREF when the dump is empty. Dies if the two feature arrays have
# different lengths.
sub XGBoosterDumpModelWithFeatures {
    my ( $booster, $feature_names, $feature_types, $with_stats ) = @_;
    $with_stats //= 1;
    my $out_len            = 0;
    my $out_result         = 0;
    my $number_of_features = scalar @$feature_names;
    my $number_of_types    = scalar @$feature_types;

    # A single count is passed to the C side for both arrays.
    die "XGBoosterDumpModelWithFeatures: got $number_of_features feature names but $number_of_types feature types"
      if $number_of_features != $number_of_types;

    my $ffi                           = FFI::Platypus->new();
    my $array_of_opaque_feature_names = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_names ];
    my $array_of_opaque_feature_types = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_types ];
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterDumpModelWithFeatures(
            $booster, $number_of_features,
            $array_of_opaque_feature_names,
            $array_of_opaque_feature_types,
            $with_stats, \$out_len, \$out_result
        )
    );

    # Empty dump: avoid building an "opaque[0]" type or dereferencing a NULL
    # result pointer.
    return [] unless $out_len && $out_result;

    # The result is a C array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# XGBoosterDumpModelExWithFeatures( $booster, $feature_names, $feature_types,
#                                   $with_stats, $format )
#
# Dumps the model as strings in the requested format, describing the
# features with in-memory names and types instead of a feature map file.
#
#   $feature_names - ARRAYREF of feature names
#   $feature_types - ARRAYREF of feature types, one per name
#   $with_stats    - whether to include statistics (defaults to 1, matching
#                    the other dump functions in this module)
#   $format        - dump format (defaults to "text" so that undef is never
#                    handed to the C API as the format string)
#
# Returns an ARRAYREF with $out_len strings as reported by the C API, or an
# empty ARRAYREF when the dump is empty. Dies if the two feature arrays have
# different lengths.
sub XGBoosterDumpModelExWithFeatures {
    my ( $booster, $feature_names, $feature_types, $with_stats, $format ) = @_;
    $with_stats //= 1;
    $format     //= "text";
    my $out_len            = 0;
    my $out_result         = 0;
    my $number_of_features = scalar @$feature_names;
    my $number_of_types    = scalar @$feature_types;

    # A single count is passed to the C side for both arrays.
    die "XGBoosterDumpModelExWithFeatures: got $number_of_features feature names but $number_of_types feature types"
      if $number_of_features != $number_of_types;

    my $ffi                           = FFI::Platypus->new();
    my $array_of_opaque_feature_names = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_names ];
    my $array_of_opaque_feature_types = [ map { $ffi->cast( string => "opaque", $_ ) } @$feature_types ];
    _CheckCall(
        AI::XGBoost::CAPI::RAW::XGBoosterDumpModelExWithFeatures(
            $booster, $number_of_features,
            $array_of_opaque_feature_names,
            $array_of_opaque_feature_types,
            $with_stats, $format, \$out_len, \$out_result
        )
    );

    # Empty dump: avoid building an "opaque[0]" type or dereferencing a NULL
    # result pointer.
    return [] unless $out_len && $out_result;

    # The result is a C array of $out_len string pointers.
    $out_result = $ffi->cast( opaque => "opaque[$out_len]", $out_result );
    return [ map { $ffi->cast( opaque => "string", $_ ) } @$out_result ];
}

# XGBoosterFree( $booster )
#
# Releases the native booster behind the handle via the raw C API, passes the
# status to _CheckCall, and returns an empty list.
sub XGBoosterFree {
    my ($booster) = @_;
    my $status = AI::XGBoost::CAPI::RAW::XGBoosterFree($booster);
    _CheckCall($status);
    return ();
}

# _CheckCall
#

lib/AI/XGBoost/DMatrix.pm  view on Meta::CPAN

use Moose;
use AI::XGBoost::CAPI qw(:all);
use Carp;
use namespace::autoclean;

has handle => ( is => 'ro', );

# From( file => $path, silent => $flag )
# From( matrix => $rows, missing => $value, label => $labels )
#
# Generic constructor that dispatches on the kind of data supplied:
#
#   file   - delegates to FromFile (together with 'silent')
#   matrix - delegates to FromMat, forwarding only those of 'matrix',
#            'missing' and 'label' that were actually given
#
# 'file' takes precedence when both are present. If neither is defined a
# warning with a stack trace is emitted and nothing (undef in scalar
# context) is returned.
sub From {
    my ( $package, %args ) = @_;

    if ( defined $args{file} ) {
        return __PACKAGE__->FromFile( filename => $args{file}, silent => $args{silent} );
    }

    if ( defined $args{matrix} ) {

        # Filter on the argument's value, not on the key name (which is
        # always defined), and contribute an empty list for absent arguments
        # so that the key/value pairing stays intact.
        my %matrix_args = map { defined $args{$_} ? ( $_ => $args{$_} ) : () } qw(matrix missing label);
        return __PACKAGE__->FromMat(%matrix_args);
    }

    Carp::cluck( "I don't know how to build a " . __PACKAGE__ . " with this data: " . join( ", ", %args ) );
    return;
}

# FromFile( filename => $path, silent => $flag )
#
# Creates the native matrix from a file through XGDMatrixCreateFromFile and
# wraps the resulting handle in a new object of this package.
sub FromFile {
    my ( $package, %args ) = @_;
    my ( $filename, $silent ) = @args{qw(filename silent)};
    my $matrix_handle = XGDMatrixCreateFromFile( $filename, $silent );
    return __PACKAGE__->new( handle => $matrix_handle );
}

t/20-dmatrix.t  view on Meta::CPAN


# A 1x2 matrix built with FromMat must report matching dimensions.
{
    my $rows          = [ [ 1, 1 ] ];
    my $dmatrix       = AI::XGBoost::DMatrix->FromMat( matrix => $rows );
    my $expected_rows = scalar @{$rows};
    my $expected_cols = scalar @{ $rows->[0] };
    is( $dmatrix->num_row, $expected_rows, 'DMatrix constructed has the right number of rows' );
    is( $dmatrix->num_col, $expected_cols, 'DMatrix constructed has the right number of cols' );
    is_deeply( [ $dmatrix->dims ], [ 1, 2 ], 'DMatrix dim method returns correct dimensions' );
}

# Slicing a 10x1 matrix by its odd row indices must keep half the rows and
# all the columns.
{
    my $rows        = [ map { [$_] } 0 .. 9 ];
    my $full        = AI::XGBoost::DMatrix->FromMat( matrix => $rows );
    my @odd_indices = grep { $_ % 2 } 0 .. 9;
    my $sliced      = $full->slice( \@odd_indices );
    is( $sliced->num_row, ( scalar @{$rows} ) / 2, 'Sliced DMatrix has right number of rows' );
    is( $sliced->num_col, scalar @{ $rows->[0] }, 'Sliced DMatrix has right number of cols' );
}



( run in 0.727 second using v1.01-cache-2.11-cpan-49f99fa48dc )