AI-XGBoost

 view release on metacpan or  search on metacpan

lib/AI/XGBoost.pm  view on Meta::CPAN

package AI::XGBoost;
use strict;
use warnings;

use AI::XGBoost::Booster;
use Exporter::Easy ( OK => ['train'] );

our $VERSION = '0.11';    # VERSION

# ABSTRACT: Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>

sub train {
    my %args = @_;
    my ( $params, $data, $number_of_rounds ) = @args{qw(params data number_of_rounds)};

    my $booster = AI::XGBoost::Booster->new( cache => [$data] );
    if ( defined $params ) {
        while ( my ( $name, $value ) = each %$params ) {
            $booster->set_param( $name, $value );
        }
    }
    for my $iteration ( 0 .. $number_of_rounds - 1 ) {
        $booster->update( dtrain => $data, iteration => $iteration );
    }
    return $booster;
}

1;

__END__

=pod

=encoding utf-8

=head1 NAME

AI::XGBoost - Perl wrapper for XGBoost library L<https://github.com/dmlc/xgboost>

=head1 VERSION

version 0.11

=head1 SYNOPSIS

 use 5.010;
 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);
 
 # We are going to solve a binary classification problem:
 #  Mushroom poisonous or not
 
 my $train_data = DMatrix->From(file => 'agaricus.txt.train');
 my $test_data = DMatrix->From(file => 'agaricus.txt.test');
 
 # With XGBoost we can solve this problem using 'gbtree' booster
 #  and as loss function a logistic regression 'binary:logistic'
 #  (Gradient Boosting Regression Tree)
 # XGBoost Tree Booster has a lot of parameters that we can tune
 # (https://github.com/dmlc/xgboost/blob/master/doc/parameter.md)
 
 my $booster = train(data => $train_data, number_of_rounds => 10, params => {
         objective => 'binary:logistic',
         eta => 1.0,
         max_depth => 2,
         silent => 1
     });
 
 # For binay classification predictions are probability confidence scores in [0, 1]
 #  indicating that the label is positive (1 in the first column of agaricus.txt.test)
 my $predictions = $booster->predict(data => $test_data);
 
 say join "\n", @$predictions[0 .. 10];

 use aliased 'AI::XGBoost::DMatrix';
 use AI::XGBoost qw(train);
 use Data::Dataset::Classic::Iris;
 
 # We are going to solve a multiple classification problem:
 #  determining plant species using a set of flower's measures 
 
 # XGBoost uses number for "class" so we are going to codify classes
 my %class = (
     setosa => 0,
     versicolor => 1,
     virginica => 2
 );
 
 my $iris = Data::Dataset::Classic::Iris::get();
 
 # Split train and test, label and features
 my $train_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 my $test_dataset = [map {$iris->{$_}} grep {$_ ne 'species'} keys %$iris];
 



( run in 1.141 second using v1.01-cache-2.11-cpan-5a3173703d6 )