Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/R/xgb.Booster.R  view on Meta::CPAN

#' barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
#' par(mar = old_mar)
#' 
#' 
#' ## multiclass classification in iris dataset:
#' 
#' lb <- as.numeric(iris$Species) - 1
#' num_class <- 3
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softprob", num_class = num_class)
#' # predict for softmax returns num_class probability numbers per case:
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' # reshape it to a num_class-columns matrix
#' pred <- matrix(pred, ncol=num_class, byrow=TRUE)
#' # convert the probabilities to softmax labels
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
#' sum(pred_labels != lb)/length(lb)
#' 
#' # compare that to the predictions from softmax:
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softmax", num_class = num_class)
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result 
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
#' sum(pred5 != lb)/length(lb)
#' 
#' 
#' ## random forest-like model of 25 trees for binary classification:
#' 
#' set.seed(11)
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
#'                nthread = 2, nrounds = 1, objective = "binary:logistic",
#'                num_parallel_tree = 25, subsample = 0.6, colsample_bytree = 0.1)
#' # Inspect the prediction error vs number of trees:
#' lb <- test$label
#' dtest <- xgb.DMatrix(test$data, label=lb)
#' err <- sapply(1:25, function(n) {
#'   pred <- predict(bst, dtest, ntreelimit=n)
#'   sum((pred > 0.5) != lb)/length(lb)
#' })
#' plot(err, type='l', ylim=c(0,0.1), xlab='#trees')
#'
#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
                                predleaf = FALSE, predcontrib = FALSE, reshape = FALSE, ...) {

  object <- xgb.Booster.complete(object, saveraw = FALSE)
  if (!inherits(newdata, "xgb.DMatrix"))
    newdata <- xgb.DMatrix(newdata, missing = missing)
  if (is.null(ntreelimit))
    ntreelimit <- NVL(object$best_ntreelimit, 0)
  if (NVL(object$params[['booster']], '') == 'gblinear')
    ntreelimit <- 0
  if (ntreelimit < 0)
    stop("ntreelimit cannot be negative")
  
  option <- 0L + 1L * as.logical(outputmargin) + 2L * as.logical(predleaf) + 4L * as.logical(predcontrib)
  
  ret <- .Call(XGBoosterPredict_R, object$handle, newdata, option[1], as.integer(ntreelimit))
  
  n_ret <- length(ret)
  n_row <- nrow(newdata)
  npred_per_case <- n_ret / n_row
  
  if (n_ret %% n_row != 0)
    stop("prediction length ", n_ret, " is not multiple of nrows(newdata) ", n_row)
  
  if (predleaf) {
    ret <- if (n_ret == n_row) {
      matrix(ret, ncol = 1)
    } else {
      matrix(ret, nrow = n_row, byrow = TRUE)
    }
  } else if (predcontrib) {
    n_col1 <- ncol(newdata) + 1
    n_group <- npred_per_case / n_col1
    dnames <- list(NULL, c(colnames(newdata), "BIAS"))
    ret <- if (n_ret == n_row) {
      matrix(ret, ncol = 1, dimnames = dnames)
    } else if (n_group == 1) {
      matrix(ret, nrow = n_row, byrow = TRUE, dimnames = dnames)
    } else {
      grp_mask <- rep(seq_len(n_col1), n_row) +
        rep((seq_len(n_row) - 1) * n_col1 * n_group, each = n_col1)
      lapply(seq_len(n_group), function(g) {
        matrix(ret[grp_mask + n_col1 * (g - 1)], nrow = n_row, byrow = TRUE, dimnames = dnames)
      })
    }
  } else if (reshape && npred_per_case > 1) {
    ret <- matrix(ret, nrow = n_row, byrow = TRUE)
  }
  return(ret)
}

#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster.handle <- function(object, ...) {

  bst <- xgb.handleToBooster(object)

  ret <- predict(bst, ...)
  return(ret)
}


#' Accessors for serializable attributes of a model.
#'
#' These methods allow to manipulate the key-value attribute strings of an xgboost model.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param name a non-empty character string specifying which attribute is to be accessed.



( run in 2.700 seconds using v1.01-cache-2.11-cpan-437f7b0c052 )