verify results from the CPAN

Alien-XGBoost
#' E.g., in a random forest-like model, \code{ntreelimit} would limit the number of trees.
#' But for multiclass classification, while there are multiple trees per iteration, 
#' \code{ntreelimit} limits the number of boosting iterations.
#' 
#' Also note that \code{ntreelimit} would currently do nothing for predictions from gblinear, 
#' since gblinear doesn't keep its boosting history.
#' 
#' One possible practical applications of the \code{predleaf} option is to use the model 
#' as a generator of new features which capture non-linearity and interactions, 
#' e.g., as implemented in \code{\link{xgb.create.features}}.
#' 
#' Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to
#' individual predictions. For "gblinear" booster, feature contributions are simply linear terms
#' (feature_beta * feature_value). For "gbtree" booster, feature contribution is calculated 
#' as a sum of average contribution of that feature's split nodes across all trees to an 
#' individual prediction, following the idea explained in 
#' \url{http://blog.datadive.net/interpreting-random-forests/}.
#' 
#' @return 
#' For regression or binary classification, it returns a vector of length \code{nrows(newdata)}.
#' For multiclass classification, either a \code{num_class * nrows(newdata)} vector or 
#' a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on 
#' the \code{reshape} value.
#' 
#' When \code{predleaf = TRUE}, the output is a matrix object with the 
#' number of columns corresponding to the number of trees.
#' 
#' When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with
#' \code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias.
#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is
#' such a matrix. The contribution values are on the scale of untransformed margin 
#' (e.g., for binary classification would mean that the contributions are log-odds deviations from bias).
#' 
#' @seealso
#' \code{\link{xgb.train}}.
#' 
#' @examples
#' ## binary classification:
#' 
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' 
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, ntreelimit = 1)
#' 
#' # Predicting tree leafs:
#' # the result is an nsamples X ntrees matrix
#' pred_leaf <- predict(bst, test$data, predleaf = TRUE)
#' str(pred_leaf)
#' 
#' # Predicting feature contributions to predictions:
#' # the result is an nsamples X (nfeatures + 1) matrix
#' pred_contr <- predict(bst, test$data, predcontrib = TRUE)
#' str(pred_contr)
#' # verify that contributions' sums are equal to log-odds of predictions (up to foat precision):
#' summary(rowSums(pred_contr) - qlogis(pred))
#' # for the 1st record, let's inspect its features that had non-zero contribution to prediction:
#' contr1 <- pred_contr[1,]
#' contr1 <- contr1[-length(contr1)]    # drop BIAS
#' contr1 <- contr1[contr1 != 0]        # drop non-contributing features
#' contr1 <- contr1[order(abs(contr1))] # order by contribution magnitude
#' old_mar <- par("mar")
#' par(mar = old_mar + c(0,7,0,0))
#' barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
#' par(mar = old_mar)
#' 
#' 
#' ## multiclass classification in iris dataset:
#' 
#' lb <- as.numeric(iris$Species) - 1
#' num_class <- 3
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softprob", num_class = num_class)
#' # predict for softmax returns num_class probability numbers per case:
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' # reshape it to a num_class-columns matrix
#' pred <- matrix(pred, ncol=num_class, byrow=TRUE)
#' # convert the probabilities to softmax labels
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
#' sum(pred_labels != lb)/length(lb)
#' 
#' # compare that to the predictions from softmax:
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softmax", num_class = num_class)
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result 
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
#' sum(pred5 != lb)/length(lb)
#' 
#' 
#' ## random forest-like model of 25 trees for binary classification:
#' 
#' set.seed(11)
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
#'                nthread = 2, nrounds = 1, objective = "binary:logistic",
#'                num_parallel_tree = 25, subsample = 0.6, colsample_bytree = 0.1)
#' # Inspect the prediction error vs number of trees:
#' lb <- test$label
#' dtest <- xgb.DMatrix(test$data, label=lb)
#' err <- sapply(1:25, function(n) {
#'   pred <- predict(bst, dtest, ntreelimit=n)
#'   sum((pred > 0.5) != lb)/length(lb)
#' })
#' plot(err, type='l', ylim=c(0,0.1), xlab='#trees')
#'
#' @rdname predict.xgb.Booster
( run in 0.659 second using v1.01-cache-2.11-cpan-13bb782fe5a )