Alien-XGBoost

MANIFEST

xgboost/cub/cub/host/mutex.cuh
xgboost/cub/cub/iterator/arg_index_input_iterator.cuh
xgboost/cub/cub/iterator/cache_modified_input_iterator.cuh
xgboost/cub/cub/iterator/cache_modified_output_iterator.cuh
xgboost/cub/cub/iterator/constant_input_iterator.cuh
xgboost/cub/cub/iterator/counting_input_iterator.cuh
xgboost/cub/cub/iterator/discard_output_iterator.cuh
xgboost/cub/cub/iterator/tex_obj_input_iterator.cuh
xgboost/cub/cub/iterator/tex_ref_input_iterator.cuh
xgboost/cub/cub/iterator/transform_input_iterator.cuh
xgboost/cub/cub/thread/thread_load.cuh
xgboost/cub/cub/thread/thread_operators.cuh
xgboost/cub/cub/thread/thread_reduce.cuh
xgboost/cub/cub/thread/thread_scan.cuh
xgboost/cub/cub/thread/thread_search.cuh
xgboost/cub/cub/thread/thread_store.cuh
xgboost/cub/cub/util_allocator.cuh
xgboost/cub/cub/util_arch.cuh
xgboost/cub/cub/util_debug.cuh
xgboost/cub/cub/util_device.cuh
xgboost/cub/cub/util_macro.cuh
xgboost/cub/cub/util_namespace.cuh
xgboost/cub/cub/util_ptx.cuh
xgboost/cub/cub/util_type.cuh
xgboost/cub/cub/warp/specializations/warp_reduce_shfl.cuh
xgboost/cub/cub/warp/specializations/warp_reduce_smem.cuh

MANIFEST

xgboost/dmlc-core/include/dmlc/logging.h
xgboost/dmlc-core/include/dmlc/lua.h
xgboost/dmlc-core/include/dmlc/memory.h
xgboost/dmlc-core/include/dmlc/memory_io.h
xgboost/dmlc-core/include/dmlc/omp.h
xgboost/dmlc-core/include/dmlc/optional.h
xgboost/dmlc-core/include/dmlc/parameter.h
xgboost/dmlc-core/include/dmlc/recordio.h
xgboost/dmlc-core/include/dmlc/registry.h
xgboost/dmlc-core/include/dmlc/serializer.h
xgboost/dmlc-core/include/dmlc/thread_local.h
xgboost/dmlc-core/include/dmlc/threadediter.h
xgboost/dmlc-core/include/dmlc/timer.h
xgboost/dmlc-core/include/dmlc/type_traits.h
xgboost/dmlc-core/make/config.mk
xgboost/dmlc-core/make/dmlc.mk
xgboost/dmlc-core/scripts/lint.py
xgboost/dmlc-core/scripts/packages.mk
xgboost/dmlc-core/scripts/setup_nvcc.sh
xgboost/dmlc-core/scripts/travis/travis_before_cache.sh
xgboost/dmlc-core/scripts/travis/travis_osx_install.sh
xgboost/dmlc-core/scripts/travis/travis_script.sh

MANIFEST

xgboost/dmlc-core/src/io/input_split_base.h
xgboost/dmlc-core/src/io/line_split.cc
xgboost/dmlc-core/src/io/line_split.h
xgboost/dmlc-core/src/io/local_filesys.cc
xgboost/dmlc-core/src/io/local_filesys.h
xgboost/dmlc-core/src/io/recordio_split.cc
xgboost/dmlc-core/src/io/recordio_split.h
xgboost/dmlc-core/src/io/s3_filesys.cc
xgboost/dmlc-core/src/io/s3_filesys.h
xgboost/dmlc-core/src/io/single_file_split.h
xgboost/dmlc-core/src/io/threaded_input_split.h
xgboost/dmlc-core/src/io/uri_spec.h
xgboost/dmlc-core/src/recordio.cc
xgboost/dmlc-core/test/README.md
xgboost/dmlc-core/test/csv_parser_test.cc
xgboost/dmlc-core/test/dataiter_test.cc
xgboost/dmlc-core/test/dmlc_test.mk
xgboost/dmlc-core/test/filesys_test.cc
xgboost/dmlc-core/test/iostream_test.cc
xgboost/dmlc-core/test/libfm_parser_test.cc
xgboost/dmlc-core/test/libsvm_parser_test.cc

MANIFEST

xgboost/dmlc-core/test/strtonum_test.cc
xgboost/dmlc-core/test/unittest/dmlc_unittest.mk
xgboost/dmlc-core/test/unittest/unittest_any.cc
xgboost/dmlc-core/test/unittest/unittest_array_view.cc
xgboost/dmlc-core/test/unittest/unittest_config.cc
xgboost/dmlc-core/test/unittest/unittest_json.cc
xgboost/dmlc-core/test/unittest/unittest_logging.cc
xgboost/dmlc-core/test/unittest/unittest_main.cc
xgboost/dmlc-core/test/unittest/unittest_optional.cc
xgboost/dmlc-core/test/unittest/unittest_serializer.cc
xgboost/dmlc-core/test/unittest/unittest_threaditer.cc
xgboost/dmlc-core/tracker/README.md
xgboost/dmlc-core/tracker/dmlc-submit
xgboost/dmlc-core/tracker/dmlc_tracker/__init__.py
xgboost/dmlc-core/tracker/dmlc_tracker/launcher.py
xgboost/dmlc-core/tracker/dmlc_tracker/local.py
xgboost/dmlc-core/tracker/dmlc_tracker/mpi.py
xgboost/dmlc-core/tracker/dmlc_tracker/opts.py
xgboost/dmlc-core/tracker/dmlc_tracker/sge.py
xgboost/dmlc-core/tracker/dmlc_tracker/ssh.py
xgboost/dmlc-core/tracker/dmlc_tracker/submit.py

MANIFEST

xgboost/rabit/src/allreduce_robust-inl.h
xgboost/rabit/src/allreduce_robust.cc
xgboost/rabit/src/allreduce_robust.h
xgboost/rabit/src/c_api.cc
xgboost/rabit/src/engine.cc
xgboost/rabit/src/engine_base.cc
xgboost/rabit/src/engine_empty.cc
xgboost/rabit/src/engine_mock.cc
xgboost/rabit/src/engine_mpi.cc
xgboost/rabit/src/socket.h
xgboost/rabit/src/thread_local.h
xgboost/rabit/test/Makefile
xgboost/rabit/test/README.md
xgboost/rabit/test/lazy_recover.cc
xgboost/rabit/test/local_recover.cc
xgboost/rabit/test/local_recover.py
xgboost/rabit/test/model_recover.cc
xgboost/rabit/test/speed_runner.py
xgboost/rabit/test/speed_test.cc
xgboost/rabit/test/test.mk
xgboost/src/c_api/c_api.cc

xgboost/CMakeLists.txt

set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
  "Space separated list of compute versions to be built against")


# Compiler flags
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
if(MSVC)
  # Multithreaded compilation
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
else()
  # Correct error for GCC 5 and cuda
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES")
  # Performance
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
endif()

include_directories (
    ${PROJECT_SOURCE_DIR}/include

xgboost/Makefile

else
# linux defaults
ifndef CC
export CC = gcc
endif
ifndef CXX
export CXX = g++
endif
endif

export LDFLAGS= -pthread -lm $(ADD_LDFLAGS) $(DMLC_LDFLAGS) $(PLUGIN_LDFLAGS)
export CFLAGS=  -std=c++11 -Wall -Wno-unknown-pragmas -Iinclude $(ADD_CFLAGS) $(PLUGIN_CFLAGS)
CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include -I$(GTEST_PATH)/include
#java include path
export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java

ifeq ($(TEST_COVER), 1)
	CFLAGS += -g -O0 -fprofile-arcs -ftest-coverage
else
	CFLAGS += -O3 -funroll-loops
ifeq ($(USE_SSE), 1)

xgboost/NEWS.md

XGBoost Change Log
==================

This file records the changes in the xgboost library in reverse chronological order.

## in progress version
* Updated Sklearn API
  - Updated to allow use of all XGBoost parameters via **kwargs.
  - Updated nthread to n_jobs and seed to random_state (as per Sklearn convention).
* Refactored gbm to allow a more friendly cache strategy
  - Specialized some prediction routines
* Automatically remove nan from input data when it is sparse.
  - This can solve some user-reported problems where istart != hist.size
* Minor fixes
  - Thread-local variables are upgraded so they are automatically freed at thread exit.
* Migrate to C++11
  - The current master version now requires a C++11-enabled compiler (g++ 4.8 or higher)
* New functionality
  - Ability to adjust tree model's statistics to a new dataset without changing tree structures.
  - Extracting feature contributions to individual predictions.
* R package:
  - New parameters:
    - `silent` in `xgb.DMatrix()`
    - `use_int_id` in `xgb.model.dt.tree()`
    - `predcontrib` in `predict()`
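
A minimal R sketch of the new per-prediction feature contributions via the `predcontrib` option listed above (the agaricus data and parameter values are simply the ones used throughout the package examples):

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
               objective = "binary:logistic")
# one row per sample, one column per feature plus a final bias column;
# each row sums (in margin space) to that sample's prediction
contr <- predict(bst, agaricus.train$data, predcontrib = TRUE)
dim(contr)
```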

xgboost/NEWS.md

  - Replace most of the raw pointers with smart pointers, for RAII safety.
* Add an official `tree_method` parameter option for the approximate algorithm (see the sketch after this list).
  - Change the default behavior to prefer the faster algorithm.
  - Users will get a message when the approximate algorithm is chosen.
* Change library name to libxgboost.so
* Backward compatibility
  - The binary buffer file is not backward compatible with previous versions.
  - The model file is backward compatible on 64-bit platforms.
* The model file is compatible between 64-bit and 32-bit platforms (not yet tested).
* The external memory version and other advanced features will be exposed to the R library as well on Linux.
  - Previously some of these features were blocked due to C++11 and threading limits.
  - The Windows version is still blocked because Rtools does not support ```std::thread```.
* rabit and dmlc-core are maintained through git submodule
  - Anyone can now open a PR to update these dependencies.
* Improvements
  - Rabit and xgboost libs are not thread-safe and use thread local PRNGs
  - This could fix some of the problems previously seen when running xgboost with multiple threads.
* JVM Package
  - Enable xgboost4j for Java and Scala
  - XGBoost distributed now runs on Flink and Spark.
* Support listing of model attributes for metadata.
  - https://github.com/dmlc/xgboost/pull/1198
  - https://github.com/dmlc/xgboost/pull/1166
* Support callback API
  - https://github.com/dmlc/xgboost/issues/892
  - https://github.com/dmlc/xgboost/pull/1211
  - https://github.com/dmlc/xgboost/pull/1264
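
A minimal R sketch of requesting the approximate algorithm through the new `tree_method` option (the data and other parameters are just the usual agaricus example settings):

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# "approx" requests the approximate split-finding algorithm;
# "exact" and "auto" are the other standard choices
param <- list(max_depth = 2, eta = 1, objective = "binary:logistic",
              tree_method = "approx")
bst <- xgb.train(param, dtrain, nrounds = 2)
```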

xgboost/R-package/R/xgb.Booster.R

#' \code{xgb.Booster.complete} function explicitly once after loading a model as an R object.
#' That would prevent further repeated implicit reconstruction of an internal booster model.
#' 
#' @return 
#' An object of \code{xgb.Booster} class.
#' 
#' @examples
#' 
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' saveRDS(bst, "xgb.model.rds")
#' 
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#' 
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#' 

xgboost/R-package/R/xgb.Booster.R

#' 
#' @examples
#' ## binary classification:
#' 
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' 
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, ntreelimit = 1)
#' 
#' # Predicting tree leaves:
#' # the result is an nsamples X ntrees matrix
#' pred_leaf <- predict(bst, test$data, predleaf = TRUE)
#' str(pred_leaf)
#' 

xgboost/R-package/R/xgb.Booster.R

#' barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
#' par(mar = old_mar)
#' 
#' 
#' ## multiclass classification in iris dataset:
#' 
#' lb <- as.numeric(iris$Species) - 1
#' num_class <- 3
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softprob", num_class = num_class)
#' # predict for softmax returns num_class probability numbers per case:
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' # reshape it to a num_class-columns matrix
#' pred <- matrix(pred, ncol=num_class, byrow=TRUE)
#' # convert the probabilities to softmax labels
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
#' sum(pred_labels != lb)/length(lb)
#' 
#' # compare that to the predictions from softmax:
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
#'                max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
#'                objective = "multi:softmax", num_class = num_class)
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result 
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
#' sum(pred5 != lb)/length(lb)
#' 
#' 
#' ## random forest-like model of 25 trees for binary classification:
#' 
#' set.seed(11)
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
#'                nthread = 2, nrounds = 1, objective = "binary:logistic",
#'                num_parallel_tree = 25, subsample = 0.6, colsample_bytree = 0.1)
#' # Inspect the prediction error vs number of trees:
#' lb <- test$label
#' dtest <- xgb.DMatrix(test$data, label=lb)
#' err <- sapply(1:25, function(n) {
#'   pred <- predict(bst, dtest, ntreelimit=n)
#'   sum((pred > 0.5) != lb)/length(lb)
#' })
#' plot(err, type='l', ylim=c(0,0.1), xlab='#trees')
#'

xgboost/R-package/R/xgb.Booster.R

#' or \code{NULL} if an attribute wasn't stored in a model.
#' 
#' \code{xgb.attributes} returns a list of all attributes stored in a model 
#' or \code{NULL} if a model has no stored attributes.
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' xgb.attr(bst, "my_attribute") <- "my attribute value"
#' print(xgb.attr(bst, "my_attribute"))
#' xgb.attributes(bst) <- list(a = 123, b = "abc")
#'
#' xgb.save(bst, 'xgb.model')
#' bst1 <- xgb.load('xgb.model')
#' print(xgb.attr(bst1, "my_attribute"))
#' print(xgb.attributes(bst1))
#' 

xgboost/R-package/R/xgb.Booster.R

#'
#' @details
#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only a handle would need to be copied.
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' xgb.parameters(bst) <- list(eta = 0.1)
#' 
#' @rdname xgb.parameters
#' @export
`xgb.parameters<-` <- function(object, value) {
  if (length(value) == 0) return(object)
  p <- as.list(value)
  if (is.null(names(p)) || any(nchar(names(p)) == 0)) {
    stop("parameter names cannot be empty strings")

xgboost/R-package/R/xgb.Booster.R

#' Print information about xgb.Booster.
#' 
#' @param x an xgb.Booster object
#' @param verbose whether to print detailed data (e.g., attribute values)
#' @param ... not currently used
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' attr(bst, 'myattr') <- 'memo'
#' 
#' print(bst)
#' print(bst, verbose=TRUE)
#'
#' @method print xgb.Booster 
#' @export
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
  cat('##### xgb.Booster\n')
  

xgboost/R-package/R/xgb.create.features.R

#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
#' dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
#'
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
#' nround = 4
#'
#' bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
#' 
#' # Model accuracy without new features
#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
#'                    length(agaricus.test$label)
#' 
#' # Convert previous features to one hot encoding
#' new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
#' 
#' # learning with new features
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
#' 
#' # Model accuracy with new features
#' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
#'                   length(agaricus.test$label)
#' 
#' # Here the accuracy was already good and is now perfect.
#' cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now",
#'           accuracy.after, "!\n"))
#' 
#' @export

xgboost/R-package/R/xgb.cv.R

#' 
#' @param params the list of parameters. Commonly used ones are:
#' \itemize{
#'   \item \code{objective} objective function, common ones are
#'   \itemize{
#'     \item \code{reg:linear} linear regression
#'     \item \code{binary:logistic} logistic regression for classification
#'   }
#'   \item \code{eta} step size of each boosting step
#'   \item \code{max_depth} maximum depth of the tree
#'   \item \code{nthread} number of threads used in training; if not set, all threads are used
#' }
#'
#'   See \code{\link{xgb.train}} for further details.
#'   See also demo/ for walkthrough example in R.
#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. 
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param missing is only used when input is a dense matrix. By default it is set to NA, which means 
#'        that NA values should be considered as 'missing' by the algorithm. 

xgboost/R-package/R/xgb.cv.R

#'         (only available with early stopping).
#'   \item \code{pred} CV prediction values available when \code{prediction} is set. 
#'         It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#'   \item \code{models} a list of the CV folds' models. It is only available with the explicit 
#'         setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
#' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"),
#'                   max_depth = 3, eta = 1, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#' 
#' @export
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
                   prediction = FALSE, showsd = TRUE, metrics=list(),
                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, 
                   verbose = TRUE, print_every_n=1L,
                   early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {

xgboost/R-package/R/xgb.cv.R

#' @param ... passed to \code{data.table.print}
#' 
#' @details
#' When not verbose, it would only print the evaluation results, 
#' including the best iteration (when available).
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#' 
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
  cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')
  
  if (verbose) {

xgboost/R-package/R/xgb.dump.R

#' @return
#' If fname is not provided or set to \code{NULL} the function will return the model
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' # save the model in file 'xgb.model.dump'
#' xgb.dump(bst, 'xgb.model.dump', with_stats = TRUE)
#' 
#' # print the model without saving it to a file
#' print(xgb.dump(bst, with_stats = TRUE))
#' 
#' # print in JSON format:
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
#' 
#' @export

xgboost/R-package/R/xgb.importance.R

#' 
#' If \code{feature_names} is not provided and \code{model} doesn't have \code{feature_names}, 
#' the index of the features will be used instead. Because the index is extracted from the model dump
#' (based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (as is usual in R).
#' 
#' @examples
#' 
#' # binomial classification using gbtree:
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' xgb.importance(model = bst)
#' 
#' # binomial classification using gblinear:
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, booster = "gblinear", 
#'                eta = 0.3, nthread = 1, nrounds = 20, objective = "binary:logistic")
#' xgb.importance(model = bst)
#' 
#' # multiclass classification using gbtree:
#' nclass <- 3
#' nrounds <- 10
#' mbst <- xgboost(data = as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1,
#'                max_depth = 3, eta = 0.2, nthread = 2, nrounds = nrounds,
#'                objective = "multi:softprob", num_class = nclass)
#' # all classes clumped together:
#' xgb.importance(model = mbst)
#' # inspect importances separately for each class:
#' xgb.importance(model = mbst, trees = seq(from=0, by=nclass, length.out=nrounds))
#' xgb.importance(model = mbst, trees = seq(from=1, by=nclass, length.out=nrounds))
#' xgb.importance(model = mbst, trees = seq(from=2, by=nclass, length.out=nrounds))
#' 
#' # multiclass classification using gblinear:
#' mbst <- xgboost(data = scale(as.matrix(iris[, -5])), label = as.numeric(iris$Species) - 1,
#'                booster = "gblinear", eta = 0.2, nthread = 1, nrounds = 15,
#'                objective = "multi:softprob", num_class = nclass)
#' xgb.importance(model = mbst)
#'
#' @export
xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
                           data = NULL, label = NULL, target = NULL){
  
  if (!(is.null(data) && is.null(label) && is.null(target)))
    warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
  

xgboost/R-package/R/xgb.load.R

#' 
#' @seealso 
#' \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}. 
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.load <- function(modelfile) {
  if (is.null(modelfile))
    stop("xgb.load: modelfile cannot be NULL")

  handle <- xgb.Booster.handle(modelfile = modelfile)
  # re-use modelfile if it is raw so we do not need to serialize

xgboost/R-package/R/xgb.model.dt.tree.R

#' When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers
#' in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from 
#' the corresponding trees in the "Node" column.
#' 
#' @examples
#' # Basic use:
#' 
#' data(agaricus.train, package='xgboost')
#' 
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' 
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
#' 
#' # This bst model already has feature_names stored with it, so those would be used when 
#' # feature_names is not set:
#' (dt <- xgb.model.dt.tree(model = bst))
#' 
#' # How to match feature names of splits that are following a current 'Yes' branch:
#' 
#' merge(dt, dt[, .(ID, Y.Feature=Feature)], by.x='Yes', by.y='ID', all.x=TRUE)[order(Tree,Node)]

xgboost/R-package/R/xgb.plot.deepness.R

#' @seealso 
#' 
#' \code{\link{xgb.train}}, \code{\link{xgb.model.dt.tree}}.
#' 
#' @examples
#' 
#' data(agaricus.train, package='xgboost')
#'
#' # Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
#'                eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
#'                subsample = 0.5, min_child_weight = 2)
#'
#' xgb.plot.deepness(bst)
#' xgb.ggplot.deepness(bst)
#' 
#' xgb.plot.deepness(bst, which='max.depth', pch=16, col=rgb(0,0,1,0.3), cex=2)
#' 
#' xgb.plot.deepness(bst, which='med.weight', pch=16, col=rgb(0,0,1,0.3), cex=2)
#'
#' @rdname xgb.plot.deepness

xgboost/R-package/R/xgb.plot.importance.R

#' The \code{xgb.ggplot.importance} function returns a ggplot graph which could be customized afterwards.
#' E.g., to change the title of the graph, add \code{+ ggtitle("A GRAPH NAME")} to the result.
#'
#' @seealso
#' \code{\link[graphics]{barplot}}.
#' 
#' @examples
#' data(agaricus.train)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
#' 
#' xgb.plot.importance(importance_matrix, rel_to_first = TRUE, xlab = "Relative importance")
#' 
#' (gg <- xgb.ggplot.importance(importance_matrix, measure = "Frequency", rel_to_first = TRUE))
#' gg + ggplot2::ylab("Frequency")
#'
#' @rdname xgb.plot.importance
#' @export

xgboost/R-package/R/xgb.plot.multi.trees.R

#' The function projects each tree onto one, and keeps, for each position, the first 
#' \code{features_keep} features (based on the per-feature Gain measure).
#' 
#' This function is inspired by this blog post:
#' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#'                  eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
#'                  min_child_weight = 50)
#'
#' p <- xgb.plot.multi.trees(model = bst, feature_names = colnames(agaricus.train$data),
#'                           features_keep = 3)
#' print(p)
#'
#' @export
xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL, ...){
  check.deprecation(...)
  tree.matrix <- xgb.model.dt.tree(feature_names = feature_names, model = model)

xgboost/R-package/R/xgb.plot.tree.R

#' 
#' When \code{render = FALSE}:
#' silently returns a graph object which is of DiagrammeR's class \code{dgr_graph}.
#' This could be useful if one wants to modify some of the graph attributes
#' before rendering the graph with \code{\link[DiagrammeR]{render_graph}}.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' 
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' # plot all the trees
#' xgb.plot.tree(model = bst)
#' # plot only the first tree and display the node ID:
#' xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE)
#' 
#' \dontrun{
#' # Below is an example of how to save this plot to a file. 
#' # Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
#' library(DiagrammeR)
#' gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE)

xgboost/R-package/R/xgb.save.R

#' 
#' @seealso 
#' \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}. 
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.save <- function(model, fname) {
  if (typeof(fname) != "character")
    stop("fname must be character")
  if (!inherits(model, "xgb.Booster")) {
    stop("model must be xgb.Booster.",
         if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "")

xgboost/R-package/R/xgb.save.raw.R

#' Save xgboost model from xgboost or xgb.train
#' 
#' @param model the model object.
#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load(raw)
#' pred <- predict(bst, test$data)
#'
#' @export
xgb.save.raw <- function(model) {
  model <- xgb.get.handle(model)
  .Call(XGBoosterModelToRaw_R, model)
}

xgboost/R-package/R/xgb.train.R

#' @param weight a vector indicating the weight for each row of the input.
#' 
#' @details 
#' These are the training functions for \code{xgboost}. 
#' 
#' The \code{xgb.train} interface supports advanced features such as \code{watchlist}, 
#' customized objective and evaluation metric functions, and is therefore more flexible 
#' than the \code{xgboost} interface.
#'
#' Parallelization is automatically enabled if \code{OpenMP} is present. 
#' Number of threads can also be manually specified via \code{nthread} parameter.
#' 
#' The evaluation metric is chosen automatically by Xgboost (according to the objective)
#' when the \code{eval_metric} parameter is not provided.
#' User may set one or several \code{eval_metric} parameters. 
#' Note that when using a customized metric, only this single metric can be used.
#' The following is the list of built-in metrics for which Xgboost provides an optimized implementation:
#'   \itemize{
#'      \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
#'      \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
#'      \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}

xgboost/R-package/R/xgb.train.R

#' 
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' 
#' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
#' dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
#' watchlist <- list(train = dtrain, eval = dtest)
#' 
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, 
#'               objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#' 
#' 
#' ## An xgb.train example where custom objective and evaluation metric are used:
#' logregobj <- function(preds, dtrain) {
#'    labels <- getinfo(dtrain, "label")
#'    preds <- 1/(1 + exp(-preds))
#'    grad <- preds - labels
#'    hess <- preds * (1 - preds)
#'    return(list(grad = grad, hess = hess))
#' }
#' evalerror <- function(preds, dtrain) {
#'   labels <- getinfo(dtrain, "label")
#'   err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
#'   return(list(metric = "error", value = err))
#' }
#' 
#' # These functions could be used by passing them either:
#' #  as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2, 
#'               objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#' 
#' #  or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#'                  objective = logregobj, eval_metric = evalerror)
#' 
#' #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#'                  obj = logregobj, feval = evalerror)
#' 
#' 
#' ## An xgb.train example of using variable learning rates at each iteration:
#' param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
#'               objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#'                  callbacks = list(cb.reset.parameters(my_etas)))
#' 
#' ## Early stopping:
#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
#'                  early_stopping_rounds = 3)
#' 
#' ## An 'xgboost' interface example:
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, 
#'                max_depth = 2, eta = 1, nthread = 2, nrounds = 2, 
#'                objective = "binary:logistic")
#' pred <- predict(bst, agaricus.test$data)
#' 
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
                      obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
                      early_stopping_rounds = NULL, maximize = NULL,
                      save_period = NULL, save_name = "xgboost.model", 
                      xgb_model = NULL, callbacks = list(), ...) {

xgboost/R-package/configure

  OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
  ac_pkg_openmp=no
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5
$as_echo_n "checking whether OpenMP will work in a package... " >&6; }
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */
#include <omp.h>
int
main ()
{
 return omp_get_num_threads ();
  ;
  return 0;
}
_ACEOF
  PKG_CFLAGS="${OPENMP_CFLAGS}" PKG_LIBS="${OPENMP_CFLAGS}" "$RBIN" CMD SHLIB conftest.c 1>&5 2>&5 && "$RBIN" --vanilla -q -e "dyn.load(paste('conftest',.Platform\$dynlib.ext,sep=''))" 1>&5 2>&5 && ac_pkg_openmp=yes
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5
$as_echo "${ac_pkg_openmp}" >&6; }
  if test "${ac_pkg_openmp}" = no; then
    OPENMP_CXXFLAGS=''
  fi

xgboost/R-package/configure.ac

then
  OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
fi

if test `uname -s` = "Darwin"
then
  OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
  ac_pkg_openmp=no
  AC_MSG_CHECKING([whether OpenMP will work in a package])
  AC_LANG_CONFTEST(
  [AC_LANG_PROGRAM([[#include <omp.h>]], [[ return omp_get_num_threads (); ]])])
  PKG_CFLAGS="${OPENMP_CFLAGS}" PKG_LIBS="${OPENMP_CFLAGS}" "$RBIN" CMD SHLIB conftest.c 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD && "$RBIN" --vanilla -q -e "dyn.load(paste('conftest',.Platform\$dynlib.ext,sep=''))" 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAG...
  AC_MSG_RESULT([${ac_pkg_openmp}])
  if test "${ac_pkg_openmp}" = no; then
    OPENMP_CXXFLAGS=''
  fi
fi

AC_SUBST(OPENMP_CXXFLAGS)
AC_CONFIG_FILES([src/Makevars])
AC_OUTPUT

xgboost/R-package/demo/basic_walkthrough.R

# the loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1}
class(train$label)
class(train$data)

#-------------Basic Training using XGBoost-----------------
# this is the basic usage of xgboost: you can put a matrix in the data field
# note: we are putting in a sparse matrix here; xgboost naturally handles sparse input
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
print("Training xgboost with sparseMatrix")
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nrounds = 2,
               nthread = 2, objective = "binary:logistic")
# alternatively, you can put in a dense matrix, i.e. a basic R matrix
print("Training xgboost with Matrix")
bst <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nrounds = 2,
               nthread = 2, objective = "binary:logistic")

# you can also put in an xgb.DMatrix object, which stores label, data and other metadata needed for advanced features
print("Training xgboost with xgb.DMatrix")
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2, 
               objective = "binary:logistic")

# Verbose = 0,1,2
print("Train xgboost with verbose 0, no message")
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
               nthread = 2, objective = "binary:logistic", verbose = 0)
print("Train xgboost with verbose 1, print evaluation metric")
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
               nthread = 2, objective = "binary:logistic", verbose = 1)
print("Train xgboost with verbose 2, also print information about tree")
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
               nthread = 2, objective = "binary:logistic", verbose = 2)

# you can also specify data as a file path to a LibSVM format input
# since we do not have this file with us, the following line is just for illustration
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic")

#--------------------basic prediction using xgboost--------------
# you can do prediction using the following line
# you can put in Matrix, sparseMatrix, or xgb.DMatrix 
pred <- predict(bst, test$data)
err <- mean(as.numeric(pred > 0.5) != test$label)

xgboost/R-package/demo/basic_walkthrough.R

# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = train$data, label=train$label)
dtest <- xgb.DMatrix(data = test$data, label=test$label)
#---------------Using watchlist----------------
# watchlist is a list of xgb.DMatrix objects, each of them tagged with a name
watchlist <- list(train=dtrain, test=dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list 
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
                 nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
                 eval_metric = "error", eval_metric = "logloss",
                 nthread = 2, objective = "binary:logistic")

# xgb.DMatrix can also be saved using xgb.DMatrix.save
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
                 nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label = getinfo(dtest, "label")
pred <- predict(bst, dtest)
err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))

# You can dump the tree you learned using xgb.dump into a text file
xgb.dump(bst, "dump.raw.txt", with_stats = T)

# Finally, you can check which features are the most important.
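# (sketch of that check, assuming the sparse training matrix `train$data` from above)
imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst)
print(imp_matrix)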

xgboost/R-package/demo/boost_from_prediction.R

data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

watchlist <- list(eval = dtest, train = dtrain)
###
# advanced: start from an initial base prediction
#
print('start running example to start from an initial prediction')
# train xgboost for 1 round
param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value instead of the transformed prediction in set_base_margin
# predicting with outputmargin=TRUE will always give you margin values before the logistic transformation
ptrain <- predict(bst, dtrain, outputmargin=TRUE)
ptest  <- predict(bst, dtest, outputmargin=TRUE)
# set the base_margin property of dtrain and dtest
# base margin is the base prediction we will boost from
setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)
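# (sketch of the follow-on step: with base_margin set, another training round
#  starts boosting from those stored predictions rather than from zero)
bst2 <- xgb.train(param, dtrain, 1, watchlist)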

xgboost/R-package/demo/create_sparse_matrix.R


# Create the output vector (not sparse)
# 1. Set, for all rows, the field in the Y column to 0; 
# 2. Set Y to 1 when Improved == Marked; 
# 3. Return the Y column
output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]

# The following is the same process as in the other demos
cat("Learning...\n")
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9,
               eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")

importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
print(importance)
# According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. The second most important feature is having received a placebo or not. The sex is third. Then we see our generated features ...

# Do these results make sense?
# Let's check the Chi2 statistic between each of these features and the outcome.

print(chisq.test(df$Age, df$Y))
# Pearson correlation between Age and illness disappearing is 35

xgboost/R-package/demo/cross_validation.R

require(xgboost)
# load in the agaricus dataset
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

nround <- 2
param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')

cat('running cross validation\n')
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value+std_value
# std_value is standard deviation of the metric
xgb.cv(param, dtrain, nround, nfold=5, metrics={'error'})

cat('running cross validation, disable standard deviation display\n')
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value+std_value

xgboost/R-package/demo/custom_objective.R

# this may make the built-in evaluation metric not function properly
# for example, we are doing logistic loss: the prediction is the score before the logistic transformation
# the built-in evaluation error assumes the input is after the logistic transformation
# Keep this in mind when you use the customization; you may need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
  return(list(metric = "error", value = err))
}

param <- list(max_depth=2, eta=1, nthread = 2, silent=1, 
              objective=logregobj, eval_metric=evalerror)
print ('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)

#
# there can be cases where you want additional information 
# to be considered besides the properties of the DMatrix you can get by getinfo
# you can set additional information as attributes of the DMatrix

xgboost/R-package/demo/custom_objective.R

# this is the new customized objective, where you can access the things you set
# the same thing applies to a customized evaluation function
logregobjattr <- function(preds, dtrain) {
  # now you can access the attribute in customized function
  labels <- attr(dtrain, 'label')
  preds <- 1/(1 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1 - preds)
  return(list(grad = grad, hess = hess))
}
param <- list(max_depth=2, eta=1, nthread = 2, silent=1, 
              objective=logregobjattr, eval_metric=evalerror)
print ('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)

xgboost/R-package/demo/early_stopping.R

require(xgboost)
# load in the agaricus dataset
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for a customized objective function, we leave objective as default
# note: what we are getting is the margin value in prediction
# you must know what you are doing
param <- list(max_depth=2, eta=1, nthread = 2, silent=1)
watchlist <- list(eval = dtest)
num_round <- 20
# user-defined objective function: given the prediction, return the gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1/(1 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1 - preds)
  return(list(grad = grad, hess = hess))

xgboost/R-package/demo/generalized_linear_model.R

#  this script demonstrates how to fit a generalized linear model in xgboost
#  basically, we are using a linear model, instead of trees, for our boosters
#  you can fit a linear regression or logistic regression model
##

# change booster to gblinear, so that we are fitting a linear model
# alpha is the L1 regularizer 
# lambda is the L2 regularizer
# you can also set lambda_bias, which is the L2 regularizer on the bias term
param <- list(objective = "binary:logistic", booster = "gblinear",
              nthread = 2, alpha = 0.0001, lambda = 1)

# normally, you do not need to set eta (step_size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun);
# parallelization can affect convergence in certain cases
# setting eta to a smaller value, e.g. 0.5, can make the optimization more stable

##
# the rest of settings are the same
##
watchlist <- list(eval = dtest, train = dtrain)
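# (sketch of the training/evaluation step with the linear booster, reusing the
#  param list above and the dtrain/dtest DMatrix objects defined earlier in the
#  demo; num_round = 2 is an assumed value for illustration)
num_round <- 2
bst <- xgb.train(param, dtrain, num_round, watchlist)
ypred <- predict(bst, dtest)
labels <- getinfo(dtest, 'label')
cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')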

xgboost/R-package/demo/predict_first_ntree.R

data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
nround = 2

# training the model for two rounds
bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest,'label')

### predict using the first tree only
ypred1 = predict(bst, dtest, ntreelimit=1)
# by default, we predict using all the trees
ypred2 = predict(bst, dtest)

cat('error of ypred1=', mean(as.numeric(ypred1>0.5)!=labels),'\n')
cat('error of ypred2=', mean(as.numeric(ypred2>0.5)!=labels),'\n')

xgboost/R-package/demo/predict_leaf_indices.R

# load in the agaricus dataset
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)

param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
nround = 4

# training the model for four rounds
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)

# Model accuracy without new features
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)

# by default, we predict using all the trees

pred_with_leaf = predict(bst, dtest, predleaf = TRUE)
head(pred_with_leaf)

create.new.tree.features <- function(model, original.features){

xgboost/R-package/demo/predict_leaf_indices.R

}

# Convert previous features to one hot encoding
new.features.train <- create.new.tree.features(bst, agaricus.train$data)
new.features.test <- create.new.tree.features(bst, agaricus.test$data)

# learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)

# Model accuracy with new features
accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)

# Here the accuracy was already good and is now perfect.
cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", accuracy.after, "!\n"))

xgboost/R-package/man/predict.xgb.Booster.Rd

}
\examples{
## binary classification:

data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test

bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 
               eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree
pred1 <- predict(bst, test$data, ntreelimit = 1)

# Predicting tree leaves:
# the result is an nsamples X ntrees matrix
pred_leaf <- predict(bst, test$data, predleaf = TRUE)
str(pred_leaf)

xgboost/R-package/man/predict.xgb.Booster.Rd

barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
par(mar = old_mar)


## multiclass classification in iris dataset:

lb <- as.numeric(iris$Species) - 1
num_class <- 3
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
               max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
               objective = "multi:softprob", num_class = num_class)
# predict for softmax returns num_class probability numbers per case:
pred <- predict(bst, as.matrix(iris[, -5]))
str(pred)
# reshape it to a num_class-columns matrix
pred <- matrix(pred, ncol=num_class, byrow=TRUE)
# convert the probabilities to softmax labels
pred_labels <- max.col(pred) - 1
# the following should result in the same error as seen in the last iteration
sum(pred_labels != lb)/length(lb)

# compare that to the predictions from softmax:
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
               max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5,
               objective = "multi:softmax", num_class = num_class)
pred <- predict(bst, as.matrix(iris[, -5]))
str(pred)
all.equal(pred, pred_labels)
# prediction from using only 5 iterations should result 
# in the same error as seen in iteration 5:
pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
sum(pred5 != lb)/length(lb)


## random forest-like model of 25 trees for binary classification:

set.seed(11)
bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
               nthread = 2, nrounds = 1, objective = "binary:logistic",
               num_parallel_tree = 25, subsample = 0.6, colsample_bytree = 0.1)
# Inspect the prediction error vs number of trees:
lb <- test$label
dtest <- xgb.DMatrix(test$data, label=lb)
err <- sapply(1:25, function(n) {
  pred <- predict(bst, dtest, ntreelimit=n)
  sum((pred > 0.5) != lb)/length(lb)
})
plot(err, type='l', ylim=c(0,0.1), xlab='#trees')


