Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/tests/testthat/test_helpers.R  view on Meta::CPAN

})

# The attribute round-trip check below is only registered when the reported
# system name matches one of the three platforms tested for here.
if (grepl('Windows', Sys.info()[['sysname']]) ||
    grepl('Linux', Sys.info()[['sysname']]) ||
    grepl('Darwin', Sys.info()[['sysname']])) {
  test_that("xgb-attribute numeric precision", {
    # Formatting with 17 significant digits must make the
    # numeric -> character -> numeric round trip lossless.
    values <- 10^runif(100, -20, 20)
    round_tripped <- as.numeric(format(values, digits = 17))
    expect_identical(values, round_tripped)

    # Each value written as a booster attribute must be read back unchanged,
    # both through the single-attribute setter and the bulk setter.
    for (i in seq_along(values)) {
      v <- values[i]

      xgb.attr(bst.Tree, "x") <- v
      stored_single <- xgb.attr(bst.Tree, "x")
      expect_identical(as.numeric(stored_single), v)

      xgb.attributes(bst.Tree) <- list(a = "A", b = v)
      stored_bulk <- xgb.attr(bst.Tree, "b")
      expect_identical(as.numeric(stored_bulk), v)
    }
  })
}

test_that("xgb.Booster serializing as R object works", {
  # Serialize into temporary files instead of the working directory so the
  # test leaves no artifacts behind; each file is removed as soon as it is
  # no longer needed.
  rds_path <- tempfile(fileext = ".rds")
  model_path <- tempfile(fileext = ".model")

  saveRDS(bst.Tree, rds_path)
  bst <- readRDS(rds_path)
  unlink(rds_path)

  dtrain <- xgb.DMatrix(sparse_matrix, label = label)
  # The object restored from RDS must predict and dump like the original.
  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
  expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))

  # Smoke check: saving the restored booster must not raise an error.
  xgb.save(bst, model_path)
  unlink(model_path)

  # Reference value: a nil external pointer classed like a booster handle.
  nil_ptr <- new("externalptr")
  class(nil_ptr) <- "xgb.Booster.handle"

  # Up to this point the restored object is expected to still carry a nil
  # handle; xgb.Booster.complete() is expected to replace it.
  expect_true(identical(bst$handle, nil_ptr))
  bst <- xgb.Booster.complete(bst)
  expect_true(!identical(bst$handle, nil_ptr))
  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
})

test_that("xgb.model.dt.tree works with and without feature names", {
  # Expected column layout of the tree table.
  expected_cols <- c("Tree", "Node", "ID", "Feature", "Split",
                     "Yes", "No", "Missing", "Quality", "Cover")

  # Feature names supplied explicitly.
  dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
  expect_equal(expected_cols, names(dt.tree))
  expect_equal(dim(dt.tree), c(188, 10))
  expect_output(str(dt.tree), 'Feature.*\\"Age\\"')

  # Omitting feature_names must give the same table.
  dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
  expect_equal(dt.tree, dt.tree.0)

  # Model stripped of its feature names: the Feature column (4th) differs,
  # every other column must match.
  bst.Tree.x <- bst.Tree
  bst.Tree.x$feature_names <- NULL
  dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
  expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
  expect_equal(dt.tree[, -4, with = FALSE], dt.tree.x[, -4, with = FALSE])

  # Integer node IDs must equal the part after the '-' in the character IDs.
  dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)
  for (child_col in c("Yes", "No", "Missing")) {
    node_part <- tstrsplit(dt.tree[[child_col]], '-')[[2]]
    expect_equal(as.integer(node_part), dt.tree.int[[child_col]])
  }
})

test_that("xgb.model.dt.tree throws error for gblinear", {
  # Requesting a tree table for the GLM fixture (presumably a gblinear
  # booster, per the test name) must raise an error.
  expect_error(
    xgb.model.dt.tree(model = bst.GLM)
  )
})

test_that("xgb.importance works with and without feature names", {
  # Feature names supplied explicitly.
  importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
  expect_equal(dim(importance.Tree), c(7, 4))
  expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
  expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')

  # Omitting feature_names must give the same table.
  importance.Tree.0 <- xgb.importance(model = bst.Tree)
  expect_equal(importance.Tree, importance.Tree.0)

  # when model contains no feature names:
  # the stripped copy must be the model that is passed in, otherwise this
  # path is never exercised. Only the Feature column (1st) may differ.
  bst.Tree.x <- bst.Tree
  bst.Tree.x$feature_names <- NULL
  importance.Tree.x <- xgb.importance(model = bst.Tree.x)
  expect_equal(importance.Tree[, -1, with = FALSE], importance.Tree.x[, -1, with = FALSE])

  # Plot helpers: the base plot returns the table with an added
  # Importance column; the ggplot variant is only smoke-checked.
  imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
  expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
  xgb.ggplot.importance(importance_matrix = importance.Tree)

  # for multiclass
  imp.Tree <- xgb.importance(model = mbst.Tree)
  expect_equal(dim(imp.Tree), c(4, 4))
  # Smoke check with an explicit tree subset: every nclass-th tree starting
  # at 0 (presumably the trees of the first class -- confirm against the
  # multiclass fixture).
  xgb.importance(model = mbst.Tree, trees = seq(from = 0, by = nclass, length.out = nrounds))
})

test_that("xgb.importance works with GLM model", {
  # Binary linear model: the table has a Feature and a Weight column.
  glm_imp <- xgb.importance(feature_names = feature.names, model = bst.GLM)
  expect_equal(dim(glm_imp), c(10, 2))
  expect_equal(colnames(glm_imp), c("Feature", "Weight"))

  # Smoke check: the call without explicit feature names must not fail.
  xgb.importance(model = bst.GLM)

  # The base plot helper returns the table with an added Importance column;
  # the ggplot variant is only smoke-checked.
  plotted <- xgb.plot.importance(glm_imp)
  expect_equal(colnames(plotted), c("Feature", "Weight", "Importance"))
  xgb.ggplot.importance(glm_imp)

  # Multiclass linear model: an extra Class column, four rows per class.
  multi_imp <- xgb.importance(model = mbst.GLM)
  expect_equal(dim(multi_imp), c(12, 3))
  expect_equal(multi_imp$Class, rep(0:2, each = 4))
})

test_that("xgb.model.dt.tree and xgb.importance work with a single split model", {
  # One boosting round at depth 1 yields a model with a single split.
  stump <- xgboost(
    data = sparse_matrix, label = label,
    max_depth = 1, eta = 1, nthread = 2, nrounds = 1,
    verbose = 0, objective = "binary:logistic"
  )

  # regexp = NA makes expect_error() assert that NO error is raised.
  expect_error(tree_dt <- xgb.model.dt.tree(model = stump), regexp = NA)
  expect_equal(nrow(tree_dt), 3)

  expect_error(imp_dt <- xgb.importance(model = stump), regexp = NA)
  expect_equal(nrow(imp_dt), 1)
  expect_equal(imp_dt$Gain, 1)
})

test_that("xgb.plot.tree works with and without feature names", {
  # Smoke checks only: both call forms must complete without an error.
  xgb.plot.tree(
    feature_names = feature.names,
    model = bst.Tree
  )
  xgb.plot.tree(
    model = bst.Tree
  )
})

test_that("xgb.plot.multi.trees works with and without feature names", {



( run in 2.640 seconds using v1.01-cache-2.11-cpan-5735350b133 )