Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/R/xgb.plot.deepness.R  view on Meta::CPAN

#' Plot model trees deepness
#'
#' Visualizes distributions related to the depth of tree leaves.
#' \code{xgb.plot.deepness} uses base R graphics, while \code{xgb.ggplot.deepness} uses the ggplot backend.
#' 
#' @param model either an \code{xgb.Booster} model generated by the \code{xgb.train} function
#'        or a data.table result of the \code{xgb.model.dt.tree} function.
#' @param plot (base R barplot) whether a barplot should be produced. 
#'        If FALSE, only a data.table is returned.
#' @param which which distribution to plot (see details).
#' @param ... other parameters passed to \code{barplot} or \code{plot}.
#' 
#' @details
#' 
#' When \code{which="2x1"}, two distributions with respect to the leaf depth
#' are plotted on top of each other:
#' \itemize{
#'  \item the distribution of the number of leaves in a tree model at a certain depth;
#'  \item the distribution of the average weighted number of observations ("cover")
#'        ending up in leaves at a certain depth.
#' }
#' Those could be helpful in determining sensible ranges of the \code{max_depth} 
#' and \code{min_child_weight} parameters.
#' 
#' When \code{which="max.depth"} or \code{which="med.depth"}, plots of either maximum or median depth
#' per tree with respect to tree number are created. \code{which="med.weight"} shows how
#' a tree's median absolute leaf weight changes through the iterations.
#'
#' This function was inspired by the blog post
#' \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}.
#' 
#' @return
#' 
#' Other than producing plots (when \code{plot=TRUE}), the \code{xgb.plot.deepness} function
#' silently returns a processed data.table where each row corresponds to a terminal leaf in a tree model,
#' and contains information about the leaf's depth, cover, and weight (which is used in calculating predictions).
#' 
#' The \code{xgb.ggplot.deepness} function silently returns either a list of two ggplot graphs when \code{which="2x1"}
#' or a single ggplot graph for the other \code{which} options.
#'
#' @seealso 
#' 
#' \code{\link{xgb.train}}, \code{\link{xgb.model.dt.tree}}.
#' 
#' @examples
#' 
#' data(agaricus.train, package='xgboost')
#'
#' # Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
#'                eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
#'                subsample = 0.5, min_child_weight = 2)
#'
#' xgb.plot.deepness(bst)
#' xgb.ggplot.deepness(bst)
#' 
#' xgb.plot.deepness(bst, which='max.depth', pch=16, col=rgb(0,0,1,0.3), cex=2)
#' 
#' xgb.plot.deepness(bst, which='med.weight', pch=16, col=rgb(0,0,1,0.3), cex=2)
#'
#' @rdname xgb.plot.deepness
#' @export
xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.depth", "med.weight"),
                              plot = TRUE, ...) {
  
  if (!(inherits(model, "xgb.Booster") || is.data.table(model)))
    stop("model: Has to be either an xgb.Booster model generaged by the xgb.train function\n",
         "or a data.table result of the xgb.importance function")

  if (!requireNamespace("igraph", quietly = TRUE))
    stop("igraph package is required for plotting the graph deepness.", call. = FALSE)

  which <- match.arg(which)
  
  dt_tree <- model
  if (inherits(model, "xgb.Booster"))
    dt_tree <- xgb.model.dt.tree(model = model)
  
  if (!all(c("Feature", "Tree", "ID", "Yes", "No", "Cover") %in% colnames(dt_tree)))
    stop("Model tree columns are not as expected!\n",
         "  Note that this function works only for tree models.")
  
  dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
  setkeyv(dt_depths, c("Tree", "ID"))
  # count by depth levels, and also calculate average cover at a depth
  dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
  setkey(dt_summaries, "Depth")



( run in 0.453 second using v1.01-cache-2.11-cpan-71847e10f99 )