Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/R/xgb.plot.deepness.R  view on Meta::CPAN

  dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
  setkey(dt_summaries, "Depth")
  
  if (plot) {
    if (which == "2x1") {
      op <- par(no.readonly = TRUE)
      par(mfrow = c(2,1),
          oma = c(3,1,3,1) + 0.1,
          mar = c(1,4,1,0) + 0.1)

      dt_summaries[, barplot(N, border = NA, ylab = 'Number of leafs', ...)]

      dt_summaries[, barplot(Cover, border = NA, ylab = "Weighted cover", names.arg = Depth, ...)]
    
      title("Model complexity", xlab = "Leaf depth", outer = TRUE, line = 1)
      par(op)
    } else if (which == "max.depth") {
      dt_depths[, max(Depth), Tree][
                , plot(jitter(V1, amount = 0.1) ~ Tree, ylab = 'Max tree leaf depth', xlab = "tree #", ...)]
    } else if (which == "med.depth") {
      dt_depths[, median(as.numeric(Depth)), Tree][
                , plot(jitter(V1, amount = 0.1) ~ Tree, ylab = 'Median tree leaf depth', xlab = "tree #", ...)]
    } else if (which == "med.weight") {
      dt_depths[, median(abs(Weight)), Tree][
                , plot(V1 ~ Tree, ylab = 'Median absolute leaf weight', xlab = "tree #", ...)]
    }
  }
  invisible(dt_depths)
}

# Compute the depth of every leaf in every tree of a model.
#
# Internal utility function.
#
# @param dt_tree a data.table describing the tree nodes. The code reads its
#        columns ID, Tree, Feature, Yes and No; a row is treated as a leaf
#        when Feature == "Leaf".
# @return a data.table with columns Tree, Depth and ID, one row per leaf that
#         is the target of some edge. Depth is the number of vertices on the
#         shortest root-to-leaf path found by igraph. A tree made of a single
#         leaf contributes no edges and therefore no rows.
get.leaf.depth <- function(dt_tree) {
  # every non-leaf node contributes two edges: one to its "Yes" child and
  # one to its "No" child
  dt_edges <- rbindlist(list(
      dt_tree[Feature != "Leaf", .(ID, To = Yes, Tree)],
      dt_tree[Feature != "Leaf", .(ID, To = No, Tree)]
    ))
  # flag the edges whose target node "To" is a leaf; targets without a
  # matching leaf row come out of the left join as NA and are set to FALSE
  dt_edges <-
    merge(dt_edges,
          dt_tree[Feature == "Leaf", .(ID, Leaf = TRUE)],
          all.x = TRUE, by.x = "To", by.y = "ID")
  dt_edges[is.na(Leaf), Leaf := FALSE]

  dt_edges[, {
    graph <- igraph::graph_from_data_frame(.SD[, .(ID, To)])
    # IDs of the leaves of the current tree (computed once, used twice)
    leaf_ids <- To[Leaf == TRUE]
    # min(ID) in a tree is a root node
    paths_tmp <- igraph::shortest_paths(graph, from = min(ID), to = leaf_ids)
    # vertex names along the path to each leaf of the tree
    paths <- lapply(paths_tmp$vpath, names)
    # vapply() guarantees an integer Depth column even when there are no
    # paths, whereas sapply() would return an empty list in that case
    data.table(Depth = vapply(paths, length, integer(1L)), ID = leaf_ids)
  }, by = Tree]
}

# Register the names below as known globals so that the CRAN check does not
# complain about undeclared variables: they are never assigned in this file
# and are mostly column names resolved by data.table at evaluation time.
globalVariables(c(
  ".N", "N", "Depth", "Quality", "Cover", "Tree",
  "ID", "Yes", "No", "Feature", "Leaf", "Weight"
))



( run in 0.923 second using v1.01-cache-2.11-cpan-39bf76dae61 )