Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/R/xgb.model.dt.tree.R  view on Meta::CPAN

  
  td <- data.table(t = text)
  td[position, Tree := 1L]
  td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]
  
  if (is.null(trees)) {
    trees <- 0:max(td$Tree)
  } else {
    trees <- trees[trees >= 0 & trees <= max(td$Tree)]
  }
  td <- td[Tree %in% trees & !grepl('^booster', t)]
  
  td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ]
  if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]
  td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]

  # parse branch lines
  branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
                      "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
  branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
  td[isLeaf == FALSE, 
     (branch_cols) := {
      # skip some indices with spurious capture groups from anynumber_regex
      xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE]
      xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
      lapply(seq_len(ncol(xtr)), function(i) xtr[,i])
    }]
  # assign feature_names when available
  if (!is.null(feature_names)) {
    if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
      stop("feature_names has less elements than there are features used in the model")
    td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1] ]
  }
  
  # parse leaf lines
  leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
  leaf_cols <- c("Feature", "Quality", "Cover")
  td[isLeaf == TRUE,
     (leaf_cols) := {
      xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)]
      c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[,i]))
    }]
  
  # convert some columns to numeric
  numeric_cols <- c("Split", "Quality", "Cover")
  td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
  if (use_int_id) {
    int_cols <- c("Yes", "No", "Missing")
    td[, (int_cols) := lapply(.SD, as.integer), .SDcols = int_cols]
  }
  
  td[, t := NULL]
  td[, isLeaf := NULL]
  
  td[order(Tree, Node)]
}

# Register the bare symbols used inside data.table `[...]` expressions.
# They are mostly column names resolved by data.table at evaluation time and
# are never declared as ordinary variables, so without this registration the
# CRAN check reports them as undefined globals.
globalVariables(
  c(
    "Tree", "Node", "ID", "Feature", "t", "isLeaf",
    ".SD", ".SDcols"
  )
)



( run in 1.161 second using v1.01-cache-2.11-cpan-39bf76dae61 )