Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/R-package/R/callbacks.R  view on Meta::CPAN

#' Callback closures for booster training.
#'
#' These are used to perform various service tasks either during boosting iterations or at the end.
#' This approach helps to modularize many such tasks without bloating the main training methods.
#' 
#' @details
#' By default, a callback function is run after each boosting iteration.
#' An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function.
#' 
#' When a callback function has \code{finalize} parameter, its finalizer part will also be run after 
#' the boosting is completed.
#' 
#' WARNING: side-effects!!! Be aware that these callback functions access and modify things in 
#' the environment from which they are called, which is a fairly uncommon thing to do in R.
#' 
#' To write a custom callback closure, make sure you first understand the main concepts about R environments.
#' Check either R documentation on \code{\link[base]{environment}} or the 
#' \href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R" 
#' book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks -
#' choose ones that do something similar to what you want to achieve. Also, you would need to get familiar 
#' with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments.
#' 
#' @seealso
#' \code{\link{cb.print.evaluation}},
#' \code{\link{cb.evaluation.log}},
#' \code{\link{cb.reset.parameters}},
#' \code{\link{cb.early.stop}},
#' \code{\link{cb.save.model}},
#' \code{\link{cb.cv.predict}},
#' \code{\link{xgb.train}},
#' \code{\link{xgb.cv}}
#' 
#' @name callbacks
NULL

#
# Callbacks -------------------------------------------------------------------
# 

#' Callback closure for printing the result of evaluation
#' 
#' @param period  print the evaluation results once every \code{period} iterations;
#'        a value of 0 switches the printing off.
#' @param showsd  whether standard deviations should be printed (when available)
#' 
#' @details
#' The returned callback prints the evaluation results every \code{period} iterations.
#' The evaluations of the initial and of the last iteration are printed regardless of \code{period}.
#' Nothing is printed when the calling frame holds no evaluation results.
#' 
#' Callback function expects the following values to be set in its calling frame:
#' \code{bst_evaluation} (also \code{bst_evaluation_err} when available),
#' \code{iteration},
#' \code{begin_iteration},
#' \code{end_iteration}.
#' 
#' @seealso
#' \code{\link{callbacks}}
#' 
#' @export
cb.print.evaluation <- function(period = 1, showsd = TRUE) {

  callback <- function(env = parent.frame()) {
    # The checks are evaluated left to right and stop at the first TRUE one:
    # no evaluation results, printing disabled, or a non-zero 'rank' in the frame
    # (NVL is defined elsewhere; presumably it supplies 0 when 'rank' is unset).
    skip_printing <- length(env$bst_evaluation) == 0 ||
      period == 0 ||
      NVL(env$rank, 0) != 0
    if (skip_printing) {
      return()
    }

    iter <- env$iteration
    # Print on every 'period'-th iteration, and always on the first and last ones.
    is_due <- (iter - 1) %% period == 0 ||
      iter == env$begin_iteration ||
      iter == env$end_iteration
    if (is_due) {
      # Standard deviations are passed on only when the caller asked for them.
      eval_err <- NULL
      if (showsd) {
        eval_err <- env$bst_evaluation_err
      }
      cat(format.eval.string(iter, env$bst_evaluation, eval_err), '\n')
    }
  }

  # Record how the closure was created and under which name it is known.
  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.print.evaluation'
  callback
}


#' Callback closure for logging the evaluation history
#' 
#' @details
#' This callback function appends the current iteration evaluation results \code{bst_evaluation}
#' available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
#' 
#' The finalizer callback (called with \code{finalize = TRUE} in the end) converts 
#' the \code{evaluation_log} list into a final data.table.
#' 
#' The iteration evaluation result \code{bst_evaluation} must be a named numeric vector. 
#' 
#' Note: in the column names of the final data.table, the dash '-' character is replaced with 
#' the underscore '_' in order to make the column names more like regular R identifiers.
#' 
#' Callback function expects the following values to be set in its calling frame:
#' \code{evaluation_log},
#' \code{bst_evaluation},
#' \code{iteration}.
#' 
#' @seealso
#' \code{\link{callbacks}}
#' 
#' @export
cb.evaluation.log <- function() {

xgboost/R-package/R/callbacks.R  view on Meta::CPAN

      stop("Parent frame has neither 'bst' nor ('bst_folds' and 'basket')")
    }
  }
  
  # Finalizer: publishes the closure's best-iteration state to the caller's frame.
  #
  # When 'env$bst' is present, the 'best_score' attribute stored on the booster
  # handle is checked against the closure's 'best_score', and then
  # 'best_iteration', 'best_ntreelimit' and 'best_score' are set on 'env$bst'.
  # Otherwise 'best_iteration' and 'best_ntreelimit' are set on 'env$basket'.
  finalizer <- function(env) {
    if (!is.null(env$bst)) {
      attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
      # The attribute has to be converted back with as.numeric(), so it need not
      # reproduce the closure's double bit-for-bit (presumably it is stored as
      # text -- confirm against xgb.attr). An exact '!=' would then raise a
      # spurious error, so compare with all.equal()'s numeric tolerance instead.
      # as.numeric() drops the metric name carried by 'best_score'; a missing or
      # NA attribute is reported as a mismatch.
      scores_agree <- isTRUE(all.equal(as.numeric(best_score), attr_best_score))
      if (!scores_agree)
        stop("Inconsistent 'best_score' values between the closure state: ", best_score,
             " and the xgb.attr: ", attr_best_score)
      env$bst$best_iteration <- best_iteration
      env$bst$best_ntreelimit <- best_ntreelimit
      env$bst$best_score <- best_score
    } else {
      env$basket$best_iteration <- best_iteration
      env$basket$best_ntreelimit <- best_ntreelimit
    }
  }

  # The early-stopping callback itself.
  #
  # It runs in the frame 'env' of the training routine. With 'finalize = TRUE'
  # it only delegates to finalizer(); otherwise it compares the current score of
  # the monitored metric with the best one kept in the closure, records an
  # improvement, or requests a stop once 'stopping_rounds' iterations have
  # passed since the best iteration.
  callback <- function(env = parent.frame(), finalize = FALSE) {
    # init() is run for as long as 'best_iteration' is still negative.
    if (best_iteration < 0) {
      init(env)
    }

    if (finalize) {
      return(finalizer(env))
    }

    iter <- env$iteration
    score <- env$bst_evaluation[metric_idx]

    improved <- (maximize && score > best_score) ||
      (!maximize && score < best_score)

    if (improved) {
      # Update the closure state; 'best_ntreelimit' relies on the value of
      # 'best_iteration' that was just stored.
      best_msg <<- format.eval.string(iter, env$bst_evaluation, env$bst_evaluation_err)
      best_score <<- score
      best_iteration <<- iter
      best_ntreelimit <<- best_iteration * env$num_parallel_tree
      # Mirror the state into the booster attributes so that it ends up in checkpoints.
      if (!is.null(env$bst)) {
        checkpoint_attrs <- list(
          best_iteration = best_iteration - 1, # convert to 0-based index
          best_score = best_score,
          best_msg = best_msg,
          best_ntreelimit = best_ntreelimit)
        xgb.attributes(env$bst) <- checkpoint_attrs
      }
    } else if (iter - best_iteration >= stopping_rounds) {
      # No improvement for 'stopping_rounds' iterations: ask the caller to stop here.
      env$stop_condition <- TRUE
      env$end_iteration <- iter
      if (verbose && NVL(env$rank, 0) == 0) {
        cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '')
      }
    }
  }
  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.early.stop'
  callback
}


#' Callback closure for saving a model file.
#' 
#' @param save_period save the model to disk after every 
#'        \code{save_period} iterations; 0 means save the model at the end.
#' @param save_name the name or path for the saved model file.
#'        It can contain a \code{\link[base]{sprintf}} formatting specifier 
#'        to include the integer iteration number in the file name.
#'        E.g., with \code{save_name} = 'xgboost_%04d.model', 
#'        the file saved at iteration 50 would be named "xgboost_0050.model".
#'        A name without any '\%' character is used as is.
#' 
#' @details 
#' This callback function saves an xgb-model file, either periodically, every \code{save_period}
#' iterations counted from the first one, or once at the end.
#' 
#' Callback function expects the following values to be set in its calling frame:
#' \code{bst},
#' \code{iteration},
#' \code{begin_iteration},
#' \code{end_iteration}.
#' 
#' @seealso
#' \code{\link{callbacks}}
#' 
#' @export
cb.save.model <- function(save_period = 0, save_name = "xgboost.model") {
  
  if (save_period < 0)
    stop("'save_period' cannot be negative")

  callback <- function(env = parent.frame()) {
    if (is.null(env$bst))
      stop("'save_model' callback requires the 'bst' booster object in its calling frame")
    
    # Either a periodic save, counted from the first iteration, or a single
    # save at the last iteration when 'save_period' is 0.
    periodic_save <- save_period > 0 &&
      (env$iteration - env$begin_iteration) %% save_period == 0
    final_save <- save_period == 0 && env$iteration == env$end_iteration

    if (periodic_save || final_save) {
      # Only run sprintf() when the name actually holds a '%': recent R versions
      # warn about an argument unused by the format, which the default
      # "xgboost.model" would trigger on every save. A name without '%' comes
      # out of sprintf() unchanged anyway, so the file name is the same.
      file_name <- if (any(grepl("%", save_name, fixed = TRUE))) {
        sprintf(save_name, env$iteration)
      } else {
        save_name
      }
      xgb.save(env$bst, file_name)
    }
  }
  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.save.model'
  callback
}


#' Callback closure for returning cross-validation based predictions.
#' 
#' @param save_models a flag for whether to save the folds' models.
#' 
#' @details 
#' This callback function saves predictions for all of the test folds,
#' and also allows to save the folds' models.
#' 
#' It is a "finalizer" callback and it uses early stopping information whenever it is available,
#' thus it must be run after the early stopping callback if the early stopping is used.
#' 
#' Callback function expects the following values to be set in its calling frame:
#' \code{bst_folds},
#' \code{basket},
#' \code{data},
#' \code{end_iteration},
#' \code{params},
#' \code{num_parallel_tree},
#' \code{num_class}.
#' 
#' @return 
#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,



( run in 0.994 second using v1.01-cache-2.11-cpan-96521ef73a4 )