view release on metacpan or search on metacpan
xgboost/dmlc-core/src/io/local_filesys.cc view on Meta::CPAN
}
}
private:
std::FILE *fp_;
bool use_stdio_;
};
FileInfo LocalFileSystem::GetPathInfo(const URI &path) {
struct stat sb;
if (stat(path.name.c_str(), &sb) == -1) {
int errsv = errno;
LOG(FATAL) << "LocalFileSystem.GetPathInfo " << path.name
<< " Error:" << strerror(errsv);
}
FileInfo ret;
ret.path = path;
ret.size = sb.st_size;
if ((sb.st_mode & S_IFMT) == S_IFDIR) {
ret.type = kDirectory;
xgboost/dmlc-core/src/io/single_file_split.h view on Meta::CPAN
if (!use_stdin_) std::fclose(fp_);
}
// Rewind the underlying stream so the next Read starts at the beginning.
virtual void BeforeFirst(void) {
  std::fseek(fp_, 0, SEEK_SET);
}
// Grow the internal buffer hint; never shrink below the current size.
virtual void HintChunkSize(size_t chunk_size) {
  if (chunk_size > buffer_size_) {
    buffer_size_ = chunk_size;
  }
}
// Total size in bytes of the underlying file.
// The original ignored fstat's return value and read buf.st_size from an
// uninitialized struct on failure (e.g. fp_ backed by a pipe); return 0
// instead when the size cannot be determined.
virtual size_t GetTotalSize(void) {
  struct stat buf;
  if (fstat(fileno(fp_), &buf) != 0) {
    return 0;
  }
  return buf.st_size;
}
// Read up to `size` bytes into `ptr`; returns the byte count actually read.
virtual size_t Read(void *ptr, size_t size) {
  const size_t nread = std::fread(ptr, 1, size, fp_);
  return nread;
}
virtual void ResetPartition(unsigned part_index, unsigned num_parts) {
CHECK(part_index == 0 && num_parts == 1);
this->BeforeFirst();
}
virtual void Write(const void *ptr, size_t size) {
xgboost/include/xgboost/tree_model.h view on Meta::CPAN
}
/*! \brief read-only access to the node with index nid */
inline const Node& operator[](int nid) const {
  return nodes[nid];
}
/*! \brief expose the whole node array as a read-only vector */
inline const std::vector<Node>& GetNodes() const {
  return nodes;
}
/*! \brief mutable access to the auxiliary statistics of node nid */
inline NodeStat& stat(int nid) {
  return stats[nid];
}
/*! \brief read-only access to the auxiliary statistics of node nid */
inline const NodeStat& stat(int nid) const {
  return stats[nid];
}
/*! \brief pointer into the leaf vector of node nid,
 *  or nullptr when per-leaf vectors are disabled (empty storage) */
inline bst_float* leafvec(int nid) {
  if (leaf_vector.empty()) return nullptr;
  return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return nullptr;
xgboost/include/xgboost/tree_model.h view on Meta::CPAN
this->FillNodeMeanValue(root_id);
}
}
// Recursively compute the hessian-weighted mean leaf value of the subtree
// rooted at nid, caching the result in node_mean_values[nid].
inline bst_float RegTree::FillNodeMeanValue(int nid) {
  const auto& node = (*this)[nid];
  bst_float mean;
  if (node.is_leaf()) {
    mean = node.leaf_value();
  } else {
    // fill the left subtree first, then the right, matching original order
    mean = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess;
    mean += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess;
    mean /= this->stat(nid).sum_hess;
  }
  this->node_mean_values[nid] = mean;
  return mean;
}
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const {
CHECK_GT(this->node_mean_values.size(), 0U);
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/
bst_float node_value;
xgboost/plugin/updater_gpu/src/common.cuh view on Meta::CPAN
std::vector<NodeType> node_flags(h_nodes.size(), UNUSED);
flag_nodes(h_nodes, &node_flags, 0, NODE);
int nid = 0;
for (int gpu_nid = 0; gpu_nid < h_nodes.size(); gpu_nid++) {
NodeType flag = node_flags[gpu_nid];
const Node& n = h_nodes[gpu_nid];
if (flag == NODE) {
tree.AddChilds(nid);
tree[nid].set_split(n.split.findex, n.split.fvalue, n.split.missing_left);
tree.stat(nid).loss_chg = n.split.loss_chg;
tree.stat(nid).base_weight = n.weight;
tree.stat(nid).sum_hess = n.sum_gradients.hess;
tree[tree[nid].cleft()].set_leaf(0);
tree[tree[nid].cright()].set_leaf(0);
nid++;
} else if (flag == LEAF) {
tree[nid].set_leaf(n.weight * param.learning_rate);
tree.stat(nid).sum_hess = n.sum_gradients.hess;
nid++;
}
}
}
// Set gradient pair to 0 with p = 1 - subsample
inline void subsample_gpair(dh::dvec<bst_gpair>* p_gpair, float subsample,
int offset) {
if (subsample == 1.0) {
return;
xgboost/plugin/updater_gpu/src/exact/gpu_builder.cuh view on Meta::CPAN
}
// Convert the dense GPU node array into the sparse RegTree layout,
// assigning consecutive sparse ids to every used dense node.
void dense2sparse(RegTree* p_tree) {
  RegTree& tree = *p_tree;
  const std::vector<Node<node_id_t>> dense = nodes.as_vector();
  int sparse_id = 0;
  for (int dense_id = 0; dense_id < maxNodes; ++dense_id) {
    const Node<node_id_t>& n = dense[dense_id];
    // the root (dense_id == 0) is always emitted through the split branch
    if ((dense_id != 0) && n.isLeaf()) {
      tree[sparse_id].set_leaf(n.weight * param.learning_rate);
      tree.stat(sparse_id).sum_hess = n.gradSum.hess;
      ++sparse_id;
    } else if (!n.isUnused()) {
      tree.AddChilds(sparse_id);
      tree[sparse_id].set_split(n.colIdx, n.threshold, n.dir == LeftDir);
      tree.stat(sparse_id).loss_chg = n.score;
      tree.stat(sparse_id).sum_hess = n.gradSum.hess;
      tree.stat(sparse_id).base_weight = n.weight;
      // mark fresh children as leaves with zero weight
      tree[tree[sparse_id].cleft()].set_leaf(0);
      tree[tree[sparse_id].cright()].set_leaf(0);
      ++sparse_id;
    }
  }
}
};
} // namespace exact
} // namespace tree
xgboost/src/tree/tree_model.cc view on Meta::CPAN
if (depth != 0) fo << std::endl;
for (int i = 0; i < depth+1; ++i) fo << " ";
} else {
for (int i = 0; i < depth; ++i) fo << '\t';
}
if (tree[nid].is_leaf()) {
if (format == "json") {
fo << "{ \"nodeid\": " << nid
<< ", \"leaf\": " << tree[nid].leaf_value();
if (with_stats) {
fo << ", \"cover\": " << tree.stat(nid).sum_hess;
}
fo << " }";
} else {
fo << nid << ":leaf=" << tree[nid].leaf_value();
if (with_stats) {
fo << ",cover=" << tree.stat(nid).sum_hess;
}
fo << '\n';
}
} else {
// right then left,
bst_float cond = tree[nid].split_cond();
const unsigned split_index = tree[nid].split_index();
if (split_index < fmap.size()) {
switch (fmap.type(split_index)) {
case FeatureMap::kIndicator: {
xgboost/src/tree/tree_model.cc view on Meta::CPAN
<< ", \"missing\": " << tree[nid].cdefault();
} else {
fo << nid << ":[f" << split_index << "<"<< cond
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
}
}
if (with_stats) {
if (format == "json") {
fo << ", \"gain\": " << tree.stat(nid).loss_chg
<< ", \"cover\": " << tree.stat(nid).sum_hess;
} else {
fo << ",gain=" << tree.stat(nid).loss_chg << ",cover=" << tree.stat(nid).sum_hess;
}
}
if (format == "json") {
fo << ", \"children\": [";
} else {
fo << '\n';
}
DumpRegTree(fo, tree, fmap, tree[nid].cleft(), depth + 1, false, with_stats, format);
DumpRegTree(fo, tree, fmap, tree[nid].cright(), depth + 1, true, with_stats, format);
if (format == "json") {
xgboost/src/tree/updater_colmaker.cc view on Meta::CPAN
// if nothing left to be expand, break
if (qexpand_.size() == 0) break;
}
// set all the rest expanding nodes to leaf
for (size_t i = 0; i < qexpand_.size(); ++i) {
const int nid = qexpand_[i];
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
}
// remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
p_tree->stat(nid).base_weight = snode[nid].weight;
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
}
}
protected:
// initialize temp data structure
inline void InitData(const std::vector<bst_gpair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
xgboost/src/tree/updater_fast_hist.cc view on Meta::CPAN
// set all the rest expanding nodes to leaf
// This post condition is not needed in current code, but may be necessary
// when there are stopping rule that leaves qexpand non-empty
while (!qexpand_->empty()) {
const int nid = qexpand_->top().nid;
qexpand_->pop();
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
}
// remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
p_tree->stat(nid).base_weight = snode[nid].weight;
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
}
pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});
if (param.debug_verbose > 0) {
double total_time = dmlc::GetTime() - gstart;
LOG(INFO) << "\nInitData: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
<< " (" << std::fixed << std::setw(5) << std::setprecision(2)
xgboost/src/tree/updater_histmaker.cc view on Meta::CPAN
// find split based on histogram statistics
this->FindSplit(depth, gpair, p_fmat, fwork_set, p_tree);
// reset position after split
this->ResetPositionAfterSplit(p_fmat, *p_tree);
this->UpdateQueueExpand(*p_tree);
// if nothing left to be expand, break
if (qexpand.size() == 0) break;
}
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
}
}
// this function does two jobs
// (1) reset the position in array position, to be the latest leaf id
// (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly
// Pure virtual: concrete histogram makers supply the per-feature
// cut-proposal strategy here.
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
DMatrix *p_fmat,
const std::vector <bst_uint> &fset,
const RegTree &tree) = 0;
// initialize the current working set of features in this round
xgboost/src/tree/updater_histmaker.cc view on Meta::CPAN
node_sum, fset[i], &best, &left_sum[wid]);
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
this->SetStats(p_tree, nid, node_sum);
// set up the values
p_tree->stat(nid).loss_chg = best.loss_chg;
// now we know the solution in snode[nid], set split
if (best.loss_chg > rt_eps) {
p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(best.split_index(),
best.split_value, best.default_left());
// mark right child as 0, to indicate fresh leaf
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
// right side sum
TStats right_sum;
right_sum.SetSubstract(node_sum, left_sum[wid]);
this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]);
this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum);
} else {
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
}
}
}
// Record the node's weight/hessian statistics (and optional leaf vector)
// on the output tree.
inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
  RegTree::NodeStat &st = p_tree->stat(nid);
  st.base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
  st.sum_hess = static_cast<bst_float>(node_sum.sum_hess);
  node_sum.SetLeafVec(param, p_tree->leafvec(nid));
}
};
template<typename TStats>
class CQHistMaker: public HistMaker<TStats> {
public:
CQHistMaker() : cache_dmatrix_(nullptr) {
}
xgboost/src/tree/updater_prune.cc view on Meta::CPAN
}
param.learning_rate = lr;
syncher->Update(gpair, p_fmat, trees);
}
private:
// try to prune off current leaf
// Walk upward from leaf nid, collapsing each parent whose children have all
// become prunable leaves; returns the updated count of pruned nodes.
// (Iterative form of the original tail recursion — identical behavior.)
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
  int cur = nid;
  int cur_depth = depth;
  int pruned = npruned;
  while (!tree[cur].is_root()) {
    const int pid = tree[cur].parent();
    RegTree::NodeStat &parent_stat = tree.stat(pid);
    ++parent_stat.leaf_child_cnt;
    // stop unless both children are leaves and the parent qualifies for pruning
    if (parent_stat.leaf_child_cnt < 2 ||
        !param.need_prune(parent_stat.loss_chg, cur_depth - 1)) {
      break;
    }
    tree.ChangeToLeaf(pid, param.learning_rate * parent_stat.base_weight);
    pruned += 2;
    cur = pid;
    --cur_depth;
  }
  return pruned;
}
/*! \brief do pruning of a tree */
inline void DoPrune(RegTree &tree) { // NOLINT(*)
int npruned = 0;
// initialize auxiliary statistics
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
tree.stat(nid).leaf_child_cnt = 0;
}
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
if (tree[nid].is_leaf()) {
npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
}
}
if (!param.silent) {
LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, "
<< tree.num_extra_nodes() << " extra nodes, " << npruned
<< " pruned nodes, max_depth=" << tree.MaxDepth();
xgboost/src/tree/updater_refresh.cc view on Meta::CPAN
// tranverse tree
while (!tree[pid].is_leaf()) {
unsigned split_index = tree[pid].split_index();
pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
gstats[pid].Add(gpair, info, ridx);
}
}
inline void Refresh(const TStats *gstats,
int nid, RegTree *p_tree) {
RegTree &tree = *p_tree;
tree.stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param));
tree.stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
gstats[nid].SetLeafVec(param, tree.leafvec(nid));
if (tree[nid].is_leaf()) {
if (param.refresh_leaf) {
tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
}
} else {
tree.stat(nid).loss_chg = static_cast<bst_float>(
gstats[tree[nid].cleft()].CalcGain(param) +
gstats[tree[nid].cright()].CalcGain(param) -
gstats[nid].CalcGain(param));
this->Refresh(gstats, tree[nid].cleft(), p_tree);
this->Refresh(gstats, tree[nid].cright(), p_tree);
}
}
// training parameter
TrainParam param;
// reducer
xgboost/src/tree/updater_skmaker.cc view on Meta::CPAN
}
if (qexpand.size() != 0) {
this->GetNodeStats(gpair, *p_fmat, *p_tree,
&thread_stats, &node_stats);
this->SyncNodeStats();
}
// set all statistics correctly
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
this->SetStats(nid, node_stats[nid], p_tree);
if (!(*p_tree)[nid].is_leaf()) {
p_tree->stat(nid).loss_chg = static_cast<bst_float>(
node_stats[(*p_tree)[nid].cleft()].CalcGain(param) +
node_stats[(*p_tree)[nid].cright()].CalcGain(param) -
node_stats[nid].CalcGain(param));
}
}
// set left leaves
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
}
}
// define the sketch we want to use
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
private:
// statistics needed in the gradient calculation
struct SKStats {
/*! \brief sum of all positive gradient */
double pos_grad;
xgboost/src/tree/updater_skmaker.cc view on Meta::CPAN
summary_array[base + 1],
summary_array[base + 2],
node_stats[nid], fid, &best);
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
// set up the values
p_tree->stat(nid).loss_chg = best.loss_chg;
this->SetStats(nid, node_stats[nid], p_tree);
// now we know the solution in snode[nid], set split
if (best.loss_chg > rt_eps) {
p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(best.split_index(),
best.split_value, best.default_left());
// mark right child as 0, to indicate fresh leaf
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
} else {
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
}
}
}
// set statistics on ptree
inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
}
inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
const WXQSketch::Summary &neg_grad,
const WXQSketch::Summary &sum_hess,
const SKStats &node_sum,
bst_uint fid,
SplitEntry *best) {
if (sum_hess.size == 0) return;
double root_gain = node_sum.CalcGain(param);
xgboost/tests/cpp/helpers.cc view on Meta::CPAN
#include "./helpers.h"
#include "xgboost/c_api.h"
#include <random>
// Return a freshly generated temporary file path.
// std::tmpnam may return nullptr on failure; constructing std::string from a
// null pointer is undefined behavior, so guard and return an empty string
// on failure instead.
std::string TempFileName() {
  const char *name = std::tmpnam(nullptr);
  return name != nullptr ? std::string(name) : std::string();
}
// Check whether a path exists (any file type), via stat(2).
// Takes the path by const reference: the original by-value parameter copied
// the string on every call for no benefit (clang-tidy
// performance-unnecessary-value-param); callers are unaffected.
bool FileExists(const std::string& name) {
  struct stat st;
  return stat(name.c_str(), &st) == 0;
}
// Return the size in bytes of `filename`, or -1 if stat fails.
// The original ignored stat's return value and returned st_size from an
// uninitialized struct when the file was missing (undefined behavior);
// also pass the path by const reference to avoid an unnecessary copy.
long GetFileSize(const std::string& filename) {
  struct stat st;
  if (stat(filename.c_str(), &st) != 0) {
    return -1;  // distinguishable error value instead of garbage
  }
  return st.st_size;
}
std::string CreateSimpleTestData() {
std::string tmp_file = TempFileName();
std::ofstream fo;
fo.open(tmp_file);
fo << "0 0:0 1:10 2:20\n";
fo << "1 0:0 3:30 4:40\n";
fo.close();