Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/src/tree/updater_colmaker.cc view on Meta::CPAN
}
} else {
// push to default branch
if (tree[nid].default_left()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
}
}
}
}
// customization part
// synchronize the best solution of each node
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
NodeEntry &e = snode[nid];
for (int tid = 0; tid < this->nthread; ++tid) {
e.best.Update(stemp[tid][nid].best);
}
}
}
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
DMatrix *p_fmat,
const RegTree &tree) {
// step 1, classify the non-default data into right places
std::vector<unsigned> fsplits;
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
if (!tree[nid].is_leaf()) {
fsplits.push_back(tree[nid].split_index());
}
}
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
while (iter->Next()) {
const ColBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const int nid = this->DecodePosition(ridx);
const bst_float fvalue = col[j].fvalue;
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
}
}
}
}
}
}
// utils to get/set position, with encoded format
// return decoded position
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
return pid < 0 ? ~pid : pid;
}
// encode the encoded position value for ridx
inline void SetEncodePosition(bst_uint ridx, int nid) {
if (position[ridx] < 0) {
position[ridx] = ~nid;
} else {
position[ridx] = nid;
}
}
// --data fields--
const TrainParam& param;
// number of omp thread used during training
const int nthread;
// Per feature: shuffle index of each feature index
std::vector<bst_uint> feat_index;
// Instance Data: current node position in the tree of each instance
std::vector<int> position;
// PerThread x PerTreeNode: statistics for per thread construction
std::vector< std::vector<ThreadEntry> > stemp;
/*! \brief TreeNode Data: statistics for each constructed node */
std::vector<NodeEntry> snode;
/*! \brief queue of nodes to be expanded */
std::vector<int> qexpand_;
// constraint value
std::vector<TConstraint> constraints_;
};
};
// distributed column maker
template<typename TStats, typename TConstraint>
class DistColMaker : public ColMaker<TStats, TConstraint> {
public:
DistColMaker() : builder(param) {
pruner.reset(TreeUpdater::Create("prune"));
}
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
param.InitAllowUnknown(args);
pruner->Init(args);
}
void Update(const std::vector<bst_gpair> &gpair,
DMatrix* dmat,
const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(dmat->info());
CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time";
// build the tree
builder.Update(gpair, dmat, trees[0]);
//// prune the tree, note that pruner will sync the tree
pruner->Update(gpair, dmat, trees);
// update position after the tree is pruned
builder.UpdatePosition(dmat, *trees[0]);
}
private:
struct Builder : public ColMaker<TStats, TConstraint>::Builder {
public:
explicit Builder(const TrainParam ¶m)
: ColMaker<TStats, TConstraint>::Builder(param) {
( run in 0.417 second using v1.01-cache-2.11-cpan-59e3e3084b8 )