Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/plugin/updater_gpu/src/exact/gpu_builder.cuh view on Meta::CPAN
// evaluate the full-grad reduction for the root node
sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
}
// Prepares node and node-assignment state before the given tree level is
// processed.
//
//  - level == 0: resets the node array and both node-assignment buffers, then
//    initializes the root node from the reduced gradient sums.
//  - level  > 0: recomputes the node id of every instance (defaults first,
//    then corrected from the non-missing feature values), gathers those ids
//    back onto every feature value and re-sorts the keys via sortKeys().
//
// `nodeStart` and `nNodes` are part of the signature but are not read here.
void initNodeData(int level, node_id_t nodeStart, int nNodes) {
  if (level == 0) {
    // every instance starts out in the root node
    nodes.fill(Node<node_id_t>());
    nodeAssigns.current_dvec().fill(0);
    nodeAssignsPerInst.fill(0);
    // The root only needs its gradient/score/weight/id info filled in before
    // it is split. All data currently lives on the GPU, which is why this is
    // done by a tiny 1-block, 1-thread kernel.
    initRootNode<<<1, 1>>>(nodes.data(), gradSums.data(), param);
    return;
  }

  const int threadsPerBlock = 256;
  const int itemsPerThread = 4;

  // step 1: give every instance its default node id
  const int rowBlocks = dh::div_round_up(nRows, threadsPerBlock);
  fillDefaultNodeIds<<<rowBlocks, threadsPerBlock>>>(
      nodeAssignsPerInst.data(), nodes.data(), nRows);

  // step 2: for non-missing values, work out the correct child index
  // (grid sized for itemsPerThread values per thread)
  const int valBlocks =
      dh::div_round_up(nVals, threadsPerBlock * itemsPerThread);
  assignNodeIds<<<valBlocks, threadsPerBlock>>>(
      nodeAssignsPerInst.data(), nodeLocations.current(),
      nodeAssigns.current(), instIds.current(), nodes.data(),
      colOffsets.data(), vals.current(), nVals, nCols);

  // step 3: propagate the per-instance assignments to all other columns
  gather<node_id_t>(dh::get_device_idx(param.gpu_id), nodeAssigns.current(),
                    nodeAssignsPerInst.data(), instIds.current(), nVals);

  // step 4: bring the keys back into node-id order for this level
  sortKeys(level);
}
// Segment-sorts the node-assignment keys for `level` and reorders the feature
// values and instance ids to match.
void sortKeys(int level) {
  // Sorting is by node id, and no more than level+1 bits are needed for
  // that, so only bits [0, level + 1) take part in the sort.
  const int keyBits = level + 1;
  segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
                colOffsets, 0, keyBits);

  // Apply the reordering described by nodeLocations to values and instance
  // ids. NOTE(review): the results are assumed to land in the other() halves
  // of the double buffers, which is why both selectors are flipped afterwards.
  const auto deviceIdx = dh::get_device_idx(param.gpu_id);
  gather<float, int>(deviceIdx, vals.other(), vals.current(),
                     instIds.other(), instIds.current(),
                     nodeLocations.current(), nVals);

  // swap current/other for both double buffers
  vals.buff().selector ^= 1;
  instIds.buff().selector ^= 1;
}
// Launches markLeavesKernel on the node array, with enough 128-thread blocks
// to cover all maxNodes entries.
void markLeaves() {
  const int threadsPerBlock = 128;
  const int blockCount = dh::div_round_up(maxNodes, threadsPerBlock);
  markLeavesKernel<<<blockCount, threadsPerBlock>>>(nodes.data(), maxNodes);
}
// Transfers the dense node array into the sparse RegTree pointed to by
// `p_tree`.
//
// The node array is first copied into a std::vector via as_vector(). Its
// entries are then visited in index order; `sparseId` is the RegTree node
// that receives the next entry and advances once per emitted node:
//  - a leaf that is not entry 0 becomes a RegTree leaf whose value is the
//    node weight scaled by param.learning_rate;
//  - any other entry that is not unused (this includes entry 0 even when it
//    is a leaf) becomes a split node with two freshly added children;
//  - unused entries are skipped without consuming a RegTree id.
void dense2sparse(RegTree* p_tree) {
  RegTree& sparse = *p_tree;
  std::vector<Node<node_id_t>> hostNodes = nodes.as_vector();
  int sparseId = 0;
  for (int denseIdx = 0; denseIdx < maxNodes; ++denseIdx) {
    const Node<node_id_t>& src = hostNodes[denseIdx];
    const bool isRoot = (denseIdx == 0);

    if (!isRoot && hostNodes[denseIdx].isLeaf()) {
      sparse[sparseId].set_leaf(src.weight * param.learning_rate);
      sparse.stat(sparseId).sum_hess = src.gradSum.hess;
      ++sparseId;
      continue;
    }

    if (hostNodes[denseIdx].isUnused()) {
      continue;
    }

    // split node: allocate the children first, then record the split itself
    sparse.AddChilds(sparseId);
    sparse[sparseId].set_split(src.colIdx, src.threshold,
                               src.dir == LeftDir);
    sparse.stat(sparseId).loss_chg = src.score;
    sparse.stat(sparseId).sum_hess = src.gradSum.hess;
    sparse.stat(sparseId).base_weight = src.weight;
    // Both children start as zero-valued leaves.
    // NOTE(review): presumably they are overwritten when their own dense
    // entries are visited later in this loop - confirm.
    sparse[sparse[sparseId].cleft()].set_leaf(0);
    sparse[sparse[sparseId].cright()].set_leaf(0);
    ++sparseId;
  }
}
};
} // namespace exact
} // namespace tree
} // namespace xgboost
( run in 2.123 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )