Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/plugin/updater_gpu/src/exact/gpu_builder.cuh view on Meta::CPAN
namespace tree {
namespace exact {
/**
 * @brief Fills in the root node (element 0) of the node array.
 *
 * The gradient-pair total found in sums[0] is stored on the node, and the
 * node's score and weight are derived from it through CalcGain and
 * CalcWeight. No thread or block index is consulted, so every thread that
 * executes this kernel performs the same write to nodes[0].
 *
 * @param nodes node array; only nodes[0] is written
 * @param sums  gradient-pair sums; only sums[0] is read
 * @param param training parameters forwarded to CalcGain / CalcWeight
 */
template <typename node_id_t>
__global__ void initRootNode(Node<node_id_t>* nodes, const bst_gpair* sums,
                             const TrainParam param) {
  // the gradient sums were already evaluated inside transferGrads
  Node<node_id_t> root;
  root.id = 0;
  root.gradSum = sums[0];
  root.weight = CalcWeight(param, root.gradSum.grad, root.gradSum.hess);
  root.score = CalcGain(param, root.gradSum.grad, root.gradSum.hess);
  // copy the whole node so the fields not set above keep the values the
  // default constructor gave them
  nodes[0] = root;
}
/**
 * @brief Labels every element position of a column with that column's index.
 *
 * The column processed by a block is blockIdx.x. Its element positions are
 * [colOffsets[blockIdx.x], colOffsets[blockIdx.x + 1]); the threads of the
 * block walk that range starting at threadIdx.x in steps of blockDim.x.
 *
 * The node_id_t template parameter is not used by the body.
 *
 * @param colIds     output; colIds[pos] receives the column index for every
 *                   position in the block's range
 * @param colOffsets per-column start offsets; entry blockIdx.x + 1 is read
 *                   as the end of the range
 */
template <typename node_id_t>
__global__ void assignColIds(int* colIds, const int* colOffsets) {
  const int col = blockIdx.x;
  const int first = colOffsets[col];
  const int last = colOffsets[col + 1];
  int pos = first + threadIdx.x;
  while (pos < last) {
    colIds[pos] = col;
    pos += blockDim.x;
  }
}
/**
 * @brief Replaces each row's node id with the id of its default child.
 *
 * One thread handles one row (global thread index = row index). A row whose
 * current id is UNUSED_NODE is left as it is. Otherwise the row's node is
 * looked up and the row is reassigned:
 *   - UNUSED_NODE              if the node reports isLeaf() or isUnused()
 *   - (2 * node.id) + 1        if the node reports isDefaultLeft()
 *   - (2 * node.id) + 2        in all other cases
 *
 * @param nodeIdsPerInst per-row node ids, updated in place
 * @param nodes          node array, indexed by the row's current node id
 * @param nRows          number of rows; threads at or beyond it do nothing
 */
template <typename node_id_t>
__global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst,
                                   const Node<node_id_t>* nodes, int nRows) {
  const int row = threadIdx.x + (blockIdx.x * blockDim.x);
  if (row < nRows) {
    const node_id_t current = nodeIdsPerInst[row];
    // rows that belong to none of the currently active node ids are skipped
    if (current != UNUSED_NODE) {
      const Node<node_id_t> parent = nodes[current];
      node_id_t next = UNUSED_NODE;
      if (!(parent.isLeaf() || parent.isUnused())) {
        next = (2 * parent.id) + (parent.isDefaultLeft() ? 1 : 2);
      }
      nodeIdsPerInst[row] = next;
    }
  }
}
/**
 * @brief Routes every feature value that lies in its node's split column to
 *        the left or right child of that node.
 *
 * Threads iterate over the value positions [0, nVals) with a grid-stride
 * loop. For each position the kernel
 *   1. writes the identity index into nodeLocations (fused index
 *      generation), and
 *   2. if the position's node is a splittable node and the position falls
 *      inside that node's split column range
 *      [colOffsets[colIdx], colOffsets[colIdx + 1]), stores
 *      (2 * node.id) + 1 + (value >= threshold) as the new node id of the
 *      row given by instId.
 *
 * The leaf/unused test is done BEFORE colOffsets is indexed: for such nodes
 * colIdx may not name a real column (NOTE(review): Node's defaults are not
 * visible here - confirm), so reading colOffsets[colIdx] first could touch
 * memory outside the array and, at best, wastes two loads.
 *
 * @param nodeIdsPerInst per-row node ids; written for rows whose value lies
 *                       in the split column of their node
 * @param nodeLocations  output; nodeLocations[i] = i for all i < nVals
 * @param nodeIds        per-value node ids (read-only). These are used
 *                       rather than nodeIdsPerInst because the previous
 *                       kernel has already overwritten nodeIdsPerInst with
 *                       the default assignments
 * @param instId         per-value row index
 * @param nodes          node array, indexed by node id
 * @param colOffsets     per-column start offsets into the value arrays
 * @param vals           feature values
 * @param nVals          number of value positions to process
 * @param nCols          not read by this kernel
 */
template <typename node_id_t>
__global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
                              const node_id_t* nodeIds, const int* instId,
                              const Node<node_id_t>* nodes,
                              const int* colOffsets, const float* vals,
                              int nVals, int nCols) {
  int id = threadIdx.x + (blockIdx.x * blockDim.x);
  const int stride = blockDim.x * gridDim.x;
  for (; id < nVals; id += stride) {
    // fusing generation of indices for node locations
    nodeLocations[id] = id;
    // using nodeIds here since the previous kernel would have updated
    // the nodeIdsPerInst with all default assignments
    const int nId = nodeIds[id];
    // this element belongs to none of the currently active node ids
    if (nId == UNUSED_NODE) {
      continue;
    }
    const Node<node_id_t> n = nodes[nId];
    // nothing to route below a leaf/unused node; bail out before its
    // column index is used to address colOffsets
    if (n.isLeaf() || n.isUnused()) {
      continue;
    }
    const int colId = n.colIdx;
    const int start = colOffsets[colId];
    const int end = colOffsets[colId + 1];
    ///@todo: too many wasted threads - only the positions inside the split
    /// column of their node do any work here
    if ((id >= start) && (id < end)) {
      // left child is 2*id+1; the comparison adds 1 to select the right child
      node_id_t result = (2 * n.id) + 1 + (vals[id] >= n.threshold);
      nodeIdsPerInst[instId[id]] = result;
    }
  }
}
/**
 * @brief Sets the score of every childless node to -FLT_MAX.
 *
 * One thread handles one node (global thread index = node index). Nodes at
 * or beyond len, and nodes reporting isUnused(), are skipped. A remaining
 * node gets score = -FLT_MAX when either
 *   - one of its child slots (2*i + 1, 2*i + 2) lies outside the array
 *     (bottom-most nodes), or
 *   - both child slots exist but both children report isUnused().
 *
 * NOTE(review): -FLT_MAX is presumably the sentinel Node::isLeaf() tests
 * for - confirm against the Node definition.
 *
 * @param nodes node array, updated in place
 * @param len   number of elements in nodes
 */
template <typename node_id_t>
__global__ void markLeavesKernel(Node<node_id_t>* nodes, int len) {
  const int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
  if ((idx >= len) || nodes[idx].isUnused()) {
    return;
  }
  const int left = (idx << 1) + 1;
  const int right = left + 1;
  // the bounds test comes first so the child slots are only read when
  // they exist
  const bool atBottom = (left >= len) || (right >= len);
  if (atBottom || (nodes[left].isUnused() && nodes[right].isUnused())) {
    nodes[idx].score = -FLT_MAX;
  }
}
// Forward declarations of the unit-test entry points, needed for friend
// function access. The friend declarations themselves are not in this part
// of the file (presumably inside GPUBuilder - confirm there). Each test is a
// function template over the node-id type and takes no arguments.
template <typename node_id_t>
void testSmallData();
template <typename node_id_t>
void testLargeData();
template <typename node_id_t>
void testAllocate();
template <typename node_id_t>
void testMarkLeaves();
template <typename node_id_t>
void testDense2Sparse();
// GPUBuilder is declared ahead of its definition so that the signature of
// setupGPUBuilder below can refer to it.
template <typename node_id_t>
class GPUBuilder;
// Helper declared here and defined elsewhere: receives a file name and a
// builder by non-const reference and returns a shared DMatrix pointer.
// NOTE(review): judging by the name it prepares the builder from that file -
// confirm against the definition.
template <typename node_id_t>
std::shared_ptr<xgboost::DMatrix> setupGPUBuilder(
const std::string& file,
xgboost::tree::exact::GPUBuilder<node_id_t>& builder);
template <typename node_id_t>
class GPUBuilder {
public:
GPUBuilder() : allocated(false) {}
~GPUBuilder() {}
void Init(const TrainParam& p) {
param = p;
maxNodes = (1 << (param.max_depth + 1)) - 1;
( run in 0.613 second using v1.01-cache-2.11-cpan-39bf76dae61 )