Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/plugin/updater_gpu/src/exact/gpu_builder.cuh  view on Meta::CPAN

    // evaluate the full-grad reduction for the root node
    sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
  }

  /**
   * Sets up node bookkeeping before the given tree level is processed.
   *
   * Level 0: resets the node array and both node-assignment buffers, then
   * launches a single-thread kernel that initialises the root node from the
   * accumulated gradient sums.
   *
   * Deeper levels: recomputes the per-instance node assignment (defaults
   * first, then the entries that have a value), propagates it to every
   * column entry and re-sorts the keys for this level.
   *
   * @param level     depth of the tree level about to be processed
   * @param nodeStart not read by this method
   * @param nNodes    not read by this method
   */
  void initNodeData(int level, node_id_t nodeStart, int nNodes) {
    if (level == 0) {
      // every instance starts out in the root node
      nodes.fill(Node<node_id_t>());
      nodeAssigns.current_dvec().fill(0);
      nodeAssignsPerInst.fill(0);
      // Only the root's gradient/score/weight/id need updating before it is
      // split. The data already lives on the GPU, hence a 1x1 kernel launch.
      initRootNode<<<1, 1>>>(nodes.data(), gradSums.data(), param);
      return;
    }

    const int threadsPerBlock = 256;
    const int itemsPerThread = 4;

    // step 1: give every row its default node id
    const int rowBlocks = dh::div_round_up(nRows, threadsPerBlock);
    fillDefaultNodeIds<<<rowBlocks, threadsPerBlock>>>(
        nodeAssignsPerInst.data(), nodes.data(), nRows);

    // step 2: overwrite with the proper child index for non-missing values.
    // The grid is sized for itemsPerThread values per thread
    // (presumably the kernel loops over its share -- confirm in the kernel).
    const int valBlocks =
        dh::div_round_up(nVals, threadsPerBlock * itemsPerThread);
    assignNodeIds<<<valBlocks, threadsPerBlock>>>(
        nodeAssignsPerInst.data(), nodeLocations.current(),
        nodeAssigns.current(), instIds.current(), nodes.data(),
        colOffsets.data(), vals.current(), nVals, nCols);

    // step 3: spread the per-instance assignment to all the other columns
    gather<node_id_t>(dh::get_device_idx(param.gpu_id), nodeAssigns.current(),
                      nodeAssignsPerInst.data(), instIds.current(), nVals);

    // step 4: regroup the entries by their new node id
    sortKeys(level);
  }

  void sortKeys(int level) {
    // segmented-sort the arrays based on node-id's
    // but we don't need more than level+1 bits for sorting!
    segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
                  colOffsets, 0, level + 1);
    gather<float, int>(dh::get_device_idx(param.gpu_id), vals.other(),
                       vals.current(), instIds.other(), instIds.current(),
                       nodeLocations.current(), nVals);
    vals.buff().selector ^= 1;
    instIds.buff().selector ^= 1;
  }

  /**
   * Launches markLeavesKernel over the node array, using enough
   * 128-thread blocks to cover all maxNodes entries.
   */
  void markLeaves() {
    const int threadsPerBlock = 128;
    const int numBlocks = dh::div_round_up(maxNodes, threadsPerBlock);
    markLeavesKernel<<<numBlocks, threadsPerBlock>>>(nodes.data(), maxNodes);
  }

  /**
   * Writes the builder's node array into the given RegTree.
   *
   * Walks all maxNodes slots in index order. Unused slots are skipped; every
   * other slot consumes the next consecutive tree node id:
   *  - a leaf slot (any index except 0) becomes a tree leaf whose value is
   *    the node weight scaled by the learning rate;
   *  - any other used slot (including slot 0, which is never taken through
   *    the leaf path) becomes a split node with two freshly added children
   *    that are provisionally marked as zero-valued leaves.
   *
   * @param p_tree tree to populate; must not be null (it is dereferenced
   *               unconditionally)
   */
  void dense2sparse(RegTree* p_tree) {
    RegTree& tree = *p_tree;
    std::vector<Node<node_id_t>> hostNodes = nodes.as_vector();
    int outId = 0;  // next node id to fill in the output tree
    for (int i = 0; i < maxNodes; ++i) {
      const Node<node_id_t>& n = hostNodes[i];

      // slot 0 is deliberately excluded from the leaf path
      const bool nonRootLeaf = (i != 0) && hostNodes[i].isLeaf();
      if (nonRootLeaf) {
        tree[outId].set_leaf(n.weight * param.learning_rate);
        tree.stat(outId).sum_hess = n.gradSum.hess;
        ++outId;
        continue;
      }

      if (hostNodes[i].isUnused()) {
        continue;
      }

      // internal node: create its children, then record split and stats
      tree.AddChilds(outId);
      tree[outId].set_split(n.colIdx, n.threshold, n.dir == LeftDir);
      tree.stat(outId).loss_chg = n.score;
      tree.stat(outId).sum_hess = n.gradSum.hess;
      tree.stat(outId).base_weight = n.weight;
      // children start as placeholder leaves; later slots may overwrite them
      tree[tree[outId].cleft()].set_leaf(0);
      tree[tree[outId].cright()].set_leaf(0);
      ++outId;
    }
  }
};

}  // namespace exact
}  // namespace tree
}  // namespace xgboost



( run in 2.123 seconds using v1.01-cache-2.11-cpan-97f6503c9c8 )