Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/src/tree/updater_fast_hist.cc view on Meta::CPAN
right.push_back(rid[k]);
}
} else {
if (default_left) {
left.push_back(rid[k]);
} else {
right.push_back(rid[k]);
}
}
}
}
for (size_t i = nrows - rest; i < nrows; ++i) {
const size_t rid = rowset.begin[i];
const auto row = gmat[rid];
const auto p = std::lower_bound(row.index, row.index + row.size, lower_bound);
auto& left = row_split_tloc[0].left;
auto& right = row_split_tloc[0].right;
if (p != row.index + row.size && *p < upper_bound) {
CHECK_LT(*p, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
if (static_cast<int32_t>(*p) <= split_cond) {
left.push_back(rid);
} else {
right.push_back(rid);
}
} else {
if (default_left) {
left.push_back(rid);
} else {
right.push_back(rid);
}
}
}
}
/* Partition the rows of `rowset` into "left" and "right" lists according to
 * the bin index each row has in one sparse column.
 *
 * The row range is cut into `nthread` contiguous slices; slice k covers
 * [k * nrows / nthread, (k + 1) * nrows / nthread) and appends its results to
 * (*p_row_split_tloc)[k].left / .right.
 *
 * For every row id in a slice:
 *   - if the column stores an entry for that row, the row goes left when
 *     (entry bin + column.index_base) <= split_cond, otherwise right;
 *   - if the column has no entry for that row (missing value), the row goes
 *     left when default_left is true, otherwise right.
 *
 * Preconditions implied by the code (not verified here):
 *   - column.row_ind[0 .. column.len) is sorted ascending (binary search and
 *     forward-only cursor walk);
 *   - the row ids in `rowset` are ascending within each slice (the cursor is
 *     never rewound);
 *   - *p_row_split_tloc has at least `nthread` elements -- TODO confirm at
 *     the call site.
 *
 * \param rowset            half-open range [begin, end) of row ids to split
 * \param gmat              not referenced in this body
 * \param p_row_split_tloc  per-slice output lists, indexed by slice number
 * \param column            sparse column: row_ind / index / len / index_base
 * \param lower_bound       not referenced in this body
 * \param upper_bound       not referenced in this body
 * \param split_cond        largest bin index that is sent to the left child
 * \param default_left      destination of rows with no entry in `column`
 */
template<typename T>
inline void ApplySplitSparseData(const RowSetCollection::Elem rowset,
                                 const GHistIndexMatrix& gmat,
                                 std::vector<RowSetCollection::Split>* p_row_split_tloc,
                                 const Column<T>& column,
                                 bst_uint lower_bound,
                                 bst_uint upper_bound,
                                 bst_int split_cond,
                                 bool default_left) {
  std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
  const size_t nrows = rowset.end - rowset.begin;
  const int nslices = static_cast<int>(nthread);
  // Iterate over slices with a work-sharing loop instead of deriving the slice
  // from omp_get_thread_num() inside a bare parallel region: num_threads() is
  // only a request, and if the runtime supplies fewer threads (or OpenMP is
  // disabled) a thread-id based partition would silently skip the slices of
  // the threads that do not exist. Here every slice is always visited, and
  // slice k still writes to row_split_tloc[k] exactly as before.
  #pragma omp parallel for num_threads(nthread) schedule(static)
  for (int slice = 0; slice < nslices; ++slice) {
    const size_t tid = static_cast<size_t>(slice);
    const size_t ibegin = tid * nrows / nthread;
    const size_t iend = (tid + 1) * nrows / nthread;
    if (ibegin >= iend) {
      continue;  // empty slice: nothing to classify
    }
    auto& left = row_split_tloc[tid].left;
    auto& right = row_split_tloc[tid].right;
    // rows without an entry in the column all share one destination
    auto& missing_dest = default_left ? left : right;
    const size_t last_rid = rowset.begin[iend - 1];
    const size_t* const row_ind_end = column.row_ind + column.len;
    // first stored entry whose row id is >= the first row id of this slice
    const size_t* p = std::lower_bound(column.row_ind, row_ind_end,
                                       rowset.begin[ibegin]);
    if (p != row_ind_end && *p <= last_rid) {
      // The column has at least one entry inside this slice's row-id span:
      // walk the slice and the column entries side by side.
      size_t cursor = p - column.row_ind;
      for (size_t i = ibegin; i < iend; ++i) {
        const size_t rid = rowset.begin[i];
        // skip column entries that belong to rows preceding `rid`
        while (cursor < column.len
               && column.row_ind[cursor] < rid
               && column.row_ind[cursor] <= last_rid) {
          ++cursor;
        }
        if (cursor < column.len && column.row_ind[cursor] == rid) {
          const T rbin = column.index[cursor];
          // the bin index must be representable as int32_t before the
          // signed comparison against split_cond
          CHECK_LT(rbin + column.index_base,
                   static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
          if (static_cast<int32_t>(rbin + column.index_base) <= split_cond) {
            left.push_back(rid);
          } else {
            right.push_back(rid);
          }
          ++cursor;
        } else {
          // missing value
          missing_dest.push_back(rid);
        }
      }
    } else {
      // no column entry falls inside this slice: every row is a missing value
      for (size_t i = ibegin; i < iend; ++i) {
        missing_dest.push_back(rowset.begin[i]);
      }
    }
  }
}
inline void InitNewNode(int nid,
const GHistIndexMatrix& gmat,
const std::vector<bst_gpair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
{
snode.resize(tree.param.num_nodes, NodeEntry(param));
constraints_.resize(tree.param.num_nodes);
}
// setup constraints before calculating the weight
{
auto& stats = snode[nid].stats;
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data
For dense data (with no missing value),
the sum of gradient histogram is equal to snode[nid] */
GHistRow hist = hist_[nid];
const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;
const uint32_t ibegin = row_ptr[fid_least_bins_];
const uint32_t iend = row_ptr[fid_least_bins_ + 1];
for (uint32_t i = ibegin; i < iend; ++i) {
const GHistEntry et = hist.begin[i];
stats.Add(et.sum_grad, et.sum_hess);
}
} else {
const RowSetCollection::Elem e = row_set_collection_[nid];
for (const size_t* it = e.begin; it < e.end; ++it) {
stats.Add(gpair[*it]);
}
}
if (!tree[nid].is_root()) {
const int pid = tree[nid].parent();
( run in 1.106 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )