Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/src/tree/updater_fast_hist.cc  view on Meta::CPAN

              right.push_back(rid[k]);
            }
          } else {
            if (default_left) {
              left.push_back(rid[k]);
            } else {
              right.push_back(rid[k]);
            }
          }
        }
      }
      for (size_t i = nrows - rest; i < nrows; ++i) {
        const size_t rid = rowset.begin[i];
        const auto row = gmat[rid];
        const auto p = std::lower_bound(row.index, row.index + row.size, lower_bound);
        auto& left = row_split_tloc[0].left;
        auto& right = row_split_tloc[0].right;
        if (p != row.index + row.size && *p < upper_bound) {
          CHECK_LT(*p, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
          if (static_cast<int32_t>(*p) <= split_cond) {
            left.push_back(rid);
          } else {
            right.push_back(rid);
          }
        } else {
          if (default_left) {
            left.push_back(rid);
          } else {
            right.push_back(rid);
          }
        }
      }
    }

    template<typename T>
    inline void ApplySplitSparseData(const RowSetCollection::Elem rowset,
                                    const GHistIndexMatrix& gmat,
                                    std::vector<RowSetCollection::Split>* p_row_split_tloc,
                                    const Column<T>& column,
                                    bst_uint lower_bound,
                                    bst_uint upper_bound,
                                    bst_int split_cond,
                                    bool default_left) {
      std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
      const size_t nrows = rowset.end - rowset.begin;

      #pragma omp parallel num_threads(nthread)
      {
        const size_t tid = static_cast<size_t>(omp_get_thread_num());
        const size_t ibegin = tid * nrows / nthread;
        const size_t iend = (tid + 1) * nrows / nthread;
        if (ibegin < iend) {  // ensure that [ibegin, iend) is nonempty range
          // search first nonzero row with index >= rowset[ibegin]
          const size_t* p = std::lower_bound(column.row_ind,
                                             column.row_ind + column.len,
                                             rowset.begin[ibegin]);

          auto& left = row_split_tloc[tid].left;
          auto& right = row_split_tloc[tid].right;
          if (p != column.row_ind + column.len && *p <= rowset.begin[iend - 1]) {
            size_t cursor = p - column.row_ind;

            for (size_t i = ibegin; i < iend; ++i) {
              const size_t rid = rowset.begin[i];
              while (cursor < column.len
                     && column.row_ind[cursor] < rid
                     && column.row_ind[cursor] <= rowset.begin[iend - 1]) {
                ++cursor;
              }
              if (cursor < column.len && column.row_ind[cursor] == rid) {
                const T rbin = column.index[cursor];
                CHECK_LT(rbin + column.index_base,
                  static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
                if (static_cast<int32_t>(rbin + column.index_base) <= split_cond) {
                  left.push_back(rid);
                } else {
                  right.push_back(rid);
                }
                ++cursor;
              } else {
                // missing value
                if (default_left) {
                  left.push_back(rid);
                } else {
                  right.push_back(rid);
                }
              }
            }
          } else {  // all rows in [ibegin, iend) have missing values
            if (default_left) {
              for (size_t i = ibegin; i < iend; ++i) {
                const size_t rid = rowset.begin[i];
                left.push_back(rid);
              }
            } else {
              for (size_t i = ibegin; i < iend; ++i) {
                const size_t rid = rowset.begin[i];
                right.push_back(rid);
              }
            }
          }
        }
      }
    }

    inline void InitNewNode(int nid,
                            const GHistIndexMatrix& gmat,
                            const std::vector<bst_gpair>& gpair,
                            const DMatrix& fmat,
                            const RegTree& tree) {
      {
        snode.resize(tree.param.num_nodes, NodeEntry(param));
        constraints_.resize(tree.param.num_nodes);
      }

      // setup constraints before calculating the weight
      {
        auto& stats = snode[nid].stats;
        if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
          /* specialized code for dense data
             For dense data (with no missing value),
                the sum of gradient histogram is equal to snode[nid] */
          GHistRow hist = hist_[nid];
          const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;

          const uint32_t ibegin = row_ptr[fid_least_bins_];
          const uint32_t iend = row_ptr[fid_least_bins_ + 1];
          for (uint32_t i = ibegin; i < iend; ++i) {
            const GHistEntry et = hist.begin[i];
            stats.Add(et.sum_grad, et.sum_hess);
          }
        } else {
          const RowSetCollection::Elem e = row_set_collection_[nid];
          for (const size_t* it = e.begin; it < e.end; ++it) {
            stats.Add(gpair[*it]);
          }
        }
        if (!tree[nid].is_root()) {
          const int pid = tree[nid].parent();



( run in 1.106 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )