Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/include/xgboost/tree_model.h  view on Meta::CPAN

  /*!
   * \brief mark a node as deleted and remember its id in deleted_nodes;
   *  the parent field is kept so that the path can still be traced back
   * \param nid id of the node to delete, must not be smaller than param.num_roots
   */
  inline void DeleteNode(int nid) {
    CHECK_GE(nid, param.num_roots);
    nodes[nid].mark_delete();
    deleted_nodes.push_back(nid);
    param.num_deleted += 1;
  }

 public:
  /*!
   * \brief turn a non-leaf node whose two children are leaves into a leaf,
   *  deleting both children
   * \param rid id of the node to convert
   * \param value leaf value assigned to the node
   */
  inline void ChangeToLeaf(int rid, bst_float value) {
    // both children must already be leaves, otherwise the check fails
    CHECK(nodes[nodes[rid].cleft() ].is_leaf());
    CHECK(nodes[nodes[rid].cright()].is_leaf());
    const int left_id = nodes[rid].cleft();
    const int right_id = nodes[rid].cright();
    this->DeleteNode(left_id);
    this->DeleteNode(right_id);
    nodes[rid].set_leaf(value);
  }
  /*!
   * \brief collapse the whole subtree below a node so that the node becomes a leaf;
   *  a node that already is a leaf is left untouched
   * \param rid id of the node to collapse
   * \param value leaf value assigned to the node
   */
  inline void CollapseToLeaf(int rid, bst_float value) {
    if (nodes[rid].is_leaf()) return;
    // non-leaf children are collapsed first (with leaf value 0.0f) so that
    // ChangeToLeaf finds two leaf children
    const int left_id = nodes[rid].cleft();
    if (!nodes[left_id].is_leaf()) CollapseToLeaf(left_id, 0.0f);
    const int right_id = nodes[rid].cright();
    if (!nodes[right_id].is_leaf()) CollapseToLeaf(right_id, 0.0f);
    this->ChangeToLeaf(rid, value);
  }

 public:
  /*! \brief model parameter */
  TreeParam param;
  /*! \brief constructor: one node, one root, no deleted nodes */
  TreeModel() {
    nodes.resize(1);
    param.num_deleted = 0;
    param.num_roots = 1;
    param.num_nodes = 1;
  }
  /*!
   * \brief mutable access to a node
   * \param nid id of the node, used as index into the node array
   * \return reference to the node stored at nid
   */
  inline Node& operator[](int nid) {
    Node& node = nodes[nid];
    return node;
  }
  /*!
   * \brief read-only access to a node
   * \param nid id of the node, used as index into the node array
   * \return const reference to the node stored at nid
   */
  inline const Node& operator[](int nid) const {
    const Node& node = nodes[nid];
    return node;
  }

  /*! \brief get const reference to nodes */
  inline const std::vector<Node>& GetNodes() const { return nodes; }

  /*!
   * \brief mutable access to the statistics of a node
   * \param nid id of the node, used as index into the statistics array
   * \return reference to the statistics stored at nid
   */
  inline NodeStat& stat(int nid) {
    NodeStat& node_stat = stats[nid];
    return node_stat;
  }
  /*!
   * \brief read-only access to the statistics of a node
   * \param nid id of the node, used as index into the statistics array
   * \return const reference to the statistics stored at nid
   */
  inline const NodeStat& stat(int nid) const {
    const NodeStat& node_stat = stats[nid];
    return node_stat;
  }
  /*!
   * \brief mutable access to the leaf vector of a node
   * \param nid id of the node
   * \return pointer to element nid * param.size_leaf_vector of the leaf vector
   *  storage, or nullptr when that storage is empty
   */
  inline bst_float* leafvec(int nid) {
    if (leaf_vector.empty()) return nullptr;
    const auto offset = nid * param.size_leaf_vector;
    return &leaf_vector[offset];
  }
  /*!
   * \brief read-only access to the leaf vector of a node
   * \param nid id of the node
   * \return pointer to element nid * param.size_leaf_vector of the leaf vector
   *  storage, or nullptr when that storage is empty
   */
  inline const bst_float* leafvec(int nid) const {
    if (leaf_vector.empty()) return nullptr;
    const auto offset = nid * param.size_leaf_vector;
    return &leaf_vector[offset];
  }
  /*!
   * \brief initialize the model: keep only the root nodes, each of them a
   *  leaf with value 0 and no parent
   */
  inline void InitModel() {
    param.num_nodes = param.num_roots;
    const auto n_nodes = param.num_nodes;
    nodes.resize(n_nodes);
    stats.resize(n_nodes);
    leaf_vector.resize(n_nodes * param.size_leaf_vector, 0.0f);
    // after the resize the node array holds exactly the root nodes
    for (auto& root : nodes) {
      root.set_leaf(0.0f);
      root.set_parent(-1);
    }
  }
  /*!
   * \brief load model from stream, replacing the current parameters, nodes,
   *  node statistics and list of deleted nodes
   * \param fi input stream
   *
   * Every read is guarded by a CHECK, so a truncated or inconsistent stream
   * fails the check instead of leaving a half-loaded model in use.
   */
  inline void Load(dmlc::Stream* fi) {
    CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
    // The node count comes straight from the stream; validate it before it is
    // used as a container size. A negative value would otherwise be converted
    // to a huge unsigned size inside resize().
    CHECK(param.num_nodes > 0)
        << "invalid number of nodes in tree model: " << param.num_nodes;
    nodes.resize(param.num_nodes);
    stats.resize(param.num_nodes);
    CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
             sizeof(Node) * nodes.size());
    CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
             sizeof(NodeStat) * stats.size());
    // the leaf vector is only present in the stream when its size is non-zero
    if (param.size_leaf_vector != 0) {
      CHECK(fi->Read(&leaf_vector));
    }
    // rebuild the list of deleted nodes from the per-node deletion marks
    deleted_nodes.resize(0);
    for (int i = param.num_roots; i < param.num_nodes; ++i) {
      if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
    }
    // the rebuilt list has to agree with the count stored in the parameters
    CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
  }
  /*!
   * \brief save model to stream: parameters, nodes, node statistics and,
   *  when param.size_leaf_vector is non-zero, the leaf vector
   * \param fo output stream
   */
  inline void Save(dmlc::Stream* fo) const {
    // run every consistency check before the first write so that a failing
    // check cannot leave a partially written model in the stream
    CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
    CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
    CHECK_NE(param.num_nodes, 0);
    fo->Write(&param, sizeof(TreeParam));
    fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
    // size the statistics block by its own container, mirroring Load
    fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size());
    if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
  }
  /*!

xgboost/include/xgboost/tree_model.h  view on Meta::CPAN

  std::fill(data.begin(), data.end(), e);
}

/*!
 * \brief copy the feature values of a sparse instance into the dense vector
 * \param inst sparse instance; entries whose index is not smaller than the
 *  size of the dense vector are skipped
 */
inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
  const auto n_slots = data.size();
  for (bst_uint k = 0; k < inst.length; ++k) {
    const auto& entry = inst[k];
    if (entry.index < n_slots) {
      data[entry.index].fvalue = entry.fvalue;
    }
  }
}

/*!
 * \brief set the flag of every feature listed in a sparse instance to -1,
 *  the value is_missing() tests for
 * \param inst sparse instance; entries whose index is not smaller than the
 *  size of the dense vector are skipped
 */
inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
  const auto n_slots = data.size();
  for (bst_uint k = 0; k < inst.length; ++k) {
    const auto& entry = inst[k];
    if (entry.index < n_slots) {
      data[entry.index].flag = -1;
    }
  }
}

/*! \brief number of entries held by the dense feature vector */
inline size_t RegTree::FVec::size() const {
  const size_t n_entries = data.size();
  return n_entries;
}

/*!
 * \brief value stored for a feature
 * \param i index of the feature
 * \return the fvalue field of entry i
 */
inline bst_float RegTree::FVec::fvalue(size_t i) const {
  const auto& entry = data[i];
  return entry.fvalue;
}

/*!
 * \brief whether a feature is marked as missing
 * \param i index of the feature
 * \return true when the flag field of entry i equals -1
 */
inline bool RegTree::FVec::is_missing(size_t i) const {
  const bool missing = (data[i].flag == -1);
  return missing;
}

/*!
 * \brief walk from a root down to the leaf selected by a feature vector
 * \param feat dense feature vector of the instance
 * \param root_id id of the root node to start from
 * \return id of the leaf node that is reached
 */
inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
  int nid = static_cast<int>(root_id);
  for (;;) {
    const auto& node = (*this)[nid];
    if (node.is_leaf()) break;
    const unsigned fid = node.split_index();
    nid = this->GetNext(nid, feat.fvalue(fid), feat.is_missing(fid));
  }
  return nid;
}

/*!
 * \brief leaf value of the leaf selected by a feature vector
 * \param feat dense feature vector of the instance
 * \param root_id id of the root node to start from
 * \return leaf value of the node returned by GetLeafIndex
 */
inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
  const auto& leaf = (*this)[this->GetLeafIndex(feat, root_id)];
  return leaf.leaf_value();
}

/*!
 * \brief fill node_mean_values for every root and the nodes below it;
 *  does nothing when node_mean_values already has one entry per node
 */
inline void RegTree::FillNodeMeanValues() {
  const size_t n_nodes = this->param.num_nodes;
  if (this->node_mean_values.size() != n_nodes) {
    this->node_mean_values.resize(n_nodes);
    for (int rid = 0; rid < param.num_roots; ++rid) {
      this->FillNodeMeanValue(rid);
    }
  }
}

/*!
 * \brief compute the mean value of a node, store it in node_mean_values and
 *  recurse into its children
 * \param nid id of the node
 * \return the leaf value for a leaf; otherwise the children's mean values
 *  weighted by their sum_hess and divided by the sum_hess of the node
 */
inline bst_float RegTree::FillNodeMeanValue(int nid) {
  auto& node = (*this)[nid];
  bst_float mean;
  if (!node.is_leaf()) {
    const int left_id = node.cleft();
    const int right_id = node.cright();
    mean  = this->FillNodeMeanValue(left_id) * this->stat(left_id).sum_hess;
    mean += this->FillNodeMeanValue(right_id) * this->stat(right_id).sum_hess;
    mean /= this->stat(nid).sum_hess;
  } else {
    mean = node.leaf_value();
  }
  this->node_mean_values[nid] = mean;
  return mean;
}

/*!
 * \brief accumulate per-feature contributions of this tree for one instance
 * \param feat dense feature vector of the instance
 * \param root_id id of the root node to start from
 * \param out_contribs output array that is added to; slot feat.size()
 *  receives the mean value of the root, slot f receives the change of the
 *  mean value at every split on feature f along the decision path
 *
 * node_mean_values must be non-empty when this is called.
 */
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
                                            bst_float *out_contribs) const {
  CHECK_GT(this->node_mean_values.size(), 0U);
  // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
  int nid = static_cast<int>(root_id);
  bst_float prev_mean = this->node_mean_values[nid];
  // the bias slot sits right behind the feature slots
  out_contribs[feat.size()] += prev_mean;
  // a root that is a leaf has no split to attribute anything to
  if ((*this)[nid].is_leaf()) return;
  unsigned fid;
  // nid is known to be a split node here, so the body runs at least once
  do {
    fid = (*this)[nid].split_index();
    nid = this->GetNext(nid, feat.fvalue(fid), feat.is_missing(fid));
    const bst_float cur_mean = this->node_mean_values[nid];
    // credit the change of the mean value to the feature that was split on
    out_contribs[fid] += cur_mean - prev_mean;
    prev_mean = cur_mean;
  } while (!(*this)[nid].is_leaf());
  // credit the step from the last mean value to the leaf value to the last split feature
  out_contribs[fid] += (*this)[nid].leaf_value() - prev_mean;
}

/*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
  bst_float split_value = (*this)[pid].split_cond();
  if (is_unknown) {
    return (*this)[pid].cdefault();
  } else {
    if (fvalue < split_value) {
      return (*this)[pid].cleft();
    } else {
      return (*this)[pid].cright();
    }
  }
}
}  // namespace xgboost
#endif  // XGBOOST_TREE_MODEL_H_



( run in 0.537 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )