Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/include/xgboost/tree_model.h view on Meta::CPAN
};
/*!
* \brief define regression tree to be the most common tree model.
* This is the data structure used in xgboost's major tree models.
*/
class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
public:
/*!
* \brief dense feature vector that can be taken by RegTree
* and can be constructed from a sparse feature vector.
* Each slot either holds a feature value or is flagged as missing.
*/
struct FVec {
public:
/*!
* \brief initialize the vector to the given size, marking every entry as missing
* \param size The size of the feature vector.
*/
inline void Init(size_t size);
/*!
* \brief fill the vector with the values of a sparse instance;
* entries whose feature index is not smaller than size() are ignored
* \param inst The sparse instance to fill.
*/
inline void Fill(const RowBatch::Inst& inst);
/*!
* \brief drop the trace after fill by flagging the entries referenced by
* inst as missing again, must be called after fill.
* \param inst The sparse instance to drop.
*/
inline void Drop(const RowBatch::Inst& inst);
/*!
* \brief returns the size of the feature vector
* \return the size of the feature vector
*/
inline size_t size() const;
/*!
* \brief get the i-th value
* \param i feature index.
* \return the i-th feature value
*/
inline bst_float fvalue(size_t i) const;
/*!
* \brief check whether the i-th entry is missing
* \param i feature index.
* \return whether the i-th value is missing.
*/
inline bool is_missing(size_t i) const;
private:
/*!
* \brief a union of feature value and flag;
* when flag == -1, this indicates the value is missing
*/
union Entry {
bst_float fvalue;
int flag;
};
// one Entry per feature index
std::vector<Entry> data;
};
/*!
* \brief get the leaf index
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \return the leaf index reached by the given feature vector
*/
inline int GetLeafIndex(const FVec& feat, unsigned root_id = 0) const;
/*!
* \brief get the prediction of regression tree, only accepts dense feature vector
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \return the prediction of the tree for the given feature vector
*/
inline bst_float Predict(const FVec& feat, unsigned root_id = 0) const;
/*!
* \brief calculate the feature contributions for the given root
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \param out_contribs output vector to hold the contributions
*/
inline void CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const;
/*!
* \brief get next position of the tree given current pid
* \param pid Current node id.
* \param fvalue feature value if not missing.
* \param is_unknown Whether current required feature is missing.
* \return the next position (node id) in the tree
*/
inline int GetNext(int pid, bst_float fvalue, bool is_unknown) const;
/*!
* \brief dump the model in the requested format as a text string
* \param fmap feature map that may help give interpretations of feature
* \param with_stats whether dump out statistics as well
* \param format the format to dump the model in
* \return the string of dumped model
*/
std::string DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const;
/*!
* \brief calculate the mean value for each node, required for feature contributions
*/
inline void FillNodeMeanValues();
private:
/*!
* \brief helper operating on the single node nid.
* NOTE(review): presumably computes and returns that node's mean value on
* behalf of FillNodeMeanValues -- confirm against the definition.
* \param nid node id.
*/
inline bst_float FillNodeMeanValue(int nid);
/*!
* \brief per-node mean values.
* NOTE(review): presumably populated by FillNodeMeanValues -- confirm.
*/
std::vector<bst_float> node_mean_values;
};
// implementations of inline functions
// there is no need to read these if you only use the model
inline void RegTree::FVec::Init(size_t size) {
Entry e; e.flag = -1;
data.resize(size);
std::fill(data.begin(), data.end(), e);
}
// Copy the values of a sparse instance into their dense slots.
// Entries whose feature index does not fit into the vector are skipped.
inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
  for (bst_uint k = 0; k < inst.length; ++k) {
    if (inst[k].index < data.size()) {
      data[inst[k].index].fvalue = inst[k].fvalue;
    }
  }
}
// Flag every in-range slot referenced by `inst` as missing again
// (flag == -1); indices that do not fit into the vector are skipped.
inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
  for (bst_uint k = 0; k < inst.length; ++k) {
    if (inst[k].index < data.size()) {
      data[inst[k].index].flag = -1;
    }
  }
}
// Number of feature slots currently held by this dense vector.
inline size_t RegTree::FVec::size() const {
  const size_t num_slots = data.size();
  return num_slots;
}
inline bst_float RegTree::FVec::fvalue(size_t i) const {
( run in 0.632 second using v1.01-cache-2.11-cpan-39bf76dae61 )