Alien-XGBoost


xgboost/dmlc-core/src/io/local_filesys.cc

    }
  }

 private:
  std::FILE *fp_;
  bool use_stdio_;
};

FileInfo LocalFileSystem::GetPathInfo(const URI &path) {
  struct stat sb;
  if (stat(path.name.c_str(), &sb) == -1) {
    int errsv = errno;
    LOG(FATAL) << "LocalFileSystem.GetPathInfo " << path.name
               << " Error:" << strerror(errsv);
  }
  FileInfo ret;
  ret.path = path;
  ret.size = sb.st_size;

  if ((sb.st_mode & S_IFMT) == S_IFDIR) {
    ret.type = kDirectory;

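The GetPathInfo excerpt above is a thin wrapper over POSIX stat(2). A minimal standalone sketch of the same pattern, independent of the dmlc-core FileInfo/URI types, might look like this:

#include <sys/stat.h>
#include <cerrno>
#include <cstdio>
#include <cstring>

// Fill size and directory flag for a path, reporting failures via errno,
// in the same way LocalFileSystem::GetPathInfo does above.
static bool PathInfo(const char *path, off_t *size, bool *is_dir) {
  struct stat sb;
  if (stat(path, &sb) == -1) {
    std::fprintf(stderr, "stat(%s) failed: %s\n", path, std::strerror(errno));
    return false;
  }
  *size = sb.st_size;
  *is_dir = ((sb.st_mode & S_IFMT) == S_IFDIR);
  return true;
}
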
xgboost/dmlc-core/src/io/single_file_split.h

    if (!use_stdin_) std::fclose(fp_);
  }
  virtual void BeforeFirst(void) {
    fseek(fp_, 0, SEEK_SET);
  }
  virtual void HintChunkSize(size_t chunk_size) {
    buffer_size_ = std::max(chunk_size, buffer_size_);
  }
  virtual size_t GetTotalSize(void) {
    struct stat buf;
    fstat(fileno(fp_), &buf);
    return buf.st_size;
  }
  virtual size_t Read(void *ptr, size_t size) {
    return std::fread(ptr, 1, size, fp_);
  }
  virtual void ResetPartition(unsigned part_index, unsigned num_parts) {
    CHECK(part_index == 0 && num_parts == 1);
    this->BeforeFirst();
  }
  virtual void Write(const void *ptr, size_t size) {

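GetTotalSize above sizes an already-open stream by handing its descriptor to fstat. A standalone sketch of that one call (with error handling added for illustration) could be:

#include <cstdio>
#include <sys/stat.h>

// Size of an open FILE* via its descriptor, as in GetTotalSize above.
static size_t FileSizeOf(std::FILE *fp) {
  struct stat buf;
  if (fstat(fileno(fp), &buf) != 0) return 0;  // on error, treat as empty
  return static_cast<size_t>(buf.st_size);
}
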
xgboost/include/xgboost/tree_model.h

  }
  /*! \brief get node given nid */
  inline const Node& operator[](int nid) const {
    return nodes[nid];
  }

  /*! \brief get const reference to nodes */
  inline const std::vector<Node>& GetNodes() const { return nodes; }

  /*! \brief get node statistics given nid */
  inline NodeStat& stat(int nid) {
    return stats[nid];
  }
  /*! \brief get node statistics given nid */
  inline const NodeStat& stat(int nid) const {
    return stats[nid];
  }
  /*! \brief get leaf vector given nid */
  inline bst_float* leafvec(int nid) {
    if (leaf_vector.size() == 0) return nullptr;
    return &leaf_vector[nid * param.size_leaf_vector];
  }
  /*! \brief get leaf vector given nid */
  inline const bst_float* leafvec(int nid) const {
    if (leaf_vector.size() == 0) return nullptr;

xgboost/include/xgboost/tree_model.h

    this->FillNodeMeanValue(root_id);
  }
}

inline bst_float RegTree::FillNodeMeanValue(int nid) {
  bst_float result;
  auto& node = (*this)[nid];
  if (node.is_leaf()) {
    result = node.leaf_value();
  } else {
    result  = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess;
    result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess;
    result /= this->stat(nid).sum_hess;
  }
  this->node_mean_values[nid] = result;
  return result;
}

inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
                                            bst_float *out_contribs) const {
  CHECK_GT(this->node_mean_values.size(), 0U);
  // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
  bst_float node_value;

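FillNodeMeanValue above computes, for each node, the hessian-weighted mean of the leaf values underneath it: a leaf contributes its own value, and an internal node averages its children's means weighted by their sum_hess (cover). CalculateContributions then uses these per-node means for the attribution scheme referenced in the comment. A toy re-implementation of the recursion over a minimal node type might read:

#include <vector>

// Simplified sketch of the RegTree::FillNodeMeanValue recursion above.
struct ToyNode {
  int left = -1, right = -1;   // child indices, -1 for a leaf
  float value = 0.0f;          // leaf value (used only at leaves)
  float sum_hess = 0.0f;       // cover of the node
};

static float FillMean(const std::vector<ToyNode> &nodes,
                      std::vector<float> *mean, int nid) {
  const ToyNode &n = nodes[nid];
  float result;
  if (n.left == -1) {
    result = n.value;
  } else {
    result  = FillMean(nodes, mean, n.left)  * nodes[n.left].sum_hess;
    result += FillMean(nodes, mean, n.right) * nodes[n.right].sum_hess;
    result /= n.sum_hess;
  }
  (*mean)[nid] = result;
  return result;
}
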
xgboost/plugin/updater_gpu/src/common.cuh

  std::vector<NodeType> node_flags(h_nodes.size(), UNUSED);
  flag_nodes(h_nodes, &node_flags, 0, NODE);

  int nid = 0;
  for (int gpu_nid = 0; gpu_nid < h_nodes.size(); gpu_nid++) {
    NodeType flag = node_flags[gpu_nid];
    const Node& n = h_nodes[gpu_nid];
    if (flag == NODE) {
      tree.AddChilds(nid);
      tree[nid].set_split(n.split.findex, n.split.fvalue, n.split.missing_left);
      tree.stat(nid).loss_chg = n.split.loss_chg;
      tree.stat(nid).base_weight = n.weight;
      tree.stat(nid).sum_hess = n.sum_gradients.hess;
      tree[tree[nid].cleft()].set_leaf(0);
      tree[tree[nid].cright()].set_leaf(0);
      nid++;
    } else if (flag == LEAF) {
      tree[nid].set_leaf(n.weight * param.learning_rate);
      tree.stat(nid).sum_hess = n.sum_gradients.hess;
      nid++;
    }
  }
}

// Set gradient pair to 0 with p = 1 - subsample
inline void subsample_gpair(dh::dvec<bst_gpair>* p_gpair, float subsample,
                            int offset) {
  if (subsample == 1.0) {
    return;

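The subsample_gpair helper that begins above zeroes each gradient pair with probability 1 - subsample, so dropped rows contribute nothing to the tree construction. A CPU-side sketch of the same idea, assuming a simple grad/hess pair type instead of the dh::dvec device vector, could be:

#include <random>
#include <vector>

// Row subsampling sketch: keep a gradient pair with probability `subsample`,
// otherwise zero it so the row has no effect on the gradients.
struct GradPair { float grad = 0.0f, hess = 0.0f; };

static void SubsampleGpair(std::vector<GradPair> *gpair, float subsample,
                           unsigned seed = 0) {
  if (subsample >= 1.0f) return;
  std::mt19937 rng(seed);
  std::bernoulli_distribution keep(subsample);
  for (GradPair &g : *gpair) {
    if (!keep(rng)) g = GradPair{};  // drop the row: zero grad and hess
  }
}
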
xgboost/plugin/updater_gpu/src/exact/gpu_builder.cuh

  }

  void dense2sparse(RegTree* p_tree) {
    RegTree& tree = *p_tree;
    std::vector<Node<node_id_t>> hNodes = nodes.as_vector();
    int nodeId = 0;
    for (int i = 0; i < maxNodes; ++i) {
      const Node<node_id_t>& n = hNodes[i];
      if ((i != 0) && hNodes[i].isLeaf()) {
        tree[nodeId].set_leaf(n.weight * param.learning_rate);
        tree.stat(nodeId).sum_hess = n.gradSum.hess;
        ++nodeId;
      } else if (!hNodes[i].isUnused()) {
        tree.AddChilds(nodeId);
        tree[nodeId].set_split(n.colIdx, n.threshold, n.dir == LeftDir);
        tree.stat(nodeId).loss_chg = n.score;
        tree.stat(nodeId).sum_hess = n.gradSum.hess;
        tree.stat(nodeId).base_weight = n.weight;
        tree[tree[nodeId].cleft()].set_leaf(0);
        tree[tree[nodeId].cright()].set_leaf(0);
        ++nodeId;
      }
    }
  }
};

}  // namespace exact
}  // namespace tree

xgboost/src/tree/tree_model.cc

    if (depth != 0) fo << std::endl;
    for (int i = 0; i < depth+1; ++i) fo << "  ";
  } else {
    for (int i = 0; i < depth; ++i) fo << '\t';
  }
  if (tree[nid].is_leaf()) {
    if (format == "json") {
      fo << "{ \"nodeid\": " << nid
         << ", \"leaf\": " << tree[nid].leaf_value();
      if (with_stats) {
        fo << ", \"cover\": " << tree.stat(nid).sum_hess;
      }
      fo << " }";
    } else {
      fo << nid << ":leaf=" << tree[nid].leaf_value();
      if (with_stats) {
        fo << ",cover=" << tree.stat(nid).sum_hess;
      }
      fo << '\n';
    }
  } else {
    // internal node: print the split, then recurse into the children
    bst_float cond = tree[nid].split_cond();
    const unsigned split_index = tree[nid].split_index();
    if (split_index < fmap.size()) {
      switch (fmap.type(split_index)) {
        case FeatureMap::kIndicator: {

xgboost/src/tree/tree_model.cc

           << ", \"missing\": " << tree[nid].cdefault();
      } else {
        fo << nid << ":[f" << split_index << "<"<< cond
           << "] yes=" << tree[nid].cleft()
           << ",no=" << tree[nid].cright()
           << ",missing=" << tree[nid].cdefault();
      }
    }
    if (with_stats) {
      if (format == "json") {
        fo << ", \"gain\": " << tree.stat(nid).loss_chg
           << ", \"cover\": " << tree.stat(nid).sum_hess;
      } else {
        fo << ",gain=" << tree.stat(nid).loss_chg << ",cover=" << tree.stat(nid).sum_hess;
      }
    }
    if (format == "json") {
      fo << ", \"children\": [";
    } else {
      fo << '\n';
    }
    DumpRegTree(fo, tree, fmap, tree[nid].cleft(), depth + 1, false, with_stats, format);
    DumpRegTree(fo, tree, fmap, tree[nid].cright(), depth + 1, true, with_stats, format);
    if (format == "json") {

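Putting the two branches above together, the plain-text dump of a stump (one split, two leaves) comes out roughly as follows; the real output indents with one tab per depth level, and the values here are purely illustrative:

0:[f2<20] yes=1,no=2,missing=1,gain=3.5,cover=2
	1:leaf=0.4,cover=1
	2:leaf=-0.4,cover=1

In the json format, leaves become objects like { "nodeid": 1, "leaf": 0.4, "cover": 1 }, while split nodes additionally carry "gain", "cover", and a "children" array holding the recursively dumped subtrees.
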
xgboost/src/tree/updater_colmaker.cc

        // if nothing is left to expand, break
        if (qexpand_.size() == 0) break;
      }
      // set all remaining expanding nodes to leaves
      for (size_t i = 0; i < qexpand_.size(); ++i) {
        const int nid = qexpand_[i];
        (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
      }
      // remember auxiliary statistics in the tree node
      for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
        p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
        p_tree->stat(nid).base_weight = snode[nid].weight;
        p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
        snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
      }
    }

   protected:
    // initialize temp data structure
    inline void InitData(const std::vector<bst_gpair>& gpair,
                         const DMatrix& fmat,
                         const RegTree& tree) {
      CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)

xgboost/src/tree/updater_fast_hist.cc

      // set all remaining expanding nodes to leaves
      // This post-condition is not needed in the current code, but may be necessary
      // when there is a stopping rule that leaves qexpand non-empty
      while (!qexpand_->empty()) {
        const int nid = qexpand_->top().nid;
        qexpand_->pop();
        (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
      }
      // remember auxiliary statistics in the tree node
      for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
        p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
        p_tree->stat(nid).base_weight = snode[nid].weight;
        p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
        snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
      }

      pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});

      if (param.debug_verbose > 0) {
        double total_time = dmlc::GetTime() - gstart;
        LOG(INFO) << "\nInitData:          "
                  << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
                  << " (" << std::fixed << std::setw(5) << std::setprecision(2)

xgboost/src/tree/updater_histmaker.cc

      // find split based on histogram statistics
      this->FindSplit(depth, gpair, p_fmat, fwork_set, p_tree);
      // reset position after split
      this->ResetPositionAfterSplit(p_fmat, *p_tree);
      this->UpdateQueueExpand(*p_tree);
      // if nothing is left to expand, break
      if (qexpand.size() == 0) break;
    }
    for (size_t i = 0; i < qexpand.size(); ++i) {
      const int nid = qexpand[i];
      (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
    }
  }
  // this function does two jobs:
  // (1) reset the entries of the position array to the latest leaf ids
  // (2) propose a set of candidate cuts and set wspace.rptr and wspace.cut correctly
  virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
                                  DMatrix *p_fmat,
                                  const std::vector <bst_uint> &fset,
                                  const RegTree &tree) = 0;
  // initialize the current working set of features in this round

xgboost/src/tree/updater_histmaker.cc

                       node_sum, fset[i], &best, &left_sum[wid]);
      }
    }
    // get the best result; we can synchronize the solution
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      const SplitEntry &best = sol[wid];
      const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
      this->SetStats(p_tree, nid, node_sum);
      // set up the values
      p_tree->stat(nid).loss_chg = best.loss_chg;
      // now we know the solution in snode[nid], set split
      if (best.loss_chg > rt_eps) {
        p_tree->AddChilds(nid);
        (*p_tree)[nid].set_split(best.split_index(),
                                 best.split_value, best.default_left());
        // mark both children as 0, to indicate fresh leaves
        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
        // right side sum
        TStats right_sum;
        right_sum.SetSubstract(node_sum, left_sum[wid]);
        this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]);
        this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum);
      } else {
        (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
      }
    }
  }

  inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
    p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
    p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param, p_tree->leafvec(nid));
  }
};

template<typename TStats>
class CQHistMaker: public HistMaker<TStats> {
 public:
  CQHistMaker() : cache_dmatrix_(nullptr) {
  }

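In the split-finding code above only the left child's statistics are accumulated explicitly; the right child is recovered with SetSubstract as parent minus left. A minimal sketch of a grad/hess stats type with that operation (the real TStats types carry more, e.g. leaf-vector support) might be:

// Gradient statistics with the "right = parent - left" operation used above.
struct ToyStats {
  double sum_grad = 0.0, sum_hess = 0.0;
  // this = a - b, mirroring TStats::SetSubstract(node_sum, left_sum)
  void SetSubstract(const ToyStats &a, const ToyStats &b) {
    sum_grad = a.sum_grad - b.sum_grad;
    sum_hess = a.sum_hess - b.sum_hess;
  }
};
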
xgboost/src/tree/updater_prune.cc

    }
    param.learning_rate = lr;
    syncher->Update(gpair, p_fmat, trees);
  }

 private:
  // try to prune off current leaf
  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
    if (tree[nid].is_root()) return npruned;
    int pid = tree[nid].parent();
    RegTree::NodeStat &s = tree.stat(pid);
    ++s.leaf_child_cnt;
    if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) {
      // need to be pruned
      tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight);
      // tail recursion
      return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
    } else {
      return npruned;
    }
  }
  /*! \brief do pruning of a tree */
  inline void DoPrune(RegTree &tree) { // NOLINT(*)
    int npruned = 0;
    // initialize auxiliary statistics
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      tree.stat(nid).leaf_child_cnt = 0;
    }
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      if (tree[nid].is_leaf()) {
        npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
      }
    }
    if (!param.silent) {
      LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, "
                << tree.num_extra_nodes() << " extra nodes, " << npruned
                << " pruned nodes, max_depth=" << tree.MaxDepth();

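DoPrune above first zeroes leaf_child_cnt for every node, then visits each leaf and calls TryPruneLeaf, which climbs toward the root collapsing any parent whose two children are both leaves and whose recorded loss_chg fails need_prune. A toy version over a flat node array (ignoring roots, depth handling, and the learning-rate scaling) could look like:

#include <vector>

struct ToyPruneNode {
  int parent = -1, left = -1, right = -1;  // -1 marks "none"
  float loss_chg = 0.0f;                   // gain recorded for the split
  int leaf_child_cnt = 0;                  // children already turned into leaves
  bool is_leaf() const { return left == -1; }
};

// Bottom-up pruning: if both children of the parent are leaves and the split
// gain is below min_gain, collapse the parent and keep pruning upward.
static int TryPrune(std::vector<ToyPruneNode> &t, int nid, float min_gain,
                    int npruned) {
  int pid = t[nid].parent;
  if (pid == -1) return npruned;                     // reached the root
  if (++t[pid].leaf_child_cnt >= 2 && t[pid].loss_chg < min_gain) {
    t[pid].left = t[pid].right = -1;                 // parent becomes a leaf
    return TryPrune(t, pid, min_gain, npruned + 2);  // tail recursion
  }
  return npruned;
}
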
xgboost/src/tree/updater_refresh.cc

    // traverse the tree
    while (!tree[pid].is_leaf()) {
      unsigned split_index = tree[pid].split_index();
      pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
      gstats[pid].Add(gpair, info, ridx);
    }
  }
  inline void Refresh(const TStats *gstats,
                      int nid, RegTree *p_tree) {
    RegTree &tree = *p_tree;
    tree.stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param));
    tree.stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
    gstats[nid].SetLeafVec(param, tree.leafvec(nid));
    if (tree[nid].is_leaf()) {
      if (param.refresh_leaf) {
        tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
      }
    } else {
      tree.stat(nid).loss_chg = static_cast<bst_float>(
          gstats[tree[nid].cleft()].CalcGain(param) +
          gstats[tree[nid].cright()].CalcGain(param) -
          gstats[nid].CalcGain(param));
      this->Refresh(gstats, tree[nid].cleft(), p_tree);
      this->Refresh(gstats, tree[nid].cright(), p_tree);
    }
  }
  // training parameter
  TrainParam param;
  // reducer

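The loss_chg set above is the usual split gain: the children's gains minus the parent's gain, computed from the refreshed gradient statistics. Ignoring reg_alpha and max_delta_step, XGBoost's per-node gain is G^2 / (H + lambda), so a simplified sketch of the quantity being combined is:

// Simplified per-node gain, assuming only L2 regularization (lambda).
static double CalcGain(double sum_grad, double sum_hess, double lambda) {
  return (sum_grad * sum_grad) / (sum_hess + lambda);
}

// loss_chg for an internal node, as in Refresh above:
//   loss_chg = CalcGain(left) + CalcGain(right) - CalcGain(parent)
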
xgboost/src/tree/updater_skmaker.cc

    }
    if (qexpand.size() != 0) {
      this->GetNodeStats(gpair, *p_fmat, *p_tree,
                         &thread_stats, &node_stats);
      this->SyncNodeStats();
    }
    // set all statistics correctly
    for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
      this->SetStats(nid, node_stats[nid], p_tree);
      if (!(*p_tree)[nid].is_leaf()) {
        p_tree->stat(nid).loss_chg = static_cast<bst_float>(
            node_stats[(*p_tree)[nid].cleft()].CalcGain(param) +
            node_stats[(*p_tree)[nid].cright()].CalcGain(param) -
            node_stats[nid].CalcGain(param));
      }
    }
    // set the leftover expand nodes as leaves
    for (size_t i = 0; i < qexpand.size(); ++i) {
      const int nid = qexpand[i];
      (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
    }
  }
  // define the sketch we want to use
  typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;

 private:
  // statistics needed in the gradient calculation
  struct SKStats {
    /*! \brief sum of all positive gradient */
    double pos_grad;

xgboost/src/tree/updater_skmaker.cc

                       summary_array[base + 1],
                       summary_array[base + 2],
                       node_stats[nid], fid, &best);
      }
    }
    // get the best result; we can synchronize the solution
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      const SplitEntry &best = sol[wid];
      // set up the values
      p_tree->stat(nid).loss_chg = best.loss_chg;
      this->SetStats(nid, node_stats[nid], p_tree);
      // now we know the solution in snode[nid], set split
      if (best.loss_chg > rt_eps) {
        p_tree->AddChilds(nid);
        (*p_tree)[nid].set_split(best.split_index(),
                                 best.split_value, best.default_left());
        // mark both children as 0, to indicate fresh leaves
        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
      } else {
        (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
      }
    }
  }
  // set statistics on ptree
  inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
    p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
    p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param, p_tree->leafvec(nid));
  }
  inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
                             const WXQSketch::Summary &neg_grad,
                             const WXQSketch::Summary &sum_hess,
                             const SKStats &node_sum,
                             bst_uint fid,
                             SplitEntry *best) {
    if (sum_hess.size == 0) return;
    double root_gain = node_sum.CalcGain(param);

xgboost/tests/cpp/helpers.cc

#include "./helpers.h"
#include "xgboost/c_api.h"
#include <random>

std::string TempFileName() {
  return std::tmpnam(nullptr);
}

bool FileExists(const std::string name) {
  struct stat st;
  return stat(name.c_str(), &st) == 0; 
}

long GetFileSize(const std::string filename) {
  struct stat st;
  stat(filename.c_str(), &st);
  return st.st_size;
}

std::string CreateSimpleTestData() {
  std::string tmp_file = TempFileName();
  std::ofstream fo;
  fo.open(tmp_file);
  fo << "0 0:0 1:10 2:20\n";
  fo << "1 0:0 3:30 4:40\n";
  fo.close();

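These helpers are small wrappers around std::tmpnam and stat; CreateSimpleTestData writes a two-row LIBSVM-style file of the form "<label> <feature>:<value> ...". A hypothetical usage in a test (the surrounding test code is not part of this excerpt, and the return statement of CreateSimpleTestData is cut off above) would be roughly:

// Hypothetical test usage of the helpers above.
// std::string path = CreateSimpleTestData();   // presumably returns tmp_file
// FileExists(path) should now be true and GetFileSize(path) non-zero;
// the path can then be passed to XGDMatrixCreateFromFile (declared in
// xgboost/c_api.h, included above) to load it as a DMatrix.
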