Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/dmlc-core/include/dmlc/data.h  view on Meta::CPAN

   */
  const IndexType *field;
  /*!
   * \brief index of each instance
   */
  const IndexType *index;
  /*!
   * \brief array value of each instance, this can be NULL
   *  indicating every value is set to be 1
   */
  const real_t *value;
  /*!
   * \param i the input index
   * \return field for i-th feature
   */
  inline IndexType get_field(size_t i) const {
    // Unchecked read: neither i nor the field pointer itself is validated here.
    return *(field + i);
  }
  /*!
   * \param i the input index
   * \return index of the i-th feature
   */
  inline IndexType get_index(size_t i) const {
    // Unchecked read: i is not validated against the number of stored entries.
    return *(index + i);
  }
  /*!
   * \param i the input index
   * \return i-th feature value, this function is always
   *  safe even when value == NULL
   */
  inline real_t get_value(size_t i) const {
    // A missing value array stands for "every value is 1", so no memory is
    // touched in that case and the call is safe for any i.
    if (value == NULL) {
      return 1.0f;
    }
    return value[i];
  }
  /*!
   * \brief helper function to compute the dot product of the current
   *  instance with a dense weight vector
   * \param weight the dense weight array to take the dot product with
   * \param size the size of the weight vector
   * \tparam V type of the weight vector
   * \return the result of dot product
   */
  template<typename V>
  inline V SDot(const V *weight, size_t size) const {
    // Decide once whether the value array is absent; in that case each entry
    // contributes its selected weight unscaled.
    const bool all_ones = (value == NULL);
    V acc = static_cast<V>(0);
    for (size_t i = 0; i < length; ++i) {
      // Bounds check comes first so weight is never read out of range.
      CHECK(index[i] < size) << "feature index exceed bound";
      const V &selected = weight[index[i]];
      if (all_ones) {
        acc += selected;
      } else {
        acc += selected * value[i];
      }
    }
    return acc;
  }
};

/*!
 * \brief a block of data, containing several rows in sparse matrix
 *  This is useful for (streaming-style) algorithms that scan through rows of data
 *  examples include: SGD, GD, L-BFGS, kmeans
 *
 *  The size of batch is usually large enough so that parallelizing over the rows
 *  can give significant speedup
 * \tparam IndexType type to store the index used in row batch
 */
template<typename IndexType>
struct RowBlock {
  /*! \brief batch size */
  size_t size;
  /*! \brief array[size+1], row pointer to beginning of each rows */
  const size_t *offset;
  /*! \brief array[size] label of each instance */
  const real_t *label;
  /*! \brief With weight: array[size] weight of each instance, otherwise nullptr */
  const real_t *weight;
  /*! \brief field id*/
  const IndexType *field;
  /*! \brief feature index */
  const IndexType *index;
  /*! \brief feature value, can be NULL, indicating all values are 1 */
  const real_t *value;
  /*!
   * \brief get specific rows in the batch
   * \param rowid the index of the row within the batch
   * \return the instance corresponding to the row
   */
  inline Row<IndexType> operator[](size_t rowid) const;
  /*! \return memory cost of the block in bytes */
  inline size_t MemCostBytes(void) const {
    // Bytes charged per row: one offset entry and one label, plus one weight
    // when a weight array is attached.
    // NOTE(review): offset is documented as array[size+1], but only `size`
    // offset entries are counted here - confirm whether that is intended.
    size_t bytes_per_row = sizeof(size_t) + sizeof(real_t);
    if (weight != NULL) {
      bytes_per_row += sizeof(real_t);
    }

    // Bytes charged per stored entry, counting only the arrays that exist.
    size_t bytes_per_entry = 0;
    if (field != NULL) {
      bytes_per_entry += sizeof(IndexType);
    }
    if (index != NULL) {
      bytes_per_entry += sizeof(IndexType);
    }
    if (value != NULL) {
      bytes_per_entry += sizeof(real_t);
    }

    // Number of entries spanned by the rows of this block.
    const size_t num_entries = offset[size] - offset[0];
    return size * bytes_per_row + num_entries * bytes_per_entry;
  }
  /*!
   * \brief slice a RowBlock to get rows in [begin, end)
   * \param begin the begin row index
   * \param end the end row index
   * \return the sliced RowBlock
   */
  inline RowBlock Slice(size_t begin, size_t end) const {
    CHECK(begin <= end && end <= size);
    RowBlock sliced;

    // Per-row arrays are advanced so that row 0 of the slice is row `begin`
    // of this block.
    sliced.size = end - begin;
    sliced.offset = offset + begin;
    sliced.label = label + begin;
    // The weight array is optional; keep it absent in the slice if absent here.
    sliced.weight = NULL;
    if (weight != NULL) {
      sliced.weight = weight + begin;
    }

    // Per-entry arrays are passed through unshifted. Presumably the offset
    // entries stay absolute positions into them - confirm against operator[].
    sliced.field = field;
    sliced.index = index;
    sliced.value = value;
    return sliced;
  }

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 2.415 seconds using v1.00-cache-2.02-grep-82fe00e-cpan-2cc899e4a130 )