Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/dmlc-core/include/dmlc/data.h  view on Meta::CPAN

 * \brief Data structure that holds the data
 * Row block iterator interface that gets RowBlocks
 * Difference between RowBlockIter and Parser:
 *     RowBlockIter caches the data internally that can be used
 *     to iterate the dataset multiple times,
 *     Parser holds very limited internal state and was usually
 *     used to read data only once
 *
 * \sa Parser
 * \tparam IndexType type of index in RowBlock
 *  Create function was only implemented for IndexType uint64_t and uint32_t
 */
template<typename IndexType>
class RowBlockIter : public DataIter<RowBlock<IndexType> > {
 public:
  /*!
   * \brief create a new instance of iterator that returns rowbatch
   *  by default, a in-memory based iterator will be returned
   *
   * \param uri the uri of the input, can contain hdfs prefix
   * \param part_index the part id of current input
   * \param num_parts total number of splits
   * \param type type of dataset can be: "libsvm", ...
   *
   * \return the created data iterator
   */
  static RowBlockIter<IndexType> *
  Create(const char *uri,
         unsigned part_index,
         unsigned num_parts,
         const char *type);
  /*! \return maximum feature dimension in the dataset */
  virtual size_t NumCol() const = 0;
};

/*!
 * \brief parser interface that parses input data
 * used to load dmlc data format into your own data format
 * Difference between RowBlockIter and Parser:
 *     RowBlockIter caches the data internally that can be used
 *     to iterate the dataset multiple times,
 *     Parser holds very limited internal state and was usually
 *     used to read data only once
 *
 *
 * \sa RowBlockIter
 * \tparam IndexType type of index in RowBlock
 *  Create function was only implemented for IndexType uint64_t and uint32_t
 */
template <typename IndexType>
class Parser : public DataIter<RowBlock<IndexType> > {
 public:
  /*!
  * \brief create a new instance of parser based on the "type"
  *
  * \param uri_ the uri of the input, can contain hdfs prefix
  * \param part_index the part id of current input
  * \param num_parts total number of splits
  * \param type type of dataset can be: "libsvm", "auto", ...
  *
  * When "auto" is passed, the type is decided by format argument string in URI.
  *
  * \return the created parser
  */
  static Parser<IndexType> *
  Create(const char *uri_,
         unsigned part_index,
         unsigned num_parts,
         const char *type);
  /*! \return size of bytes read so far */
  virtual size_t BytesRead(void) const = 0;
  /*! \brief Factory type of the parser*/
  typedef Parser<IndexType>* (*Factory)
      (const std::string& path,
       const std::map<std::string, std::string>& args,
       unsigned part_index,
       unsigned num_parts);
};

/*!
 * \brief registry entry of parser factory
 * \tparam IndexType The type of index
 */
template<typename IndexType>
struct ParserFactoryReg
    : public FunctionRegEntryBase<ParserFactoryReg<IndexType>,
                                  typename Parser<IndexType>::Factory> {};

/*!
 * \brief Register a new distributed parser to dmlc-core.
 *
 * \param IndexType The type of Batch index, can be uint32_t or uint64_t
 * \param TypeName The typename of of the data.
 * \param FactoryFunction The factory function that creates the parser.
 *
 * \begincode
 *
 *  // defin the factory function
 *  template<typename IndexType>
 *  Parser<IndexType>*
 *  CreateLibSVMParser(const char* uri, unsigned part_index, unsigned num_parts) {
 *    return new LibSVMParser(uri, part_index, num_parts);
 *  }
 *
 *  // Register it to DMLC
 *  // Then we can use Parser<uint32_t>::Create(uri, part_index, num_parts, "libsvm");
 *  // to create the parser
 *
 *  DMLC_REGISTER_DATA_PARSER(uint32_t, libsvm, CreateLibSVMParser<uint32_t>);
 *  DMLC_REGISTER_DATA_PARSER(uint64_t, libsvm, CreateLibSVMParser<uint64_t>);
 *
 * \endcode
 */
#define DMLC_REGISTER_DATA_PARSER(IndexType, TypeName, FactoryFunction) \
  DMLC_REGISTRY_REGISTER(::dmlc::ParserFactoryReg<IndexType>,           \
                         ParserFactoryReg ## _ ## IndexType, TypeName)  \
  .set_body(FactoryFunction)


// implementation of operator[]
template<typename IndexType>



( run in 0.518 second using v1.01-cache-2.11-cpan-39bf76dae61 )