Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/dmlc-core/include/dmlc/data.h view on Meta::CPAN
* \brief Data structure that holds the data
* Row block iterator interface that gets RowBlocks
* Difference between RowBlockIter and Parser:
* RowBlockIter caches the data internally that can be used
* to iterate the dataset multiple times,
* Parser holds very limited internal state and is usually
* used to read data only once
*
* \sa Parser
* \tparam IndexType type of index in RowBlock
* The Create function is only implemented for IndexType uint64_t and uint32_t
*/
template <typename IndexType>
class RowBlockIter : public DataIter<RowBlock<IndexType> > {
 public:
  /*!
   * \brief Create a new iterator instance that returns row batches.
   *  By default, an in-memory based iterator will be returned.
   *
   * \param uri        the uri of the input, can contain hdfs prefix
   * \param part_index the part id of current input
   * \param num_parts  total number of splits
   * \param type       type of dataset, can be: "libsvm", ...
   *
   * \return the created data iterator
   *  NOTE(review): presumably the caller takes ownership of the returned
   *  pointer; the declaration alone does not say so, confirm.
   */
  static RowBlockIter *Create(const char *uri,
                              unsigned part_index,
                              unsigned num_parts,
                              const char *type);

  /*! \return maximum feature dimension in the dataset */
  virtual size_t NumCol() const = 0;
};
/*!
 * \brief Parser interface that parses input data;
 *  used to load the dmlc data format into your own data format.
 *
 *  Difference between RowBlockIter and Parser:
 *  - RowBlockIter caches the data internally, so it can be used
 *    to iterate over the dataset multiple times;
 *  - Parser holds very limited internal state and is usually
 *    used to read the data only once.
 *
 * \sa RowBlockIter
 * \tparam IndexType type of index in RowBlock.
 *  The Create function is only implemented for IndexType uint64_t and uint32_t.
 */
template <typename IndexType>
class Parser : public DataIter<RowBlock<IndexType> > {
 public:
  /*!
   * \brief Create a new parser instance selected by "type".
   *
   * \param uri_       the uri of the input, can contain hdfs prefix
   * \param part_index the part id of current input
   * \param num_parts  total number of splits
   * \param type       type of dataset, can be: "libsvm", "auto", ...
   *
   *  When "auto" is passed, the type is decided by the format argument
   *  string in the URI.
   *
   * \return the created parser
   */
  static Parser *Create(const char *uri_,
                        unsigned part_index,
                        unsigned num_parts,
                        const char *type);

  /*! \return size of bytes read so far */
  virtual size_t BytesRead() const = 0;

  /*!
   * \brief Factory type of the parser: a pointer to a function that takes
   *  a path, a string-to-string argument map, the part index and the
   *  number of parts, and returns a Parser<IndexType> pointer.
   */
  typedef Parser *(*Factory)(const std::string &path,
                             const std::map<std::string, std::string> &args,
                             unsigned part_index,
                             unsigned num_parts);
};
/*!
 * \brief Registry entry of a parser factory.
 *  Adds no members of its own: it only derives from FunctionRegEntryBase,
 *  instantiated with this entry type and Parser<IndexType>::Factory.
 * \tparam IndexType The type of index
 */
template <typename IndexType>
struct ParserFactoryReg
    : public FunctionRegEntryBase<ParserFactoryReg<IndexType>,
                                  typename Parser<IndexType>::Factory> {
};
/*!
* \brief Register a new distributed parser to dmlc-core.
*
* Expands to a DMLC_REGISTRY_REGISTER(...) expression on
* ::dmlc::ParserFactoryReg<IndexType>, followed by .set_body(FactoryFunction).
*
* \param IndexType The type of Batch index, can be uint32_t or uint64_t.
* It is token-pasted into the identifier ParserFactoryReg_<IndexType>,
* so it must be spelled as a single identifier (e.g. uint32_t).
* \param TypeName The type name of the data.
* \param FactoryFunction The factory function that creates the parser;
* it is the argument given to set_body().
* NOTE(review): presumably it must have the signature of
* Parser<IndexType>::Factory (path, args, part_index, num_parts) - confirm.
*
* \code
*
* // define the factory function
* template<typename IndexType>
* Parser<IndexType>*
* CreateLibSVMParser(const std::string& path,
*                    const std::map<std::string, std::string>& args,
*                    unsigned part_index,
*                    unsigned num_parts) {
*   // illustrative only: construct and return the concrete parser here
*   return new LibSVMParser(path, args, part_index, num_parts);
* }
*
* // Register it to DMLC
* // Then we can use Parser<uint32_t>::Create(uri, part_index, num_parts, "libsvm");
* // to create the parser
*
* DMLC_REGISTER_DATA_PARSER(uint32_t, libsvm, CreateLibSVMParser<uint32_t>);
* DMLC_REGISTER_DATA_PARSER(uint64_t, libsvm, CreateLibSVMParser<uint64_t>);
*
* \endcode
*/
#define DMLC_REGISTER_DATA_PARSER(IndexType, TypeName, FactoryFunction) \
DMLC_REGISTRY_REGISTER(::dmlc::ParserFactoryReg<IndexType>, \
ParserFactoryReg ## _ ## IndexType, TypeName) \
.set_body(FactoryFunction)
// implementation of operator[]
template<typename IndexType>
( run in 0.518 second using v1.01-cache-2.11-cpan-39bf76dae61 )