Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/dmlc-core/src/io/input_split_base.h view on Meta::CPAN
return file_offset_.back();
}
// implement next record
virtual bool NextRecord(Blob *out_rec) {
while (!ExtractNextRecord(out_rec, &tmp_chunk_)) {
if (!tmp_chunk_.Load(this, buffer_size_)) return false;
}
return true;
}
// implement next chunk
virtual bool NextChunk(Blob *out_chunk) {
while (!ExtractNextChunk(out_chunk, &tmp_chunk_)) {
if (!tmp_chunk_.Load(this, buffer_size_)) return false;
}
return true;
}
// implement ResetPartition.
virtual void ResetPartition(unsigned rank, unsigned nsplit);
/*!
* \brief read a chunk of data into buf
* the data can span multiple records,
* but cannot contain partial records
*
* \param buf the memory region of the buffer,
* should be properly aligned to 64 bits
* \param size the maximum size of memory,
* after the function returns, it stores the size of the chunk
* \return whether end of file was reached
*/
bool ReadChunk(void *buf, size_t *size);
/*!
* \brief extract next chunk from the chunk
* \param out_chunk the output record
* \param chunk the chunk information
* \return true if non-empty record is extracted
* false if the chunk is already finishes its life
*/
bool ExtractNextChunk(Blob *out_rchunk, Chunk *chunk);
/*!
* \brief extract next record from the chunk
* \param out_rec the output record
* \param chunk the chunk information
* \return true if non-empty record is extracted
* false if the chunk is already finishes its life
*/
virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk) = 0;
protected:
// constructor
InputSplitBase()
: fs_(NULL),
align_bytes_(8),
tmp_chunk_(kBufferSize),
buffer_size_(kBufferSize) {}
/*!
* \brief intialize the base before doing anything
* \param fs the filesystem ptr
* \param uri the uri of the files
* \param rank the rank of the split
* \param nsplit number of splits
* \param align_bytes the head split must be multiple of align_bytes
* this also checks if file size are multiple of align_bytes
*/
void Init(FileSystem *fs,
const char *uri,
size_t align_bytes);
// to be implemented by child class
/*!
* \brief seek to the beginning of the first record
* in current file pointer
* \return how many bytes we read past
*/
virtual size_t SeekRecordBegin(Stream *fi) = 0;
/*!
* \brief find the last occurance of record header
* \param begin beginning of the buffer
* \param end end of the buffer
* \return the pointer between [begin, end] indicating the
* last record head
*/
virtual const char*
FindLastRecordBegin(const char *begin, const char *end) = 0;
private:
/*! \brief FileSystem */
FileSystem *filesys_;
/*! \brief information about files */
std::vector<FileInfo> files_;
/*! \brief current input stream */
SeekStream *fs_;
/*! \brief bytes to be aligned */
size_t align_bytes_;
/*! \brief file pointer of which file to read on */
size_t file_ptr_;
/*! \brief file pointer where the end of file lies */
size_t file_ptr_end_;
/*! \brief get the current offset */
size_t offset_curr_;
/*! \brief beginning of offset */
size_t offset_begin_;
/*! \brief end of the offset */
size_t offset_end_;
/*! \brief temporal chunk */
Chunk tmp_chunk_;
/*! \brief buffer size */
size_t buffer_size_;
/*! \brief byte-offset of each file */
std::vector<size_t> file_offset_;
/*! \brief internal overflow buffer */
std::string overflow_;
/*! \brief initialize information in files */
void InitInputFileInfo(const std::string& uri);
/*! \brief strip continous chars in the end of str */
std::string StripEnd(std::string str, char ch);
/*! \brief same as stream.Read */
size_t Read(void *ptr, size_t size);
};
} // namespace io
} // namespace dmlc
#endif // DMLC_IO_INPUT_SPLIT_BASE_H_
( run in 0.812 second using v1.01-cache-2.11-cpan-d59ab9ce9b0 )