Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/iterator/tex_obj_input_iterator.cuh view on Meta::CPAN
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/**
* \addtogroup UtilIterator
* @{
*/
/**
* \brief A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects.
*
* \par Overview
* - TexObjInputIteratorTwraps a native device pointer of type <tt>ValueType*</tt>. References
* to elements are to be loaded through texture cache.
* - Can be used to load any data type from memory through texture cache.
* - Can be manipulated and exchanged within and between host and device
* functions, can only be constructed within host functions, and can only be
* dereferenced within device functions.
* - With regard to nested/dynamic parallelism, TexObjInputIteratorTiterators may only be
* created by the host thread, but can be used by any descendant kernel.
* - Compatible with Thrust API v1.7 or newer.
*
* \par Snippet
* The code snippet below illustrates the use of \p TexRefInputIteratorTto
* dereference a device array of doubles through texture cache.
* \par
* \code
* #include <cub/cub.cuh> // or equivalently <cub/iterator/tex_obj_input_iterator.cuh>
*
* // Declare, allocate, and initialize a device array
* int num_items; // e.g., 7
* double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0]
*
* // Create an iterator wrapper
* cub::TexObjInputIterator<double> itr;
* itr.BindTexture(d_in, sizeof(double) * num_items);
* ...
*
* // Within device code:
* printf("%f\n", itr[0]); // 8.0
* printf("%f\n", itr[1]); // 6.0
* printf("%f\n", itr[6]); // 9.0
*
* ...
* itr.UnbindTexture();
*
* \endcode
*
* \tparam T The value type of this iterator
* \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t)
*/
template <
typename T,
typename OffsetT = ptrdiff_t>
class TexObjInputIterator
{
public:
// Required iterator traits
typedef TexObjInputIterator self_type; ///< My own type
typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another
typedef T value_type; ///< The type of the element the iterator can point to
typedef T* pointer; ///< The type of a pointer to an element the iterator can point to
typedef T reference; ///< The type of a reference to an element the iterator can point to
#if (THRUST_VERSION >= 100700)
// Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
typedef typename thrust::detail::iterator_facade_category<
thrust::device_system_tag,
thrust::random_access_traversal_tag,
value_type,
reference
>::type iterator_category; ///< The iterator category
#else
typedef std::random_access_iterator_tag iterator_category; ///< The iterator category
#endif // THRUST_VERSION
private:
// Largest texture word we can use in device
typedef typename UnitWord<T>::TextureWord TextureWord;
// Number of texture words per T
enum {
TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
};
private:
T* ptr;
difference_type tex_offset;
cudaTextureObject_t tex_obj;
public:
/// Constructor
__host__ __device__ __forceinline__ TexObjInputIterator()
:
ptr(NULL),
tex_offset(0),
tex_obj(0)
{}
/// Use this iterator to bind \p ptr with a texture reference
template <typename QualifiedT>
cudaError_t BindTexture(
QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
size_t bytes = size_t(-1), ///< Number of bytes in the range
size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator
{
this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
this->tex_offset = tex_offset;
cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<TextureWord>();
cudaResourceDesc res_desc;
cudaTextureDesc tex_desc;
memset(&res_desc, 0, sizeof(cudaResourceDesc));
memset(&tex_desc, 0, sizeof(cudaTextureDesc));
res_desc.resType = cudaResourceTypeLinear;
res_desc.res.linear.devPtr = this->ptr;
res_desc.res.linear.desc = channel_desc;
res_desc.res.linear.sizeInBytes = bytes;
tex_desc.readMode = cudaReadModeElementType;
return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
}
/// Unbind this iterator from its texture reference
cudaError_t UnbindTexture()
{
return cudaDestroyTextureObject(tex_obj);
}
/// Postfix increment
__host__ __device__ __forceinline__ self_type operator++(int)
{
self_type retval = *this;
tex_offset++;
return retval;
}
/// Prefix increment
__host__ __device__ __forceinline__ self_type operator++()
{
tex_offset++;
return *this;
}
/// Indirection
__host__ __device__ __forceinline__ reference operator*() const
{
#if (CUB_PTX_ARCH == 0)
// Simply dereference the pointer on the host
return ptr[tex_offset];
#else
( run in 0.452 second using v1.01-cache-2.11-cpan-d7f47b0818f )