Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/iterator/tex_obj_input_iterator.cuh  view on Meta::CPAN

CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/**
 * \addtogroup UtilIterator
 * @{
 */



/**
 * \brief A random-access input wrapper for dereferencing array values through texture cache.  Uses newer Kepler-style texture objects.
 *
 * \par Overview
 * - TexObjInputIterator wraps a native device pointer of type <tt>T*</tt>. References
 *   to elements are to be loaded through texture cache.
 * - Can be used to load any data type from memory through texture cache.
 * - Can be manipulated and exchanged within and between host and device
 *   functions, can only be constructed within host functions, and can only be
 *   dereferenced within device functions.
 * - With regard to nested/dynamic parallelism, TexObjInputIterator iterators may only be
 *   created by the host thread, but can be used by any descendant kernel.
 * - Compatible with Thrust API v1.7 or newer.
 *
 * \par Snippet
 * The code snippet below illustrates the use of \p TexObjInputIterator to
 * dereference a device array of doubles through texture cache.
 * \par
 * \code
 * #include <cub/cub.cuh>   // or equivalently <cub/iterator/tex_obj_input_iterator.cuh>
 *
 * // Declare, allocate, and initialize a device array
 * int num_items;   // e.g., 7
 * double *d_in;    // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0]
 *
 * // Create an iterator wrapper
 * cub::TexObjInputIterator<double> itr;
 * itr.BindTexture(d_in, sizeof(double) * num_items);
 * ...
 *
 * // Within device code:
 * printf("%f\n", itr[0]);      // 8.0
 * printf("%f\n", itr[1]);      // 6.0
 * printf("%f\n", itr[6]);      // 9.0
 *
 * ...
 * itr.UnbindTexture();
 *
 * \endcode
 *
 * \tparam T                    The value type of this iterator
 * \tparam OffsetT              The difference type of this iterator (Default: \p ptrdiff_t)
 */
template <
    typename    T,
    typename    OffsetT = ptrdiff_t>
class TexObjInputIterator
{
public:

    // Required iterator traits
    typedef TexObjInputIterator                 self_type;              ///< My own type
    typedef OffsetT                             difference_type;        ///< Type to express the result of subtracting one iterator from another
    typedef T                                   value_type;             ///< The type of the element the iterator can point to
    typedef T*                                  pointer;                ///< The type of a pointer to an element the iterator can point to
    typedef T                                   reference;              ///< The type of a reference to an element the iterator can point to

#if (THRUST_VERSION >= 100700)
    // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
    typedef typename thrust::detail::iterator_facade_category<
        thrust::device_system_tag,
        thrust::random_access_traversal_tag,
        value_type,
        reference
      >::type iterator_category;                                        ///< The iterator category
#else
    typedef std::random_access_iterator_tag     iterator_category;      ///< The iterator category
#endif  // THRUST_VERSION

private:

    // Largest texture word we can use in device
    typedef typename UnitWord<T>::TextureWord TextureWord;

    // Number of texture words per T
    enum {
        TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
    };

private:

    // Wrapped native pointer; set by BindTexture() and indexed directly when dereferencing on the host
    T*                  ptr;
    // Current position of the iterator, in items relative to ptr; advanced by the increment operators
    difference_type     tex_offset;
    // Texture object handle created by BindTexture() and destroyed by UnbindTexture()
    cudaTextureObject_t tex_obj;

public:

    /// Default constructor: produces an unbound iterator (null pointer, zero offset, texture object handle 0)
    __host__ __device__ __forceinline__ TexObjInputIterator()
        : ptr(NULL), tex_offset(0), tex_obj(0)
    {
    }

    /// Wrap \p ptr with this iterator and create a linear-memory texture object over it.
    /// Returns the status reported by \p cudaCreateTextureObject.
    template <typename QualifiedT>
    cudaError_t BindTexture(
        QualifiedT      *ptr,                   ///< Native pointer to wrap; expected to be aligned to cudaDeviceProp::textureAlignment
        size_t          bytes = size_t(-1),     ///< Size of the wrapped range in bytes (defaults to the largest \p size_t value)
        size_t          tex_offset = 0)         ///< Initial position of the iterator, in items from \p ptr
    {
        // Strip cv-qualifiers so the pointer can be stored in the (non-const) member
        typedef typename RemoveQualifiers<QualifiedT>::Type UnqualifiedT;

        this->ptr        = const_cast<UnqualifiedT *>(ptr);
        this->tex_offset = tex_offset;

        // Resource descriptor: a linear range of device memory viewed as TextureWord elements
        cudaResourceDesc res_cfg;
        memset(&res_cfg, 0, sizeof(res_cfg));
        res_cfg.resType                = cudaResourceTypeLinear;
        res_cfg.res.linear.devPtr      = this->ptr;
        res_cfg.res.linear.desc        = cudaCreateChannelDesc<TextureWord>();
        res_cfg.res.linear.sizeInBytes = bytes;

        // Texture descriptor: everything zeroed, elements are read back in their stored type
        cudaTextureDesc tex_cfg;
        memset(&tex_cfg, 0, sizeof(tex_cfg));
        tex_cfg.readMode = cudaReadModeElementType;

        return cudaCreateTextureObject(&tex_obj, &res_cfg, &tex_cfg, NULL);
    }

    /// Destroy the texture object held by this iterator (the stored handle itself is not reset).
    /// Returns the status reported by \p cudaDestroyTextureObject.
    cudaError_t UnbindTexture()
    {
        cudaError_t status = cudaDestroyTextureObject(tex_obj);
        return status;
    }

    /// Postfix increment: advance by one item and return a copy of the iterator as it was beforehand
    __host__ __device__ __forceinline__ self_type operator++(int)
    {
        self_type previous(*this);
        ++tex_offset;
        return previous;
    }

    /// Prefix increment: advance by one item, then return the updated iterator (by value, i.e. as a copy)
    __host__ __device__ __forceinline__ self_type operator++()
    {
        self_type &self = *this;
        ++self.tex_offset;
        return self;
    }

    /// Indirection
    __host__ __device__ __forceinline__ reference operator*() const
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return ptr[tex_offset];
#else



( run in 0.452 second using v1.01-cache-2.11-cpan-d7f47b0818f )