Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/thread/thread_load.cuh  view on Meta::CPAN

            "=h"(retval) :                                                                  \
            _CUB_ASM_PTR_(ptr));                                                            \
        return retval;                                                                      \
    }


/**
 * Define an unsigned char (1B) ThreadLoad specialization for the given Cache load modifier
 */
#define _CUB_LOAD_1(cub_modifier, ptx_modifier)                                              \
    template<>                                                                              \
    __device__ __forceinline__ unsigned char ThreadLoad<cub_modifier, unsigned char const *>(unsigned char const *ptr)                   \
    {                                                                                       \
        unsigned short retval;                                                              \
        asm volatile (                                                                      \
        "{"                                                                                 \
        "   .reg .u8 datum;"                                                                \
        "    ld."#ptx_modifier".u8 datum, [%1];"                                            \
        "    cvt.u16.u8 %0, datum;"                                                         \
        "}" :                                                                               \
            "=h"(retval) :                                                                  \
            _CUB_ASM_PTR_(ptr));                                                            \
        return (unsigned char) retval;                                                      \
    }


/**
 * Define powers-of-two ThreadLoad specializations for the given Cache load modifier
 */
#define _CUB_LOAD_ALL(cub_modifier, ptx_modifier)                                            \
    _CUB_LOAD_16(cub_modifier, ptx_modifier)                                                 \
    _CUB_LOAD_8(cub_modifier, ptx_modifier)                                                  \
    _CUB_LOAD_4(cub_modifier, ptx_modifier)                                                  \
    _CUB_LOAD_2(cub_modifier, ptx_modifier)                                                  \
    _CUB_LOAD_1(cub_modifier, ptx_modifier)                                                  \


/**
 * Define powers-of-two ThreadLoad specializations for the various Cache load modifiers
 */
#if CUB_PTX_ARCH >= 200
    _CUB_LOAD_ALL(LOAD_CA, ca)
    _CUB_LOAD_ALL(LOAD_CG, cg)
    _CUB_LOAD_ALL(LOAD_CS, cs)
    _CUB_LOAD_ALL(LOAD_CV, cv)
#else
    _CUB_LOAD_ALL(LOAD_CA, global)
    // Use volatile to ensure coherent reads when this PTX is JIT'd to run on newer architectures with L1
    _CUB_LOAD_ALL(LOAD_CG, volatile.global)
    _CUB_LOAD_ALL(LOAD_CS, global)
    _CUB_LOAD_ALL(LOAD_CV, volatile.global)
#endif

#if CUB_PTX_ARCH >= 350
    _CUB_LOAD_ALL(LOAD_LDG, global.nc)
#else
    _CUB_LOAD_ALL(LOAD_LDG, global)
#endif


// Macro cleanup
#undef _CUB_LOAD_ALL
#undef _CUB_LOAD_1
#undef _CUB_LOAD_2
#undef _CUB_LOAD_4
#undef _CUB_LOAD_8
#undef _CUB_LOAD_16



/**
 * ThreadLoad definition for LOAD_DEFAULT modifier on iterator types
 */
template <typename InputIteratorT>
__device__ __forceinline__ typename std::iterator_traits<InputIteratorT>::value_type ThreadLoad(
    InputIteratorT          itr,
    Int2Type<LOAD_DEFAULT>  /*modifier*/,
    Int2Type<false>         /*is_pointer*/)
{
    return *itr;
}


/**
 * ThreadLoad definition for LOAD_DEFAULT modifier on pointer types
 */
template <typename T>
__device__ __forceinline__ T ThreadLoad(
    T                       *ptr,
    Int2Type<LOAD_DEFAULT>  /*modifier*/,
    Int2Type<true>          /*is_pointer*/)
{
    return *ptr;
}


/**
 * ThreadLoad definition for LOAD_VOLATILE modifier on primitive pointer types
 */
template <typename T>
__device__ __forceinline__ T ThreadLoadVolatilePointer(
    T                       *ptr,
    Int2Type<true>          /*is_primitive*/)
{
    T retval = *reinterpret_cast<volatile T*>(ptr);
    return retval;
}


/**
 * ThreadLoad definition for LOAD_VOLATILE modifier on non-primitive pointer types
 */
template <typename T>
__device__ __forceinline__ T ThreadLoadVolatilePointer(
    T                       *ptr,
    Int2Type<false>         /*is_primitive*/)
{
    typedef typename UnitWord<T>::VolatileWord VolatileWord;   // Word type for memcopying

    const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord);
/*



( run in 1.929 second using v1.01-cache-2.11-cpan-13bb782fe5a )