Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/thread/thread_store.cuh  view on Meta::CPAN

    }


/**
 * Define a unsigned short (2B) ThreadStore specialization for the given Cache load modifier
 */
#define _CUB_STORE_2(cub_modifier, ptx_modifier)                                             \
    template<>                                                                              \
    __device__ __forceinline__ void ThreadStore<cub_modifier, unsigned short*, unsigned short>(unsigned short* ptr, unsigned short val)                     \
    {                                                                                       \
        asm volatile ("st."#ptx_modifier".u16 [%0], %1;" : :                                \
            _CUB_ASM_PTR_(ptr),                                                             \
            "h"(val));                                                                      \
    }


/**
 * Define a unsigned char (1B) ThreadStore specialization for the given Cache load modifier
 */
#define _CUB_STORE_1(cub_modifier, ptx_modifier)                                             \
    template<>                                                                              \
    __device__ __forceinline__ void ThreadStore<cub_modifier, unsigned char*, unsigned char>(unsigned char* ptr, unsigned char val)                         \
    {                                                                                       \
        asm volatile (                                                                      \
        "{"                                                                                 \
        "   .reg .u8 datum;"                                                                \
        "   cvt.u8.u16 datum, %1;"                                                          \
        "   st."#ptx_modifier".u8 [%0], datum;"                                             \
        "}" : :                                                                             \
            _CUB_ASM_PTR_(ptr),                                                             \
            "h"((unsigned short) val));                                                               \
    }

/**
 * Define powers-of-two ThreadStore specializations for the given Cache load modifier
 */
#define _CUB_STORE_ALL(cub_modifier, ptx_modifier)                                           \
    _CUB_STORE_16(cub_modifier, ptx_modifier)                                                \
    _CUB_STORE_8(cub_modifier, ptx_modifier)                                                 \
    _CUB_STORE_4(cub_modifier, ptx_modifier)                                                 \
    _CUB_STORE_2(cub_modifier, ptx_modifier)                                                 \
    _CUB_STORE_1(cub_modifier, ptx_modifier)                                                 \


/**
 * Define ThreadStore specializations for the various Cache load modifiers
 */
#if CUB_PTX_ARCH >= 200
    _CUB_STORE_ALL(STORE_WB, wb)
    _CUB_STORE_ALL(STORE_CG, cg)
    _CUB_STORE_ALL(STORE_CS, cs)
    _CUB_STORE_ALL(STORE_WT, wt)
#else
    _CUB_STORE_ALL(STORE_WB, global)
    _CUB_STORE_ALL(STORE_CG, global)
    _CUB_STORE_ALL(STORE_CS, global)
    _CUB_STORE_ALL(STORE_WT, volatile.global)
#endif


// Macro cleanup
#undef _CUB_STORE_ALL
#undef _CUB_STORE_1
#undef _CUB_STORE_2
#undef _CUB_STORE_4
#undef _CUB_STORE_8
#undef _CUB_STORE_16


/**
 * ThreadStore definition for STORE_DEFAULT modifier on iterator types
 */
template <typename OutputIteratorT, typename T>
__device__ __forceinline__ void ThreadStore(
    OutputIteratorT             itr,
    T                           val,
    Int2Type<STORE_DEFAULT>     /*modifier*/,
    Int2Type<false>             /*is_pointer*/)
{
    *itr = val;
}


/**
 * ThreadStore definition for STORE_DEFAULT modifier on pointer types
 */
template <typename T>
__device__ __forceinline__ void ThreadStore(
    T                           *ptr,
    T                           val,
    Int2Type<STORE_DEFAULT>     /*modifier*/,
    Int2Type<true>              /*is_pointer*/)
{
    *ptr = val;
}


/**
 * ThreadStore definition for STORE_VOLATILE modifier on primitive pointer types
 */
template <typename T>
__device__ __forceinline__ void ThreadStoreVolatilePtr(
    T                           *ptr,
    T                           val,
    Int2Type<true>              /*is_primitive*/)
{
    *reinterpret_cast<volatile T*>(ptr) = val;
}


/**
 * ThreadStore definition for STORE_VOLATILE modifier on non-primitive pointer types
 */
template <typename T>
__device__ __forceinline__ void ThreadStoreVolatilePtr(
    T                           *ptr,
    T                           val,
    Int2Type<false>             /*is_primitive*/)
{
    // Create a temporary using shuffle-words, then store using volatile-words
    typedef typename UnitWord<T>::VolatileWord  VolatileWord;  



( run in 0.565 second using v1.01-cache-2.11-cpan-e1769b4cff6 )