Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/thread/thread_store.cuh view on Meta::CPAN
}
/**
* Define a unsigned short (2B) ThreadStore specialization for the given Cache load modifier
*/
#define _CUB_STORE_2(cub_modifier, ptx_modifier) \
template<> \
__device__ __forceinline__ void ThreadStore<cub_modifier, unsigned short*, unsigned short>(unsigned short* ptr, unsigned short val) \
{ \
asm volatile ("st."#ptx_modifier".u16 [%0], %1;" : : \
_CUB_ASM_PTR_(ptr), \
"h"(val)); \
}
/**
* Define a unsigned char (1B) ThreadStore specialization for the given Cache load modifier
*/
#define _CUB_STORE_1(cub_modifier, ptx_modifier) \
template<> \
__device__ __forceinline__ void ThreadStore<cub_modifier, unsigned char*, unsigned char>(unsigned char* ptr, unsigned char val) \
{ \
asm volatile ( \
"{" \
" .reg .u8 datum;" \
" cvt.u8.u16 datum, %1;" \
" st."#ptx_modifier".u8 [%0], datum;" \
"}" : : \
_CUB_ASM_PTR_(ptr), \
"h"((unsigned short) val)); \
}
/**
* Define powers-of-two ThreadStore specializations for the given Cache load modifier
*/
#define _CUB_STORE_ALL(cub_modifier, ptx_modifier) \
_CUB_STORE_16(cub_modifier, ptx_modifier) \
_CUB_STORE_8(cub_modifier, ptx_modifier) \
_CUB_STORE_4(cub_modifier, ptx_modifier) \
_CUB_STORE_2(cub_modifier, ptx_modifier) \
_CUB_STORE_1(cub_modifier, ptx_modifier) \
/**
* Define ThreadStore specializations for the various Cache load modifiers
*/
#if CUB_PTX_ARCH >= 200
_CUB_STORE_ALL(STORE_WB, wb)
_CUB_STORE_ALL(STORE_CG, cg)
_CUB_STORE_ALL(STORE_CS, cs)
_CUB_STORE_ALL(STORE_WT, wt)
#else
_CUB_STORE_ALL(STORE_WB, global)
_CUB_STORE_ALL(STORE_CG, global)
_CUB_STORE_ALL(STORE_CS, global)
_CUB_STORE_ALL(STORE_WT, volatile.global)
#endif
// Macro cleanup
#undef _CUB_STORE_ALL
#undef _CUB_STORE_1
#undef _CUB_STORE_2
#undef _CUB_STORE_4
#undef _CUB_STORE_8
#undef _CUB_STORE_16
/**
* ThreadStore definition for STORE_DEFAULT modifier on iterator types
*/
template <typename OutputIteratorT, typename T>
__device__ __forceinline__ void ThreadStore(
OutputIteratorT itr,
T val,
Int2Type<STORE_DEFAULT> /*modifier*/,
Int2Type<false> /*is_pointer*/)
{
*itr = val;
}
/**
* ThreadStore definition for STORE_DEFAULT modifier on pointer types
*/
template <typename T>
__device__ __forceinline__ void ThreadStore(
T *ptr,
T val,
Int2Type<STORE_DEFAULT> /*modifier*/,
Int2Type<true> /*is_pointer*/)
{
*ptr = val;
}
/**
* ThreadStore definition for STORE_VOLATILE modifier on primitive pointer types
*/
template <typename T>
__device__ __forceinline__ void ThreadStoreVolatilePtr(
T *ptr,
T val,
Int2Type<true> /*is_primitive*/)
{
*reinterpret_cast<volatile T*>(ptr) = val;
}
/**
* ThreadStore definition for STORE_VOLATILE modifier on non-primitive pointer types
*/
template <typename T>
__device__ __forceinline__ void ThreadStoreVolatilePtr(
T *ptr,
T val,
Int2Type<false> /*is_primitive*/)
{
// Create a temporary using shuffle-words, then store using volatile-words
typedef typename UnitWord<T>::VolatileWord VolatileWord;
( run in 0.565 second using v1.01-cache-2.11-cpan-e1769b4cff6 )