Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/thread/thread_load.cuh view on Meta::CPAN
"=h"(retval) : \
_CUB_ASM_PTR_(ptr)); \
return retval; \
}
/**
* Define an unsigned char (1B) ThreadLoad specialization for the given Cache load modifier
*/
#define _CUB_LOAD_1(cub_modifier, ptx_modifier) \
template<> \
__device__ __forceinline__ unsigned char ThreadLoad<cub_modifier, unsigned char const *>(unsigned char const *ptr) \
{ \
unsigned short retval; \
asm volatile ( \
"{" \
" .reg .u8 datum;" \
" ld."#ptx_modifier".u8 datum, [%1];" \
" cvt.u16.u8 %0, datum;" \
"}" : \
"=h"(retval) : \
_CUB_ASM_PTR_(ptr)); \
return (unsigned char) retval; \
}
/**
* Define powers-of-two ThreadLoad specializations for the given Cache load modifier
*/
#define _CUB_LOAD_ALL(cub_modifier, ptx_modifier) \
_CUB_LOAD_16(cub_modifier, ptx_modifier) \
_CUB_LOAD_8(cub_modifier, ptx_modifier) \
_CUB_LOAD_4(cub_modifier, ptx_modifier) \
_CUB_LOAD_2(cub_modifier, ptx_modifier) \
_CUB_LOAD_1(cub_modifier, ptx_modifier) \
/**
* Define powers-of-two ThreadLoad specializations for the various Cache load modifiers
*/
#if CUB_PTX_ARCH >= 200
_CUB_LOAD_ALL(LOAD_CA, ca)
_CUB_LOAD_ALL(LOAD_CG, cg)
_CUB_LOAD_ALL(LOAD_CS, cs)
_CUB_LOAD_ALL(LOAD_CV, cv)
#else
_CUB_LOAD_ALL(LOAD_CA, global)
// Use volatile to ensure coherent reads when this PTX is JIT'd to run on newer architectures with L1
_CUB_LOAD_ALL(LOAD_CG, volatile.global)
_CUB_LOAD_ALL(LOAD_CS, global)
_CUB_LOAD_ALL(LOAD_CV, volatile.global)
#endif
#if CUB_PTX_ARCH >= 350
_CUB_LOAD_ALL(LOAD_LDG, global.nc)
#else
_CUB_LOAD_ALL(LOAD_LDG, global)
#endif
// Macro cleanup
#undef _CUB_LOAD_ALL
#undef _CUB_LOAD_1
#undef _CUB_LOAD_2
#undef _CUB_LOAD_4
#undef _CUB_LOAD_8
#undef _CUB_LOAD_16
/**
* ThreadLoad definition for LOAD_DEFAULT modifier on iterator types
*/
template <typename InputIteratorT>
__device__ __forceinline__ typename std::iterator_traits<InputIteratorT>::value_type ThreadLoad(
InputIteratorT itr,
Int2Type<LOAD_DEFAULT> /*modifier*/,
Int2Type<false> /*is_pointer*/)
{
return *itr;
}
/**
* ThreadLoad definition for LOAD_DEFAULT modifier on pointer types
*/
template <typename T>
__device__ __forceinline__ T ThreadLoad(
T *ptr,
Int2Type<LOAD_DEFAULT> /*modifier*/,
Int2Type<true> /*is_pointer*/)
{
return *ptr;
}
/**
* ThreadLoad definition for LOAD_VOLATILE modifier on primitive pointer types
*/
template <typename T>
__device__ __forceinline__ T ThreadLoadVolatilePointer(
T *ptr,
Int2Type<true> /*is_primitive*/)
{
T retval = *reinterpret_cast<volatile T*>(ptr);
return retval;
}
/**
* ThreadLoad definition for LOAD_VOLATILE modifier on non-primitive pointer types
*/
template <typename T>
__device__ __forceinline__ T ThreadLoadVolatilePointer(
T *ptr,
Int2Type<false> /*is_primitive*/)
{
typedef typename UnitWord<T>::VolatileWord VolatileWord; // Word type for memcopying
const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord);
/*
( run in 1.929 second using v1.01-cache-2.11-cpan-13bb782fe5a )