Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/block/block_discontinuity.cuh view on Meta::CPAN
typename FlagOp>
static __device__ __forceinline__ void FlagTails(
int linear_tid,
FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
FlagOp flag_op) ///< [in] Binary boolean flag predicate
{
flags[ITERATION] = ApplyOp<FlagOp>::FlagT(
flag_op,
input[ITERATION],
input[ITERATION + 1],
(linear_tid * ITEMS_PER_THREAD) + ITERATION + 1);
Iterate<ITERATION + 1, MAX_ITERATIONS>::FlagTails(linear_tid, flags, input, flag_op);
}
};
/// Templated unrolling of item comparison (termination case)
template <int MAX_ITERATIONS>
struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
{
// Head flags
template <
int ITEMS_PER_THREAD,
typename FlagT,
typename FlagOp>
static __device__ __forceinline__ void FlagHeads(
int /*linear_tid*/,
FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags
T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items
FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate
{}
// Tail flags
template <
int ITEMS_PER_THREAD,
typename FlagT,
typename FlagOp>
static __device__ __forceinline__ void FlagTails(
int /*linear_tid*/,
FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags
T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate
{}
};
/******************************************************************************
* Thread fields
******************************************************************************/
/// Shared storage reference
_TempStorage &temp_storage;
/// Linear thread-id
unsigned int linear_tid;
public:
/// \smemstorage{BlockDiscontinuity}
struct TempStorage : Uninitialized<_TempStorage> {};
/******************************************************************//**
* \name Collective constructors
*********************************************************************/
//@{
/**
* \brief Collective constructor using a private static allocation of shared memory as temporary storage.
*/
__device__ __forceinline__ BlockDiscontinuity()
:
temp_storage(PrivateStorage()),
linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{}
/**
* \brief Collective constructor using the specified memory allocation as temporary storage.
*/
__device__ __forceinline__ BlockDiscontinuity(
TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage
:
temp_storage(temp_storage.Alias()),
linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{}
//@} end member group
/******************************************************************//**
* \name Head flag operations
*********************************************************************/
//@{
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
template <
int ITEMS_PER_THREAD,
typename FlagT,
typename FlagOp>
__device__ __forceinline__ void FlagHeads(
FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items
FlagOp flag_op) ///< [in] Binary boolean flag predicate
{
// Share last item
temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
CTA_SYNC();
if (linear_tid == 0)
{
// Set flag for first thread-item (preds[0] is undefined)
head_flags[0] = 1;
}
( run in 0.819 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )