Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/block/block_histogram.cuh view on Meta::CPAN
int BLOCK_DIM_Z = 1,
int PTX_ARCH = CUB_PTX_ARCH>
class BlockHistogram
{
private:
/******************************************************************************
* Constants and type definitions
******************************************************************************/
/// Constants
enum
{
/// The thread block size in threads
BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
};
/**
* Ensure the template parameterization meets the requirements of the
* targeted device architecture. BLOCK_HISTO_ATOMIC can only be used
* on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used
* regardless.
*/
static const BlockHistogramAlgorithm SAFE_ALGORITHM =
((ALGORITHM == BLOCK_HISTO_ATOMIC) && (PTX_ARCH < 120)) ?
BLOCK_HISTO_SORT :
ALGORITHM;
/// Internal specialization.
typedef typename If<(SAFE_ALGORITHM == BLOCK_HISTO_SORT),
BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH>,
BlockHistogramAtomic<BINS> >::Type InternalBlockHistogram;
/// Shared memory storage layout type for BlockHistogram
typedef typename InternalBlockHistogram::TempStorage _TempStorage;
/******************************************************************************
* Thread fields
******************************************************************************/
/// Shared storage reference
_TempStorage &temp_storage;
/// Linear thread-id
unsigned int linear_tid;
/******************************************************************************
* Utility methods
******************************************************************************/
/// Internal storage allocator
__device__ __forceinline__ _TempStorage& PrivateStorage()
{
__shared__ _TempStorage private_storage;
return private_storage;
}
public:
/// \smemstorage{BlockHistogram}
struct TempStorage : Uninitialized<_TempStorage> {};
/******************************************************************//**
* \name Collective constructors
*********************************************************************/
//@{
/**
* \brief Collective constructor using a private static allocation of shared memory as temporary storage.
*/
__device__ __forceinline__ BlockHistogram()
:
temp_storage(PrivateStorage()),
linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{}
/**
* \brief Collective constructor using the specified memory allocation as temporary storage.
*/
__device__ __forceinline__ BlockHistogram(
TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage
:
temp_storage(temp_storage.Alias()),
linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{}
//@} end member group
/******************************************************************//**
* \name Histogram operations
*********************************************************************/
//@{
/**
* \brief Initialize the shared histogram counters to zero.
*
* \par Snippet
* The code snippet below illustrates a the initialization and update of a
* histogram of 512 integer samples that are partitioned across 128 threads
* where each thread owns 4 samples.
* \par
* \code
* #include <cub/cub.cuh> // or equivalently <cub/block/block_histogram.cuh>
*
* __global__ void ExampleKernel(...)
* {
* // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each
* typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;
*
* // Allocate shared memory for BlockHistogram
* __shared__ typename BlockHistogram::TempStorage temp_storage;
*
* // Allocate shared memory for block-wide histogram bin counts
* __shared__ unsigned int smem_histogram[256];
*
( run in 0.516 second using v1.01-cache-2.11-cpan-d7f47b0818f )