Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/test/test_warp_scan.cu view on Meta::CPAN
typename T,
typename InitialValueT>
__device__ __forceinline__ void DeviceTest(
WarpScanT &warp_scan,
T &data,
NullType &initial_value,
Sum &scan_op,
T &aggregate,
Int2Type<BASIC> test_mode,
Int2Type<true> is_primitive)
{
// Test basic warp scan
warp_scan.InclusiveSum(data, data);
}
/// Inclusive sum aggregate
///
/// DeviceTest overload for the combination: Sum scan operator, NullType
/// initial value, AGGREGATE test mode, and is_primitive == Int2Type<true>.
/// It forwards to the three-argument form of WarpScanT::InclusiveSum.
///
/// \param warp_scan      Warp-scan object the call is issued on
/// \param data           Per-thread item; passed as both the input and the output argument
/// \param initial_value  Unused in the body; its NullType type takes part in overload selection
/// \param scan_op        Unused in the body; its Sum type takes part in overload selection
/// \param aggregate      Passed as the third argument of InclusiveSum (presumably receives the warp-wide total -- confirm against WarpScan)
/// \param test_mode      Unused tag; Int2Type<AGGREGATE> selects this overload
/// \param is_primitive   Unused tag; Int2Type<true> selects this overload
///
/// NOTE(review): InitialValueT does not appear in any function parameter
/// type, so it cannot be deduced from the call arguments. A call that relies
/// purely on template argument deduction would not be able to select this
/// overload -- confirm whether that is intended.
template <
typename WarpScanT,
typename T,
typename InitialValueT>
__device__ __forceinline__ void DeviceTest(
WarpScanT &warp_scan,
T &data,
NullType &initial_value,
Sum &scan_op,
T &aggregate,
Int2Type<AGGREGATE> test_mode,
Int2Type<true> is_primitive)
{
// Test with cumulative aggregate
// (data is scanned in place: the same variable is the source and the destination)
warp_scan.InclusiveSum(data, data, aggregate);
}
/**
 * WarpScan test kernel
 *
 * Every thread loads one item from d_in, runs the DeviceTest overload chosen
 * by TEST_MODE and Traits<T>::PRIMITIVE on it, and writes the scanned item to
 * d_out. The scan call is bracketed by two clock() readings.
 *
 * All arrays are indexed by threadIdx.x only (blockIdx is not consulted), so
 * d_in, d_out and -- when TEST_MODE != BASIC -- d_aggregate must each hold one
 * element per launched thread.
 *
 * \param d_in           Input items, one per thread
 * \param d_out          Scanned items, one per thread
 * \param d_aggregate    Per-thread aggregate; written only when TEST_MODE != BASIC
 * \param scan_op        Scan operator forwarded to DeviceTest
 * \param initial_value  Initial value forwarded to DeviceTest
 * \param d_elapsed      Receives the absolute difference of the two clock()
 *                       readings; written by thread 0 only
 */
template <
    int         LOGICAL_WARP_THREADS,
    TestMode    TEST_MODE,
    typename    T,
    typename    ScanOpT,
    typename    InitialValueT>
__global__ void WarpScanKernel(
    T               *d_in,
    T               *d_out,
    T               *d_aggregate,
    ScanOpT         scan_op,
    InitialValueT   initial_value,
    clock_t         *d_elapsed)
{
    // The warp-scan specialization under test
    typedef WarpScan<T, LOGICAL_WARP_THREADS> WarpScanT;

    // Scratch space for the warp-scan, placed in shared memory
    __shared__ typename WarpScanT::TempStorage scan_storage;

    // This thread's item
    T thread_item = d_in[threadIdx.x];

    // First clock reading (fences are a workaround to prevent clock hoisting)
    __threadfence_block();
    const clock_t tic = clock();
    __threadfence_block();

    // Left uninitialized on purpose: only read back when TEST_MODE != BASIC
    T warp_total;

    // Run the scan variant selected by the two tag arguments
    WarpScanT warp_scan(scan_storage);
    DeviceTest(
        warp_scan,
        thread_item,
        initial_value,
        scan_op,
        warp_total,
        Int2Type<TEST_MODE>(),
        Int2Type<Traits<T>::PRIMITIVE>());

    // Second clock reading (fences are a workaround to prevent clock hoisting)
    __threadfence_block();
    const clock_t toc = clock();
    __threadfence_block();

    // Publish the scanned item
    d_out[threadIdx.x] = thread_item;

    // Publish the aggregate for every mode that produces one
    if (TEST_MODE != BASIC)
    {
        d_aggregate[threadIdx.x] = warp_total;
    }

    // Thread 0 publishes the elapsed count as an absolute difference, so the
    // stored value is non-negative whichever reading is larger
    if (threadIdx.x == 0)
    {
        clock_t elapsed;
        if (tic > toc)
        {
            elapsed = tic - toc;
        }
        else
        {
            elapsed = toc - tic;
        }
        *d_elapsed = elapsed;
    }
}
//---------------------------------------------------------------------
// Host utility subroutines
//---------------------------------------------------------------------
/**
 * Initialize exclusive-scan problem (and solution)
 *
 * Fills h_in[i] through InitValue(gen_mode, h_in[i], i) and writes the
 * matching exclusive-scan reference into h_reference:
 *   h_reference[0] = initial_value
 *   h_reference[i] = initial_value combined (via scan_op) with h_in[0..i-1]
 *
 * Element 0 is accessed unconditionally, so both arrays must hold at least
 * one element and num_items is expected to be >= 1.
 *
 * \param gen_mode       Generation mode forwarded to InitValue
 * \param h_in           Host input array to fill (num_items elements)
 * \param h_reference    Host reference output array to fill (num_items elements)
 * \param num_items      Number of items in the problem
 * \param scan_op        Binary scan operator
 * \param initial_value  Seed that becomes h_reference[0]
 *
 * \return scan_op folded over h_in[0..num_items-1]; initial_value is NOT
 *         included in the returned aggregate
 */
template <
    typename T,
    typename ScanOpT>
T Initialize(
    GenMode     gen_mode,
    T           *h_in,
    T           *h_reference,
    int         num_items,
    ScanOpT     scan_op,
    T           initial_value)
{
    // Item 0 is handled ahead of the loop: the aggregate starts from the
    // first input itself rather than from initial_value
    InitValue(gen_mode, h_in[0], 0);
    T items_total = h_in[0];
    h_reference[0] = initial_value;
    T running_prefix = scan_op(initial_value, h_in[0]);

    int idx = 1;
    while (idx < num_items)
    {
        InitValue(gen_mode, h_in[idx], idx);

        // Exclusive result: the prefix accumulated before this item is folded in
        h_reference[idx] = running_prefix;

        running_prefix = scan_op(running_prefix, h_in[idx]);
        items_total = scan_op(items_total, h_in[idx]);

        ++idx;
    }

    return items_total;
}
/**
* Initialize inclusive-scan problem (and solution)
*/
( run in 1.931 second using v1.01-cache-2.11-cpan-524268b4103 )