Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/test/test_device_select_if.cu view on Meta::CPAN
/**
 * Run one select/partition problem through Dispatch and verify the device
 * result against a host-computed reference.
 *
 * Sequence: allocate device buffers, obtain and allocate temporary storage,
 * run a single checked iteration, compare the output data and the selected
 * count with the reference, time g_timing_iterations further iterations,
 * print throughput figures, release the device buffers, and finally assert
 * that both comparisons succeeded.
 *
 * @tparam BACKEND       Backend tag forwarded to Dispatch as Int2Type<BACKEND>
 * @tparam IS_FLAGGED    Forwarded to Dispatch; also decides whether the flag
 *                       bytes are counted in the reported bandwidth
 * @tparam IS_PARTITION  Forwarded to Dispatch; when true all num_items
 *                       outputs are compared, otherwise only num_selected
 *
 * @param d_in          Input forwarded unchanged to Dispatch
 * @param h_flags       Host flag array (num_items entries are copied to the device)
 * @param select_op     Selection operator forwarded unchanged to Dispatch
 * @param h_reference   Host reference output used for the data comparison
 * @param num_selected  Expected selected-item count
 * @param num_items     Number of input items
 */
template <
    Backend             BACKEND,
    bool                IS_FLAGGED,
    bool                IS_PARTITION,
    typename            DeviceInputIteratorT,
    typename            FlagT,
    typename            SelectOpT,
    typename            T>
void Test(
    DeviceInputIteratorT    d_in,
    FlagT*                  h_flags,
    SelectOpT               select_op,
    T*                      h_reference,
    int                     num_selected,
    int                     num_items)
{
    // Allocate device flags, output, and num-selected
    FlagT*  d_flags = NULL;
    T*      d_out = NULL;
    int*    d_num_selected_out = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(FlagT) * num_items));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * num_items));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int)));

    // Allocate CDP device arrays (handed to every Dispatch call below)
    size_t*         d_temp_storage_bytes = NULL;
    cudaError_t*    d_cdp_error = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1));

    // Allocate temporary storage. The first Dispatch call is made while
    // d_temp_storage is still NULL; temp_storage_bytes is then used as the
    // allocation size (presumably Dispatch only reports the required size in
    // this mode -- confirm against Dispatch).
    void    *d_temp_storage = NULL;
    size_t  temp_storage_bytes = 0;
    CubDebugExit(Dispatch(Int2Type<BACKEND>(), Int2Type<IS_FLAGGED>(), Int2Type<IS_PARTITION>(), 1, d_temp_storage_bytes, d_cdp_error,
        d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, true));
    CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));

    // Copy flags and clear device output array. The flags are copied
    // regardless of IS_FLAGGED; output and count are zeroed so stale data
    // cannot match the reference.
    CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(FlagT) * num_items, cudaMemcpyHostToDevice));
    CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * num_items));
    CubDebugExit(cudaMemset(d_num_selected_out, 0, sizeof(int)));

    // Run warmup/correctness iteration
    CubDebugExit(Dispatch(Int2Type<BACKEND>(), Int2Type<IS_FLAGGED>(), Int2Type<IS_PARTITION>(), 1, d_temp_storage_bytes, d_cdp_error,
        d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, true));

    // Check for correctness (and display results, if specified).
    // A non-zero comparison result is reported as a failure.
    int compare1 = (IS_PARTITION) ?
        CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose) :
        CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose);
    printf("\t Data %s\n", compare1 ? "FAIL" : "PASS");

    int compare2 = CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose);
    printf("\t Count %s\n", compare2 ? "FAIL" : "PASS");

    // Flush any stdout/stderr
    fflush(stdout);
    fflush(stderr);

    // Performance
    GpuTimer gpu_timer;
    gpu_timer.Start();
    CubDebugExit(Dispatch(Int2Type<BACKEND>(), Int2Type<IS_FLAGGED>(), Int2Type<IS_PARTITION>(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error,
        d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, false));
    gpu_timer.Stop();
    float elapsed_millis = gpu_timer.ElapsedMillis();

    // Display performance
    if (g_timing_iterations > 0)
    {
        float avg_millis = elapsed_millis / g_timing_iterations;

        // items per millisecond / 1e6 == billions of items per second
        float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f;

        // Logical traffic: every input item read, every output item written,
        // plus one flag per input item when flags are in use. The counts are
        // widened to size_t BEFORE being added: in partition mode the sum is
        // 2 * num_items, which overflows a signed int for num_items > 2^30 - 1.
        const size_t num_input_items  = size_t(num_items);
        const size_t num_output_items = (IS_PARTITION) ? size_t(num_items) : size_t(num_selected);
        const size_t num_flag_items   = (IS_FLAGGED) ? size_t(num_items) : size_t(0);
        const size_t num_bytes        = sizeof(T) * (num_input_items + num_output_items) + sizeof(FlagT) * num_flag_items;

        float giga_bandwidth = float(num_bytes) / avg_millis / 1000.0f / 1000.0f;
        printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s, %.1f%% peak", avg_millis, giga_rate, giga_bandwidth, giga_bandwidth / g_device_giga_bandwidth * 100.0);
    }
    printf("\n\n");

    // Flush any stdout/stderr
    fflush(stdout);
    fflush(stderr);

    // Cleanup (done before the assertion below so the buffers are released
    // whether or not the comparisons passed)
    if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags));
    if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
    if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out));
    if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes));
    if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error));
    if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage));

    // Correctness asserts: both the data and the count comparison must be zero
    AssertEquals(0, compare1 | compare2);
}
/**
* Test on pointer type
*/
template <
Backend BACKEND,
bool IS_FLAGGED,
bool IS_PARTITION,
typename T>
void TestPointer(
int num_items,
float select_ratio)
{
typedef char FlagT;
// Allocate host arrays
T* h_in = new T[num_items];
FlagT* h_flags = new FlagT[num_items];
T* h_reference = new T[num_items];
// Initialize input
Initialize(h_in, num_items);
// Select a comparison value that is select_ratio through the space of [0,127]
T compare;
if (select_ratio <= 0.0)
InitValue(INTEGER_SEED, compare, 0); // select none
else if (select_ratio >= 1.0)
InitValue(INTEGER_SEED, compare, 127); // select all
( run in 0.548 second using v1.01-cache-2.11-cpan-39bf76dae61 )