Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/experimental/histogram/histogram_cub.h view on Meta::CPAN
******************************************************************************/
#include <cub/device/device_histogram.cuh>
#include <cub/util_debug.cuh>
using namespace cub;
/**
 * Time a single cub::DeviceHistogram::MultiHistogramEven invocation over an image.
 *
 * Template parameters:
 *   NUM_CHANNELS     Forwarded to MultiHistogramEven as its first template argument.
 *   ACTIVE_CHANNELS  Forwarded as the second template argument; also the number of
 *                    per-channel histograms set up here.
 *   NUM_BINS         Bins per histogram (NUM_BINS + 1 levels are passed to CUB).
 *   PixelType        Pixel type. float4 selects float samples binned over [0, 1);
 *                    any other type is read as unsigned char samples binned over [0, 256).
 *
 * Parameters:
 *   d_image    Pixel buffer; reinterpreted as a flat array of samples.
 *   width      Image width in pixels.
 *   height     Image height in pixels. width * height is evaluated in int and
 *              passed to CUB as the pixel count.
 *   d_hist     Output counters. Channel c's bins start at d_hist + c * NUM_BINS, so
 *              the buffer must hold at least ACTIVE_CHANNELS * NUM_BINS counters.
 *   is_warmup  Forwarded unchanged as the final argument of MultiHistogramEven
 *              (the debug_synchronous flag in CUB's documented signature).
 *
 * Returns the time in milliseconds reported by GpuTimer for the histogram call only;
 * the temporary-storage size query and allocation are not timed.
 *
 * Any CUDA/CUB failure is reported and terminates the process via CubDebugExit, so
 * a bogus timing is never returned for a run that did not actually execute.
 */
template <
    int NUM_CHANNELS,
    int ACTIVE_CHANNELS,
    int NUM_BINS,
    typename PixelType>
double run_cub_histogram(
    PixelType *d_image,
    int width,
    int height,
    unsigned int *d_hist,
    bool is_warmup)
{
    enum {
        is_float = Equals<PixelType, float4>::VALUE,
    };

    typedef typename If<is_float, float, unsigned char>::Type SampleT;  // Sample type
    typedef typename If<is_float, float, unsigned int>::Type LevelT;    // Level type (uint32 for uchar)

    // Per-channel arguments for MultiHistogramEven
    unsigned int* d_histogram[ACTIVE_CHANNELS];     // Output bins of each active channel
    int num_levels[ACTIVE_CHANNELS];                // Number of bin boundaries (bins + 1)
    LevelT lower_level[ACTIVE_CHANNELS];            // Lower bound of the lowest bin
    LevelT upper_level[ACTIVE_CHANNELS];            // Upper bound of the highest bin

    for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
    {
        d_histogram[CHANNEL] = d_hist + (CHANNEL * NUM_BINS);
        num_levels[CHANNEL] = NUM_BINS + 1;
        lower_level[CHANNEL] = 0;
        upper_level[CHANNEL] = (is_float) ? 1 : 256;
    }

    const int num_pixels = width * height;

    size_t temp_storage_bytes = 0;
    void *d_temp_storage = NULL;
    SampleT* d_image_samples = (SampleT*) d_image;

    // Size query: with d_temp_storage == NULL the call only fills in temp_storage_bytes.
    // The status is captured in a variable first because the comma inside the template
    // argument list would otherwise split the CubDebugExit macro argument.
    cudaError_t error = DeviceHistogram::MultiHistogramEven<NUM_CHANNELS, ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_image_samples,
        d_histogram,
        num_levels,
        lower_level,
        upper_level,
        num_pixels,
        (cudaStream_t) 0,
        is_warmup);
    CubDebugExit(error);

    // A failed allocation would leave d_temp_storage NULL and silently turn the timed
    // call below into a second size query, so it must be checked.
    error = cudaMalloc(&d_temp_storage, temp_storage_bytes);
    CubDebugExit(error);

    GpuTimer gpu_timer;
    gpu_timer.Start();

    // Compute histogram (timed region)
    error = DeviceHistogram::MultiHistogramEven<NUM_CHANNELS, ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_image_samples,
        d_histogram,
        num_levels,
        lower_level,
        upper_level,
        num_pixels,
        (cudaStream_t) 0,
        is_warmup);

    gpu_timer.Stop();
    float elapsed_millis = gpu_timer.ElapsedMillis();

    // Checked after the timer is stopped so error handling stays out of the timed region.
    CubDebugExit(error);

    error = cudaFree(d_temp_storage);
    CubDebugExit(error);

    return elapsed_millis;
}
( run in 0.341 second using v1.01-cache-2.11-cpan-39bf76dae61 )