Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/experimental/histogram_compare.cu view on Meta::CPAN
/**
 * Reference decode of a single-channel uchar1 pixel into histogram bins.
 *
 * The pixel's only sample (pixel.x) is used directly as the bin index for
 * channel 0.  Only bins[0] is written; NUM_BINS is not consulted by this
 * overload.
 */
template <int NUM_BINS, int ACTIVE_CHANNELS>
void DecodePixelGold(uchar1 pixel, unsigned int (&bins)[ACTIVE_CHANNELS])
{
    const unsigned int sample = static_cast<unsigned int>(pixel.x);
    bins[0] = sample;
}
/**
 * Compute the reference (CPU) histogram of an image.
 *
 * Generic over PixelType: each pixel is converted to per-channel bin indices
 * by the DecodePixelGold overload matching PixelType.
 *
 * @param image   Host pointer to width * height pixels; pixel (x, y) is read
 *                from image[y * width + x].
 * @param width   Number of pixels per row.
 * @param height  Number of rows.
 * @param hist    Output array of ACTIVE_CHANNELS * NUM_BINS counters.  It is
 *                zeroed here, then channel c / bin b is accumulated at
 *                hist[NUM_BINS * c + b].
 *
 * NOTE: bin indices returned by DecodePixelGold are not range-checked here;
 * they are assumed to lie in [0, NUM_BINS).
 */
template <
    int         ACTIVE_CHANNELS,
    int         NUM_BINS,
    typename    PixelType>
void HistogramGold(PixelType *image, int width, int height, unsigned int* hist)
{
    memset(hist, 0, ACTIVE_CHANNELS * NUM_BINS * sizeof(unsigned int));

    // Visit pixels row by row so that consecutive reads are contiguous in
    // memory.  The histogram does not depend on visiting order, so the result
    // is the same as with a column-by-column walk.
    for (int j = 0; j < height; j++)
    {
        // Compute the row offset in size_t so that images with more than
        // INT_MAX pixels do not overflow the index arithmetic.
        const size_t row_offset = size_t(j) * size_t(width);

        for (int i = 0; i < width; i++)
        {
            PixelType pixel = image[row_offset + size_t(i)];

            unsigned int bins[ACTIVE_CHANNELS];
            DecodePixelGold<NUM_BINS>(pixel, bins);

            // One counter per active channel, laid out channel-major
            for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
            {
                hist[(NUM_BINS * CHANNEL) + bins[CHANNEL]]++;
            }
        }
    }
}
//---------------------------------------------------------------------
// Test execution
//---------------------------------------------------------------------
/**
 * Verify and time one histogram implementation.
 *
 * The implementation f is first invoked once (with its last argument set to
 * !g_report) and the device histogram it produced is compared against the
 * reference h_hist over ACTIVE_CHANNELS * NUM_BINS counters.  It is then
 * invoked timing_iterations more times (last argument false); the values it
 * returns are summed as milliseconds and averaged.
 *
 * The average, converted to microseconds, is appended to timings under
 * short_name and printed: as a human-readable line (preceded by long_name
 * and PASS/FAIL) when g_report is not set, or as a bare "value, " field when
 * it is.  Finally the comparison result is asserted to be zero.
 */
template <
    int         ACTIVE_CHANNELS,
    int         NUM_BINS,
    typename    PixelType>
void RunTest(
    std::vector<std::pair<std::string, double> >&   timings,
    PixelType*                                      d_pixels,
    const int                                       width,
    const int                                       height,
    unsigned int *                                  d_hist,
    unsigned int *                                  h_hist,
    int                                             timing_iterations,
    const char *                                    long_name,
    const char *                                    short_name,
    double (*f)(PixelType*, int, int, unsigned int*, bool))
{
    // Announce the test (interactive mode only); the flush is unconditional
    if (!g_report)
    {
        printf("%s ", long_name);
    }
    fflush(stdout);

    // Single untimed run: checks correctness and warms the code cache
    f(d_pixels, width, height, d_hist, !g_report);
    int compare = CompareDeviceResults(h_hist, d_hist, ACTIVE_CHANNELS * NUM_BINS, true, g_verbose);
    if (!g_report)
    {
        printf("\t%s\n", compare ? "FAIL" : "PASS");
    }
    fflush(stdout);

    // Timed runs: accumulate the per-call time reported by f
    double total_ms = 0;
    for (int remaining = timing_iterations; remaining > 0; --remaining)
    {
        total_ms += f(d_pixels, width, height, d_hist, false);
    }

    const double avg_ms = total_ms / timing_iterations;
    const double avg_us = avg_ms * 1000;    // milliseconds -> microseconds
    timings.push_back(std::make_pair(std::string(short_name), avg_us));

    if (g_report)
    {
        printf("%.3f, ", avg_us);
    }
    else
    {
        printf("Avg time %.3f us (%d iterations)\n", avg_us, timing_iterations);
    }
    fflush(stdout);

    // Fail only after the timing has been recorded and reported
    AssertEquals(0, compare);
}
/**
* Evaluate corpus of histogram implementations
*/
template <
int NUM_CHANNELS,
int ACTIVE_CHANNELS,
int NUM_BINS,
typename PixelType>
void TestMethods(
PixelType* h_pixels,
int height,
int width,
int timing_iterations,
double bandwidth_GBs)
{
// Copy data to gpu
PixelType* d_pixels;
size_t pixel_bytes = width * height * sizeof(PixelType);
CubDebugExit(g_allocator.DeviceAllocate((void**) &d_pixels, pixel_bytes));
CubDebugExit(cudaMemcpy(d_pixels, h_pixels, pixel_bytes, cudaMemcpyHostToDevice));
if (g_report) printf("%.3f, ", double(pixel_bytes) / bandwidth_GBs / 1000);
// Allocate results arrays on cpu/gpu
unsigned int *h_hist;
unsigned int *d_hist;
size_t histogram_bytes = NUM_BINS * ACTIVE_CHANNELS * sizeof(unsigned int);
h_hist = (unsigned int *) malloc(histogram_bytes);
g_allocator.DeviceAllocate((void **) &d_hist, histogram_bytes);
// Compute reference cpu histogram
HistogramGold<ACTIVE_CHANNELS, NUM_BINS>(h_pixels, width, height, h_hist);
// Store timings
( run in 0.451 second using v1.01-cache-2.11-cpan-13bb782fe5a )