Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/experimental/histogram/histogram_cub.h  view on Meta::CPAN

 ******************************************************************************/

#include <cstdlib>

#include <cub/device/device_histogram.cuh>
#include <cub/util_debug.cuh>

using namespace cub;

template <
    int         NUM_CHANNELS,
    int         ACTIVE_CHANNELS,
    int         NUM_BINS,
    typename    PixelType>
double run_cub_histogram(
    PixelType *d_image,
    int width,
    int height,
    unsigned int *d_hist, 
    bool is_warmup)
{
    enum {
        is_float = Equals<PixelType, float4>::VALUE,
    };

    typedef typename If<is_float, float, unsigned char>::Type    SampleT;    // Sample type
    typedef typename If<is_float, float, unsigned int>::Type     LevelT;     // Level type (uint32 for uchar)

    // Setup data structures
    unsigned int*       d_histogram[ACTIVE_CHANNELS];
    int                 num_levels[ACTIVE_CHANNELS];            ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel.  Implies that the number of bins for channel<sub><em>i</em></sub> is <tt>num_levels[...
    LevelT              lower_level[ACTIVE_CHANNELS];           ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
    LevelT              upper_level[ACTIVE_CHANNELS];           ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel.

    for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
    {
        d_histogram[CHANNEL] = d_hist + (CHANNEL * NUM_BINS);
        num_levels[CHANNEL] = NUM_BINS + 1;
        lower_level[CHANNEL] = 0;
        upper_level[CHANNEL] = (is_float) ? 1 : 256;
    }

    // Allocate temporary storage
    size_t temp_storage_bytes = 0;
    void *d_temp_storage = NULL;

    SampleT* d_image_samples = (SampleT*) d_image;

    // Get amount of temporary storage needed
    DeviceHistogram::MultiHistogramEven<NUM_CHANNELS, ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_image_samples,
        d_histogram,
        num_levels,
        lower_level,
        upper_level,
        width * height, 
        (cudaStream_t) 0,
        is_warmup);

    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    GpuTimer gpu_timer;
    gpu_timer.Start();

    // Compute histogram
    DeviceHistogram::MultiHistogramEven<NUM_CHANNELS, ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_image_samples,
        d_histogram,
        num_levels,
        lower_level,
        upper_level,
        width * height, 
        (cudaStream_t) 0,
        is_warmup);

    gpu_timer.Stop();
    float elapsed_millis = gpu_timer.ElapsedMillis();

    cudaFree(d_temp_storage);

    return elapsed_millis;
}



( run in 0.341 second using v1.01-cache-2.11-cpan-39bf76dae61 )