Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/test/test_block_histogram.cu  view on Meta::CPAN


        if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", ";

        h_histograms_linear[h_samples[i]]++;
    }

    if (g_verbose) printf("\n\n");
}


/**
 * Test BlockHistogram
 *
 * Builds one tile of BLOCK_THREADS * ITEMS_PER_THREAD samples on the host,
 * launches BlockHistogramKernel with a single block of BLOCK_THREADS threads,
 * and compares the BINS-entry device histogram against the host reference
 * produced by Initialize.
 *
 * Template parameters:
 *   SampleT            Sample data type
 *   BINS               Number of histogram bins
 *   BLOCK_THREADS      Threads in the (single) thread block
 *   ITEMS_PER_THREAD   Samples consumed by each thread
 *   ALGORITHM          BlockHistogram algorithm variant under test
 *
 * Parameters:
 *   gen_mode           Sample generation mode forwarded to Initialize
 *
 * CUDA errors are fatal (CubDebugExit); a histogram mismatch is reported via
 * AssertEquals after all resources have been released.
 */
template <
    typename                    SampleT,
    int                         BINS,
    int                         BLOCK_THREADS,
    int                         ITEMS_PER_THREAD,
    BlockHistogramAlgorithm     ALGORITHM>
void Test(
    GenMode                     gen_mode)
{
    int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD;

    printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n",
        (ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC",
        num_samples,
        typeid(SampleT).name(),
        (int) sizeof(SampleT),
        BINS,
        BLOCK_THREADS,
        (gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS");
    fflush(stdout);

    // Allocate host arrays
    SampleT         *h_samples      = new SampleT[num_samples];
    int             *h_reference    = new int[BINS];

    // Initialize problem (host samples and reference histogram)
    Initialize<BINS>(gen_mode, h_samples, h_reference, num_samples);

    // Allocate problem device arrays
    SampleT         *d_samples      = NULL;
    int             *d_histogram    = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples,     sizeof(SampleT) * num_samples));
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram,   sizeof(int) * BINS));

    // Initialize/clear device arrays
    CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice));
    CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS));

    // Run kernel (one block of BLOCK_THREADS threads)
    BlockHistogramKernel<BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM><<<1, BLOCK_THREADS>>>(
        d_samples,
        d_histogram);

    // Surface launch and execution errors before looking at the results, so a
    // failed kernel is reported as a CUDA error rather than as a bogus
    // PASS/FAIL line computed from an unwritten histogram
    CubDebugExit(cudaPeekAtLastError());
    CubDebugExit(cudaDeviceSynchronize());

    // Check for correctness (and display results, if specified)
    int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose);
    printf("\t%s\n\n", compare ? "FAIL" : "PASS");

    // Flush any stdout/stderr
    fflush(stdout);
    fflush(stderr);

    // Cleanup (host arrays came from new[], which never yields NULL here)
    delete[] h_samples;
    delete[] h_reference;
    if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples));
    if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram));

    // Correctness asserts (after cleanup so nothing is leaked on mismatch)
    AssertEquals(0, compare);
}


/**
 * Run the BlockHistogram test once for each sample generation mode
 */
template <
    typename SampleT, int BINS, int BLOCK_THREADS, int ITEMS_PER_THREAD,
    BlockHistogramAlgorithm ALGORITHM>
void Test()
{
    // Generation modes, exercised in this order
    const GenMode gen_modes[] = {UNIFORM, INTEGER_SEED, RANDOM};
    const int num_modes = (int) (sizeof(gen_modes) / sizeof(gen_modes[0]));

    for (int m = 0; m < num_modes; ++m)
    {
        Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(gen_modes[m]);
    }
}


/**
 * Run the sample-distribution tests for each BlockHistogramAlgorithm variant
 */
template <typename SampleT, int BINS, int BLOCK_THREADS, int ITEMS_PER_THREAD>
void Test()
{
    // Algorithm variants under test, in execution order
    const BlockHistogramAlgorithm FIRST_ALGORITHM   = BLOCK_HISTO_SORT;
    const BlockHistogramAlgorithm SECOND_ALGORITHM  = BLOCK_HISTO_ATOMIC;

    Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, FIRST_ALGORITHM>();
    Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, SECOND_ALGORITHM>();
}


/**
 * Run the algorithm tests for each ITEMS_PER_THREAD setting
 */
template <typename SampleT, int BINS, int BLOCK_THREADS>
void Test()
{
    // Per-thread item counts under test: a single item, then several items
    const int SINGLE_ITEM       = 1;
    const int MULTIPLE_ITEMS    = 5;

    Test<SampleT, BINS, BLOCK_THREADS, SINGLE_ITEM>();
    Test<SampleT, BINS, BLOCK_THREADS, MULTIPLE_ITEMS>();
}


/**
 * Test different BLOCK_THREADS



( run in 0.465 second using v1.01-cache-2.11-cpan-df04353d9ac )