Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/test/test_warp_scan.cu  view on Meta::CPAN

    typename    T,
    typename    InitialValueT>
__device__ __forceinline__ void DeviceTest(
    WarpScanT                       &warp_scan,
    T                               &data,
    NullType                        &initial_value,
    Sum                             &scan_op,
    T                               &aggregate,
    Int2Type<BASIC>                 test_mode,
    Int2Type<true>                  is_primitive)
{
    // Test basic warp scan
    warp_scan.InclusiveSum(data, data);
}

/// Inclusive sum with warp-wide aggregate (primitive-type overload).
///
/// Chosen by overload resolution when the test mode tag is
/// Int2Type<AGGREGATE>, the primitive tag is Int2Type<true>, the scan
/// operator is Sum and the initial value is NullType.  Only warp_scan, data
/// and aggregate are used; the remaining parameters exist solely to steer
/// dispatch.
///
/// \param warp_scan      Warp-scan object whose InclusiveSum() is invoked
/// \param data           [in,out] Calling thread's input; overwritten with its scan output
/// \param initial_value  Unused (dispatch tag)
/// \param scan_op        Unused (dispatch tag)
/// \param aggregate      [out] Filled by InclusiveSum() (warp-wide total per the CUB WarpScan docs)
/// \param test_mode      Unused (dispatch tag)
/// \param is_primitive   Unused (dispatch tag)
template <
    typename    WarpScanT,
    typename    T,
    typename    InitialValueT>
__device__ __forceinline__ void DeviceTest(
    WarpScanT                       &warp_scan,
    T                               &data,
    NullType                        &initial_value,
    Sum                             &scan_op,
    T                               &aggregate,
    Int2Type<AGGREGATE>             test_mode,
    Int2Type<true>                  is_primitive)
{
    // Scan into a scratch value, then publish the result back through `data`
    T inclusive_result;
    warp_scan.InclusiveSum(data, inclusive_result, aggregate);
    data = inclusive_result;
}


/**
 * WarpScan test kernel
 *
 * Each thread loads one item from d_in[threadIdx.x], runs the DeviceTest
 * overload selected by TEST_MODE and Traits<T>::PRIMITIVE on it, and writes
 * the (possibly modified) item to d_out[threadIdx.x].  The DeviceTest call is
 * bracketed by clock() readings so thread 0 can report the elapsed cycles.
 *
 * Template parameters:
 *   LOGICAL_WARP_THREADS   Logical warp width handed to WarpScan
 *   TEST_MODE              Test variant; forwarded as Int2Type<TEST_MODE>
 *   T                      Item type being scanned
 *   ScanOpT                Scan operator type
 *   InitialValueT          Initial-value type
 *
 * Parameters:
 *   d_in           Input items, read at index threadIdx.x
 *   d_out          Output items, written at index threadIdx.x
 *   d_aggregate    Per-thread aggregate, written only when TEST_MODE != BASIC
 *   scan_op        Scan operator forwarded to DeviceTest
 *   initial_value  Initial value forwarded to DeviceTest
 *   d_elapsed      Receives the clock() difference measured by thread 0
 *
 * NOTE(review): all indexing uses threadIdx.x only (no blockIdx) and a single
 * TempStorage is declared, so this looks intended for a one-block,
 * one-logical-warp launch -- confirm against the host-side launch code.
 */
template <
    int         LOGICAL_WARP_THREADS,
    TestMode    TEST_MODE,
    typename    T,
    typename    ScanOpT,
    typename    InitialValueT>
__global__ void WarpScanKernel(
    T               *d_in,
    T               *d_out,
    T               *d_aggregate,
    ScanOpT         scan_op,
    InitialValueT   initial_value,
    clock_t         *d_elapsed)
{
    // Cooperative warp-scan utility type (1 warp)
    typedef WarpScan<T, LOGICAL_WARP_THREADS> WarpScanT;

    // Allocate temp storage in shared memory
    __shared__ typename WarpScanT::TempStorage temp_storage;

    // Per-thread tile data: one item per thread, indexed by threadIdx.x
    T data = d_in[threadIdx.x];

    // Start cycle timer (fences on both sides keep the clock() read in place)
    __threadfence_block();      // workaround to prevent clock hoisting
    clock_t start = clock();
    __threadfence_block();      // workaround to prevent clock hoisting

    // Left uninitialized on purpose: it is only read back below when
    // TEST_MODE != BASIC.  Presumably every non-BASIC DeviceTest overload
    // assigns it -- verify against the overloads outside this excerpt.
    T aggregate;

    // Test scan: the two Int2Type tags select the DeviceTest overload at
    // compile time (test variant, and primitive vs. non-primitive T)
    WarpScanT warp_scan(temp_storage);
    DeviceTest(
        warp_scan,
        data,
        initial_value,
        scan_op,
        aggregate,
        Int2Type<TEST_MODE>(),
        Int2Type<Traits<T>::PRIMITIVE>());

    // Stop cycle timer
    __threadfence_block();      // workaround to prevent clock hoisting
    clock_t stop = clock();
    __threadfence_block();      // workaround to prevent clock hoisting

    // Store data (scan result for this thread)
    d_out[threadIdx.x] = data;

    // TEST_MODE is a template parameter, so this condition is a compile-time constant
    if (TEST_MODE != BASIC)
    {
        // Store aggregate
        d_aggregate[threadIdx.x] = aggregate;
    }

    // Store time: only thread 0 reports
    if (threadIdx.x == 0)
    {
        // Magnitude of the difference, whichever of the two readings is larger
        *d_elapsed = (start > stop) ? start - stop : stop - start;
    }
}


//---------------------------------------------------------------------
// Host utility subroutines
//---------------------------------------------------------------------

/**
 * Build an exclusive-scan test problem together with its host reference.
 *
 * Every h_in[i] for i in [0, num_items) is generated with
 * InitValue(gen_mode, h_in[i], i).  h_reference[i] receives the exclusive
 * scan of h_in under scan_op, seeded with initial_value (so h_reference[0]
 * is initial_value itself).
 *
 * Item 0 is generated and read unconditionally, so callers must pass
 * num_items >= 1 and buffers holding at least that many elements.
 *
 * \param gen_mode       Generation mode forwarded to InitValue
 * \param h_in           [out] Generated input items
 * \param h_reference    [out] Exclusive-scan reference output
 * \param num_items      Number of items to generate
 * \param scan_op        Binary scan operator
 * \param initial_value  Seed for the exclusive scan
 *
 * \return Reduction of all generated inputs under scan_op (initial_value is
 *         not folded into this aggregate)
 */
template <
    typename        T,
    typename        ScanOpT>
T Initialize(
    GenMode         gen_mode,
    T               *h_in,
    T               *h_reference,
    int             num_items,
    ScanOpT         scan_op,
    T               initial_value)
{
    // Item 0 is peeled off: it seeds both the aggregate and the running prefix
    InitValue(gen_mode, h_in[0], 0);

    T aggregate         = h_in[0];
    h_reference[0]      = initial_value;
    T running_prefix    = scan_op(initial_value, h_in[0]);

    int idx = 1;
    while (idx < num_items)
    {
        T &item = h_in[idx];

        // Generate the item, then record the prefix that excludes it
        InitValue(gen_mode, item, idx);
        h_reference[idx] = running_prefix;

        // Fold the new item into the running prefix and the overall aggregate
        running_prefix = scan_op(running_prefix, item);
        aggregate = scan_op(aggregate, item);

        ++idx;
    }

    return aggregate;
}


/**
 * Initialize inclusive-scan problem (and solution)
 */



( run in 1.931 second using v1.01-cache-2.11-cpan-524268b4103 )