Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/test/test_grid_barrier.cu  view on Meta::CPAN

    int grid_size = -1;

    // Initialize command line
    CommandLineArgs args(argc, argv);

    // Get args
    args.GetCmdLineArgument("i", iterations);
    args.GetCmdLineArgument("grid-size", grid_size);
    args.GetCmdLineArgument("block-size", block_size);

    // Print usage
    if (args.CheckCmdLineFlag("help"))
    {
        printf("%s "
            "[--device=<device-id>]"
            "[--i=<iterations>]"
            "[--grid-size<grid-size>]"
            "[--block-size<block-size>]"
            "\n", argv[0]);
        exit(0);
    }

    // Initialize device
    CubDebugExit(args.DeviceInit());

    // Get device ordinal
    int device_ordinal;
    CubDebugExit(cudaGetDevice(&device_ordinal));

    // Get device SM version
    int sm_version;
    CubDebugExit(SmVersion(sm_version, device_ordinal));

    // Get SM properties
    int sm_count, max_block_threads, max_sm_occupancy;
    CubDebugExit(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal));
    CubDebugExit(cudaDeviceGetAttribute(&max_block_threads, cudaDevAttrMaxThreadsPerBlock, device_ordinal));
    CubDebugExit(MaxSmOccupancy(max_sm_occupancy, EmptyKernel<void>, 32));

    // Compute grid size and occupancy
    int occupancy = CUB_MIN((max_block_threads / block_size), max_sm_occupancy);

    if (grid_size == -1)
    {
        grid_size = occupancy * sm_count;
    }
    else
    {
        occupancy = grid_size / sm_count;
    }

    printf("Initializing software global barrier for Kernel<<<%d,%d>>> with %d occupancy\n",
        grid_size, block_size, occupancy);
    fflush(stdout);

    // Init global barrier
    GridBarrierLifetime global_barrier;
    global_barrier.Setup(grid_size);

    // Time kernel
    GpuTimer gpu_timer;
    gpu_timer.Start();
    Kernel<<<grid_size, block_size>>>(global_barrier, iterations);
    gpu_timer.Stop();

    retval = CubDebug(cudaThreadSynchronize());

    // Output timing results
    float avg_elapsed = gpu_timer.ElapsedMillis() / float(iterations);
    printf("%d iterations, %f total elapsed millis, %f avg elapsed millis\n",
        iterations,
        gpu_timer.ElapsedMillis(),
        avg_elapsed);

    return retval;
}



( run in 0.326 second using v1.01-cache-2.11-cpan-d7f47b0818f )