Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/device/dispatch/dispatch_spmv_orig.cuh view on Meta::CPAN
// Log spmv_search_kernel configuration
if (debug_synchronous) _CubLog("Invoking spmv_search_kernel<<<%d, %d, 0, %lld>>>()\n",
search_grid_size, search_block_size, (long long) stream);
// Invoke spmv_search_kernel
spmv_search_kernel<<<search_grid_size, search_block_size, 0, stream>>>(
num_merge_tiles,
d_tile_coordinates,
spmv_params);
// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
}
// Log spmv_kernel configuration
if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n",
spmv_grid_size.x, spmv_grid_size.y, spmv_grid_size.z, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy);
// Invoke spmv_kernel
spmv_kernel<<<spmv_grid_size, spmv_config.block_threads, 0, stream>>>(
spmv_params,
d_tile_coordinates,
d_tile_carry_pairs,
num_merge_tiles,
tile_state,
num_segment_fixup_tiles);
// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
// Run reduce-by-key fixup if necessary
if (num_merge_tiles > 1)
{
// Log segment_fixup_kernel configuration
if (debug_synchronous) _CubLog("Invoking segment_fixup_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n",
segment_fixup_grid_size.x, segment_fixup_grid_size.y, segment_fixup_grid_size.z, segment_fixup_config.block_threads, (long long) stream, segment_fixup_config.items_per_thread, segment_fixup_sm_occupancy);
// Invoke segment_fixup_kernel
segment_fixup_kernel<<<segment_fixup_grid_size, segment_fixup_config.block_threads, 0, stream>>>(
d_tile_carry_pairs,
spmv_params.d_vector_y,
num_merge_tiles,
num_segment_fixup_tiles,
tile_state);
// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
}
#if (CUB_PTX_ARCH == 0)
// Free textures
if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
#endif
}
while (0);
return error;
#endif // CUB_RUNTIME_ENABLED
}
/**
* Internal dispatch routine for computing a device-wide reduction
*/
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t Dispatch(
void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation
SpmvParamsT& spmv_params, ///< SpMV input parameter bundle
cudaStream_t stream = 0, ///< [in] <b>[optional]</b> CUDA stream to launch kernels within. Default is stream<sub>0</sub>.
bool debug_synchronous = false) ///< [in] <b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false.
{
cudaError error = cudaSuccess;
do
{
// Get PTX version
int ptx_version;
#if (CUB_PTX_ARCH == 0)
if (CubDebug(error = PtxVersion(ptx_version))) break;
#else
ptx_version = CUB_PTX_ARCH;
#endif
// Get kernel kernel dispatch configurations
KernelConfig spmv_config, segment_fixup_config;
InitConfigs(ptx_version, spmv_config, segment_fixup_config);
if (CubDebug(error = Dispatch(
d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous,
DeviceSpmv1ColKernel<PtxSpmvPolicyT, ValueT, OffsetT>,
DeviceSpmvSearchKernel<PtxSpmvPolicyT, OffsetT, CoordinateT, SpmvParamsT>,
DeviceSpmvKernel<PtxSpmvPolicyT, ScanTileStateT, ValueT, OffsetT, CoordinateT, false, false>,
DeviceSegmentFixupKernel<PtxSegmentFixupPolicy, KeyValuePairT*, ValueT*, OffsetT, ScanTileStateT>,
spmv_config, segment_fixup_config))) break;
/*
// Dispatch
if (spmv_params.beta == 0.0)
{
if (spmv_params.alpha == 1.0)
{
// Dispatch y = A*x
if (CubDebug(error = Dispatch(
d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous,
DeviceSpmv1ColKernel<PtxSpmvPolicyT, ValueT, OffsetT>,
DeviceSpmvSearchKernel<PtxSpmvPolicyT, OffsetT, CoordinateT, SpmvParamsT>,
DeviceSpmvKernel<PtxSpmvPolicyT, ScanTileStateT, ValueT, OffsetT, CoordinateT, false, false>,
DeviceSegmentFixupKernel<PtxSegmentFixupPolicy, KeyValuePairT*, ValueT*, OffsetT, ScanTileStateT>,
spmv_config, segment_fixup_config))) break;
}
else
( run in 0.739 second using v1.01-cache-2.11-cpan-2398b32b56e )