Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/device/dispatch/dispatch_scan.cuh view on Meta::CPAN
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory.
*/
#pragma once
#include <stdio.h>
#include <iterator>
#include "../../agent/agent_scan.cuh"
#include "../../thread/thread_operators.cuh"
#include "../../grid/grid_queue.cuh"
#include "../../util_arch.cuh"
#include "../../util_debug.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Kernel entry points
*****************************************************************************/
/**
 * Tile-status initialization kernel (multi-block).
 *
 * Forwards \p num_tiles to <tt>tile_state.InitializeStatus()</tt>. The call is
 * unguarded, so every thread of the launch makes it.
 */
template <typename ScanTileStateT>          ///< Tile status interface type
__global__ void DeviceScanInitKernel(
    ScanTileStateT  tile_state,             ///< [in] Tile status interface
    int             num_tiles)              ///< [in] Number of tiles
{
    // All of the work is delegated to the tile-status interface
    tile_state.InitializeStatus(num_tiles);
}
/**
 * Tile-status initialization kernel that also resets the selection counter (multi-block).
 *
 * Every thread forwards \p num_tiles to <tt>tile_state.InitializeStatus()</tt>.
 * Afterwards, threads with <tt>blockIdx.x == 0</tt> and <tt>threadIdx.x == 0</tt>
 * (a single thread for a one-dimensional launch) store zero through \p d_num_selected_out.
 */
template <
    typename ScanTileStateT,                ///< Tile status interface type
    typename NumSelectedIteratorT>          ///< Output iterator type for recording the number of items selected
__global__ void DeviceCompactInitKernel(
    ScanTileStateT          tile_state,             ///< [in] Tile status interface
    int                     num_tiles,              ///< [in] Number of tiles
    NumSelectedIteratorT    d_num_selected_out)     ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out)
{
    // Tile-status setup is performed by the whole launch
    tile_state.InitializeStatus(num_tiles);

    // Only the thread at the x-origin of the grid clears the counter
    const bool is_origin_thread = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (is_origin_thread)
    {
        *d_num_selected_out = 0;
    }
}
/**
 * Scan kernel entry point (multi-block).
 *
 * Declares a shared-memory <tt>TempStorage</tt> for the AgentScan specialization
 * selected by the template arguments, constructs that agent from the input/output
 * iterators, scan functor and initial value, and then calls its
 * <tt>ConsumeRange()</tt> with the problem size, tile-status interface and
 * starting tile. The launch is bounded to <tt>ScanPolicyT::BLOCK_THREADS</tt>
 * threads per block.
 */
template <
    typename ScanPolicyT,           ///< Parameterized ScanPolicyT tuning policy type
    typename InputIteratorT,        ///< Random-access input iterator type for reading scan inputs \iterator
    typename OutputIteratorT,       ///< Random-access output iterator type for writing scan outputs \iterator
    typename ScanTileStateT,        ///< Tile status interface type
    typename ScanOpT,               ///< Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
    typename InitValueT,            ///< Initial value to seed the exclusive scan (cub::NullType for inclusive scans)
    typename OffsetT>               ///< Signed integer type for global offsets
__launch_bounds__ (int(ScanPolicyT::BLOCK_THREADS))
__global__ void DeviceScanKernel(
    InputIteratorT      d_in,           ///< Input data
    OutputIteratorT     d_out,          ///< Output data
    ScanTileStateT      tile_state,     ///< Tile status interface
    int                 start_tile,     ///< The starting tile for the current grid
    ScanOpT             scan_op,        ///< Binary scan functor
    InitValueT          init_value,     ///< Initial value to seed the exclusive scan
    OffsetT             num_items)      ///< Total number of scan items for the entire problem
{
    // AgentScan specialization that scans this block's input tiles
    typedef AgentScan<ScanPolicyT, InputIteratorT, OutputIteratorT, ScanOpT, InitValueT, OffsetT>
        BlockScanAgentT;

    // Shared-memory scratch space handed to the agent
    __shared__ typename BlockScanAgentT::TempStorage temp_storage;

    // Build the agent, then let it consume its range of tiles
    BlockScanAgentT scan_agent(temp_storage, d_in, d_out, scan_op, init_value);
    scan_agent.ConsumeRange(num_items, tile_state, start_tile);
}
/******************************************************************************
* Dispatch
******************************************************************************/
/**
* Utility class for dispatching the appropriately-tuned kernels for DeviceScan
*/
template <
typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator
typename OutputIteratorT, ///< Random-access output iterator type for writing scan outputs \iterator
typename ScanOpT, ///< Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
typename InitValueT, ///< The init_value element type for ScanOpT (cub::NullType for inclusive scans)
typename OffsetT> ///< Signed integer type for global offsets
( run in 0.926 second using v1.01-cache-2.11-cpan-13bb782fe5a )