Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/device/dispatch/dispatch_rle.cuh view on Meta::CPAN
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* cub::DeviceRle provides device-wide, parallel operations for run-length-encoding sequences of data items residing within device-accessible memory.
*/
#pragma once
#include <stdio.h>
#include <iterator>
#include "dispatch_scan.cuh"
#include "../../agent/agent_rle.cuh"
#include "../../thread/thread_operators.cuh"
#include "../../grid/grid_queue.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Kernel entry points
*****************************************************************************/
/**
 * Run-length-encoding sweep kernel entry point
 *
 * Wraps the kernel's __shared__ temporary storage in an AgentRle bound to the
 * input and output sequences, then calls ConsumeRange on that agent with the
 * tile count, the tile status interface, and the run-count output iterator.
 * The thread block size is bounded at compile time by
 * AgentRlePolicyT::BLOCK_THREADS.
 */
template <
    typename AgentRlePolicyT,           ///< Parameterized AgentRlePolicyT tuning policy type
    typename InputIteratorT,            ///< Random-access input iterator type for reading input items \iterator
    typename OffsetsOutputIteratorT,    ///< Random-access output iterator type for writing run-offset values \iterator
    typename LengthsOutputIteratorT,    ///< Random-access output iterator type for writing run-length values \iterator
    typename NumRunsOutputIteratorT,    ///< Output iterator type for recording the number of runs encountered \iterator
    typename ScanTileStateT,            ///< Tile status interface type
    typename EqualityOpT,               ///< T equality operator type
    typename OffsetT>                   ///< Signed integer type for global offsets
__launch_bounds__ (int(AgentRlePolicyT::BLOCK_THREADS))
__global__ void DeviceRleSweepKernel(
    InputIteratorT          d_in,           ///< [in] Pointer to input sequence of data items
    OffsetsOutputIteratorT  d_offsets_out,  ///< [out] Pointer to output sequence of run-offsets
    LengthsOutputIteratorT  d_lengths_out,  ///< [out] Pointer to output sequence of run-lengths
    NumRunsOutputIteratorT  d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out)
    ScanTileStateT          tile_status,    ///< [in] Tile status interface
    EqualityOpT             equality_op,    ///< [in] Equality operator for input items
    OffsetT                 num_items,      ///< [in] Total number of input items (i.e., length of \p d_in)
    int                     num_tiles)      ///< [in] Total number of tiles for the entire problem
{
    // Agent type specialized on this kernel's policy, iterators, equality operator, and offset type
    typedef AgentRle<
            AgentRlePolicyT,
            InputIteratorT,
            OffsetsOutputIteratorT,
            LengthsOutputIteratorT,
            EqualityOpT,
            OffsetT>
        RleAgentT;

    // Temporary storage required by the agent, declared in shared memory
    __shared__ typename RleAgentT::TempStorage agent_storage;

    // Bind the agent to its storage, the input sequence, and the offset/length outputs
    RleAgentT rle_agent(
        agent_storage,
        d_in,
        d_offsets_out,
        d_lengths_out,
        equality_op,
        num_items);

    // Process tiles; the run count is recorded through d_num_runs_out
    rle_agent.ConsumeRange(num_tiles, tile_status, d_num_runs_out);
}
/******************************************************************************
* Dispatch
******************************************************************************/
/**
* Utility class for dispatching the appropriately-tuned kernels for DeviceRle
*/
template <
typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator
typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator
typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator
typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator
typename EqualityOpT, ///< T equality operator type
typename OffsetT> ///< Signed integer type for global offsets
struct DeviceRleDispatch
{
( run in 0.982 second using v1.01-cache-2.11-cpan-140bd7fdf52 )