Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/cub/agent/agent_spmv_row_based.cuh view on Meta::CPAN
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV.
*/
#pragma once
#include <iterator>
#include "../util_type.cuh"
#include "../block/block_reduce.cuh"
#include "../block/block_scan.cuh"
#include "../block/block_exchange.cuh"
#include "../thread/thread_search.cuh"
#include "../thread/thread_operators.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../iterator/counting_input_iterator.cuh"
#include "../iterator/tex_ref_input_iterator.cuh"
#include "../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Tuning policy
******************************************************************************/
/**
* Parameterizable tuning policy type for AgentSpmv
*/
template <
int _BLOCK_THREADS, ///< Threads per thread block
int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search
CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets
CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices
CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values
CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values
bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. pre-staged through shared memory)
BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use
struct AgentSpmvPolicy
{
enum
{
BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block
ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory)
};
static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets
static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets
static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices
static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values
static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values
static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use
};
/******************************************************************************
* Thread block abstractions
******************************************************************************/
template <
typename ValueT, ///< Matrix and vector value type
typename OffsetT> ///< Signed integer type for sequence offsets
struct SpmvParams
{
ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values
OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>. (Indices are zero-valued.)
ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector <em>y</em>
int num_rows; ///< Number of rows of matrix <b>A</b>.
int num_cols; ///< Number of columns of matrix <b>A</b>.
int num_nonzeros; ///< Number of nonzero elements of matrix <b>A</b>.
ValueT alpha; ///< Alpha multiplicand
ValueT beta; ///< Beta addend-multiplicand
TexRefInputIterator<ValueT, 66778899, OffsetT> t_vector_x;
};
/**
* \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV.
*/
template <
typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type
typename ValueT, ///< Matrix and vector value type
typename OffsetT, ///< Signed integer type for sequence offsets
bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1
bool HAS_BETA, ///< Whether the input parameter \p beta is 0
int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability
struct AgentSpmv
{
//---------------------------------------------------------------------
// Types and constants
//---------------------------------------------------------------------
/// Constants
enum
{
BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS,
ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD,
TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD,
};
( run in 1.050 second using v1.01-cache-2.11-cpan-39bf76dae61 )