Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/block/block_radix_sort.cuh  view on Meta::CPAN

/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * The cub::BlockRadixSort class provides [<em>collective</em>](index.html#sec0) methods for radix sorting of items partitioned across a CUDA thread block.
 */


#pragma once

#include "block_exchange.cuh"
#include "block_radix_rank.cuh"
#include "../util_ptx.cuh"
#include "../util_arch.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/**
 * \brief The BlockRadixSort class provides [<em>collective</em>](index.html#sec0) methods for sorting items partitioned across a CUDA thread block using a radix sorting method.  ![](sorting_logo.png)
 * \ingroup BlockModule
 *
 * \tparam KeyT                 KeyT type
 * \tparam BLOCK_DIM_X          The thread block length in threads along the X dimension
 * \tparam ITEMS_PER_THREAD     The number of items per thread
 * \tparam ValueT               <b>[optional]</b> ValueT type (default: cub::NullType, which indicates a keys-only sort)
 * \tparam RADIX_BITS           <b>[optional]</b> The number of radix bits per digit place (default: 4 bits)
 * \tparam MEMOIZE_OUTER_SCAN   <b>[optional]</b> Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise).
 * \tparam INNER_SCAN_ALGORITHM <b>[optional]</b> The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS)
 * \tparam SMEM_CONFIG          <b>[optional]</b> Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte)
 * \tparam BLOCK_DIM_Y          <b>[optional]</b> The thread block length in threads along the Y dimension (default: 1)
 * \tparam BLOCK_DIM_Z          <b>[optional]</b> The thread block length in threads along the Z dimension (default: 1)
 * \tparam PTX_ARCH             <b>[optional]</b> \ptxversion
 *
 * \par Overview
 * - The [<em>radix sorting method</em>](http://en.wikipedia.org/wiki/Radix_sort) arranges
 *   items into ascending order.  It relies upon a positional representation for
 *   keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits,
 *   characters, etc.) specified from least-significant to most-significant.  For a
 *   given input sequence of keys and a set of rules specifying a total ordering
 *   of the symbolic alphabet, the radix sorting method produces a lexicographic
 *   ordering of those keys.
 * - BlockRadixSort can sort all of the built-in C++ numeric primitive types, e.g.:
 *   <tt>unsigned char</tt>, \p int, \p double, etc.  Within each key, the implementation treats fixed-length
 *   bit-sequences of \p RADIX_BITS as radix digit places.  Although the direct radix sorting
 *   method can only be applied to unsigned integral types, BlockRadixSort
 *   is able to sort signed and floating-point types via simple bit-wise transformations
 *   that ensure lexicographic key ordering.
 * - \rowmajor
 *
 * \par Performance Considerations
 * - \granularity
 *
 * \par A Simple Example
 * \blockcollective{BlockRadixSort}
 * \par
 * The code snippet below illustrates a sort of 512 integer keys that
 * are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
 * where each thread owns 4 consecutive items.
 * \par
 * \code
 * #include <cub/cub.cuh>   // or equivalently <cub/block/block_radix_sort.cuh>
 *
 * __global__ void ExampleKernel(...)
 * {
 *     // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer items each
 *     typedef cub::BlockRadixSort<int, 128, 4> BlockRadixSort;
 *
 *     // Allocate shared memory for BlockRadixSort
 *     __shared__ typename BlockRadixSort::TempStorage temp_storage;
 *
 *     // Obtain a segment of consecutive items that are blocked across threads
 *     int thread_keys[4];
 *     ...
 *
 *     // Collectively sort the keys
 *     BlockRadixSort(temp_storage).Sort(thread_keys);
 *
 *     ...
 * \endcode



( run in 1.064 second using v1.01-cache-2.11-cpan-5623c5533a1 )