Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/device/device_segmented_radix_sort.cuh  view on Meta::CPAN


/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory.
 */

#pragma once

#include <stdio.h>
#include <iterator>

#include "dispatch/dispatch_radix_sort.cuh"
#include "../util_arch.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
 * \brief DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. ![](segmented_sorting_logo.png)
 * \ingroup SegmentedModule
 *
 * \par Overview
 * The [<em>radix sorting method</em>](http://en.wikipedia.org/wiki/Radix_sort) arranges
 * items into ascending (or descending) order.  The algorithm relies upon a positional representation for
 * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits,
 * characters, etc.) specified from least-significant to most-significant.  For a
 * given input sequence of keys and a set of rules specifying a total ordering
 * of the symbolic alphabet, the radix sorting method produces a lexicographic
 * ordering of those keys.
 *
 * \par
 * DeviceSegmentedRadixSort can sort all of the built-in C++ numeric primitive types, e.g.:
 * <tt>unsigned char</tt>, \p int, \p double, etc.  Although the direct radix sorting
 * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort
 * is able to sort signed and floating-point types via simple bit-wise transformations
 * that ensure lexicographic key ordering.
 *
 * \par Usage Considerations
 * \cdp_class{DeviceSegmentedRadixSort}
 *
 */
struct DeviceSegmentedRadixSort
{

    /******************************************************************//**
     * \name Key-value pairs
     *********************************************************************/
    //@{

    /**
     * \brief Sorts segments of key-value pairs into ascending order. (~<em>2N </em>auxiliary storage required)
     *
     * \par
     * - The contents of the input data are not altered by the sorting operation
     * - When input a contiguous sequence of segments, a single sequence
     *   \p segment_offsets (of length <tt>num_segments+1</tt>) can be aliased
     *   for both the \p d_begin_offsets and \p d_end_offsets parameters (where
     *   the latter is specified as <tt>segment_offsets+1</tt>).
     * - An optional bit subrange <tt>[begin_bit, end_bit)</tt> of differentiating key bits can be specified.  This can reduce overall sorting overhead and yield a corresponding performance improvement.
     * - \devicestorageNP  For sorting using only <em>O</em>(<tt>P</tt>) temporary storage, see the sorting interface using DoubleBuffer wrappers below.
     * - \devicestorage
     *
     * \par Snippet
     * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys
     * with associated vector of \p int values.
     * \par
     * \code
     * #include <cub/cub.cuh>   // or equivalently <cub/device/device_segmentd_radix_sort.cuh>
     *
     * // Declare, allocate, and initialize device-accessible pointers for sorting data
     * int  num_items;          // e.g., 7
     * int  num_segments;       // e.g., 3
     * int  *d_offsets;         // e.g., [0, 3, 3, 7]
     * int  *d_keys_in;         // e.g., [8, 6, 7, 5, 3, 0, 9]
     * int  *d_keys_out;        // e.g., [-, -, -, -, -, -, -]
     * int  *d_values_in;       // e.g., [0, 1, 2, 3, 4, 5, 6]
     * int  *d_values_out;      // e.g., [-, -, -, -, -, -, -]
     * ...
     *



( run in 0.672 second using v1.01-cache-2.11-cpan-5623c5533a1 )