Alien-XGBoost

 view release on metacpan or  search on metacpan

xgboost/cub/cub/block/block_discontinuity.cuh  view on Meta::CPAN

            typename        FlagOp>
        static __device__ __forceinline__ void FlagTails(
            int                     linear_tid,
            FlagT                   (&flags)[ITEMS_PER_THREAD],         ///< [out] Calling thread's discontinuity head_flags
            T                       (&input)[ITEMS_PER_THREAD],         ///< [in] Calling thread's input items
            FlagOp                  flag_op)                            ///< [in] Binary boolean flag predicate
        {
            flags[ITERATION] = ApplyOp<FlagOp>::FlagT(
                flag_op,
                input[ITERATION],
                input[ITERATION + 1],
                (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1);

            Iterate<ITERATION + 1, MAX_ITERATIONS>::FlagTails(linear_tid, flags, input, flag_op);
        }

    };

    /// Templated unrolling of item comparison (termination case)
    template <int MAX_ITERATIONS>
    struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
    {
        // Head flags
        template <
            int             ITEMS_PER_THREAD,
            typename        FlagT,
            typename        FlagOp>
        static __device__ __forceinline__ void FlagHeads(
            int                     /*linear_tid*/,
            FlagT                   (&/*flags*/)[ITEMS_PER_THREAD],         ///< [out] Calling thread's discontinuity head_flags
            T                       (&/*input*/)[ITEMS_PER_THREAD],         ///< [in] Calling thread's input items
            T                       (&/*preds*/)[ITEMS_PER_THREAD],         ///< [out] Calling thread's predecessor items
            FlagOp                  /*flag_op*/)                            ///< [in] Binary boolean flag predicate
        {}

        // Tail flags
        template <
            int             ITEMS_PER_THREAD,
            typename        FlagT,
            typename        FlagOp>
        static __device__ __forceinline__ void FlagTails(
            int                     /*linear_tid*/,
            FlagT                   (&/*flags*/)[ITEMS_PER_THREAD],         ///< [out] Calling thread's discontinuity head_flags
            T                       (&/*input*/)[ITEMS_PER_THREAD],         ///< [in] Calling thread's input items
            FlagOp                  /*flag_op*/)                            ///< [in] Binary boolean flag predicate
        {}
    };


    /******************************************************************************
     * Thread fields
     ******************************************************************************/

    /// Shared storage reference
    _TempStorage &temp_storage;

    /// Linear thread-id
    unsigned int linear_tid;


public:

    /// \smemstorage{BlockDiscontinuity}
    struct TempStorage : Uninitialized<_TempStorage> {};


    /******************************************************************//**
     * \name Collective constructors
     *********************************************************************/
    //@{

    /**
     * \brief Collective constructor using a private static allocation of shared memory as temporary storage.
     */
    __device__ __forceinline__ BlockDiscontinuity()
    :
        temp_storage(PrivateStorage()),
        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
    {}


    /**
     * \brief Collective constructor using the specified memory allocation as temporary storage.
     */
    __device__ __forceinline__ BlockDiscontinuity(
        TempStorage &temp_storage)  ///< [in] Reference to memory allocation having layout type TempStorage
    :
        temp_storage(temp_storage.Alias()),
        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
    {}


    //@}  end member group
    /******************************************************************//**
     * \name Head flag operations
     *********************************************************************/
    //@{


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    template <
        int             ITEMS_PER_THREAD,
        typename        FlagT,
        typename        FlagOp>
    __device__ __forceinline__ void FlagHeads(
        FlagT           (&head_flags)[ITEMS_PER_THREAD],    ///< [out] Calling thread's discontinuity head_flags
        T               (&input)[ITEMS_PER_THREAD],         ///< [in] Calling thread's input items
        T               (&preds)[ITEMS_PER_THREAD],         ///< [out] Calling thread's predecessor items
        FlagOp          flag_op)                            ///< [in] Binary boolean flag predicate
    {
        // Share last item
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        if (linear_tid == 0)
        {
            // Set flag for first thread-item (preds[0] is undefined)
            head_flags[0] = 1;
        }



( run in 0.819 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )