bind results from the CPAN

Alien-XGBoost

    thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp());

    int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose);
    printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Cleanup
    if (h_copy) delete[] h_copy;
    if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));

#endif // THRUST_VERSION

    if (h_data) delete[] h_data;
    if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}


/**
 * Test tex-obj texture iterator
 *
 * Populates a host buffer of T via RandomBits(), mirrors it on the device,
 * binds a TexObjInputIterator<T> to the device copy (viewed through a CastT
 * pointer) and hands the iterator plus eight reference values to the
 * Test(iterator, reference) overload.  When Thrust 1.7 or newer is available
 * the iterator is additionally used as the input range of thrust::copy_if().
 *
 * The texture object is unbound on every path, independent of whether the
 * Thrust section is compiled in.
 *
 * \tparam T        Element type read through the iterator
 * \tparam CastT    Pointee type the device buffer is cast to for BindTexture()
 */
template <typename T, typename CastT>
void TestTexObj()
{
    printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int TEST_VALUES          = 11000;
    const unsigned int DUMMY_OFFSET         = 500;
    const unsigned int DUMMY_TEST_VALUES    = TEST_VALUES - DUMMY_OFFSET;

    // Unsigned index: matches the type of TEST_VALUES (no signed/unsigned comparison)
    T *h_data = new T[TEST_VALUES];
    for (unsigned int i = 0; i < TEST_VALUES; ++i)
    {
        RandomBits(h_data[i]);
    }

    // Allocate device arrays
    T *d_data   = NULL;
    T *d_dummy  = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
    CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));

    // NOTE(review): d_dummy is filled with the tail of h_data but is never bound
    // or read in this function; it appears to mirror TestTexRef's setup -- confirm
    // whether it is still needed here.
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES));
    CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice));

    // Initialize reference data
    T h_reference[8];
    h_reference[0] = h_data[0];          // Value at offset 0
    h_reference[1] = h_data[100];        // Value at offset 100
    h_reference[2] = h_data[1000];       // Value at offset 1000
    h_reference[3] = h_data[10000];      // Value at offset 10000
    h_reference[4] = h_data[1];          // Value at offset 1
    h_reference[5] = h_data[21];         // Value at offset 21
    h_reference[6] = h_data[11];         // Value at offset 11
    h_reference[7] = h_data[0];          // Value at offset 0;

    // Create and bind obj-based test iterator
    TexObjInputIterator<T> d_obj_itr;
    CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));

    Test(d_obj_itr, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    T *d_copy = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
    thrust::device_ptr<T> d_copy_wrapper(d_copy);

    // Zero the output first so stale allocation contents cannot satisfy the comparison
    CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES));
    thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp());

    // The output is compared against the complete input, so SelectOp presumably
    // accepts every element -- confirm against its definition.
    int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose);
    printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));

#endif  // THRUST_VERSION

    // Cleanup.  Unbinding happens outside the THRUST_VERSION guard so the texture
    // object is released even when the Thrust section is compiled out, and before
    // the device buffer it refers to is freed.
    CubDebugExit(d_obj_itr.UnbindTexture());

    if (h_data) delete[] h_data;
    if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
    if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy));
}


#if CUDA_VERSION >= 5050

/**
 * Test tex-ref texture iterator
 *
 * Binds one TexRefInputIterator to the full device copy of the host data and
 * a second iterator of the same element type to a separate buffer holding the
 * host data from DECOY_OFFSET onward, then checks the first iterator against
 * eight reference values (and, with Thrust 1.7+, through thrust::copy_if()).
 *
 * \tparam T        Element type read through the iterators
 * \tparam CastT    Pointee type the device buffers are cast to for BindTexture()
 */
template <typename T, typename CastT>
void TestTexRef()
{
    printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int NUM_ITEMS        = 11000;
    const unsigned int DECOY_OFFSET     = 500;
    const unsigned int NUM_DECOY_ITEMS  = NUM_ITEMS - DECOY_OFFSET;

    // Host-side source data
    T *h_input = new T[NUM_ITEMS];
    for (T *h_cursor = h_input; h_cursor != h_input + NUM_ITEMS; ++h_cursor)
    {
        RandomBits(*h_cursor);
    }

    // Device mirror of the whole input
    T *d_input = NULL;
    T *d_decoy = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_input, sizeof(T) * NUM_ITEMS));
    CubDebugExit(cudaMemcpy(d_input, h_input, sizeof(T) * NUM_ITEMS, cudaMemcpyHostToDevice));

    // Device buffer holding the input from DECOY_OFFSET onward
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_decoy, sizeof(T) * NUM_DECOY_ITEMS));
    CubDebugExit(cudaMemcpy(d_decoy, h_input + DECOY_OFFSET, sizeof(T) * NUM_DECOY_ITEMS, cudaMemcpyHostToDevice));

    // Reference values: the input elements at these offsets, in this order
    const unsigned int REFERENCE_OFFSETS[8] = {0, 100, 1000, 10000, 1, 21, 11, 0};

    T h_reference[8];
    for (int slot = 0; slot < 8; ++slot)
    {
        h_reference[slot] = h_input[REFERENCE_OFFSETS[slot]];
    }

    // Iterator under test.  __LINE__ is the second template argument, so this
    // declaration and the one below must stay on different source lines to
    // remain distinct types.
    TexRefInputIterator<T, __LINE__> d_tex_itr;
    CubDebugExit(d_tex_itr.BindTexture((CastT*) d_input, sizeof(T) * NUM_ITEMS));

    // Second iterator of the same element type, bound to the decoy buffer, to
    // check for interference between the two bindings
    TexRefInputIterator<T, __LINE__> d_decoy_itr;
    CubDebugExit(d_decoy_itr.BindTexture((CastT*) d_decoy, sizeof(T) * NUM_DECOY_ITEMS));

    Test(d_tex_itr, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    T *d_selected = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_selected, sizeof(T) * NUM_ITEMS));
    thrust::device_ptr<T> d_selected_ptr(d_selected);

    // Clear the destination, then copy through the texture iterator
    CubDebugExit(cudaMemset(d_selected, 0, sizeof(T) * NUM_ITEMS));
    thrust::copy_if(d_tex_itr, d_tex_itr + NUM_ITEMS, d_selected_ptr, SelectOp());

    // Non-zero result from the comparison is reported as a failure
    int mismatch = CompareDeviceResults(h_input, d_selected, NUM_ITEMS, g_verbose, g_verbose);
    printf("\tthrust::copy_if(): %s\n", (mismatch == 0) ? "PASS" : "FAIL");
    AssertEquals(0, mismatch);

    if (d_selected != NULL)
    {
        CubDebugExit(g_allocator.DeviceFree(d_selected));
    }

#endif  // THRUST_VERSION

    // Release both texture bindings before freeing the buffers behind them
    CubDebugExit(d_tex_itr.UnbindTexture());
    CubDebugExit(d_decoy_itr.UnbindTexture());

    delete[] h_input;   // deleting a null pointer is a no-op, so no guard is needed
    if (d_input != NULL)
    {
        CubDebugExit(g_allocator.DeviceFree(d_input));
    }
    if (d_decoy != NULL)
    {
        CubDebugExit(g_allocator.DeviceFree(d_decoy));
    }
}


/**
 * Test texture transform iterator
 *
 * Initializes a host buffer of T via InitValue(INTEGER_SEED, ...), mirrors it
 * on the device, binds a TexRefInputIterator to the device copy and wraps it
 * in a TransformInputIterator applying TransformOp<T>.  The wrapped iterator
 * is checked against eight host-transformed reference values and, with
 * Thrust 1.7 or newer, used as the input range of thrust::copy_if().
 *
 * \tparam T        Element type read through the iterator
 * \tparam CastT    Pointee type the device buffer is cast to for BindTexture()
 */
template <typename T, typename CastT>
void TestTexTransform()
{
    printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int TEST_VALUES = 11000;

    // Unsigned index: matches the type of TEST_VALUES (no signed/unsigned comparison)
    T *h_data = new T[TEST_VALUES];
    for (unsigned int i = 0; i < TEST_VALUES; ++i)
    {
        InitValue(INTEGER_SEED, h_data[i], i);
    }

    // Allocate device arrays
    T *d_data = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
    CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));

    TransformOp<T> op;

    // Initialize reference data (the transform is applied on the host)
    T h_reference[8];
    h_reference[0] = op(h_data[0]);          // Value at offset 0
    h_reference[1] = op(h_data[100]);        // Value at offset 100
    h_reference[2] = op(h_data[1000]);       // Value at offset 1000
    h_reference[3] = op(h_data[10000]);      // Value at offset 10000
    h_reference[4] = op(h_data[1]);          // Value at offset 1
    h_reference[5] = op(h_data[21]);         // Value at offset 21
    h_reference[6] = op(h_data[11]);         // Value at offset 11
    h_reference[7] = op(h_data[0]);          // Value at offset 0;

    // Create and bind texture iterator (__LINE__ is the second template argument)
    typedef TexRefInputIterator<T, __LINE__> TextureIterator;

    TextureIterator d_tex_itr;
    CubDebugExit(d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));

    // Create transform iterator
    TransformInputIterator<T, TransformOp<T>, TextureIterator> xform_itr(d_tex_itr, op);

    Test(xform_itr, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    // Expected output: every input element passed through the transform
    T *h_copy = new T[TEST_VALUES];
    for (unsigned int i = 0; i < TEST_VALUES; ++i)
        h_copy[i] = op(h_data[i]);

    T *d_copy = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
    thrust::device_ptr<T> d_copy_wrapper(d_copy);

    // Zero the output first (as the tex-obj and tex-ref tests do) so stale
    // contents of the allocation cannot satisfy the comparison below
    CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES));
    thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_copy_wrapper, SelectOp());

    int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose);
    printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Cleanup
    if (h_copy) delete[] h_copy;
    if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));

#endif  // THRUST_VERSION

    // Unbind before freeing the device buffer the texture refers to
    CubDebugExit(d_tex_itr.UnbindTexture());
    if (h_data) delete[] h_data;
    if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}

#endif  // CUDA_VERSION




/**
 * Run non-integer tests
 *
 * Runs the modified and transform iterator tests and, depending on the build
 * configuration, the texture-object (CUB_CDP) and texture-reference
 * (CUDA_VERSION >= 5050) iterator tests.
 *
 * \tparam T        Element type under test
 * \tparam CastT    Type forwarded to the individual tests as their cast type
 *
 * \param is_integer    Tag selecting this overload; its value is not used
 */
template <typename T, typename CastT>
void Test(Int2Type<false> is_integer)
{
    TestModified<T, CastT>();
    TestTransform<T, CastT>();

#if CUB_CDP
    // Test tex-obj iterators if CUDA dynamic parallelism enabled.
    // TestTexObj() takes no arguments; the call must not pass any.
    TestTexObj<T, CastT>();
#endif  // CUB_CDP

#if CUDA_VERSION >= 5050
    // Test tex-ref iterators for CUDA 5.5
    TestTexRef<T, CastT>();
    TestTexTransform<T, CastT>();
#endif  // CUDA_VERSION
}

/**
 * Run integer tests
 *
 * Exercises the constant and counting iterator tests with base values 0 and
 * 99, then continues with the tests shared with non-integer types.
 *
 * \param is_integer    Tag selecting this overload; its value is not used
 */
template <typename T, typename CastT>
void Test(Int2Type<true> is_integer)
{
    // Base values handed to the constant and counting tests, in call order
    const int BASE_VALUES[2] = {0, 99};

    for (int b = 0; b < 2; ++b)
    {
        TestConstant<T>(BASE_VALUES[b]);
    }

    for (int b = 0; b < 2; ++b)
    {
        TestCounting<T>(BASE_VALUES[b]);
    }

    // Dispatch to the non-integer overload for the remaining tests
    Int2Type<false> non_integer_tag;
    Test<T, CastT>(non_integer_tag);
}

/**
 * Run tests
 */
template <typename T>
void Test()
{
    enum {
        IS_INTEGER = (Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER)
    };

    // Test non-const type
    Test<T, T>(Int2Type<IS_INTEGER>());

    // Test non-const type
( run in 0.579 seconds using v1.01-cache-2.11-cpan-2398b32b56e )