Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/test/test_iterator.cu view on Meta::CPAN
thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
if (h_copy) delete[] h_copy;
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}
/**
 * Test tex-obj texture iterator
 *
 * Fills a host array with random bits, uploads it to the device, binds a
 * TexObjInputIterator<T> over the device copy (viewed as CastT*), and hands
 * the iterator plus eight reference values to Test().  When Thrust 1.7 or
 * newer is available the iterator is additionally consumed by
 * thrust::copy_if() and the output is compared against the host data.
 *
 * The texture is unbound on every build configuration, not only when the
 * Thrust section is compiled in.
 *
 * \tparam T      Element type produced by the iterator
 * \tparam CastT  Element type of the pointer passed to BindTexture()
 */
template <typename T, typename CastT>
void TestTexObj()
{
    printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int TEST_VALUES = 11000;

    // Unsigned index: matches the type of TEST_VALUES (no sign-compare)
    T *h_data = new T[TEST_VALUES];
    for (unsigned int i = 0; i < TEST_VALUES; ++i)
    {
        RandomBits(h_data[i]);
    }

    // Allocate the device array and upload the host data
    T *d_data = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
    CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));

    // Initialize reference data
    T h_reference[8];
    h_reference[0] = h_data[0];          // Value at offset 0
    h_reference[1] = h_data[100];        // Value at offset 100
    h_reference[2] = h_data[1000];       // Value at offset 1000
    h_reference[3] = h_data[10000];      // Value at offset 10000
    h_reference[4] = h_data[1];          // Value at offset 1
    h_reference[5] = h_data[21];         // Value at offset 21
    h_reference[6] = h_data[11];         // Value at offset 11
    h_reference[7] = h_data[0];          // Value at offset 0

    // Create and bind obj-based test iterator
    TexObjInputIterator<T> d_obj_itr;
    CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));

    Test(d_obj_itr, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    T *d_copy = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
    thrust::device_ptr<T> d_copy_wrapper(d_copy);

    // Zero the output first so stale contents cannot match the reference
    CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES));
    thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp());

    int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose);
    printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
    AssertEquals(0, compare);

    // Release the copy_if() output buffer
    if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));

#endif  // THRUST_VERSION

    // Cleanup.  The unbind lives outside the THRUST_VERSION guard so the
    // texture is released even when the Thrust section is compiled out.
    CubDebugExit(d_obj_itr.UnbindTexture());

    if (h_data) delete[] h_data;
    if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}
#if CUDA_VERSION >= 5050
/**
 * Test tex-ref texture iterator
 *
 * Uploads random host data, binds one TexRefInputIterator to it and a second
 * iterator of the same element type to a separate "dummy" buffer, then checks
 * the first iterator through Test() and (Thrust 1.7+) thrust::copy_if().
 *
 * \tparam T      Element type produced by the iterator
 * \tparam CastT  Element type of the pointer passed to BindTexture()
 */
template <typename T, typename CastT>
void TestTexRef()
{
    printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int TEST_VALUES          = 11000;
    const unsigned int DUMMY_OFFSET         = 500;
    const unsigned int DUMMY_TEST_VALUES    = TEST_VALUES - DUMMY_OFFSET;

    // Byte extents of the two device buffers
    const size_t input_bytes = sizeof(T) * TEST_VALUES;
    const size_t dummy_bytes = sizeof(T) * DUMMY_TEST_VALUES;

    // Randomize the host input, one element at a time in index order
    T *h_input = new T[TEST_VALUES];
    for (T *cursor = h_input; cursor != h_input + TEST_VALUES; ++cursor)
    {
        RandomBits(*cursor);
    }

    // Device copy of the full input
    T *d_input = NULL;
    T *d_decoy = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_input, input_bytes));
    CubDebugExit(cudaMemcpy(d_input, h_input, input_bytes, cudaMemcpyHostToDevice));

    // Device copy of the input starting DUMMY_OFFSET elements in
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_decoy, dummy_bytes));
    CubDebugExit(cudaMemcpy(d_decoy, h_input + DUMMY_OFFSET, dummy_bytes, cudaMemcpyHostToDevice));

    // Reference values: host input sampled at these offsets, in this order
    const unsigned int REFERENCE_OFFSETS[8] = {0, 100, 1000, 10000, 1, 21, 11, 0};
    T h_reference[8];
    for (int slot = 0; slot < 8; ++slot)
    {
        h_reference[slot] = h_input[REFERENCE_OFFSETS[slot]];
    }

    // Iterator under test.  The two iterator declarations must stay on
    // different source lines because __LINE__ is their distinguishing
    // template argument.
    TexRefInputIterator<T, __LINE__> itr_under_test;
    CubDebugExit(itr_under_test.BindTexture((CastT*) d_input, input_bytes));

    // Second iterator of the same element type, bound to the dummy buffer
    // to check for interference
    TexRefInputIterator<T, __LINE__> itr_decoy;
    CubDebugExit(itr_decoy.BindTexture((CastT*) d_decoy, dummy_bytes));

    Test(itr_under_test, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    T *d_selected = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_selected, input_bytes));
    thrust::device_ptr<T> d_selected_wrapper(d_selected);

    CubDebugExit(cudaMemset(d_selected, 0, input_bytes));
    thrust::copy_if(itr_under_test, itr_under_test + TEST_VALUES, d_selected_wrapper, SelectOp());

    const int mismatch = CompareDeviceResults(h_input, d_selected, TEST_VALUES, g_verbose, g_verbose);
    const char *verdict = (mismatch) ? "FAIL" : "PASS";
    printf("\tthrust::copy_if(): %s\n", verdict);
    AssertEquals(0, mismatch);

    if (d_selected) CubDebugExit(g_allocator.DeviceFree(d_selected));

#endif  // THRUST_VERSION

    // Release texture bindings before the memory they refer to
    CubDebugExit(itr_under_test.UnbindTexture());
    CubDebugExit(itr_decoy.UnbindTexture());

    delete[] h_input;   // deleting a null pointer is a no-op
    if (d_input) CubDebugExit(g_allocator.DeviceFree(d_input));
    if (d_decoy) CubDebugExit(g_allocator.DeviceFree(d_decoy));
}
/**
 * Test texture transform iterator
 *
 * Wraps a TexRefInputIterator in a TransformInputIterator that applies
 * TransformOp<T>, and checks the composed iterator through Test() and
 * (Thrust 1.7+) thrust::copy_if() against host-side op() results.
 *
 * \tparam T      Element type produced by the iterators
 * \tparam CastT  Element type of the pointer passed to BindTexture()
 */
template <typename T, typename CastT>
void TestTexTransform()
{
    printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout);

    //
    // Test iterator manipulation in kernel
    //

    const unsigned int TEST_VALUES = 11000;
    const size_t input_bytes = sizeof(T) * TEST_VALUES;

    // Host input: element idx is initialized from INTEGER_SEED and idx
    T *h_input = new T[TEST_VALUES];
    for (int idx = 0; idx != static_cast<int>(TEST_VALUES); ++idx)
    {
        InitValue(INTEGER_SEED, h_input[idx], idx);
    }

    // Device copy of the input
    T *d_input = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_input, input_bytes));
    CubDebugExit(cudaMemcpy(d_input, h_input, input_bytes, cudaMemcpyHostToDevice));

    TransformOp<T> op;

    // Reference values: op() applied to the host input at these offsets
    const unsigned int REFERENCE_OFFSETS[8] = {0, 100, 1000, 10000, 1, 21, 11, 0};
    T h_reference[8];
    for (int slot = 0; slot < 8; ++slot)
    {
        h_reference[slot] = op(h_input[REFERENCE_OFFSETS[slot]]);
    }

    // Underlying texture iterator, bound to the device input
    typedef TexRefInputIterator<T, __LINE__> TextureIterator;

    TextureIterator tex_itr;
    CubDebugExit(tex_itr.BindTexture((CastT*) d_input, input_bytes));

    // Transform iterator layered on top of the texture iterator
    TransformInputIterator<T, TransformOp<T>, TextureIterator> xform_itr(tex_itr, op);

    Test(xform_itr, h_reference);

#if (THRUST_VERSION >= 100700)  // Thrust 1.7 or newer

    //
    // Test with thrust::copy_if()
    //

    // Expected output: op() applied to every host element
    T *h_expected = new T[TEST_VALUES];
    {
        const T *src = h_input;
        for (T *dst = h_expected; dst != h_expected + TEST_VALUES; ++dst, ++src)
        {
            *dst = op(*src);
        }
    }

    T *d_selected = NULL;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&d_selected, input_bytes));
    thrust::device_ptr<T> d_selected_wrapper(d_selected);

    thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_selected_wrapper, SelectOp());

    const int mismatch = CompareDeviceResults(h_expected, d_selected, TEST_VALUES, g_verbose, g_verbose);
    const char *verdict = (mismatch) ? "FAIL" : "PASS";
    printf("\tthrust::copy_if(): %s\n", verdict);
    AssertEquals(0, mismatch);

    // Cleanup
    delete[] h_expected;    // deleting a null pointer is a no-op
    if (d_selected) CubDebugExit(g_allocator.DeviceFree(d_selected));

#endif  // THRUST_VERSION

    CubDebugExit(tex_itr.UnbindTexture());

    delete[] h_input;
    if (d_input) CubDebugExit(g_allocator.DeviceFree(d_input));
}
#endif // CUDA_VERSION
/**
 * Run non-integer tests
 *
 * Runs the iterator tests that apply to every element type: the modified
 * and transform iterator tests always, the tex-obj test when CUB_CDP is
 * enabled, and the tex-ref tests when building against CUDA 5.5 or newer.
 *
 * \tparam T      Element type under test
 * \tparam CastT  Pointer element type forwarded to the individual tests
 *
 * \param is_integer  Tag used only to select this overload
 */
template <typename T, typename CastT>
void Test(Int2Type<false> is_integer)
{
    TestModified<T, CastT>();
    TestTransform<T, CastT>();

#if CUB_CDP
    // Test tex-obj iterators if CUDA dynamic parallelism enabled.
    // TestTexObj() takes no arguments.
    TestTexObj<T, CastT>();
#endif  // CUB_CDP

#if CUDA_VERSION >= 5050
    // Test tex-ref iterators for CUDA 5.5
    TestTexRef<T, CastT>();
    TestTexTransform<T, CastT>();
#endif  // CUDA_VERSION
}
/**
 * Run integer tests
 *
 * Runs the constant-iterator and counting-iterator tests with base values
 * 0 and 99, then the tests shared with non-integer element types.
 *
 * \tparam T      Element type under test
 * \tparam CastT  Pointer element type forwarded to the shared tests
 *
 * \param is_integer  Tag used only to select this overload
 */
template <typename T, typename CastT>
void Test(Int2Type<true> is_integer)
{
    // Base values exercised by both integer-only tests
    const int BASES[2] = {0, 99};

    // All constant-iterator runs come first, then all counting-iterator runs
    for (int b = 0; b < 2; ++b)
    {
        TestConstant<T>(static_cast<T>(BASES[b]));
    }
    for (int b = 0; b < 2; ++b)
    {
        TestCounting<T>(static_cast<T>(BASES[b]));
    }

    // Run non-integer tests
    Test<T, CastT>(Int2Type<false>());
}
/**
* Run tests
*/
template <typename T>
void Test()
{
enum {
IS_INTEGER = (Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER)
};
// Test non-const type
Test<T, T>(Int2Type<IS_INTEGER>());
// Test non-const type
( run in 0.579 second using v1.01-cache-2.11-cpan-2398b32b56e )