Alien-XGBoost
view release on metacpan or search on metacpan
xgboost/cub/test/test_util.h view on Meta::CPAN
{
GetCmdLineArgument("device", dev);
}
if ((dev > deviceCount - 1) || (dev < 0))
{
dev = 0;
}
error = CubDebug(cudaSetDevice(dev));
if (error) break;
CubDebugExit(cudaMemGetInfo(&device_free_physmem, &device_total_physmem));
int ptx_version;
error = CubDebug(cub::PtxVersion(ptx_version));
if (error) break;
error = CubDebug(cudaGetDeviceProperties(&deviceProp, dev));
if (error) break;
if (deviceProp.major < 1) {
fprintf(stderr, "Device does not support CUDA.\n");
exit(1);
}
device_giga_bandwidth = float(deviceProp.memoryBusWidth) * deviceProp.memoryClockRate * 2 / 8 / 1000 / 1000;
if (!CheckCmdLineFlag("quiet"))
{
printf(
"Using device %d: %s (PTX version %d, SM%d, %d SMs, "
"%lld free / %lld total MB physmem, "
"%.3f GB/s @ %d kHz mem clock, ECC %s)\n",
dev,
deviceProp.name,
ptx_version,
deviceProp.major * 100 + deviceProp.minor * 10,
deviceProp.multiProcessorCount,
(unsigned long long) device_free_physmem / 1024 / 1024,
(unsigned long long) device_total_physmem / 1024 / 1024,
device_giga_bandwidth,
deviceProp.memoryClockRate,
(deviceProp.ECCEnabled) ? "on" : "off");
fflush(stdout);
}
} while (0);
return error;
}
};
/******************************************************************************
* Random bits generator
******************************************************************************/
// Running count of random words drawn so far: RandomBits() increments it
// once for every call it makes to mersenne::genrand_int32().
int g_num_rand_samples = 0;
/**
 * Generic NaN test.  The primary template answers false for every type;
 * types that can actually hold a NaN are covered by explicit specializations.
 */
template <typename T>
bool IsNaN(T val)
{
    (void) val;     // the value plays no role in the generic answer
    return false;
}
/**
 * NaN test for single-precision values.
 *
 * Works on the raw 32-bit pattern of \p val: a NaN has every exponent bit set
 * and a non-zero mantissa, with either sign.  Returns true for exactly those
 * patterns.
 */
template<>
__noinline__ bool IsNaN<float>(float val)
{
    // Copy the object representation instead of casting the reference:
    // reading a float through an unsigned int lvalue breaks strict aliasing.
    unsigned int bits = 0;
    memcpy(&bits, &val, sizeof(bits));

    // Positive-sign NaNs, then negative-sign NaNs
    return (((bits >= 0x7F800001) && (bits <= 0x7FFFFFFF)) ||
            ((bits >= 0xFF800001) && (bits <= 0xFFFFFFFF)));
}
/// NaN test for float1: true when its only component is NaN
template<>
__noinline__ bool IsNaN<float1>(float1 val)
{
    const bool x_is_nan = IsNaN(val.x);
    return x_is_nan;
}
/// NaN test for float2: true when either component is NaN
template<>
__noinline__ bool IsNaN<float2>(float2 val)
{
    if (IsNaN(val.y))
        return true;

    return IsNaN(val.x);
}
/// NaN test for float3: true when any of the three components is NaN
template<>
__noinline__ bool IsNaN<float3>(float3 val)
{
    if (IsNaN(val.z))
        return true;
    if (IsNaN(val.y))
        return true;

    return IsNaN(val.x);
}
/// NaN test for float4: true when any of the four components is NaN
template<>
__noinline__ bool IsNaN<float4>(float4 val)
{
    if (IsNaN(val.y))
        return true;
    if (IsNaN(val.x))
        return true;
    if (IsNaN(val.w))
        return true;

    return IsNaN(val.z);
}
/**
 * NaN test for double-precision values.
 *
 * Works on the raw 64-bit pattern of \p val: a NaN has every exponent bit set
 * and a non-zero mantissa, with either sign.  Returns true for exactly those
 * patterns.
 */
template<>
__noinline__ bool IsNaN<double>(double val)
{
    // Copy the object representation instead of dereferencing a cast pointer:
    // reading a double through an unsigned long long lvalue breaks strict
    // aliasing.
    unsigned long long bits = 0;
    memcpy(&bits, &val, sizeof(bits));

    // Positive-sign NaNs, then negative-sign NaNs
    return (((bits >= 0x7FF0000000000001) && (bits <= 0x7FFFFFFFFFFFFFFF)) ||
            ((bits >= 0xFFF0000000000001) && (bits <= 0xFFFFFFFFFFFFFFFF)));
}
/// NaN test for double1: true when its only component is NaN
template<>
__noinline__ bool IsNaN<double1>(double1 val)
{
    const bool x_is_nan = IsNaN(val.x);
    return x_is_nan;
}
/// NaN test for double2: true when either component is NaN
template<>
__noinline__ bool IsNaN<double2>(double2 val)
{
    if (IsNaN(val.y))
        return true;

    return IsNaN(val.x);
}
/// NaN test for double3: true when any of the three components is NaN
template<>
__noinline__ bool IsNaN<double3>(double3 val)
{
    if (IsNaN(val.z))
        return true;
    if (IsNaN(val.y))
        return true;

    return IsNaN(val.x);
}
/// NaN test for double4: true when any of the four components is NaN
template<>
__noinline__ bool IsNaN<double4>(double4 val)
{
    if (IsNaN(val.y))
        return true;
    if (IsNaN(val.x))
        return true;
    if (IsNaN(val.w))
        return true;

    return IsNaN(val.z);
}
/**
 * Generates random keys.
 *
 * The key is assembled from 32-bit words drawn from
 * mersenne::genrand_int32().  Only bits inside [begin_bit, end_bit) can be
 * set; every bit outside that range is cleared.  A negative end_bit selects
 * the full width of K.
 *
 * We can decrease the entropy level of keys by adopting the technique
 * of Thearling and Smith in which keys are computed from the bitwise AND of
 * multiple random samples:
 *
 * entropy_reduction    | Effectively-unique bits per key
 * -----------------------------------------------------
 * -1                   | 0
 * 0                    | 32
 * 1                    | 25.95 (81%)
 * 2                    | 17.41 (54%)
 * 3                    | 10.78 (34%)
 * 4                    | 6.42 (20%)
 * ...                  | ...
 *
 * With entropy_reduction == -1 the key is simply zeroed and no random
 * samples are drawn.  Otherwise each word consumes (entropy_reduction + 1)
 * samples, and g_num_rand_samples is advanced once per sample.  Keys for
 * which IsNaN() reports true are discarded and regenerated.
 */
template <typename K>
void RandomBits(
    K &key,
    int entropy_reduction = 0,
    int begin_bit = 0,
    int end_bit = sizeof(K) * 8)
{
    const int NUM_BYTES     = sizeof(K);
    const int WORD_BYTES    = sizeof(unsigned int);
    const int WORD_BITS     = WORD_BYTES * 8;
    const int NUM_WORDS     = (NUM_BYTES + WORD_BYTES - 1) / WORD_BYTES;

    unsigned int word_buff[NUM_WORDS];

    if (entropy_reduction == -1)
    {
        memset((void *) &key, 0, sizeof(key));
        return;
    }

    if (end_bit < 0)
        end_bit = sizeof(K) * 8;

    while (true)
    {
        // Generate random word_buff
        for (int j = 0; j < NUM_WORDS; j++)
        {
            int current_bit = j * WORD_BITS;

            // Number of bits to clear at the low / high end of this word
            int low_clear   = begin_bit - current_bit;
            int high_clear  = (current_bit + WORD_BITS) - end_bit;

            unsigned int word = 0xffffffff;
            if ((low_clear >= WORD_BITS) || (high_clear >= WORD_BITS))
            {
                // The whole word lies outside [begin_bit, end_bit).  Shifting
                // by the full word width (or more) is undefined, so clear the
                // mask explicitly instead.
                word = 0;
            }
            else
            {
                if (low_clear > 0)
                    word &= 0xffffffff << low_clear;
                if (high_clear > 0)
                    word &= 0xffffffff >> high_clear;
            }

            // AND together (entropy_reduction + 1) samples.  Samples are drawn
            // even for fully-masked words so the generator sequence and the
            // sample counter advance the same way for every word.
            for (int i = 0; i <= entropy_reduction; i++)
            {
                word &= mersenne::genrand_int32();
                g_num_rand_samples++;
            }

            word_buff[j] = word;
        }

        memcpy(&key, word_buff, sizeof(K));

        K copy = key;
        if (!IsNaN(copy))
            break;          // avoids NaNs when generating random floating point numbers
    }
}
/// Randomly select number between [0:max), by scaling a random 32-bit word
template <typename T>
T RandomValue(T max)
{
    const unsigned int LIMIT = (unsigned int) -1;

    // Redraw whenever the word equals LIMIT, so the ratio below stays under 1
    unsigned int sample;
    for (;;)
    {
        RandomBits(sample);
        if (sample != LIMIT)
            break;
    }

    double fraction = double(sample) / double(LIMIT);
    return (T) (fraction * double(max));
}
/******************************************************************************
* Console printing utilities
******************************************************************************/
/**
 * Helper for cout printing: character types are converted to int, any other
 * type is handed back unchanged.
 */
template <typename T>
T CoutCast(T val)
{
    return val;
}

int CoutCast(char val)
{
    return static_cast<int>(val);
}

int CoutCast(unsigned char val)
{
    return static_cast<int>(val);
}

int CoutCast(signed char val)
{
    return static_cast<int>(val);
}
/******************************************************************************
* Test value initialization utilities
******************************************************************************/
/**
 * Test problem generation options
 */
enum GenMode
{
    UNIFORM         = 0,    // Assign to '2', regardless of integer seed
    INTEGER_SEED    = 1,    // Assign to integer seed
    RANDOM          = 2     // Assign to random, regardless of integer seed
};
/**
* Initialize value
*/
template <typename T>
__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0)
{
switch (gen_mode)
{
#if (CUB_PTX_ARCH == 0)
case RANDOM:
RandomBits(value);
( run in 0.615 second using v1.01-cache-2.11-cpan-39bf76dae61 )