view release on metacpan or search on metacpan
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
class benchmark_options {
public:
benchmark_options() = default;
benchmark_options(const benchmark_options&) = delete;
benchmark_options& operator=(const benchmark_options&) = delete;
// Read-only accessors for the option values filled in by parse().
// Whether the 32-bit benchmark is selected (defaults to true; "-32" sets it and clears run64).
bool run32() const { return m_run32; }
// Whether the 64-bit benchmark is selected (defaults to true; cleared by "-32").
bool run64() const { return m_run64; }
// Number of samples (default 10000; "-samples=N" requires N >= 1).
int samples() const { return m_samples; }
// Number of iterations (default 1000; "-iterations=N" requires N >= 1).
int iterations() const { return m_iterations; }
// True once "-v" has been parsed.
bool verbose() const { return m_verbose; }
// True once "-ryu" has been parsed.
bool ryu_only() const { return m_ryu_only; }
// True once "-classic" has been parsed (default false).
bool classic() const { return m_classic; }
// Value of "-small_digits=N" (accepted range 1..7); 0 when the option was not given.
// NOTE(review): how this value is consumed is not visible in this part of the file.
int small_digits() const { return m_small_digits; }
void parse(const char * const arg) {
if (strcmp(arg, "-32") == 0) {
m_run32 = true;
m_run64 = false;
} else if (strcmp(arg, "-64") == 0) {
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
} else if (strcmp(arg, "-v") == 0) {
m_verbose = true;
} else if (strcmp(arg, "-ryu") == 0) {
m_ryu_only = true;
} else if (strcmp(arg, "-classic") == 0) {
m_classic = true;
} else if (strncmp(arg, "-samples=", 9) == 0) {
if (sscanf(arg, "-samples=%i", &m_samples) != 1 || m_samples < 1) {
fail(arg);
}
} else if (strncmp(arg, "-iterations=", 12) == 0) {
if (sscanf(arg, "-iterations=%i", &m_iterations) != 1 || m_iterations < 1) {
fail(arg);
}
} else if (strncmp(arg, "-small_digits=", 14) == 0) {
if (sscanf(arg, "-small_digits=%i", &m_small_digits) != 1 || m_small_digits < 1 || m_small_digits > 7) {
fail(arg);
}
} else {
fail(arg);
}
}
private:
// Reports `arg` as an unrecognized option on standard output and then
// terminates the process with EXIT_FAILURE; this function does not return.
void fail(const char * const arg) {
  fprintf(stdout, "Unrecognized option '%s'.\n", arg);
  exit(EXIT_FAILURE);
}
// By default, run both 32 and 64-bit benchmarks with 10000 samples and 1000 iterations each.
bool m_run32 = true;
bool m_run64 = true;
int m_samples = 10000;
int m_iterations = 1000;
bool m_verbose = false;
bool m_ryu_only = false;
bool m_classic = false;
int m_small_digits = 0;
};
// returns 10^x
uint32_t exp10(const int x) {
uint32_t ret = 1;
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
std::mt19937 mt32(12345);
mean_and_variance mv1;
mean_and_variance mv2;
int throwaway = 0;
if (options.classic()) {
for (int i = 0; i < options.samples(); ++i) {
uint32_t r = 0;
const float f = generate_float(options, mt32, r);
auto t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
f2s_buffered(f, bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv1.update(delta1);
double delta2 = 0.0;
if (!options.ryu_only()) {
t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
fcv(f);
throwaway += buffer[2];
}
t2 = steady_clock::now();
delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv2.update(delta2);
}
if (options.verbose()) {
if (options.ryu_only()) {
printf("%s,%u,%f\n", bufferown, r, delta1);
} else {
printf("%s,%u,%f,%f\n", bufferown, r, delta1, delta2);
}
}
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
printf("For %x %20s %20s\n", r, bufferown, buffer);
}
}
} else {
std::vector<float> vec(options.samples());
for (int i = 0; i < options.samples(); ++i) {
uint32_t r = 0;
vec[i] = generate_float(options, mt32, r);
}
for (int j = 0; j < options.iterations(); ++j) {
auto t1 = steady_clock::now();
for (int i = 0; i < options.samples(); ++i) {
f2s_buffered(vec[i], bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.samples());
mv1.update(delta1);
double delta2 = 0.0;
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
std::mt19937 mt32(12345);
mean_and_variance mv1;
mean_and_variance mv2;
int throwaway = 0;
if (options.classic()) {
for (int i = 0; i < options.samples(); ++i) {
uint64_t r = 0;
const double f = generate_double(options, mt32, r);
auto t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
d2s_buffered(f, bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv1.update(delta1);
double delta2 = 0.0;
if (!options.ryu_only()) {
t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
dcv(f);
throwaway += buffer[2];
}
t2 = steady_clock::now();
delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv2.update(delta2);
}
if (options.verbose()) {
if (options.ryu_only()) {
printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
} else {
printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
}
}
Ryu_Library/ryu/benchmark/benchmark.cc view on Meta::CPAN
printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
}
}
} else {
std::vector<double> vec(options.samples());
for (int i = 0; i < options.samples(); ++i) {
uint64_t r = 0;
vec[i] = generate_double(options, mt32, r);
}
for (int j = 0; j < options.iterations(); ++j) {
auto t1 = steady_clock::now();
for (int i = 0; i < options.samples(); ++i) {
d2s_buffered(vec[i], bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.samples());
mv1.update(delta1);
double delta2 = 0.0;
Ryu_Library/ryu/benchmark/benchmark_fixed.c view on Meta::CPAN
// Standard deviation of the accumulator `mv`: the square root of variance(mv).
double stddev(mean_and_variance* mv) {
  const double var = variance(mv);
  return sqrt(var);
}
// Draws 64 bits from RandomU64(), stores them in *r, and returns the value
// produced by int64Bits2Double() for those same bits.
double generate_double(uint64_t* r) {
  const uint64_t bits = RandomU64();
  *r = bits;
  return int64Bits2Double(bits);
}
// Benchmarks d2fixed_buffered() against snprintf("%.<precision>f") on doubles.
//
// For each of `samples` values obtained from generate_double() (the generator
// is re-seeded with 12345 on entry), both conversions are run `iterations`
// times and the average time per call, in nanoseconds, is fed into a running
// mean/variance accumulator (mv1 for d2fixed_buffered, mv2 for snprintf).
//
// verbose: when true, one CSV row is printed per sample; when false, a single
//          summary line with mean and standard deviation is printed at the end.
// A "For ..." line is printed for every sample whose two outputs differ.
//
// Returns a sum of output characters whose only purpose is to give the
// conversions an observable result.
static int bench64_fixed(const uint32_t samples, const uint32_t iterations, const int32_t precision, const bool verbose) {
  // Zero-filled so that the [2] reads below are well defined even when a
  // formatted result is shorter than three characters (e.g. "%.0f" of 0.0
  // produces just "0").
  char bufferown[BUFFER_SIZE] = {0};
  char buffer[BUFFER_SIZE] = {0};
  char fmt[100];
  snprintf(fmt, sizeof(fmt), "%%.%df", precision);
  RandomInit(12345);
  mean_and_variance mv1;
  init(&mv1);
  mean_and_variance mv2;
  init(&mv2);
  int throwaway = 0;
  // The counters are uint32_t to match their bounds: an int counter would be
  // compared after conversion to unsigned and would overflow (undefined
  // behaviour) for bounds above INT_MAX.
  for (uint32_t i = 0; i < samples; ++i) {
    uint64_t r = 0;
    const double f = generate_double(&r);
    // printf("%f\n", f);
    clock_t t1 = clock();
    for (uint32_t j = 0; j < iterations; ++j) {
      d2fixed_buffered(f, precision, bufferown);
      throwaway += bufferown[2];
    }
    clock_t t2 = clock();
    // Clock ticks -> nanoseconds per call.
    double delta1 = ((t2 - t1) * 1000000000.0) / ((double) iterations) / ((double) CLOCKS_PER_SEC);
    update(&mv1, delta1);
    double delta2 = 0.0;
    t1 = clock();
    for (uint32_t j = 0; j < iterations; ++j) {
      snprintf(buffer, BUFFER_SIZE, fmt, f);
      throwaway += buffer[2];
    }
    t2 = clock();
    delta2 = ((t2 - t1) * 1000000000.0) / ((double) iterations) / ((double) CLOCKS_PER_SEC);
    update(&mv2, delta2);
    if (verbose) {
      printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
    }
    // printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    // Report any sample where the two implementations disagree.
    if (strcmp(bufferown, buffer) != 0) {
      printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    }
  }
  if (!verbose) {
    printf("64: %8.3f %8.3f", mv1.mean, stddev(&mv1));
    printf(" %8.3f %8.3f", mv2.mean, stddev(&mv2));
    printf("\n");
  }
  return throwaway;
}
static int bench64_exp(const uint32_t samples, const uint32_t iterations, const int32_t precision, const bool verbose) {
char bufferown[BUFFER_SIZE];
char buffer[BUFFER_SIZE];
char fmt[100];
snprintf(fmt, 100, "%%.%de", precision);
RandomInit(12345);
mean_and_variance mv1;
init(&mv1);
mean_and_variance mv2;
init(&mv2);
int throwaway = 0;
for (int i = 0; i < samples; ++i) {
uint64_t r = 0;
const double f = generate_double(&r);
// printf("%.20e\n", f);
// printf("For %16" PRIX64 "\n", r);
clock_t t1 = clock();
for (int j = 0; j < iterations; ++j) {
d2exp_buffered(f, precision, bufferown);
throwaway += bufferown[2];
}
clock_t t2 = clock();
double delta1 = (t2 - t1) / (double) iterations / CLOCKS_PER_SEC * 1000000000.0;
update(&mv1, delta1);
double delta2 = 0.0;
t1 = clock();
for (int j = 0; j < iterations; ++j) {
snprintf(buffer, BUFFER_SIZE, fmt, f);
throwaway += buffer[2];
}
t2 = clock();
delta2 = (t2 - t1) / (double) iterations / CLOCKS_PER_SEC * 1000000000.0;
update(&mv2, delta2);
if (verbose) {
printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
}
// printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
if ((strcmp(bufferown, buffer) != 0) && !verbose) {
printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
}
Ryu_Library/ryu/benchmark/benchmark_fixed.c view on Meta::CPAN
// Also disable hyperthreading with something like this:
// cat /sys/devices/system/cpu/cpu*/topology/core_id
// sudo /bin/bash -c "echo 0 > /sys/devices/system/cpu/cpu6/online"
cpu_set_t my_set;
CPU_ZERO(&my_set);
CPU_SET(2, &my_set);
sched_setaffinity(getpid(), sizeof(cpu_set_t), &my_set);
#endif
int32_t samples = 10000;
int32_t iterations = 1000;
int32_t precision = 6;
bool verbose = false;
bool fixed = true;
for (int i = 1; i < argc; i++) {
char* arg = argv[i];
if (strcmp(arg, "-v") == 0) {
verbose = true;
} else if (strncmp(arg, "-samples=", 9) == 0) {
sscanf(arg, "-samples=%i", &samples);
} else if (strncmp(arg, "-iterations=", 12) == 0) {
sscanf(arg, "-iterations=%i", &iterations);
} else if (strncmp(arg, "-precision=", 11) == 0) {
sscanf(arg, "-precision=%i", &precision);
} else if (strcmp(arg, "-exp") == 0) {
fixed = false;
}
}
if (false) {
// double d = int64Bits2Double(0x426E5FDA4A181F94);
// double d = int64Bits2Double(0xC27EF2838AD07A1A);
double d = int64Bits2Double(0x426C19FD2EFA7294);
Ryu_Library/ryu/benchmark/benchmark_fixed.c view on Meta::CPAN
setbuf(stdout, NULL);
}
if (verbose) {
printf("ryu_output,float_bits_as_int,ryu_time_in_ns,snprintf_time_in_ns\n");
} else {
printf(" Average & Stddev Ryu%s\n", " Average & Stddev snprintf");
}
int throwaway = 0;
if (fixed) {
throwaway += bench64_fixed(samples, iterations, precision, verbose);
} else {
throwaway += bench64_exp(samples, iterations, precision, verbose);
}
if (argc == 1000) {
// Prevent the compiler from optimizing the code away.
printf("%d\n", throwaway);
}
return 0;
}
Ryu_Library/ryu/benchmark/benchmark_fixed.cc view on Meta::CPAN
class benchmark_options {
public:
benchmark_options() = default;
benchmark_options(const benchmark_options&) = delete;
benchmark_options& operator=(const benchmark_options&) = delete;
// Read-only accessors for the option values filled in by parse().
// Whether the 32-bit benchmark is selected (defaults to true; cleared by "-f").
bool run32() const { return m_run32; }
// Whether the 64-bit benchmark is selected (defaults to true; "-f" sets it).
bool run64() const { return m_run64; }
// Number of samples (default 10000; "-samples=N" requires N >= 1).
int samples() const { return m_samples; }
// Number of iterations (default 1000; "-iterations=N" requires N >= 1).
int iterations() const { return m_iterations; }
// True once "-v" has been parsed.
bool verbose() const { return m_verbose; }
// True once "-ryu" has been parsed.
bool ryu_only() const { return m_ryu_only; }
// Defaults to true in this file; "-classic" also sets it to true.
bool classic() const { return m_classic; }
// Value of "-small_digits=N" (accepted range 1..7); 0 when the option was not given.
// NOTE(review): how this value is consumed is not visible in this part of the file.
int small_digits() const { return m_small_digits; }
// Value of "-precision=N" (accepted range 0..2000); 6 when the option was not given.
int precision() const { return m_precision; }
void parse(const char * const arg) {
if (strcmp(arg, "-f") == 0) {
m_run32 = false;
m_run64 = true;
Ryu_Library/ryu/benchmark/benchmark_fixed.cc view on Meta::CPAN
} else if (strcmp(arg, "-v") == 0) {
m_verbose = true;
} else if (strcmp(arg, "-ryu") == 0) {
m_ryu_only = true;
} else if (strcmp(arg, "-classic") == 0) {
m_classic = true;
} else if (strncmp(arg, "-samples=", 9) == 0) {
if (sscanf(arg, "-samples=%i", &m_samples) != 1 || m_samples < 1) {
fail(arg);
}
} else if (strncmp(arg, "-iterations=", 12) == 0) {
if (sscanf(arg, "-iterations=%i", &m_iterations) != 1 || m_iterations < 1) {
fail(arg);
}
} else if (strncmp(arg, "-small_digits=", 14) == 0) {
if (sscanf(arg, "-small_digits=%i", &m_small_digits) != 1 || m_small_digits < 1 || m_small_digits > 7) {
fail(arg);
}
} else if (strncmp(arg, "-precision=", 11) == 0) {
if (sscanf(arg, "-precision=%i", &m_precision) != 1 || m_precision < 0 || m_precision > 2000) {
fail(arg);
}
Ryu_Library/ryu/benchmark/benchmark_fixed.cc view on Meta::CPAN
fail(arg);
}
}
private:
// Reports `arg` as an unrecognized option on standard output and then
// terminates the process with EXIT_FAILURE; this function does not return.
void fail(const char * const arg) {
  fprintf(stdout, "Unrecognized option '%s'.\n", arg);
  exit(EXIT_FAILURE);
}
// By default, run both 32 and 64-bit benchmarks with 10000 samples and 1000 iterations each.
bool m_run32 = true;
bool m_run64 = true;
int m_samples = 10000;
int m_iterations = 1000;
bool m_verbose = false;
bool m_ryu_only = false;
bool m_classic = true;
int m_small_digits = 0;
int m_precision = 6;
};
// returns 10^x
uint32_t exp10(const int x) {
uint32_t ret = 1;
Ryu_Library/ryu/benchmark/benchmark_fixed.cc view on Meta::CPAN
std::mt19937 mt32(12345);
mean_and_variance mv1;
mean_and_variance mv2;
int throwaway = 0;
for (int i = 0; i < options.samples(); ++i) {
uint64_t r = 0;
const double f = generate_double(options, mt32, r);
// printf("%f\n", f);
auto t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
d2fixed_buffered(f, static_cast<uint32_t>(precision), bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv1.update(delta1);
double delta2 = 0.0;
if (!options.ryu_only()) {
t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
snprintf(buffer, BUFFER_SIZE, fmt, f);
throwaway += buffer[2];
}
t2 = steady_clock::now();
delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv2.update(delta2);
}
if (options.verbose()) {
if (options.ryu_only()) {
printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
} else {
printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
}
}
Ryu_Library/ryu/benchmark/benchmark_fixed.cc view on Meta::CPAN
std::mt19937 mt32(12345);
mean_and_variance mv1;
mean_and_variance mv2;
int throwaway = 0;
for (int i = 0; i < options.samples(); ++i) {
uint64_t r = 0;
const double f = generate_double(options, mt32, r);
// printf("%f\n", f);
auto t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
d2exp_buffered(f, static_cast<uint32_t>(precision), bufferown);
throwaway += bufferown[2];
}
auto t2 = steady_clock::now();
double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv1.update(delta1);
double delta2 = 0.0;
if (!options.ryu_only()) {
t1 = steady_clock::now();
for (int j = 0; j < options.iterations(); ++j) {
snprintf(buffer, BUFFER_SIZE, fmt, f);
throwaway += buffer[2];
}
t2 = steady_clock::now();
delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
mv2.update(delta2);
}
if (options.verbose()) {
if (options.ryu_only()) {
printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
} else {
printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
}
}
Ryu_Library/ryu/d2s.c view on Meta::CPAN
const uint64_t vmDiv100 = div100(vm);
if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
const uint64_t vrDiv100 = div100(vr);
const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100);
roundUp = vrMod100 >= 50;
vr = vrDiv100;
vp = vpDiv100;
vm = vmDiv100;
removed += 2;
}
// Loop iterations below (approximately), without optimization above:
// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
// Loop iterations below (approximately), with optimization above:
// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
for (;;) {
const uint64_t vpDiv10 = div10(vp);
const uint64_t vmDiv10 = div10(vm);
if (vpDiv10 <= vmDiv10) {
break;
}
const uint64_t vrDiv10 = div10(vr);
const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
roundUp = vrMod10 >= 5;
Ryu_Library/ryu/f2s.c view on Meta::CPAN
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
// Round even if the exact number is .....50..0.
lastRemovedDigit = 4;
}
// We need to take vr + 1 if vr is outside bounds or we need to round up.
output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
} else {
// Specialized for the common case (~96.0%). Percentages below are relative to this.
// Loop iterations below (approximately):
// 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
while (vp / 10 > vm / 10) {
lastRemovedDigit = (uint8_t) (vr % 10);
vr /= 10;
vp /= 10;
vm /= 10;
++removed;
}
#ifdef RYU_DEBUG
printf("%u %d\n", vr, lastRemovedDigit);
const uint64_t vmDiv100 = div100(vm);
if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
const uint64_t vrDiv100 = div100(vr);
const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100);
roundUp = vrMod100 >= 50;
vr = vrDiv100;
vp = vpDiv100;
vm = vmDiv100;
removed += 2;
}
// Loop iterations below (approximately), without optimization above:
// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
// Loop iterations below (approximately), with optimization above:
// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
for (;;) {
const uint64_t vpDiv10 = div10(vp);
const uint64_t vmDiv10 = div10(vm);
if (vpDiv10 <= vmDiv10) {
break;
}
const uint64_t vrDiv10 = div10(vr);
const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
roundUp = vrMod10 >= 5;
printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
// Round even if the exact number is .....50..0.
lastRemovedDigit = 4;
}
// We need to take vr + 1 if vr is outside bounds or we need to round up.
output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
} else {
// Specialized for the common case (~96.0%). Percentages below are relative to this.
// Loop iterations below (approximately):
// 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
while (vp / 10 > vm / 10) {
lastRemovedDigit = (uint8_t) (vr % 10);
vr /= 10;
vp /= 10;
vm /= 10;
++removed;
}
#ifdef RYU_DEBUG
printf("%u %d\n", vr, lastRemovedDigit);