Math-Ryu

 view release on metacpan or  search on metacpan

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN


class benchmark_options {
public:
  benchmark_options() = default;
  benchmark_options(const benchmark_options&) = delete;
  benchmark_options& operator=(const benchmark_options&) = delete;

  // Read-only accessors; each one returns the corresponding m_* member unchanged.
  // The values are the in-class defaults unless parse() has overwritten them.
  bool run32() const { return m_run32; }
  bool run64() const { return m_run64; }
  // parse() rejects "-samples=" values below 1.
  int samples() const { return m_samples; }
  // parse() rejects "-iterations=" values below 1.
  int iterations() const { return m_iterations; }
  bool verbose() const { return m_verbose; }
  bool ryu_only() const { return m_ryu_only; }
  bool classic() const { return m_classic; }
  // 0 by default; parse() only accepts "-small_digits=" values in 1..7.
  int small_digits() const { return m_small_digits; }

  void parse(const char * const arg) {
    if (strcmp(arg, "-32") == 0) {
      m_run32 = true;
      m_run64 = false;
    } else if (strcmp(arg, "-64") == 0) {

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN

    } else if (strcmp(arg, "-v") == 0) {
      m_verbose = true;
    } else if (strcmp(arg, "-ryu") == 0) {
      m_ryu_only = true;
    } else if (strcmp(arg, "-classic") == 0) {
      m_classic = true;
    } else if (strncmp(arg, "-samples=", 9) == 0) {
      if (sscanf(arg, "-samples=%i", &m_samples) != 1 || m_samples < 1) {
        fail(arg);
      }
    } else if (strncmp(arg, "-iterations=", 12) == 0) {
      if (sscanf(arg, "-iterations=%i", &m_iterations) != 1 || m_iterations < 1) {
        fail(arg);
      }
    } else if (strncmp(arg, "-small_digits=", 14) == 0) {
      if (sscanf(arg, "-small_digits=%i", &m_small_digits) != 1 || m_small_digits < 1 || m_small_digits > 7) {
        fail(arg);
      }
    } else {
      fail(arg);
    }
  }

private:
  // Prints "Unrecognized option '<arg>'." to stdout and terminates the process
  // with EXIT_FAILURE. parse() calls this both for unknown options and for
  // options whose value fails to scan or is out of range.
  //
  // Marked [[noreturn]] because control never comes back to the caller, and
  // static because no instance state is read or written.
  [[noreturn]] static void fail(const char * const arg) {
    printf("Unrecognized option '%s'.\n", arg);
    exit(EXIT_FAILURE);
  }

  // By default, run both 32 and 64-bit benchmarks with 10000 samples and 1000 iterations each.
  bool m_run32 = true;
  bool m_run64 = true;
  int m_samples = 10000;
  int m_iterations = 1000;
  bool m_verbose = false;
  bool m_ryu_only = false;
  bool m_classic = false;
  int m_small_digits = 0;
};

// returns 10^x
uint32_t exp10(const int x) {
  uint32_t ret = 1;

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN

  std::mt19937 mt32(12345);
  mean_and_variance mv1;
  mean_and_variance mv2;
  int throwaway = 0;
  if (options.classic()) {
    for (int i = 0; i < options.samples(); ++i) {
      uint32_t r = 0;
      const float f = generate_float(options, mt32, r);

      auto t1 = steady_clock::now();
      for (int j = 0; j < options.iterations(); ++j) {
        f2s_buffered(f, bufferown);
        throwaway += bufferown[2];
      }
      auto t2 = steady_clock::now();
      double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
      mv1.update(delta1);

      double delta2 = 0.0;
      if (!options.ryu_only()) {
        t1 = steady_clock::now();
        for (int j = 0; j < options.iterations(); ++j) {
          fcv(f);
          throwaway += buffer[2];
        }
        t2 = steady_clock::now();
        delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
        mv2.update(delta2);
      }

      if (options.verbose()) {
        if (options.ryu_only()) {
          printf("%s,%u,%f\n", bufferown, r, delta1);
        } else {
          printf("%s,%u,%f,%f\n", bufferown, r, delta1, delta2);
        }
      }

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN

        printf("For %x %20s %20s\n", r, bufferown, buffer);
      }
    }
  } else {
    std::vector<float> vec(options.samples());
    for (int i = 0; i < options.samples(); ++i) {
      uint32_t r = 0;
      vec[i] = generate_float(options, mt32, r);
    }

    for (int j = 0; j < options.iterations(); ++j) {
      auto t1 = steady_clock::now();
      for (int i = 0; i < options.samples(); ++i) {
        f2s_buffered(vec[i], bufferown);
        throwaway += bufferown[2];
      }
      auto t2 = steady_clock::now();
      double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.samples());
      mv1.update(delta1);

      double delta2 = 0.0;

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN

  std::mt19937 mt32(12345);
  mean_and_variance mv1;
  mean_and_variance mv2;
  int throwaway = 0;
  if (options.classic()) {
    for (int i = 0; i < options.samples(); ++i) {
      uint64_t r = 0;
      const double f = generate_double(options, mt32, r);

      auto t1 = steady_clock::now();
      for (int j = 0; j < options.iterations(); ++j) {
        d2s_buffered(f, bufferown);
        throwaway += bufferown[2];
      }
      auto t2 = steady_clock::now();
      double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
      mv1.update(delta1);

      double delta2 = 0.0;
      if (!options.ryu_only()) {
        t1 = steady_clock::now();
        for (int j = 0; j < options.iterations(); ++j) {
          dcv(f);
          throwaway += buffer[2];
        }
        t2 = steady_clock::now();
        delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
        mv2.update(delta2);
      }

      if (options.verbose()) {
        if (options.ryu_only()) {
          printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
        } else {
          printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
        }
      }

Ryu_Library/ryu/benchmark/benchmark.cc  view on Meta::CPAN

        printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
      }
    }
  } else {
    std::vector<double> vec(options.samples());
    for (int i = 0; i < options.samples(); ++i) {
      uint64_t r = 0;
      vec[i] = generate_double(options, mt32, r);
    }

    for (int j = 0; j < options.iterations(); ++j) {
      auto t1 = steady_clock::now();
      for (int i = 0; i < options.samples(); ++i) {
        d2s_buffered(vec[i], bufferown);
        throwaway += bufferown[2];
      }
      auto t2 = steady_clock::now();
      double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.samples());
      mv1.update(delta1);

      double delta2 = 0.0;

Ryu_Library/ryu/benchmark/benchmark_fixed.c  view on Meta::CPAN

  // Standard deviation of the accumulator: the square root of variance(mv).
  double stddev(mean_and_variance* mv) {
    const double var = variance(mv);
    return sqrt(var);
  }

// Draws one value from RandomU64(), stores it in *r, and returns the result of
// passing that same value through int64Bits2Double().
double generate_double(uint64_t* r) {
  const uint64_t bits = RandomU64();
  *r = bits;
  return int64Bits2Double(bits);
}

// Benchmarks d2fixed_buffered() against snprintf() with a "%.<precision>f" format.
//
// The random generator is re-seeded with 12345, then for each of `samples`
// values from generate_double() both conversions are run `iterations` times.
// The average time per call (in nanoseconds, measured with clock()) is fed
// into one mean_and_variance accumulator per implementation.
//
//   samples    - number of random doubles to convert
//   iterations - number of timed repetitions per double and per implementation
//   precision  - digit count inserted into the "%.<precision>f" format and
//                passed to d2fixed_buffered()
//   verbose    - when true, print one CSV row per sample
//                (ryu output, bits as decimal, ryu ns, snprintf ns);
//                when false, print a single summary line with mean and stddev
//
// Returns a running sum of output bytes whose only purpose is to give the
// timed loops an observable result.
static int bench64_fixed(const uint32_t samples, const uint32_t iterations, const int32_t precision, const bool verbose) {
  char bufferown[BUFFER_SIZE];
  char buffer[BUFFER_SIZE];
  char fmt[100];
  snprintf(fmt, sizeof(fmt), "%%.%df", precision);

  RandomInit(12345);
  mean_and_variance mv1;
  init(&mv1);
  mean_and_variance mv2;
  init(&mv2);
  int throwaway = 0;
  // Counters are uint32_t to match the bounds: an int counter would be compared
  // as unsigned and would overflow (UB) for bounds above INT_MAX.
  for (uint32_t i = 0; i < samples; ++i) {
    uint64_t r = 0;
    const double f = generate_double(&r);

//    printf("%f\n", f);
    // Time the Ryu implementation.
    clock_t t1 = clock();
    for (uint32_t j = 0; j < iterations; ++j) {
      d2fixed_buffered(f, precision, bufferown);
      throwaway += bufferown[2];
    }
    clock_t t2 = clock();
    // clock ticks -> nanoseconds per single call.
    double delta1 = ((t2 - t1) * 1000000000.0) / ((double) iterations) / ((double) CLOCKS_PER_SEC);
    update(&mv1, delta1);

    // Time snprintf on the same value.
    double delta2 = 0.0;
    t1 = clock();
    for (uint32_t j = 0; j < iterations; ++j) {
      snprintf(buffer, BUFFER_SIZE, fmt, f);
      throwaway += buffer[2];
    }
    t2 = clock();
    delta2 = ((t2 - t1) * 1000000000.0) / ((double) iterations) / ((double) CLOCKS_PER_SEC);
    update(&mv2, delta2);

    if (verbose) {
      printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
    }

//    printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    // Report any sample where the two implementations disagree.
    // NOTE(review): bench64_exp skips this report in verbose mode; here it is
    // printed regardless and lands in the CSV stream - confirm that is intended.
    if (strcmp(bufferown, buffer) != 0) {
      printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    }
  }
  if (!verbose) {
    printf("64: %8.3f %8.3f", mv1.mean, stddev(&mv1));
    printf("     %8.3f %8.3f", mv2.mean, stddev(&mv2));
    printf("\n");
  }
  return throwaway;
}

static int bench64_exp(const uint32_t samples, const uint32_t iterations, const int32_t precision, const bool verbose) {
  char bufferown[BUFFER_SIZE];
  char buffer[BUFFER_SIZE];
  char fmt[100];
  snprintf(fmt, 100, "%%.%de", precision);

  RandomInit(12345);
  mean_and_variance mv1;
  init(&mv1);
  mean_and_variance mv2;
  init(&mv2);
  int throwaway = 0;
  for (int i = 0; i < samples; ++i) {
    uint64_t r = 0;
    const double f = generate_double(&r);

//    printf("%.20e\n", f);
//    printf("For %16" PRIX64 "\n", r);
    clock_t t1 = clock();
    for (int j = 0; j < iterations; ++j) {
      d2exp_buffered(f, precision, bufferown);
      throwaway += bufferown[2];
    }
    clock_t t2 = clock();
    double delta1 = (t2 - t1) / (double) iterations / CLOCKS_PER_SEC * 1000000000.0;
    update(&mv1, delta1);

    double delta2 = 0.0;
    t1 = clock();
    for (int j = 0; j < iterations; ++j) {
      snprintf(buffer, BUFFER_SIZE, fmt, f);
      throwaway += buffer[2];
    }
    t2 = clock();
    delta2 = (t2 - t1) / (double) iterations / CLOCKS_PER_SEC * 1000000000.0;
    update(&mv2, delta2);

    if (verbose) {
      printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
    }

//    printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    if ((strcmp(bufferown, buffer) != 0) && !verbose) {
      printf("For %16" PRIX64 " %28s %28s\n", r, bufferown, buffer);
    }

Ryu_Library/ryu/benchmark/benchmark_fixed.c  view on Meta::CPAN

  // Also disable hyperthreading with something like this:
  // cat /sys/devices/system/cpu/cpu*/topology/core_id
  // sudo /bin/bash -c "echo 0 > /sys/devices/system/cpu/cpu6/online"
  cpu_set_t my_set;
  CPU_ZERO(&my_set);
  CPU_SET(2, &my_set);
  sched_setaffinity(getpid(), sizeof(cpu_set_t), &my_set);
#endif

  int32_t samples = 10000;
  int32_t iterations = 1000;
  int32_t precision = 6;
  bool verbose = false;
  bool fixed = true;
  for (int i = 1; i < argc; i++) {
    char* arg = argv[i];
    if (strcmp(arg, "-v") == 0) {
      verbose = true;
    } else if (strncmp(arg, "-samples=", 9) == 0) {
      sscanf(arg, "-samples=%i", &samples);
    } else if (strncmp(arg, "-iterations=", 12) == 0) {
      sscanf(arg, "-iterations=%i", &iterations);
    } else if (strncmp(arg, "-precision=", 11) == 0) {
      sscanf(arg, "-precision=%i", &precision);
    } else if (strcmp(arg, "-exp") == 0) {
      fixed = false;
    }
  }
  if (false) {
//    double d = int64Bits2Double(0x426E5FDA4A181F94);
//    double d = int64Bits2Double(0xC27EF2838AD07A1A);
    double d = int64Bits2Double(0x426C19FD2EFA7294);

Ryu_Library/ryu/benchmark/benchmark_fixed.c  view on Meta::CPAN

    setbuf(stdout, NULL);
  }

  if (verbose) {
    printf("ryu_output,float_bits_as_int,ryu_time_in_ns,snprintf_time_in_ns\n");
  } else {
    printf("    Average & Stddev Ryu%s\n", "  Average & Stddev snprintf");
  }
  int throwaway = 0;
  if (fixed) {
    throwaway += bench64_fixed(samples, iterations, precision, verbose);
  } else {
    throwaway += bench64_exp(samples, iterations, precision, verbose);
  }
  if (argc == 1000) {
    // Prevent the compiler from optimizing the code away.
    printf("%d\n", throwaway);
  }
  return 0;
}

Ryu_Library/ryu/benchmark/benchmark_fixed.cc  view on Meta::CPAN


class benchmark_options {
public:
  benchmark_options() = default;
  benchmark_options(const benchmark_options&) = delete;
  benchmark_options& operator=(const benchmark_options&) = delete;

  // Read-only accessors; each one returns the corresponding m_* member unchanged.
  // The values are the in-class defaults unless parse() has overwritten them.
  bool run32() const { return m_run32; }
  bool run64() const { return m_run64; }
  // parse() rejects "-samples=" values below 1.
  int samples() const { return m_samples; }
  // parse() rejects "-iterations=" values below 1.
  int iterations() const { return m_iterations; }
  bool verbose() const { return m_verbose; }
  bool ryu_only() const { return m_ryu_only; }
  bool classic() const { return m_classic; }
  // 0 by default; parse() only accepts "-small_digits=" values in 1..7.
  int small_digits() const { return m_small_digits; }
  // 6 by default; parse() only accepts "-precision=" values in 0..2000.
  int precision() const { return m_precision; }

  void parse(const char * const arg) {
    if (strcmp(arg, "-f") == 0) {
      m_run32 = false;
      m_run64 = true;

Ryu_Library/ryu/benchmark/benchmark_fixed.cc  view on Meta::CPAN

    } else if (strcmp(arg, "-v") == 0) {
      m_verbose = true;
    } else if (strcmp(arg, "-ryu") == 0) {
      m_ryu_only = true;
    } else if (strcmp(arg, "-classic") == 0) {
      m_classic = true;
    } else if (strncmp(arg, "-samples=", 9) == 0) {
      if (sscanf(arg, "-samples=%i", &m_samples) != 1 || m_samples < 1) {
        fail(arg);
      }
    } else if (strncmp(arg, "-iterations=", 12) == 0) {
      if (sscanf(arg, "-iterations=%i", &m_iterations) != 1 || m_iterations < 1) {
        fail(arg);
      }
    } else if (strncmp(arg, "-small_digits=", 14) == 0) {
      if (sscanf(arg, "-small_digits=%i", &m_small_digits) != 1 || m_small_digits < 1 || m_small_digits > 7) {
        fail(arg);
      }
    } else if (strncmp(arg, "-precision=", 11) == 0) {
      if (sscanf(arg, "-precision=%i", &m_precision) != 1 || m_precision < 0 || m_precision > 2000) {
        fail(arg);
      }

Ryu_Library/ryu/benchmark/benchmark_fixed.cc  view on Meta::CPAN

      fail(arg);
    }
  }

private:
  // Prints "Unrecognized option '<arg>'." to stdout and terminates the process
  // with EXIT_FAILURE. parse() calls this both for unknown options and for
  // options whose value fails to scan or is out of range.
  //
  // Marked [[noreturn]] because control never comes back to the caller, and
  // static because no instance state is read or written.
  [[noreturn]] static void fail(const char * const arg) {
    printf("Unrecognized option '%s'.\n", arg);
    exit(EXIT_FAILURE);
  }

  // By default, run both 32 and 64-bit benchmarks with 10000 samples and 1000 iterations each.
  bool m_run32 = true;
  bool m_run64 = true;
  int m_samples = 10000;
  int m_iterations = 1000;
  bool m_verbose = false;
  bool m_ryu_only = false;
  bool m_classic = true;
  int m_small_digits = 0;
  int m_precision = 6;
};

// returns 10^x
uint32_t exp10(const int x) {
  uint32_t ret = 1;

Ryu_Library/ryu/benchmark/benchmark_fixed.cc  view on Meta::CPAN

  std::mt19937 mt32(12345);
  mean_and_variance mv1;
  mean_and_variance mv2;
  int throwaway = 0;
  for (int i = 0; i < options.samples(); ++i) {
    uint64_t r = 0;
    const double f = generate_double(options, mt32, r);

//    printf("%f\n", f);
    auto t1 = steady_clock::now();
    for (int j = 0; j < options.iterations(); ++j) {
      d2fixed_buffered(f, static_cast<uint32_t>(precision), bufferown);
      throwaway += bufferown[2];
    }
    auto t2 = steady_clock::now();
    double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
    mv1.update(delta1);

    double delta2 = 0.0;
    if (!options.ryu_only()) {
      t1 = steady_clock::now();
      for (int j = 0; j < options.iterations(); ++j) {
        snprintf(buffer, BUFFER_SIZE, fmt, f);
        throwaway += buffer[2];
      }
      t2 = steady_clock::now();
      delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
      mv2.update(delta2);
    }

    if (options.verbose()) {
      if (options.ryu_only()) {
        printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
      } else {
        printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
      }
    }

Ryu_Library/ryu/benchmark/benchmark_fixed.cc  view on Meta::CPAN

  std::mt19937 mt32(12345);
  mean_and_variance mv1;
  mean_and_variance mv2;
  int throwaway = 0;
  for (int i = 0; i < options.samples(); ++i) {
    uint64_t r = 0;
    const double f = generate_double(options, mt32, r);

//    printf("%f\n", f);
    auto t1 = steady_clock::now();
    for (int j = 0; j < options.iterations(); ++j) {
      d2exp_buffered(f, static_cast<uint32_t>(precision), bufferown);
      throwaway += bufferown[2];
    }
    auto t2 = steady_clock::now();
    double delta1 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
    mv1.update(delta1);

    double delta2 = 0.0;
    if (!options.ryu_only()) {
      t1 = steady_clock::now();
      for (int j = 0; j < options.iterations(); ++j) {
        snprintf(buffer, BUFFER_SIZE, fmt, f);
        throwaway += buffer[2];
      }
      t2 = steady_clock::now();
      delta2 = duration_cast<nanoseconds>(t2 - t1).count() / static_cast<double>(options.iterations());
      mv2.update(delta2);
    }

    if (options.verbose()) {
      if (options.ryu_only()) {
        printf("%s,%" PRIu64 ",%f\n", bufferown, r, delta1);
      } else {
        printf("%s,%" PRIu64 ",%f,%f\n", bufferown, r, delta1, delta2);
      }
    }

Ryu_Library/ryu/d2s.c  view on Meta::CPAN

    const uint64_t vmDiv100 = div100(vm);
    if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
      const uint64_t vrDiv100 = div100(vr);
      const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100);
      roundUp = vrMod100 >= 50;
      vr = vrDiv100;
      vp = vpDiv100;
      vm = vmDiv100;
      removed += 2;
    }
    // Loop iterations below (approximately), without optimization above:
    // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
    // Loop iterations below (approximately), with optimization above:
    // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
    for (;;) {
      const uint64_t vpDiv10 = div10(vp);
      const uint64_t vmDiv10 = div10(vm);
      if (vpDiv10 <= vmDiv10) {
        break;
      }
      const uint64_t vrDiv10 = div10(vr);
      const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
      roundUp = vrMod10 >= 5;

Ryu_Library/ryu/f2s.c  view on Meta::CPAN

    printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
    if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
      // Round even if the exact number is .....50..0.
      lastRemovedDigit = 4;
    }
    // We need to take vr + 1 if vr is outside bounds or we need to round up.
    output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
  } else {
    // Specialized for the common case (~96.0%). Percentages below are relative to this.
    // Loop iterations below (approximately):
    // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
    while (vp / 10 > vm / 10) {
      lastRemovedDigit = (uint8_t) (vr % 10);
      vr /= 10;
      vp /= 10;
      vm /= 10;
      ++removed;
    }
#ifdef RYU_DEBUG
    printf("%u %d\n", vr, lastRemovedDigit);

d2s.c  view on Meta::CPAN

    const uint64_t vmDiv100 = div100(vm);
    if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
      const uint64_t vrDiv100 = div100(vr);
      const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100);
      roundUp = vrMod100 >= 50;
      vr = vrDiv100;
      vp = vpDiv100;
      vm = vmDiv100;
      removed += 2;
    }
    // Loop iterations below (approximately), without optimization above:
    // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
    // Loop iterations below (approximately), with optimization above:
    // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
    for (;;) {
      const uint64_t vpDiv10 = div10(vp);
      const uint64_t vmDiv10 = div10(vm);
      if (vpDiv10 <= vmDiv10) {
        break;
      }
      const uint64_t vrDiv10 = div10(vr);
      const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
      roundUp = vrMod10 >= 5;

f2s.c  view on Meta::CPAN

    printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false");
#endif
    if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
      // Round even if the exact number is .....50..0.
      lastRemovedDigit = 4;
    }
    // We need to take vr + 1 if vr is outside bounds or we need to round up.
    output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
  } else {
    // Specialized for the common case (~96.0%). Percentages below are relative to this.
    // Loop iterations below (approximately):
    // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
    while (vp / 10 > vm / 10) {
      lastRemovedDigit = (uint8_t) (vr % 10);
      vr /= 10;
      vp /= 10;
      vm /= 10;
      ++removed;
    }
#ifdef RYU_DEBUG
    printf("%u %d\n", vr, lastRemovedDigit);



( run in 1.419 second using v1.01-cache-2.11-cpan-71847e10f99 )