Math-Prime-Util

 view release on metacpan or  search on metacpan

totients.c  view on Meta::CPAN

 *
 * Page 7 of https://www.mimuw.edu.pl/~pan/papers/farey-esa.pdf
 * https://math.stackexchange.com/a/1740370/117584
 */

static UV _sumtotient_direct(UV n) {
  UV finalsum, *sumcache2;
  uint32_t sqrtn, sum, i, j, k, *sumcache1;
  bool flag;

  if (n <= 2)  return n;
  if (n > MAX_TOTSUM)  return 0;

  sqrtn = isqrt(n);
  flag = (n < (UV)sqrtn * ((UV)sqrtn+1));   /* Does n/r == r ? */

  sumcache2 = range_totient(0, sqrtn);

  New(0, sumcache1, sqrtn+1, uint32_t);
  for (sum = 1, i = 2; i <= sqrtn; i++) {
    sum += sumcache2[i];
    sumcache1[i] = sum;
  }
  if (flag)  sumcache2[sqrtn] = sumcache1[sqrtn];

  for (i = sqrtn - flag;  i > 0;  i--) {
    const UV m = n/i;
    const uint32_t s = isqrt(m);
    UV sum = (m+1)/2 * (m|1);      /* m*(m+1)/2; */
    sum -= (m - m/2);              /* k=1 */

    for (k = 2, j = k*i; j <= sqrtn; k++) {
      sum -= sumcache2[j];
      sum -= (m/k - m/(k+1)) * sumcache1[k];
      j += i;
    }
    for (; k <= s; k++) {
      sum -= sumcache1[m/k];
      sum -= (m/k - m/(k+1)) * sumcache1[k];
    }
    if (m < (UV)s * ((UV)s+1))
      sum += sumcache1[s];

    sumcache2[i] = sum;
  }
  finalsum = sumcache2[1];
  Safefree(sumcache1);
  Safefree(sumcache2);
  return finalsum;
}


/* Recursive method using a cache.  */

typedef struct {
  UV    hsize;
  UV   *nhash;  /* n value */
  UV   *shash;  /* sum for n */
} sumt_hash_t;
#define _CACHED_SUMT(x) \
  (((x)<csize)  ?  cdata[x]  :  _sumt((x), cdata, csize, thash))

static UV _sumt(UV n, const UV *cdata, UV csize, sumt_hash_t thash) {
  UV sum, s, k, lim, hn;
  uint32_t const probes = 8;
  uint32_t hashk;

  if (n < csize) return cdata[n];

  hn = n % thash.hsize;
  for (hashk = 0; hashk <= probes; hashk++) {
    if (thash.nhash[hn] == n)  return thash.shash[hn];
    if (thash.nhash[hn] == 0)  break;
    hn = (hn+1 < thash.hsize) ? hn+1 : 0;
  }

  s = isqrt(n);
  lim = n/(s+1);

  sum = (n+1)/2 * (n|1);    /* (n*(n+1))/2 */
  sum -= (n - n/2);
  for (k = 2; k <= lim; k++) {
    sum -= _CACHED_SUMT(n/k);
    sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT(k);
  }
  if (s > lim)
    sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT(s);

  for (; hashk <= probes; hashk++) {
    if (thash.nhash[hn] == 0) {
      thash.nhash[hn] = n;
      thash.shash[hn] = sum;
      break;
    }
    hn = (hn+1 < thash.hsize) ? hn+1 : 0;
  }
  return sum;
}

UV sumtotient(UV n) {
  UV sum, i, cbrtn, csize, *sumcache;
  sumt_hash_t thash;

  if (n <= 2)  return n;
  if (n > MAX_TOTSUM)  return 0;

  if (n < 4000) return _sumtotient_direct(n);

  cbrtn = icbrt(n);
  csize = cbrtn * cbrtn;

  sumcache = range_totient(0, csize-1);
  for (i = 2; i < csize; i++)
    sumcache[i] += sumcache[i-1];

  thash.hsize = next_prime(10 + 4*cbrtn);
  Newz(0, thash.nhash, thash.hsize, UV);
  New( 0, thash.shash, thash.hsize, UV);

  sum = _sumt(n, sumcache, csize, thash);
  Safefree(thash.nhash);
  Safefree(thash.shash);
  Safefree(sumcache);
  return sum;
}



#if HAVE_SUMTOTIENT_128
#define _CACHED_SUMT128(x) \
  (((x)<csize)  ?  (uint128_t)cdata[x]  :  _sumt128((x), cdata, csize, thash))
typedef struct {
  UV         hsize;
  UV        *nhash;  /* n value */
  uint128_t *shash;  /* sum for n */
} sumt_hash_128_t;
static uint128_t _sumt128(UV n, const UV *cdata, UV csize, sumt_hash_128_t thash) {
  uint128_t sum;
  UV s, k, lim, hn;
  uint32_t const probes = 16;
  uint32_t const hinc = 1 + ((n >> 8) & 15);  /* mitigate clustering */
  uint32_t hashk;

  if (n < csize) return cdata[n];

  hn = n % thash.hsize;
  for (hashk = 0; hashk <= probes; hashk++) {
    if (thash.nhash[hn] == n)  return thash.shash[hn];
    if (thash.nhash[hn] == 0)  break;
    hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
  }

  s = isqrt(n);
  lim = n/(s+1);

  sum = ((uint128_t)n+1)/2 * (n|1);    /* (n*(n+1))/2 */
  sum -= (n - n/2);
  for (k = 2; k <= lim; k++) {
    sum -= _CACHED_SUMT128(n/k);
    sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT128(k);
  }
  if (s > lim)
    sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT128(s);

  for (; hashk <= probes; hashk++) {
    if (thash.nhash[hn] == 0) {
      thash.nhash[hn] = n;
      thash.shash[hn] = sum;
      break;
    }
    hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
  }

  return sum;
}

int sumtotient128(UV n, UV *hi_sum, UV *lo_sum) {
  UV i, cbrtn, csize, hsize, *sumcache;
  uint128_t sum;
  sumt_hash_128_t thash;

  if (n <= 2)  { *hi_sum = 0;  *lo_sum = n; return 1; }
  /* sumtotient(2^64-1) < 2^128, so we can't overflow. */

  cbrtn = icbrt(n);
  csize = 0.6 * cbrtn * cbrtn;
  hsize = 8 * cbrtn;         /* 12.5% filled with csize = 1 * n^(2/3) */

  if (csize > 400000000U) {  /* Limit to 3GB */
    csize = 400000000;
    hsize = isqrt(n);
  }

  sumcache = range_totient(0, csize-1);
  for (i = 2; i < csize; i++)
    sumcache[i] += sumcache[i-1];

  /* Arguably we should expand the hash as it fills. */
  thash.hsize = next_prime( 16 + hsize );
  Newz(0, thash.nhash, thash.hsize, UV);
  New( 0, thash.shash, thash.hsize, uint128_t);

  sum = _sumt128(n, sumcache, csize, thash);
  *hi_sum = (sum >> 64) & UV_MAX;



( run in 0.489 second using v1.01-cache-2.11-cpan-39bf76dae61 )