Math-Prime-Util
view release on metacpan or search on metacpan
*
* Page 7 of https://www.mimuw.edu.pl/~pan/papers/farey-esa.pdf
* https://math.stackexchange.com/a/1740370/117584
*/
static UV _sumtotient_direct(UV n) {
UV finalsum, *sumcache2;
uint32_t sqrtn, sum, i, j, k, *sumcache1;
bool flag;
if (n <= 2) return n;
if (n > MAX_TOTSUM) return 0;
sqrtn = isqrt(n);
flag = (n < (UV)sqrtn * ((UV)sqrtn+1)); /* Does n/r == r ? */
sumcache2 = range_totient(0, sqrtn);
New(0, sumcache1, sqrtn+1, uint32_t);
for (sum = 1, i = 2; i <= sqrtn; i++) {
sum += sumcache2[i];
sumcache1[i] = sum;
}
if (flag) sumcache2[sqrtn] = sumcache1[sqrtn];
for (i = sqrtn - flag; i > 0; i--) {
const UV m = n/i;
const uint32_t s = isqrt(m);
UV sum = (m+1)/2 * (m|1); /* m*(m+1)/2; */
sum -= (m - m/2); /* k=1 */
for (k = 2, j = k*i; j <= sqrtn; k++) {
sum -= sumcache2[j];
sum -= (m/k - m/(k+1)) * sumcache1[k];
j += i;
}
for (; k <= s; k++) {
sum -= sumcache1[m/k];
sum -= (m/k - m/(k+1)) * sumcache1[k];
}
if (m < (UV)s * ((UV)s+1))
sum += sumcache1[s];
sumcache2[i] = sum;
}
finalsum = sumcache2[1];
Safefree(sumcache1);
Safefree(sumcache2);
return finalsum;
}
/* Recursive method using a cache. */
typedef struct {
UV hsize;
UV *nhash; /* n value */
UV *shash; /* sum for n */
} sumt_hash_t;
#define _CACHED_SUMT(x) \
(((x)<csize) ? cdata[x] : _sumt((x), cdata, csize, thash))
static UV _sumt(UV n, const UV *cdata, UV csize, sumt_hash_t thash) {
UV sum, s, k, lim, hn;
uint32_t const probes = 8;
uint32_t hashk;
if (n < csize) return cdata[n];
hn = n % thash.hsize;
for (hashk = 0; hashk <= probes; hashk++) {
if (thash.nhash[hn] == n) return thash.shash[hn];
if (thash.nhash[hn] == 0) break;
hn = (hn+1 < thash.hsize) ? hn+1 : 0;
}
s = isqrt(n);
lim = n/(s+1);
sum = (n+1)/2 * (n|1); /* (n*(n+1))/2 */
sum -= (n - n/2);
for (k = 2; k <= lim; k++) {
sum -= _CACHED_SUMT(n/k);
sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT(k);
}
if (s > lim)
sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT(s);
for (; hashk <= probes; hashk++) {
if (thash.nhash[hn] == 0) {
thash.nhash[hn] = n;
thash.shash[hn] = sum;
break;
}
hn = (hn+1 < thash.hsize) ? hn+1 : 0;
}
return sum;
}
UV sumtotient(UV n) {
UV sum, i, cbrtn, csize, *sumcache;
sumt_hash_t thash;
if (n <= 2) return n;
if (n > MAX_TOTSUM) return 0;
if (n < 4000) return _sumtotient_direct(n);
cbrtn = icbrt(n);
csize = cbrtn * cbrtn;
sumcache = range_totient(0, csize-1);
for (i = 2; i < csize; i++)
sumcache[i] += sumcache[i-1];
thash.hsize = next_prime(10 + 4*cbrtn);
Newz(0, thash.nhash, thash.hsize, UV);
New( 0, thash.shash, thash.hsize, UV);
sum = _sumt(n, sumcache, csize, thash);
Safefree(thash.nhash);
Safefree(thash.shash);
Safefree(sumcache);
return sum;
}
#if HAVE_SUMTOTIENT_128
#define _CACHED_SUMT128(x) \
(((x)<csize) ? (uint128_t)cdata[x] : _sumt128((x), cdata, csize, thash))
typedef struct {
UV hsize;
UV *nhash; /* n value */
uint128_t *shash; /* sum for n */
} sumt_hash_128_t;
static uint128_t _sumt128(UV n, const UV *cdata, UV csize, sumt_hash_128_t thash) {
uint128_t sum;
UV s, k, lim, hn;
uint32_t const probes = 16;
uint32_t const hinc = 1 + ((n >> 8) & 15); /* mitigate clustering */
uint32_t hashk;
if (n < csize) return cdata[n];
hn = n % thash.hsize;
for (hashk = 0; hashk <= probes; hashk++) {
if (thash.nhash[hn] == n) return thash.shash[hn];
if (thash.nhash[hn] == 0) break;
hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
}
s = isqrt(n);
lim = n/(s+1);
sum = ((uint128_t)n+1)/2 * (n|1); /* (n*(n+1))/2 */
sum -= (n - n/2);
for (k = 2; k <= lim; k++) {
sum -= _CACHED_SUMT128(n/k);
sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT128(k);
}
if (s > lim)
sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT128(s);
for (; hashk <= probes; hashk++) {
if (thash.nhash[hn] == 0) {
thash.nhash[hn] = n;
thash.shash[hn] = sum;
break;
}
hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
}
return sum;
}
int sumtotient128(UV n, UV *hi_sum, UV *lo_sum) {
UV i, cbrtn, csize, hsize, *sumcache;
uint128_t sum;
sumt_hash_128_t thash;
if (n <= 2) { *hi_sum = 0; *lo_sum = n; return 1; }
/* sumtotient(2^64-1) < 2^128, so we can't overflow. */
cbrtn = icbrt(n);
csize = 0.6 * cbrtn * cbrtn;
hsize = 8 * cbrtn; /* 12.5% filled with csize = 1 * n^(2/3) */
if (csize > 400000000U) { /* Limit to 3GB */
csize = 400000000;
hsize = isqrt(n);
}
sumcache = range_totient(0, csize-1);
for (i = 2; i < csize; i++)
sumcache[i] += sumcache[i-1];
/* Arguably we should expand the hash as it fills. */
thash.hsize = next_prime( 16 + hsize );
Newz(0, thash.nhash, thash.hsize, UV);
New( 0, thash.shash, thash.hsize, uint128_t);
sum = _sumt128(n, sumcache, csize, thash);
*hi_sum = (sum >> 64) & UV_MAX;
( run in 0.489 second using v1.01-cache-2.11-cpan-39bf76dae61 )