Alien-libsecp256k1
view release on metacpan or search on metacpan
libsecp256k1/src/ecmult_gen_impl.h view on Meta::CPAN
}
secp256k1_scalar_clear(&d);
/* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
* (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions.
* We rewrite our equation in terms of these table entries.
*
* Let mask(b) = sum(2^((b*COMB_TEETH + t)*COMB_SPACING) for t=0..COMB_TEETH-1),
* with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11,
* COMB_TEETH=6, COMB_SPACING=4, we would have:
* mask(0) = 2^0 + 2^4 + 2^8 + 2^12 + 2^16 + 2^20,
* mask(1) = 2^24 + 2^28 + 2^32 + 2^36 + 2^40 + 2^44,
* mask(2) = 2^48 + 2^52 + 2^56 + 2^60 + 2^64 + 2^68,
* ...
* mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260
*
* We will split up the bits d[i] using these masks. Specifically, each mask is
* used COMB_SPACING times, with different shifts:
*
* d = (d & mask(0)<<0) + (d & mask(1)<<0) + ... + (d & mask(COMB_BLOCKS-1)<<0) +
* (d & mask(0)<<1) + (d & mask(1)<<1) + ... + (d & mask(COMB_BLOCKS-1)<<1) +
* ...
* (d & mask(0)<<(COMB_SPACING-1)) + ...
*
* Now define table(b, m) = (m - mask(b)/2) * G, and we will precompute these values for
* b=0..COMB_BLOCKS-1, and for all values m which (d & mask(b)) can take (so m can take on
* 2^COMB_TEETH distinct values).
*
* If m=(d & mask(b)), then table(b, m) is the sum of 2^i * (2*d[i]-1) * G/2, with i
* iterating over the set bits in mask(b). In our example, table(2, 2^48 + 2^56 + 2^68)
* would equal (2^48 - 2^52 + 2^56 - 2^60 - 2^64 + 2^68) * G/2.
*
* With that, we can rewrite comb(d, G/2) as:
*
* 2^0 * (table(0, d>>0 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>0 & mask(COMP_BLOCKS-1)))
* + 2^1 * (table(0, d>>1 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>1 & mask(COMP_BLOCKS-1)))
* + 2^2 * (table(0, d>>2 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>2 & mask(COMP_BLOCKS-1)))
* + ...
* + 2^(COMB_SPACING-1) * (table(0, d>>(COMB_SPACING-1) & mask(0)) + ...)
*
* Or more generically as
*
* sum(2^i * sum(table(b, d>>i & mask(b)), b=0..COMB_BLOCKS-1), i=0..COMB_SPACING-1)
*
* This is implemented using an outer loop that runs in reverse order over the lines of this
* equation, which in each iteration runs an inner loop that adds the terms of that line and
* then doubles the result before proceeding to the next line.
*
* In pseudocode:
* c = infinity
* for comb_off in range(COMB_SPACING - 1, -1, -1):
* for block in range(COMB_BLOCKS):
* c += table(block, (d >> comb_off) & mask(block))
* if comb_off > 0:
* c = 2*c
* return c
*
* This computes c = comb(d, G/2), and thus finally R = c + ctx->ge_offset. Note that it would
* be possible to apply an initial offset instead of a final offset (moving ge_offset to take
* the place of infinity above), but the chosen approach allows using (in a future improvement)
* an incomplete addition formula for most of the multiplication.
*
* The last question is how to implement the table(b, m) function. For any value of b,
* m=(d & mask(b)) can only take on at most 2^COMB_TEETH possible values (the last one may have
* fewer as there mask(b) may exceed the curve order). So we could create COMB_BLOCK tables
* which contain a value for each such m value.
*
* Now note that if m=(d & mask(b)), then flipping the relevant bits of m results in negating
* the result of table(b, m). This is because table(b,m XOR mask(b)) = table(b, mask(b) - m) =
* (mask(b) - m - mask(b)/2)*G = (-m + mask(b)/2)*G = -(m - mask(b)/2)*G = -table(b, m).
* Because of this it suffices to only store the first half of the m values for every b. If an
* entry from the second half is needed, we look up its bit-flipped version instead, and negate
* it.
*
* secp256k1_ecmult_gen_prec_table[b][index] stores the table(b, m) entries. Index
* is the relevant mask(b) bits of m packed together without gaps. */
/* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */
comb_off = COMB_SPACING - 1;
while (1) {
uint32_t block;
uint32_t bit_pos = comb_off;
/* Inner loop: for each block, add table entries to the result. */
for (block = 0; block < COMB_BLOCKS; ++block) {
/* Gather the mask(block)-selected bits of d into bits. They're packed:
* bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */
uint32_t bits = 0, sign, abs, index, tooth;
/* Instead of reading individual bits here to construct the bits variable,
* build up the result by xoring rotated reads together. In every iteration,
* one additional bit is made correct, starting at the bottom. The bits
* above that contain junk. This reduces leakage by avoiding computations
* on variables that can have only a low number of possible values (e.g.,
* just two values when reading a single bit into a variable.) See:
* https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
*/
for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
/* Construct bitdata s.t. the bottom bit is the bit we'd like to read.
*
* We could just set bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f)
* but this would simply discard the bits that fall off at the bottom,
* and thus, for example, bitdata could still have only two values if we
* happen to shift by exactly 31 positions. We use a rotation instead,
* which ensures that bitdata doesn't loose entropy. This relies on the
* rotation being atomic, i.e., the compiler emitting an actual rot
* instruction. */
uint32_t bitdata = secp256k1_rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f);
/* Clear the bit at position tooth, but sssh, don't tell clang. */
uint32_t volatile vmask = ~(1 << tooth);
bits &= vmask;
/* Write the bit into position tooth (and junk into higher bits). */
bits ^= bitdata << tooth;
bit_pos += COMB_SPACING;
}
/* If the top bit of bits is 1, flip them all (corresponding to looking up
* the negated table value), and remember to negate the result in sign. */
sign = (bits >> (COMB_TEETH - 1)) & 1;
abs = (bits ^ -sign) & (COMB_POINTS - 1);
VERIFY_CHECK(sign == 0 || sign == 1);
( run in 0.696 second using v1.01-cache-2.11-cpan-5b529ec07f3 )