Crypt-Bear
view release on metacpan or search on metacpan
src/ec/ec_prime_i15.c view on Meta::CPAN
/*
* We use a custom interpreter that uses a dozen registers, and
* only six operations:
* MSET(d, a) copy a into d
* MADD(d, a) d = d+a (modular)
* MSUB(d, a) d = d-a (modular)
* MMUL(d, a, b) d = a*b (Montgomery multiplication)
* MINV(d, a, b) invert d modulo p; a and b are used as scratch registers
* MTZ(d) clear return value if d = 0
* Destination of MMUL (d) must be distinct from operands (a and b).
* There is no such constraint for MSUB and MADD.
*
* Registers include the operand coordinates, and temporaries.
*/
#define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4))
#define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4))
#define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4))
#define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b))
#define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b))
#define MTZ(d) (0x5000 + ((d) << 8))
#define ENDCODE 0
/*
* Registers for the input operands.
*/
#define P1x 0
#define P1y 1
#define P1z 2
#define P2x 3
#define P2y 4
#define P2z 5
/*
* Alternate names for the first input operand.
*/
#define Px 0
#define Py 1
#define Pz 2
/*
* Temporaries.
*/
#define t1 6
#define t2 7
#define t3 8
#define t4 9
#define t5 10
#define t6 11
#define t7 12
/*
* Extra scratch registers available when there is no second operand (e.g.
* for "double" and "affine").
*/
#define t8 3
#define t9 4
#define t10 5
/*
* Doubling formulas are:
*
* s = 4*x*y^2
* m = 3*(x + z^2)*(x - z^2)
* x' = m^2 - 2*s
* y' = m*(s - x') - 8*y^4
* z' = 2*y*z
*
* If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
* should. This case should not happen anyway, because our curves have
* prime order, and thus do not contain any point of order 2.
*
* If P is infinity (z = 0), then again the formulas yield infinity,
* which is correct. Thus, this code works for all points.
*
* Cost: 8 multiplications
*/
static const uint16_t code_double[] = {
/*
* Compute z^2 (in t1).
*/
MMUL(t1, Pz, Pz),
/*
* Compute x-z^2 (in t2) and then x+z^2 (in t1).
*/
MSET(t2, Px),
MSUB(t2, t1),
MADD(t1, Px),
/*
* Compute m = 3*(x+z^2)*(x-z^2) (in t1).
*/
MMUL(t3, t1, t2),
MSET(t1, t3),
MADD(t1, t3),
MADD(t1, t3),
/*
* Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
*/
MMUL(t3, Py, Py),
MADD(t3, t3),
MMUL(t2, Px, t3),
MADD(t2, t2),
/*
* Compute x' = m^2 - 2*s.
*/
MMUL(Px, t1, t1),
MSUB(Px, t2),
MSUB(Px, t2),
/*
* Compute z' = 2*y*z.
*/
MMUL(t4, Py, Pz),
MSET(Pz, t4),
MADD(Pz, t4),
/*
* Compute y' = m*(s - x') - 8*y^4. Note that we already have
* 2*y^2 in t3.
*/
MSUB(t2, Px),
MMUL(Py, t1, t2),
MMUL(t4, t3, t3),
MSUB(Py, t4),
MSUB(Py, t4),
ENDCODE
};
/*
* Addtions formulas are:
*
* u1 = x1 * z2^2
* u2 = x2 * z1^2
* s1 = y1 * z2^3
* s2 = y2 * z1^3
* h = u2 - u1
* r = s2 - s1
* x3 = r^2 - h^3 - 2 * u1 * h^2
* y3 = r * (u1 * h^2 - x3) - s1 * h^3
* z3 = h * z1 * z2
*
* If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
* z3 == 0, so the result is correct.
* If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
* not correct.
* h == 0 only if u1 == u2; this happens in two cases:
* -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
* -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
*
* Thus, the following situations are not handled correctly:
* -- P1 = 0 and P2 != 0
* -- P1 != 0 and P2 = 0
* -- P1 = P2
* All other cases are properly computed. However, even in "incorrect"
* situations, the three coordinates still are properly formed field
* elements.
*
* The returned flag is cleared if r == 0. This happens in the following
* cases:
* -- Both points are on the same horizontal line (same Y coordinate).
* -- Both points are infinity.
* -- One point is infinity and the other is on line Y = 0.
* The third case cannot happen with our curves (there is no valid point
* on line Y = 0 since that would be a point of order 2). If the two
* source points are non-infinity, then remains only the case where the
* two points are on the same horizontal line.
*
* This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
* P2 != 0:
* -- If the returned value is not the point at infinity, then it was properly
* computed.
* -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
* is indeed the point at infinity.
* -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
* use the 'double' code.
*
* Cost: 16 multiplications
*/
static const uint16_t code_add[] = {
/*
* Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
*/
MMUL(t3, P2z, P2z),
MMUL(t1, P1x, t3),
MMUL(t4, P2z, t3),
MMUL(t3, P1y, t4),
/*
* Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
*/
src/ec/ec_prime_i15.c view on Meta::CPAN
MMUL(t2, P1y, P1y),
/* Compare y^2 with x^3 - 3*x + b; they must match. */
MSUB(t1, t2),
MTZ(t1),
/* Set z to 1 (in Montgomery representation). */
MMUL(P1z, P2x, P2z),
ENDCODE
};
/*
* Conversion back to affine coordinates. This code snippet assumes that
* the z coordinate of P2 is set to 1 (not in Montgomery representation).
*/
static const uint16_t code_affine[] = {
/* Save z*R in t1. */
MSET(t1, P1z),
/* Compute z^3 in t2. */
MMUL(t2, P1z, P1z),
MMUL(t3, P1z, t2),
MMUL(t2, t3, P2z),
/* Invert to (1/z^3) in t2. */
MINV(t2, t3, t4),
/* Compute y. */
MSET(t3, P1y),
MMUL(P1y, t2, t3),
/* Compute (1/z^2) in t3. */
MMUL(t3, t2, t1),
/* Compute x. */
MSET(t2, P1x),
MMUL(P1x, t2, t3),
ENDCODE
};
static uint32_t
run_code(jacobian *P1, const jacobian *P2,
const curve_params *cc, const uint16_t *code)
{
uint32_t r;
uint16_t t[13][I15_LEN];
size_t u;
r = 1;
/*
* Copy the two operands in the dedicated registers.
*/
memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
/*
* Run formulas.
*/
for (u = 0;; u ++) {
unsigned op, d, a, b;
op = code[u];
if (op == 0) {
break;
}
d = (op >> 8) & 0x0F;
a = (op >> 4) & 0x0F;
b = op & 0x0F;
op >>= 12;
switch (op) {
uint32_t ctl;
size_t plen;
unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
case 0:
memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
break;
case 1:
ctl = br_i15_add(t[d], t[a], 1);
ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
br_i15_sub(t[d], cc->p, ctl);
break;
case 2:
br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
break;
case 3:
br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
break;
case 4:
plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
br_i15_encode(tp, plen, cc->p);
tp[plen - 1] -= 2;
br_i15_modpow(t[d], tp, plen,
cc->p, cc->p0i, t[a], t[b]);
break;
default:
r &= ~br_i15_iszero(t[d]);
break;
}
}
/*
* Copy back result.
*/
memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
return r;
}
static void
set_one(uint16_t *x, const uint16_t *p)
{
size_t plen;
plen = (p[0] + 31) >> 4;
memset(x, 0, plen * sizeof *x);
x[0] = p[0];
x[1] = 0x0001;
( run in 0.471 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )