Crypt-Bear
view release on metacpan or search on metacpan
src/ec/ec_p256_m31.c view on Meta::CPAN
/*
* A simple square-and-multiply for z^(2^31-1). We could save about
* two dozen multiplications here with an addition chain, but
* this would require a bit more code, and extra stack buffers.
*/
memcpy(t1, P->z, sizeof P->z);
for (i = 0; i < 30; i ++) {
square_f256(t1, t1);
mul_f256(t1, t1, P->z);
}
/*
* Square-and-multiply. Apart from the squarings, we have a few
* multiplications to set bits to 1; we multiply by the original z
* for setting 1 bit, and by t1 for setting 31 bits.
*/
memcpy(t2, P->z, sizeof P->z);
for (i = 1; i < 256; i ++) {
square_f256(t2, t2);
switch (i) {
case 31:
case 190:
case 221:
case 252:
mul_f256(t2, t2, t1);
break;
case 63:
case 253:
case 255:
mul_f256(t2, t2, P->z);
break;
}
}
/*
* Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3.
*/
mul_f256(t1, t2, t2);
mul_f256(P->x, t1, P->x);
mul_f256(t1, t1, t2);
mul_f256(P->y, t1, P->y);
reduce_final_f256(P->x);
reduce_final_f256(P->y);
/*
* Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
* this will set z to 1.
*/
mul_f256(P->z, P->z, t2);
reduce_final_f256(P->z);
}
/*
* Double a point in P-256. This function works for all valid points,
* including the point at infinity.
*/
static void
p256_double(p256_jacobian *Q)
{
/*
* Doubling formulas are:
*
* s = 4*x*y^2
* m = 3*(x + z^2)*(x - z^2)
* x' = m^2 - 2*s
* y' = m*(s - x') - 8*y^4
* z' = 2*y*z
*
* These formulas work for all points, including points of order 2
* and points at infinity:
* - If y = 0 then z' = 0. But there is no such point in P-256
* anyway.
* - If z = 0 then z' = 0.
*/
uint32_t t1[9], t2[9], t3[9], t4[9];
/*
* Compute z^2 in t1.
*/
square_f256(t1, Q->z);
/*
* Compute x-z^2 in t2 and x+z^2 in t1.
*/
add_f256(t2, Q->x, t1);
sub_f256(t1, Q->x, t1);
/*
* Compute 3*(x+z^2)*(x-z^2) in t1.
*/
mul_f256(t3, t1, t2);
add_f256(t1, t3, t3);
add_f256(t1, t3, t1);
/*
* Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
*/
square_f256(t3, Q->y);
add_f256(t3, t3, t3);
mul_f256(t2, Q->x, t3);
add_f256(t2, t2, t2);
/*
* Compute x' = m^2 - 2*s.
*/
square_f256(Q->x, t1);
sub_f256(Q->x, Q->x, t2);
sub_f256(Q->x, Q->x, t2);
/*
* Compute z' = 2*y*z.
*/
mul_f256(t4, Q->y, Q->z);
add_f256(Q->z, t4, t4);
/*
* Compute y' = m*(s - x') - 8*y^4. Note that we already have
* 2*y^2 in t3.
*/
sub_f256(t2, t2, Q->x);
mul_f256(Q->y, t1, t2);
square_f256(t4, t3);
add_f256(t4, t4, t4);
sub_f256(Q->y, Q->y, t4);
}
/*
* Add point P2 to point P1.
*
* This function computes the wrong result in the following cases:
*
* - If P1 == 0 but P2 != 0
* - If P1 != 0 but P2 == 0
* - If P1 == P2
*
* In all three cases, P1 is set to the point at infinity.
*
* Returned value is 0 if one of the following occurs:
*
* - P1 and P2 have the same Y coordinate
* - P1 == 0 and P2 == 0
* - The Y coordinate of one of the points is 0 and the other point is
* the point at infinity.
*
* The third case cannot actually happen with valid points, since a point
* with Y == 0 is a point of order 2, and there is no point of order 2 on
* curve P-256.
*
* Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
* can apply the following:
*
* - If the result is not the point at infinity, then it is correct.
* - Otherwise, if the returned value is 1, then this is a case of
* P1+P2 == 0, so the result is indeed the point at infinity.
* - Otherwise, P1 == P2, so a "double" operation should have been
* performed.
*/
static uint32_t
p256_add(p256_jacobian *P1, const p256_jacobian *P2)
{
/*
* Addtions formulas are:
*
* u1 = x1 * z2^2
* u2 = x2 * z1^2
* s1 = y1 * z2^3
* s2 = y2 * z1^3
* h = u2 - u1
* r = s2 - s1
* x3 = r^2 - h^3 - 2 * u1 * h^2
* y3 = r * (u1 * h^2 - x3) - s1 * h^3
* z3 = h * z1 * z2
*/
uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
uint32_t ret;
int i;
/*
* Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
*/
square_f256(t3, P2->z);
mul_f256(t1, P1->x, t3);
mul_f256(t4, P2->z, t3);
mul_f256(t3, P1->y, t4);
/*
* Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
*/
square_f256(t4, P1->z);
mul_f256(t2, P2->x, t4);
mul_f256(t5, P1->z, t4);
mul_f256(t4, P2->y, t5);
/*
* Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
* We need to test whether r is zero, so we will do some extra
* reduce.
*/
sub_f256(t2, t2, t1);
sub_f256(t4, t4, t3);
reduce_final_f256(t4);
ret = 0;
for (i = 0; i < 9; i ++) {
ret |= t4[i];
}
ret = (ret | -ret) >> 31;
/*
* Compute u1*h^2 (in t6) and h^3 (in t5);
*/
square_f256(t7, t2);
mul_f256(t6, t1, t7);
mul_f256(t5, t7, t2);
/*
* Compute x3 = r^2 - h^3 - 2*u1*h^2.
*/
square_f256(P1->x, t4);
sub_f256(P1->x, P1->x, t5);
sub_f256(P1->x, P1->x, t6);
sub_f256(P1->x, P1->x, t6);
/*
* Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
*/
sub_f256(t6, t6, P1->x);
mul_f256(P1->y, t4, t6);
mul_f256(t1, t5, t3);
sub_f256(P1->y, P1->y, t1);
/*
* Compute z3 = h*z1*z2.
*/
mul_f256(t1, P1->z, P2->z);
mul_f256(P1->z, t1, t2);
return ret;
}
/*
* Add point P2 to point P1. This is a specialised function for the
* case when P2 is a non-zero point in affine coordinate.
*
* This function computes the wrong result in the following cases:
*
* - If P1 == 0
* - If P1 == P2
*
* In both cases, P1 is set to the point at infinity.
*
* Returned value is 0 if one of the following occurs:
*
* - P1 and P2 have the same Y coordinate
* - The Y coordinate of P2 is 0 and P1 is the point at infinity.
*
* The second case cannot actually happen with valid points, since a point
* with Y == 0 is a point of order 2, and there is no point of order 2 on
* curve P-256.
*
* Therefore, assuming that P1 != 0 on input, then the caller
* can apply the following:
*
* - If the result is not the point at infinity, then it is correct.
* - Otherwise, if the returned value is 1, then this is a case of
* P1+P2 == 0, so the result is indeed the point at infinity.
* - Otherwise, P1 == P2, so a "double" operation should have been
* performed.
*/
static uint32_t
p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
{
/*
* Addtions formulas are:
*
* u1 = x1
* u2 = x2 * z1^2
* s1 = y1
* s2 = y2 * z1^3
* h = u2 - u1
* r = s2 - s1
* x3 = r^2 - h^3 - 2 * u1 * h^2
* y3 = r * (u1 * h^2 - x3) - s1 * h^3
* z3 = h * z1
*/
uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
uint32_t ret;
int i;
/*
* Compute u1 = x1 (in t1) and s1 = y1 (in t3).
*/
memcpy(t1, P1->x, sizeof t1);
memcpy(t3, P1->y, sizeof t3);
/*
* Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
*/
square_f256(t4, P1->z);
mul_f256(t2, P2->x, t4);
mul_f256(t5, P1->z, t4);
mul_f256(t4, P2->y, t5);
/*
* Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
* We need to test whether r is zero, so we will do some extra
* reduce.
*/
sub_f256(t2, t2, t1);
sub_f256(t4, t4, t3);
reduce_final_f256(t4);
ret = 0;
for (i = 0; i < 9; i ++) {
ret |= t4[i];
}
ret = (ret | -ret) >> 31;
/*
* Compute u1*h^2 (in t6) and h^3 (in t5);
*/
square_f256(t7, t2);
mul_f256(t6, t1, t7);
mul_f256(t5, t7, t2);
/*
* Compute x3 = r^2 - h^3 - 2*u1*h^2.
*/
square_f256(P1->x, t4);
sub_f256(P1->x, P1->x, t5);
sub_f256(P1->x, P1->x, t6);
sub_f256(P1->x, P1->x, t6);
/*
* Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
( run in 1.458 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )