formula results from the CPAN

formula

Crypt-Bear

view release on metacpan or search on metacpan

	 * Return the point in Jacobian coordinates (and Montgomery
	 * representation).
	 */
	memcpy(P->x, x, sizeof x);
	memcpy(P->y, y, sizeof y);
	memcpy(P->z, F256_R, sizeof F256_R);
	return r;
}

/*
 * Final conversion for a point:
 *  - The point is converted back to affine coordinates.
 *  - Final reduction is performed.
 *  - The point is encoded into the provided buffer.
 *
 * If the point is the point-at-infinity, all operations are performed,
 * but the buffer contents are indeterminate, and 0 is returned. Otherwise,
 * the encoded point is written in the buffer, and 1 is returned.
 */
static uint32_t
point_encode(unsigned char *buf, const p256_jacobian *P)
{
	uint64_t t1[5], t2[5], z;

	/* Set t1 = 1/z^2 and t2 = 1/z^3. */
	f256_invert(t2, P->z);
	f256_montysquare(t1, t2);
	f256_montymul(t2, t2, t1);

	/* Compute affine coordinates x (in t1) and y (in t2). */
	f256_montymul(t1, P->x, t1);
	f256_montymul(t2, P->y, t2);

	/* Convert back from Montgomery representation, and finalize
	   reductions. */
	f256_frommonty(t1, t1);
	f256_frommonty(t2, t2);
	f256_final_reduce(t1);
	f256_final_reduce(t2);

	/* Encode. */
	buf[0] = 0x04;
	f256_encode(buf +  1, t1);
	f256_encode(buf + 33, t2);

	/* Return success if and only if P->z != 0. */
	z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4];
	return NEQ((uint32_t)(z | z >> 32), 0);
}

/*
 * Point doubling in Jacobian coordinates: point P is doubled.
 * Note: if the source point is the point-at-infinity, then the result is
 * still the point-at-infinity, which is correct. Moreover, if the three
 * coordinates were zero, then they still are zero in the returned value.
 */
static void
p256_double(p256_jacobian *P)
{
	/*
	 * Doubling formulas are:
	 *
	 *   s = 4*x*y^2
	 *   m = 3*(x + z^2)*(x - z^2)
	 *   x' = m^2 - 2*s
	 *   y' = m*(s - x') - 8*y^4
	 *   z' = 2*y*z
	 *
	 * These formulas work for all points, including points of order 2
	 * and points at infinity:
	 *   - If y = 0 then z' = 0. But there is no such point in P-256
	 *     anyway.
	 *   - If z = 0 then z' = 0.
	 */
	uint64_t t1[5], t2[5], t3[5], t4[5];

	/*
	 * Compute z^2 in t1.
	 */
	f256_montysquare(t1, P->z);

	/*
	 * Compute x-z^2 in t2 and x+z^2 in t1.
	 */
	f256_add(t2, P->x, t1);
	f256_sub(t1, P->x, t1);

	/*
	 * Compute 3*(x+z^2)*(x-z^2) in t1.
	 */
	f256_montymul(t3, t1, t2);
	f256_add(t1, t3, t3);
	f256_add(t1, t3, t1);

	/*
	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
	 */
	f256_montysquare(t3, P->y);
	f256_add(t3, t3, t3);
	f256_montymul(t2, P->x, t3);
	f256_add(t2, t2, t2);

	/*
	 * Compute x' = m^2 - 2*s.
	 */
	f256_montysquare(P->x, t1);
	f256_sub(P->x, P->x, t2);
	f256_sub(P->x, P->x, t2);

	/*
	 * Compute z' = 2*y*z.
	 */
	f256_montymul(t4, P->y, P->z);
	f256_add(P->z, t4, t4);
	f256_partial_reduce(P->z);

	/*
	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
	 * 2*y^2 in t3.
	 */
	f256_sub(t2, t2, P->x);
	f256_montymul(P->y, t1, t2);
	f256_montysquare(t4, t3);
	f256_add(t4, t4, t4);
	f256_sub(P->y, P->y, t4);
}

/*
 * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
 * This function computes the wrong result in the following cases:
 *
 *   - If P1 == 0 but P2 != 0
 *   - If P1 != 0 but P2 == 0
 *   - If P1 == P2
 *
 * In all three cases, P1 is set to the point at infinity.
 *
 * Returned value is 0 if one of the following occurs:
 *
 *   - P1 and P2 have the same Y coordinate.
 *   - P1 == 0 and P2 == 0.
 *   - The Y coordinate of one of the points is 0 and the other point is
 *     the point at infinity.
 *
 * The third case cannot actually happen with valid points, since a point
 * with Y == 0 is a point of order 2, and there is no point of order 2 on
 * curve P-256.
 *
 * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
 * can apply the following:
 *
 *   - If the result is not the point at infinity, then it is correct.
 *   - Otherwise, if the returned value is 1, then this is a case of
 *     P1+P2 == 0, so the result is indeed the point at infinity.
 *   - Otherwise, P1 == P2, so a "double" operation should have been
 *     performed.
 *
 * Note that you can get a returned value of 0 with a correct result,
 * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
 */
static uint32_t
p256_add(p256_jacobian *P1, const p256_jacobian *P2)
{
	/*
	 * Addtions formulas are:
	 *
	 *   u1 = x1 * z2^2
	 *   u2 = x2 * z1^2
	 *   s1 = y1 * z2^3
	 *   s2 = y2 * z1^3
	 *   h = u2 - u1
	 *   r = s2 - s1
	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
	 *   z3 = h * z1 * z2
	 */
	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
	uint32_t ret;

	/*
	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
	 */
	f256_montysquare(t3, P2->z);
	f256_montymul(t1, P1->x, t3);
	f256_montymul(t4, P2->z, t3);
	f256_montymul(t3, P1->y, t4);

	/*
	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
	 */
	f256_montysquare(t4, P1->z);
	f256_montymul(t2, P2->x, t4);
	f256_montymul(t5, P1->z, t4);
	f256_montymul(t4, P2->y, t5);

	/*
	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
	 * We need to test whether r is zero, so we will do some extra
	 * reduce.
	 */
	f256_sub(t2, t2, t1);
	f256_sub(t4, t4, t3);
	f256_final_reduce(t4);
	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
	ret = (uint32_t)(tt | (tt >> 32));
	ret = (ret | -ret) >> 31;

	/*
	 * Compute u1*h^2 (in t6) and h^3 (in t5);
	 */
	f256_montysquare(t7, t2);
	f256_montymul(t6, t1, t7);
	f256_montymul(t5, t7, t2);

	/*
	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
	 */
	f256_montysquare(P1->x, t4);
	f256_sub(P1->x, P1->x, t5);
	f256_sub(P1->x, P1->x, t6);
	f256_sub(P1->x, P1->x, t6);

	/*
	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
	 */
	f256_sub(t6, t6, P1->x);
	f256_montymul(P1->y, t4, t6);
	f256_montymul(t1, t5, t3);
	f256_sub(P1->y, P1->y, t1);

	/*
	 * Compute z3 = h*z1*z2.
	 */
	f256_montymul(t1, P1->z, P2->z);
	f256_montymul(P1->z, t1, t2);

	return ret;
}

/*
 * Point addition (mixed coordinates): P1 is replaced with P1+P2.
 * This is a specialised function for the case when P2 is a non-zero point
 * in affine coordinates.
 *
 * This function computes the wrong result in the following cases:
 *
 *   - If P1 == 0
 *   - If P1 == P2
 *
 * In both cases, P1 is set to the point at infinity.
 *
 * Returned value is 0 if one of the following occurs:
 *
 *   - P1 and P2 have the same Y (affine) coordinate.
 *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
 *
 * The second case cannot actually happen with valid points, since a point
 * with Y == 0 is a point of order 2, and there is no point of order 2 on
 * curve P-256.
 *
 * Therefore, assuming that P1 != 0 on input, then the caller
 * can apply the following:
 *
 *   - If the result is not the point at infinity, then it is correct.
 *   - Otherwise, if the returned value is 1, then this is a case of
 *     P1+P2 == 0, so the result is indeed the point at infinity.
 *   - Otherwise, P1 == P2, so a "double" operation should have been
 *     performed.
 *
 * Again, a value of 0 may be returned in some cases where the addition
 * result is correct.
 */
static uint32_t
p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)
{
	/*
	 * Addtions formulas are:
	 *
	 *   u1 = x1
	 *   u2 = x2 * z1^2
	 *   s1 = y1
	 *   s2 = y2 * z1^3
	 *   h = u2 - u1
	 *   r = s2 - s1
	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
	 *   z3 = h * z1
	 */
	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
	uint32_t ret;

	/*
	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
	 */
	memcpy(t1, P1->x, sizeof t1);
	memcpy(t3, P1->y, sizeof t3);

	/*
	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
	 */
	f256_montysquare(t4, P1->z);
	f256_montymul(t2, P2->x, t4);
	f256_montymul(t5, P1->z, t4);
	f256_montymul(t4, P2->y, t5);

	/*
	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
	 * We need to test whether r is zero, so we will do some extra
	 * reduce.
	 */
	f256_sub(t2, t2, t1);
	f256_sub(t4, t4, t3);
	f256_final_reduce(t4);
	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
	ret = (uint32_t)(tt | (tt >> 32));
	ret = (ret | -ret) >> 31;

	/*
	 * Compute u1*h^2 (in t6) and h^3 (in t5);
	 */
	f256_montysquare(t7, t2);
	f256_montymul(t6, t1, t7);
	f256_montymul(t5, t7, t2);

	/*
	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
	 */
	f256_montysquare(P1->x, t4);
	f256_sub(P1->x, P1->x, t5);
	f256_sub(P1->x, P1->x, t6);
	f256_sub(P1->x, P1->x, t6);

	/*
	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
	 */
	f256_sub(t6, t6, P1->x);
	f256_montymul(P1->y, t4, t6);
	f256_montymul(t1, t5, t3);
	f256_sub(P1->y, P1->y, t1);

	/*
	 * Compute z3 = h*z1*z2.
	 */
	f256_montymul(P1->z, P1->z, t2);

	return ret;
}

#if 0
/* unused */
/*
 * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
 * This is a specialised function for the case when P2 is a non-zero point
 * in affine coordinates.
 *
 * This function returns the correct result in all cases.
 */
static uint32_t
p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
{
	/*
	 * Addtions formulas, in the general case, are:
	 *
	 *   u1 = x1
	 *   u2 = x2 * z1^2
	 *   s1 = y1
	 *   s2 = y2 * z1^3
	 *   h = u2 - u1
	 *   r = s2 - s1
	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
	 *   z3 = h * z1
	 *
	 * These formulas mishandle the two following cases:
	 *
	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
	 *    incorrectly set to 0.
	 *
	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
	 *    are all set to 0.
	 *
	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
	 * we correctly get z3 = 0 (the point-at-infinity).
	 *
	 * To fix the case P1 = 0, we perform at the end a copy of P2
	 * over P1, conditional to z1 = 0.
	 *
	 * For P1 = P2: in that case, both h and r are set to 0, and
	 * we get x3, y3 and z3 equal to 0. We can test for that
	 * occurrence to make a mask which will be all-one if P1 = P2,
	 * or all-zero otherwise; then we can compute the double of P2
	 * and add it, combined with the mask, to (x3,y3,z3).
	 *
	 * Using the doubling formulas in p256_double() on (x2,y2),
	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
	 * we get:
	 *   s = 4*x2*y2^2
	 *   m = 3*(x2 + 1)*(x2 - 1)
	 *   x' = m^2 - 2*s
	 *   y' = m*(s - x') - 8*y2^4
	 *   z' = 2*y2
	 * which requires only 6 multiplications. Added to the 11
	 * multiplications of the normal mixed addition in Jacobian
	 * coordinates, we get a cost of 17 multiplications in total.
	 */
	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt, zz;
	int i;

	/*
	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
	 */
	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4];
	zz = ((zz | -zz) >> 63) - (uint64_t)1;

	/*
	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
	 */
	memcpy(t1, P1->x, sizeof t1);
	memcpy(t3, P1->y, sizeof t3);

	/*
	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
	 */
	f256_montysquare(t4, P1->z);
	f256_montymul(t2, P2->x, t4);
	f256_montymul(t5, P1->z, t4);
	f256_montymul(t4, P2->y, t5);

	/*
	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
	 * reduce.
	 */
	f256_sub(t2, t2, t1);
	f256_sub(t4, t4, t3);

	/*
	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
	 * the mask tt to -1; otherwise, the mask will be 0.
	 */
	f256_final_reduce(t2);
	f256_final_reduce(t4);
	tt = t2[0] | t2[1] | t2[2] | t2[3] | t2[4]
		| t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
	tt = ((tt | -tt) >> 63) - (uint64_t)1;

	/*
	 * Compute u1*h^2 (in t6) and h^3 (in t5);
	 */
	f256_montysquare(t7, t2);
	f256_montymul(t6, t1, t7);
	f256_montymul(t5, t7, t2);

	/*
	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.

src/ec/ec_p256_m62.c view on Meta::CPAN

	if (klen > 32) {
		return 0;
	}
	z = 0;
	for (u = 0; u < klen; u ++) {
		z |= k[u];
	}
	if (klen == 32) {
		c = 0;
		for (u = 0; u < klen; u ++) {
			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);
		}
	} else {
		c = -1;
	}
	return NEQ(z, 0) & LT0(c);
}

static uint32_t
api_mul(unsigned char *G, size_t Glen,
	const unsigned char *k, size_t klen, int curve)
{
	uint32_t r;
	p256_jacobian P;

	(void)curve;
	if (Glen != 65) {
		return 0;
	}
	r = check_scalar(k, klen);
	r &= point_decode(&P, G);
	p256_mul(&P, k, klen);
	r &= point_encode(G, &P);
	return r;
}

static size_t
api_mulgen(unsigned char *R,
	const unsigned char *k, size_t klen, int curve)
{
	p256_jacobian P;

	(void)curve;
	p256_mulgen(&P, k, klen);
	point_encode(R, &P);
	return 65;
}

static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	/*
	 * We might want to use Shamir's trick here: make a composite
	 * window of u*P+v*Q points, to merge the two doubling-ladders
	 * into one. This, however, has some complications:
	 *
	 *  - During the computation, we may hit the point-at-infinity.
	 *    Thus, we would need p256_add_complete_mixed() (complete
	 *    formulas for point addition), with a higher cost (17 muls
	 *    instead of 11).
	 *
	 *  - A 4-bit window would be too large, since it would involve
	 *    16*16-1 = 255 points. For the same window size as in the
	 *    p256_mul() case, we would need to reduce the window size
	 *    to 2 bits, and thus perform twice as many non-doubling
	 *    point additions.
	 *
	 *  - The window may itself contain the point-at-infinity, and
	 *    thus cannot be in all generality be made of affine points.
	 *    Instead, we would need to make it a window of points in
	 *    Jacobian coordinates. Even p256_add_complete_mixed() would
	 *    be inappropriate.
	 *
	 * For these reasons, the code below performs two separate
	 * point multiplications, then computes the final point addition
	 * (which is both a "normal" addition, and a doubling, to handle
	 * all cases).
	 */

	p256_jacobian P, Q;
	uint32_t r, t, s;
	uint64_t z;

	(void)curve;
	if (len != 65) {
		return 0;
	}
	r = point_decode(&P, A);
	p256_mul(&P, x, xlen);
	if (B == NULL) {
		p256_mulgen(&Q, y, ylen);
	} else {
		r &= point_decode(&Q, B);
		p256_mul(&Q, y, ylen);
	}

	/*
	 * The final addition may fail in case both points are equal.
	 */
	t = p256_add(&P, &Q);
	f256_final_reduce(P.z);
	z = P.z[0] | P.z[1] | P.z[2] | P.z[3] | P.z[4];
	s = EQ((uint32_t)(z | (z >> 32)), 0);
	p256_double(&Q);

	/*
	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
	 * have the following:
	 *
	 *   s = 0, t = 0   return P (normal addition)
	 *   s = 0, t = 1   return P (normal addition)
	 *   s = 1, t = 0   return Q (a 'double' case)
	 *   s = 1, t = 1   report an error (P+Q = 0)
	 */
	CCOPY(s & ~t, &P, &Q, sizeof Q);
	point_encode(A, &P);
	r &= ~(s & t);
	return r;
}

( run in 1.515 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )