Message ID | 20210305005203.3547587-4-stefanb@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Herbert Xu |
Headers | show |
Series | Add support for x509 certs with NIST P384/256/192 keys | expand |
On Thu, Mar 04, 2021 at 07:51:57PM -0500, Stefan Berger wrote: > From: Saulo Alessandre <saulo.alessandre@tse.jus.br> > > * crypto/ecc.c > - add vli_mmod_fast_384 > - change some routines to pass ecc_curve forward until vli_mmod_fast > > * crypto/ecc.h > - add ECC_CURVE_NIST_P384_DIGITS > - change ECC_MAX_DIGITS to P384 size > > Signed-off-by: Saulo Alessandre <saulo.alessandre@tse.jus.br> > Tested-by: Stefan Berger <stefanb@linux.ibm.com> Another "diffstat". /Jarkko > --- > crypto/ecc.c | 266 +++++++++++++++++++++++++++++++++++++-------------- > crypto/ecc.h | 3 +- > 2 files changed, 194 insertions(+), 75 deletions(-) > > diff --git a/crypto/ecc.c b/crypto/ecc.c > index f6cef5a7942d..c125576cda6b 100644 > --- a/crypto/ecc.c > +++ b/crypto/ecc.c > @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, > } > } > > +#define SL32OR32(x32, y32) (((u64)x32 << 32) | y32) > +#define AND64H(x64) (x64 & 0xffFFffFF00000000ull) > +#define AND64L(x64) (x64 & 0x00000000ffFFffFFull) > + > +/* Computes result = product % curve_prime > + * from "Mathematical routines for the NIST prime elliptic curves" > + */ > +static void vli_mmod_fast_384(u64 *result, const u64 *product, > + const u64 *curve_prime, u64 *tmp) > +{ > + int carry; > + const unsigned int ndigits = 6; > + > + /* t */ > + vli_set(result, product, ndigits); > + > + /* s1 */ > + tmp[0] = 0; // 0 || 0 > + tmp[1] = 0; // 0 || 0 > + tmp[2] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 > + tmp[3] = product[11]>>32; // 0 ||a23 > + tmp[4] = 0; // 0 || 0 > + tmp[5] = 0; // 0 || 0 > + carry = vli_lshift(tmp, tmp, 1, ndigits); > + carry += vli_add(result, result, tmp, ndigits); > + > + /* s2 */ > + tmp[0] = product[6]; //a13||a12 > + tmp[1] = product[7]; //a15||a14 > + tmp[2] = product[8]; //a17||a16 > + tmp[3] = product[9]; //a19||a18 > + tmp[4] = product[10]; //a21||a20 > + tmp[5] = product[11]; //a23||a22 > + carry += vli_add(result, result, tmp, ndigits); > + > + /* s3 */ > + tmp[0] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 > + tmp[1] = SL32OR32(product[6], (product[11]>>32)); //a12||a23 > + tmp[2] = SL32OR32(product[7], (product[6])>>32); //a14||a13 > + tmp[3] = SL32OR32(product[8], (product[7]>>32)); //a16||a15 > + tmp[4] = SL32OR32(product[9], (product[8]>>32)); //a18||a17 > + tmp[5] = SL32OR32(product[10], (product[9]>>32)); //a20||a19 > + carry += vli_add(result, result, tmp, ndigits); > + > + /* s4 */ > + tmp[0] = AND64H(product[11]); //a23|| 0 > + tmp[1] = (product[10]<<32); //a20|| 0 > + tmp[2] = product[6]; //a13||a12 > + tmp[3] = product[7]; //a15||a14 > + tmp[4] = product[8]; //a17||a16 > + tmp[5] = product[9]; //a19||a18 > + carry += vli_add(result, result, tmp, ndigits); > + > + /* s5 */ > + tmp[0] = 0; // 0|| 0 > + tmp[1] = 0; // 0|| 0 > + tmp[2] = product[10]; //a21||a20 > + tmp[3] = product[11]; //a23||a22 > + tmp[4] = 0; // 0|| 0 > + tmp[5] = 0; // 0|| 0 > + carry += vli_add(result, result, tmp, ndigits); > + > + /* s6 */ > + tmp[0] = AND64L(product[10]); // 0 ||a20 > + tmp[1] = AND64H(product[10]); //a21|| 0 > + tmp[2] = product[11]; //a23||a22 > + tmp[3] = 0; // 0 || 0 > + tmp[4] = 0; // 0 || 0 > + tmp[5] = 0; // 0 || 0 > + carry += vli_add(result, result, tmp, ndigits); > + > + /* d1 */ > + tmp[0] = SL32OR32(product[6], (product[11]>>32)); //a12||a23 > + tmp[1] = SL32OR32(product[7], (product[6]>>32)); //a14||a13 > + tmp[2] = SL32OR32(product[8], (product[7]>>32)); //a16||a15 > + tmp[3] = SL32OR32(product[9], (product[8]>>32)); //a18||a17 > + tmp[4] = SL32OR32(product[10], (product[9]>>32)); //a20||a19 > + tmp[5] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 > + carry -= vli_sub(result, result, tmp, ndigits); > + > + /* d2 */ > + tmp[0] = (product[10]<<32); //a20|| 0 > + tmp[1] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 > + tmp[2] = (product[11]>>32); // 0 ||a23 > + tmp[3] = 0; // 0 || 0 > + tmp[4] = 0; // 0 || 0 > + tmp[5] = 0; // 0 || 0 > + carry -= vli_sub(result, result, tmp, ndigits); > + > + /* d3 */ > + tmp[0] = 0; // 0 || 0 > + tmp[1] = AND64H(product[11]); //a23|| 0 > + tmp[2] = product[11]>>32; // 0 ||a23 > + tmp[3] = 0; // 0 || 0 > + tmp[4] = 0; // 0 || 0 > + tmp[5] = 0; // 0 || 0 > + carry -= vli_sub(result, result, tmp, ndigits); > + > + if (carry < 0) { > + do { > + carry += vli_add(result, result, curve_prime, ndigits); > + } while (carry < 0); > + } else { > + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) > + carry -= vli_sub(result, result, curve_prime, ndigits); > + } > + > +} > + > +#undef SL32OR32 > +#undef AND64H > +#undef AND64L > + > /* Computes result = product % curve_prime for different curve_primes. > * > * Note that curve_primes are distinguished just by heuristic check and > * not by complete conformance check. > */ > static bool vli_mmod_fast(u64 *result, u64 *product, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 tmp[2 * ECC_MAX_DIGITS]; > + const u64 *curve_prime = curve->p; > + const unsigned int ndigits = curve->g.ndigits; > > - /* Currently, both NIST primes have -1 in lowest qword. */ > - if (curve_prime[0] != -1ull) { > + /* Currently, all NIST have name nist_.* */ > + if (strncmp(curve->name, "nist_", 5) != 0) { > /* Try to handle Pseudo-Marsenne primes. */ > if (curve_prime[ndigits - 1] == -1ull) { > vli_mmod_special(result, product, curve_prime, > @@ -812,6 +927,9 @@ static bool vli_mmod_fast(u64 *result, u64 *product, > case 4: > vli_mmod_fast_256(result, product, curve_prime, tmp); > break; > + case 6: > + vli_mmod_fast_384(result, product, curve_prime, tmp); > + break; > default: > pr_err_ratelimited("ecc: unsupported digits size!\n"); > return false; > @@ -835,22 +953,22 @@ EXPORT_SYMBOL(vli_mod_mult_slow); > > /* Computes result = (left * right) % curve_prime. */ > static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 product[2 * ECC_MAX_DIGITS]; > > - vli_mult(product, left, right, ndigits); > - vli_mmod_fast(result, product, curve_prime, ndigits); > + vli_mult(product, left, right, curve->g.ndigits); > + vli_mmod_fast(result, product, curve); > } > > /* Computes result = left^2 % curve_prime. */ > static void vli_mod_square_fast(u64 *result, const u64 *left, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 product[2 * ECC_MAX_DIGITS]; > > - vli_square(product, left, ndigits); > - vli_mmod_fast(result, product, curve_prime, ndigits); > + vli_square(product, left, curve->g.ndigits); > + vli_mmod_fast(result, product, curve); > } > > #define EVEN(vli) (!(vli[0] & 1)) > @@ -948,25 +1066,27 @@ static bool ecc_point_is_zero(const struct ecc_point *point) > > /* Double in place */ > static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, > - u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > /* t1 = x, t2 = y, t3 = z */ > u64 t4[ECC_MAX_DIGITS]; > u64 t5[ECC_MAX_DIGITS]; > + const u64 *curve_prime = curve->p; > + const unsigned int ndigits = curve->g.ndigits; > > if (vli_is_zero(z1, ndigits)) > return; > > /* t4 = y1^2 */ > - vli_mod_square_fast(t4, y1, curve_prime, ndigits); > + vli_mod_square_fast(t4, y1, curve); > /* t5 = x1*y1^2 = A */ > - vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits); > + vli_mod_mult_fast(t5, x1, t4, curve); > /* t4 = y1^4 */ > - vli_mod_square_fast(t4, t4, curve_prime, ndigits); > + vli_mod_square_fast(t4, t4, curve); > /* t2 = y1*z1 = z3 */ > - vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits); > + vli_mod_mult_fast(y1, y1, z1, curve); > /* t3 = z1^2 */ > - vli_mod_square_fast(z1, z1, curve_prime, ndigits); > + vli_mod_square_fast(z1, z1, curve); > > /* t1 = x1 + z1^2 */ > vli_mod_add(x1, x1, z1, curve_prime, ndigits); > @@ -975,7 +1095,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, > /* t3 = x1 - z1^2 */ > vli_mod_sub(z1, x1, z1, curve_prime, ndigits); > /* t1 = x1^2 - z1^4 */ > - vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits); > + vli_mod_mult_fast(x1, x1, z1, curve); > > /* t3 = 2*(x1^2 - z1^4) */ > vli_mod_add(z1, x1, x1, curve_prime, ndigits); > @@ -992,7 +1112,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, > /* t1 = 3/2*(x1^2 - z1^4) = B */ > > /* t3 = B^2 */ > - vli_mod_square_fast(z1, x1, curve_prime, ndigits); > + vli_mod_square_fast(z1, x1, curve); > /* t3 = B^2 - A */ > vli_mod_sub(z1, z1, t5, curve_prime, ndigits); > /* t3 = B^2 - 2A = x3 */ > @@ -1000,7 +1120,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, > /* t5 = A - x3 */ > vli_mod_sub(t5, t5, z1, curve_prime, ndigits); > /* t1 = B * (A - x3) */ > - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); > + vli_mod_mult_fast(x1, x1, t5, curve); > /* t4 = B * (A - x3) - y1^4 = y3 */ > vli_mod_sub(t4, x1, t4, curve_prime, ndigits); > > @@ -1010,23 +1130,22 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, > } > > /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ > -static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime, > - unsigned int ndigits) > +static void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve) > { > u64 t1[ECC_MAX_DIGITS]; > > - vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */ > - vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */ > - vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits); /* z^3 */ > - vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */ > + vli_mod_square_fast(t1, z, curve); /* z^2 */ > + vli_mod_mult_fast(x1, x1, t1, curve); /* x1 * z^2 */ > + vli_mod_mult_fast(t1, t1, z, curve); /* z^3 */ > + vli_mod_mult_fast(y1, y1, t1, curve); /* y1 * z^3 */ > } > > /* P = (x1, y1) => 2P, (x2, y2) => P' */ > static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, > - u64 *p_initial_z, u64 *curve_prime, > - unsigned int ndigits) > + u64 *p_initial_z, const struct ecc_curve *curve) > { > u64 z[ECC_MAX_DIGITS]; > + const unsigned int ndigits = curve->g.ndigits; > > vli_set(x2, x1, ndigits); > vli_set(y2, y1, ndigits); > @@ -1037,35 +1156,37 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, > if (p_initial_z) > vli_set(z, p_initial_z, ndigits); > > - apply_z(x1, y1, z, curve_prime, ndigits); > + apply_z(x1, y1, z, curve); > > - ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits); > + ecc_point_double_jacobian(x1, y1, z, curve); > > - apply_z(x2, y2, z, curve_prime, ndigits); > + apply_z(x2, y2, z, curve); > } > > /* Input P = (x1, y1, Z), Q = (x2, y2, Z) > * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) > * or P => P', Q => P + Q > */ > -static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, > - unsigned int ndigits) > +static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, > + const struct ecc_curve *curve) > { > /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ > u64 t5[ECC_MAX_DIGITS]; > + const u64 *curve_prime = curve->p; > + const unsigned int ndigits = curve->g.ndigits; > > /* t5 = x2 - x1 */ > vli_mod_sub(t5, x2, x1, curve_prime, ndigits); > /* t5 = (x2 - x1)^2 = A */ > - vli_mod_square_fast(t5, t5, curve_prime, ndigits); > + vli_mod_square_fast(t5, t5, curve); > /* t1 = x1*A = B */ > - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); > + vli_mod_mult_fast(x1, x1, t5, curve); > /* t3 = x2*A = C */ > - vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); > + vli_mod_mult_fast(x2, x2, t5, curve); > /* t4 = y2 - y1 */ > vli_mod_sub(y2, y2, y1, curve_prime, ndigits); > /* t5 = (y2 - y1)^2 = D */ > - vli_mod_square_fast(t5, y2, curve_prime, ndigits); > + vli_mod_square_fast(t5, y2, curve); > > /* t5 = D - B */ > vli_mod_sub(t5, t5, x1, curve_prime, ndigits); > @@ -1074,11 +1195,11 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, > /* t3 = C - B */ > vli_mod_sub(x2, x2, x1, curve_prime, ndigits); > /* t2 = y1*(C - B) */ > - vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits); > + vli_mod_mult_fast(y1, y1, x2, curve); > /* t3 = B - x3 */ > vli_mod_sub(x2, x1, t5, curve_prime, ndigits); > /* t4 = (y2 - y1)*(B - x3) */ > - vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits); > + vli_mod_mult_fast(y2, y2, x2, curve); > /* t4 = y3 */ > vli_mod_sub(y2, y2, y1, curve_prime, ndigits); > > @@ -1089,22 +1210,24 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, > * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) > * or P => P - Q, Q => P + Q > */ > -static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, > - unsigned int ndigits) > +static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, > + const struct ecc_curve *curve) > { > /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ > u64 t5[ECC_MAX_DIGITS]; > u64 t6[ECC_MAX_DIGITS]; > u64 t7[ECC_MAX_DIGITS]; > + const u64 *curve_prime = curve->p; > + const unsigned int ndigits = curve->g.ndigits; > > /* t5 = x2 - x1 */ > vli_mod_sub(t5, x2, x1, curve_prime, ndigits); > /* t5 = (x2 - x1)^2 = A */ > - vli_mod_square_fast(t5, t5, curve_prime, ndigits); > + vli_mod_square_fast(t5, t5, curve); > /* t1 = x1*A = B */ > - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); > + vli_mod_mult_fast(x1, x1, t5, curve); > /* t3 = x2*A = C */ > - vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); > + vli_mod_mult_fast(x2, x2, t5, curve); > /* t4 = y2 + y1 */ > vli_mod_add(t5, y2, y1, curve_prime, ndigits); > /* t4 = y2 - y1 */ > @@ -1113,29 +1236,29 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, > /* t6 = C - B */ > vli_mod_sub(t6, x2, x1, curve_prime, ndigits); > /* t2 = y1 * (C - B) */ > - vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits); > + vli_mod_mult_fast(y1, y1, t6, curve); > /* t6 = B + C */ > vli_mod_add(t6, x1, x2, curve_prime, ndigits); > /* t3 = (y2 - y1)^2 */ > - vli_mod_square_fast(x2, y2, curve_prime, ndigits); > + vli_mod_square_fast(x2, y2, curve); > /* t3 = x3 */ > vli_mod_sub(x2, x2, t6, curve_prime, ndigits); > > /* t7 = B - x3 */ > vli_mod_sub(t7, x1, x2, curve_prime, ndigits); > /* t4 = (y2 - y1)*(B - x3) */ > - vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits); > + vli_mod_mult_fast(y2, y2, t7, curve); > /* t4 = y3 */ > vli_mod_sub(y2, y2, y1, curve_prime, ndigits); > > /* t7 = (y2 + y1)^2 = F */ > - vli_mod_square_fast(t7, t5, curve_prime, ndigits); > + vli_mod_square_fast(t7, t5, curve); > /* t7 = x3' */ > vli_mod_sub(t7, t7, t6, curve_prime, ndigits); > /* t6 = x3' - B */ > vli_mod_sub(t6, t7, x1, curve_prime, ndigits); > /* t6 = (y2 + y1)*(x3' - B) */ > - vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits); > + vli_mod_mult_fast(t6, t6, t5, curve); > /* t2 = y3' */ > vli_mod_sub(y1, t6, y1, curve_prime, ndigits); > > @@ -1165,41 +1288,37 @@ static void ecc_point_mult(struct ecc_point *result, > vli_set(rx[1], point->x, ndigits); > vli_set(ry[1], point->y, ndigits); > > - xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime, > - ndigits); > + xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve); > > for (i = num_bits - 2; i > 0; i--) { > nb = !vli_test_bit(scalar, i); > - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, > - ndigits); > - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, > - ndigits); > + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve); > + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve); > } > > nb = !vli_test_bit(scalar, 0); > - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, > - ndigits); > + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve); > > /* Find final 1/Z value. */ > /* X1 - X0 */ > vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits); > /* Yb * (X1 - X0) */ > - vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits); > + vli_mod_mult_fast(z, z, ry[1 - nb], curve); > /* xP * Yb * (X1 - X0) */ > - vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits); > + vli_mod_mult_fast(z, z, point->x, curve); > > /* 1 / (xP * Yb * (X1 - X0)) */ > vli_mod_inv(z, z, curve_prime, point->ndigits); > > /* yP / (xP * Yb * (X1 - X0)) */ > - vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits); > + vli_mod_mult_fast(z, z, point->y, curve); > /* Xb * yP / (xP * Yb * (X1 - X0)) */ > - vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits); > + vli_mod_mult_fast(z, z, rx[1 - nb], curve); > /* End 1/Z calculation */ > > - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits); > + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve); > > - apply_z(rx[0], ry[0], z, curve_prime, ndigits); > + apply_z(rx[0], ry[0], z, curve); > > vli_set(result->x, rx[0], ndigits); > vli_set(result->y, ry[0], ndigits); > @@ -1220,9 +1339,9 @@ static void ecc_point_add(const struct ecc_point *result, > vli_mod_sub(z, result->x, p->x, curve->p, ndigits); > vli_set(px, p->x, ndigits); > vli_set(py, p->y, ndigits); > - xycz_add(px, py, result->x, result->y, curve->p, ndigits); > + xycz_add(px, py, result->x, result->y, curve); > vli_mod_inv(z, z, curve->p, ndigits); > - apply_z(result->x, result->y, z, curve->p, ndigits); > + apply_z(result->x, result->y, z, curve); > } > > /* Computes R = u1P + u2Q mod p using Shamir's trick. > @@ -1251,8 +1370,7 @@ void ecc_point_mult_shamir(const struct ecc_point *result, > points[2] = q; > points[3] = ∑ > > - num_bits = max(vli_num_bits(u1, ndigits), > - vli_num_bits(u2, ndigits)); > + num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits)); > i = num_bits - 1; > idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); > point = points[idx]; > @@ -1263,7 +1381,7 @@ void ecc_point_mult_shamir(const struct ecc_point *result, > z[0] = 1; > > for (--i; i >= 0; i--) { > - ecc_point_double_jacobian(rx, ry, z, curve->p, ndigits); > + ecc_point_double_jacobian(rx, ry, z, curve); > idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); > point = points[idx]; > if (point) { > @@ -1273,14 +1391,14 @@ void ecc_point_mult_shamir(const struct ecc_point *result, > > vli_set(tx, point->x, ndigits); > vli_set(ty, point->y, ndigits); > - apply_z(tx, ty, z, curve->p, ndigits); > + apply_z(tx, ty, z, curve); > vli_mod_sub(tz, rx, tx, curve->p, ndigits); > - xycz_add(tx, ty, rx, ry, curve->p, ndigits); > - vli_mod_mult_fast(z, z, tz, curve->p, ndigits); > + xycz_add(tx, ty, rx, ry, curve); > + vli_mod_mult_fast(z, z, tz, curve); > } > } > vli_mod_inv(z, z, curve->p, ndigits); > - apply_z(rx, ry, z, curve->p, ndigits); > + apply_z(rx, ry, z, curve); > } > EXPORT_SYMBOL(ecc_point_mult_shamir); > > @@ -1434,10 +1552,10 @@ int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve, > return -EINVAL; > > /* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */ > - vli_mod_square_fast(yy, pk->y, curve->p, pk->ndigits); /* y^2 */ > - vli_mod_square_fast(xxx, pk->x, curve->p, pk->ndigits); /* x^2 */ > - vli_mod_mult_fast(xxx, xxx, pk->x, curve->p, pk->ndigits); /* x^3 */ > - vli_mod_mult_fast(w, curve->a, pk->x, curve->p, pk->ndigits); /* a·x */ > + vli_mod_square_fast(yy, pk->y, curve); /* y^2 */ > + vli_mod_square_fast(xxx, pk->x, curve); /* x^2 */ > + vli_mod_mult_fast(xxx, xxx, pk->x, curve); /* x^3 */ > + vli_mod_mult_fast(w, curve->a, pk->x, curve); /* a·x */ > vli_mod_add(w, w, curve->b, curve->p, pk->ndigits); /* a·x + b */ > vli_mod_add(w, w, xxx, curve->p, pk->ndigits); /* x^3 + a·x + b */ > if (vli_cmp(yy, w, pk->ndigits) != 0) /* Equation */ > diff --git a/crypto/ecc.h b/crypto/ecc.h > index 2ea86dfb5cf7..8f40ebd74565 100644 > --- a/crypto/ecc.h > +++ b/crypto/ecc.h > @@ -29,7 +29,8 @@ > /* One digit is u64 qword. */ > #define ECC_CURVE_NIST_P192_DIGITS 3 > #define ECC_CURVE_NIST_P256_DIGITS 4 > -#define ECC_MAX_DIGITS (512 / 64) > +#define ECC_CURVE_NIST_P384_DIGITS 6 > +#define ECC_MAX_DIGITS (512 / 64) /* due to ecrdsa */ > > #define ECC_DIGITS_TO_BYTES_SHIFT 3 > > -- > 2.29.2 > >
Stefan, On Thu, Mar 04, 2021 at 07:51:57PM -0500, Stefan Berger wrote: > From: Saulo Alessandre <saulo.alessandre@tse.jus.br> > > * crypto/ecc.c > - add vli_mmod_fast_384 > - change some routines to pass ecc_curve forward until vli_mmod_fast > > * crypto/ecc.h > - add ECC_CURVE_NIST_P384_DIGITS > - change ECC_MAX_DIGITS to P384 size > > Signed-off-by: Saulo Alessandre <saulo.alessandre@tse.jus.br> > Tested-by: Stefan Berger <stefanb@linux.ibm.com> > --- > crypto/ecc.c | 266 +++++++++++++++++++++++++++++++++++++-------------- > crypto/ecc.h | 3 +- > 2 files changed, 194 insertions(+), 75 deletions(-) > > diff --git a/crypto/ecc.c b/crypto/ecc.c > index f6cef5a7942d..c125576cda6b 100644 > --- a/crypto/ecc.c > +++ b/crypto/ecc.c > @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, > ... > /* Computes result = product % curve_prime for different curve_primes. > * > * Note that curve_primes are distinguished just by heuristic check and > * not by complete conformance check. > */ > static bool vli_mmod_fast(u64 *result, u64 *product, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 tmp[2 * ECC_MAX_DIGITS]; > + const u64 *curve_prime = curve->p; > + const unsigned int ndigits = curve->g.ndigits; > > - /* Currently, both NIST primes have -1 in lowest qword. */ > - if (curve_prime[0] != -1ull) { > + /* Currently, all NIST have name nist_.* */ > + if (strncmp(curve->name, "nist_", 5) != 0) { I am not sure, but maybe this strncmp should not be optimized somehow, since vli_mmod_fast could be called quite frequently. Perhaps by integer algo id or even callback? Thanks, > /* Try to handle Pseudo-Marsenne primes. */ > if (curve_prime[ndigits - 1] == -1ull) { > vli_mmod_special(result, product, curve_prime, > @@ -812,6 +927,9 @@ static bool vli_mmod_fast(u64 *result, u64 *product, > case 4: > vli_mmod_fast_256(result, product, curve_prime, tmp); > break; > + case 6: > + vli_mmod_fast_384(result, product, curve_prime, tmp); > + break; > default: > pr_err_ratelimited("ecc: unsupported digits size!\n"); > return false; > @@ -835,22 +953,22 @@ EXPORT_SYMBOL(vli_mod_mult_slow); > > /* Computes result = (left * right) % curve_prime. */ > static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 product[2 * ECC_MAX_DIGITS]; > > - vli_mult(product, left, right, ndigits); > - vli_mmod_fast(result, product, curve_prime, ndigits); > + vli_mult(product, left, right, curve->g.ndigits); > + vli_mmod_fast(result, product, curve); > } > > /* Computes result = left^2 % curve_prime. */ > static void vli_mod_square_fast(u64 *result, const u64 *left, > - const u64 *curve_prime, unsigned int ndigits) > + const struct ecc_curve *curve) > { > u64 product[2 * ECC_MAX_DIGITS]; > > - vli_square(product, left, ndigits); > - vli_mmod_fast(result, product, curve_prime, ndigits); > + vli_square(product, left, curve->g.ndigits); > + vli_mmod_fast(result, product, curve); > } > > #define EVEN(vli) (!(vli[0] & 1))
On 3/6/21 2:25 PM, Vitaly Chikunov wrote: > Stefan, > > On Thu, Mar 04, 2021 at 07:51:57PM -0500, Stefan Berger wrote: >> From: Saulo Alessandre <saulo.alessandre@tse.jus.br> >> >> * crypto/ecc.c >> - add vli_mmod_fast_384 >> - change some routines to pass ecc_curve forward until vli_mmod_fast >> >> * crypto/ecc.h >> - add ECC_CURVE_NIST_P384_DIGITS >> - change ECC_MAX_DIGITS to P384 size >> >> Signed-off-by: Saulo Alessandre <saulo.alessandre@tse.jus.br> >> Tested-by: Stefan Berger <stefanb@linux.ibm.com> >> --- >> crypto/ecc.c | 266 +++++++++++++++++++++++++++++++++++++-------------- >> crypto/ecc.h | 3 +- >> 2 files changed, 194 insertions(+), 75 deletions(-) >> >> diff --git a/crypto/ecc.c b/crypto/ecc.c >> index f6cef5a7942d..c125576cda6b 100644 >> --- a/crypto/ecc.c >> +++ b/crypto/ecc.c >> @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, >> ... >> /* Computes result = product % curve_prime for different curve_primes. >> * >> * Note that curve_primes are distinguished just by heuristic check and >> * not by complete conformance check. >> */ >> static bool vli_mmod_fast(u64 *result, u64 *product, >> - const u64 *curve_prime, unsigned int ndigits) >> + const struct ecc_curve *curve) >> { >> u64 tmp[2 * ECC_MAX_DIGITS]; >> + const u64 *curve_prime = curve->p; >> + const unsigned int ndigits = curve->g.ndigits; >> >> - /* Currently, both NIST primes have -1 in lowest qword. */ >> - if (curve_prime[0] != -1ull) { >> + /* Currently, all NIST have name nist_.* */ >> + if (strncmp(curve->name, "nist_", 5) != 0) { > I am not sure, but maybe this strncmp should not be optimized somehow, > since vli_mmod_fast could be called quite frequently. Perhaps by integer > algo id or even callback? Should be optimized or should not be? You seem to say both. The code code here is shared with ecrdsa. The comparison won't go beyond a single letter considering the naming of the curves define here: "cp256a": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L49 "cp256b": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L82 "cp256c": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L119 "tc512a": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L168 and here: "nist_192": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecc_curve_defs.h#L18 "nist_256": https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecc_curve_defs.h#L45 All the ecrdsa curves were previously evaluating 'curve_prime[0] != -1ull', so it doesn't change anything. Stefan
Stefan, On Sat, Mar 06, 2021 at 06:29:18PM -0500, Stefan Berger wrote: > On 3/6/21 2:25 PM, Vitaly Chikunov wrote: > > > > On Thu, Mar 04, 2021 at 07:51:57PM -0500, Stefan Berger wrote: > > > From: Saulo Alessandre <saulo.alessandre@tse.jus.br> > > > > > > * crypto/ecc.c > > > - add vli_mmod_fast_384 > > > - change some routines to pass ecc_curve forward until vli_mmod_fast > > > > > > * crypto/ecc.h > > > - add ECC_CURVE_NIST_P384_DIGITS > > > - change ECC_MAX_DIGITS to P384 size > > > > > > Signed-off-by: Saulo Alessandre <saulo.alessandre@tse.jus.br> > > > Tested-by: Stefan Berger <stefanb@linux.ibm.com> > > > --- > > > crypto/ecc.c | 266 +++++++++++++++++++++++++++++++++++++-------------- > > > crypto/ecc.h | 3 +- > > > 2 files changed, 194 insertions(+), 75 deletions(-) > > > > > > diff --git a/crypto/ecc.c b/crypto/ecc.c > > > index f6cef5a7942d..c125576cda6b 100644 > > > --- a/crypto/ecc.c > > > +++ b/crypto/ecc.c > > > @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, > > > ... > > > /* Computes result = product % curve_prime for different curve_primes. > > > * > > > * Note that curve_primes are distinguished just by heuristic check and > > > * not by complete conformance check. > > > */ > > > static bool vli_mmod_fast(u64 *result, u64 *product, > > > - const u64 *curve_prime, unsigned int ndigits) > > > + const struct ecc_curve *curve) > > > { > > > u64 tmp[2 * ECC_MAX_DIGITS]; > > > + const u64 *curve_prime = curve->p; > > > + const unsigned int ndigits = curve->g.ndigits; > > > - /* Currently, both NIST primes have -1 in lowest qword. */ > > > - if (curve_prime[0] != -1ull) { > > > + /* Currently, all NIST have name nist_.* */ > > > + if (strncmp(curve->name, "nist_", 5) != 0) { > > I am not sure, but maybe this strncmp should not be optimized somehow, > > since vli_mmod_fast could be called quite frequently. Perhaps by integer > > algo id or even callback? > > Should be optimized or should not be? You seem to say both. Excuse me for the typo. I meant "should be optimized". I think, maybe it's time to add algo selector id (for the case statement, for example instead of `switch (ndigits)') or just callback for a low level mmod function. If you think this would not impact performance then nevermind. Thanks, > > The code code here is shared with ecrdsa. The comparison won't go beyond a > single letter considering the naming of the curves define here: > > "cp256a": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L49 > > "cp256b": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L82 > > "cp256c": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L119 > > "tc512a": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecrdsa_defs.h#L168 > > and here: > > "nist_192": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecc_curve_defs.h#L18 > > "nist_256": > https://elixir.bootlin.com/linux/v5.11.3/source/crypto/ecc_curve_defs.h#L45 > > > All the ecrdsa curves were previously evaluating 'curve_prime[0] != -1ull', > so it doesn't change anything. > > Stefan >
On 3/6/21 7:03 PM, Vitaly Chikunov wrote: > Stefan, > > On Sat, Mar 06, 2021 at 06:29:18PM -0500, Stefan Berger wrote: >> On 3/6/21 2:25 PM, Vitaly Chikunov wrote: >>> On Thu, Mar 04, 2021 at 07:51:57PM -0500, Stefan Berger wrote: >>>> From: Saulo Alessandre <saulo.alessandre@tse.jus.br> >>>> >>>> * crypto/ecc.c >>>> - add vli_mmod_fast_384 >>>> - change some routines to pass ecc_curve forward until vli_mmod_fast >>>> >>>> * crypto/ecc.h >>>> - add ECC_CURVE_NIST_P384_DIGITS >>>> - change ECC_MAX_DIGITS to P384 size >>>> >>>> Signed-off-by: Saulo Alessandre <saulo.alessandre@tse.jus.br> >>>> Tested-by: Stefan Berger <stefanb@linux.ibm.com> >>>> --- >>>> crypto/ecc.c | 266 +++++++++++++++++++++++++++++++++++++-------------- >>>> crypto/ecc.h | 3 +- >>>> 2 files changed, 194 insertions(+), 75 deletions(-) >>>> >>>> diff --git a/crypto/ecc.c b/crypto/ecc.c >>>> index f6cef5a7942d..c125576cda6b 100644 >>>> --- a/crypto/ecc.c >>>> +++ b/crypto/ecc.c >>>> @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, >>>> ... >>>> /* Computes result = product % curve_prime for different curve_primes. >>>> * >>>> * Note that curve_primes are distinguished just by heuristic check and >>>> * not by complete conformance check. >>>> */ >>>> static bool vli_mmod_fast(u64 *result, u64 *product, >>>> - const u64 *curve_prime, unsigned int ndigits) >>>> + const struct ecc_curve *curve) >>>> { >>>> u64 tmp[2 * ECC_MAX_DIGITS]; >>>> + const u64 *curve_prime = curve->p; >>>> + const unsigned int ndigits = curve->g.ndigits; >>>> - /* Currently, both NIST primes have -1 in lowest qword. */ >>>> - if (curve_prime[0] != -1ull) { >>>> + /* Currently, all NIST have name nist_.* */ >>>> + if (strncmp(curve->name, "nist_", 5) != 0) { >>> I am not sure, but maybe this strncmp should not be optimized somehow, >>> since vli_mmod_fast could be called quite frequently. Perhaps by integer >>> algo id or even callback? >> Should be optimized or should not be? You seem to say both. > Excuse me for the typo. I meant "should be optimized". I think, maybe > it's time to add algo selector id (for the case statement, for example > instead of `switch (ndigits)') or just callback for a low level mmod > function. > > If you think this would not impact performance then nevermind. I think it would only be a few cycles. Of course we could introduce a flag to indicate nist functions (rather than using strncmp on the name) or work with the callbacks (only for the nist functions?) as you mentioned, but maybe that's something we could do after? Either way we would have to pass the ecc_curve pointer all the way into vli_mmod_fast. So this change here is preparing for this as well. Stefan
diff --git a/crypto/ecc.c b/crypto/ecc.c index f6cef5a7942d..c125576cda6b 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -778,18 +778,133 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, } } +#define SL32OR32(x32, y32) (((u64)x32 << 32) | y32) +#define AND64H(x64) (x64 & 0xffFFffFF00000000ull) +#define AND64L(x64) (x64 & 0x00000000ffFFffFFull) + +/* Computes result = product % curve_prime + * from "Mathematical routines for the NIST prime elliptic curves" + */ +static void vli_mmod_fast_384(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + int carry; + const unsigned int ndigits = 6; + + /* t */ + vli_set(result, product, ndigits); + + /* s1 */ + tmp[0] = 0; // 0 || 0 + tmp[1] = 0; // 0 || 0 + tmp[2] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 + tmp[3] = product[11]>>32; // 0 ||a23 + tmp[4] = 0; // 0 || 0 + tmp[5] = 0; // 0 || 0 + carry = vli_lshift(tmp, tmp, 1, ndigits); + carry += vli_add(result, result, tmp, ndigits); + + /* s2 */ + tmp[0] = product[6]; //a13||a12 + tmp[1] = product[7]; //a15||a14 + tmp[2] = product[8]; //a17||a16 + tmp[3] = product[9]; //a19||a18 + tmp[4] = product[10]; //a21||a20 + tmp[5] = product[11]; //a23||a22 + carry += vli_add(result, result, tmp, ndigits); + + /* s3 */ + tmp[0] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 + tmp[1] = SL32OR32(product[6], (product[11]>>32)); //a12||a23 + tmp[2] = SL32OR32(product[7], (product[6])>>32); //a14||a13 + tmp[3] = SL32OR32(product[8], (product[7]>>32)); //a16||a15 + tmp[4] = SL32OR32(product[9], (product[8]>>32)); //a18||a17 + tmp[5] = SL32OR32(product[10], (product[9]>>32)); //a20||a19 + carry += vli_add(result, result, tmp, ndigits); + + /* s4 */ + tmp[0] = AND64H(product[11]); //a23|| 0 + tmp[1] = (product[10]<<32); //a20|| 0 + tmp[2] = product[6]; //a13||a12 + tmp[3] = product[7]; //a15||a14 + tmp[4] = product[8]; //a17||a16 + tmp[5] = product[9]; //a19||a18 + carry += vli_add(result, result, tmp, ndigits); + + /* s5 */ + tmp[0] = 0; // 0|| 0 + tmp[1] = 0; // 0|| 0 + tmp[2] = product[10]; //a21||a20 + tmp[3] = product[11]; //a23||a22 + tmp[4] = 0; // 0|| 0 + tmp[5] = 0; // 0|| 0 + carry += vli_add(result, result, tmp, ndigits); + + /* s6 */ + tmp[0] = AND64L(product[10]); // 0 ||a20 + tmp[1] = AND64H(product[10]); //a21|| 0 + tmp[2] = product[11]; //a23||a22 + tmp[3] = 0; // 0 || 0 + tmp[4] = 0; // 0 || 0 + tmp[5] = 0; // 0 || 0 + carry += vli_add(result, result, tmp, ndigits); + + /* d1 */ + tmp[0] = SL32OR32(product[6], (product[11]>>32)); //a12||a23 + tmp[1] = SL32OR32(product[7], (product[6]>>32)); //a14||a13 + tmp[2] = SL32OR32(product[8], (product[7]>>32)); //a16||a15 + tmp[3] = SL32OR32(product[9], (product[8]>>32)); //a18||a17 + tmp[4] = SL32OR32(product[10], (product[9]>>32)); //a20||a19 + tmp[5] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 + carry -= vli_sub(result, result, tmp, ndigits); + + /* d2 */ + tmp[0] = (product[10]<<32); //a20|| 0 + tmp[1] = SL32OR32(product[11], (product[10]>>32)); //a22||a21 + tmp[2] = (product[11]>>32); // 0 ||a23 + tmp[3] = 0; // 0 || 0 + tmp[4] = 0; // 0 || 0 + tmp[5] = 0; // 0 || 0 + carry -= vli_sub(result, result, tmp, ndigits); + + /* d3 */ + tmp[0] = 0; // 0 || 0 + tmp[1] = AND64H(product[11]); //a23|| 0 + tmp[2] = product[11]>>32; // 0 ||a23 + tmp[3] = 0; // 0 || 0 + tmp[4] = 0; // 0 || 0 + tmp[5] = 0; // 0 || 0 + carry -= vli_sub(result, result, tmp, ndigits); + + if (carry < 0) { + do { + carry += vli_add(result, result, curve_prime, ndigits); + } while (carry < 0); + } else { + while (carry || vli_cmp(curve_prime, result, ndigits) != 1) + carry -= vli_sub(result, result, curve_prime, ndigits); + } + +} + +#undef SL32OR32 +#undef AND64H +#undef AND64L + /* Computes result = product % curve_prime for different curve_primes. * * Note that curve_primes are distinguished just by heuristic check and * not by complete conformance check. */ static bool vli_mmod_fast(u64 *result, u64 *product, - const u64 *curve_prime, unsigned int ndigits) + const struct ecc_curve *curve) { u64 tmp[2 * ECC_MAX_DIGITS]; + const u64 *curve_prime = curve->p; + const unsigned int ndigits = curve->g.ndigits; - /* Currently, both NIST primes have -1 in lowest qword. */ - if (curve_prime[0] != -1ull) { + /* Currently, all NIST have name nist_.* */ + if (strncmp(curve->name, "nist_", 5) != 0) { /* Try to handle Pseudo-Marsenne primes. */ if (curve_prime[ndigits - 1] == -1ull) { vli_mmod_special(result, product, curve_prime, @@ -812,6 +927,9 @@ static bool vli_mmod_fast(u64 *result, u64 *product, case 4: vli_mmod_fast_256(result, product, curve_prime, tmp); break; + case 6: + vli_mmod_fast_384(result, product, curve_prime, tmp); + break; default: pr_err_ratelimited("ecc: unsupported digits size!\n"); return false; @@ -835,22 +953,22 @@ EXPORT_SYMBOL(vli_mod_mult_slow); /* Computes result = (left * right) % curve_prime. */ static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, - const u64 *curve_prime, unsigned int ndigits) + const struct ecc_curve *curve) { u64 product[2 * ECC_MAX_DIGITS]; - vli_mult(product, left, right, ndigits); - vli_mmod_fast(result, product, curve_prime, ndigits); + vli_mult(product, left, right, curve->g.ndigits); + vli_mmod_fast(result, product, curve); } /* Computes result = left^2 % curve_prime. */ static void vli_mod_square_fast(u64 *result, const u64 *left, - const u64 *curve_prime, unsigned int ndigits) + const struct ecc_curve *curve) { u64 product[2 * ECC_MAX_DIGITS]; - vli_square(product, left, ndigits); - vli_mmod_fast(result, product, curve_prime, ndigits); + vli_square(product, left, curve->g.ndigits); + vli_mmod_fast(result, product, curve); } #define EVEN(vli) (!(vli[0] & 1)) @@ -948,25 +1066,27 @@ static bool ecc_point_is_zero(const struct ecc_point *point) /* Double in place */ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, - u64 *curve_prime, unsigned int ndigits) + const struct ecc_curve *curve) { /* t1 = x, t2 = y, t3 = z */ u64 t4[ECC_MAX_DIGITS]; u64 t5[ECC_MAX_DIGITS]; + const u64 *curve_prime = curve->p; + const unsigned int ndigits = curve->g.ndigits; if (vli_is_zero(z1, ndigits)) return; /* t4 = y1^2 */ - vli_mod_square_fast(t4, y1, curve_prime, ndigits); + vli_mod_square_fast(t4, y1, curve); /* t5 = x1*y1^2 = A */ - vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits); + vli_mod_mult_fast(t5, x1, t4, curve); /* t4 = y1^4 */ - vli_mod_square_fast(t4, t4, curve_prime, ndigits); + vli_mod_square_fast(t4, t4, curve); /* t2 = y1*z1 = z3 */ - vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits); + vli_mod_mult_fast(y1, y1, z1, curve); /* t3 = z1^2 */ - vli_mod_square_fast(z1, z1, curve_prime, ndigits); + vli_mod_square_fast(z1, z1, curve); /* t1 = x1 + z1^2 */ vli_mod_add(x1, x1, z1, curve_prime, ndigits); @@ -975,7 +1095,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, /* t3 = x1 - z1^2 */ vli_mod_sub(z1, x1, z1, curve_prime, ndigits); /* t1 = x1^2 - z1^4 */ - vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits); + vli_mod_mult_fast(x1, x1, z1, curve); /* t3 = 2*(x1^2 - z1^4) */ vli_mod_add(z1, x1, x1, curve_prime, ndigits); @@ -992,7 +1112,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, /* t1 = 3/2*(x1^2 - z1^4) = B */ /* t3 = B^2 */ - vli_mod_square_fast(z1, x1, curve_prime, ndigits); + vli_mod_square_fast(z1, x1, curve); /* t3 = B^2 - A */ vli_mod_sub(z1, z1, t5, curve_prime, ndigits); /* t3 = B^2 - 2A = x3 */ @@ -1000,7 +1120,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, /* t5 = A - x3 */ vli_mod_sub(t5, t5, z1, curve_prime, ndigits); /* t1 = B * (A - x3) */ - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + vli_mod_mult_fast(x1, x1, t5, curve); /* t4 = B * (A - x3) - y1^4 = y3 */ vli_mod_sub(t4, x1, t4, curve_prime, ndigits); @@ -1010,23 +1130,22 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, } /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ -static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime, - unsigned int ndigits) +static void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve) { u64 t1[ECC_MAX_DIGITS]; - vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */ - vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */ - vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits); /* z^3 */ - vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */ + vli_mod_square_fast(t1, z, curve); /* z^2 */ + vli_mod_mult_fast(x1, x1, t1, curve); /* x1 * z^2 */ + vli_mod_mult_fast(t1, t1, z, curve); /* z^3 */ + vli_mod_mult_fast(y1, y1, t1, curve); /* y1 * z^3 */ } /* P = (x1, y1) => 2P, (x2, y2) => P' */ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, - u64 *p_initial_z, u64 *curve_prime, - unsigned int ndigits) + u64 *p_initial_z, const struct ecc_curve *curve) { u64 z[ECC_MAX_DIGITS]; + const unsigned int ndigits = curve->g.ndigits; vli_set(x2, x1, ndigits); vli_set(y2, y1, ndigits); @@ -1037,35 +1156,37 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, if (p_initial_z) vli_set(z, p_initial_z, ndigits); - apply_z(x1, y1, z, curve_prime, ndigits); + apply_z(x1, y1, z, curve); - ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits); + ecc_point_double_jacobian(x1, y1, z, curve); - apply_z(x2, y2, z, curve_prime, ndigits); + apply_z(x2, y2, z, curve); } /* Input P = (x1, y1, Z), Q = (x2, y2, Z) * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) * or P => P', Q => P + Q */ -static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, - unsigned int ndigits) +static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, + const struct ecc_curve *curve) { /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ u64 t5[ECC_MAX_DIGITS]; + const u64 *curve_prime = curve->p; + const unsigned int ndigits = curve->g.ndigits; /* t5 = x2 - x1 */ vli_mod_sub(t5, x2, x1, curve_prime, ndigits); /* t5 = (x2 - x1)^2 = A */ - vli_mod_square_fast(t5, t5, curve_prime, ndigits); + vli_mod_square_fast(t5, t5, curve); /* t1 = x1*A = B */ - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + vli_mod_mult_fast(x1, x1, t5, curve); /* t3 = x2*A = C */ - vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + vli_mod_mult_fast(x2, x2, t5, curve); /* t4 = y2 - y1 */ vli_mod_sub(y2, y2, y1, curve_prime, ndigits); /* t5 = (y2 - y1)^2 = D */ - vli_mod_square_fast(t5, y2, curve_prime, ndigits); + vli_mod_square_fast(t5, y2, curve); /* t5 = D - B */ vli_mod_sub(t5, t5, x1, curve_prime, ndigits); @@ -1074,11 +1195,11 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, /* t3 = C - B */ vli_mod_sub(x2, x2, x1, curve_prime, ndigits); /* t2 = y1*(C - B) */ - vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits); + vli_mod_mult_fast(y1, y1, x2, curve); /* t3 = B - x3 */ vli_mod_sub(x2, x1, t5, curve_prime, ndigits); /* t4 = (y2 - y1)*(B - x3) */ - vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits); + vli_mod_mult_fast(y2, y2, x2, curve); /* t4 = y3 */ vli_mod_sub(y2, y2, y1, curve_prime, ndigits); @@ -1089,22 +1210,24 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) * or P => P - Q, Q => P + Q */ -static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, - unsigned int ndigits) +static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, + const struct ecc_curve *curve) { /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ u64 t5[ECC_MAX_DIGITS]; u64 t6[ECC_MAX_DIGITS]; u64 t7[ECC_MAX_DIGITS]; + const u64 *curve_prime = curve->p; + const unsigned int ndigits = curve->g.ndigits; /* t5 = x2 - x1 */ vli_mod_sub(t5, x2, x1, curve_prime, ndigits); /* t5 = (x2 - x1)^2 = A */ - vli_mod_square_fast(t5, t5, curve_prime, ndigits); + vli_mod_square_fast(t5, t5, curve); /* t1 = x1*A = B */ - vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits); + vli_mod_mult_fast(x1, x1, t5, curve); /* t3 = x2*A = C */ - vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits); + vli_mod_mult_fast(x2, x2, t5, curve); /* t4 = y2 + y1 */ vli_mod_add(t5, y2, y1, curve_prime, ndigits); /* t4 = y2 - y1 */ @@ -1113,29 +1236,29 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, /* t6 = C - B */ vli_mod_sub(t6, x2, x1, curve_prime, ndigits); /* t2 = y1 * (C - B) */ - vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits); + vli_mod_mult_fast(y1, y1, t6, curve); /* t6 = B + C */ vli_mod_add(t6, x1, x2, curve_prime, ndigits); /* t3 = (y2 - y1)^2 */ - vli_mod_square_fast(x2, y2, curve_prime, ndigits); + vli_mod_square_fast(x2, y2, curve); /* t3 = x3 */ vli_mod_sub(x2, x2, t6, curve_prime, ndigits); /* t7 = B - x3 */ vli_mod_sub(t7, x1, x2, curve_prime, ndigits); /* t4 = (y2 - y1)*(B - x3) */ - vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits); + vli_mod_mult_fast(y2, y2, t7, curve); /* t4 = y3 */ vli_mod_sub(y2, y2, y1, curve_prime, ndigits); /* t7 = (y2 + y1)^2 = F */ - vli_mod_square_fast(t7, t5, curve_prime, ndigits); + vli_mod_square_fast(t7, t5, curve); /* t7 = x3' */ vli_mod_sub(t7, t7, t6, curve_prime, ndigits); /* t6 = x3' - B */ vli_mod_sub(t6, t7, x1, curve_prime, ndigits); /* t6 = (y2 + y1)*(x3' - B) */ - vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits); + vli_mod_mult_fast(t6, t6, t5, curve); /* t2 = y3' */ vli_mod_sub(y1, t6, y1, curve_prime, ndigits); @@ -1165,41 +1288,37 @@ static void ecc_point_mult(struct ecc_point *result, vli_set(rx[1], point->x, ndigits); vli_set(ry[1], point->y, ndigits); - xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime, - ndigits); + xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve); for (i = num_bits - 2; i > 0; i--) { nb = !vli_test_bit(scalar, i); - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, - ndigits); - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, - ndigits); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve); + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve); } nb = !vli_test_bit(scalar, 0); - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime, - ndigits); + xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve); /* Find final 1/Z value. */ /* X1 - X0 */ vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits); /* Yb * (X1 - X0) */ - vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits); + vli_mod_mult_fast(z, z, ry[1 - nb], curve); /* xP * Yb * (X1 - X0) */ - vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits); + vli_mod_mult_fast(z, z, point->x, curve); /* 1 / (xP * Yb * (X1 - X0)) */ vli_mod_inv(z, z, curve_prime, point->ndigits); /* yP / (xP * Yb * (X1 - X0)) */ - vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits); + vli_mod_mult_fast(z, z, point->y, curve); /* Xb * yP / (xP * Yb * (X1 - X0)) */ - vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits); + vli_mod_mult_fast(z, z, rx[1 - nb], curve); /* End 1/Z calculation */ - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits); + xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve); - apply_z(rx[0], ry[0], z, curve_prime, ndigits); + apply_z(rx[0], ry[0], z, curve); vli_set(result->x, rx[0], ndigits); vli_set(result->y, ry[0], ndigits); @@ -1220,9 +1339,9 @@ static void ecc_point_add(const struct ecc_point *result, vli_mod_sub(z, result->x, p->x, curve->p, ndigits); vli_set(px, p->x, ndigits); vli_set(py, p->y, ndigits); - xycz_add(px, py, result->x, result->y, curve->p, ndigits); + xycz_add(px, py, result->x, result->y, curve); vli_mod_inv(z, z, curve->p, ndigits); - apply_z(result->x, result->y, z, curve->p, ndigits); + apply_z(result->x, result->y, z, curve); } /* Computes R = u1P + u2Q mod p using Shamir's trick. @@ -1251,8 +1370,7 @@ void ecc_point_mult_shamir(const struct ecc_point *result, points[2] = q; points[3] = ∑ - num_bits = max(vli_num_bits(u1, ndigits), - vli_num_bits(u2, ndigits)); + num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits)); i = num_bits - 1; idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); point = points[idx]; @@ -1263,7 +1381,7 @@ void ecc_point_mult_shamir(const struct ecc_point *result, z[0] = 1; for (--i; i >= 0; i--) { - ecc_point_double_jacobian(rx, ry, z, curve->p, ndigits); + ecc_point_double_jacobian(rx, ry, z, curve); idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); point = points[idx]; if (point) { @@ -1273,14 +1391,14 @@ void ecc_point_mult_shamir(const struct ecc_point *result, vli_set(tx, point->x, ndigits); vli_set(ty, point->y, ndigits); - apply_z(tx, ty, z, curve->p, ndigits); + apply_z(tx, ty, z, curve); vli_mod_sub(tz, rx, tx, curve->p, ndigits); - xycz_add(tx, ty, rx, ry, curve->p, ndigits); - vli_mod_mult_fast(z, z, tz, curve->p, ndigits); + xycz_add(tx, ty, rx, ry, curve); + vli_mod_mult_fast(z, z, tz, curve); } } vli_mod_inv(z, z, curve->p, ndigits); - apply_z(rx, ry, z, curve->p, ndigits); + apply_z(rx, ry, z, curve); } EXPORT_SYMBOL(ecc_point_mult_shamir); @@ -1434,10 +1552,10 @@ int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve, return -EINVAL; /* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */ - vli_mod_square_fast(yy, pk->y, curve->p, pk->ndigits); /* y^2 */ - vli_mod_square_fast(xxx, pk->x, curve->p, pk->ndigits); /* x^2 */ - vli_mod_mult_fast(xxx, xxx, pk->x, curve->p, pk->ndigits); /* x^3 */ - vli_mod_mult_fast(w, curve->a, pk->x, curve->p, pk->ndigits); /* a·x */ + vli_mod_square_fast(yy, pk->y, curve); /* y^2 */ + vli_mod_square_fast(xxx, pk->x, curve); /* x^2 */ + vli_mod_mult_fast(xxx, xxx, pk->x, curve); /* x^3 */ + vli_mod_mult_fast(w, curve->a, pk->x, curve); /* a·x */ vli_mod_add(w, w, curve->b, curve->p, pk->ndigits); /* a·x + b */ vli_mod_add(w, w, xxx, curve->p, pk->ndigits); /* x^3 + a·x + b */ if (vli_cmp(yy, w, pk->ndigits) != 0) /* Equation */ diff --git a/crypto/ecc.h b/crypto/ecc.h index 2ea86dfb5cf7..8f40ebd74565 100644 --- a/crypto/ecc.h +++ b/crypto/ecc.h @@ -29,7 +29,8 @@ /* One digit is u64 qword. */ #define ECC_CURVE_NIST_P192_DIGITS 3 #define ECC_CURVE_NIST_P256_DIGITS 4 -#define ECC_MAX_DIGITS (512 / 64) +#define ECC_CURVE_NIST_P384_DIGITS 6 +#define ECC_MAX_DIGITS (512 / 64) /* due to ecrdsa */ #define ECC_DIGITS_TO_BYTES_SHIFT 3