Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EC] Unify scalar_mul_public for ec_nistp curves #2004

Merged
merged 13 commits into from
Nov 25, 2024
137 changes: 135 additions & 2 deletions crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// | 2. | x | x | x* |
// | 3. | x | x | |
// | 4. | x | x | x* |
// | 5. | | | |
// | 5. | x | x | |
// * For P-256, only the Fiat-crypto implementation in p256.c is replaced.

#include "ec_nistp.h"
Expand Down Expand Up @@ -317,9 +317,13 @@ static void scalar_rwnaf(int16_t *out, size_t window_size,
#define SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS \
(SCALAR_MUL_TABLE_NUM_POINTS * 3 * FELEM_MAX_NUM_OF_LIMBS)

// The maximum number of bits for a scalar.
#define SCALAR_MUL_MAX_SCALAR_BITS (521)

// Maximum number of windows (digits) for a scalar encoding which is
// determined by the maximum scalar bit size -- 521 bits in our case.
#define SCALAR_MUL_MAX_NUM_WINDOWS DIV_AND_CEIL(521, SCALAR_MUL_WINDOW_SIZE)
#define SCALAR_MUL_MAX_NUM_WINDOWS \
DIV_AND_CEIL(SCALAR_MUL_MAX_SCALAR_BITS, SCALAR_MUL_WINDOW_SIZE)

// Generate table of multiples of the input point P = (x_in, y_in, z_in):
// table <-- [2i + 1]P for i in [0, SCALAR_MUL_TABLE_NUM_POINTS - 1].
Expand Down Expand Up @@ -636,3 +640,132 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
cmovznz(y_out, ctx->felem_num_limbs, t, y_tmp, y_res);
cmovznz(z_out, ctx->felem_num_limbs, t, z_tmp, z_res);
}

// Computes [g_scalar]G + [p_scalar]P, where G is the base point of the curve
// curve, and P is the given point (x_p, y_p, z_p).
//
// Both scalar products are computed by the same "textbook" wNAF method,
// with w = 5 for g_scalar and w = 5 for p_scalar.
// For the base point G product we use the first sub-table of the precomputed
// table, while for P we generate the table on-the-fly. The tables hold the
// first 16 odd multiples of G or P:
// g_pre_comp = {[1]G, [3]G, ..., [31]G},
// p_pre_comp = {[1]P, [3]P, ..., [31]P}.
// Computing the negation of a point P = (x, y) is relatively easy:
// -P = (x, -y).
// So we may assume that we also have the negatives of the points in the tables.
//
// The scalars are recoded by the textbook wNAF method to digits, where a digit
// is either a zero or an odd integer in [-31, 31]. The method guarantees that
// each non-zero digit is followed by at least four zeroes.
//
// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
// 1. Initialize the accumulator with the point-at-infinity.
// 2. For i starting from 521 down to 0:
// 3. Double the accumulator (doubling can be skipped while the
// accumulator is equal to the point-at-infinity).
// 4. Read from |p_pre_comp| the point corresponding to the i-th digit of
// p_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// 5. Read from |g_pre_comp| the point corresponding to the i-th digit of
// g_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// Note: this function is NOT constant-time.
void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *g_scalar,
const ec_nistp_felem_limb *x_p,
const ec_nistp_felem_limb *y_p,
const ec_nistp_felem_limb *z_p,
const EC_SCALAR *p_scalar) {

const size_t felem_num_bytes = ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb);

// Table of multiples of P.
ec_nistp_felem_limb p_table[SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS];
generate_table(ctx, p_table, x_p, y_p, z_p);
const size_t p_point_num_limbs = 3 * ctx->felem_num_limbs; // Projective.

// Table of multiples of G.
const ec_nistp_felem_limb *g_table = ctx->scalar_mul_base_table;
const size_t g_point_num_limbs = 2 * ctx->felem_num_limbs; // Affine.

// Recode the scalars.
int8_t p_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
int8_t g_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
ec_compute_wNAF(p_wnaf, p_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);
ec_compute_wNAF(g_wnaf, g_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);

// In the beginning res is set to point-at-infinity, so we set the flag.
int16_t res_is_inf = 1;
int16_t d, is_neg, idx;
ec_nistp_felem ftmp;

for (int i = ctx->felem_num_bits; i >= 0; i--) {
samuel40791765 marked this conversation as resolved.
Show resolved Hide resolved

// If |res| is point-at-infinity there is no point in doubling so skip it.
if (!res_is_inf) {
ctx->point_dbl(x_out, y_out, z_out, x_out, y_out, z_out);
}

// Process the p_scalar digit.
d = p_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
const size_t table_idx = idx * p_point_num_limbs;
OPENSSL_memcpy(x_out, &p_table[table_idx], felem_num_bytes);
OPENSSL_memcpy(y_out, &p_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
OPENSSL_memcpy(z_out, &p_table[table_idx + ctx->felem_num_limbs * 2], felem_num_bytes);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
const ec_nistp_felem_limb *y_tmp;
y_tmp = &p_table[idx * p_point_num_limbs + ctx->felem_num_limbs];
if (is_neg) {
ctx->felem_neg(ftmp, y_tmp);
y_tmp = ftmp;
}
samuel40791765 marked this conversation as resolved.
Show resolved Hide resolved
ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 0,
&p_table[idx * p_point_num_limbs],
y_tmp,
&p_table[idx * p_point_num_limbs + ctx->felem_num_limbs * 2]);
}
}

/* // Process the g_scalar digit. */
d = g_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
const size_t table_idx = idx * g_point_num_limbs;
OPENSSL_memcpy(x_out, &g_table[table_idx], felem_num_bytes);
OPENSSL_memcpy(y_out, &g_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
OPENSSL_memcpy(z_out, ctx->felem_one, felem_num_bytes);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
const ec_nistp_felem_limb *y_tmp ;
y_tmp = &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs];
if (is_neg) {
ctx->felem_neg(ftmp, &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs]);
y_tmp = ftmp;
}
ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the mixed flag explained elsewhere (re removing the comments that were before this step.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Samuel asked the same so I'll add the comment back in my next PR that's going to deal with point addition anyway.

&g_table[idx * g_point_num_limbs], y_tmp, ctx->felem_one);
}
}
}
}
10 changes: 10 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,15 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *scalar);

void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *g_scalar,
const ec_nistp_felem_limb *x_p,
const ec_nistp_felem_limb *y_p,
const ec_nistp_felem_limb *z_p,
const EC_SCALAR *p_scalar);
#endif // EC_NISTP_H

3 changes: 1 addition & 2 deletions crypto/fipsmodule/ec/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,7 @@ void ec_GFp_mont_felem_exp(const EC_GROUP *group, EC_FELEM *out,
// where at most one of any w+1 consecutive digits is non-zero
// with the exception that the most significant digit may be only
// w-1 zeros away from that next non-zero digit.
void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
const EC_SCALAR *scalar, size_t bits, int w);
void ec_compute_wNAF(int8_t *out, const EC_SCALAR *scalar, size_t bits, int w);

int ec_GFp_mont_mul_public_batch(const EC_GROUP *group, EC_JACOBIAN *r,
const EC_SCALAR *g_scalar,
Expand Down
2 changes: 1 addition & 1 deletion crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,

// Set up the coefficients for |p_scalar|.
int8_t p_wNAF[257];
ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
ec_compute_wNAF(p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

// Set |ret| to the point at infinity.
int skip = 1; // Save some point operations.
Expand Down
162 changes: 5 additions & 157 deletions crypto/fipsmodule/ec/p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,6 @@ static p384_limb_t p384_felem_nz(const p384_limb_t in1[P384_NLIMBS]) {
#endif // EC_NISTP_USE_S2N_BIGNUM


static void p384_felem_copy(p384_limb_t out[P384_NLIMBS],
const p384_limb_t in1[P384_NLIMBS]) {
for (size_t i = 0; i < P384_NLIMBS; i++) {
out[i] = in1[i];
}
}

static void p384_from_generic(p384_felem out, const EC_FELEM *in) {
#ifdef OPENSSL_BIG_ENDIAN
uint8_t tmp[P384_EC_FELEM_BYTES];
Expand Down Expand Up @@ -470,26 +463,6 @@ static int ec_GFp_nistp384_cmp_x_coordinate(const EC_GROUP *group,
//
// For detailed analysis of different window sizes see the bottom of this file.

// Constants for scalar encoding in the scalar multiplication functions.
#define P384_MUL_WSIZE (5) // window size w
// Assert the window size is 5 because the pre-computed table in |p384_table.h|
// is generated for window size 5.
OPENSSL_STATIC_ASSERT(P384_MUL_WSIZE == 5,
p384_scalar_mul_window_size_is_not_equal_to_five)

#define P384_MUL_TWO_TO_WSIZE (1 << P384_MUL_WSIZE)

// Number of |P384_MUL_WSIZE|-bit windows in a 384-bit value
#define P384_MUL_NWINDOWS ((384 + P384_MUL_WSIZE - 1)/P384_MUL_WSIZE)

// For the public point in |ec_GFp_nistp384_point_mul_public| function
// we use window size w = 5.
#define P384_MUL_PUB_WSIZE (5)

// We keep only odd multiples in tables, hence the table size is (2^w)/2
#define P384_MUL_TABLE_SIZE (P384_MUL_TWO_TO_WSIZE >> 1)
#define P384_MUL_PUB_TABLE_SIZE (1 << (P384_MUL_PUB_WSIZE - 1))

// Multiplication of an arbitrary point by a scalar, r = [scalar]P.
static void ec_GFp_nistp384_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
const EC_JACOBIAN *p,
Expand Down Expand Up @@ -523,146 +496,21 @@ static void ec_GFp_nistp384_point_mul_base(const EC_GROUP *group,

// Computes [g_scalar]G + [p_scalar]P, where G is the base point of the P-384
// curve, and P is the given point |p|.
//
// Both scalar products are computed by the same "textbook" wNAF method,
// with w = 5 for g_scalar and w = 5 for p_scalar.
// For the base point G product we use the first sub-table of the precomputed
// table |p384_g_pre_comp| from |p384_table.h| file, while for P we generate
// |p_pre_comp| table on-the-fly. The tables hold the first 16 odd multiples
// of G or P:
// g_pre_comp = {[1]G, [3]G, ..., [31]G},
// p_pre_comp = {[1]P, [3]P, ..., [31]P}.
// Computing the negation of a point P = (x, y) is relatively easy:
// -P = (x, -y).
// So we may assume that we also have the negatives of the points in the tables.
//
// The 384-bit scalars are recoded by the textbook wNAF method to 385 digits,
// where a digit is either a zero or an odd integer in [-31, 31]. The method
// guarantees that each non-zero digit is followed by at least four
// zeroes.
//
// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
// 1. Initialize the accumulator with the point-at-infinity.
// 2. For i starting from 384 down to 0:
// 3. Double the accumulator (doubling can be skipped while the
// accumulator is equal to the point-at-infinity).
// 4. Read from |p_pre_comp| the point corresponding to the i-th digit of
// p_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// 5. Read from |g_pre_comp| the point corresponding to the i-th digit of
// g_scalar, negate it if the digit is negative, and add it to the
// accumulator.
//
// Note: this function is NOT constant-time.
static void ec_GFp_nistp384_point_mul_public(const EC_GROUP *group,
EC_JACOBIAN *r,
const EC_SCALAR *g_scalar,
const EC_JACOBIAN *p,
const EC_SCALAR *p_scalar) {

p384_felem res[3] = {{0}, {0}, {0}}, two_p[3] = {{0}, {0}, {0}}, ftmp;

// Table of multiples of P: [2i + 1]P for i in [0, 15].
p384_felem p_pre_comp[P384_MUL_PUB_TABLE_SIZE][3];

// Set the first point in the table to P.
p384_from_generic(p_pre_comp[0][0], &p->X);
p384_from_generic(p_pre_comp[0][1], &p->Y);
p384_from_generic(p_pre_comp[0][2], &p->Z);

// Compute two_p = [2]P.
p384_point_double(two_p[0], two_p[1], two_p[2],
p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]);

// Generate the remaining 15 multiples of P.
for (size_t i = 1; i < P384_MUL_PUB_TABLE_SIZE; i++) {
p384_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
two_p[0], two_p[1], two_p[2], 0 /* both Jacobian */,
p_pre_comp[i - 1][0],
p_pre_comp[i - 1][1],
p_pre_comp[i - 1][2]);
}

// Recode the scalars.
int8_t p_wnaf[385] = {0}, g_wnaf[385] = {0};
ec_compute_wNAF(group, p_wnaf, p_scalar, 384, P384_MUL_PUB_WSIZE);
ec_compute_wNAF(group, g_wnaf, g_scalar, 384, P384_MUL_WSIZE);

// In the beginning res is set to point-at-infinity, so we set the flag.
int16_t res_is_inf = 1;
int16_t d, is_neg, idx;

for (int i = 384; i >= 0; i--) {

// If |res| is point-at-infinity there is no point in doubling so skip it.
if (!res_is_inf) {
p384_point_double(res[0], res[1], res[2], res[0], res[1], res[2]);
}
p384_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}};

// Process the p_scalar digit.
d = p_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
p384_felem_copy(res[0], p_pre_comp[idx][0]);
p384_felem_copy(res[1], p_pre_comp[idx][1]);
p384_felem_copy(res[2], p_pre_comp[idx][2]);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
if (is_neg) {
p384_felem_opp(ftmp, p_pre_comp[idx][1]);
} else {
p384_felem_copy(ftmp, p_pre_comp[idx][1]);
}
p384_point_add(res[0], res[1], res[2],
res[0], res[1], res[2],
0 /* both Jacobian */,
p_pre_comp[idx][0], ftmp, p_pre_comp[idx][2]);
}
}
p384_from_generic(tmp[0], &p->X);
p384_from_generic(tmp[1], &p->Y);
p384_from_generic(tmp[2], &p->Z);

// Process the g_scalar digit.
d = g_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
p384_felem_copy(res[0], p384_g_pre_comp[0][idx][0]);
p384_felem_copy(res[1], p384_g_pre_comp[0][idx][1]);
p384_felem_copy(res[2], p384_felem_one);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
if (is_neg) {
p384_felem_opp(ftmp, p384_g_pre_comp[0][idx][1]);
} else {
p384_felem_copy(ftmp, p384_g_pre_comp[0][idx][1]);
}
// Add the point to the accumulator |res|.
// Note that the points in the pre-computed table are given with affine
// coordinates. The point addition function computes a sum of two points,
// either both given in projective, or one in projective and one in
// affine coordinates. The |mixed| flag indicates the latter option,
// in which case we set the third coordinate of the second point to one.
p384_point_add(res[0], res[1], res[2],
res[0], res[1], res[2],
1 /* mixed */,
p384_g_pre_comp[0][idx][0], ftmp, p384_felem_one);
}
}
}
ec_nistp_scalar_mul_public(p384_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);

// Copy the result to the output.
p384_to_generic(&r->X, res[0]);
p384_to_generic(&r->Y, res[1]);
p384_to_generic(&r->Z, res[2]);
Expand Down
Loading
Loading