From 13496038ad421bceba17e76f4098749364560eeb Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 25 Sep 2024 13:03:59 -0700 Subject: [PATCH 1/2] Use s2n-bignum P-256 scalar multiplication and Montgomery inverse This replaces the general (fresh, not precomputed, point) scalar multiplication with the corresponding function p256_montjscalarmul or p256_montjscalarmul_alt from s2n-bignum, and also replaces the Fermat inverse in p256-nistz.c with the markedly faster and formally verified divstep-based code from s2n-bignum, bignum_montinv_p256. --- crypto/fipsmodule/CMakeLists.txt | 4 + crypto/fipsmodule/ec/p256-nistz.c | 73 +++++++++++++++---- crypto/fipsmodule/ec/p256-nistz.h | 7 ++ .../s2n-bignum/include/s2n-bignum_aws-lc.h | 13 ++++ 4 files changed, 84 insertions(+), 13 deletions(-) diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt index aaa370f935..dbe7f6651e 100644 --- a/crypto/fipsmodule/CMakeLists.txt +++ b/crypto/fipsmodule/CMakeLists.txt @@ -205,6 +205,10 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR set( S2N_BIGNUM_ASM_SOURCES + p256/bignum_montinv_p256.S + p256/p256_montjscalarmul_alt.S + p256/p256_montjscalarmul.S + p384/bignum_add_p384.S p384/bignum_sub_p384.S p384/bignum_neg_p384.S diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index 23003e1e42..ccb9db9aac 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -32,6 +32,10 @@ #include "internal.h" #include "p256-nistz.h" +#if defined(EC_P256_USE_S2N_BIGNUM) +# include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" +#endif + #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \ !defined(OPENSSL_SMALL) @@ -47,19 +51,6 @@ static const BN_ULONG ONE[P256_LIMBS] = { // Precomputed tables for the default generator #include "p256-nistz-table.h" -// Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in -// util.c for details -static crypto_word_t booth_recode_w5(crypto_word_t in) { - crypto_word_t s, d; - - s = ~((in >> 5) - 1); - d = (1 << 6) - in - 1; - d = (d & s) | (in & ~s); - d = (d >> 1) + (d & 1); - - return (d << 1) + (s & 1); -} - static crypto_word_t booth_recode_w7(crypto_word_t in) { crypto_word_t s, d; @@ -119,6 +110,16 @@ static BN_ULONG is_not_zero(BN_ULONG in) { // ecp_nistz256_mod_inverse_sqr_mont sets |r| to (|in| * 2^-256)^-2 * 2^256 mod // p. That is, |r| is the modular inverse square of |in| for input and output in // the Montgomery domain. + +#if defined(EC_P256_USE_S2N_BIGNUM) +static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]) { + BN_ULONG z2[P256_LIMBS]; + ecp_nistz256_sqr_mont(z2,in); + bignum_montinv_p256(r,z2); +} + +#else static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS], const BN_ULONG in[P256_LIMBS]) { // This implements the addition chain described in @@ -185,7 +186,50 @@ static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS], ecp_nistz256_sqr_mont(r, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2 } +#endif + + +// r = p * p_scalar + +#if defined(EC_P256_USE_S2N_BIGNUM) + +static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r, + const EC_JACOBIAN *p, + const EC_SCALAR *p_scalar) { + uint64_t s2n_point[12], s2n_result[12]; + + assert(p != NULL); + assert(p_scalar != NULL); + assert(group->field.N.width == P256_LIMBS); + + OPENSSL_memcpy(s2n_point,p->X.words,32); + OPENSSL_memcpy(s2n_point+4,p->Y.words,32); + OPENSSL_memcpy(s2n_point+8,p->Z.words,32); + + p256_montjscalarmul_selector(s2n_result,(uint64_t*)p_scalar,s2n_point); + + OPENSSL_memcpy(r->X,s2n_result,32); + OPENSSL_memcpy(r->Y,s2n_result+4,32); + OPENSSL_memcpy(r->Z,s2n_result+8,32); +} + +#else + +// Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in +// util.c for details +static crypto_word_t booth_recode_w5(crypto_word_t in) { + crypto_word_t s, d; + + s = ~((in >> 5) - 1); + d = (1 << 6) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + return (d << 1) + (s & 1); +} + // r = p * p_scalar + static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r, const EC_JACOBIAN *p, const EC_SCALAR *p_scalar) { @@ -279,6 +323,9 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r, ecp_nistz256_point_add(r, r, aligned_h); } +#endif + + static crypto_word_t calc_first_wvalue(size_t *index, const uint8_t p_str[33]) { static const size_t kWindowSize = 7; static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1; diff --git a/crypto/fipsmodule/ec/p256-nistz.h b/crypto/fipsmodule/ec/p256-nistz.h index c61018bd21..6adb3ab258 100644 --- a/crypto/fipsmodule/ec/p256-nistz.h +++ b/crypto/fipsmodule/ec/p256-nistz.h @@ -29,6 +29,13 @@ extern "C" { #endif +#if !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + ((defined(OPENSSL_X86_64) && \ + !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)) || \ + defined(OPENSSL_AARCH64)) +#define EC_P256_USE_S2N_BIGNUM +#endif #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \ diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index 87ac773fa5..0974046224 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -50,6 +50,19 @@ static inline uint8_t use_s2n_bignum_alt(void) { } #endif +// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 +// Input x[4]; output z[4] +extern void bignum_montinv_p256(uint64_t z[static 4],const uint64_t x[static 4]); + +// Montgomery-Jacobian form scalar multiplication for P-256 +// Input scalar[4], point[12]; output res[12] +extern void p256_montjscalarmul(uint64_t res[static 12],const uint64_t scalar[static 4],const uint64_t point[static 12]); +extern void p256_montjscalarmul_alt(uint64_t res[static 12],const uint64_t scalar[static 4],const uint64_t point[static 12]); +static inline void p256_montjscalarmul_selector(uint64_t res[static 12], const uint64_t scalar[static 4], const uint64_t point[static 12]) { + if (use_s2n_bignum_alt()) { p256_montjscalarmul_alt(res, scalar, point); } + else { p256_montjscalarmul(res, scalar, point); } +} + // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced // Inputs x[6], y[6]; output z[6] extern void bignum_add_p384(uint64_t z[static 6], const uint64_t x[static 6], const uint64_t y[static 6]); From db4c52c94d52675bbb3438efb0d2c6f755a80137 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 26 Sep 2024 13:37:03 -0700 Subject: [PATCH 2/2] Fix prefix build for new P-256 code Thanks to Torben --- CMakeLists.txt | 3 +++ tests/ci/common_posix_setup.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a6ff8ef6e..57d4926792 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -223,6 +223,9 @@ if(BORINGSSL_PREFIX AND BORINGSSL_PREFIX_SYMBOLS AND GO_EXECUTABLE) COMMAND sed -i.bak '/ edwards25519_/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols.h COMMAND sed -i.bak '/ edwards25519_/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols_asm.h COMMAND sed -i.bak '/ edwards25519_/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols_nasm.inc + COMMAND sed -i.bak '/ p256_montjscalarmul/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols.h + COMMAND sed -i.bak '/ p256_montjscalarmul/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols_asm.h + COMMAND sed -i.bak '/ p256_montjscalarmul/d' ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols_nasm.inc COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols.h.bak ${CMAKE_CURRENT_BINARY_DIR}/symbol_prefix_include/openssl/boringssl_prefix_symbols_asm.h.bak diff --git a/tests/ci/common_posix_setup.sh b/tests/ci/common_posix_setup.sh index f6e6223676..152e2898f1 100644 --- a/tests/ci/common_posix_setup.sh +++ b/tests/ci/common_posix_setup.sh @@ -103,7 +103,7 @@ function verify_symbols_prefixed { # * "\(bignum\|curve25519_x25519\)": match string of either "bignum" or "curve25519_x25519". # Recall that the option "-v" reverse the pattern matching. So, we are really # filtering out lines that contain either "bignum" or "curve25519_x25519". - cat "$BUILD_ROOT"/symbols_final_crypto.txt "$BUILD_ROOT"/symbols_final_ssl.txt | grep -v -e '^_\?\(bignum\|curve25519_x25519\|edwards25519\)' > "$SRC_ROOT"/symbols_final.txt + cat "$BUILD_ROOT"/symbols_final_crypto.txt "$BUILD_ROOT"/symbols_final_ssl.txt | grep -v -e '^_\?\(bignum\|curve25519_x25519\|edwards25519\|p256_montjscalarmul\)' > "$SRC_ROOT"/symbols_final.txt # Now filter out every line that has the unique prefix $CUSTOM_PREFIX. If we # have any lines left, then some symbol(s) weren't prefixed, unexpectedly. if [ $(grep -c -v ${CUSTOM_PREFIX} "$SRC_ROOT"/symbols_final.txt) -ne 0 ]; then