address review 3 comments

aws · Jul 25, 2024 · c439bf0 · c439bf0
1 parent 087bf5c
commit c439bf0
Show file tree

Hide file tree

Showing 5 changed files with 367 additions and 357 deletions.
diff --git a/crypto/fipsmodule/bn/asm/rsaz-2k-avx512.pl b/crypto/fipsmodule/bn/asm/rsaz-2k-avx512.pl
@@ -381,10 +381,10 @@ sub amm52x20_x1_norm {
 
 ###############################################################################
 # void rsaz_amm52x20_x2_ifma256(BN_ULONG out[2][20],
-#                                    const BN_ULONG a[2][20],
-#                                    const BN_ULONG b[2][20],
-#                                    const BN_ULONG m[2][20],
-#                                    const BN_ULONG k0[2]);
+#                               const BN_ULONG a[2][20],
+#                               const BN_ULONG b[2][20],
+#                               const BN_ULONG m[2][20],
+#                               const BN_ULONG k0[2]);
 ###############################################################################
 
 $code.=<<___;

diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
@@ -1324,7 +1324,7 @@ int BN_mod_exp_mont_consttime_x2(BIGNUM *rr1, const BIGNUM *a1, const BIGNUM *p1
     }
 
     int mod_bits = BN_num_bits(m1);
-    ret = rsaz_mod_exp_avx512_x2(rr1->d, a1->d, p1->d, m1->d,
+    ret = RSAZ_mod_exp_avx512_x2(rr1->d, a1->d, p1->d, m1->d,
                                  in_mont1->RR.d, in_mont1->n0[0],
                                  rr2->d, a2->d, p2->d, m2->d,
                                  in_mont2->RR.d, in_mont2->n0[0],

diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
@@ -805,176 +805,6 @@ void bn_little_endian_to_words(BN_ULONG *out, size_t out_len, const uint8_t *in,
 void bn_words_to_little_endian(uint8_t *out, size_t out_len, const BN_ULONG *in, const size_t in_len);
 
 
-// Naming convention for the following functions:
-//
-//   * amm: Almost Montgomery Multiplication
-//   * ams: Almost Montgomery Squaring
-//   * 52xZZ: data represented as array of ZZ digits in 52-bit radix
-//   * _x1_/_x2_:  1 or 2 independent inputs/outputs
-//   * ifma256: uses 256-bit wide IFMA ISA (AVX512_IFMA256)
-//
-//
-// Almost Montgomery Multiplication (AMM) for 20-digit number in radix
-// 2^52.
-//
-// AMM is defined as presented in the paper [1].
-//
-// The input and output are presented in 2^52 radix domain, i.e.
-// |res|, |a|, |b|, |m| are arrays of 20 64-bit qwords with 12 high
-// bits zeroed.  |k0| is a Montgomery coefficient, which is here k0 =
-// -1/m mod 2^64
-//
-// NB: the AMM implementation does not perform "conditional"
-// subtraction step specified in the original algorithm as according
-// to the Lemma 1 from the paper [2], the result will be always < 2*m
-// and can be used as a direct input to the next AMM iteration.  This
-// post-condition is true, provided the correct parameter |s| (notion
-// of the Lemma 1 from [2]) is chosen, i.e.  s >= n + 2 * k, which
-// matches our case: 1040 > 1024 + 2 * 1.
-//
-// [1] Gueron, S. Efficient software implementations of modular
-//     exponentiation.  DOI: 10.1007/s13389-012-0031-5
-// [2] Gueron, S. Enhanced Montgomery Multiplication.  DOI:
-//     10.1007/3-540-36400-5_5
-void rsaz_amm52x20_x1_ifma256(BN_ULONG *res, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              BN_ULONG k0);
-
-// Dual Almost Montgomery Multiplication for 20-digit number in radix
-// 2^52
-//
-// See description of rsaz_amm52x20_x1_ifma256() above for
-// details about Almost Montgomery Multiplication algorithm and
-// function input parameters description.
-//
-// This function does two AMMs for two independent inputs, hence dual.
-void rsaz_amm52x20_x2_ifma256(BN_ULONG *out, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              const BN_ULONG k0[2]);
-
-// Constant time extraction from the precomputed table of powers
-// base^i, where i = 0..2^EXP_WIN_SIZE-1
-//
-// The input |red_table| contains precomputations for two independent
-// base values.  |red_table_idx1| and |red_table_idx2| are
-// corresponding power indexes.
-//
-// Extracted value (output) is two 20-digit numbers in 2^52 radix.
-//
-// EXP_WIN_SIZE = 5
-void extract_multiplier_2x20_win5(BN_ULONG *red_Y,
-                                  const BN_ULONG *red_table,
-                                  int red_table_idx1, int red_table_idx2);
-
-// Almost Montgomery Multiplication (AMM) for 30-digit number in radix
-// 2^52.
-//
-// AMM is defined as presented in the paper [1].
-//
-// The input and output are presented in 2^52 radix domain, i.e.
-// |res|, |a|, |b|, |m| are arrays of 32 64-bit qwords with 12 high
-// bits zeroed
-//
-// NOTE: the function uses zero-padded data - the 2 high QWs are padding.
-//
-// |k0| is a Montgomery coefficient, which is here k0 = -1/m mod 2^64
-//
-// NB: the AMM implementation does not perform "conditional"
-// subtraction step specified in the original algorithm as according
-// to the Lemma 1 from the paper [2], the result will be always < 2*m
-// and can be used as a direct input to the next AMM iteration.  This
-// post-condition is true, provided the correct parameter |s| (notion
-// of the Lemma 1 from [2]) is chosen, i.e.  s >= n + 2 * k, which
-// matches our case: 1560 > 1536 + 2 * 1.
-//
-// [1] Gueron, S. Efficient software implementations of modular
-//     exponentiation.  DOI: 10.1007/s13389-012-0031-5
-// [2] Gueron, S. Enhanced Montgomery Multiplication.  DOI:
-//     10.1007/3-540-36400-5_5
-void rsaz_amm52x30_x1_ifma256(BN_ULONG *res, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              BN_ULONG k0);
-// Dual Almost Montgomery Multiplication for 30-digit number in radix
-// 2^52
-//
-// See description of rsaz_amm52x30_x1_ifma256() above for
-// details about Almost Montgomery Multiplication algorithm and
-// function input parameters description.
-//
-// This function does two AMMs for two independent inputs, hence dual.
-//
-// NOTE: the function uses zero-padded data - the 2 high QWs are padding.
-void rsaz_amm52x30_x2_ifma256(BN_ULONG *out, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              const BN_ULONG k0[2]);
-
-// Constant time extraction from the precomputed table of powers
-// base^i, where i = 0..2^EXP_WIN_SIZE-1
-//
-// The input |red_table| contains precomputations for two independent
-// base values.  |red_table_idx1| and |red_table_idx2| are
-// corresponding power indexes.
-//
-// Extracted value (output) is two (30 + 2)-digit numbers in 2^52
-// radix.  (The 2 high QWs are zero padding.)
-//
-// EXP_WIN_SIZE = 5
-void extract_multiplier_2x30_win5(BN_ULONG *red_Y,
-                                  const BN_ULONG *red_table,
-                                  int red_table_idx1, int red_table_idx2);
-
-// Almost Montgomery Multiplication (AMM) for 40-digit number in radix
-// 2^52.
-//
-// AMM is defined as presented in the paper [1].
-//
-// The input and output are presented in 2^52 radix domain, i.e.
-// |res|, |a|, |b|, |m| are arrays of 40 64-bit qwords with 12 high
-// bits zeroed.  |k0| is a Montgomery coefficient, which is here k0 =
-// -1/m mod 2^64
-//
-// NB: the AMM implementation does not perform "conditional"
-// subtraction step specified in the original algorithm as according
-// to the Lemma 1 from the paper [2], the result will be always < 2*m
-// and can be used as a direct input to the next AMM iteration.  This
-// post-condition is true, provided the correct parameter |s| (notion
-// of the Lemma 1 from [2]) is chosen, i.e.  s >= n + 2 * k, which
-// matches our case: 2080 > 2048 + 2 * 1.
-//
-// [1] Gueron, S. Efficient software implementations of modular
-//     exponentiation.  DOI: 10.1007/s13389-012-0031-5
-// [2] Gueron, S. Enhanced Montgomery Multiplication.  DOI:
-//     10.1007/3-540-36400-5_5
-void rsaz_amm52x40_x1_ifma256(BN_ULONG *res, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              BN_ULONG k0);
-
-// Dual Almost Montgomery Multiplication for 40-digit number in radix
-// 2^52
-//
-// See description of rsaz_amm52x40_x1_ifma256() above for
-// details about Almost Montgomery Multiplication algorithm and
-// function input parameters description.
-//
-// This function does two AMMs for two independent inputs, hence dual.
-void rsaz_amm52x40_x2_ifma256(BN_ULONG *out, const BN_ULONG *a,
-                              const BN_ULONG *b, const BN_ULONG *m,
-                              const BN_ULONG k0[2]);
-
-// Constant time extraction from the precomputed table of powers base^i, where
-//    i = 0..2^EXP_WIN_SIZE-1
-//
-// The input |red_table| contains precomputations for two independent base values.
-// |red_table_idx1| and |red_table_idx2| are corresponding power indexes.
-//
-// Extracted value (output) is two 40-digit numbers in 2^52 radix.
-//
-// EXP_WIN_SIZE = 5
-void extract_multiplier_2x40_win5(BN_ULONG *red_Y,
-                                  const BN_ULONG *red_table,
-                                  int red_table_idx1, int red_table_idx2);
-
-
 #if defined(__cplusplus)
 }  // extern C
 #endif