Remove redundant reduction from unpack_pk() and unpack_sk()

The specifications for the polynomial base multiplication currently require one input to be bound by q in absolute value. To ensure this, the unpacking routines `unpack_pk()` and `unpack_sk()` currently explicitly reduce their output via `poly_reduce()`. Functionally, this reduction is not needed: - For the PK, the modulus-check already ensures that the unpacked PK will be unsigned canonical. - For the SK, there is no requirement in the standard to check that the SK is reduced, or reduce it after unpacking. - There is no risk of overflow in the base multiplication even if the inputs are bound by 4096 rather than q: - For the C base multiplication, the input bound before modular reduction is now 2 * 4096 * 2^15, where before it was 2 * q * 2^15 -- both is below 2^31 so we are fine. After reduction, the 3/2 * q bound is weakened to 2 * q (a much better bound would be possible, but is unneeded), which is still sufficient for up to K=4-fold accumulation since 8 * q < INT16_MAX. - For the AArch64 base multiplication, we are accumulating in 32-bit up to a single final reduction. As in the C case, each 32-bit polynomial multiplication is bound by 2 * 4096 * 2^15 = 2^28, so the accumulated value before reduction is < 2^30 < INT32_MAX. - For the x86_64 base multiplication, the reasoning is unchanged: We don't accumulate in 32-bit but conduct a Montgomery multiplication for every monomial in the product, giving an output bound by q per monomial. Up to 8 such are accumulated, giving a bound by 8 * q < INT16_MAX. This commit strengthens the specifications for all polynomial base multiplication routines - basemul_cached - poly_basemul_montgomery_cached - polyvec_basemul_acc_montgomery - polyvec_basemul_acc_montgomery_cached - matvec_mul The polynomial reductions in `check_pk` and `check_sk` can then be safely removed. Signed-off-by: Hanno Becker <[email protected]>
pq-code-package · Dec 10, 2024 · 6fc7eb4 · 6fc7eb4
1 parent f56c5f6
commit 6fc7eb4
Show file tree

Hide file tree

Showing 12 changed files with 43 additions and 56 deletions.
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
@@ -60,14 +60,10 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES],
   polyvec_frombytes(pk, packedpk);
   memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES);
 
-  /*
-   * TODO! We know from the modulus check that this will result in an
-   * unsigned canonical polynomial, but CBMC does not know it. We should
-   * weaken the specification of `unpack_pk()` and all depending functions
-   * to work with the weaker 4096-bound, so that the proofs go through
-   * without the need of this redundant call to polyvec_reduce().
-   */
-  polyvec_reduce(pk);
+  /* NOTE: If a modulus check was conducted on the PK, we know at this
+   * point that the coefficients of `pk` are unsigned canonical. The
+   * specifications and proofs, however, do _not_ assume this, and instead
+   * work with the easily provable bound by 4096. */
 }
 
 /*************************************************
@@ -91,15 +87,14 @@ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk)
  * Description: De-serialize the secret key; inverse of pack_sk
  *
  * Arguments:   - polyvec *sk: pointer to output vector of polynomials (secret
- *key)
+ *                key)
  *              - const uint8_t *packedsk: pointer to input serialized secret
- *key
+ *                key
  **************************************************/
 static void unpack_sk(polyvec *sk,
                       const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES])
 {
   polyvec_frombytes(sk, packedsk);
-  polyvec_reduce(sk);
 }
 
 /*************************************************
@@ -358,7 +353,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *
  * Arguments:   - polyvec *out: Pointer to output polynomial vector
  *              - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain
- *                  and have coefficients of absolute value < MLKEM_Q.
+ *                  and have coefficients of absolute value < 4096.
  *              - polyvec *v: Input polynomial vector. Must be in NTT domain.
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
@@ -373,7 +368,7 @@ __contract__(
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
   requires(forall(int, k0, 0, MLKEM_K - 1,
   forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))))
+    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
   assigns(object_whole(out)))
 {
   int i;

diff --git a/mlkem/native/aarch64/polyvec_clean.S b/mlkem/native/aarch64/polyvec_clean.S
@@ -32,8 +32,8 @@
 // Computes products (a0*b0 + a0*b0t, a0*b1 + a1*b0) in 32-bit.
 //
 // Bounds:
-// - Assume |a| < q, |b|, |bt| < NTT_BOUND
-// - Result: < 2*q*NTT_BOUND
+// - Assume |a| < 4096,
+// - Result: < 2*4096*2^15 = 2^28
 .macro pmull d, a, b
         smull  \d\()0l.4s, \a\()0.4h, \b\()0.4h
         smull2 \d\()0h.4s, \a\()0.8h, \b\()0.8h
@@ -252,11 +252,8 @@ _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
 
         // Bounds:
         //
-        // Each pmull is bound by 2*q*2^15, so the final value
-        // before Montgomery reduction is bound by q*2^18 ~ 2^29.7
-        //
-        // The Montgomery reduction is thus bound by
-        // q * (2^18/2^16 + 1/2) = 4.5 q < 14981
+        // Each pmull is bound by 2*4096*2^15=2^28, so the final value
+        // before Montgomery reduction is bound by 2^30.
 
         mov count, #(MLKEM_N / 16)
 k4_loop_start:

diff --git a/mlkem/native/x86_64/basemul.S b/mlkem/native/x86_64/basemul.S
@@ -104,11 +104,8 @@ vpaddw		%ymm7,%ymm11,%ymm11
 vpsubw		%ymm13,%ymm10,%ymm13
 vpsubw		%ymm12,%ymm6,%ymm6
 
-/* Bounds: Note that we assume that a+b*X is coefficient-wise bound by q in
- * in absolute value: This is coming from the contract for
- * polyvec_basemul_acc_montgomery_cached().
- *
- * Then, each Montgomery multiplication has absolute value < q,
+/* Bounds: Since we are multiplying with signed canonical twiddles,
+ * each Montgomery multiplication has absolute value < q,
  * and hence the coefficients of the output have absolute value < 2q. */
 vmovdqa		%ymm13,(64*\off+ 0)*2(%rdi)
 vmovdqa		%ymm9,(64*\off+16)*2(%rdi)

diff --git a/mlkem/ntt.c b/mlkem/ntt.c
@@ -59,7 +59,7 @@ __contract__(
   for (j = start; j < start + len; j++)
   __loop__(
     invariant(start <= j && j <= start + len)
-    /* 
+    /*
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
@@ -247,7 +247,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
 {
   int32_t t0, t1;
 
-  BOUND(a, 2, MLKEM_Q, "basemul input bound");
+  BOUND(a, 2, 4096, "basemul input bound");
 
   t0 = (int32_t)a[1] * b_cached;
   t0 += (int32_t)a[0] * b[0];
@@ -258,6 +258,5 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
   r[0] = montgomery_reduce(t0);
   r[1] = montgomery_reduce(t1);
 
-  /* |r[i]| < 3/2 q */
-  BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound");
+  BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound");
 }
diff --git a/mlkem/ntt.h b/mlkem/ntt.h
@@ -78,9 +78,9 @@ __contract__(
  *
  * Arguments: - r: Pointer to output polynomial
  *                   Upon return, coefficients are bound by
- *                   3*(q+1)/2 in absolute value.
+ *                   2*MLKEM_Q in absolute value.
  *            - a: Pointer to first input polynomial
- *                   Must be coefficient-wise < q in absolute value.
+ *                   Must be coefficient-wise < 4096 in absolute value.
  *            - b: Pointer to second input polynomial
  *                   Can have arbitrary int16_t coefficients
  *            - b_cached: Some precomputed value, typically derived from
@@ -92,9 +92,9 @@ __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1))
+  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1)))
+  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
 );
 
 

diff --git a/mlkem/params.h b/mlkem/params.h
@@ -28,6 +28,7 @@
 
 #define MLKEM_N 256
 #define MLKEM_Q 3329
+#define UINT12_MAX 4095
 
 #define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */
 #define MLKEM_SSBYTES 32  /* size in bytes of shared key */

diff --git a/mlkem/poly.c b/mlkem/poly.c
@@ -311,7 +311,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095)))
+    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
   {
     /* REF-CHANGE: Introduce some locals for better readability */
     const uint8_t t0 = a[3 * i + 0];
@@ -456,13 +456,13 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
   int i;
-  POLY_BOUND(b_cache, MLKEM_Q);
+  POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1))))
+    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);

diff --git a/mlkem/poly.h b/mlkem/poly.h
@@ -439,7 +439,7 @@ __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095))
+  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
 );
 
 
@@ -637,9 +637,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)

diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c
@@ -79,7 +79,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
   int i;
   poly t;
 
-  POLYVEC_BOUND(a, MLKEM_Q);
+  POLYVEC_BOUND(a, 4096);
   POLYVEC_BOUND(b, NTT_BOUND);
   POLYVEC_BOUND(b_cache, MLKEM_Q);
 
@@ -98,17 +98,17 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * them from the spec to not unnecessarily constraint native implementations.
    */
   cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)),
+      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
       "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
-  POLY_BOUND(r, MLKEM_K * 3 * HALF_Q);
+  POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  POLYVEC_BOUND(a, MLKEM_Q);
+  POLYVEC_BOUND(a, 4096);
   POLYVEC_BOUND(b, NTT_BOUND);
   /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may
    * decide not to use a mulcache. Note that the C backend implementation

diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h
@@ -93,7 +93,7 @@ __contract__(
  *
  * Arguments:   - const polyvec *a: pointer to output vector of polynomials
  *                 (of length MLKEM_POLYVECBYTES). Output will have coefficients
- *                 normalized to [0,..,q-1].
+ *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
@@ -102,7 +102,7 @@ __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
   ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095)))
+        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -171,9 +171,8 @@ __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-/* Input is coefficient-wise < q in absolute value */
   requires(forall(int, k1, 0, MLKEM_K - 1,
- array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -188,7 +187,7 @@ __contract__(
  *              using mulcache for second operand.
  *
  *              Bounds:
- *              - a is assumed to be coefficient-wise < q in absolute value.
+ *              - a is assumed to be coefficient-wise < 4096 in absolute value.
  *              - No bounds guarantees for the coefficients in the result.
  *
  * Arguments:   - poly *r: pointer to output polynomial
@@ -206,9 +205,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-/* Input is coefficient-wise < q in absolute value */
   requires(forall(int, k1, 0, MLKEM_K - 1,
- array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 

diff --git a/mlkem/reduce.c b/mlkem/reduce.c
@@ -97,7 +97,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
 int16_t montgomery_reduce(int32_t a)
 {
   int16_t res;
-  SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input");
+  SCALAR_BOUND(a, 2 * UINT12_MAX * 32768, "montgomery_reduce input");
 
   res = montgomery_reduce_generic(a);
 

diff --git a/mlkem/reduce.h b/mlkem/reduce.h
@@ -20,16 +20,16 @@
  * Description: Montgomery reduction
  *
  * Arguments:   - int32_t a: input integer to be reduced
- *                  Must be smaller than 2 * q * 2^15 in absolute value.
+ *                  Must be smaller than 2 * 2^12 * 2^15 in absolute value.
  *
  * Returns:     integer congruent to a * R^-1 modulo q,
- *              smaller than 3/2 q in absolute value.
+ *              smaller than 2 * q in absolute value.
  **************************************************/
 int16_t montgomery_reduce(int32_t a)
 __contract__(
-  requires(a > -(2 * MLKEM_Q * 32768))
-  requires(a <  (2 * MLKEM_Q * 32768))
-  ensures(return_value > -(3 * HALF_Q) && return_value < (3 * HALF_Q))
+  requires(a > -(2 * 4096 * 32768))
+  requires(a <  (2 * 4096 * 32768))
+  ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q)
 );
 
 #define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)