From 84dbfaaa1b5d8487308292bb56c4a8abdbd9917b Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Tue, 10 Dec 2024 06:37:13 +0000
Subject: [PATCH] Remove redundant reduction from `unpack_pk()` and
 `unpack_sk()`

The specifications for the polynomial base multiplication currently
require one input to be bound by MLKEM_Q in absolute value. To ensure
this, the unpacking routines `unpack_pk()` and `unpack_sk()` currently
explicitly reduce their output via `poly_reduce()`.

Functionally, this reduction is not needed:
- For the PK, the modulus-check already ensures that the unpacked PK
  will be unsigned canonical.
- For the SK, there is no requirement in the standard to check that
  the SK is reduced, or reduce it after unpacking.
- There is no risk of overflow in the base multiplication even if
  the inputs are bound by 4096 rather than MLKEM_Q.

This commit strengthens the specifications for all polynomial base
multiplication routines
- basemul_cached
- poly_basemul_montgomery_cached
- polyvec_basemul_acc_montgomery_cached
to work with the input bound 4096 instead of MLKEM_Q. Accordingly,
the output bound needs weakening from 3/2 * Q to 2*Q; this could
be much tighter if need be, but it's unnecessary.

The polynomial reductions in `check_pk` and `check_sk` can then
be safely removed.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 mlkem/indcpa.c  | 21 ++++++++-------------
 mlkem/ntt.c     |  7 +++----
 mlkem/ntt.h     |  8 ++++----
 mlkem/params.h  |  1 +
 mlkem/poly.c    |  6 +++---
 mlkem/poly.h    |  6 +++---
 mlkem/polyvec.c |  6 +++---
 mlkem/polyvec.h |  9 ++++-----
 mlkem/reduce.c  |  2 +-
 mlkem/reduce.h  |  8 ++++----
 10 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index 69551e7b9..c3513911b 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -60,14 +60,10 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES],
   polyvec_frombytes(pk, packedpk);
   memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES);
 
-  /*
-   * TODO! We know from the modulus check that this will result in an
-   * unsigned canonical polynomial, but CBMC does not know it. We should
-   * weaken the specification of `unpack_pk()` and all depending functions
-   * to work with the weaker 4096-bound, so that the proofs go through
-   * without the need of this redundant call to polyvec_reduce().
-   */
-  polyvec_reduce(pk);
+  /* NOTE: If a modulus check was conducted on the PK, we know at this
+   * point that the coefficients of `pk` are unsigned canonical. The
+   * specifications and proofs, however, do _not_ assume this, and instead
+   * work with the easily provable bound by 4096. */
 }
 
 /*************************************************
@@ -91,15 +87,14 @@ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk)
  * Description: De-serialize the secret key; inverse of pack_sk
  *
  * Arguments:   - polyvec *sk: pointer to output vector of polynomials (secret
- *key)
+ *                key)
  *              - const uint8_t *packedsk: pointer to input serialized secret
- *key
+ *                key
  **************************************************/
 static void unpack_sk(polyvec *sk,
                       const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES])
 {
   polyvec_frombytes(sk, packedsk);
-  polyvec_reduce(sk);
 }
 
 /*************************************************
@@ -358,7 +353,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *
  * Arguments:   - polyvec *out: Pointer to output polynomial vector
  *              - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain
- *                  and have coefficients of absolute value < MLKEM_Q.
+ *                  and have coefficients of absolute value < 4096.
  *              - polyvec *v: Input polynomial vector. Must be in NTT domain.
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
@@ -373,7 +368,7 @@ __contract__(
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
   requires(forall(int, k0, 0, MLKEM_K - 1,
   forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))))
+    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
   assigns(object_whole(out)))
 {
   int i;
diff --git a/mlkem/ntt.c b/mlkem/ntt.c
index e1a9aacf4..5b17ad0ce 100644
--- a/mlkem/ntt.c
+++ b/mlkem/ntt.c
@@ -59,7 +59,7 @@ __contract__(
   for (j = start; j < start + len; j++)
   __loop__(
     invariant(start <= j && j <= start + len)
-    /* 
+    /*
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
@@ -247,7 +247,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
 {
   int32_t t0, t1;
 
-  BOUND(a, 2, MLKEM_Q, "basemul input bound");
+  BOUND(a, 2, 4096, "basemul input bound");
 
   t0 = (int32_t)a[1] * b_cached;
   t0 += (int32_t)a[0] * b[0];
@@ -258,6 +258,5 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
   r[0] = montgomery_reduce(t0);
   r[1] = montgomery_reduce(t1);
 
-  /* |r[i]| < 3/2 q */
-  BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound");
+  BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound");
 }
diff --git a/mlkem/ntt.h b/mlkem/ntt.h
index 558c1d99f..68c9e6724 100644
--- a/mlkem/ntt.h
+++ b/mlkem/ntt.h
@@ -78,9 +78,9 @@ __contract__(
  *
  * Arguments: - r: Pointer to output polynomial
  *                   Upon return, coefficients are bound by
- *                   3*(q+1)/2 in absolute value.
+ *                   2*MLKEM_Q in absolute value.
  *            - a: Pointer to first input polynomial
- *                   Must be coefficient-wise < q in absolute value.
+ *                   Must be coefficient-wise < 4096 in absolute value.
  *            - b: Pointer to second input polynomial
  *                   Can have arbitrary int16_t coefficients
  *            - b_cached: Some precomputed value, typically derived from
@@ -92,9 +92,9 @@ __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1))
+  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1)))
+  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/mlkem/params.h b/mlkem/params.h
index ab7beb7e1..cd76a7d67 100644
--- a/mlkem/params.h
+++ b/mlkem/params.h
@@ -28,6 +28,7 @@
 
 #define MLKEM_N 256
 #define MLKEM_Q 3329
+#define UINT12_MAX 4095
 
 #define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */
 #define MLKEM_SSBYTES 32  /* size in bytes of shared key */
diff --git a/mlkem/poly.c b/mlkem/poly.c
index 93a663c12..b4e50420c 100644
--- a/mlkem/poly.c
+++ b/mlkem/poly.c
@@ -311,7 +311,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095)))
+    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
   {
     /* REF-CHANGE: Introduce some locals for better readability */
     const uint8_t t0 = a[3 * i + 0];
@@ -456,13 +456,13 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
   int i;
-  POLY_BOUND(b_cache, MLKEM_Q);
+  POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1))))
+    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
diff --git a/mlkem/poly.h b/mlkem/poly.h
index 35990684b..772afefe0 100644
--- a/mlkem/poly.h
+++ b/mlkem/poly.h
@@ -439,7 +439,7 @@ __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095))
+  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
 );
 
 
@@ -637,9 +637,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c
index b658f8544..904bd1d49 100644
--- a/mlkem/polyvec.c
+++ b/mlkem/polyvec.c
@@ -79,7 +79,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
   int i;
   poly t;
 
-  POLYVEC_BOUND(a, MLKEM_Q);
+  POLYVEC_BOUND(a, 4096);
   POLYVEC_BOUND(b, NTT_BOUND);
   POLYVEC_BOUND(b_cache, MLKEM_Q);
 
@@ -98,10 +98,10 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * them from the spec to not unnecessarily constraint native implementations.
    */
   cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)),
+      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
       "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
-  POLY_BOUND(r, MLKEM_K * 3 * HALF_Q);
+  POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h
index d48633fbc..cc3281141 100644
--- a/mlkem/polyvec.h
+++ b/mlkem/polyvec.h
@@ -93,7 +93,7 @@ __contract__(
  *
  * Arguments:   - const polyvec *a: pointer to output vector of polynomials
  *                 (of length MLKEM_POLYVECBYTES). Output will have coefficients
- *                 normalized to [0,..,q-1].
+ *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
@@ -102,7 +102,7 @@ __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
   ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095)))
+        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -179,7 +179,7 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a,
  *              using mulcache for second operand.
  *
  *              Bounds:
- *              - a is assumed to be coefficient-wise < q in absolute value.
+ *              - a is assumed to be coefficient-wise < 4096 in absolute value.
  *              - No bounds guarantees for the coefficients in the result.
  *
  * Arguments:   - poly *r: pointer to output polynomial
@@ -197,9 +197,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-/* Input is coefficient-wise < q in absolute value */
   requires(forall(int, k1, 0, MLKEM_K - 1,
- array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
diff --git a/mlkem/reduce.c b/mlkem/reduce.c
index 771dd7ee2..073dcb97e 100644
--- a/mlkem/reduce.c
+++ b/mlkem/reduce.c
@@ -97,7 +97,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
 int16_t montgomery_reduce(int32_t a)
 {
   int16_t res;
-  SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input");
+  SCALAR_BOUND(a, 2 * UINT12_MAX * 32768, "montgomery_reduce input");
 
   res = montgomery_reduce_generic(a);
 
diff --git a/mlkem/reduce.h b/mlkem/reduce.h
index 24ade7343..e83e26b39 100644
--- a/mlkem/reduce.h
+++ b/mlkem/reduce.h
@@ -20,16 +20,16 @@
  * Description: Montgomery reduction
  *
  * Arguments:   - int32_t a: input integer to be reduced
- *                  Must be smaller than 2 * q * 2^15 in absolute value.
+ *                  Must be smaller than 2 * 2^12 * 2^15 in absolute value.
  *
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 3/2 q in absolute value.
  **************************************************/
 int16_t montgomery_reduce(int32_t a)
 __contract__(
-  requires(a > -(2 * MLKEM_Q * 32768))
-  requires(a <  (2 * MLKEM_Q * 32768))
-  ensures(return_value > -(3 * HALF_Q) && return_value < (3 * HALF_Q))
+  requires(a > -(2 * 4096 * 32768))
+  requires(a <  (2 * 4096 * 32768))
+  ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q)
 );
 
 #define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)