From c1ff2608f94e931d92a172d81d82f7cff5dde9ca Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:28:33 +0000
Subject: [PATCH 1/9] Hoist matrix-vector multiplication into separate function

Attempts to verify the toplevel MLKEM-INDCPA API have showed
that the matrix-vector multiplication embedded in the keypair
generation (and, likely, also the one in the encapsulation)
are causing performance issues for CBMC verification.

This commit hoists the matrix-vector multiplication loop,
excluding the preceeding mulcache computation, into a helper
function `matvec_mul`. This helper function is specified
and proved independently.

To make this work, the call to `poly_tomont()` in the matvec
multiplication loop inside key generation needs to be hoisted
out of the loop and replaced by a call to the new function
`polyvec_tomont()`, which is also given the obvious spec+proof.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 cbmc/proofs/matvec_mul/Makefile               | 54 +++++++++++++++++++
 cbmc/proofs/matvec_mul/cbmc-proof.txt         |  3 ++
 cbmc/proofs/matvec_mul/matvec_mul_harness.c   | 32 +++++++++++
 cbmc/proofs/polyvec_tomont/Makefile           | 54 +++++++++++++++++++
 cbmc/proofs/polyvec_tomont/cbmc-proof.txt     |  3 ++
 .../polyvec_tomont/polyvec_tomont_harness.c   | 27 ++++++++++
 mlkem/indcpa.c                                | 54 +++++++++++++------
 mlkem/polyvec.c                               |  7 +++
 mlkem/polyvec.h                               | 20 +++++++
 9 files changed, 238 insertions(+), 16 deletions(-)
 create mode 100644 cbmc/proofs/matvec_mul/Makefile
 create mode 100644 cbmc/proofs/matvec_mul/cbmc-proof.txt
 create mode 100644 cbmc/proofs/matvec_mul/matvec_mul_harness.c
 create mode 100644 cbmc/proofs/polyvec_tomont/Makefile
 create mode 100644 cbmc/proofs/polyvec_tomont/cbmc-proof.txt
 create mode 100644 cbmc/proofs/polyvec_tomont/polyvec_tomont_harness.c

diff --git a/cbmc/proofs/matvec_mul/Makefile b/cbmc/proofs/matvec_mul/Makefile
new file mode 100644
index 000000000..48c3d6d1d
--- /dev/null
+++ b/cbmc/proofs/matvec_mul/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = matvec_mul_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = matvec_mul
+
+DEFINES +=
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET += matvec_mul.0:4 # Maximum value of MLKEM_K
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mlkem/indcpa.c
+
+CHECK_FUNCTION_CONTRACTS=matvec_mul
+USE_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)polyvec_mulcache_compute $(MLKEM_NAMESPACE)polyvec_basemul_acc_montgomery_cached
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = $(MLKEM_NAMESPACE)matvec_mul
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 10
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mlkem/poly.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/cbmc/proofs/matvec_mul/cbmc-proof.txt b/cbmc/proofs/matvec_mul/cbmc-proof.txt
new file mode 100644
index 000000000..3d1913ebf
--- /dev/null
+++ b/cbmc/proofs/matvec_mul/cbmc-proof.txt
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# This file marks this directory as containing a CBMC proof.
diff --git a/cbmc/proofs/matvec_mul/matvec_mul_harness.c b/cbmc/proofs/matvec_mul/matvec_mul_harness.c
new file mode 100644
index 000000000..d20e92e9a
--- /dev/null
+++ b/cbmc/proofs/matvec_mul/matvec_mul_harness.c
@@ -0,0 +1,32 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/*
+ * Insert copyright notice
+ */
+
+/**
+ * @file matvec_mul_harness.c
+ * @brief Implements the proof harness for poly_reduce function.
+ */
+
+/*
+ * Insert project header files that
+ *   - include the declaration of the function
+ *   - include the types needed to declare function arguments
+ */
+#include "indcpa.h"
+#include "polyvec.h"
+
+void matvec_mul(polyvec *out, polyvec const *a, polyvec const *v,
+                polyvec_mulcache const *vc);
+
+/**
+ * @brief Starting point for formal analysis
+ *
+ */
+void harness(void) {
+  polyvec *out, *a, *v;
+  polyvec_mulcache *vc;
+  matvec_mul(out, a, v, vc);
+}
diff --git a/cbmc/proofs/polyvec_tomont/Makefile b/cbmc/proofs/polyvec_tomont/Makefile
new file mode 100644
index 000000000..425a41ad0
--- /dev/null
+++ b/cbmc/proofs/polyvec_tomont/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = polyvec_tomont_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = polyvec_tomont
+
+DEFINES +=
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET += $(MLKEM_NAMESPACE)polyvec_tomont.0:4 # Largest value of MLKEM_K
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mlkem/polyvec.c
+
+CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)polyvec_tomont
+USE_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)poly_tomont
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = $(MLKEM_NAMESPACE)polyvec_tomont
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 8
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mlkem/poly.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/cbmc/proofs/polyvec_tomont/cbmc-proof.txt b/cbmc/proofs/polyvec_tomont/cbmc-proof.txt
new file mode 100644
index 000000000..3d1913ebf
--- /dev/null
+++ b/cbmc/proofs/polyvec_tomont/cbmc-proof.txt
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# This file marks this directory as containing a CBMC proof.
diff --git a/cbmc/proofs/polyvec_tomont/polyvec_tomont_harness.c b/cbmc/proofs/polyvec_tomont/polyvec_tomont_harness.c
new file mode 100644
index 000000000..9a08b554f
--- /dev/null
+++ b/cbmc/proofs/polyvec_tomont/polyvec_tomont_harness.c
@@ -0,0 +1,27 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/*
+ * Insert copyright notice
+ */
+
+/**
+ * @file polyvec_tomont_harness.c
+ * @brief Implements the proof harness for polyvec_tomont function.
+ */
+
+/*
+ * Insert project header files that
+ *   - include the declaration of the function
+ *   - include the types needed to declare function arguments
+ */
+#include <polyvec.h>
+
+/**
+ * @brief Starting point for formal analysis
+ *
+ */
+void harness(void) {
+  polyvec *a;
+  polyvec_tomont(a);
+}
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index 033327507..c888f5587 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -352,6 +352,39 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES],
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 }
 
+/*************************************************
+ * Name:        matvec_mul
+ *
+ * Description: Computes matrix-vector product in NTT domain,
+ *              via Montgomery multiplication.
+ *
+ * Arguments:   - polyvec *out: Pointer to output polynomial vector
+ *              - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain
+ *                  and have coefficients of absolute value < MLKEM_Q.
+ *              - polyvec *v: Input polynomial vector. Must be in NTT domain.
+ *              - polyvec *vc: Mulcache for v, computed via
+ *                  polyvec_mulcache_compute().
+ **************************************************/
+// clang-format off
+STATIC_TESTABLE
+void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+    const polyvec_mulcache *vc)
+  REQUIRES(IS_FRESH(out, sizeof(polyvec)))
+  REQUIRES(IS_FRESH(a, sizeof(polyvec) * MLKEM_K))
+  REQUIRES(IS_FRESH(v, sizeof(polyvec)))
+  REQUIRES(IS_FRESH(vc, sizeof(polyvec_mulcache)))
+  REQUIRES(FORALL(int, k0, 0, MLKEM_K - 1,
+   FORALL(int, k1, 0, MLKEM_K - 1,
+     ARRAY_IN_BOUNDS(int, k2, 0, MLKEM_N - 1,
+       a[k0].vec[k1].coeffs, -(MLKEM_Q - 1), (MLKEM_Q - 1)))))
+  ASSIGNS(OBJECT_WHOLE(out))
+// clang-format on
+{
+  for (int i = 0; i < MLKEM_K; i++) {
+    polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc);
+  }
+}
+
 /*************************************************
  * Name:        indcpa_keypair_derand
  *
@@ -371,7 +404,6 @@ STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES]) {
-  unsigned int i;
   uint8_t buf[2 * MLKEM_SYMBYTES] ALIGN;
   const uint8_t *publicseed = buf;
   const uint8_t *noiseseed = buf + MLKEM_SYMBYTES;
@@ -404,13 +436,8 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
   polyvec_ntt(&e);
 
   polyvec_mulcache_compute(&skpv_cache, &skpv);
-
-  // matrix-vector multiplication
-  for (i = 0; i < MLKEM_K; i++) {
-    polyvec_basemul_acc_montgomery_cached(&pkpv.vec[i], &a[i], &skpv,
-                                          &skpv_cache);
-    poly_tomont(&pkpv.vec[i]);
-  }
+  matvec_mul(&pkpv, a, &skpv, &skpv_cache);
+  polyvec_tomont(&pkpv);
 
   // Arithmetic cannot overflow, see static assertion at the top
   polyvec_add(&pkpv, &e);
@@ -446,11 +473,10 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                 const uint8_t coins[MLKEM_SYMBYTES]) {
-  unsigned int i;
   uint8_t seed[MLKEM_SYMBYTES] ALIGN;
   polyvec sp, pkpv, ep, at[MLKEM_K], b;
-  polyvec_mulcache sp_cache;
   poly v, k, epp;
+  polyvec_mulcache sp_cache;
 
   unpack_pk(&pkpv, seed, pk);
   poly_frommsg(&k, m);
@@ -474,13 +500,9 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 #endif
 
   polyvec_ntt(&sp);
-  polyvec_mulcache_compute(&sp_cache, &sp);
-
-  // matrix-vector multiplication
-  for (i = 0; i < MLKEM_K; i++) {
-    polyvec_basemul_acc_montgomery_cached(&b.vec[i], &at[i], &sp, &sp_cache);
-  }
 
+  polyvec_mulcache_compute(&sp_cache, &sp);
+  matvec_mul(&b, at, &sp, &sp_cache);
   polyvec_basemul_acc_montgomery_cached(&v, &pkpv, &sp, &sp_cache);
 
   polyvec_invntt_tomont(&b);
diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c
index c8c6c1779..037544114 100644
--- a/mlkem/polyvec.c
+++ b/mlkem/polyvec.c
@@ -355,3 +355,10 @@ void polyvec_add(polyvec *r, const polyvec *b) {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
+
+void polyvec_tomont(polyvec *r) {
+  unsigned int i;
+  for (i = 0; i < MLKEM_K; i++) {
+    poly_tomont(&r->vec[i]);
+  }
+}
diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h
index f5682b4c6..ecc20336c 100644
--- a/mlkem/polyvec.h
+++ b/mlkem/polyvec.h
@@ -216,4 +216,24 @@ ENSURES(FORALL(int, j, 0, MLKEM_K - 1,
 ASSIGNS(OBJECT_WHOLE(r));
 // clang-format on
 
+// clang-format off
+#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont)
+/*************************************************
+ * Name:        polyvec_tomont
+ *
+ * Description: Inplace conversion of all coefficients of a polynomial
+ *              vector from normal domain to Montgomery domain
+ *
+ *              Bounds: Output < q in absolute value.
+ *
+ **************************************************/
+void polyvec_tomont(polyvec *r)
+  REQUIRES(IS_FRESH(r, sizeof(polyvec)))
+  ASSIGNS(OBJECT_UPTO(r, sizeof(polyvec)))
+  ENSURES(FORALL(int, j, 0, MLKEM_K - 1,
+    ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1,
+                    r->vec[j].coeffs, -(MLKEM_Q - 1), (MLKEM_Q - 1))))
+  ASSIGNS(OBJECT_WHOLE(r));
+// clang-format on
+
 #endif

From 89990457990761e221942e28eb7cf29cf5931ebb Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:32:46 +0000
Subject: [PATCH 2/9] Strengthen assigns clause in
 polyvec_basemul_acc_montgomery_cached

Previously, the spec for polyvec_basemul_acc_montgomery_cached()
would mark the entire destination object around the target
polynomial as tainted. This is too strong if the caller passes
a polynomial which is part of a polynomial vector -- which is
the case during matrix vector multiplication.

This commit strengthens the assignment clause for
polyvec_basemul_acc_montgomery_cached to mark only a poly-sized
slice as tainted.

Similarly, the specification of poly_tomont() is strengthened.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 mlkem/poly.h    | 4 ++--
 mlkem/polyvec.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlkem/poly.h b/mlkem/poly.h
index edb52527d..72fd1f18e 100644
--- a/mlkem/poly.h
+++ b/mlkem/poly.h
@@ -570,6 +570,7 @@ ASSIGNS(OBJECT_WHOLE(r))
 ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, -3 * HALF_Q + 1, 3 * HALF_Q - 1));
 // clang-format on
 
+// clang-format off
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
 /*************************************************
  * Name:        poly_tomont
@@ -582,9 +583,8 @@ ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, -3 * HALF_Q + 1, 3 *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
 void poly_tomont(poly *r)
-    // clang-format off
 REQUIRES(IS_FRESH(r, sizeof(poly)))
-ASSIGNS(OBJECT_WHOLE(r))
+ASSIGNS(OBJECT_UPTO(r, sizeof(poly)))
 ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, -(MLKEM_Q - 1), (MLKEM_Q - 1)));
 // clang-format on
 
diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h
index ecc20336c..271a128bf 100644
--- a/mlkem/polyvec.h
+++ b/mlkem/polyvec.h
@@ -130,7 +130,7 @@ REQUIRES(IS_FRESH(b_cache, sizeof(polyvec_mulcache)))
 REQUIRES(FORALL(int, k1, 0, MLKEM_K - 1,
  ARRAY_IN_BOUNDS(int, k2, 0, MLKEM_N - 1,
    a->vec[k1].coeffs, -(MLKEM_Q - 1), (MLKEM_Q - 1))))
-ASSIGNS(OBJECT_WHOLE(r));
+ASSIGNS(OBJECT_UPTO(r, sizeof(poly)));
 // clang-format on
 
 // REF-CHANGE: This function does not exist in the reference implementation

From 67a1a4137d8110949a2f6c7a349c0351d992e1de Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:35:49 +0000
Subject: [PATCH 3/9] CBMC: Add spec (but no proof) of polyvec_ntt

Our type+memory safety proof in CBMC does not require any specific
knowledge of what the NTT does. We do, however, need a bound on the
coefficients of the output polynomial, in order to prove type safety
(non-overflow) when the caller subsequently adds to the output.

The definition of the NTT and invNTT bounds are, likely temporarily,
moved to poly.h, to avoid a cyclic dependency between headers.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 mlkem/ntt.c     |  1 +
 mlkem/ntt.h     |  6 ------
 mlkem/poly.h    |  6 ++++++
 mlkem/polyvec.h | 10 +++++++++-
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/mlkem/ntt.c b/mlkem/ntt.c
index d4d455ab2..ca123ff4b 100644
--- a/mlkem/ntt.c
+++ b/mlkem/ntt.c
@@ -6,6 +6,7 @@
 
 #include "arith_native.h"
 #include "debug/debug.h"
+#include "ntt.h"
 
 /* Code to generate zetas and zetas_inv used in the number-theoretic transform:
 
diff --git a/mlkem/ntt.h b/mlkem/ntt.h
index 58d2eb622..0add34db2 100644
--- a/mlkem/ntt.h
+++ b/mlkem/ntt.h
@@ -17,12 +17,6 @@ void poly_ntt(poly *r);
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
 void poly_invntt_tomont(poly *r);
 
-// Absolute exclusive upper bound for the output of the inverse NTT
-#define INVNTT_BOUND 8 * MLKEM_Q
-
-// Absolute exclusive upper bound for the output of the forward NTT
-#define NTT_BOUND 8 * MLKEM_Q
-
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
 /************************************************************
  * Name: basemul_cached
diff --git a/mlkem/poly.h b/mlkem/poly.h
index 72fd1f18e..57fab57e3 100644
--- a/mlkem/poly.h
+++ b/mlkem/poly.h
@@ -9,6 +9,12 @@
 #include "reduce.h"
 #include "verify.h"
 
+// Absolute exclusive upper bound for the output of the inverse NTT
+#define INVNTT_BOUND (8 * MLKEM_Q)
+
+// Absolute exclusive upper bound for the output of the forward NTT
+#define NTT_BOUND (8 * MLKEM_Q)
+
 /*
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h
index 271a128bf..525ac3b0f 100644
--- a/mlkem/polyvec.h
+++ b/mlkem/polyvec.h
@@ -88,8 +88,16 @@ ENSURES(FORALL(int, k0, 0, MLKEM_K - 1,
                         r->vec[k0].coeffs, 0, 4095)))
 ASSIGNS(OBJECT_WHOLE(r));  // clang-format on
 
+// clang-format off
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
-void polyvec_ntt(polyvec *r);
+void polyvec_ntt(polyvec *r)
+  REQUIRES(IS_FRESH(r, sizeof(polyvec)))
+  ASSIGNS(OBJECT_WHOLE(r))
+  ENSURES(FORALL(int, j, 0, MLKEM_K - 1,
+    ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1,
+                    r->vec[j].coeffs, -NTT_BOUND + 1, NTT_BOUND - 1)));
+// clang-format on
+
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
 void polyvec_invntt_tomont(polyvec *r);
 

From 6f4b5812710e85205efbabd8cbc1eafe6b92faf9 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:37:40 +0000
Subject: [PATCH 4/9] Strengthen the assign specifications for
 poly_cbd_eta{1,2}

Previously, the assign clauses in poly_cbd_eta{1,2} would see the
entire object containing the target polynomial as tainted. This is
too strong if the target polynoimal is part of a polynomial vector.

This commit strengthens the specifications to only taint the
poly-sized slice spanned by the target polynomial.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 mlkem/cbd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlkem/cbd.h b/mlkem/cbd.h
index d3309882b..9210fdf30 100644
--- a/mlkem/cbd.h
+++ b/mlkem/cbd.h
@@ -21,7 +21,7 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
     // clang-format off
 REQUIRES(IS_FRESH(r, sizeof(poly)))
 REQUIRES(IS_FRESH(buf, MLKEM_ETA1 * MLKEM_N / 4))
-ASSIGNS(OBJECT_WHOLE(r))
+ASSIGNS(OBJECT_UPTO(r, sizeof(poly)))
 ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, -MLKEM_ETA1, MLKEM_ETA1));
 // clang-format on
 
@@ -40,7 +40,7 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
     // clang-format off
 REQUIRES(IS_FRESH(r, sizeof(poly)))
 REQUIRES(IS_FRESH(buf, MLKEM_ETA2 * MLKEM_N / 4))
-ASSIGNS(OBJECT_WHOLE(r))
+ASSIGNS(OBJECT_UPTO(r, sizeof(poly)))
 ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, -MLKEM_ETA2, MLKEM_ETA2));
 // clang-format on
 

From 6a180adf44b2d2e8d1ee08aa1eb2f1ac29a2052e Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:39:50 +0000
Subject: [PATCH 5/9] Allow various object-sharing patterns in
 poly_getnoise_eta1_4x

Previously, poly_getnoise_eta1_4x() was specified as
requiring four output polynomials satisfying IS_FRESH(...).
While intuitively we only want to express that the output polynomials
are disjoint, the obligation generated for a caller is, in fact, much
stronger: The target polynomials cannot belong to the same object.

In particular, the current specification would not allow to pass
in four consecutive slices of a polyvec, or say 2 consecutive polyvecs
in the first and last two arguments each.

Those limitations will prevent verification of indcpa_keypair_derand,
which does indeed pass slices of polyvecs to the getnoise functions.

To accommodate, this commit introduces multiple preconditions
varying on in the amount of object-sharing that's permitted:
For MLKEM_K=4, the output polynomials must be consecutive slices
of the same object. For MLKEM_K=2, the first and last two must be
consecutive, but belong to separate objects. For MLKEM_K=3, the
first three must be consecutive, and the forth is unused and must
be NULL.

For MLKEM_K=3, which needs sampling of 6 polynomials during keygen,
the code previously used 2 dummy buffers for the two unused output
slots. This commit changes this behaviour and instead passes NULL
as the output buffer, and drops the respective write. This is both
cleaner and faster.

Object sharing in the sibling functions poly_getnoise_eta2_4x
and poly_getnoise_eta1122_4x will be addressed once they are
needed.

One can attempt to use a single REQUIRES(...) clause with a large
disjunction, which proves OK but seems to cause issues at call-time;
this needs further investigation.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 mlkem/cbmc.h   |  2 ++
 mlkem/indcpa.c | 24 ++++++++++++++++--------
 mlkem/poly.h   | 35 ++++++++++++++++++++++++++---------
 3 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/mlkem/cbmc.h b/mlkem/cbmc.h
index 7ba15af94..2ed331801 100644
--- a/mlkem/cbmc.h
+++ b/mlkem/cbmc.h
@@ -61,6 +61,8 @@
 #define OBJECT_FROM(...) __CPROVER_object_from(__VA_ARGS__)
 #define OBJECT_UPTO(...) __CPROVER_object_upto(__VA_ARGS__)
 
+#define SAME_OBJECT(...) __CPROVER_same_object(__VA_ARGS__)
+
 // Pointer-related predicates
 // https://diffblue.github.io/cbmc/contracts-memory-predicates.html
 #define IS_FRESH(...) __CPROVER_is_fresh(__VA_ARGS__)
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index c888f5587..02b5a2eb4 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -421,10 +421,15 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
   poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, e.vec + 0, e.vec + 1,
                         noiseseed, 0, 1, 2, 3);
 #elif MLKEM_K == 3
-  poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, e.vec + 0,
-                        noiseseed, 0, 1, 2, 3);
-  poly_getnoise_eta1_4x(e.vec + 1, e.vec + 2, pkpv.vec + 0, pkpv.vec + 1,
-                        noiseseed, 4, 5, 6, 7);
+  // Only the first three output buffers are needed.
+  // The laster parameter is a dummy that's overwritten later.
+  poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2,
+                        pkpv.vec + 0 /* irrelevant */, noiseseed, 0, 1, 2,
+                        0xFF /* irrelevant */);
+  // Same here
+  poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2,
+                        pkpv.vec + 0 /* irrelevant */, noiseseed, 3, 4, 5,
+                        0xFF /* irrelevant */);
 #elif MLKEM_K == 4
   poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, skpv.vec + 3,
                         noiseseed, 0, 1, 2, 3);
@@ -487,10 +492,13 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                            coins, 0, 1, 2, 3);
   poly_getnoise_eta2(&epp, coins, 4);
 #elif MLKEM_K == 3
-  poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, ep.vec + 0, coins,
-                        0, 1, 2, 3);
-  poly_getnoise_eta1_4x(ep.vec + 1, ep.vec + 2, &epp, b.vec + 0, coins, 4, 5, 6,
-                        7);
+  // In this call, only the first three output buffers are needed.
+  // The last parameter is a dummy that's overwritten later.
+  poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, &b.vec[0], coins, 0,
+                        1, 2, 0xFF);
+  // The fourth output buffer in this call _is_ used.
+  poly_getnoise_eta1_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, &epp, coins, 3, 4,
+                        5, 6);
 #elif MLKEM_K == 4
   poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, sp.vec + 3, coins,
                         0, 1, 2, 3);
diff --git a/mlkem/poly.h b/mlkem/poly.h
index 57fab57e3..86466a072 100644
--- a/mlkem/poly.h
+++ b/mlkem/poly.h
@@ -449,16 +449,33 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2,
                            uint8_t nonce3)  // clang-format off
-REQUIRES(IS_FRESH(r0, sizeof(poly)))
-REQUIRES(IS_FRESH(r1, sizeof(poly)))
-REQUIRES(IS_FRESH(r2, sizeof(poly)))
-REQUIRES(IS_FRESH(r3, sizeof(poly)))
 REQUIRES(IS_FRESH(seed, MLKEM_SYMBYTES))
-ASSIGNS(OBJECT_WHOLE(r0), OBJECT_WHOLE(r1), OBJECT_WHOLE(r2), OBJECT_WHOLE(r3))
-ENSURES(                                                                          \
-    ARRAY_IN_BOUNDS(int, k0, 0, MLKEM_N - 1, r0->coeffs, -MLKEM_ETA1, MLKEM_ETA1) \
- && ARRAY_IN_BOUNDS(int, k1, 0, MLKEM_N - 1, r1->coeffs, -MLKEM_ETA1, MLKEM_ETA1) \
- && ARRAY_IN_BOUNDS(int, k2, 0, MLKEM_N - 1, r2->coeffs, -MLKEM_ETA1, MLKEM_ETA1) \
+/* Depending on MLKEM_K, the pointers passed to this function belong
+   to the same objects, so we cannot use IS_FRESH for r0-r3.
+
+   NOTE: Somehow it is important to use IS_FRESH() first in the
+     conjunctions defining each case.
+*/
+#if MLKEM_K == 2
+REQUIRES( /* Case A: r0, r1 consecutive, r2, r3 consecutive */
+ (IS_FRESH(r0, 2 * sizeof(poly)) && IS_FRESH(r2, 2 * sizeof(poly)) &&
+   r1 == r0 + 1 && r3 == r2 + 1 && !SAME_OBJECT(r0, r2)))
+#elif MLKEM_K == 4
+REQUIRES( /* Case B: r0, r1, r2, r3 consecutive */
+ (IS_FRESH(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3))
+#elif MLKEM_K == 3
+REQUIRES( /* Case C: r0, r1, r2 consecutive */
+ (IS_FRESH(r0, 3 * sizeof(poly)) && IS_FRESH(r3, 1 * sizeof(poly)) &&
+  r1 == r0 + 1 && r2 == r0 + 2 && !SAME_OBJECT(r3, r0)))
+#endif
+ASSIGNS(OBJECT_UPTO(r0, sizeof(poly)))
+ASSIGNS(OBJECT_UPTO(r1, sizeof(poly)))
+ASSIGNS(OBJECT_UPTO(r2, sizeof(poly)))
+ASSIGNS(OBJECT_UPTO(r3, sizeof(poly)))
+ENSURES(
+    ARRAY_IN_BOUNDS(int, k0, 0, MLKEM_N - 1, r0->coeffs, -MLKEM_ETA1, MLKEM_ETA1)
+ && ARRAY_IN_BOUNDS(int, k1, 0, MLKEM_N - 1, r1->coeffs, -MLKEM_ETA1, MLKEM_ETA1)
+ && ARRAY_IN_BOUNDS(int, k2, 0, MLKEM_N - 1, r2->coeffs, -MLKEM_ETA1, MLKEM_ETA1)
  && ARRAY_IN_BOUNDS(int, k3, 0, MLKEM_N - 1, r3->coeffs, -MLKEM_ETA1, MLKEM_ETA1));
 // clang-format on
 

From 52471df7fc3b2f7162861b04a64df16951367cde Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:45:25 +0000
Subject: [PATCH 6/9] Add preliminary specification (no proof) of hash_g /
 sha3_512

This commit adds a tentative specification, but no proof, for the
hash_g/sha3_512 function used by the toplevel INDCPA API.

It appears that hash_g is called in two versions:
- With input and output equal
- With input and output disjoint

The tentative spec aims to accommodate both cases.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 fips202/fips202.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fips202/fips202.h b/fips202/fips202.h
index f0eabd694..5ad85110c 100644
--- a/fips202/fips202.h
+++ b/fips202/fips202.h
@@ -88,10 +88,25 @@ ASSIGNS(OBJECT_UPTO(output, outlen));
 
 /* One-stop SHA3-256 shop */
 #define sha3_256 FIPS202_NAMESPACE(sha3_256)
-void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen);
+void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen)
+    // clang-format off
+REQUIRES(IS_FRESH(input, inlen))
+REQUIRES(IS_FRESH(output, 32))
+ASSIGNS(OBJECT_WHOLE(output));
+// clang-format on
 
 /* One-stop SHA3-512 shop */
 #define sha3_512 FIPS202_NAMESPACE(sha3_512)
-void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen);
+void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen)
+    // clang-format off
+REQUIRES(
+    /* Case A: Aliasing between input and output */
+    (output == input && inlen <= 64 && IS_FRESH(output, 64))
+ ||
+    /* Case B: Disjoint input and output */
+    (IS_FRESH(input, inlen) && IS_FRESH(output, 64))
+)
+ASSIGNS(OBJECT_WHOLE(output));
+// clang-format on
 
 #endif

From 7d8bb836f092152a80cd7acfa563e6667cd08820 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 05:47:08 +0000
Subject: [PATCH 7/9] CBMC: Add top-level proof for indcpa_keypair_derand

This commit brings together the previous workstream of low-level
CBMC proofs, into the proof of type safety and memory safety for
the MLKEM-INDCPA-KeyGen function `indcpa_keypair_derand()`.

In terms of type safety, what the proof in particular shows is that
the input/output bounds of the various arithmetic functions are
coherent, and that there is hence no risk of arithmetic overflow.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 cbmc/proofs/indcpa_keypair_derand/Makefile    | 54 +++++++++++++++++++
 .../indcpa_keypair_derand/cbmc-proof.txt      |  3 ++
 .../indcpa_keypair_derand_harness.c           | 27 ++++++++++
 mlkem/indcpa.h                                | 11 +++-
 4 files changed, 93 insertions(+), 2 deletions(-)
 create mode 100644 cbmc/proofs/indcpa_keypair_derand/Makefile
 create mode 100644 cbmc/proofs/indcpa_keypair_derand/cbmc-proof.txt
 create mode 100644 cbmc/proofs/indcpa_keypair_derand/indcpa_keypair_derand_harness.c

diff --git a/cbmc/proofs/indcpa_keypair_derand/Makefile b/cbmc/proofs/indcpa_keypair_derand/Makefile
new file mode 100644
index 000000000..4f1361f2b
--- /dev/null
+++ b/cbmc/proofs/indcpa_keypair_derand/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = indcpa_keypair_derand_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = indcpa_keypair_derand
+
+DEFINES +=
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET +=
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mlkem/indcpa.c
+
+CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)indcpa_keypair_derand
+USE_FUNCTION_CONTRACTS=$(FIPS202_NAMESPACE)sha3_512 $(MLKEM_NAMESPACE)gen_matrix $(MLKEM_NAMESPACE)poly_getnoise_eta1_4x $(MLKEM_NAMESPACE)polyvec_ntt $(MLKEM_NAMESPACE)polyvec_mulcache_compute matvec_mul $(MLKEM_NAMESPACE)polyvec_tomont $(MLKEM_NAMESPACE)polyvec_add $(MLKEM_NAMESPACE)polyvec_reduce pack_sk pack_pk
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = $(MLKEM_NAMESPACE)indcpa_keypair_derand
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 10
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mlkem/poly.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/cbmc/proofs/indcpa_keypair_derand/cbmc-proof.txt b/cbmc/proofs/indcpa_keypair_derand/cbmc-proof.txt
new file mode 100644
index 000000000..3d1913ebf
--- /dev/null
+++ b/cbmc/proofs/indcpa_keypair_derand/cbmc-proof.txt
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# This file marks this directory as containing a CBMC proof.
diff --git a/cbmc/proofs/indcpa_keypair_derand/indcpa_keypair_derand_harness.c b/cbmc/proofs/indcpa_keypair_derand/indcpa_keypair_derand_harness.c
new file mode 100644
index 000000000..2d0ebc915
--- /dev/null
+++ b/cbmc/proofs/indcpa_keypair_derand/indcpa_keypair_derand_harness.c
@@ -0,0 +1,27 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/*
+ * Insert copyright notice
+ */
+
+/**
+ * @file indcpa_keypair_derand_harness.c
+ * @brief Implements the proof harness for indcpa_keypair_derand function.
+ */
+
+/*
+ * Insert project header files that
+ *   - include the declaration of the function
+ *   - include the types needed to declare function arguments
+ */
+#include <indcpa.h>
+
+/**
+ * @brief Starting point for formal analysis
+ *
+ */
+void harness(void) {
+  uint8_t *a, *b, *c;
+  indcpa_keypair_derand(a, b, c);
+}
diff --git a/mlkem/indcpa.h b/mlkem/indcpa.h
index c78be02ec..1919aedcf 100644
--- a/mlkem/indcpa.h
+++ b/mlkem/indcpa.h
@@ -13,7 +13,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 REQUIRES(IS_FRESH(a, sizeof(polyvec) * MLKEM_K))
 REQUIRES(IS_FRESH(seed, MLKEM_SYMBYTES))
 REQUIRES(transposed == 0 || transposed == 1)
-ASSIGNS(OBJECT_UPTO(a, sizeof(polyvec) * MLKEM_K))
+ASSIGNS(OBJECT_WHOLE(a))
 ENSURES(FORALL(int, x, 0, MLKEM_K - 1, FORALL(int, y, 0, MLKEM_K - 1,
   ARRAY_IN_BOUNDS(int, i, 0, MLKEM_N - 1, a[x].vec[y].coeffs, 0, (MLKEM_Q - 1)))))
 ;  // clang-format on
@@ -21,7 +21,14 @@ ENSURES(FORALL(int, x, 0, MLKEM_K - 1, FORALL(int, y, 0, MLKEM_K - 1,
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
-                           const uint8_t coins[MLKEM_SYMBYTES]);
+                           const uint8_t coins[MLKEM_SYMBYTES])
+    // clang-format off
+  REQUIRES(IS_FRESH(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  REQUIRES(IS_FRESH(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  REQUIRES(IS_FRESH(coins, MLKEM_SYMBYTES))
+  ASSIGNS(OBJECT_WHOLE(pk))
+  ASSIGNS(OBJECT_WHOLE(sk));
+// clang-format on
 
 #define indcpa_enc MLKEM_NAMESPACE(indcpa_enc)
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],

From 055f112a8c3cd2421ec8eb4b73a80c64518c42a0 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 06:10:29 +0000
Subject: [PATCH 8/9] CBMC: Prove type+memory-safety of
 crypto_kem_keypair_derand

This commit brings together the numerous low-level proofs to
a proof of type-safety and memory-safety of the top-level
MLKEM key generation API `crypto_kem_keypair_derand()`.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 .../proofs/crypto_kem_keypair_derand/Makefile | 54 +++++++++++++++++++
 .../crypto_kem_keypair_derand/cbmc-proof.txt  |  3 ++
 .../crypto_kem_keypair_derand_harness.c       | 27 ++++++++++
 mlkem/kem.h                                   | 10 +++-
 4 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 cbmc/proofs/crypto_kem_keypair_derand/Makefile
 create mode 100644 cbmc/proofs/crypto_kem_keypair_derand/cbmc-proof.txt
 create mode 100644 cbmc/proofs/crypto_kem_keypair_derand/crypto_kem_keypair_derand_harness.c

diff --git a/cbmc/proofs/crypto_kem_keypair_derand/Makefile b/cbmc/proofs/crypto_kem_keypair_derand/Makefile
new file mode 100644
index 000000000..c89fc716f
--- /dev/null
+++ b/cbmc/proofs/crypto_kem_keypair_derand/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = crypto_kem_keypair_derand_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = crypto_kem_keypair_derand
+
+DEFINES +=
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET +=
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mlkem/kem.c
+
+CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)keypair_derand
+USE_FUNCTION_CONTRACTS=$(FIPS202_NAMESPACE)sha3_256 $(MLKEM_NAMESPACE)indcpa_keypair_derand
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = $(MLKEM_NAMESPACE)keypair_derand
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 10
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mlkem/poly.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/cbmc/proofs/crypto_kem_keypair_derand/cbmc-proof.txt b/cbmc/proofs/crypto_kem_keypair_derand/cbmc-proof.txt
new file mode 100644
index 000000000..3d1913ebf
--- /dev/null
+++ b/cbmc/proofs/crypto_kem_keypair_derand/cbmc-proof.txt
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# This file marks this directory as containing a CBMC proof.
diff --git a/cbmc/proofs/crypto_kem_keypair_derand/crypto_kem_keypair_derand_harness.c b/cbmc/proofs/crypto_kem_keypair_derand/crypto_kem_keypair_derand_harness.c
new file mode 100644
index 000000000..be09d867f
--- /dev/null
+++ b/cbmc/proofs/crypto_kem_keypair_derand/crypto_kem_keypair_derand_harness.c
@@ -0,0 +1,27 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/*
+ * Insert copyright notice
+ */
+
+/**
+ * @file crypto_kem_keypair_derand_harness.c
+ * @brief Implements the proof harness for crypto_kem_keypair_derand function.
+ */
+
+/*
+ * Insert project header files that
+ *   - include the declaration of the function
+ *   - include the types needed to declare function arguments
+ */
+#include <kem.h>
+
+/**
+ * @brief Starting point for formal analysis
+ *
+ */
+void harness(void) {
+  uint8_t *a, *b, *c;
+  crypto_kem_keypair_derand(a, b, c);
+}
diff --git a/mlkem/kem.h b/mlkem/kem.h
index e79e9a539..039f9e27e 100644
--- a/mlkem/kem.h
+++ b/mlkem/kem.h
@@ -3,6 +3,7 @@
 #define KEM_H
 
 #include <stdint.h>
+#include "cbmc.h"
 #include "params.h"
 
 #define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
@@ -19,7 +20,14 @@
 #endif
 
 #define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+    // clang-format off
+  REQUIRES(IS_FRESH(pk, MLKEM_PUBLICKEYBYTES))
+  REQUIRES(IS_FRESH(sk, MLKEM_SECRETKEYBYTES))
+  REQUIRES(IS_FRESH(coins, 2 * MLKEM_SYMBYTES))
+  ASSIGNS(OBJECT_WHOLE(pk))
+  ASSIGNS(OBJECT_WHOLE(sk));
+// clang-format on
 
 #define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 int crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

From 8fc762aa02b68b0776fab16026b8a1c62d10084d Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 9 Nov 2024 07:23:23 +0000
Subject: [PATCH 9/9] CBMC: Disable generation of coverage information

This commit disables CBMC coverage generation in CI, which is
very time-consuming and not what we are interested in currently.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 .github/actions/cbmc/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/cbmc/action.yml b/.github/actions/cbmc/action.yml
index b1b768564..b76f5138b 100644
--- a/.github/actions/cbmc/action.yml
+++ b/.github/actions/cbmc/action.yml
@@ -48,5 +48,5 @@ runs:
         run: |
           cd cbmc/proofs;
           echo "::group::cbmc_${{ inputs.mlkem_k }}"
-          MLKEM_K=${{ inputs.mlkem_k }} ./run-cbmc-proofs.py --summarize -j8;
+          MLKEM_K=${{ inputs.mlkem_k }} ./run-cbmc-proofs.py --summarize --no-coverage -j8;
           echo "::endgroup::"