From 822ecf44955ee0d75bb7b265dbc7f0f2b70c6289 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 31 Oct 2024 05:07:55 +0000 Subject: [PATCH 1/2] Remove outdated comment about poly_frombytes() Previously, `poly_tobytes()` included normalization from signed to unsigned coefficients. In consequence, it was not an inverse to `poly_frombytes()`, which was confusing enough to be remarked in poly.h. In the interim, `poly_tobytes()` has changed to no longer normalize from signed to unsigned coefficients, but _assume_ unsigned coefficients instead. The comment regarding poly_tobytes() and poly_frombytes() not being inverse is thus outdated, and this commit removes it. Signed-off-by: Hanno Becker --- mlkem/poly.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlkem/poly.c b/mlkem/poly.c index 716db1678..970daf801 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -239,9 +239,6 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { * * Description: De-serialization of a polynomial. * - * Note that this is not a strict inverse to poly_tobytes() since - * the latter includes normalization to unsigned coefficients. - * * Arguments: INPUT * - a: pointer to input byte array * (of MLKEM_POLYBYTES bytes) From 9403288970aa16aa84e0c346fff1607eada0db81 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 31 Oct 2024 05:10:44 +0000 Subject: [PATCH 2/2] CBMC: Add spec and proof for poly_frombytes() This commit adds spec and proof for poly_frombytes(). The only noteworthy bit is that `poly_frombytes()` does _not_ produce unsigned canonical coefficients. Instead, coefficients can range up to 4095. Fixes #287 Signed-off-by: Hanno Becker --- cbmc/proofs/poly_frombytes/Makefile | 54 +++++++++++++++++++ cbmc/proofs/poly_frombytes/cbmc-proof.txt | 3 ++ .../poly_frombytes/poly_frombytes_harness.c | 30 +++++++++++ mlkem/poly.c | 26 ++++++--- mlkem/poly.h | 8 ++- 5 files changed, 113 insertions(+), 8 deletions(-) create mode 100644 cbmc/proofs/poly_frombytes/Makefile create mode 100644 cbmc/proofs/poly_frombytes/cbmc-proof.txt create mode 100644 cbmc/proofs/poly_frombytes/poly_frombytes_harness.c diff --git a/cbmc/proofs/poly_frombytes/Makefile b/cbmc/proofs/poly_frombytes/Makefile new file mode 100644 index 000000000..8d7439548 --- /dev/null +++ b/cbmc/proofs/poly_frombytes/Makefile @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = poly_frombytes_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = poly_frombytes + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/poly.c + +CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)poly_frombytes +USE_FUNCTION_CONTRACTS= +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--bitwuzla + +FUNCTION_NAME = $(MLKEM_NAMESPACE)poly_frombytes + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mlkem/poly.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/cbmc/proofs/poly_frombytes/cbmc-proof.txt b/cbmc/proofs/poly_frombytes/cbmc-proof.txt new file mode 100644 index 000000000..3d1913ebf --- /dev/null +++ b/cbmc/proofs/poly_frombytes/cbmc-proof.txt @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: Apache-2.0 + +# This file marks this directory as containing a CBMC proof. diff --git a/cbmc/proofs/poly_frombytes/poly_frombytes_harness.c b/cbmc/proofs/poly_frombytes/poly_frombytes_harness.c new file mode 100644 index 000000000..cf967f3a9 --- /dev/null +++ b/cbmc/proofs/poly_frombytes/poly_frombytes_harness.c @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +/* + * Insert copyright notice + */ + +/** + * @file poly_frombytes_harness.c + * @brief Implements the proof harness for poly_frombytes function. + */ + +/* + * Insert project header files that + * - include the declaration of the function + * - include the types needed to declare function arguments + */ +#include + +/** + * @brief Starting point for formal analysis + * + */ +void harness(void) { + poly a; + uint8_t r[MLKEM_POLYBYTES]; + + /* Contracts for this function are in poly.h */ + poly_frombytes(&a, r); +} diff --git a/mlkem/poly.c b/mlkem/poly.c index 970daf801..882163be8 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -248,13 +248,25 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) { * 0 .. 4095 **************************************************/ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) { - unsigned int i; - for (i = 0; i < MLKEM_N / 2; i++) { - r->coeffs[2 * i] = - ((a[3 * i + 0] >> 0) | ((uint16_t)a[3 * i + 1] << 8)) & 0xFFF; - r->coeffs[2 * i + 1] = - ((a[3 * i + 1] >> 4) | ((uint16_t)a[3 * i + 2] << 4)) & 0xFFF; - } + int i; + for (i = 0; i < MLKEM_N / 2; i++) + // clang-format off + ASSIGNS(i, OBJECT_WHOLE(r)) + INVARIANT(i >= 0 && i <= MLKEM_N / 2) + INVARIANT(ARRAY_IN_BOUNDS(int, k, 0, (2 * i - 1), r->coeffs, 0, 4095)) + DECREASES(MLKEM_N / 2 - i) + // clang-format on + { + // REF-CHANGE: Introduce some locals for better readability + const uint8_t t0 = a[3 * i + 0]; + const uint8_t t1 = a[3 * i + 1]; + const uint8_t t2 = a[3 * i + 2]; + r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF); + r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4); + } + + // Note that the coefficients are not canonical + POLY_UBOUND(r, 4096); } #else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) { diff --git a/mlkem/poly.h b/mlkem/poly.h index 92dfccfe6..3e536b96e 100644 --- a/mlkem/poly.h +++ b/mlkem/poly.h @@ -203,7 +203,13 @@ ASSIGNS(OBJECT_WHOLE(r)); #define poly_frombytes MLKEM_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]); +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) + // clang-format off +REQUIRES(a != NULL && IS_FRESH(a, MLKEM_POLYBYTES)) +REQUIRES(r != NULL && IS_FRESH(r, sizeof(poly))) +ASSIGNS(OBJECT_WHOLE(r)) +ENSURES(ARRAY_IN_BOUNDS(int, k, 0, (MLKEM_N - 1), r->coeffs, 0, 4095)); +// clang-format on #define poly_frommsg MLKEM_NAMESPACE(poly_frommsg) void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]);