Skip to content

Commit

Permalink
Remove redundant reduction from unpack_pk() and unpack_sk()
Browse files Browse the repository at this point in the history
The specifications for the polynomial base multiplication currently
require one input to be bound by q in absolute value. To ensure
this, the unpacking routines `unpack_pk()` and `unpack_sk()` currently
explicitly reduce their output via `poly_reduce()`.

Functionally, this reduction is not needed:
- For the PK, the modulus-check already ensures that the unpacked PK
  will be unsigned canonical.
- For the SK, there is no requirement in the standard to check that
  the SK is reduced, or reduce it after unpacking.
- There is no risk of overflow in the base multiplication even if
  the inputs are bound by 4096 rather than q:
  - For the C base multiplication, the input bound before modular
    reduction is now 2 * 4096 * 2^15, where before it was
    2 * q * 2^15 -- both is below 2^31 so we are fine.
    After reduction, the 3/2 * q bound is weakened to 2 * q
    (a much better bound would be possible, but is unneeded), which
    is still sufficient for up to K=4-fold accumulation since
    8 * q < INT16_MAX.
  - For the AArch64 base multiplication, we are accumulating in 32-bit
    up to a single final reduction. As in the C case, each 32-bit
    polynomial multiplication is bound by 2 * 4096 * 2^15 = 2^28,
    so the accumulated value before reduction is < 2^30 < INT32_MAX.
  - For the x86_64 base multiplication, the reasoning is unchanged:
    We don't accumulate in 32-bit but conduct a Montgomery multiplication
    for every monomial in the product, giving an output bound by q per monomial.
    Up to 8 such are accumulated, giving a bound by 8 * q < INT16_MAX.

This commit strengthens the specifications for all polynomial base
multiplication routines
- basemul_cached
- poly_basemul_montgomery_cached
- polyvec_basemul_acc_montgomery
- polyvec_basemul_acc_montgomery_cached
- matvec_mul

The polynomial reductions in `check_pk` and `check_sk` can then
be safely removed.

Signed-off-by: Hanno Becker <[email protected]>
  • Loading branch information
hanno-becker committed Dec 10, 2024
1 parent f56c5f6 commit be01930
Show file tree
Hide file tree
Showing 12 changed files with 44 additions and 56 deletions.
21 changes: 8 additions & 13 deletions mlkem/indcpa.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,10 @@ static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES],
polyvec_frombytes(pk, packedpk);
memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES);

/*
* TODO! We know from the modulus check that this will result in an
* unsigned canonical polynomial, but CBMC does not know it. We should
* weaken the specification of `unpack_pk()` and all depending functions
* to work with the weaker 4096-bound, so that the proofs go through
* without the need of this redundant call to polyvec_reduce().
*/
polyvec_reduce(pk);
/* NOTE: If a modulus check was conducted on the PK, we know at this
* point that the coefficients of `pk` are unsigned canonical. The
* specifications and proofs, however, do _not_ assume this, and instead
* work with the easily provable bound by 4096. */
}

/*************************************************
Expand All @@ -91,15 +87,14 @@ static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk)
* Description: De-serialize the secret key; inverse of pack_sk
*
* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret
*key)
* key)
* - const uint8_t *packedsk: pointer to input serialized secret
*key
* key
**************************************************/
static void unpack_sk(polyvec *sk,
const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES])
{
polyvec_frombytes(sk, packedsk);
polyvec_reduce(sk);
}

/*************************************************
Expand Down Expand Up @@ -358,7 +353,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
*
* Arguments: - polyvec *out: Pointer to output polynomial vector
* - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain
* and have coefficients of absolute value < MLKEM_Q.
* and have coefficients of absolute value < 4096.
* - polyvec *v: Input polynomial vector. Must be in NTT domain.
* - polyvec *vc: Mulcache for v, computed via
* polyvec_mulcache_compute().
Expand All @@ -373,7 +368,7 @@ __contract__(
requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
requires(forall(int, k0, 0, MLKEM_K - 1,
forall(int, k1, 0, MLKEM_K - 1,
array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))))
array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
assigns(object_whole(out)))
{
int i;
Expand Down
11 changes: 4 additions & 7 deletions mlkem/native/aarch64/polyvec_clean.S
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
// Computes products (a0*b0 + a0*b0t, a0*b1 + a1*b0) in 32-bit.
//
// Bounds:
// - Assume |a| < q, |b|, |bt| < NTT_BOUND
// - Result: < 2*q*NTT_BOUND
// - Assume |a| < 4096,
// - Result: < 2*4096*2^15 = 2^28
.macro pmull d, a, b
smull \d\()0l.4s, \a\()0.4h, \b\()0.4h
smull2 \d\()0h.4s, \a\()0.8h, \b\()0.8h
Expand Down Expand Up @@ -252,11 +252,8 @@ _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):

// Bounds:
//
// Each pmull is bound by 2*q*2^15, so the final value
// before Montgomery reduction is bound by q*2^18 ~ 2^29.7
//
// The Montgomery reduction is thus bound by
// q * (2^18/2^16 + 1/2) = 4.5 q < 14981
// Each pmull is bound by 2*4096*2^15=2^28, so the final value
// before Montgomery reduction is bound by 2^30.

mov count, #(MLKEM_N / 16)
k4_loop_start:
Expand Down
7 changes: 2 additions & 5 deletions mlkem/native/x86_64/basemul.S
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,8 @@ vpaddw %ymm7,%ymm11,%ymm11
vpsubw %ymm13,%ymm10,%ymm13
vpsubw %ymm12,%ymm6,%ymm6

/* Bounds: Note that we assume that a+b*X is coefficient-wise bound by q in
* in absolute value: This is coming from the contract for
* polyvec_basemul_acc_montgomery_cached().
*
* Then, each Montgomery multiplication has absolute value < q,
/* Bounds: Since we are multiplying with signed canonical twiddles,
* each Montgomery multiplication has absolute value < q,
* and hence the coefficients of the output have absolute value < 2q. */
vmovdqa %ymm13,(64*\off+ 0)*2(%rdi)
vmovdqa %ymm9,(64*\off+16)*2(%rdi)
Expand Down
7 changes: 3 additions & 4 deletions mlkem/ntt.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ __contract__(
for (j = start; j < start + len; j++)
__loop__(
invariant(start <= j && j <= start + len)
/*
/*
* Coefficients are updated in strided pairs, so the bounds for the
* intermediate states alternate twice between the old and new bound
*/
Expand Down Expand Up @@ -247,7 +247,7 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
{
int32_t t0, t1;

BOUND(a, 2, MLKEM_Q, "basemul input bound");
BOUND(a, 2, 4096, "basemul input bound");

t0 = (int32_t)a[1] * b_cached;
t0 += (int32_t)a[0] * b[0];
Expand All @@ -258,6 +258,5 @@ void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
r[0] = montgomery_reduce(t0);
r[1] = montgomery_reduce(t1);

/* |r[i]| < 3/2 q */
BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound");
BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound");
}
8 changes: 4 additions & 4 deletions mlkem/ntt.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ __contract__(
*
* Arguments: - r: Pointer to output polynomial
* Upon return, coefficients are bound by
* 3*(q+1)/2 in absolute value.
* 2*MLKEM_Q in absolute value.
* - a: Pointer to first input polynomial
* Must be coefficient-wise < q in absolute value.
* Must be coefficient-wise < 4096 in absolute value.
* - b: Pointer to second input polynomial
* Can have arbitrary int16_t coefficients
* - b_cached: Some precomputed value, typically derived from
Expand All @@ -92,9 +92,9 @@ __contract__(
requires(memory_no_alias(r, 2 * sizeof(int16_t)))
requires(memory_no_alias(a, 2 * sizeof(int16_t)))
requires(memory_no_alias(b, 2 * sizeof(int16_t)))
requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1))
requires(array_abs_bound(a, 0, 1, UINT12_MAX))
assigns(memory_slice(r, 2 * sizeof(int16_t)))
ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1)))
ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
);


Expand Down
1 change: 1 addition & 0 deletions mlkem/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#define MLKEM_N 256
#define MLKEM_Q 3329
#define UINT12_MAX 4095

#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */
#define MLKEM_SSBYTES 32 /* size in bytes of shared key */
Expand Down
6 changes: 3 additions & 3 deletions mlkem/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
for (i = 0; i < MLKEM_N / 2; i++)
__loop__(
invariant(i >= 0 && i <= MLKEM_N / 2)
invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095)))
invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
{
/* REF-CHANGE: Introduce some locals for better readability */
const uint8_t t0 = a[3 * i + 0];
Expand Down Expand Up @@ -456,13 +456,13 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
const poly_mulcache *b_cache)
{
int i;
POLY_BOUND(b_cache, MLKEM_Q);
POLY_BOUND(b_cache, 4096);

for (i = 0; i < MLKEM_N / 4; i++)
__loop__(
assigns(i, object_whole(r))
invariant(i >= 0 && i <= MLKEM_N / 4)
invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1))))
invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
{
basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
b_cache->coeffs[2 * i]);
Expand Down
6 changes: 3 additions & 3 deletions mlkem/poly.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ __contract__(
requires(memory_no_alias(a, MLKEM_POLYBYTES))
requires(memory_no_alias(r, sizeof(poly)))
assigns(memory_slice(r, sizeof(poly)))
ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095))
ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
);


Expand Down Expand Up @@ -637,9 +637,9 @@ __contract__(
requires(memory_no_alias(a, sizeof(poly)))
requires(memory_no_alias(b, sizeof(poly)))
requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
assigns(object_whole(r))
ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1)))
ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
);

#define poly_tomont MLKEM_NAMESPACE(poly_tomont)
Expand Down
8 changes: 4 additions & 4 deletions mlkem/polyvec.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
int i;
poly t;

POLYVEC_BOUND(a, MLKEM_Q);
POLYVEC_BOUND(a, 4096);
POLYVEC_BOUND(b, NTT_BOUND);
POLYVEC_BOUND(b_cache, MLKEM_Q);

Expand All @@ -98,17 +98,17 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
* them from the spec to not unnecessarily constraint native implementations.
*/
cassert(
array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)),
array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
"polyvec_basemul_acc_montgomery_cached output bounds");
/* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
POLY_BOUND(r, MLKEM_K * 3 * HALF_Q);
POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
}
#else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
const polyvec *b,
const polyvec_mulcache *b_cache)
{
POLYVEC_BOUND(a, MLKEM_Q);
POLYVEC_BOUND(a, 4096);
POLYVEC_BOUND(b, NTT_BOUND);
/* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may
* decide not to use a mulcache. Note that the C backend implementation
Expand Down
13 changes: 6 additions & 7 deletions mlkem/polyvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ __contract__(
*
* Arguments: - const polyvec *a: pointer to output vector of polynomials
* (of length MLKEM_POLYVECBYTES). Output will have coefficients
* normalized to [0,..,q-1].
* normalized in [0..4095].
* - uint8_t *r: pointer to input byte array
**************************************************/
void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
Expand All @@ -102,7 +102,7 @@ __contract__(
requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
assigns(object_whole(r))
ensures(forall(int, k0, 0, MLKEM_K - 1,
array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095)))
array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
);

#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
Expand Down Expand Up @@ -171,9 +171,9 @@ __contract__(
requires(memory_no_alias(r, sizeof(poly)))
requires(memory_no_alias(a, sizeof(polyvec)))
requires(memory_no_alias(b, sizeof(polyvec)))
/* Input is coefficient-wise < q in absolute value */
/* Input is coefficient-wise < q in absolute value */
requires(forall(int, k1, 0, MLKEM_K - 1,
array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
assigns(memory_slice(r, sizeof(poly)))
);

Expand All @@ -188,7 +188,7 @@ __contract__(
* using mulcache for second operand.
*
* Bounds:
* - a is assumed to be coefficient-wise < q in absolute value.
* - a is assumed to be coefficient-wise < 4096 in absolute value.
* - No bounds guarantees for the coefficients in the result.
*
* Arguments: - poly *r: pointer to output polynomial
Expand All @@ -206,9 +206,8 @@ __contract__(
requires(memory_no_alias(a, sizeof(polyvec)))
requires(memory_no_alias(b, sizeof(polyvec)))
requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
/* Input is coefficient-wise < q in absolute value */
requires(forall(int, k1, 0, MLKEM_K - 1,
array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
assigns(memory_slice(r, sizeof(poly)))
);

Expand Down
2 changes: 1 addition & 1 deletion mlkem/reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
int16_t montgomery_reduce(int32_t a)
{
int16_t res;
SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input");
SCALAR_BOUND(a, 2 * UINT12_MAX * 32768, "montgomery_reduce input");

res = montgomery_reduce_generic(a);

Expand Down
10 changes: 5 additions & 5 deletions mlkem/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@
* Description: Montgomery reduction
*
* Arguments: - int32_t a: input integer to be reduced
* Must be smaller than 2 * q * 2^15 in absolute value.
* Must be smaller than 2 * 2^12 * 2^15 in absolute value.
*
* Returns: integer congruent to a * R^-1 modulo q,
* smaller than 3/2 q in absolute value.
* smaller than 2 * q in absolute value.
**************************************************/
int16_t montgomery_reduce(int32_t a)
__contract__(
requires(a > -(2 * MLKEM_Q * 32768))
requires(a < (2 * MLKEM_Q * 32768))
ensures(return_value > -(3 * HALF_Q) && return_value < (3 * HALF_Q))
requires(a > -(2 * 4096 * 32768))
requires(a < (2 * 4096 * 32768))
ensures(return_value > -2 * MLKEM_Q && return_value < 2 * MLKEM_Q)
);

#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
Expand Down

0 comments on commit be01930

Please sign in to comment.