-
Notifications
You must be signed in to change notification settings - Fork 12
/
ntt.c
257 lines (234 loc) · 8.51 KB
/
ntt.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/*
* Copyright (c) 2024 The mlkem-native project authors
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include "arith_backend.h"
#include "debug/debug.h"
#include "ntt.h"
#include "reduce.h"
#if !defined(MLKEM_USE_NATIVE_NTT)
/*
* Computes a block CT butterflies with a fixed twiddle factor,
* using Montgomery multiplication.
* Parameters:
* - r: Pointer to base of polynomial (_not_ the base of butterfly block)
* - root: Twiddle factor to use for the butterfly. This must be in
* Montgomery form and signed canonical.
* - start: Offset to the beginning of the butterfly block
* - len: Index difference between coefficients subject to a butterfly
* - bound: Ghost variable describing coefficient bound: Prior to `start`,
* coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
* they must be bound by `bound`.
* When this function returns, output coefficients in the index range
* [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
* Example:
* - start=8, len=4
* This would compute the following four butterflies
* 8 -- 12
* 9 -- 13
* 10 -- 14
* 11 -- 15
* - start=4, len=2
* This would compute the following two butterflies
* 4 -- 6
* 5 -- 7
*/
STATIC_TESTABLE
void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
int bound)
__contract__(
requires(0 <= start && start < MLKEM_N)
requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
requires(-HALF_Q < zeta && zeta < HALF_Q)
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
{
/* `bound` is a ghost variable only needed in the CBMC specification */
int j;
((void)bound);
for (j = start; j < start + len; j++)
__loop__(
invariant(start <= j && j <= start + len)
/*
 * Coefficients are updated in strided pairs, so the bounds for the
 * intermediate states alternate twice between the old and new bound
 */
invariant(array_abs_bound(r, 0, j - 1, bound + MLKEM_Q))
invariant(array_abs_bound(r, j, start + len - 1, bound))
invariant(array_abs_bound(r, start + len, j + len - 1, bound + MLKEM_Q))
invariant(array_abs_bound(r, j + len, MLKEM_N - 1, bound)))
{
int16_t t;
/*
 * Cooley-Tukey butterfly on the pair (r[j], r[j+len]):
 *   (r[j], r[j+len]) <- (r[j] + zeta*r[j+len], r[j] - zeta*r[j+len])
 * fqmul performs the zeta multiplication in Montgomery form (see the
 * header comment above this function). The add/sub can each grow the
 * coefficient by at most MLKEM_Q, matching the `bound + MLKEM_Q`
 * postcondition in the contract.
 */
t = fqmul(r[j + len], zeta);
r[j + len] = r[j] - t;
r[j] = r[j] + t;
}
}
/*
 * Compute one layer of the forward NTT
* Parameters:
* - r: Pointer to base of polynomial
* - len: Stride of butterflies in this layer.
* - layer: Ghost variable indicating which layer is being applied.
* Must match `len` via `len == MLKEM_N >> layer`.
* Note: `len` could be dropped and computed in the function, but
* we are following the structure of the reference NTT from the
* official Kyber implementation here, merely adding `layer` as
* a ghost variable for the specifications.
*/
STATIC_TESTABLE
void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
{
int start, k;
/* `layer` is a ghost variable only needed in the CBMC specification */
((void)layer);
/* Twiddle factors for layer n start at index 2^(layer-1) */
k = MLKEM_N / (2 * len);
/*
 * Walk the polynomial in butterfly blocks of width 2*len; each block
 * consumes one twiddle factor from `zetas`. Per the contract, every
 * processed block's coefficients grow from bound `layer*Q - 1` to
 * `(layer+1)*Q - 1`, which the loop invariants track via `start`.
 */
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(0 <= start && start < MLKEM_N + 2 * len)
invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
{
int16_t zeta = zetas[k++];
ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
}
}
/*
* Compute full forward NTT
* NOTE: This particular implementation satisfies a much tighter
* bound on the output coefficients (5*q) than the contractual one (8*q),
* but this is not needed in the calling code. Should we change the
* base multiplication strategy to require smaller NTT output bounds,
* the proof may need strengthening.
*/
void poly_ntt(poly *p)
{
int len, layer;
int16_t *r;
/* Debug-only check that input coefficients are bound by MLKEM_Q */
POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input");
r = p->coeffs;
/*
 * Apply the 7 NTT layers in order, halving the butterfly stride each
 * time (len = 128, 64, ..., 2). Each layer adds at most MLKEM_Q to the
 * coefficient bound, as expressed by the loop invariant below.
 */
for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
__loop__(
invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
{
ntt_layer(r, len, layer);
}
/* Check the stronger bound */
POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output");
}
#else /* MLKEM_USE_NATIVE_NTT */
/* Check that bound for native NTT implies contractual bound */
/*
 * Fix: the assertion tag previously read `invntt_bound`, which (a) mislabels
 * a check on the *forward* NTT bound and (b) collides with the `invntt_bound`
 * tags used for the inverse-NTT asserts below when native NTT is enabled but
 * the reference invNTT is compiled in the same translation unit.
 */
STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, ntt_bound)
/*
 * Compute full forward NTT using the native (platform-specific) backend.
 * Parameters:
 * - p: Polynomial to transform in place. Input coefficients are expected
 *      to be bound by MLKEM_Q; output is bound by NTT_BOUND_NATIVE
 *      (checked in debug builds via POLY_BOUND_MSG).
 */
void poly_ntt(poly *p)
{
POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
ntt_native(p);
POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output");
}
#endif /* MLKEM_USE_NATIVE_NTT */
#if !defined(MLKEM_USE_NATIVE_INTT)
/* Check that bound for reference invNTT implies contractual bound */
#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4)
STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
/* Compute one layer of inverse NTT */
STATIC_TESTABLE
void invntt_layer(int16_t *r, int len, int layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
requires(len == (1 << (8 - layer)))
requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
{
int start, k;
/* `layer` is a ghost variable used only in the specification */
((void)layer);
/* Twiddle factors are consumed in reverse order, starting from the top */
k = MLKEM_N / len - 1;
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
/* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
{
int j;
int16_t zeta = zetas[k--];
for (j = start; j < start + len; j++)
__loop__(
invariant(start <= j && j <= start + len)
invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
{
/*
 * Gentleman-Sande butterfly:
 *   (r[j], r[j+len]) <- (reduce(r[j] + r[j+len]), zeta*(r[j+len] - r[j]))
 * barrett_reduce keeps the sum within the MLKEM_Q bound required by
 * the invariant; fqmul applies zeta (Montgomery multiplication).
 * Note `t` captures r[j] before it is overwritten, so the subtraction
 * on the next line uses the pre-butterfly value.
 */
int16_t t = r[j];
r[j] = barrett_reduce(t + r[j + len]);
r[j + len] = r[j + len] - t;
r[j + len] = fqmul(r[j + len], zeta);
}
}
}
void poly_invntt_tomont(poly *p)
{
/*
 * Scale input polynomial to account for Montgomery factor
 * and NTT twist. This also brings coefficients down to
 * absolute value < MLKEM_Q.
 */
int j, len, layer;
/*
 * Combined scaling constant (Montgomery factor and 1/128 NTT twist, per
 * the comment above) — matches the reference Kyber implementation.
 */
const int16_t f = 1441;
int16_t *r = p->coeffs;
for (j = 0; j < MLKEM_N; j++)
__loop__(
invariant(0 <= j && j <= MLKEM_N)
invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
{
r[j] = fqmul(r[j], f);
}
/* Run the invNTT layers */
/*
 * Inverse order relative to poly_ntt: strides double (len = 2, ..., 128)
 * while `layer` counts down from 7. Each layer preserves the MLKEM_Q
 * coefficient bound, per invntt_layer's contract.
 */
for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
__loop__(
invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
{
invntt_layer(p->coeffs, len, layer);
}
/* Debug-only check of the tighter reference-implementation output bound */
POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output");
}
#else /* MLKEM_USE_NATIVE_INTT */
/* Check that bound for native invNTT implies contractual bound */
STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
/*
 * Compute full inverse NTT (with Montgomery scaling) using the native
 * (platform-specific) backend. Output coefficients are bound by
 * INVNTT_BOUND_NATIVE, checked in debug builds via POLY_BOUND_MSG.
 */
void poly_invntt_tomont(poly *p)
{
intt_native(p);
POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output");
}
#endif /* MLKEM_USE_NATIVE_INTT */
/*
 * Multiplication of two degree-1 polynomials a and b modulo X^2 - zeta,
 * writing the Montgomery-reduced result into r. `b_cached` is the
 * precomputed product of b[1] with the twiddle factor, so only four
 * 32-bit products are needed per call.
 */
void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
int16_t b_cached)
{
int32_t acc_lo, acc_hi;

/* Debug-only check on the magnitude of the inputs */
BOUND(a, 2, 4096, "basemul input bound");

/* Constant coefficient: a[0]*b[0] + a[1]*(b[1]*zeta) */
acc_lo = (int32_t)a[0] * b[0];
acc_lo += (int32_t)a[1] * b_cached;

/* Linear coefficient: a[0]*b[1] + a[1]*b[0] */
acc_hi = (int32_t)a[1] * b[0];
acc_hi += (int32_t)a[0] * b[1];

/* Each accumulator satisfies |acc| < 2 * q * 2^15 */
r[0] = montgomery_reduce(acc_lo);
r[1] = montgomery_reduce(acc_hi);

BOUND(r, 2, 2 * MLKEM_Q, "basemul output bound");
}