From d703199893431b4639d9b38431ccb64c590f4e59 Mon Sep 17 00:00:00 2001
From: Duc Tri Nguyen
Date: Mon, 17 Jun 2024 11:10:51 -0400
Subject: [PATCH] add keccakx interface

Signed-off-by: Duc Tri Nguyen

rename to x4

add shake256x4 interface

add shake256x4

add batch getnoise sampling
---
 Makefile            |   4 +-
 fips202/fips202x4.c | 242 ++++++++++++++++++++++++++++++++++++++++++++
 fips202/fips202x4.h |  66 ++++++++++++
 mlkem/indcpa.c      | 124 ++++++++++++++++-------
 mlkem/poly.c        | 101 ++++++++++++++++++
 mlkem/poly.h        |  33 ++++++
 6 files changed, 530 insertions(+), 40 deletions(-)
 create mode 100644 fips202/fips202x4.c
 create mode 100644 fips202/fips202x4.h

diff --git a/Makefile b/Makefile
index e5607c4c0..05db1d563 100644
--- a/Makefile
+++ b/Makefile
@@ -24,12 +24,12 @@ NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer
 RM = /bin/rm
 
 SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c mlkem/cbd.c mlkem/reduce.c mlkem/verify.c
-SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c mlkem/symmetric-shake.c
+SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c fips202/fips202x4.c mlkem/symmetric-shake.c
 SOURCESKECCAKRANDOM = $(SOURCESKECCAK) randombytes/randombytes.c
 SOURCESNISTKATS = $(SOURCESKECCAK) test/nistrng/aes.c test/nistrng/rng.c
 
 HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h
-HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h
+HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h fips202/fips202x4.h
 HEADERSKECCAKRANDOM = $(HEADERSKECCAK) randombytes/randombytes.h
 HEADERNISTKATS = $(HEADERSKECCAK) test/nistrng/aes.h test/nistrng/randombytes.h
 
diff --git a/fips202/fips202x4.c b/fips202/fips202x4.c
new file mode 100644
index 
000000000..f9e2c9e4a
--- /dev/null
+++ b/fips202/fips202x4.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: Apache-2.0
+#include <string.h>
+#include "fips202x4.h"
+#include "fips202.h"
+#include "keccakf1600.h"
+
+#define KECCAK_CTX 25
+
+/*
+ * Absorb four inlen-byte inputs into the four Keccak states stored back to
+ * back in s (KECCAK_CTX words per lane), then XOR in the padding: the
+ * domain-separation byte p after the message and 0x80 into byte r - 1.
+ * r is the rate in bytes. Input is XORed into s, so callers zero s first.
+ * The padded final block is left unpermuted for the squeeze routines.
+ */
+static void keccak_absorb_x4(uint64_t *s, uint32_t r,
+                             const uint8_t *in0,
+                             const uint8_t *in1,
+                             const uint8_t *in2,
+                             const uint8_t *in3,
+                             size_t inlen,
+                             uint8_t p)
+{
+
+    while (inlen >= r)
+    {
+
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, r);
+
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 3);
+
+        in0 += r;
+        in1 += r;
+        in2 += r;
+        in3 += r;
+        inlen -= r;
+    }
+
+    if (inlen > 0)
+    {
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, inlen);
+    }
+
+    if (inlen == r - 1)
+    {
+        p |= 128;
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
+    }
+    else
+    {
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
+        p = 128;
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, r - 1, 1);
+    }
+}
+
+/*
+ * Squeeze nblocks blocks of r bytes from every lane of s into out0..out3.
+ * Each lane is permuted before a block is extracted from it.
+ */
+static void keccak_squeezeblocks_x4(uint8_t *out0,
+                                    uint8_t *out1,
+                                    uint8_t *out2,
+                                    uint8_t *out3,
+                                    size_t nblocks,
+                                    uint64_t *s,
+                                    uint32_t r)
+{
+
+    while (nblocks > 0)
+    {
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 3);
+
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 0, out0, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 1, out1, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 2, out2, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 3, out3, 0, r);
+
+        out0 += r;
+        out1 += r;
+        out2 += r;
+        out3 += r;
+        nblocks--;
+    }
+}
+
+/*
+ * Return the KECCAK_CTX-word state of lane `index`, or NULL when state is
+ * NULL or index is not below KECCAK_WAY.
+ */
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index)
+{
+    if (state == NULL || index >= KECCAK_WAY)
+    {
+        return NULL;
+    }
+
+    return state->ctx + index * KECCAK_CTX;
+}
+
+/*
+ * Zero the 4-way state and absorb four inlen-byte inputs at SHAKE128_RATE.
+ * Returns 0 on success, 1 if any pointer argument is NULL.
+ */
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen)
+{
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL)
+    {
+        return 1;
+    }
+
+    memset(state->ctx, 0, sizeof(state->ctx));
+
+    keccak_absorb_x4(state->ctx, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
+
+    return 0;
+}
+
+/*
+ * Zero the 4-way state and absorb four inlen-byte inputs at SHAKE256_RATE.
+ * Returns 0 on success, 1 if any pointer argument is NULL.
+ */
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen)
+{
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL)
+    {
+        return 1;
+    }
+
+    memset(state->ctx, 0, sizeof(state->ctx));
+
+    keccak_absorb_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
+
+    return 0;
+}
+
+
+/*
+ * Squeeze nblocks blocks of SHAKE128_RATE bytes per lane into out0..out3.
+ * Returns 0 on success, 1 if any pointer argument is NULL.
+ */
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state)
+{
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL)
+    {
+        return 1;
+    }
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE128_RATE);
+
+    return 0;
+}
+
+/*
+ * Squeeze nblocks blocks of SHAKE256_RATE bytes per lane into out0..out3.
+ * Returns 0 on success, 1 if any pointer argument is NULL.
+ */
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state)
+{
+
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL)
+    {
+        return 1;
+    }
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE256_RATE);
+
+    return 0;
+}
+
+/*
+ * Squeeze nblocks blocks from the single lane `index` of a 4-way state.
+ *
+ * NOTE(review): despite the shake256 prefix, blocks are SHAKE128_RATE bytes
+ * long. gen_matrix() in mlkem/indcpa.c calls this on a state filled by
+ * shake128x4_absorb, so the rate is left untouched here; confirm before
+ * using it with a SHAKE256 state.
+ *
+ * Returns 0 on success, 1 if out is NULL or the lane cannot be resolved.
+ */
+int shake256x1_squeezeblocks(uint8_t *out,
+                             size_t nblocks,
+                             size_t index,
+                             keccakx4_state *state)
+{
+    uint64_t *ctx = keccakx_get_lane_state(state, index);
+    if (out == NULL || ctx == NULL)
+    {
+        return 1;
+    }
+    while (nblocks > 0)
+    {
+        KeccakF1600_StatePermute(ctx);
+        KeccakF1600_StateExtractBytes(ctx, out, 0, SHAKE128_RATE);
+        out += SHAKE128_RATE;
+        nblocks--;
+    }
+
+    return 0;
+}
+
+/*
+ * One-shot 4-way SHAKE256: absorb four inlen-byte inputs and write outlen
+ * bytes per lane. A trailing partial block is squeezed into a temporary
+ * buffer and truncated. Returns 0 on success, 1 if any pointer is NULL.
+ */
+int shake256x4(uint8_t *out0,
+               uint8_t *out1,
+               uint8_t *out2,
+               uint8_t *out3,
+               size_t outlen,
+               uint8_t *in0,
+               uint8_t *in1,
+               uint8_t *in2,
+               uint8_t *in3,
+               size_t inlen)
+{
+    if (in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL ||
+        out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL)
+    {
+        return 1;
+    }
+
+    keccakx4_state statex;
+    size_t nblocks = outlen / SHAKE256_RATE;
+    uint8_t tmp[KECCAK_WAY][SHAKE256_RATE];
+
+    shake256x4_absorb(&statex, in0, in1, in2, in3, inlen);
+
+    shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &statex);
+
+    out0 += nblocks * SHAKE256_RATE;
+    out1 += nblocks * SHAKE256_RATE;
+    out2 += nblocks * SHAKE256_RATE;
+    out3 += nblocks * SHAKE256_RATE;
+
+    outlen -= nblocks * SHAKE256_RATE;
+
+    if (outlen)
+    {
+        shake256x4_squeezeblocks(tmp[0], tmp[1], tmp[2], tmp[3], 1, &statex);
+        memcpy(out0, tmp[0], outlen);
+        memcpy(out1, tmp[1], outlen);
+        memcpy(out2, tmp[2], outlen);
+        memcpy(out3, tmp[3], outlen);
+    }
+
+    return 0;
+}
diff --git a/fips202/fips202x4.h b/fips202/fips202x4.h
new file mode 100644
index 000000000..07bcbe2de
--- /dev/null
+++ b/fips202/fips202x4.h
@@ -0,0
+1,67 @@
+// SPDX-License-Identifier: Apache-2.0
+#ifndef FIPS_202X_H
+#define FIPS_202X_H
+
+#ifndef KECCAK_WAY
+#define KECCAK_WAY 4
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct
+{
+    uint64_t ctx[25 * KECCAK_WAY];
+} keccakx4_state;
+
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index);
+
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
+
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
+
+/*
+ * Squeezes SHAKE128_RATE-byte blocks from a single lane of a 4-way state
+ */
+int shake256x1_squeezeblocks(uint8_t *out,
+                             size_t nblocks,
+                             size_t index,
+                             keccakx4_state *state);
+
+int shake256x4(uint8_t *out0,
+               uint8_t *out1,
+               uint8_t *out2,
+               uint8_t *out3,
+               size_t outlen,
+               uint8_t *in0,
+               uint8_t *in1,
+               uint8_t *in2,
+               uint8_t *in3,
+               size_t inlen);
+
+#endif
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index a7a4409e0..8c7236245 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -9,6 +9,8 @@
 #include "ntt.h"
 #include "symmetric.h"
 #include "randombytes.h"
+#include "fips202x4.h"
+#include "fips202.h"
 /*************************************************
 * Name:        pack_pk
 *
@@ -166,41 +168,86 @@ static unsigned int rej_uniform(int16_t *r,
 // Not static for benchmarking
 void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
 {
-    unsigned int ctr, i, j, k;
-    unsigned int buflen, off;
-    uint8_t buf[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
+    unsigned int ctr, i, j;
+    unsigned int buflen;
+    uint8_t bufx[KECCAK_WAY][GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES];
+    uint8_t *buf = NULL;
+    int16_t *vec = NULL;
+    uint8_t x, y;
     xof_state state;
 
-    for (i = 0; i < KYBER_K; i++)
+    keccakx4_state statex;
+    // The input data to each Keccak lane.
+    // Only KYBER_SYMBYTES + 2 bytes are absorbed; the extra bytes pad each row for aligned load/store.
+    uint8_t seedxy[KECCAK_WAY][KYBER_SYMBYTES + 16];
+
+    for (i = 0; i < (KYBER_K * KYBER_K / KECCAK_WAY) * KECCAK_WAY; i += KECCAK_WAY)
     {
-        for (j = 0; j < KYBER_K; j++)
+        for (j = 0; j < KECCAK_WAY; j++)
         {
+            memcpy(seedxy[j], seed, KYBER_SYMBYTES);
+            x = (i + j) / KYBER_K;
+            y = (i + j) % KYBER_K;
             if (transposed)
             {
-                xof_absorb(&state, seed, i, j);
+                seedxy[j][KYBER_SYMBYTES + 0] = x;
+                seedxy[j][KYBER_SYMBYTES + 1] = y;
             }
             else
             {
-                xof_absorb(&state, seed, j, i);
+                seedxy[j][KYBER_SYMBYTES + 0] = y;
+                seedxy[j][KYBER_SYMBYTES + 1] = x;
             }
+        }
 
-            xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
-            buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
-            ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, buflen);
+        shake128x4_absorb(&statex, seedxy[0], seedxy[1], seedxy[2], seedxy[3], KYBER_SYMBYTES + 2);
+        shake128x4_squeezeblocks(bufx[0], bufx[1], bufx[2], bufx[3], GEN_MATRIX_NBLOCKS, &statex);
 
-            while (ctr < KYBER_N)
+        for (j = 0; j < KECCAK_WAY; j++)
+        {
+            x = (i + j) / KYBER_K;
+            y = (i + j) % KYBER_K;
+            vec = a[x].vec[y].coeffs;
+            buf = bufx[j];
+            buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
+            ctr = rej_uniform(vec, KYBER_N, buf, buflen);
+
+            while (ctr < KYBER_N )
             {
-                off = buflen % 3;
-                for (k = 0; k < off; k++)
-                {
-                    buf[k] = buf[buflen - off + k];
-                }
-                xof_squeezeblocks(buf + off, 1, &state);
-                buflen = off + XOF_BLOCKBYTES;
-                ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen);
+                shake256x1_squeezeblocks(buf, 1, j, &statex); /* squeezes SHAKE128_RATE bytes despite the name */
+                buflen = XOF_BLOCKBYTES;
+                ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen);
             }
         }
     }
+
+    // Entries left over after the 4-way batches are sampled with the single-lane XOF.
+    for (; i < KYBER_K * KYBER_K; i++)
+    {
+        x = i / KYBER_K;
+        y = i % KYBER_K;
+        buf = bufx[0];
+        vec = a[x].vec[y].coeffs;
+
+        if (transposed)
+        {
+            xof_absorb(&state, seed, x, y);
+        }
+        else
+        {
+            xof_absorb(&state, seed, y, x);
+        }
+        xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
+        buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
+        ctr = rej_uniform(vec, KYBER_N, buf, buflen);
+
+        while (ctr < KYBER_N)
+        {
+            xof_squeezeblocks(buf, 1, &state);
+            buflen = XOF_BLOCKBYTES;
+            ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen);
+        }
+    }
 }
 
 /*************************************************
@@ -224,21 +271,21 @@ void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
     uint8_t buf[2 * KYBER_SYMBYTES];
     const uint8_t *publicseed = buf;
     const uint8_t *noiseseed = buf + KYBER_SYMBYTES;
-    uint8_t nonce = 0;
     polyvec a[KYBER_K], e, pkpv, skpv;
 
     hash_g(buf, coins, KYBER_SYMBYTES);
 
     gen_a(a, publicseed);
 
-    for (i = 0; i < KYBER_K; i++)
-    {
-        poly_getnoise_eta1(&skpv.vec[i], noiseseed, nonce++);
-    }
-    for (i = 0; i < KYBER_K; i++)
-    {
-        poly_getnoise_eta1(&e.vec[i], noiseseed, nonce++);
-    }
+    #if KYBER_K == 2
+    poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3);
+    #elif KYBER_K == 3
+    poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3);
+    poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); /* NOTE(review): pkpv.vec+0/+1 take the spare nonce-6/7 samples; presumably overwritten later - confirm */
+    #elif KYBER_K == 4
+    poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3);
+    poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7);
+    #endif
 
     polyvec_ntt(&skpv);
     polyvec_ntt(&e);
@@ -280,7 +327,6 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
 {
     unsigned int i;
     uint8_t seed[KYBER_SYMBYTES];
-    uint8_t nonce = 0;
     polyvec sp, pkpv, ep, at[KYBER_K], b;
     poly v, k, epp;
 
@@ -288,15 +334,17 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
     poly_frommsg(&k, m);
     gen_at(at, seed);
 
-    for (i = 0; i < KYBER_K; i++)
-    {
-        poly_getnoise_eta1(sp.vec + i, coins, nonce++);
-    }
-    for (i = 0; i < KYBER_K; i++)
-    {
-        poly_getnoise_eta2(ep.vec + i, coins, nonce++);
-    }
-    poly_getnoise_eta2(&epp, coins, nonce++);
+    #if KYBER_K == 2
+    poly_getnoise_eta1122_4x(sp.vec+0, sp.vec+1, ep.vec+0, ep.vec+1, coins, 0, 1, 2, 3);
+    poly_getnoise_eta2(&epp, coins, 4);
+    #elif KYBER_K == 3
+    poly_getnoise_eta1_4x(sp.vec+0, sp.vec+1, sp.vec+2, ep.vec+0, coins, 0, 1, 2, 3); /* NOTE(review): ep is eta2 noise; valid only while KYBER_ETA1 == KYBER_ETA2 for this K */
+    poly_getnoise_eta1_4x(ep.vec+1, ep.vec+2, &epp, b.vec+0, coins, 4, 5, 6, 7); /* NOTE(review): b.vec+0 takes the spare nonce-7 sample; presumably overwritten later - confirm */
+    #elif KYBER_K == 4
+    poly_getnoise_eta1_4x(sp.vec+0, sp.vec+1, sp.vec+2, sp.vec+3, coins, 0, 1, 2, 3);
+    poly_getnoise_eta2_4x(ep.vec+0, ep.vec+1, ep.vec+2, ep.vec+3, coins, 4, 5, 6, 7);
+    poly_getnoise_eta2(&epp, coins, 8);
+    #endif
 
     polyvec_ntt(&sp);
 
diff --git a/mlkem/poly.c b/mlkem/poly.c
index 3137668eb..46bbbab3a 100644
--- a/mlkem/poly.c
+++ b/mlkem/poly.c
@@ -8,6 +8,7 @@
 #include "cbd.h"
 #include "symmetric.h"
 #include "verify.h"
+#include "fips202x4.h"
 /************************************************************
 * Name: scalar_compress_q_16
 *
@@ -338,6 +339,39 @@ void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t non
     poly_cbd_eta1(r, buf);
 }
 
+/*************************************************
+* Name:        poly_getnoise_eta1_4x
+*
+* Description: Batch sample four polynomials deterministically from a seed and nonces,
+*              with output polynomials close to centered binomial distribution
+*              with parameter KYBER_ETA1
+*
+* Arguments:   - poly *r{0,1,2,3}: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[KYBER_SYMBYTES],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3)
+{
+    uint8_t buf[KECCAK_WAY][KYBER_ETA1 * KYBER_N / 4];
+    prf(buf[0], sizeof(buf[0]), seed, nonce0);
+    prf(buf[1], sizeof(buf[1]), seed, nonce1);
+    prf(buf[2], sizeof(buf[2]), seed, nonce2);
+    prf(buf[3], sizeof(buf[3]), seed, nonce3);
+    poly_cbd_eta1(r0, buf[0]);
+    poly_cbd_eta1(r1, buf[1]);
+    poly_cbd_eta1(r2, buf[2]);
+    poly_cbd_eta1(r3, buf[3]);
+}
+
 /*************************************************
 * Name:        poly_getnoise_eta2
 *
@@ -357,6 +391,73 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t non
     poly_cbd_eta2(r, buf);
 }
 
+/*************************************************
+* Name:        poly_getnoise_eta2_4x
+*
+* Description: Batch sample four polynomials deterministically from a seed and nonces,
+*              with output polynomials close to centered binomial distribution
+*              with parameter KYBER_ETA2
+*
+* Arguments:   - poly *r{0,1,2,3}: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta2_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[KYBER_SYMBYTES],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3)
+{
+    uint8_t buf[KECCAK_WAY][KYBER_ETA2 * KYBER_N / 4];
+    prf(buf[0], sizeof(buf[0]), seed, nonce0);
+    prf(buf[1], sizeof(buf[1]), seed, nonce1);
+    prf(buf[2], sizeof(buf[2]), seed, nonce2);
+    prf(buf[3], sizeof(buf[3]), seed, nonce3);
+    poly_cbd_eta2(r0, buf[0]);
+    poly_cbd_eta2(r1, buf[1]);
+    poly_cbd_eta2(r2, buf[2]);
+    poly_cbd_eta2(r3, buf[3]);
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta1122_4x
+*
+* Description: Batch sample four polynomials deterministically from a seed and nonces,
+*              with output polynomials close to centered binomial distribution
+*              with parameter KYBER_ETA1 (r0, r1) and KYBER_ETA2 (r2, r3)
+*
+* Arguments:   - poly *r{0,1,2,3}: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1122_4x(poly *r0,
+                              poly *r1,
+                              poly *r2,
+                              poly *r3,
+                              const uint8_t seed[KYBER_SYMBYTES],
+                              uint8_t nonce0,
+                              uint8_t nonce1,
+                              uint8_t nonce2,
+                              uint8_t nonce3)
+{
+    uint8_t buf1[KECCAK_WAY/2][KYBER_ETA1 * KYBER_N / 4];
+    uint8_t buf2[KECCAK_WAY/2][KYBER_ETA2 * KYBER_N / 4];
+    prf(buf1[0], sizeof(buf1[0]), seed, nonce0);
+    prf(buf1[1], sizeof(buf1[1]), seed, nonce1);
+    prf(buf2[0], sizeof(buf2[0]), seed, nonce2);
+    prf(buf2[1], sizeof(buf2[1]), seed, nonce3);
+    poly_cbd_eta1(r0, buf1[0]);
+    poly_cbd_eta1(r1, buf1[1]);
+    poly_cbd_eta2(r2, buf2[0]);
+    poly_cbd_eta2(r3, buf2[1]);
+}
+
 /*************************************************
 * Name:        poly_ntt
 *
diff --git a/mlkem/poly.h b/mlkem/poly.h
index 48ff1efee..e585fbaff 100644
--- a/mlkem/poly.h
+++ b/mlkem/poly.h
@@ -65,9 +65,42 @@ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
 #define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
 void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
 
+#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta1_4x)
+void poly_getnoise_eta1_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[KYBER_SYMBYTES],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3);
+
 #define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
 void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
 
+#define poly_getnoise_eta2_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x)
+void poly_getnoise_eta2_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[KYBER_SYMBYTES],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3);
+
+#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x)
+void poly_getnoise_eta1122_4x(poly *r0,
+                              poly *r1,
+                              poly *r2,
+                              poly *r3,
+                              const uint8_t seed[KYBER_SYMBYTES],
+                              uint8_t nonce0,
+                              uint8_t nonce1,
+                              uint8_t nonce2,
+                              uint8_t nonce3);
+
 #define poly_ntt KYBER_NAMESPACE(poly_ntt)
 void poly_ntt(poly *r);
 #define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)