diff --git a/Makefile b/Makefile index 6b1189920..690086f22 100644 --- a/Makefile +++ b/Makefile @@ -16,13 +16,13 @@ NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer RM = /bin/rm SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c mlkem/cbd.c mlkem/reduce.c mlkem/verify.c -SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c mlkem/symmetric-shake.c +SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c fips202/fips202x4.c mlkem/symmetric-shake.c SOURCESKECCAKRANDOM = $(SOURCESKECCAK) randombytes/randombytes.c SOURCESNISTKATS = $(SOURCESKECCAK) test/nistrng/aes.c test/nistrng/rng.c SOURCESBENCH = $(SOURCESKECCAKRANDOM) test/hal.c -HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h -HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h +HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h +HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h fips202/fips202x4.h HEADERSKECCAKRANDOM = $(HEADERSKECCAK) randombytes/randombytes.h HEADERNISTKATS = $(HEADERSKECCAK) test/nistrng/aes.h test/nistrng/randombytes.h HEADERSBENCH = $(HEADERSKECCAKRANDOM) test/hal.h diff --git a/fips202/fips202x4.c b/fips202/fips202x4.c new file mode 100644 index 000000000..0c21ce75e --- /dev/null +++ b/fips202/fips202x4.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: Apache-2.0 +#include <string.h> +#include "fips202x4.h" +#include "fips202.h" +#include "keccakf1600.h" + +#define KECCAK_CTX 25 /* uint64_t words per Keccak state; lane k starts KECCAK_CTX * k words into the state array */ + +static void keccak_absorb_x4(uint64_t *s, uint32_t r, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen, + uint8_t p) +{ /* Absorbs inlen bytes from each of in0..in3 into the four consecutive states at s (rate r bytes), then XORs in padding byte p and the final 0x80 bit; the last block is left unpermuted. */ + + while (inlen >= r) + { + + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, r); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, r); + 
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, r); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, r); + + KeccakF1600_StatePermute(s + KECCAK_CTX * 0); + KeccakF1600_StatePermute(s + KECCAK_CTX * 1); + KeccakF1600_StatePermute(s + KECCAK_CTX * 2); + KeccakF1600_StatePermute(s + KECCAK_CTX * 3); + + in0 += r; + in1 += r; + in2 += r; + in3 += r; + inlen -= r; + } + + if (inlen > 0) + { + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, inlen); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, inlen); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, inlen); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, inlen); + } + + if (inlen == r - 1) + { + p |= 128; /* padding byte and final 0x80 bit land in the same (last) byte of the rate block */ + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1); + } + else + { + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1); + p = 128; + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, r - 1, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, r - 1, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, r - 1, 1); + KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, r - 1, 1); + } +} + +static void keccak_squeezeblocks_x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + uint64_t *s, + uint32_t r) +{ /* For each of nblocks blocks: permutes all four states at s, then copies r bytes of each state to out0..out3 and advances the output pointers by r. */ + + while (nblocks > 0) + { + KeccakF1600_StatePermute(s + KECCAK_CTX * 0); + KeccakF1600_StatePermute(s + KECCAK_CTX * 1); + KeccakF1600_StatePermute(s + KECCAK_CTX * 2); + KeccakF1600_StatePermute(s + KECCAK_CTX * 3); + + KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 0, out0, 0, r); + KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 1, out1, 0, r); 
+ KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 2, out2, 0, r); + KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 3, out3, 0, r); + + out0 += r; + out1 += r; + out2 += r; + out3 += r; + nblocks--; + } +} + +uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index) +{ /* Returns the start of lane index inside state->ctx, or NULL when index >= KECCAK_WAY. */ + if (index >= KECCAK_WAY) + { + return NULL; + } + + return state->ctx + index * KECCAK_CTX; +} + +void shake128x4_absorb(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) +{ /* Zeroes all four lane states, then absorbs and pads inlen bytes per lane at SHAKE128_RATE with padding byte 0x1F. */ + memset(state->ctx, 0, sizeof(state->ctx)); + + keccak_absorb_x4(state->ctx, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F); +} + +void shake256x4_absorb(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) +{ /* Zeroes all four lane states, then absorbs and pads inlen bytes per lane at SHAKE256_RATE with padding byte 0x1F. */ + memset(state->ctx, 0, sizeof(state->ctx)); + + keccak_absorb_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); +} + + +void shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) +{ /* Writes nblocks * SHAKE128_RATE bytes to each of out0..out3. */ + keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE128_RATE); +} + +void shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) +{ /* Writes nblocks * SHAKE256_RATE bytes to each of out0..out3. */ + keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE256_RATE); +} + +void shake256x4_squeezeblocks_single(uint8_t *out, + size_t nblocks, + size_t index, + keccakx4_state *state) +{ /* Squeezes nblocks blocks from lane index only; the other three lanes are left untouched. */ + uint64_t *ctx = keccakx_get_lane_state(state, index); /* NULL when index >= KECCAK_WAY */ + while (ctx != NULL && nblocks > 0) /* invalid lane index: write nothing instead of dereferencing NULL */ + { + KeccakF1600_StatePermute(ctx); + KeccakF1600_StateExtractBytes(ctx, out, 0, SHAKE128_RATE); /* NOTE(review): block size is SHAKE128_RATE despite the shake256 name; gen_matrix relies on this for its SHAKE128 lanes */ + out += SHAKE128_RATE; + nblocks--; + } +} + +void shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) +{ /* One-shot 4-way SHAKE256: absorbs inlen bytes per lane and writes outlen bytes to each of out0..out3. */ + keccakx4_state statex; + size_t nblocks = 
outlen/SHAKE256_RATE; + uint8_t tmp[KECCAK_WAY][SHAKE256_RATE]; + + shake256x4_absorb(&statex, in0, in1, in2, in3, inlen); + + shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &statex); + + out0 += nblocks * SHAKE256_RATE; + out1 += nblocks * SHAKE256_RATE; + out2 += nblocks * SHAKE256_RATE; + out3 += nblocks * SHAKE256_RATE; + + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) /* partial final block: squeeze one whole block into tmp and copy only the remaining outlen bytes */ + { + shake256x4_squeezeblocks(tmp[0], tmp[1], tmp[2], tmp[3], 1, &statex); + memcpy(out0, tmp[0], outlen); + memcpy(out1, tmp[1], outlen); + memcpy(out2, tmp[2], outlen); + memcpy(out3, tmp[3], outlen); + } +} diff --git a/fips202/fips202x4.h b/fips202/fips202x4.h new file mode 100644 index 000000000..84cae56c7 --- /dev/null +++ b/fips202/fips202x4.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 +#ifndef FIPS_202X4_H +#define FIPS_202X4_H + +#ifndef KECCAK_WAY +#define KECCAK_WAY 4 +#endif +#include <stddef.h> +#include <stdint.h> + +typedef struct +{ + uint64_t ctx[25 * KECCAK_WAY]; +} keccakx4_state; + +uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index); + +void shake128x4_absorb(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void shake256x4_absorb(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + + +void shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +/* + * Squeezes nblocks blocks of SHAKE128_RATE bytes from lane index only (SHAKE128 rate despite the shake256 prefix); writes nothing if index >= KECCAK_WAY. + */ +void shake256x4_squeezeblocks_single(uint8_t *out, + size_t nblocks, + size_t index, + keccakx4_state *state); + +void shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t 
inlen); + +#endif diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c index a7a4409e0..9757175cd 100644 --- a/mlkem/indcpa.c +++ b/mlkem/indcpa.c @@ -9,6 +9,8 @@ #include "ntt.h" #include "symmetric.h" #include "randombytes.h" +#include "fips202x4.h" +#include "fips202.h" /************************************************* * Name: pack_pk @@ -166,41 +168,88 @@ static unsigned int rej_uniform(int16_t *r, // Not static for benchmarking void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) { - unsigned int ctr, i, j, k; - unsigned int buflen, off; - uint8_t buf[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2]; + unsigned int ctr, i, j; + unsigned int buflen; + uint8_t bufx[KECCAK_WAY][GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES]; + uint8_t *buf = NULL; + int16_t *vec = NULL; + uint8_t x, y; xof_state state; - for (i = 0; i < KYBER_K; i++) + keccakx4_state statex; + // The input data to each Keccak lane. + // Only KYBER_SYMBYTES + 2 bytes are absorbed; the buffer is padded so loads/stores can be aligned. + uint8_t seedxy[KECCAK_WAY][KYBER_SYMBYTES + 16]; + for (j = 0; j < KECCAK_WAY; j++) + { + memcpy(seedxy[j], seed, KYBER_SYMBYTES); + } + for (i = 0; i < (KYBER_K * KYBER_K / KECCAK_WAY) * KECCAK_WAY; i += KECCAK_WAY) /* whole batches of KECCAK_WAY matrix entries; the remainder is handled after this loop */ { - for (j = 0; j < KYBER_K; j++) + for (j = 0; j < KECCAK_WAY; j++) { + x = (i + j) / KYBER_K; + y = (i + j) % KYBER_K; if (transposed) { - xof_absorb(&state, seed, i, j); + seedxy[j][KYBER_SYMBYTES + 0] = x; + seedxy[j][KYBER_SYMBYTES + 1] = y; } else { - xof_absorb(&state, seed, j, i); + seedxy[j][KYBER_SYMBYTES + 0] = y; + seedxy[j][KYBER_SYMBYTES + 1] = x; } + } - xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state); + shake128x4_absorb(&statex, seedxy[0], seedxy[1], seedxy[2], seedxy[3], KYBER_SYMBYTES + 2); + shake128x4_squeezeblocks(bufx[0], bufx[1], bufx[2], bufx[3], GEN_MATRIX_NBLOCKS, &statex); + + for (j = 0; j < KECCAK_WAY; j++) + { + x = (i + j) / KYBER_K; + y = (i + j) % KYBER_K; + vec = a[x].vec[y].coeffs; + buf = bufx[j]; buflen = GEN_MATRIX_NBLOCKS * 
XOF_BLOCKBYTES; - ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, buflen); + ctr = rej_uniform(vec, KYBER_N, buf, buflen); - while (ctr < KYBER_N) + while (ctr < KYBER_N) { - off = buflen % 3; - for (k = 0; k < off; k++) - { - buf[k] = buf[buflen - off + k]; - } - xof_squeezeblocks(buf + off, 1, &state); - buflen = off + XOF_BLOCKBYTES; - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen); + shake256x4_squeezeblocks_single(buf, 1, j, &statex); /* one more SHAKE128_RATE block for lane j only (the function is named shake256 but squeezes at the SHAKE128 rate) */ + buflen = XOF_BLOCKBYTES; + ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen); } } } + + // For left over vector, we use single keccak. + for (; i < KYBER_K * KYBER_K; i++) + { + x = i / KYBER_K; + y = i % KYBER_K; + buf = bufx[0]; + vec = a[x].vec[y].coeffs; + + if (transposed) + { + xof_absorb(&state, seed, x, y); + } + else + { + xof_absorb(&state, seed, y, x); + } + xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state); + buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES; + ctr = rej_uniform(vec, KYBER_N, buf, buflen); + + while (ctr < KYBER_N) + { + xof_squeezeblocks(buf, 1, &state); + buflen = XOF_BLOCKBYTES; + ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen); + } + } } /************************************************* @@ -224,21 +273,21 @@ void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], uint8_t buf[2 * KYBER_SYMBYTES]; const uint8_t *publicseed = buf; const uint8_t *noiseseed = buf + KYBER_SYMBYTES; - uint8_t nonce = 0; polyvec a[KYBER_K], e, pkpv, skpv; hash_g(buf, coins, KYBER_SYMBYTES); gen_a(a, publicseed); - for (i = 0; i < KYBER_K; i++) - { - poly_getnoise_eta1(&skpv.vec[i], noiseseed, nonce++); - } - for (i = 0; i < KYBER_K; i++) - { - poly_getnoise_eta1(&e.vec[i], noiseseed, nonce++); - } + #if KYBER_K == 2 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); + #elif KYBER_K == 3 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+1, e.vec+2, 
pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); /* last two lanes are filler: pkpv.vec+0/1 receive unused samples (presumably overwritten later; confirm) */ + #elif KYBER_K == 4 + poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); + #endif polyvec_ntt(&skpv); polyvec_ntt(&e); @@ -280,7 +329,6 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], { unsigned int i; uint8_t seed[KYBER_SYMBYTES]; - uint8_t nonce = 0; polyvec sp, pkpv, ep, at[KYBER_K], b; poly v, k, epp; @@ -288,15 +336,17 @@ void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], poly_frommsg(&k, m); gen_at(at, seed); - for (i = 0; i < KYBER_K; i++) - { - poly_getnoise_eta1(sp.vec + i, coins, nonce++); - } - for (i = 0; i < KYBER_K; i++) - { - poly_getnoise_eta2(ep.vec + i, coins, nonce++); - } - poly_getnoise_eta2(&epp, coins, nonce++); + #if KYBER_K == 2 + poly_getnoise_eta1122_4x(sp.vec+0, sp.vec+1, ep.vec+0, ep.vec+1, coins, 0, 1, 2, 3); + poly_getnoise_eta2(&epp, coins, 4); + #elif KYBER_K == 3 + poly_getnoise_eta1_4x(sp.vec+0, sp.vec+1, sp.vec+2, ep.vec+0, coins, 0, 1, 2, 3); /* NOTE(review): ep.vec+0 shares an ETA1 batch; correct only while KYBER_ETA1 == KYBER_ETA2 for this K */ + poly_getnoise_eta2_4x(ep.vec+1, ep.vec+2, &epp, b.vec+0, coins, 4, 5, 6, 7); /* ep and epp are ETA2 noise; b.vec+0 is a filler output for the unused fourth lane (presumably overwritten later; confirm) */ + #elif KYBER_K == 4 + poly_getnoise_eta1_4x(sp.vec+0, sp.vec+1, sp.vec+2, sp.vec+3, coins, 0, 1, 2, 3); + poly_getnoise_eta2_4x(ep.vec+0, ep.vec+1, ep.vec+2, ep.vec+3, coins, 4, 5, 6, 7); /* ep is ETA2 noise, as in the loop this replaces */ + poly_getnoise_eta2(&epp, coins, 8); + #endif polyvec_ntt(&sp); diff --git a/mlkem/poly.c b/mlkem/poly.c index d10594a3d..2444a17cd 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 #include <stdint.h> +#include <string.h> #include "cbmc.h" #include "params.h" #include "poly.h" @@ -8,6 +9,7 @@ #include "cbd.h" #include "symmetric.h" #include "verify.h" +#include "fips202x4.h" /************************************************************ * Name: scalar_compress_q_16 @@ -367,6 +369,46 @@ void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t non poly_cbd_eta1(r, buf); } 
+/************************************************* +* Name: poly_getnoise_eta1_4x +* +* Description: Batch sample four polynomials deterministically from a seed and nonces, +* with output polynomials close to centered binomial distribution +* with parameter KYBER_ETA1 +* +* Arguments: - poly *r{0,1,2,3}: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce{0,1,2,3}: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ /* Builds four seed-plus-nonce keys, expands them with one shake256x4 call, then applies poly_cbd_eta1 to each output buffer. */ + uint8_t buf[KECCAK_WAY][KYBER_ETA1 * KYBER_N / 4]; + uint8_t extkey[KECCAK_WAY][KYBER_SYMBYTES + 1]; + memcpy(extkey[0], seed, KYBER_SYMBYTES); + memcpy(extkey[1], seed, KYBER_SYMBYTES); + memcpy(extkey[2], seed, KYBER_SYMBYTES); + memcpy(extkey[3], seed, KYBER_SYMBYTES); + extkey[0][KYBER_SYMBYTES] = nonce0; + extkey[1][KYBER_SYMBYTES] = nonce1; + extkey[2][KYBER_SYMBYTES] = nonce2; + extkey[3][KYBER_SYMBYTES] = nonce3; + shake256x4(buf[0], buf[1], buf[2], buf[3], KYBER_ETA1 * KYBER_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], KYBER_SYMBYTES + 1); + poly_cbd_eta1(r0, buf[0]); + poly_cbd_eta1(r1, buf[1]); + poly_cbd_eta1(r2, buf[2]); + poly_cbd_eta1(r3, buf[3]); +} + /************************************************* * Name: poly_getnoise_eta2 * @@ -386,6 +428,96 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t non poly_cbd_eta2(r, buf); } +/************************************************* +* Name: poly_getnoise_eta2_4x +* +* Description: Batch sample four polynomials deterministically from a seed and nonces, +* with output polynomials close to centered binomial distribution +* with parameter KYBER_ETA2 +* +* Arguments: - poly *r{0,1,2,3}: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed +* (of 
length KYBER_SYMBYTES bytes) +* - uint8_t nonce{0,1,2,3}: one-byte input nonce +**************************************************/ +void poly_getnoise_eta2_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ /* Builds four seed-plus-nonce keys, expands them with one shake256x4 call, then applies poly_cbd_eta2 to each output buffer. */ + uint8_t buf[KECCAK_WAY][KYBER_ETA2 * KYBER_N / 4]; + uint8_t extkey[KECCAK_WAY][KYBER_SYMBYTES + 1]; + memcpy(extkey[0], seed, KYBER_SYMBYTES); + memcpy(extkey[1], seed, KYBER_SYMBYTES); + memcpy(extkey[2], seed, KYBER_SYMBYTES); + memcpy(extkey[3], seed, KYBER_SYMBYTES); + extkey[0][KYBER_SYMBYTES] = nonce0; + extkey[1][KYBER_SYMBYTES] = nonce1; + extkey[2][KYBER_SYMBYTES] = nonce2; + extkey[3][KYBER_SYMBYTES] = nonce3; + shake256x4(buf[0], buf[1], buf[2], buf[3], KYBER_ETA2 * KYBER_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], KYBER_SYMBYTES + 1); + poly_cbd_eta2(r0, buf[0]); + poly_cbd_eta2(r1, buf[1]); + poly_cbd_eta2(r2, buf[2]); + poly_cbd_eta2(r3, buf[3]); +} + +/************************************************* +* Name: poly_getnoise_eta1122_4x +* +* Description: Batch sample four polynomials deterministically from a seed and nonces, +* with output polynomials close to centered binomial distribution +* with parameter KYBER_ETA1 and KYBER_ETA2 +* +* Arguments: - poly *r{0,1,2,3}: pointer to output polynomial (r0, r1 use KYBER_ETA1; r2, r3 use KYBER_ETA2) +* - const uint8_t *seed: pointer to input seed +* (of length KYBER_SYMBYTES bytes) +* - uint8_t nonce{0,1,2,3}: one-byte input nonce +**************************************************/ +void poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3) +{ + uint8_t buf1[KECCAK_WAY/2][KYBER_ETA1 * KYBER_N / 4]; + uint8_t buf2[KECCAK_WAY/2][KYBER_ETA2 * KYBER_N / 4]; + uint8_t extkey[KECCAK_WAY][KYBER_SYMBYTES + 1]; + memcpy(extkey[0], seed, KYBER_SYMBYTES); + memcpy(extkey[1], seed, KYBER_SYMBYTES); + 
memcpy(extkey[2], seed, KYBER_SYMBYTES); + memcpy(extkey[3], seed, KYBER_SYMBYTES); + extkey[0][KYBER_SYMBYTES] = nonce0; + extkey[1][KYBER_SYMBYTES] = nonce1; + extkey[2][KYBER_SYMBYTES] = nonce2; + extkey[3][KYBER_SYMBYTES] = nonce3; + + #if KYBER_ETA1 == KYBER_ETA2 /* all four buffers have the same length, so one shake256x4 call (single outlen) covers every lane */ + shake256x4(buf1[0], buf1[1], buf2[0], buf2[1], KYBER_ETA1 * KYBER_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], KYBER_SYMBYTES + 1); + #else /* buffer lengths differ between buf1 and buf2: use four independent shake256 calls */ + shake256(buf1[0], sizeof(buf1[0]), extkey[0], sizeof(extkey[0])); + shake256(buf1[1], sizeof(buf1[1]), extkey[1], sizeof(extkey[1])); + shake256(buf2[0], sizeof(buf2[0]), extkey[2], sizeof(extkey[2])); + shake256(buf2[1], sizeof(buf2[1]), extkey[3], sizeof(extkey[3])); + #endif + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); +} + /************************************************* * Name: poly_ntt * diff --git a/mlkem/poly.h b/mlkem/poly.h index c7737e320..bbeee5281 100644 --- a/mlkem/poly.h +++ b/mlkem/poly.h @@ -75,9 +75,42 @@ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); #define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); +#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta1_4x) +void poly_getnoise_eta1_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + #define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); +#define poly_getnoise_eta2_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) +void poly_getnoise_eta2_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + +#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) +void 
poly_getnoise_eta1122_4x(poly *r0, + poly *r1, + poly *r2, + poly *r3, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t nonce0, + uint8_t nonce1, + uint8_t nonce2, + uint8_t nonce3); + #define poly_ntt KYBER_NAMESPACE(poly_ntt) void poly_ntt(poly *r); #define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) diff --git a/mlkem/symmetric-shake.c b/mlkem/symmetric-shake.c index 33a80ecc3..722550656 100644 --- a/mlkem/symmetric-shake.c +++ b/mlkem/symmetric-shake.c @@ -52,7 +52,7 @@ void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYM } /************************************************* -* Name: kyber_shake256_prf +* Name: kyber_shake256_rkprf * * Description: Usage of SHAKE256 as a PRF, concatenates secret and public input * and then generates outlen bytes of SHAKE256 output