From bf6f718de05e2d0c2436661c1de2c6f10d81fa8b Mon Sep 17 00:00:00 2001
From: Duc Tri Nguyen
Date: Mon, 17 Jun 2024 11:10:51 -0400
Subject: [PATCH] add keccakx interface

Signed-off-by: Duc Tri Nguyen
---
Notes (placed after the three-dash line, so not part of the commit message):
  * Adds fips202/fips202x.{c,h}: keccakx4_state holds KECCAK_WAY (4) Keccak
    states of 25 lanes each, with shake128x4/shake256x4 absorb and
    squeezeblocks entry points that return 1 on a NULL argument, 0 otherwise.
  * Drops `static` from keccak_squeezeblocks() in fips202.c and declares it in
    fips202.h, so one lane of a keccakx4_state can be squeezed on its own.
  * gen_matrix() samples matrix entries KECCAK_WAY at a time through
    shake128x4 and uses the existing xof_* path for the remaining
    KYBER_K * KYBER_K % KECCAK_WAY entries.
  * NOTE(review): leading whitespace of every line was reconstructed; if the
    context does not match the tree, apply with --ignore-whitespace or
    regenerate the patch.
  * NOTE(review): From:/Signed-off-by: carry no e-mail address - confirm with
    the author before running git-am.

 Makefile           |   4 +-
 fips202/fips202.c  |   6 +-
 fips202/fips202.h  |  17 +++++
 fips202/fips202x.c | 177 +++++++++++++++++++++++++++++++++++++++++++++
 fips202/fips202x.h |  46 ++++++++++++
 mlkem/indcpa.c     |  84 ++++++++++++++++-----
 6 files changed, 310 insertions(+), 24 deletions(-)
 create mode 100644 fips202/fips202x.c
 create mode 100644 fips202/fips202x.h

diff --git a/Makefile b/Makefile
index 6c229c9e8..305b92722 100644
--- a/Makefile
+++ b/Makefile
@@ -24,12 +24,12 @@ NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer
 RM = /bin/rm
 
 SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c mlkem/cbd.c mlkem/reduce.c mlkem/verify.c
-SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c mlkem/symmetric-shake.c
+SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c fips202/fips202x.c mlkem/symmetric-shake.c
 SOURCESKECCAKRANDOM = $(SOURCESKECCAK) randombytes/randombytes.c
 SOURCESNISTKATS = $(SOURCESKECCAK) test/nistrng/aes.c test/nistrng/rng.c
 
 HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h
-HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h
+HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h fips202/fips202x.h
 HEADERSKECCAKRANDOM = $(HEADERSKECCAK) randombytes/randombytes.h
 HEADERNISTKATS = $(HEADERSKECCAK) test/nistrng/aes.h test/nistrng/randombytes.h
 
diff --git a/fips202/fips202.c b/fips202/fips202.c
index 0ad3dc645..36cbcc879 100644
--- a/fips202/fips202.c
+++ b/fips202/fips202.c
@@ -72,9 +72,9 @@ static void keccak_absorb(uint64_t *s,
 *              - uint64_t *s: pointer to in/output Keccak state
 *              - uint32_t r: rate in bytes (e.g., 168 for SHAKE128)
 **************************************************/
-static void keccak_squeezeblocks(uint8_t *h, size_t nblocks,
-                                 uint64_t *s,
-                                 uint32_t r)
+void keccak_squeezeblocks(uint8_t *h, size_t nblocks,
+                          uint64_t *s,
+                          uint32_t r)
 {
     while (nblocks > 0)
     {
diff --git a/fips202/fips202.h b/fips202/fips202.h
index 49247c2b8..c80e57ac5 100644
--- a/fips202/fips202.h
+++ b/fips202/fips202.h
@@ -35,6 +35,23 @@ typedef struct
     uint64_t ctx[25];
 } shake256ctx;
 
+/**
+ * Squeezes blocks of data from the Keccak state.
+ *
+ * This function extracts data from the Keccak state, effectively performing the squeeze step of the Keccak
+ * cryptographic hash function. It generates output data based on the current state and updates the state
+ * accordingly.
+ *
+ * @param h Pointer to the output buffer where the squeezed data will be stored.
+ * @param nblocks Number of blocks to squeeze from the state. Each block is r bytes long, where r is the rate
+ *                specified when initializing the state.
+ * @param s Pointer to the Keccak state array. This array must contain at least 25 elements of type uint64_t.
+ * @param r Rate in bytes. Specifies the rate parameter of the Keccak function, which determines how much
+ *          data is processed per permutation.
+ */
+void keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, uint32_t r);
+
+
 /* Initialize the state and absorb the provided input.
  *
  * This function does not support being called multiple times
diff --git a/fips202/fips202x.c b/fips202/fips202x.c
new file mode 100644
index 000000000..c007177b7
--- /dev/null
+++ b/fips202/fips202x.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: Apache-2.0
+#include <string.h>
+#include "fips202x.h"
+#include "fips202.h"
+#include "keccakf1600.h"
+
+#define KECCAK_CTX 25
+
+static void keccak_absorb_x4(uint64_t *s, uint32_t r,
+                             const uint8_t *in0,
+                             const uint8_t *in1,
+                             const uint8_t *in2,
+                             const uint8_t *in3,
+                             size_t inlen,
+                             uint8_t p)
+{
+
+    while (inlen >= r)
+    {
+
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, r);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, r);
+
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 3);
+
+        in0 += r;
+        in1 += r;
+        in2 += r;
+        in3 += r;
+        inlen -= r;
+    }
+
+    if (inlen > 0)
+    {
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, inlen);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, inlen);
+    }
+
+    if (inlen == r - 1)
+    {
+        p |= 128;
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
+    }
+    else
+    {
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
+        p = 128;
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, r - 1, 1);
+        KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, r - 1, 1);
+    }
+}
+
+static void keccak_squeezeblocks_x4(uint8_t *out0,
+                                    uint8_t *out1,
+                                    uint8_t *out2,
+                                    uint8_t *out3,
+                                    size_t nblocks,
+                                    uint64_t *s,
+                                    uint32_t r)
+{
+
+    while (nblocks > 0)
+    {
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
+        KeccakF1600_StatePermute(s + KECCAK_CTX * 3);
+
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 0, out0, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 1, out1, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 2, out2, 0, r);
+        KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 3, out3, 0, r);
+
+        out0 += r;
+        out1 += r;
+        out2 += r;
+        out3 += r;
+        nblocks--;
+    }
+}
+
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index)
+{
+    if (index >= KECCAK_WAY)
+    {
+        return NULL;
+    }
+
+    return state->ctx + index *KECCAK_CTX;
+}
+
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen)
+{
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL)
+    {
+        return 1;
+    }
+
+    memset(state->ctx, 0, sizeof(state->ctx));
+
+    keccak_absorb_x4(state->ctx, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
+
+    return 0;
+}
+
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen)
+{
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL)
+    {
+        return 1;
+    }
+
+    memset(state->ctx, 0, sizeof(state->ctx));
+
+    keccak_absorb_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
+
+    return 0;
+}
+
+
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state)
+{
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL)
+    {
+        return 1;
+    }
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE128_RATE);
+
+    return 0;
+}
+
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state)
+{
+
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL)
+    {
+        return 1;
+    }
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE256_RATE);
+
+    return 0;
+}
diff --git a/fips202/fips202x.h b/fips202/fips202x.h
new file mode 100644
index 000000000..c97471710
--- /dev/null
+++ b/fips202/fips202x.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: Apache-2.0
+#ifndef FIPS_202X_H
+#define FIPS_202X_H
+
+#ifndef KECCAK_WAY
+#define KECCAK_WAY 4
+#endif
+
+#include <stdint.h>
+
+typedef struct
+{
+    uint64_t ctx[25 * KECCAK_WAY];
+} keccakx4_state;
+
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index);
+
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
+
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
+#endif
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index a7a4409e0..e3c0c9e10 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -9,6 +9,8 @@
 #include "ntt.h"
 #include "symmetric.h"
 #include "randombytes.h"
+#include "fips202x.h"
+#include "fips202.h"
 
 /*************************************************
 * Name:        pack_pk
@@ -166,41 +168,85 @@ static unsigned int rej_uniform(int16_t *r,
 // Not static for benchmarking
 void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
 {
-    unsigned int ctr, i, j, k;
-    unsigned int buflen, off;
-    uint8_t buf[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
+    unsigned int ctr, i, j;
+    unsigned int buflen;
+    uint8_t bufx[KECCAK_WAY][GEN_MATRIX_NBLOCKS *XOF_BLOCKBYTES + 2];
+    uint8_t *buf = NULL;
+    int16_t *vec = NULL;
+    uint8_t x, y;
     xof_state state;
 
-    for (i = 0; i < KYBER_K; i++)
+    keccakx4_state statex;
+    uint8_t seedxy[KECCAK_WAY][KYBER_SYMBYTES + 2];
+
+    for (i = 0; i < (KYBER_K *KYBER_K / KECCAK_WAY) *KECCAK_WAY; i += KECCAK_WAY)
     {
-        for (j = 0; j < KYBER_K; j++)
+        for (j = 0; j < KECCAK_WAY; j++)
         {
+            memcpy(seedxy[j], seed, KYBER_SYMBYTES);
+            x = (i + j) / KYBER_K;
+            y = (i + j) % KYBER_K;
             if (transposed)
             {
-                xof_absorb(&state, seed, i, j);
+                seedxy[j][KYBER_SYMBYTES + 0] = x;
+                seedxy[j][KYBER_SYMBYTES + 1] = y;
             }
             else
             {
-                xof_absorb(&state, seed, j, i);
+                seedxy[j][KYBER_SYMBYTES + 0] = y;
+                seedxy[j][KYBER_SYMBYTES + 1] = x;
             }
+        }
 
-            xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
-            buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
-            ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, buflen);
+        shake128x4_absorb(&statex, seedxy[0], seedxy[1], seedxy[2], seedxy[3], KYBER_SYMBYTES + 2);
+        shake128x4_squeezeblocks(bufx[0], bufx[1], bufx[2], bufx[3], GEN_MATRIX_NBLOCKS, &statex);
 
-            while (ctr < KYBER_N)
+        for (j = 0; j < KECCAK_WAY; j++)
+        {
+            x = (i + j) / KYBER_K;
+            y = (i + j) % KYBER_K;
+            vec = a[x].vec[y].coeffs;
+            buf = bufx[j];
+            buflen = GEN_MATRIX_NBLOCKS *XOF_BLOCKBYTES;
+            ctr = rej_uniform(vec, KYBER_N, buf, buflen);
+
+            while (ctr < KYBER_N )
             {
-                off = buflen % 3;
-                for (k = 0; k < off; k++)
-                {
-                    buf[k] = buf[buflen - off + k];
-                }
-                xof_squeezeblocks(buf + off, 1, &state);
-                buflen = off + XOF_BLOCKBYTES;
-                ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen);
+                uint64_t *ctx = keccakx_get_lane_state(&statex, j);
+                keccak_squeezeblocks(buf, 1, ctx, SHAKE128_RATE);
+                buflen = XOF_BLOCKBYTES;
+                ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen);
             }
         }
     }
+
+    // For left over vector, we use single keccak.
+    for (; i < KYBER_K *KYBER_K; i++)
+    {
+        x = i / KYBER_K;
+        y = i % KYBER_K;
+        buf = bufx[0];
+        vec = a[x].vec[y].coeffs;
+
+        if (transposed)
+        {
+            xof_absorb(&state, seed, x, y);
+        }
+        else
+        {
+            xof_absorb(&state, seed, y, x);
+        }
+        xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
+        buflen = GEN_MATRIX_NBLOCKS *XOF_BLOCKBYTES;
+        ctr = rej_uniform(vec, KYBER_N, buf, buflen);
+
+        while (ctr < KYBER_N)
+        {
+            xof_squeezeblocks(buf, 1, &state);
+            buflen = XOF_BLOCKBYTES;
+            ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen);
+        }
+    }
 }
 
 /*************************************************