Replace the generic N-way Keccak wrapper with a fixed 4-way SHAKE API.

The old keccakx context stored per-lane input/output descriptors that were
registered with keccakx_set_lane_input()/keccakx_set_lane_output(). The new
keccakx4_state holds only the lane states; the four inputs and outputs are
passed directly to shake{128,256}x4_absorb()/shake{128,256}x4_squeezeblocks().
gen_matrix() in mlkem/indcpa.c is switched to the new calls.

Review changes folded into this revision:
  * keccakx_get_lane_state() checks `state` for NULL again; the original
    patch dropped the check the old function had.
  * fips202x.h rejects KECCAK_WAY != 4 at compile time, because the x4
    functions and gen_matrix() hard-code lanes 0..3 while the macro can
    still be overridden.
  * The new public prototypes are documented; the old comments were removed
    together with the old API. Output buffer sizes are no longer checked, and
    the documentation says so.

NOTE(review): this text was recovered from a whitespace-collapsed copy of the
patch. Indentation, whitespace-only context lines and the system header names
(<string.h>, <stdint.h>, <stddef.h>) are reconstructed - confirm them against
the repository before applying. The index hashes are those of the original
patch. fips202x.h still uses size_t after one system include is removed;
presumably that include was <stddef.h> - verify the header stays
self-contained.

diff --git a/fips202/fips202x.c b/fips202/fips202x.c
index 65df69252..8aa1145f3 100644
--- a/fips202/fips202x.c
+++ b/fips202/fips202x.c
@@ -2,102 +2,108 @@
 #include <string.h>
 #include "fips202x.h"
 #include "fips202.h"
-#define KECCAK_CTX 25
-
-int keccakx_set_lane_input(keccakx *kx, size_t lane_index, uint8_t *input, size_t inlen) {
-
-    if (kx == NULL || input == NULL || lane_index >= KECCAK_WAY) {
-        return 1;
-    }
-    kx->inputs[lane_index].p = input;
-    kx->inputs[lane_index].plen = inlen;
+#define KECCAK_CTX 25
 
-    return 0;
+/* Absorbs inlen bytes from each of the four inputs into its own lane state. */
+static void keccak_absorb_x4(uint64_t *s, uint32_t r,
+                             const uint8_t *in0,
+                             const uint8_t *in1,
+                             const uint8_t *in2,
+                             const uint8_t *in3,
+                             size_t inlen,
+                             uint8_t p) {
+
+    keccak_absorb(s + KECCAK_CTX * 0, r, in0, inlen, p);
+    keccak_absorb(s + KECCAK_CTX * 1, r, in1, inlen, p);
+    keccak_absorb(s + KECCAK_CTX * 2, r, in2, inlen, p);
+    keccak_absorb(s + KECCAK_CTX * 3, r, in3, inlen, p);
 }
 
-int keccakx_set_lane_output(keccakx *kx, size_t lane_index, uint8_t *output, size_t outlen) {
-    if (kx == NULL || output == NULL || lane_index >= KECCAK_WAY) {
-        return 1;
-    }
-
-    kx->outputs[lane_index].p = output;
-    kx->outputs[lane_index].plen = outlen;
-
-    return 0;
+/* Squeezes nblocks blocks from each lane state into the matching output. */
+static void keccak_squeezeblocks_x4(uint8_t *out0,
+                                    uint8_t *out1,
+                                    uint8_t *out2,
+                                    uint8_t *out3,
+                                    size_t nblocks,
+                                    uint64_t *s,
+                                    uint32_t r) {
+
+    keccak_squeezeblocks(out0, nblocks, s + KECCAK_CTX * 0, r);
+    keccak_squeezeblocks(out1, nblocks, s + KECCAK_CTX * 1, r);
+    keccak_squeezeblocks(out2, nblocks, s + KECCAK_CTX * 2, r);
+    keccak_squeezeblocks(out3, nblocks, s + KECCAK_CTX * 3, r);
 }
 
-uint64_t *keccakx_get_lane_state(keccakx *kx, size_t lane_index) {
-    if (kx == NULL || lane_index >= KECCAK_WAY) {
+/* Returns the KECCAK_CTX-word state of lane `index`, or NULL on bad arguments. */
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index) {
+    if (state == NULL || index >= KECCAK_WAY) {
         return NULL;
     }
 
-    return kx->ctx + lane_index *KECCAK_CTX;
+    return state->ctx + index * KECCAK_CTX;
 }
 
-int shake128x_absorb(keccakx *kx) {
-    if (kx == NULL) {
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen) {
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL) {
         return 1;
     }
 
-    // Clear all old data
-    memset(kx->ctx, 0, KECCAK_CTX *sizeof(uint64_t) * KECCAK_WAY);
+    memset(state->ctx, 0, sizeof(state->ctx));
 
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        keccak_absorb(kx->ctx + KECCAK_CTX *i, SHAKE128_RATE, kx->inputs[i].p, kx->inputs[i].plen, 0x1F);
-    }
+    keccak_absorb_x4(state->ctx, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
 
     return 0;
 }
 
-int shake256x_absorb(keccakx *kx) {
-    if (kx == NULL) {
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen) {
+    if (state == NULL || in0 == NULL || in1 == NULL || in2 == NULL || in3 == NULL) {
         return 1;
     }
 
-    memset(kx->ctx, 0, KECCAK_CTX *sizeof(uint64_t) * KECCAK_WAY);
+    memset(state->ctx, 0, sizeof(state->ctx));
 
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        keccak_absorb(kx->ctx + KECCAK_CTX *i, SHAKE256_RATE, kx->inputs[i].p, kx->inputs[i].plen, 0x1F);
-    }
+    keccak_absorb_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
 
     return 0;
 }
 
-int shake128x_squeezeblocks(keccakx *kx, size_t nblocks) {
-    if (kx == NULL) {
-        return 1;
-    }
-
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        if (kx->outputs[i].plen < nblocks *SHAKE128_RATE) {
-            // One of the output buffer is smaller than requested size
-            return 2;
-        }
-    }
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        keccak_squeezeblocks(kx->outputs[i].p, nblocks, kx->ctx + KECCAK_CTX *i, SHAKE128_RATE);
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state) {
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL) {
+        return 1;
     }
 
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE128_RATE);
 
     return 0;
 }
 
-int shake256x_squeezeblocks(keccakx *kx, size_t nblocks) {
-    if (kx == NULL) {
-        return 1;
-    }
-
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        if (kx->outputs[i].plen < nblocks *SHAKE256_RATE) {
-            // One of the output buffer is smaller than requested size
-            return 2;
-        }
-    }
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state) {
 
-    for (unsigned i = 0; i < KECCAK_WAY; i++) {
-        keccak_squeezeblocks(kx->outputs[i].p, nblocks, kx->ctx + KECCAK_CTX *i, SHAKE256_RATE);
+    if (state == NULL || out0 == NULL || out1 == NULL || out2 == NULL || out3 == NULL) {
+        return 1;
     }
 
+    keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, SHAKE256_RATE);
 
     return 0;
 }
diff --git a/fips202/fips202x.h b/fips202/fips202x.h
index 9e1809a4b..9a68f1332 100644
--- a/fips202/fips202x.h
+++ b/fips202/fips202x.h
@@ -3,113 +3,91 @@
 #define FIPS_202X_H
 
 #ifndef KECCAK_WAY
-#define KECCAK_WAY 7
+#define KECCAK_WAY 4
 #endif
+
+/* The x4 functions below always process exactly four lanes. */
+#if KECCAK_WAY != 4
+#error "fips202x: KECCAK_WAY must be 4"
+#endif
+
 #include <stdint.h>
-#include <stddef.h>
 
-typedef struct
-{
-    uint8_t *p;
-    size_t plen;
-} fp;
-
-typedef struct
-{
-    fp inputs[KECCAK_WAY];
-    fp outputs[KECCAK_WAY];
+/** KECCAK_WAY Keccak lane states stored back to back, 25 64-bit words each. */
+typedef struct {
     uint64_t ctx[25 * KECCAK_WAY];
-} keccakx;
-
-/**
- * Sets the input for a specific lane in the Keccak-x context.
- *
- * This function assigns the input data to a specified lane within the Keccak-x
- * processing pipeline. It's used to prepare data for absorption into the Keccak
- * state.
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @param lane_index Index of the lane to set the input for.
- * @param input Pointer to the input data.
- * @param inlen Length of the input data in bytes.
- * @return 0 on success, 1 on error (e.g., invalid lane index, null pointer).
- */
-int keccakx_set_lane_input(keccakx *kx, size_t lane_index, uint8_t *input, size_t inlen);
-
-/**
- * Sets the output for a specific lane in the Keccak-x context.
- *
- * This function assigns the output data to a specified lane within the Keccak-x
- * processing pipeline. It's typically used after squeezing data from the Keccak
- * state.
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @param lane_index Index of the lane to set the output for.
- * @param output Pointer to the output data.
- * @param outlen Length of the output data in bytes.
- * @return 0 on success, 1 on error (e.g., invalid lane index, null pointer).
- */
-int keccakx_set_lane_output(keccakx *kx, size_t lane_index, uint8_t *output, size_t outlen);
-
-/**
- * Retrieves the current keccak state of a specific lane in the Keccak-x context.
- *
- * This function returns a pointer to the current state of a specified lane within
- * the Keccak-x processing pipeline. The state represents the internal data
- * managed by Keccak-x for cryptographic processing.
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @param lane_index Index of the lane to retrieve the state for.
- * @return A pointer to the current state of the specified lane, or NULL on error
- * (e.g., invalid lane index, null pointer).
- */
-uint64_t *keccakx_get_lane_state(keccakx *kx, size_t lane_index);
-
-/**
- * Absorbs all input data into the Keccak-x context for SHAKE128 processing.
- *
- * This function absorbs input all data into the Keccak-x context.
- * It processes input data across all lanes of the Keccak-x context.
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @return 0 on success, 1 on error (e.g., null pointer).
- */
-int shake128x_absorb(keccakx *kx);
-
-/**
- * Absorbs input data into the Keccak-x context for SHAKE256 processing.
- *
- * Similar to shake128x_absorb(), but tailored for SHAKE256 processing.
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @return 0 on success, 1 on error (e.g., null pointer).
- */
-int shake256x_absorb(keccakx *kx);
-
-/**
- * Squeezes output data from the Keccak-x context for SHAKE128 processing.
- *
- * This function squeezes output data from the Keccak-x context.
- * It generates output data across all lanes of the Keccak-x context and stores
- * data at designated output from keccakx_set_lane_output().
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @param nblocks Number of blocks to squeeze from each lane.
- * @return 0 on success, non-0 on error (e.g., insufficient output buffer size, null pointer).
- */
-int shake128x_squeezeblocks(keccakx *kx, size_t nblocks);
-
-/**
- * Squeezes output data from the Keccak-x context for SHAKE256 processing.
- *
- * This function squeezes output data from the Keccak-x context.
- * It generates output data across all lanes of the Keccak-x context and stores
- * data at designated output from keccakx_set_lane_output().
- *
- * @param kx Pointer to the Keccak-x context structure.
- * @param nblocks Number of blocks to squeeze from each lane.
- * @return 0 on success, non-0 on error (e.g., insufficient output buffer size, null pointer).
- */
-int shake256x_squeezeblocks(keccakx *kx, size_t nblocks);
+} keccakx4_state;
+
+/**
+ * Returns a pointer to the 25-word Keccak state of one lane.
+ *
+ * @param state Pointer to the 4-way state.
+ * @param index Lane number, 0 to KECCAK_WAY - 1.
+ * @return Pointer into state->ctx, or NULL if state is NULL or index is out
+ *         of range.
+ */
+uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index);
+
+/**
+ * Clears the state and absorbs four equal-length inputs for SHAKE128,
+ * one input per lane.
+ *
+ * @param state Pointer to the 4-way state; its previous contents are zeroed.
+ * @param in0 Input for lane 0.
+ * @param in1 Input for lane 1.
+ * @param in2 Input for lane 2.
+ * @param in3 Input for lane 3.
+ * @param inlen Length in bytes of each input.
+ * @return 0 on success, 1 if any pointer is NULL.
+ */
+int shake128x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+/**
+ * SHAKE256 counterpart of shake128x4_absorb(); parameters and return value
+ * are the same.
+ */
+int shake256x4_absorb(keccakx4_state *state,
+                      const uint8_t *in0,
+                      const uint8_t *in1,
+                      const uint8_t *in2,
+                      const uint8_t *in3,
+                      size_t inlen);
+
+/**
+ * Squeezes nblocks SHAKE128 blocks from each lane into its output buffer.
+ *
+ * Buffer sizes are not checked: each of out0..out3 must have room for
+ * nblocks * SHAKE128_RATE bytes.
+ *
+ * @param out0 Output buffer for lane 0.
+ * @param out1 Output buffer for lane 1.
+ * @param out2 Output buffer for lane 2.
+ * @param out3 Output buffer for lane 3.
+ * @param nblocks Number of blocks to squeeze from each lane.
+ * @param state Pointer to a state prepared by shake128x4_absorb().
+ * @return 0 on success, 1 if any pointer is NULL.
+ */
+int shake128x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
+
+/**
+ * SHAKE256 counterpart of shake128x4_squeezeblocks(); each output buffer
+ * must have room for nblocks * SHAKE256_RATE bytes.
+ */
+int shake256x4_squeezeblocks(uint8_t *out0,
+                             uint8_t *out1,
+                             uint8_t *out2,
+                             uint8_t *out3,
+                             size_t nblocks,
+                             keccakx4_state *state);
 
 #endif
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index 0331892b5..b7242bee1 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -175,7 +175,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
     int16_t *vec = NULL;
     xof_state state;
 
-    keccakx kx;
+    keccakx4_state statex;
     uint8_t seedxy[KECCAK_WAY][KYBER_SYMBYTES + 2];
 
     for (i = 0; i < (KYBER_K *KYBER_K / KECCAK_WAY) *KECCAK_WAY; i += KECCAK_WAY)
@@ -193,13 +193,10 @@ void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
                 seedxy[j][KYBER_SYMBYTES + 0] = (i + j) % KYBER_K;
                 seedxy[j][KYBER_SYMBYTES + 1] = (i + j) / KYBER_K;
             }
-
-            keccakx_set_lane_input(&kx, j, seedxy[j], KYBER_SYMBYTES + 2);
-            keccakx_set_lane_output(&kx, j, bufx[j], GEN_MATRIX_NBLOCKS *XOF_BLOCKBYTES + 2);
-
         }
-        shake128x_absorb(&kx);
-        shake128x_squeezeblocks(&kx, GEN_MATRIX_NBLOCKS);
+
+        shake128x4_absorb(&statex, seedxy[0], seedxy[1], seedxy[2], seedxy[3], KYBER_SYMBYTES + 2);
+        shake128x4_squeezeblocks(bufx[0], bufx[1], bufx[2], bufx[3], GEN_MATRIX_NBLOCKS, &statex);
 
         for (j = 0; j < KECCAK_WAY; j++)
         {
@@ -208,9 +205,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
             buflen = GEN_MATRIX_NBLOCKS *XOF_BLOCKBYTES;
             ctr = rej_uniform(vec, KYBER_N, buf, buflen);
 
-            while (ctr < KYBER_N )
-            {
-                uint64_t *ctx = keccakx_get_lane_state(&kx, j);
+            while (ctr < KYBER_N ) {
+                uint64_t *ctx = keccakx_get_lane_state(&statex, j);
                 keccak_squeezeblocks(buf, 1, ctx, SHAKE128_RATE);
                 buflen = XOF_BLOCKBYTES;
                 ctr += rej_uniform(vec + ctr, KYBER_N - ctr, buf, buflen);