Skip to content

Commit

Permalink
unroll for future asm landing
Browse files Browse the repository at this point in the history
Signed-off-by: Duc Tri Nguyen <[email protected]>
  • Loading branch information
cothan committed Jun 17, 2024
1 parent 37d9218 commit a216720
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 22 deletions.
14 changes: 0 additions & 14 deletions fips202/fips202.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,6 @@ typedef struct
uint64_t ctx[25];
} shake256ctx;

/**
* Bare Keccak implementation for absorbing input data into the Keccak state.
*
* This function performs the absorb step of the Keccak cryptographic hash function.
*
* @param s Pointer to the Keccak state array. This array must contain at least 25 elements of type uint64_t.
* @param r Rate in bytes. Specifies the rate parameter of the Keccak function, which determines how much
* data is processed per permutation.
* @param m Pointer to the input data to be absorbed into the state.
* @param mlen Length of the input data in bytes.
* @param p Domain separation byte.
*/
void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, size_t mlen, uint8_t p);

/**
* Squeezes blocks of data from the Keccak state.
*
Expand Down
69 changes: 61 additions & 8 deletions fips202/fips202x.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <string.h>
#include "fips202x.h"
#include "fips202.h"
#include "keccakf1600.h"

#define KECCAK_CTX 25

Expand All @@ -13,10 +14,49 @@ static void keccak_absorb_x4(uint64_t *s, uint32_t r,
size_t inlen,
uint8_t p) {

keccak_absorb(s + KECCAK_CTX * 0, r, in0, inlen, p);
keccak_absorb(s + KECCAK_CTX * 1, r, in1, inlen, p);
keccak_absorb(s + KECCAK_CTX * 2, r, in2, inlen, p);
keccak_absorb(s + KECCAK_CTX * 3, r, in3, inlen, p);
while (inlen >= r) {

KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, r);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, r);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, r);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, r);

KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
KeccakF1600_StatePermute(s + KECCAK_CTX * 3);

in0 += r;
in1 += r;
in2 += r;
in3 += r;
inlen -= r;
}

if (inlen > 0) {
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, in0, 0, inlen);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, in1, 0, inlen);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, in2, 0, inlen);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, in3, 0, inlen);
}

if (inlen == r - 1) {
p |= 128;
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
} else {
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, inlen, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, inlen, 1);
p = 128;
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 0, &p, r - 1, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 1, &p, r - 1, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 2, &p, r - 1, 1);
KeccakF1600_StateXORBytes(s + KECCAK_CTX * 3, &p, r - 1, 1);
}
}

static void keccak_squeezeblocks_x4(uint8_t *out0,
Expand All @@ -27,10 +67,23 @@ static void keccak_squeezeblocks_x4(uint8_t *out0,
uint64_t *s,
uint32_t r) {

keccak_squeezeblocks(out0, nblocks, s + KECCAK_CTX * 0, r);
keccak_squeezeblocks(out1, nblocks, s + KECCAK_CTX * 1, r);
keccak_squeezeblocks(out2, nblocks, s + KECCAK_CTX * 2, r);
keccak_squeezeblocks(out3, nblocks, s + KECCAK_CTX * 3, r);
while (nblocks > 0) {
KeccakF1600_StatePermute(s + KECCAK_CTX * 0);
KeccakF1600_StatePermute(s + KECCAK_CTX * 1);
KeccakF1600_StatePermute(s + KECCAK_CTX * 2);
KeccakF1600_StatePermute(s + KECCAK_CTX * 3);

KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 0, out0, 0, r);
KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 1, out1, 0, r);
KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 2, out2, 0, r);
KeccakF1600_StateExtractBytes(s + KECCAK_CTX * 3, out3, 0, r);

out0 += r;
out1 += r;
out2 += r;
out3 += r;
nblocks--;
}
}

uint64_t *keccakx_get_lane_state(keccakx4_state *state, size_t index) {
Expand Down

0 comments on commit a216720

Please sign in to comment.