diff --git a/.gitignore b/.gitignore
index 9e5ec4e3f..30d8efb51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ test/gen_KAT1024
 test/gen_NISTKAT512
 test/gen_NISTKAT768
 test/gen_NISTKAT1024
+*.dSYM
diff --git a/Makefile b/Makefile
index 583b271e4..2dfaa3ca8 100644
--- a/Makefile
+++ b/Makefile
@@ -13,12 +13,12 @@ CFLAGS_NISTRANDOMBYTES = ${CFLAGS} ${INCLUDE_NISTRANDOM}
 NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer
 RM = /bin/rm
 
-SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c mlkem/cbd.c mlkem/reduce.c mlkem/verify.c
+SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c mlkem/cbd.c mlkem/reduce.c mlkem/verify.c mlkem/rej_uniform.c mlkem/rej_uniform_asm.s
 SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c mlkem/symmetric-shake.c
 SOURCESKECCAKRANDOM = $(SOURCESKECCAK) randombytes/randombytes.c
 SOURCESNISTKATS = $(SOURCESKECCAK) test/nistrng/aes.c test/nistrng/rng.c
 
-HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.c mlkem/verify.h mlkem/symmetric.h
+HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h mlkem/rej_uniform.h
 HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h
 HEADERSKECCAKRANDOM = $(HEADERSKECCAK) randombytes/randombytes.h
 HEADERNISTKATS = $(HEADERSKECCAK) test/nistrng/aes.h test/nistrng/randombytes.h
diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
index 648bc26cc..dd9d4783c 100644
--- a/mlkem/indcpa.c
+++ b/mlkem/indcpa.c
@@ -7,6 +7,7 @@
 #include "polyvec.h"
 #include "poly.h"
 #include "ntt.h"
+#include "rej_uniform.h"
 #include "symmetric.h"
 #include "randombytes.h"
 
@@ -100,43 +101,6 @@ static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_
     poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES);
 }
 
-/*************************************************
-* Name:        rej_uniform
-*
-* Description: Run rejection sampling on uniform random bytes to generate
-*              uniform random integers mod q
-*
-* Arguments:   - int16_t *r: pointer to output buffer
-*              - unsigned int len: requested number of 16-bit integers (uniform mod q)
-*              - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
-*              - unsigned int buflen: length of input buffer in bytes
-*
-* Returns number of sampled 16-bit integers (at most len)
-**************************************************/
-static unsigned int rej_uniform(int16_t *r,
-                                unsigned int len,
-                                const uint8_t *buf,
-                                unsigned int buflen) {
-    unsigned int ctr, pos;
-    uint16_t val0, val1;
-
-    ctr = pos = 0;
-    while (ctr < len && pos + 3 <= buflen) {
-        val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
-        val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
-        pos += 3;
-
-        if (val0 < KYBER_Q) {
-            r[ctr++] = val0;
-        }
-        if (ctr < len && val1 < KYBER_Q) {
-            r[ctr++] = val1;
-        }
-    }
-
-    return ctr;
-}
-
 #define gen_a(A,B)  gen_matrix(A,B,0)
 #define gen_at(A,B) gen_matrix(A,B,1)
 
diff --git a/mlkem/rej_uniform.c b/mlkem/rej_uniform.c
new file mode 100644
index 000000000..7e1d212ca
--- /dev/null
+++ b/mlkem/rej_uniform.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: Apache-2.0
+#include <stdint.h>
+#include <string.h>
+#include "rej_uniform_asm.h"
+#include "params.h"
+#include "rej_uniform.h"
+
+/*
+ * table_idx[m] is the TBL byte-shuffle that compacts the accepted lanes of an
+ * 8 x 16-bit vector: for every set bit b of m, in ascending order, it holds
+ * the byte indices 2b and 2b + 1. Unused slots are -1, i.e. 0xFF once stored
+ * as uint8_t, which is out of range for a one-register TBL and therefore
+ * selects 0.
+ */
+const uint8_t table_idx[256][16] = {
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 0
+    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1
+    {2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 2
+    {0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 3
+    {4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 4
+    {0, 1, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5
+    {2, 3, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6
+    {0,
1, 2, 3, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 + {6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 8 + {0, 1, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 9 + {2, 3, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 10 + {0, 1, 2, 3, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 11 + {4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 12 + {0, 1, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 13 + {2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 14 + {0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1}, // 15 + {8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 16 + {0, 1, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 17 + {2, 3, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 18 + {0, 1, 2, 3, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 19 + {4, 5, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 20 + {0, 1, 4, 5, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 21 + {2, 3, 4, 5, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 22 + {0, 1, 2, 3, 4, 5, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 23 + {6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 24 + {0, 1, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 25 + {2, 3, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 26 + {0, 1, 2, 3, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 27 + {4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 28 + {0, 1, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 29 + {2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 30 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1}, // 31 + {10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 32 + {0, 1, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 33 + {2, 3, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 34 + {0, 1, 2, 3, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 35 + {4, 5, 10, 
11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 36 + {0, 1, 4, 5, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 37 + {2, 3, 4, 5, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 38 + {0, 1, 2, 3, 4, 5, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 39 + {6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 40 + {0, 1, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 41 + {2, 3, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 42 + {0, 1, 2, 3, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 43 + {4, 5, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 44 + {0, 1, 4, 5, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 45 + {2, 3, 4, 5, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 46 + {0, 1, 2, 3, 4, 5, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1}, // 47 + {8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 48 + {0, 1, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 49 + {2, 3, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 50 + {0, 1, 2, 3, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 51 + {4, 5, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 52 + {0, 1, 4, 5, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 53 + {2, 3, 4, 5, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 54 + {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1}, // 55 + {6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 56 + {0, 1, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 57 + {2, 3, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 58 + {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1}, // 59 + {4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 60 + {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1}, // 61 + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1}, // 62 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1, -1}, // 63 + {12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 64 + {0, 1, 12, 13, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 65 + {2, 3, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 66 + {0, 1, 2, 3, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 67 + {4, 5, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 68 + {0, 1, 4, 5, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 69 + {2, 3, 4, 5, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 70 + {0, 1, 2, 3, 4, 5, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 71 + {6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 72 + {0, 1, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 73 + {2, 3, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 74 + {0, 1, 2, 3, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 75 + {4, 5, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 76 + {0, 1, 4, 5, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 77 + {2, 3, 4, 5, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 78 + {0, 1, 2, 3, 4, 5, 6, 7, 12, 13, -1, -1, -1, -1, -1, -1}, // 79 + {8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 80 + {0, 1, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 81 + {2, 3, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 82 + {0, 1, 2, 3, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 83 + {4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 84 + {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 85 + {2, 3, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 86 + {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1}, // 87 + {6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 88 + {0, 1, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 89 + {2, 3, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 90 + {0, 1, 2, 3, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1}, // 91 + {4, 5, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 92 + {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1}, // 93 + {2, 3, 4, 
5, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1}, // 94 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, -1, -1, -1, -1}, // 95 + {10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 96 + {0, 1, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 97 + {2, 3, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 98 + {0, 1, 2, 3, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 99 + {4, 5, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 100 + {0, 1, 4, 5, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 101 + {2, 3, 4, 5, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 102 + {0, 1, 2, 3, 4, 5, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 103 + {6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 104 + {0, 1, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 105 + {2, 3, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 106 + {0, 1, 2, 3, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 107 + {4, 5, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 108 + {0, 1, 4, 5, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 109 + {2, 3, 4, 5, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 110 + {0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, -1, -1, -1, -1}, // 111 + {8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 112 + {0, 1, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 113 + {2, 3, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 114 + {0, 1, 2, 3, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 115 + {4, 5, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 116 + {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 117 + {2, 3, 4, 5, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 118 + {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1}, // 119 + {6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}, // 120 + {0, 1, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 121 + {2, 3, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, 
-1, -1, -1}, // 122 + {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1}, // 123 + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, -1, -1}, // 124 + {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1}, // 125 + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1, -1, -1}, // 126 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1}, // 127 + {14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 128 + {0, 1, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 129 + {2, 3, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 130 + {0, 1, 2, 3, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 131 + {4, 5, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 132 + {0, 1, 4, 5, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 133 + {2, 3, 4, 5, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 134 + {0, 1, 2, 3, 4, 5, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 135 + {6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 136 + {0, 1, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 137 + {2, 3, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 138 + {0, 1, 2, 3, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 139 + {4, 5, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 140 + {0, 1, 4, 5, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 141 + {2, 3, 4, 5, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 142 + {0, 1, 2, 3, 4, 5, 6, 7, 14, 15, -1, -1, -1, -1, -1, -1}, // 143 + {8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 144 + {0, 1, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 145 + {2, 3, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 146 + {0, 1, 2, 3, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 147 + {4, 5, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 148 + {0, 1, 4, 5, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 149 + {2, 3, 4, 5, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 150 + {0, 1, 2, 3, 
4, 5, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1}, // 151 + {6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 152 + {0, 1, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 153 + {2, 3, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 154 + {0, 1, 2, 3, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1}, // 155 + {4, 5, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 156 + {0, 1, 4, 5, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1}, // 157 + {2, 3, 4, 5, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1, -1, -1}, // 158 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, -1, -1, -1, -1}, // 159 + {10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 160 + {0, 1, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 161 + {2, 3, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 162 + {0, 1, 2, 3, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 163 + {4, 5, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 164 + {0, 1, 4, 5, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 165 + {2, 3, 4, 5, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 166 + {0, 1, 2, 3, 4, 5, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 167 + {6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 168 + {0, 1, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 169 + {2, 3, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 170 + {0, 1, 2, 3, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 171 + {4, 5, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 172 + {0, 1, 4, 5, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 173 + {2, 3, 4, 5, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 174 + {0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, -1, -1, -1, -1}, // 175 + {8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 176 + {0, 1, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 177 + {2, 3, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 178 + {0, 1, 2, 3, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, 
-1, -1}, // 179 + {4, 5, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 180 + {0, 1, 4, 5, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 181 + {2, 3, 4, 5, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 182 + {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1}, // 183 + {6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 184 + {0, 1, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 185 + {2, 3, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 186 + {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1}, // 187 + {4, 5, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1, -1, -1}, // 188 + {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1}, // 189 + {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1, -1, -1}, // 190 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, -1, -1}, // 191 + {12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 192 + {0, 1, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 193 + {2, 3, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 194 + {0, 1, 2, 3, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 195 + {4, 5, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 196 + {0, 1, 4, 5, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 197 + {2, 3, 4, 5, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 198 + {0, 1, 2, 3, 4, 5, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 199 + {6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 200 + {0, 1, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 201 + {2, 3, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 202 + {0, 1, 2, 3, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 203 + {4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 204 + {0, 1, 4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 205 + {2, 3, 4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 206 + {0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1}, // 207 + {8, 9, 12, 13, 14, 15, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 208 + {0, 1, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 209 + {2, 3, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 210 + {0, 1, 2, 3, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 211 + {4, 5, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 212 + {0, 1, 4, 5, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 213 + {2, 3, 4, 5, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 214 + {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1}, // 215 + {6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 216 + {0, 1, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 217 + {2, 3, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 218 + {0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1}, // 219 + {4, 5, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 220 + {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1}, // 221 + {2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1, -1, -1}, // 222 + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, -1, -1}, // 223 + {10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 224 + {0, 1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 225 + {2, 3, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 226 + {0, 1, 2, 3, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 227 + {4, 5, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 228 + {0, 1, 4, 5, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 229 + {2, 3, 4, 5, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 230 + {0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 231 + {6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 232 + {0, 1, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 233 + {2, 3, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 234 + {0, 1, 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 235 + {4, 5, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, 
-1}, // 236
+    {0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 237
+    {2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 238
+    {0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, -1, -1}, // 239
+    {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}, // 240
+    {0, 1, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 241
+    {2, 3, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 242
+    {0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 243
+    {4, 5, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 244
+    {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 245
+    {2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 246
+    {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1}, // 247
+    {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1}, // 248
+    {0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 249
+    {2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 250
+    {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1}, // 251
+    {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1}, // 252
+    {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1}, // 253
+    {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1}, // 254
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, // 255
+};
+
+// Per-lane bit weights for an 8 x 16-bit vector (lane i carries bit i).
+// rej_uniform_asm ANDs them with the per-lane "value < q" comparison result
+// and sums the lanes, which yields the row number to use in table_idx.
+const uint16_t bit_table[8] = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+/*************************************************
+* Name:        rej_uniform_large
+*
+* Description: Rejection sampling on uniform random bytes to generate
+*              uniform random integers mod q, using the vectorized
+*              routine rej_uniform_asm
+*
+* Arguments:   - int16_t *r: pointer to output buffer
+*              - unsigned int len: requested number of 16-bit integers
+*                                  (uniform mod q)
+*              - const uint8_t *buf: pointer to input buffer
+*                                    (assumed to be uniform random bytes)
+*              - unsigned int buflen: length of input buffer in bytes
+*              - unsigned int *consumed: output, number of bytes of buf
+*                                        that were consumed
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform_large(int16_t *r, unsigned int len,
+                                      const uint8_t *buf, unsigned int buflen,
+                                      unsigned int *consumed) {
+    // rej_uniform_asm stores whole 8-lane vectors and compares the sample
+    // count with len only after each 48/24-byte step, so it can write up to
+    // len + 32 values. The scratch buffer lives on the stack (no malloc/free)
+    // and is sized for len <= KYBER_N.
+    int16_t stack[KYBER_N + 32] = {0};
+    unsigned int usable, count, min;
+
+    *consumed = 0;
+
+    // The assembly executes its 48-byte loop at least once and, unless len
+    // samples were already found, its 24-byte loop at least once, without
+    // checking the input length first. It stays inside the input only when the
+    // length it is given is 48 * k + 24 with k >= 1. Inputs shorter than
+    // 72 bytes, and requests that do not fit the scratch buffer, are left
+    // entirely to the scalar fallback in rej_uniform().
+    if (len == 0 || len > KYBER_N || buflen < 72) {
+        return 0;
+    }
+
+    // Largest length of the form 48 * k + 24 that does not exceed buflen.
+    // The bytes beyond it stay unconsumed for the scalar fallback.
+    usable = buflen - ((buflen - 24) % 48);
+
+    count = rej_uniform_asm(stack, buf, consumed, usable, len, table_idx, bit_table);
+
+    // The assembly may overshoot len; keep only the first len samples.
+    min = (count < len ? count : len);
+    memcpy(r, stack, min * sizeof(*r));
+
+    return min;
+}
+
+/*************************************************
+* Name:        rej_uniform_scalar
+*
+* Description: Run rejection sampling on uniform random bytes to generate
+*              uniform random integers mod q
+*
+* Arguments:   - int16_t *r: pointer to output buffer
+*              - unsigned int len: requested number of 16-bit integers
+*                                  (uniform mod q)
+*              - const uint8_t *buf: pointer to input buffer
+*                                    (assumed to be uniform random bytes)
+*              - unsigned int buflen: length of input buffer in bytes
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform_scalar(int16_t *r,
+                                       unsigned int len,
+                                       const uint8_t *buf,
+                                       unsigned int buflen) {
+    unsigned int ctr, pos;
+    uint16_t val0, val1;
+
+    ctr = pos = 0;
+    // Every 3 input bytes hold two 12-bit candidates; a trailing group of
+    // fewer than 3 bytes is ignored.
+    while (ctr < len && pos + 3 <= buflen) {
+        val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
+        val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
+        pos += 3;
+
+        // Keep a candidate only if it is already reduced mod q.
+        if (val0 < KYBER_Q) {
+            r[ctr++] = val0;
+        }
+        if (ctr < len && val1 < KYBER_Q) {
+            r[ctr++] = val1;
+        }
+    }
+    return ctr;
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Run rejection sampling on uniform random bytes to generate
+*              uniform random integers mod q
+*
+* Arguments:   - int16_t *r: pointer to output buffer
+*              - unsigned int len:
requested number of 16-bit integers
+*                                  (uniform mod q)
+*              - const uint8_t *buf: pointer to input buffer
+*                                    (assumed to be uniform random bytes)
+*              - unsigned int buflen: length of input buffer in bytes
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+unsigned int rej_uniform(int16_t *r,
+                         unsigned int len,
+                         const uint8_t *buf,
+                         unsigned int buflen) {
+    unsigned int ctr, consumed = 0;
+
+    // Vectorized pass over as much of the buffer as it can take.
+    ctr = rej_uniform_large(r, len, buf, buflen, &consumed);
+
+    // The vector code reports how far it read. If that is already at or past
+    // the end of buf there is nothing left for the scalar pass, and
+    // buflen - consumed must not be allowed to wrap around.
+    if (ctr < len && consumed < buflen) {
+        // Scalar pass over the remaining bytes, three at a time.
+        ctr += rej_uniform_scalar(r + ctr, len - ctr, buf + consumed, buflen - consumed);
+    }
+
+    return ctr;
+}
diff --git a/mlkem/rej_uniform.h b/mlkem/rej_uniform.h
new file mode 100644
index 000000000..9a5d2bf7d
--- /dev/null
+++ b/mlkem/rej_uniform.h
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: Apache-2.0
+#ifndef REJ_UNIFORM_H
+#define REJ_UNIFORM_H
+
+#include <stdint.h>
+#include "params.h"
+
+#define rej_uniform KYBER_NAMESPACE(rej_uniform)
+/*
+ * Rejection-samples up to len values uniform mod q from the buflen bytes at
+ * buf into r. Returns the number of values written (at most len).
+ */
+unsigned int rej_uniform(int16_t *r,
+                         unsigned int len,
+                         const uint8_t *buf,
+                         unsigned int buflen);
+
+#endif
diff --git a/mlkem/rej_uniform_asm.h b/mlkem/rej_uniform_asm.h
new file mode 100644
index 000000000..943890162
--- /dev/null
+++ b/mlkem/rej_uniform_asm.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: Apache-2.0
+#ifndef REJ_UNIFORM_ASM
+#define REJ_UNIFORM_ASM
+
+#include <stdint.h>
+
+/*
+ * AArch64 NEON rejection sampler, implemented in rej_uniform_asm.s.
+ *
+ * Processes buf in 48-byte steps and then 24-byte steps, comparing the number
+ * of accepted values with len only after each step. Consequently it
+ *   - always reads at least 48 bytes, and is guaranteed to stay within
+ *     buflen bytes only when buflen is 48 * k + 24 with k >= 1;
+ *   - needs room for len + 32 values in local_buf;
+ *   - can return a count larger than len.
+ * The number of bytes read from buf is stored to *buf_consumed.
+ */
+unsigned int rej_uniform_asm(int16_t *local_buf,
+                             const uint8_t *buf,
+                             unsigned int *buf_consumed,
+                             unsigned int buflen,
+                             unsigned int len,
+                             const uint8_t idx[256][16],
+                             const uint16_t bits[8]);
+
+#endif
diff --git a/mlkem/rej_uniform_asm.s b/mlkem/rej_uniform_asm.s
new file mode 100644
index 000000000..8fff58936
--- /dev/null
+++ b/mlkem/rej_uniform_asm.s
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: Apache-2.0
+#include "params.h"
+
+.text
+
+/* div48: dst = 48 * (src / 48), i.e. src rounded down to a multiple of 48.
+ * Multiplies by 0xAAAAAAAB and shifts instead of dividing. Clobbers w8/x8. */
+.macro div48 dst, src
+    mov     w8, #43691
+    movk    w8, #43690, lsl #16
+    umull   x8, \src, w8
+
lsr     x8, x8, #37
+    add     w8, w8, w8, lsl #1
+    lsl     \dst, w8, #4
+.endm
+/*************************************************
+* Name:        rej_uniform_asm
+*
+* Description: Full SIMD lane, run rejection sampling on uniform random bytes
+*              to generate uniform random integers mod q
+*
+* Arguments (in register order, as in the prototype in rej_uniform_asm.h):
+*              - x0 int16_t *local_buf: scratch output buffer; whole
+*                                  16-byte vectors are stored to it
+*              - x1 const uint8_t *buf: pointer to input buffer
+*                                  (assumed to be uniform random bytes)
+*              - x2 unsigned int *buf_consumed: receives the number of
+*                                  input bytes that were read
+*              - w3 unsigned int buflen: length of input buffer in bytes
+*              - w4 unsigned int len: requested number of 16-bit integers
+*                                  (uniform mod q)
+*              - x5 table_idx: 256 x 16-byte shuffle table
+*              - x6 bit_table: 8 x 16-bit lane weights
+*
+* Returns the number of accepted 16-bit integers stored to local_buf.
+* The count is compared with len only after each 48-byte / 24-byte step,
+* so the returned value can be larger than len.
+*
+* Note: both loops are bottom-tested. The first 48 bytes are read before
+* any length check, and the 24-byte loop runs at least once when it is
+* reached.
+**************************************************/
+.align 4
+.global rej_uniform_asm
+.global _rej_uniform_asm
+rej_uniform_asm:
+_rej_uniform_asm:
+
+    /* Input registers */
+    stack        .req x0
+    counter      .req w0   /* return value, same register as stack */
+    buf          .req x1
+    buf_consumed .req x2
+    buflen       .req w3
+    len          .req x4
+    lenw         .req w4
+    table_idx    .req x5
+    bit_table    .req x6
+
+    /* Output registers */
+    output .req x11        /* write cursor into the scratch buffer */
+
+    /* Temporary registers */
+    tmp   .req w8
+    count .req w8          /* shares w8 with tmp; div48 also clobbers w8 */
+    bound .req w9          /* buflen rounded down to a multiple of 48 */
+    iterw .req w10         /* byte offset into buf */
+    iter  .req x10
+
+    /* rec_idx_N and ctrN share registers: the index is used before the
+       count overwrites it */
+    rec_idx_0 .req w12
+    rec_idx_1 .req w13
+    rec_idx_2 .req w14
+    rec_idx_3 .req w15
+
+    ctr0 .req w12
+    ctr1 .req w13
+    ctr2 .req w14
+    ctr3 .req w15
+
+    /* Vector registers */
+
+    buf0 .req v0
+    buf1 .req v1
+    buf2 .req v2
+
+    /* tmpN and signN share v4-v7 */
+    tmp0 .req v4
+    tmp1 .req v5
+    tmp2 .req v6
+    tmp3 .req v7
+
+    sign0 .req v4
+    sign1 .req v5
+    sign2 .req v6
+    sign3 .req v7
+
+    val0 .req v16
+    val0q .req q16
+    val1 .req v17
+    val1q .req q17
+    val2 .req v18
+    val2q .req q18
+    val3 .req v19
+    val3q .req q19
+
+    t0 .req s20
+    t1 .req s21
+    t2 .req s22
+    t3 .req s23
+
+    table0 .req v24
+    table0q .req q24
+    table1 .req v25
+    table1q .req q25
+    table2 .req v26
+    table2q .req q26
+    table3 .req v27
+    table3q .req q27
+
+    kyber_q .req v30
+    bits .req v31
+    bits_q .req q31
+
+
+    /* Vectorize code start */
+
+    /* bits = lane weights, kyber_q = q in every 16-bit lane,
+       bound = 48 * (buflen / 48) */
+    ldr bits_q, [bit_table]
+    movz tmp, #KYBER_Q
+    dup.8h kyber_q, tmp
+    div48 bound, buflen
+
+    mov iterw, #0
+    mov output, stack
+
+    /* 48 input bytes -> 32 candidates per iteration */
+    loop48:
+    add x8, buf, iter
+
+    /* De-interleaving load: buf0/buf1/buf2 get bytes 0/1/2 of each 3-byte group */
+    ld3.16b {buf0, buf1, buf2}, [x8]
+    add iterw, iterw, #48
+
+    /* 16-bit lanes: tmp0/tmp1 = byte0 | byte1 << 8, tmp2/tmp3 = byte1 | byte2 << 8 */
+    zip1.16b tmp0, buf0, buf1
+    zip2.16b tmp1, buf0, buf1
+    zip1.16b tmp2, buf1, buf2
+    zip2.16b tmp3, buf1, buf2
+
+    /* Reduce to 12 bits: clear the top nibble (first candidate of a group),
+       shift right by 4 (second candidate) */
+    bic.8h tmp0, #0xf0, lsl 8
+    bic.8h tmp1, #0xf0, lsl 8
+    ushr.8h tmp2, tmp2, #4
+    ushr.8h tmp3, tmp3, #4
+
+    /* Interleave first/second candidates back into input order */
+    zip1.8h val0, tmp0, tmp2
+    zip2.8h val1, tmp0, tmp2
+    zip1.8h val2, tmp1, tmp3
+    zip2.8h val3, tmp1, tmp3
+
+    /* Lane = all ones where q > candidate (candidate accepted) */
+    cmhi.8h sign0, kyber_q, val0
+    cmhi.8h sign1, kyber_q, val1
+    cmhi.8h sign2, kyber_q, val2
+    cmhi.8h sign3, kyber_q, val3
+
+    /* Keep only the lane's own bit, then sum the lanes into an 8-bit mask */
+    and.16b sign0, sign0, bits
+    and.16b sign1, sign1, bits
+    and.16b sign2, sign2, bits
+    and.16b sign3, sign3, bits
+
+    uaddlv.8h t0, sign0
+    uaddlv.8h t1, sign1
+    uaddlv.8h t2, sign2
+    uaddlv.8h t3, sign3
+
+    fmov rec_idx_0, t0
+    fmov rec_idx_1, t1
+    fmov rec_idx_2, t2
+    fmov rec_idx_3, t3
+
+    /* Fetch the 16-byte shuffle row for each mask (row offset = mask << 4) */
+    ldr table0q, [table_idx, rec_idx_0, uxtw #4]
+    ldr table1q, [table_idx, rec_idx_1, uxtw #4]
+    ldr table2q, [table_idx, rec_idx_2, uxtw #4]
+    ldr table3q, [table_idx, rec_idx_3, uxtw #4]
+
+    /* Each accepted lane holds exactly one set bit, so the popcount summed
+       over the vector is the number of accepted candidates */
+    cnt.16b sign0, sign0
+    cnt.16b sign1, sign1
+    cnt.16b sign2, sign2
+    cnt.16b sign3, sign3
+
+    uaddlv.8h t0, sign0
+    uaddlv.8h t1, sign1
+    uaddlv.8h t2, sign2
+    uaddlv.8h t3, sign3
+
+    fmov ctr0, t0
+    fmov ctr1, t1
+    fmov ctr2, t2
+    fmov ctr3, t3
+
+    /* Move the accepted candidates to the front of each vector */
+    tbl.16b val0, {val0}, table0
+    tbl.16b val1, {val1}, table1
+    tbl.16b val2, {val2}, table2
+    tbl.16b val3, {val3}, table3
+
+    /* Store the whole vector, but advance the cursor only by the accepted
+       count (2 bytes each); the next store overwrites the unused tail */
+    str val0q, [output]
+    add output, output, ctr0, uxtw #1
+
+    str val1q, [output]
+    add output, output, ctr1, uxtw #1
+
+    str val2q, [output]
+    add output, output, ctr2, uxtw #1
+
+    str val3q, [output]
+    add output, output, ctr3, uxtw #1
+
+    /* count = number of 16-bit values written so far */
+    sub x8, output, stack
+    lsr x8, x8, #1
+
+    cmp count, lenw
+    b.hs end
+
+    cmp iterw, bound
+    b.lo loop48
+
+    /* 24 input bytes -> 16 candidates per iteration; same steps as loop48
+       on the low halves of the registers */
+    loop24:
+
+    add x8, buf, iter
+
+    ld3.8b {buf0, buf1, buf2}, [x8]
+    add iterw, iterw, #24
+
+    zip1.16b tmp0, buf0, buf1
+    zip1.16b tmp1, buf1, buf2
+
+    bic.8h tmp0, #0xf0, lsl 8
+    ushr.8h tmp1, tmp1, #4
+
+    zip1.8h val0, tmp0, tmp1
+    zip2.8h val1, tmp0, tmp1
+
+    cmhi.8h sign0, kyber_q, val0
+    cmhi.8h sign1, kyber_q, val1
+
+    and.16b sign0, sign0, bits
+    and.16b sign1, sign1, bits
+
+    uaddlv.8h t0, sign0
+    uaddlv.8h t1, sign1
+
+    fmov rec_idx_0, t0
+    fmov rec_idx_1, t1
+
+    ldr table0q, [table_idx, rec_idx_0, uxtw #4]
+    ldr table1q, [table_idx, rec_idx_1, uxtw #4]
+
+    cnt.16b sign0, sign0
+    cnt.16b sign1, sign1
+
+    uaddlv.8h t0, sign0
+    uaddlv.8h t1, sign1
+
+    fmov ctr0, t0
+    fmov ctr1, t1
+
+    tbl.16b val0, {val0}, table0
+    tbl.16b val1, {val1}, table1
+
+    str val0q, [output]
+    add output, output, ctr0, uxtw #1
+
+    str val1q, [output]
+    add output, output, ctr1, uxtw #1
+
+    sub x8, output, stack
+    lsr x8, x8, #1
+
+    cmp count, lenw
+    b.hs end
+
+    cmp iterw, buflen
+    b.lo loop24
+
+    /* Return the accepted count in w0 and report the bytes read */
+    end:
+    mov counter, count
+    str iterw, [buf_consumed]
+    ret
diff --git a/scripts/checksum b/scripts/checksum
index 4066d6158..e66af9685 100755
--- a/scripts/checksum
+++ b/scripts/checksum
@@ -3,7 +3,12 @@
 
 # This script executes a binary file, captures its output, then generates and compares its SHA-256 hash with a provided one.
 
-output_hash=$(./$1 | sha256sum | awk '{ print $1 }')
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    output_hash=$(./$1 | sha2 -256 | awk '{ print $4 }')
+else
+    output_hash=$(./$1 | sha256sum | awk '{ print $1 }')
+fi
+
 
 if [[ ${output_hash} == "${2}" ]]; then
     echo "${1#*_} Hashes match."