Skip to content

Commit

Permalink
AArch64: Autogenerate twiddles for mulcache computation
Browse files Browse the repository at this point in the history
This commit extends autogenerate_files.py to also autogenerate
the twiddles needed for the native mulcache computation
in the AArch64 native backend.

Signed-off-by: Hanno Becker <[email protected]>
  • Loading branch information
hanno-becker committed Nov 26, 2024
1 parent 3a30436 commit bb9247e
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 41 deletions.
33 changes: 33 additions & 0 deletions mlkem/native/aarch64/aarch64_zetas.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,39 @@ const int16_t aarch64_invntt_zetas_layer56[] = {
-16113, -16113, -5739, -5739, -167, -167,
};

const int16_t aarch64_zetas_mulcache_native[] = {
17, -17, -568, 568, 583, -583, -680, 680, 1637, -1637, 723,
-723, -1041, 1041, 1100, -1100, 1409, -1409, -667, 667, -48, 48,
233, -233, 756, -756, -1173, 1173, -314, 314, -279, 279, -1626,
1626, 1651, -1651, -540, 540, -1540, 1540, -1482, 1482, 952, -952,
1461, -1461, -642, 642, 939, -939, -1021, 1021, -892, 892, -941,
941, 733, -733, -992, 992, 268, -268, 641, -641, 1584, -1584,
-1031, 1031, -1292, 1292, -109, 109, 375, -375, -780, 780, -1239,
1239, 1645, -1645, 1063, -1063, 319, -319, -556, 556, 757, -757,
-1230, 1230, 561, -561, -863, 863, -735, 735, -525, 525, 1092,
-1092, 403, -403, 1026, -1026, 1143, -1143, -1179, 1179, -554, 554,
886, -886, -1607, 1607, 1212, -1212, -1455, 1455, 1029, -1029, -1219,
1219, -394, 394, 885, -885, -1175, 1175,
};

const int16_t aarch64_zetas_mulcache_twisted_native[] = {
167, -167, -5591, 5591, 5739, -5739, -6693, 6693, 16113,
-16113, 7117, -7117, -10247, 10247, 10828, -10828, 13869, -13869,
-6565, 6565, -472, 472, 2293, -2293, 7441, -7441, -11546,
11546, -3091, 3091, -2746, 2746, -16005, 16005, 16251, -16251,
-5315, 5315, -15159, 15159, -14588, 14588, 9371, -9371, 14381,
-14381, -6319, 6319, 9243, -9243, -10050, 10050, -8780, 8780,
-9262, 9262, 7215, -7215, -9764, 9764, 2638, -2638, 6309,
-6309, 15592, -15592, -10148, 10148, -12717, 12717, -1073, 1073,
3691, -3691, -7678, 7678, -12196, 12196, 16192, -16192, 10463,
-10463, 3140, -3140, -5473, 5473, 7451, -7451, -12107, 12107,
5522, -5522, -8495, 8495, -7235, 7235, -5168, 5168, 10749,
-10749, 3967, -3967, 10099, -10099, 11251, -11251, -11605, 11605,
-5453, 5453, 8721, -8721, -15818, 15818, 11930, -11930, -14322,
14322, 10129, -10129, -11999, 11999, -3878, 3878, 8711, -8711,
-11566, 11566,
};

#else /* MLKEM_USE_NATIVE_AARCH64 */
// Dummy declaration for compilers disliking empty compilation units
int empty_cu_aarch64_zetas;
Expand Down
6 changes: 6 additions & 0 deletions mlkem/native/aarch64/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,17 @@
MLKEM_NAMESPACE(aarch64_invntt_zetas_layer01234)
#define aarch64_invntt_zetas_layer56 \
MLKEM_NAMESPACE(aarch64_invntt_zetas_layer56)
#define aarch64_zetas_mulcache_native \
MLKEM_NAMESPACE(aarch64_zetas_mulcache_native)
#define aarch64_zetas_mulcache_twisted_native \
MLKEM_NAMESPACE(aarch64_zetas_mulcache_twisted_native)

extern const int16_t aarch64_ntt_zetas_layer01234[];
extern const int16_t aarch64_ntt_zetas_layer56[];
extern const int16_t aarch64_invntt_zetas_layer01234[];
extern const int16_t aarch64_invntt_zetas_layer56[];
extern const int16_t aarch64_zetas_mulcache_native[];
extern const int16_t aarch64_zetas_mulcache_twisted_native[];

#define ntt_asm_clean MLKEM_NAMESPACE(ntt_asm_clean)
void ntt_asm_clean(int16_t *, const int16_t *, const int16_t *);
Expand Down
35 changes: 0 additions & 35 deletions mlkem/native/aarch64/consts.c

This file was deleted.

6 changes: 3 additions & 3 deletions mlkem/native/aarch64/profiles/clean.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include "../../arith_native.h"
#include "../arith_native_aarch64.h"
#include "../consts.h"

#define MLKEM_USE_NATIVE_NTT
#define MLKEM_USE_NATIVE_INTT
Expand Down Expand Up @@ -42,8 +41,9 @@ static inline void poly_tomont_native(poly *data) {

static inline void poly_mulcache_compute_native(poly_mulcache *x,
const poly *y) {
poly_mulcache_compute_asm_clean(x->coeffs, y->coeffs, zetas_mulcache_native,
zetas_mulcache_twisted_native);
poly_mulcache_compute_asm_clean(x->coeffs, y->coeffs,
aarch64_zetas_mulcache_native,
aarch64_zetas_mulcache_twisted_native);
}
static inline void polyvec_basemul_acc_montgomery_cached_native(
poly *r, const polyvec *a, const polyvec *b,
Expand Down
6 changes: 3 additions & 3 deletions mlkem/native/aarch64/profiles/opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include "../../arith_native.h"
#include "../arith_native_aarch64.h"
#include "../consts.h"

#define MLKEM_USE_NATIVE_NTT
#define MLKEM_USE_NATIVE_INTT
Expand Down Expand Up @@ -42,8 +41,9 @@ static inline void poly_tomont_native(poly *data) {

static inline void poly_mulcache_compute_native(poly_mulcache *x,
const poly *y) {
poly_mulcache_compute_asm_opt(x->coeffs, y->coeffs, zetas_mulcache_native,
zetas_mulcache_twisted_native);
poly_mulcache_compute_asm_opt(x->coeffs, y->coeffs,
aarch64_zetas_mulcache_native,
aarch64_zetas_mulcache_twisted_native);
}
static inline void polyvec_basemul_acc_montgomery_cached_native(
poly *r, const polyvec *a, const polyvec *b,
Expand Down
20 changes: 20 additions & 0 deletions scripts/autogenerate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,18 @@ def double_ith(t, i):
yield from double_ith(gen_aarch64_root_of_unity_for_block(6,8*block+5, inv=True), i)
yield from double_ith(gen_aarch64_root_of_unity_for_block(6,8*block+7, inv=True), i)

def gen_aarch64_mulcache_twiddles():
    """Yield the twiddle constants for the AArch64 native mulcache computation.

    For every idx in 0..63 the root is
    ``root_of_unity ** bitreverse(64 + idx, 7) mod modulus``. Two values are
    produced per idx, in this order: component 0 of
    ``prepare_root_for_barrett`` for the root, then component 0 for its
    negation -- 128 values in total.
    """
    for offset in range(64):
        zeta = pow(root_of_unity, bitreverse(64 + offset, 7), modulus)
        # Emit the entry for the root first, then the one for its negation.
        for signed_zeta in (zeta, -zeta):
            # NOTE(review): component 0 is presumably the plain (untwisted)
            # constant -- confirm against prepare_root_for_barrett.
            yield prepare_root_for_barrett(signed_zeta)[0]

def gen_aarch64_mulcache_twiddles_twisted():
    """Yield the twisted twiddle constants for the AArch64 native mulcache.

    Walks the same 64 roots as the untwisted generator, i.e.
    ``root_of_unity ** bitreverse(64 + idx, 7) mod modulus`` for idx in
    0..63. For each root it produces component 1 of
    ``prepare_root_for_barrett`` for the root and then for its negation --
    128 values in total.
    """
    for offset in range(64):
        zeta = pow(root_of_unity, bitreverse(64 + offset, 7), modulus)
        # Emit the entry for the root first, then the one for its negation.
        for signed_zeta in (zeta, -zeta):
            # NOTE(review): component 1 is presumably the twisted constant
            # used by the Barrett multiplication -- confirm against
            # prepare_root_for_barrett.
            yield prepare_root_for_barrett(signed_zeta)[1]

def gen_aarch64_fwd_ntt_zeta_file(dry_run=False):
def gen():
yield from gen_header()
Expand All @@ -227,6 +239,14 @@ def gen():
yield from map(lambda t: str(t) + ",", gen_aarch64_inv_ntt_zetas_layer56())
yield "};"
yield ""
yield "const int16_t aarch64_zetas_mulcache_native[] = {"
yield from map(lambda t: str(t) + ",", gen_aarch64_mulcache_twiddles())
yield "};"
yield ""
yield "const int16_t aarch64_zetas_mulcache_twisted_native[] = {"
yield from map(lambda t: str(t) + ",", gen_aarch64_mulcache_twiddles_twisted())
yield "};"
yield ""
yield "#else /* MLKEM_USE_NATIVE_AARCH64 */"
yield "// Dummy declaration for compilers disliking empty compilation units"
yield "int empty_cu_aarch64_zetas;"
Expand Down

0 comments on commit bb9247e

Please sign in to comment.