From bb9247eb31cd62cf1533422086994a877419ad3c Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Tue, 26 Nov 2024 08:43:43 +0000 Subject: [PATCH] AArch64: Autogenerate twiddles for mulcache computation This commit extends autogenerate_files.py to autogenerate also the twiddles needed for the native mulcache computation in the AArch64 native backend. Signed-off-by: Hanno Becker --- mlkem/native/aarch64/aarch64_zetas.c | 33 +++++++++++++++++++ mlkem/native/aarch64/arith_native_aarch64.h | 6 ++++ mlkem/native/aarch64/consts.c | 35 --------------------- mlkem/native/aarch64/profiles/clean.h | 6 ++-- mlkem/native/aarch64/profiles/opt.h | 6 ++-- scripts/autogenerate_files.py | 20 ++++++++++++ 6 files changed, 65 insertions(+), 41 deletions(-) delete mode 100644 mlkem/native/aarch64/consts.c diff --git a/mlkem/native/aarch64/aarch64_zetas.c b/mlkem/native/aarch64/aarch64_zetas.c index 05899dff8..5d9fe528c 100644 --- a/mlkem/native/aarch64/aarch64_zetas.c +++ b/mlkem/native/aarch64/aarch64_zetas.c @@ -124,6 +124,39 @@ const int16_t aarch64_invntt_zetas_layer56[] = { -16113, -16113, -5739, -5739, -167, -167, }; +const int16_t aarch64_zetas_mulcache_native[] = { + 17, -17, -568, 568, 583, -583, -680, 680, 1637, -1637, 723, + -723, -1041, 1041, 1100, -1100, 1409, -1409, -667, 667, -48, 48, + 233, -233, 756, -756, -1173, 1173, -314, 314, -279, 279, -1626, + 1626, 1651, -1651, -540, 540, -1540, 1540, -1482, 1482, 952, -952, + 1461, -1461, -642, 642, 939, -939, -1021, 1021, -892, 892, -941, + 941, 733, -733, -992, 992, 268, -268, 641, -641, 1584, -1584, + -1031, 1031, -1292, 1292, -109, 109, 375, -375, -780, 780, -1239, + 1239, 1645, -1645, 1063, -1063, 319, -319, -556, 556, 757, -757, + -1230, 1230, 561, -561, -863, 863, -735, 735, -525, 525, 1092, + -1092, 403, -403, 1026, -1026, 1143, -1143, -1179, 1179, -554, 554, + 886, -886, -1607, 1607, 1212, -1212, -1455, 1455, 1029, -1029, -1219, + 1219, -394, 394, 885, -885, -1175, 1175, +}; + +const int16_t aarch64_zetas_mulcache_twisted_native[] = { + 167, -167, -5591, 5591, 5739, -5739, -6693, 6693, 16113, + -16113, 7117, -7117, -10247, 10247, 10828, -10828, 13869, -13869, + -6565, 6565, -472, 472, 2293, -2293, 7441, -7441, -11546, + 11546, -3091, 3091, -2746, 2746, -16005, 16005, 16251, -16251, + -5315, 5315, -15159, 15159, -14588, 14588, 9371, -9371, 14381, + -14381, -6319, 6319, 9243, -9243, -10050, 10050, -8780, 8780, + -9262, 9262, 7215, -7215, -9764, 9764, 2638, -2638, 6309, + -6309, 15592, -15592, -10148, 10148, -12717, 12717, -1073, 1073, + 3691, -3691, -7678, 7678, -12196, 12196, 16192, -16192, 10463, + -10463, 3140, -3140, -5473, 5473, 7451, -7451, -12107, 12107, + 5522, -5522, -8495, 8495, -7235, 7235, -5168, 5168, 10749, + -10749, 3967, -3967, 10099, -10099, 11251, -11251, -11605, 11605, + -5453, 5453, 8721, -8721, -15818, 15818, 11930, -11930, -14322, + 14322, 10129, -10129, -11999, 11999, -3878, 3878, 8711, -8711, + -11566, 11566, +}; + #else /* MLKEM_USE_NATIVE_AARCH64 */ // Dummy declaration for compilers disliking empty compilation units int empty_cu_aarch64_zetas; diff --git a/mlkem/native/aarch64/arith_native_aarch64.h b/mlkem/native/aarch64/arith_native_aarch64.h index f3f9d7f28..e0773779c 100644 --- a/mlkem/native/aarch64/arith_native_aarch64.h +++ b/mlkem/native/aarch64/arith_native_aarch64.h @@ -16,11 +16,17 @@ MLKEM_NAMESPACE(aarch64_invntt_zetas_layer01234) #define aarch64_invntt_zetas_layer56 \ MLKEM_NAMESPACE(aarch64_invntt_zetas_layer56) +#define aarch64_zetas_mulcache_native \ + MLKEM_NAMESPACE(aarch64_zetas_mulcache_native) +#define aarch64_zetas_mulcache_twisted_native \ + MLKEM_NAMESPACE(aarch64_zetas_mulcache_twisted_native) extern const int16_t aarch64_ntt_zetas_layer01234[]; extern const int16_t aarch64_ntt_zetas_layer56[]; extern const int16_t aarch64_invntt_zetas_layer01234[]; extern const int16_t aarch64_invntt_zetas_layer56[]; +extern const int16_t aarch64_zetas_mulcache_native[]; +extern const int16_t aarch64_zetas_mulcache_twisted_native[]; #define ntt_asm_clean MLKEM_NAMESPACE(ntt_asm_clean) void ntt_asm_clean(int16_t *, const int16_t *, const int16_t *); diff --git a/mlkem/native/aarch64/consts.c b/mlkem/native/aarch64/consts.c deleted file mode 100644 index c33b17a26..000000000 --- a/mlkem/native/aarch64/consts.c +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2024 The mlkem-native project authors -// SPDX-License-Identifier: Apache-2.0 - -#include "consts.h" - -const int16_t zetas_mulcache_native[256] = { - 17, -17, -568, 568, 583, -583, -680, 680, 1637, -1637, 723, - -723, -1041, 1041, 1100, -1100, 1409, -1409, -667, 667, -48, 48, - 233, -233, 756, -756, -1173, 1173, -314, 314, -279, 279, -1626, - 1626, 1651, -1651, -540, 540, -1540, 1540, -1482, 1482, 952, -952, - 1461, -1461, -642, 642, 939, -939, -1021, 1021, -892, 892, -941, - 941, 733, -733, -992, 992, 268, -268, 641, -641, 1584, -1584, - -1031, 1031, -1292, 1292, -109, 109, 375, -375, -780, 780, -1239, - 1239, 1645, -1645, 1063, -1063, 319, -319, -556, 556, 757, -757, - -1230, 1230, 561, -561, -863, 863, -735, 735, -525, 525, 1092, - -1092, 403, -403, 1026, -1026, 1143, -1143, -1179, 1179, -554, 554, - 886, -886, -1607, 1607, 1212, -1212, -1455, 1455, 1029, -1029, -1219, - 1219, -394, 394, 885, -885, -1175, 1175}; - -const int16_t zetas_mulcache_twisted_native[256] = { - 167, -167, -5591, 5591, 5739, -5739, -6693, 6693, 16113, - -16113, 7117, -7117, -10247, 10247, 10828, -10828, 13869, -13869, - -6565, 6565, -472, 472, 2293, -2293, 7441, -7441, -11546, - 11546, -3091, 3091, -2746, 2746, -16005, 16005, 16251, -16251, - -5315, 5315, -15159, 15159, -14588, 14588, 9371, -9371, 14381, - -14381, -6319, 6319, 9243, -9243, -10050, 10050, -8780, 8780, - -9262, 9262, 7215, -7215, -9764, 9764, 2638, -2638, 6309, - -6309, 15592, -15592, -10148, 10148, -12717, 12717, -1073, 1073, - 3691, -3691, -7678, 7678, -12196, 12196, 16192, -16192, 10463, - -10463, 3140, -3140, -5473, 5473, 7451, -7451, -12107, 12107, - 5522, -5522, -8495, 8495, -7235, 7235, -5168, 5168, 10749, - -10749, 3967, -3967, 10099, -10099, 11251, -11251, -11605, 11605, - -5453, 5453, 8721, -8721, -15818, 15818, 11930, -11930, -14322, - 14322, 10129, -10129, -11999, 11999, -3878, 3878, 8711, -8711, - -11566, 11566}; diff --git a/mlkem/native/aarch64/profiles/clean.h b/mlkem/native/aarch64/profiles/clean.h index ff86a258d..e0fbdeb54 100644 --- a/mlkem/native/aarch64/profiles/clean.h +++ b/mlkem/native/aarch64/profiles/clean.h @@ -10,7 +10,6 @@ #include "../../arith_native.h" #include "../arith_native_aarch64.h" -#include "../consts.h" #define MLKEM_USE_NATIVE_NTT #define MLKEM_USE_NATIVE_INTT @@ -42,8 +41,9 @@ static inline void poly_tomont_native(poly *data) { static inline void poly_mulcache_compute_native(poly_mulcache *x, const poly *y) { - poly_mulcache_compute_asm_clean(x->coeffs, y->coeffs, zetas_mulcache_native, - zetas_mulcache_twisted_native); + poly_mulcache_compute_asm_clean(x->coeffs, y->coeffs, + aarch64_zetas_mulcache_native, + aarch64_zetas_mulcache_twisted_native); } static inline void polyvec_basemul_acc_montgomery_cached_native( poly *r, const polyvec *a, const polyvec *b, diff --git a/mlkem/native/aarch64/profiles/opt.h b/mlkem/native/aarch64/profiles/opt.h index 3d20e234c..330cff1b7 100644 --- a/mlkem/native/aarch64/profiles/opt.h +++ b/mlkem/native/aarch64/profiles/opt.h @@ -10,7 +10,6 @@ #include "../../arith_native.h" #include "../arith_native_aarch64.h" -#include "../consts.h" #define MLKEM_USE_NATIVE_NTT #define MLKEM_USE_NATIVE_INTT @@ -42,8 +41,9 @@ static inline void poly_tomont_native(poly *data) { static inline void poly_mulcache_compute_native(poly_mulcache *x, const poly *y) { - poly_mulcache_compute_asm_opt(x->coeffs, y->coeffs, zetas_mulcache_native, - zetas_mulcache_twisted_native); + poly_mulcache_compute_asm_opt(x->coeffs, y->coeffs, + aarch64_zetas_mulcache_native, + aarch64_zetas_mulcache_twisted_native); } static inline void polyvec_basemul_acc_montgomery_cached_native( poly *r, const polyvec *a, const polyvec *b, diff --git a/scripts/autogenerate_files.py b/scripts/autogenerate_files.py index da174a596..af2a4abe7 100644 --- a/scripts/autogenerate_files.py +++ b/scripts/autogenerate_files.py @@ -202,6 +202,18 @@ def double_ith(t, i): yield from double_ith(gen_aarch64_root_of_unity_for_block(6,8*block+5, inv=True), i) yield from double_ith(gen_aarch64_root_of_unity_for_block(6,8*block+7, inv=True), i) +def gen_aarch64_mulcache_twiddles(): + for idx in range(64): + root = pow(root_of_unity, bitreverse(64+idx,7), modulus) + yield prepare_root_for_barrett(root)[0] + yield prepare_root_for_barrett(-root)[0] + +def gen_aarch64_mulcache_twiddles_twisted(): + for idx in range(64): + root = pow(root_of_unity, bitreverse(64+idx,7), modulus) + yield prepare_root_for_barrett(root)[1] + yield prepare_root_for_barrett(-root)[1] + def gen_aarch64_fwd_ntt_zeta_file(dry_run=False): def gen(): yield from gen_header() @@ -227,6 +239,14 @@ def gen(): yield from map(lambda t: str(t) + ",", gen_aarch64_inv_ntt_zetas_layer56()) yield "};" yield "" + yield "const int16_t aarch64_zetas_mulcache_native[] = {" + yield from map(lambda t: str(t) + ",", gen_aarch64_mulcache_twiddles()) + yield "};" + yield "" + yield "const int16_t aarch64_zetas_mulcache_twisted_native[] = {" + yield from map(lambda t: str(t) + ",", gen_aarch64_mulcache_twiddles_twisted()) + yield "};" + yield "" yield "#else /* MLKEM_USE_NATIVE_AARCH64 */" yield "// Dummy declaration for compilers disliking empty compilation units" yield "int empty_cu_aarch64_zetas;"