Skip to content

Commit

Permalink
Merge pull request #270 from pq-code-package/aarch64_basemul_api
Browse files (browse the repository at this point in the history)
AArch64: Simplify polyvec_basemul_acc_montgomery interface
  • Loading branch information
hanno-becker authored Oct 29, 2024
2 parents 0eb2d60 + 8be0e2f commit 6f4de1f
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 79 deletions.
64 changes: 13 additions & 51 deletions mlkem/native/aarch64/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,57 +50,19 @@ void poly_mulcache_compute_asm_opt(int16_t *, const int16_t *, const int16_t *,
 #define poly_tobytes_asm_clean MLKEM_NAMESPACE(poly_tobytes_asm_clean)
 void poly_tobytes_asm_clean(uint8_t *r, const int16_t *a);

-#define polyvec_basemul_acc_montgomery_cached_asm_k2_clean \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
-void polyvec_basemul_acc_montgomery_cached_asm_k2_clean(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define polyvec_basemul_acc_montgomery_cached_asm_k3_clean \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
-void polyvec_basemul_acc_montgomery_cached_asm_k3_clean(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define polyvec_basemul_acc_montgomery_cached_asm_k4_clean \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
-void polyvec_basemul_acc_montgomery_cached_asm_k4_clean(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define polyvec_basemul_acc_montgomery_cached_asm_k2_opt \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_opt)
-void polyvec_basemul_acc_montgomery_cached_asm_k2_opt(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define polyvec_basemul_acc_montgomery_cached_asm_k3_opt \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_opt)
-void polyvec_basemul_acc_montgomery_cached_asm_k3_opt(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define polyvec_basemul_acc_montgomery_cached_asm_k4_opt \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_opt)
-void polyvec_basemul_acc_montgomery_cached_asm_k4_opt(int16_t *r,
-const int16_t *a,
-const int16_t *b,
-const int16_t *b_cache);
-
-#define _polyvec_basemul_acc_montgomery_cached_asm_clean_name(k) \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k##k##_clean)
-#define polyvec_basemul_acc_montgomery_cached_asm_clean_name(k) \
-_polyvec_basemul_acc_montgomery_cached_asm_clean_name(k)
-
-#define _polyvec_basemul_acc_montgomery_cached_asm_opt_name(k) \
-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k##k##_opt)
-#define polyvec_basemul_acc_montgomery_cached_asm_opt_name(k) \
-_polyvec_basemul_acc_montgomery_cached_asm_opt_name(k)
+#define polyvec_basemul_acc_montgomery_cached_asm_clean \
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)
+void polyvec_basemul_acc_montgomery_cached_asm_clean(int16_t *r,
+const int16_t *a,
+const int16_t *b,
+const int16_t *b_cache);
+
+#define polyvec_basemul_acc_montgomery_cached_asm_opt \
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)
+void polyvec_basemul_acc_montgomery_cached_asm_opt(int16_t *r, const int16_t *a,
+const int16_t *b,
+const int16_t *b_cache);
+

 #endif /* MLKEM_USE_NATIVE_AARCH64 */
 #endif /* MLKEM_AARCH64_NATIVE_H */
24 changes: 12 additions & 12 deletions mlkem/native/aarch64/polyvec_clean.S
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,11 @@ const_addr:
 .short 0

 #if MLKEM_K == 2
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down Expand Up @@ -179,11 +179,11 @@ k2_loop_start:
 #endif /* MLKEM_K == 2 */

 #if MLKEM_K == 3
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down Expand Up @@ -223,11 +223,11 @@ k3_loop_start:
 #endif /* MLKEM_K == 3 */

 #if MLKEM_K == 4
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_clean):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down
24 changes: 12 additions & 12 deletions mlkem/native/aarch64/polyvec_opt.S
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ const_addr:
 .short 0

 #if MLKEM_K == 2
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_opt)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_opt)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_opt):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_opt):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down Expand Up @@ -351,11 +351,11 @@ k2_loop_start:
 #endif /* MLKEM_K == 2 */

 #if MLKEM_K == 3
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_opt)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_opt)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_opt):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_opt):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down Expand Up @@ -701,11 +701,11 @@ k3_loop_start:
 #endif /* MLKEM_K == 3 */

 #if MLKEM_K == 4
-.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_opt)
-.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_opt)
+.global MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)
+.global _MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt)

-MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_opt):
-_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_opt):
+MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
+_MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_opt):
 push_stack

 ASM_LOAD(xtmp, const_addr)
Expand Down
2 changes: 1 addition & 1 deletion mlkem/native/aarch64/profiles/clean.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ static inline void poly_mulcache_compute_native(poly_mulcache *x,
 static inline void polyvec_basemul_acc_montgomery_cached_native(
 poly *r, const polyvec *a, const polyvec *b,
 const polyvec_mulcache *b_cache) {
-polyvec_basemul_acc_montgomery_cached_asm_clean_name(MLKEM_K)(
+polyvec_basemul_acc_montgomery_cached_asm_clean(
 r->coeffs, a->vec[0].coeffs, b->vec[0].coeffs, b_cache->vec[0].coeffs);
 }

Expand Down
2 changes: 1 addition & 1 deletion mlkem/native/aarch64/profiles/opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ static inline void poly_mulcache_compute_native(poly_mulcache *x,
 static inline void polyvec_basemul_acc_montgomery_cached_native(
 poly *r, const polyvec *a, const polyvec *b,
 const polyvec_mulcache *b_cache) {
-polyvec_basemul_acc_montgomery_cached_asm_opt_name(MLKEM_K)(
+polyvec_basemul_acc_montgomery_cached_asm_opt(
 r->coeffs, a->vec[0].coeffs, b->vec[0].coeffs, b_cache->vec[0].coeffs);
 }

Expand Down
4 changes: 2 additions & 2 deletions test/bench_components_mlkem.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ static int bench(void) {
 poly_mulcache_compute_asm_clean((int16_t *)data0, (int16_t *)data1,
 (int16_t *)data2, (int16_t *)data3));
 BENCH("poly-basemul-acc-montgomery-clean",
-polyvec_basemul_acc_montgomery_cached_asm_clean_name(MLKEM_K)(
+polyvec_basemul_acc_montgomery_cached_asm_clean(
 (int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
 (int16_t *)data3));

Expand All @@ -83,7 +83,7 @@ static int bench(void) {
 poly_mulcache_compute_asm_opt((int16_t *)data0, (int16_t *)data1,
 (int16_t *)data2, (int16_t *)data3));
 BENCH("poly-basemul-acc-montgomery-opt",
-polyvec_basemul_acc_montgomery_cached_asm_opt_name(MLKEM_K)(
+polyvec_basemul_acc_montgomery_cached_asm_opt(
 (int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
 (int16_t *)data3));
 #endif
Expand Down

0 comments on commit 6f4de1f

Please sign in to comment.