Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AArch64: Remove ASM_LOAD directives #547

Merged
merged 7 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mk/crypto.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
CPPFLAGS += -Imlkem/fips202 -Imlkem/fips202/native
FIPS202_SRCS = $(wildcard mlkem/fips202/*.c)
ifeq ($(OPT),1)
FIPS202_SRCS += $(wildcard mlkem/fips202/native/aarch64/src/*.S) $(wildcard mlkem/fips202/native/x86_64/src/*.c)
FIPS202_SRCS += $(wildcard mlkem/fips202/native/aarch64/src/*.S) $(wildcard mlkem/fips202/native/aarch64/src/*.c) $(wildcard mlkem/fips202/native/x86_64/src/*.c)
endif

$(BUILD_DIR)/libmlkem.a: $(call OBJS, $(FIPS202_SRCS))
Expand Down
13 changes: 0 additions & 13 deletions mlkem/fips202/native/aarch64/src/common.i

This file was deleted.

2 changes: 1 addition & 1 deletion mlkem/fips202/native/aarch64/src/cortex_a55_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#define MLKEM_USE_FIPS202_X1_NATIVE
static INLINE void keccak_f1600_x1_native(uint64_t *state)
{
keccak_f1600_x1_scalar_asm_opt(state);
keccak_f1600_x1_scalar_asm_opt(state, keccakf1600_round_constants);
}

#endif /* FIPS202_NATIVE_PROFILE_IMPL_H */
11 changes: 6 additions & 5 deletions mlkem/fips202/native/aarch64/src/default_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
#define MLKEM_USE_FIPS202_X1_NATIVE
static INLINE void keccak_f1600_x1_native(uint64_t *state)
{
keccak_f1600_x1_v84a_asm_clean(state);
keccak_f1600_x1_v84a_asm_clean(state, keccakf1600_round_constants);
}
#elif !defined(SYS_AARCH64_SLOW_BARREL_SHIFTER)
#define MLKEM_USE_FIPS202_X1_NATIVE
static INLINE void keccak_f1600_x1_native(uint64_t *state)
{
keccak_f1600_x1_scalar_asm_opt(state);
keccak_f1600_x1_scalar_asm_opt(state, keccakf1600_round_constants);
}
#endif /* !SYS_AARCH64_SLOW_BARREL_SHIFTER */

Expand All @@ -66,13 +66,14 @@ static INLINE void keccak_f1600_x1_native(uint64_t *state)
#define MLKEM_USE_FIPS202_X2_NATIVE
static INLINE void keccak_f1600_x2_native(uint64_t *state)
{
keccak_f1600_x2_v84a_asm_clean(state);
keccak_f1600_x2_v84a_asm_clean(state, keccakf1600_round_constants);
}
#else /* __APPLE__ */
#define MLKEM_USE_FIPS202_X4_NATIVE
static INLINE void keccak_f1600_x4_native(uint64_t *state)
{
keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt(state);
keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt(state,
keccakf1600_round_constants);
}
#endif /* __APPLE__ */

Expand All @@ -81,7 +82,7 @@ static INLINE void keccak_f1600_x4_native(uint64_t *state)
#define MLKEM_USE_FIPS202_X4_NATIVE
static INLINE void keccak_f1600_x4_native(uint64_t *state)
{
keccak_f1600_x4_scalar_v8a_asm_hybrid_opt(state);
keccak_f1600_x4_scalar_v8a_asm_hybrid_opt(state, keccakf1600_round_constants);
}

#endif /* __ARM_FEATURE_SHA3 */
Expand Down
21 changes: 14 additions & 7 deletions mlkem/fips202/native/aarch64/src/fips202_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,37 @@

#define keccak_f1600_x1_scalar_asm_opt \
FIPS202_NAMESPACE(keccak_f1600_x1_scalar_asm_opt)
void keccak_f1600_x1_scalar_asm_opt(uint64_t *state);
void keccak_f1600_x1_scalar_asm_opt(uint64_t *state, uint64_t const *rc);

#define keccak_f1600_x1_v84a_asm_clean \
FIPS202_NAMESPACE(keccak_f1600_x1_v84a_asm_clean)
void keccak_f1600_x1_v84a_asm_clean(uint64_t *state);
void keccak_f1600_x1_v84a_asm_clean(uint64_t *state, uint64_t const *rc);

#define keccak_f1600_x2_v84a_asm_clean \
FIPS202_NAMESPACE(keccak_f1600_x2_v84a_asm_clean)
void keccak_f1600_x2_v84a_asm_clean(uint64_t *state);
void keccak_f1600_x2_v84a_asm_clean(uint64_t *state, uint64_t const *rc);

#define keccak_f1600_x2_v8a_v84a_asm_hybrid \
FIPS202_NAMESPACE(keccak_f1600_x2_v8a_v84a_asm_hybrid)
void keccak_f1600_x2_v8a_v84a_asm_hybrid(uint64_t *state);
void keccak_f1600_x2_v8a_v84a_asm_hybrid(uint64_t *state, uint64_t const *rc);

#define keccak_f1600_x4_scalar_v8a_asm_hybrid_opt \
FIPS202_NAMESPACE(keccak_f1600_x4_scalar_v8a_asm_hybrid_opt)
void keccak_f1600_x4_scalar_v8a_asm_hybrid_opt(uint64_t *state);
void keccak_f1600_x4_scalar_v8a_asm_hybrid_opt(uint64_t *state,
uint64_t const *rc);

#define keccak_f1600_x4_scalar_v84a_asm_hybrid_opt \
FIPS202_NAMESPACE(keccak_f1600_x4_scalar_v84a_asm_hybrid_opt)
void keccak_f1600_x4_scalar_v84a_asm_hybrid_opt(uint64_t *state);
void keccak_f1600_x4_scalar_v84a_asm_hybrid_opt(uint64_t *state,
uint64_t const *rc);

#define keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt \
FIPS202_NAMESPACE(keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt)
void keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt(uint64_t *state);
void keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm_opt(uint64_t *state,
uint64_t const *rc);

#define keccakf1600_round_constants \
FIPS202_NAMESPACE(keccakf1600_round_constants)
extern const uint64_t keccakf1600_round_constants[];

#endif /* FIPS202_AARCH64_NATIVE_H */
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,17 @@
*
*/


// Author: Hanno Becker <[email protected]>
// Author: Matthias Kannwischer <[email protected]>

#include "common.h"
#if defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_DEFAULT) || \
defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_A55)

// Needed to provide ASM_LOAD directive
#include "common.i"

/********************** CONSTANTS *************************/
.data
.balign 64
round_constants:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008

/****************** REGISTER ALLOCATIONS *******************/

input_addr .req x0
input_rc .req x1
const_addr .req x26

/* Mapping of Keccak-f1600 state to scalar registers
Expand Down Expand Up @@ -205,8 +173,8 @@ _FIPS202_NAMESPACE(keccak_f1600_x1_scalar_asm_opt):
save_gprs

initial:
ASM_LOAD(const_addr, round_constants)
str const_addr, [sp, #STACK_LOC_CONST]
mov const_addr, input_rc
str input_rc, [sp, #STACK_LOC_CONST]
load_state
str input_addr, [sp, #STACK_LOC_INPUT] // @slothy:writes=STACK_LOC_INPUT

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,44 +41,12 @@
#if defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_DEFAULT) || \
defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_A55)

// Needed to provide ASM_LOAD directive
#include "common.i"
#include "namespace.h"

#if defined(__ARM_FEATURE_SHA3)

/********************** CONSTANTS *************************/
.data
.align(8)
round_constants:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008

/****************** REGISTER ALLOCATIONS *******************/

input_addr .req x0
input_rc .req x1
const_addr .req x1
count .req x2
cur_const .req x3
Expand Down Expand Up @@ -349,7 +317,7 @@ FIPS202_NAMESPACE(keccak_f1600_x1_v84a_asm_clean):
_FIPS202_NAMESPACE(keccak_f1600_x1_v84a_asm_clean):
alloc_stack
save_vregs
ASM_LOAD(const_addr, round_constants)
mov const_addr, input_rc
load_input

mov count, #(KECCAK_F1600_ROUNDS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,44 +41,12 @@
#if defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_DEFAULT) || \
defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_A55)

// Needed to provide ASM_LOAD directive
#include "common.i"
#include "namespace.h"

#if defined(__ARM_FEATURE_SHA3)

/********************** CONSTANTS *************************/
.data
.align(8)
round_constants:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008

/****************** REGISTER ALLOCATIONS *******************/

input_addr .req x0
input_rc .req x1
const_addr .req x1
count .req x2
cur_const .req x3
Expand Down Expand Up @@ -377,7 +345,7 @@ FIPS202_NAMESPACE(keccak_f1600_x2_v84a_asm_clean):
_FIPS202_NAMESPACE(keccak_f1600_x2_v84a_asm_clean):
alloc_stack
save_vregs
ASM_LOAD(const_addr, round_constants)
mov const_addr, input_rc
load_input

mov count, #(KECCAK_F1600_ROUNDS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,43 +41,12 @@
#if defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_DEFAULT) || \
defined(MLKEM_NATIVE_FIPS202_BACKEND_AARCH64_A55)

// Needed to provide ASM_LOAD directive
#include "common.i"

#if defined(__ARM_FEATURE_SHA3)

/********************** CONSTANTS *************************/
.data
.align(8)
round_constants:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008

/****************** REGISTER ALLOCATIONS *******************/

input_addr .req x0
input_rc .req x1
const_addr .req x1
count .req x2
cur_const .req x3
Expand Down Expand Up @@ -418,7 +387,7 @@ _FIPS202_NAMESPACE(keccak_f1600_x2_v8a_v84a_asm_hybrid):
alloc_stack
save_gprs
save_vregs
ASM_LOAD(const_addr, round_constants)
mov const_addr, input_rc
load_input

mov count, #(KECCAK_F1600_ROUNDS)
Expand Down
Loading
Loading