Skip to content

Commit

Permalink
Merge pull request wolfSSL#7998 from SparkiDev/kyber_aarch64_asm
Browse files: browse the repository at this point in the history
Kyber Aarch64: assembly implementations of functions
  • Loading branch information
dgarske authored Sep 26, 2024
2 parents a1a3a0b + de65778 commit 2285c02
Show file tree
Hide file tree
Showing 13 changed files with 25,658 additions and 107 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2977,7 +2977,7 @@ then
AM_CPPFLAGS="$AM_CPPFLAGS+sm4"
fi
else
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto"
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto -DWOLFSSL_AARCH64_NO_SQRMLSH"
fi
;;
esac
Expand Down
7 changes: 7 additions & 0 deletions src/include.am
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,13 @@ if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S
endif
endif
if BUILD_ARMASM_NEON
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-kyber-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM_NEON
endif

if BUILD_DILITHIUM
Expand Down
84 changes: 28 additions & 56 deletions wolfcrypt/src/port/arm/armv8-curve25519.S
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,7 @@ _fe_cmov_table:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-128]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x17, x19, [x29, #40]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
Expand Down Expand Up @@ -546,8 +545,7 @@ _fe_cmov_table:
stp x10, x11, [x0, #48]
stp x12, x13, [x0, #64]
stp x14, x15, [x0, #80]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x17, x19, [x29, #40]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
Expand All @@ -573,8 +571,7 @@ _fe_mul:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-64]!
add x29, sp, #0
str x17, [x29, #24]
str x19, [x29, #32]
stp x17, x19, [x29, #24]
stp x20, x21, [x29, #40]
str x22, [x29, #56]
# Multiply
Expand Down Expand Up @@ -703,8 +700,7 @@ _fe_mul:
# Store
stp x6, x7, [x0]
stp x8, x9, [x0, #16]
ldr x17, [x29, #24]
ldr x19, [x29, #32]
ldp x17, x19, [x29, #24]
ldp x20, x21, [x29, #40]
ldr x22, [x29, #56]
ldp x29, x30, [sp], #0x40
Expand Down Expand Up @@ -835,8 +831,7 @@ _fe_invert:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-176]!
add x29, sp, #0
str x17, [x29, #160]
str x20, [x29, #168]
stp x17, x20, [x29, #160]
# Invert
str x0, [x29, #144]
str x1, [x29, #152]
Expand Down Expand Up @@ -1694,8 +1689,7 @@ L_fe_invert8:
#else
bl _fe_mul
#endif /* __APPLE__ */
ldr x17, [x29, #160]
ldr x20, [x29, #168]
ldp x17, x20, [x29, #160]
ldp x29, x30, [sp], #0xb0
ret
#ifndef __APPLE__
Expand All @@ -1715,8 +1709,7 @@ _curve25519:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-288]!
add x29, sp, #0
str x17, [x29, #200]
str x19, [x29, #208]
stp x17, x19, [x29, #200]
stp x20, x21, [x29, #216]
stp x22, x23, [x29, #232]
stp x24, x25, [x29, #248]
Expand Down Expand Up @@ -3801,8 +3794,7 @@ L_curve25519_inv_8:
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
mov x0, xzr
ldr x17, [x29, #200]
ldr x19, [x29, #208]
ldp x17, x19, [x29, #200]
ldp x20, x21, [x29, #216]
ldp x22, x23, [x29, #232]
ldp x24, x25, [x29, #248]
Expand All @@ -3828,8 +3820,7 @@ _fe_pow22523:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #128]
str x23, [x29, #136]
stp x17, x23, [x29, #128]
# pow22523
str x0, [x29, #112]
str x1, [x29, #120]
Expand Down Expand Up @@ -4619,8 +4610,7 @@ L_fe_pow22523_7:
#else
bl _fe_mul
#endif /* __APPLE__ */
ldr x17, [x29, #128]
ldr x23, [x29, #136]
ldp x17, x23, [x29, #128]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
Expand All @@ -4640,8 +4630,7 @@ _ge_p1p1_to_p2:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-80]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x17, x19, [x29, #40]
stp x20, x21, [x29, #56]
str x22, [x29, #72]
str x0, [x29, #16]
Expand Down Expand Up @@ -5002,8 +4991,7 @@ _ge_p1p1_to_p2:
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x17, x19, [x29, #40]
ldp x20, x21, [x29, #56]
ldr x22, [x29, #72]
ldp x29, x30, [sp], #0x50
Expand All @@ -5025,8 +5013,7 @@ _ge_p1p1_to_p3:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-112]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x17, x19, [x29, #40]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
Expand Down Expand Up @@ -5505,8 +5492,7 @@ _ge_p1p1_to_p3:
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x17, x19, [x29, #40]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
Expand All @@ -5530,8 +5516,7 @@ _ge_p2_dbl:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-128]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x17, x19, [x29, #40]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
Expand Down Expand Up @@ -5986,8 +5971,7 @@ _ge_p2_dbl:
sbc x7, x7, xzr
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x17, x19, [x29, #40]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
Expand All @@ -6012,8 +5996,7 @@ _ge_madd:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x17, x19, [x29, #56]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
Expand Down Expand Up @@ -6503,8 +6486,7 @@ _ge_madd:
stp x10, x11, [x0, #16]
stp x4, x5, [x1]
stp x6, x7, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x17, x19, [x29, #56]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
Expand All @@ -6529,8 +6511,7 @@ _ge_msub:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x17, x19, [x29, #56]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
Expand Down Expand Up @@ -7020,8 +7001,7 @@ _ge_msub:
stp x10, x11, [x0, #16]
stp x4, x5, [x1]
stp x6, x7, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x17, x19, [x29, #56]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
Expand All @@ -7046,8 +7026,7 @@ _ge_add:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x17, x19, [x29, #56]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
Expand Down Expand Up @@ -7663,8 +7642,7 @@ _ge_add:
stp x23, x24, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x17, x19, [x29, #56]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
Expand All @@ -7689,8 +7667,7 @@ _ge_sub:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x17, x19, [x29, #56]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
Expand Down Expand Up @@ -8321,8 +8298,7 @@ _ge_sub:
stp x14, x15, [x0, #16]
stp x21, x22, [x1]
stp x23, x24, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x17, x19, [x29, #56]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
Expand All @@ -8347,8 +8323,7 @@ _sc_reduce:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-64]!
add x29, sp, #0
str x17, [x29, #16]
str x19, [x29, #24]
stp x17, x19, [x29, #16]
stp x20, x21, [x29, #32]
stp x22, x23, [x29, #48]
ldp x2, x3, [x0]
Expand Down Expand Up @@ -8525,8 +8500,7 @@ _sc_reduce:
# Store result
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
ldr x17, [x29, #16]
ldr x19, [x29, #24]
ldp x17, x19, [x29, #16]
ldp x20, x21, [x29, #32]
ldp x22, x23, [x29, #48]
ldp x29, x30, [sp], #0x40
Expand All @@ -8548,8 +8522,7 @@ _sc_muladd:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-96]!
add x29, sp, #0
str x17, [x29, #24]
str x19, [x29, #32]
stp x17, x19, [x29, #24]
stp x20, x21, [x29, #40]
stp x22, x23, [x29, #56]
stp x24, x25, [x29, #72]
Expand Down Expand Up @@ -8824,8 +8797,7 @@ _sc_muladd:
# Store result
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
ldr x17, [x29, #24]
ldr x19, [x29, #32]
ldp x17, x19, [x29, #24]
ldp x20, x21, [x29, #40]
ldp x22, x23, [x29, #56]
ldp x24, x25, [x29, #72]
Expand Down
Loading

0 comments on commit 2285c02

Please sign in to comment.