AArch64: Add secure native implementations for verify.h
This commit adds native implementations of the verify.h API for AArch64.

Instead of writing the functions entirely in assembly, they are
largely kept in C; only the decisive operations are implemented
in inline assembly, to prevent the compiler from inadvertently
introducing secret-dependent branches.

Sticking to C plus inline assembly leaves most of the work to the
compiler, while the precise register-usage annotations on the
inline assembly still give it considerable scheduling freedom.
A call to a separate assembly function, by contrast, would have
to obey the function call ABI and could force the compiler to
spill registers to the stack.
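
To make the concern concrete, consider the following sketch (illustrative
only; the function names verify_plain_c and verify_early_exit are
hypothetical and do not appear in this commit). A pure-C comparison loop
is functionally equivalent to an early-exit loop, so an aggressive
compiler is, in principle, free to rewrite the former into the latter,
leaking the position of the first mismatch through timing. Wrapping only
the final comparison in inline assembly, as done below, acts as an
optimization barrier against that rewrite.

    #include <stddef.h>
    #include <stdint.h>

    /* Plain C: accumulate all differences, then reduce to 0/1. */
    static int verify_plain_c(const uint8_t *a, const uint8_t *b, size_t len)
    {
      uint8_t r = 0;
      for (size_t i = 0; i < len; i++)
      {
        r |= a[i] ^ b[i];
      }
      /* A compiler may observe that the result only distinguishes
       * "equal" from "not equal" ... */
      return r != 0;
    }

    /* ... and could, under the as-if rule, replace the function by this
     * variable-time equivalent, since the return value is identical: */
    static int verify_early_exit(const uint8_t *a, const uint8_t *b, size_t len)
    {
      for (size_t i = 0; i < len; i++)
      {
        if (a[i] != b[i])
        {
          return 1; /* exits at the first mismatch; timing reveals its index */
        }
      }
      return 0;
    }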

Signed-off-by: Hanno Becker <[email protected]>
hanno-becker committed Nov 28, 2024
1 parent c26a0d4 commit e92ceb7
Showing 2 changed files with 48 additions and 22 deletions.
2 changes: 2 additions & 0 deletions mlkem/common.h
@@ -12,6 +12,8 @@
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
 
+#define asm __asm__
+
 #define MLKEM_CONCAT_(left, right) left##right
 #define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right)

68 changes: 46 additions & 22 deletions mlkem/native/aarch64/verify-aarch64.h
@@ -9,48 +9,72 @@
 static inline int verify_native(const uint8_t *a, const uint8_t *b,
                                 const size_t len)
 {
-  // TODO: replace this with inline asm
   uint8_t r = 0;
-  uint64_t u;
+  int res;
 
-  // Switch to a _signed_ ilen value, so that our loop counter
-  // can also be signed, and thus (i - 1) in the loop invariant
-  // can yield -1 as required.
-  const int ilen = (int)len;
-
-  for (int i = 0; i < ilen; i++)
+  for (size_t i = 0; i < len; i++)
   {
     r |= a[i] ^ b[i];
   }
 
-  u = (-(uint64_t)r) >> 63;
-  return (int)u;
+  // Use inline assembly to evaluate r != 0 to avoid
+  // the compiler messing with it and potentially realizing
+  // that, functionally, the above loop can be aborted as
+  // soon as r != 0.
+  asm("cmp %w[r], #0; \n\t"
+      "cset %w[res], ne" // res = (r != 0)
+      : [res] "=r"(res)
+      : [r] "r"(r)
+      : "cc" /* the flag register is clobbered */);
+  return res;
 }
 
+static inline uint8_t csel_uint8(uint8_t a, uint8_t b, const uint8_t cond)
+{
+  // Writing `cond ? a : b` in assembly to prevent
+  // the compiler generating a branch.
+  //
+  // Using inline assembly to avoid function call overheads
+  // and give the compiler more flexibility in scheduling.
+  uint8_t res;
+  asm("cmp %w[cond], #0; \n\t"
+      "csel %w[res], %w[a], %w[b], ne" // res = (cond != 0) ? a : b
+      : [res] "=r"(res)
+      : [a] "r"(a), [b] "r"(b), [cond] "r"(cond)
+      : "cc" /* the flag register is clobbered */);
+  return res;
+}
+
 static inline void cmov_native(uint8_t *r, const uint8_t *x, size_t len,
                                uint8_t b)
 {
-  // TODO: replace this with inline asm
   size_t i;
-
-  b = (-b) & 0xFF;
   for (i = 0; i < len; i++)
   {
-    r[i] ^= b & (r[i] ^ x[i]);
+    r[i] = csel_uint8(x[i], r[i], b);
   }
 }

+static inline int16_t csel_int16(int16_t a, int16_t b, const uint16_t cond)
+{
+  // Writing `cond ? a : b` in assembly to prevent
+  // the compiler generating a branch.
+  //
+  // Using inline assembly to avoid function call overheads
+  // and give the compiler more flexibility in scheduling.
+  int16_t res;
+  asm("cmp %w[cond], #0; \n\t"
+      "csel %w[res], %w[a], %w[b], ne" // res = (cond != 0) ? a : b
+      : [res] "=r"(res)
+      : [a] "r"(a), [b] "r"(b), [cond] "r"(cond)
+      : "cc" /* the flag register is clobbered */);
+  return res;
+}
+
 static inline void cmov_int16_native(int16_t *r, const int16_t v,
                                      const uint16_t b)
 {
-  // TODO: replace this with inline asm
-  // b == 0 => mask = 0x0000
-  // b == 1 => mask = 0xFFFF
-  const uint16_t mask = -b;
-
-  // mask == 0x0000 => *r == (*r ^ 0x0000) == *r
-  // mask == 0xFFFF => *r == (*r ^ (*r ^ v)) == (*r ^ *r) ^ v == 0 ^ v == v
-  *r ^= mask & ((*r) ^ v);
+  *r = csel_int16(v, *r, b);
 }
 
 #endif /* VERIFY_AARCH64_H */
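
For orientation, here is a minimal usage sketch of the primitives above
(the caller select_on_mismatch is hypothetical and not part of this
commit; it assumes this header is included): verify_native reports
whether two buffers differ while touching every byte, and cmov_native
then conditionally overwrites a buffer based on that flag, all without
secret-dependent branches.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical caller, for illustration only. */
static void select_on_mismatch(uint8_t *out, const uint8_t *computed,
                               const uint8_t *fallback, size_t len)
{
  /* fail == 1 if out and computed differ anywhere, 0 if they are equal. */
  int fail = verify_native(out, computed, len);

  /* Overwrite out with fallback exactly when fail == 1, without branching. */
  cmov_native(out, fallback, len, (uint8_t)fail);
}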
