From 7ea4e19a14057ca208e52fd2cb90e6dce9f3263d Mon Sep 17 00:00:00 2001
From: Edd Barrett <vext01@gmail.com>
Date: Tue, 17 Dec 2024 17:16:22 +0000
Subject: [PATCH] Have callees set up their shadow frame.

Before this change we'd have callers allocate shadow space. Besides
being an unconventional strategy, this also means that we allocate
shadow space for things which will never require shadow space. We even
had to conservatively allocate shadow space before external calls just
in case they call-back into functions that do need shadow space.

This change makes each callee requiring a shadow frame allocate it
itself.

Read the comment at the top of ShadowStack.cpp for implementation
details.

Measuring about a 22% speedup on bigloop.
---
 llvm/include/llvm/InitializePasses.h          |   1 +
 llvm/lib/CodeGen/CodeGen.cpp                  |   1 +
 llvm/lib/Transforms/Yk/ShadowStack.cpp        | 428 +++++++++++-------
 llvm/test/Transforms/Yk/ShadowStack.ll        | 116 +++++
 .../Transforms/Yk/ShadowStackRecurseMain.ll   |  14 +
 .../test/Transforms/Yk/ShadowStackZeroMain.ll |  28 ++
 6 files changed, 425 insertions(+), 163 deletions(-)
 create mode 100644 llvm/test/Transforms/Yk/ShadowStack.ll
 create mode 100644 llvm/test/Transforms/Yk/ShadowStackRecurseMain.ll
 create mode 100644 llvm/test/Transforms/Yk/ShadowStackZeroMain.ll

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index fc3d06cf603432..fedbebb97b30ca 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -344,6 +344,7 @@ void initializeXRayInstrumentationPass(PassRegistry&);
 void initializeYkStackmapsPass(PassRegistry&);
 void initializeYkSplitBlocksAfterCallsPass(PassRegistry&);
 void initializeYkBasicBlockTracerPass(PassRegistry&);
+void initializeYkShadowStackPass(PassRegistry&);
 void initializeYkModuleClonePass(PassRegistry&);
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index eb51081c6ceaee..3189f8b735b8a3 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -153,5 +153,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeYkStackmapsPass(Registry);
   initializeYkSplitBlocksAfterCallsPass(Registry);
   initializeYkBasicBlockTracerPass(Registry);
+  initializeYkShadowStackPass(Registry);
   initializeYkModuleClonePass(Registry);
 }
diff --git a/llvm/lib/Transforms/Yk/ShadowStack.cpp b/llvm/lib/Transforms/Yk/ShadowStack.cpp
index 442301a6e1312d..be52a8a3afd705 100644
--- a/llvm/lib/Transforms/Yk/ShadowStack.cpp
+++ b/llvm/lib/Transforms/Yk/ShadowStack.cpp
@@ -1,11 +1,85 @@
 //===- ShadowStack.cpp - Pass to add shadow stacks to the AOT module --===//
 //
-// Add shadow stacks to store variables that may have their references taken.
-// Storing such variables on a shadow stack allows AOT to share them with
-// compiled traces, and back (i.e. references created inside a trace will still
-// be valid when we return from the trace via deoptimisation).
-// YKFIXME: This can be optimised by only putting variables on the shadow stack
-// that actually have their reference taken.
+// This pass adds a shadow stack to a yk interpreter so that variables which
+// may have their address taken are uniform between JITted and AOT code.
+//
+// It works as follows:
+//
+// main() is assumed to be the entry point to the interpreter. In there we
+// malloc a chunk of memory for use as a shadow stack. The pointer to this
+// memory is then stored into a global variable for other functions to pick up.
+//
+// Then for each non-main function, F, we insert a "shadow prologue" which:
+//  1. loads the shadow stack's "high water mark" from the global variable.
+//  2. adds to the pointer to reserve shadow space for F.
+//  3. stores the new high water mark back to the global variable.
+//
+// Then for each non-main function, at each return point from the function,
+// we insert code to restore the shadow stack pointer back to what it was
+// before we adjusted it.
+//
+// If a function requires no shadow space, then the above steps can be omitted.
+//
+// main() is assumed to not recursively call main(), as this would cause us to
+// re-allocate the shadow stack from scratch, leaking the existing shadow stack
+// and generally causing chaos. This is checked during this pass.
+//
+// Special considerations regarding setjmp/longjmp:
+//
+// Consider a function using setjmp like this:
+//
+// ```
+// define f() {
+//   ; allocate shadow space
+//   %0 = load ptr, ptr @shadowstack_0, align 8
+//   %1 = getelementptr i8, ptr %0, i32 16
+//   store ptr %1, ptr @shadowstack_0, align 8
+//   ...
+//   call @setjmp(...)
+//   ...
+//   ; and suppose g() uses the shadow stack and calls longjmp() to transfer to
+//   ; the above setjmp().
+//   call @g(...)
+//   ...
+//   call @h(...) ; assume this also uses the shadow stack.
+//   ...
+//   return:
+//    ; release shadow space
+//    store ptr %0, ptr @shadowstack_0, align 8
+//    ret i32 1
+// }
+// ```
+//
+// Is the system in a consistent state after g() calls longjmp()?
+//
+// This question can be split into two:
+//
+// 1. Will f() restore the shadow stack pointer to the right value when it
+// returns after the longjmp()?
+//
+// 2. Will callees like h() get a useable shadow stack after the longjmp()?
+//
+// To answer 1: since %0 is local to f() and not changed between the calls to
+// setjmp and longjmp, this value will be restored during longjmp() and thus
+// the epilogue will do the right thing. See the man page for setjmp(3) for
+// more on why this works.
+//
+// To answer 2: when g() longjumps, we will skip g()'s shadow epilogue, so the
+// shadow stack pointer will not be restored. In effect, g()'s shadow frame
+// leaks up until the point where f()'s shadow epilogue restores the shadow
+// stack pointer.
+//
+// Does this matter? I believe the behaviour to be correct in the sense that
+// h() will get a usable shadow frame, but just deeper in the shadow stack than
+// expected. One can dream up scenarios, e.g. "h(), calls i(), calls j(), ...,
+// which longjumps", where a long chain of shadow frames temporarily leak,
+// potentially blowing the shadow stack. This could happen if the top-level
+// interpreter loop (which is long-lived) contains a commonly jumped-to
+// setjmp(), so we probably do want to fix this soon.
+//
+// YKFIXME: If you wanted to fix this you'd have to reload the shadow stack
+// pointer before calls to setjmp() and similarly to the reasoning for 1, it
+// would be restored after a longjmp.
 
 #include "llvm/Transforms/Yk/ShadowStack.h"
 #include "llvm/IR/BasicBlock.h"
@@ -18,13 +92,13 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/Transforms/Yk/LivenessAnalysis.h"
 
 #include <map>
 
-#define DEBUG_TYPE "yk-shadowstack"
+#define DEBUG_TYPE "yk-shadow-stack-pass"
 #define YK_MT_NEW "yk_mt_new"
 #define G_SHADOW_STACK "shadowstack_0"
+#define MAIN "main"
 // The size of the shadow stack. Defaults to 1MB.
 // YKFIXME: Make this adjustable by a compiler flag.
 #define SHADOW_STACK_SIZE 1000000
@@ -37,20 +111,25 @@ void initializeYkShadowStackPass(PassRegistry &);
 
 namespace {
 class YkShadowStack : public ModulePass {
+  // Commonly used types.
+  Type *Int8Ty = nullptr;
+  Type *Int8PtrTy = nullptr;
+  Type *Int32Ty = nullptr;
+  Type *PointerSizedIntTy = nullptr;
+
 public:
   static char ID;
   YkShadowStack() : ModulePass(ID) {
     initializeYkShadowStackPass(*PassRegistry::getPassRegistry());
   }
 
-  // Checks whether the given instruction is the alloca of the call to
-  // `yk_mt_new`.
-  bool isYkMTNewAlloca(Instruction *I) {
+  // Checks whether the given instruction allocates space for the result of a
+  // call to `yk_mt_new`.
+  bool isYkMTNewAlloca(AllocaInst *I) {
     for (User *U : I->users()) {
-      if (U && isa<StoreInst>(U)) {
-        Value *V = cast<StoreInst>(U)->getValueOperand();
-        if (isa<CallInst>(V)) {
-          CallInst *CI = cast<CallInst>(V);
+      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+        Value *V = SI->getValueOperand();
+        if (CallInst *CI = dyn_cast<CallInst>(V)) {
           if (CI->isInlineAsm())
             return false;
           if (!CI->getCalledFunction())
@@ -62,171 +141,194 @@ class YkShadowStack : public ModulePass {
     return false;
   }
 
+  // Emit main()'s shadow prologue at the start of its entry block.
+  //
+  // Unlike every other function, main() creates the shadow stack: it calls
+  // malloc() for SHADOW_STACK_SIZE bytes and its own shadow frame (if it has
+  // one) sits at the very start of that allocation. `SSGlobal` is initialised
+  // to point just past main()'s frame, i.e. `SFrameSize` bytes into the
+  // allocation.
+  //
+  // Returns the malloc() call, which doubles as the base pointer of main()'s
+  // shadow frame.
+  CallInst *insertMainPrologue(Function *Main, GlobalVariable *SSGlobal,
+                               size_t SFrameSize) {
+    Instruction *Front = Main->getEntryBlock().getFirstNonPHI();
+    IRBuilder<> Builder(Front);
+
+    // Heap allocate the shadow stack.
+    FunctionCallee MallocFn = Main->getParent()->getOrInsertFunction(
+        "malloc", Int8PtrTy, PointerSizedIntTy);
+    Value *StackSize = ConstantInt::get(PointerSizedIntTy, SHADOW_STACK_SIZE);
+    CallInst *Malloc = Builder.CreateCall(MallocFn, {StackSize}, "");
+
+    // Work out the initial top of the shadow stack. With no shadow frame for
+    // main() that is the allocation itself; otherwise skip over main()'s
+    // frame. The builder put the malloc() call directly before `Front`, so
+    // inserting the GEP there places it right after the call.
+    Value *Top = Malloc;
+    if (SFrameSize != 0) {
+      Value *FrameSize = ConstantInt::get(Int32Ty, SFrameSize);
+      Top = GetElementPtrInst::Create(Int8Ty, Malloc, {FrameSize}, "", Front);
+    }
+    Builder.CreateStore(Top, SSGlobal);
+
+    return Malloc;
+  }
+
+  // Scan `F`, recording shadow stack allocas (and their frame offsets) in
+  // `Allocas` and returns in `Rets`. Returns F's shadow frame size in bytes.
+  size_t analyseFunction(Function &F, DataLayout &DL,
+                         std::map<AllocaInst *, size_t> &Allocas,
+                         std::vector<ReturnInst *> &Rets) {
+    size_t SFrameSize = 0;
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+          // yk locations will never be traced and thus can live happily on
+          // the normal stack. YKFIXME: This is somewhat fragile since
+          // `struct.YkLocation` is a name given by LLVM which could
+          // theoretically change. Luckily, this should all go away once we only
+          // move variables to the shadowstack that have their reference taken.
+          StructType *ST = dyn_cast<StructType>(AI->getAllocatedType());
+          if (ST && !ST->isLiteral() && ST->getName() == "struct.YkLocation") {
+            continue;
+          }
+          if (isYkMTNewAlloca(AI)) {
+            // The variable created by `yk_mt_new` will never be traced, so
+            // there's no need to store it on the shadow stack.
+            continue;
+          }
+          // YKFIXME: Deal with allocas whose size isn't known at compile time.
+          auto AllocaSizeInBytes = AI->getAllocationSize(DL);
+          if (!AllocaSizeInBytes) {
+            F.getContext().emitError("Unable to add shadow stack: function "
+                                     "has dynamically sized stack!");
+            continue;
+          }
+          // Record the object's offset, obeying LLVM's alignment requirements.
+          // YKOPT: Objects are laid out in the order we encounter them;
+          // sorting them by size may waste less space on padding.
+          size_t Align = AI->getAlign().value();
+          SFrameSize = ((SFrameSize + (Align - 1)) / Align) * Align;
+          Allocas.insert({AI, SFrameSize});
+          SFrameSize += *AllocaSizeInBytes;
+        } else if (ReturnInst *RI = dyn_cast<ReturnInst>(&I)) {
+          Rets.push_back(RI);
+        } else if (CallBase *CI = dyn_cast<CallBase>(&I)) {
+          // check for recursive calls to main().
+          Function *CF = CI->getCalledFunction();
+          if ((CF != nullptr) && (CF->getName() == MAIN)) {
+            F.getContext().emitError("detected recursive call to main!");
+          }
+        }
+      }
+    }
+    return SFrameSize;
+  }
+
+  // Emit F's shadow prologue at the start of its entry block: load the shadow
+  // stack pointer from `SSGlobal`, advance it by `AllocSize` bytes and store
+  // the advanced pointer back.
+  //
+  // Returns the pointer as it was loaded, i.e. before it was advanced. This is
+  // the base of F's shadow frame.
+  Value *insertShadowPrologue(Function &F, GlobalValue *SSGlobal,
+                              size_t AllocSize) {
+    Instruction *Front = F.getEntryBlock().getFirstNonPHI();
+    IRBuilder<> Builder(Front);
+    // The current top of the shadow stack.
+    Value *FrameBase = Builder.CreateLoad(Int8PtrTy, SSGlobal);
+    // The top of the shadow stack once F's frame has been reserved.
+    Value *FrameSize = ConstantInt::get(Int32Ty, AllocSize);
+    Value *NewTop =
+        GetElementPtrInst::Create(Int8Ty, FrameBase, {FrameSize}, "", Front);
+    Builder.CreateStore(NewTop, SSGlobal);
+    return FrameBase;
+  }
+
+  // Swap each alloca in `Allocas` for a pointer at its offset from `SSPtr`.
+  void rewriteAllocas(DataLayout &DL, std::map<AllocaInst *, size_t> &Allocas,
+                      Value *SSPtr) {
+    for (const auto &Entry : Allocas) {
+      AllocaInst *Old = Entry.first;
+      Value *Off = ConstantInt::get(Int32Ty, Entry.second);
+      Value *New = GetElementPtrInst::Create(Int8Ty, SSPtr, {Off}, "", Old);
+      Old->replaceAllUsesWith(New);
+      Old->eraseFromParent();
+    }
+  }
+
+  // Restore the shadow stack pointer immediately before every return in
+  // `Rets`, storing `InitSSPtr` (the value it held before the shadow prologue
+  // allocated shadow stack space) back into `SSGlobal`.
+  void insertShadowEpilogues(std::vector<ReturnInst *> &Rets,
+                             GlobalVariable *SSGlobal, Value *InitSSPtr) {
+    for (ReturnInst *Ret : Rets) {
+      // A builder positioned at the return emits just before it.
+      IRBuilder<>(Ret).CreateStore(InitSSPtr, SSGlobal);
+    }
+  }
+
   bool runOnModule(Module &M) override {
     LLVMContext &Context = M.getContext();
 
+    // Cache commonly used types.
+    Int8Ty = Type::getInt8Ty(Context);
+    Int8PtrTy = Type::getInt8PtrTy(Context);
+    Int32Ty = Type::getInt32Ty(Context);
     DataLayout DL(&M);
-    Type *Int8Ty = Type::getInt8Ty(Context);
-    Type *Int32Ty = Type::getInt32Ty(Context);
-    Type *PointerSizedIntTy = DL.getIntPtrType(Context);
-    Type *Int8PtrTy = Type::getInt8PtrTy(Context);
+    PointerSizedIntTy = DL.getIntPtrType(Context);
 
     // Create a global variable which will store the pointer to the heap memory
-    // allocated for the shadow stack.
-    Constant *GShadowStackPtr = M.getOrInsertGlobal(G_SHADOW_STACK, Int8PtrTy);
-    GlobalVariable *GVar = M.getNamedGlobal(G_SHADOW_STACK);
-    GVar->setInitializer(
+    // used by the shadow stack.
+    //
+    // YKFIXME: This isn't thread safe. For now interpreters are assumed to be
+    // single threaded: https://github.com/ykjit/yk/issues/794
+    GlobalVariable *SSGlobal =
+        cast<GlobalVariable>(M.getOrInsertGlobal(G_SHADOW_STACK, Int8PtrTy));
+    SSGlobal->setInitializer(
         ConstantPointerNull::get(cast<PointerType>(Int8PtrTy)));
 
-    // We only need to create one shadow stack per module so we'll do this
-    // inside the module's entry point.
+    // Handle main() separately, since it works slightly differently to other
+    // functions: it allocates the shadow stack.
+    //
+    // Since main() is assumed not to call main(), the shadow stack is disused
+    // once main() returns, so there's no need to emit a shadow epilogue for it.
+    //
     // YKFIXME: Investigate languages that don't have/use main as the first
     // entry point.
-    Function *Main = M.getFunction("main");
-    if (Main == nullptr) {
-      Context.emitError(
-          "Unable to add shadow stack: could not find \"main\" function!");
+    Function *Main = M.getFunction(MAIN);
+    if (Main == nullptr || Main->isDeclaration()) {
+      Context.emitError("Unable to add shadow stack: could not find definition "
+                        "of \"main\" function!");
       return false;
     }
-    Instruction *First = Main->getEntryBlock().getFirstNonPHI();
-    IRBuilder<> Builder(First);
+    std::map<AllocaInst *, size_t> MainAllocas;
+    std::vector<ReturnInst *> MainRets;
+    size_t SFrameSize = analyseFunction(*Main, DL, MainAllocas, MainRets);
+    CallInst *Malloc = insertMainPrologue(Main, SSGlobal, SFrameSize);
+    rewriteAllocas(DL, MainAllocas, Malloc);
 
-    // Now create some memory on the heap for the shadow stack.
-    FunctionCallee MF =
-        M.getOrInsertFunction("malloc", Int8PtrTy, PointerSizedIntTy);
-    CallInst *Malloc = Builder.CreateCall(
-        MF, {ConstantInt::get(PointerSizedIntTy, SHADOW_STACK_SIZE)}, "");
-    Builder.CreateStore(Malloc, GShadowStackPtr);
-
-    Value *SSPtr;
+    // Instrument each remaining function with shadow stack code.
     for (Function &F : M) {
-      if (F.empty()) // skip declarations.
+      if (F.empty()) {
+        // skip declarations.
         continue;
-
-      if (&F != Main) {
-        // At the top of each function in the module, load the heap pointer
-        // from the global shadow stack variable.
-        Builder.SetInsertPoint(F.getEntryBlock().getFirstNonPHI());
-        SSPtr = Builder.CreateLoad(Int8PtrTy, GShadowStackPtr);
-      } else {
-        SSPtr = cast<Value>(Malloc);
       }
-
-      size_t Offset = 0;
-      // Remember which allocas were replaced, so we can remove them later in
-      // one swoop. Removing them here messes up the loop.
-      std::vector<Instruction *> RemoveAllocas;
-      for (BasicBlock &BB : F) {
-        for (Instruction &I : BB) {
-          if (isa<AllocaInst>(I)) {
-            // Replace allocas with pointers into the shadow stack.
-            AllocaInst &AI = cast<AllocaInst>(I);
-            if (isYkMTNewAlloca(&AI)) {
-              // The variable created by `yk_mt_new` will never be traced, so
-              // there's no need to store it on the shadow stack.
-              continue;
-            }
-            if (isa<StructType>(AI.getAllocatedType())) {
-              StructType *ST = cast<StructType>(AI.getAllocatedType());
-              // Some yk specific variables that will never be traced and thus
-              // can live happily on the normal stack.
-              // YKFIXME: This is somewhat fragile since `struct.YkLocation` is
-              // a name given by LLVM which could theoretically change. Luckily,
-              // this should all go away once we only move variables to the
-              // shadowstack that have their reference taken.
-              if (!ST->isLiteral()) {
-                if (ST->getName() == "YkCtrlPointVars" ||
-                    ST->getName() == "struct.YkLocation") {
-                  continue;
-                }
-              }
-            }
-            Builder.SetInsertPoint(&I);
-            auto AllocaSizeInBytes = AI.getAllocationSize(DL);
-            if (!AllocaSizeInBytes) {
-              // YKFIXME: Deal with functions where the stack size isn't know at
-              // compile time, e.g. when `alloca` is used.
-              Context.emitError("Unable to add shadow stack: function has "
-                                "dynamically sized stack!");
-              return false;
-            }
-            // Calculate this `AllocaInst`s size, aligning its pointer if
-            // necessary, and create a replacement pointer into the shadow
-            // stack.
-            size_t Align = AI.getAlign().value();
-            Offset = int((Offset + (Align - 1)) / Align) * Align;
-            if (Offset == 0) {
-              // If the offset is 0, we don't want to create `ptr_add
-              // %shadowstack, 0` as later parts of the pipeline are clever
-              // enough to recognise that as an alias: instead simply replace
-              // this variable with a direct reference to the shadow stack
-              // pointer.
-              cast<Value>(I).replaceAllUsesWith(SSPtr);
-            } else {
-              GetElementPtrInst *GEP = GetElementPtrInst::Create(
-                  Int8Ty, SSPtr, {ConstantInt::get(Int32Ty, Offset)}, "",
-                  cast<Instruction>(&AI));
-              Builder.SetInsertPoint(GEP);
-              cast<Value>(I).replaceAllUsesWith(GEP);
-            }
-            RemoveAllocas.push_back(cast<Instruction>(&AI));
-            Offset += *AllocaSizeInBytes;
-          } else if (isa<CallInst>(I)) {
-            // When we see a call, we need make space for a new stack frame. We
-            // do this by simply adjusting the pointer stored in the global
-            // shadow stack. When the function returns the global is reset. This
-            // is similar to how the RSP is adjusted inside the
-            // prologue/epilogue of a function, but here the prologue/epilogue
-            // are handled by the caller.
-            CallInst &CI = cast<CallInst>(I);
-            if (&CI == Malloc) {
-              // Don't do this for the `malloc` that created the shadow stack.
-              continue;
-            }
-            // Inline asm can't be traced.
-            if (CI.isInlineAsm()) {
-              continue;
-            }
-
-            if (CI.getCalledFunction()) {
-              // Note that it's important that we adjust the shadow stack
-              // before calling foreign code and intrinsics, as they may call
-              // back into functions that require shadow space.
-              if (CI.getCalledFunction()->getName() == "llvm.dbg.declare") {
-                continue;
-              }
-              if (CI.getCalledFunction()->getName() == "llvm.dbg.value") {
-                continue;
-              }
-            }
-
-            // Adjust shadow stack pointer before a call, and reset it back to
-            // its previous value upon returning. Make sure to align the shadow
-            // stack to a 16 byte boundary before calling, as required by the
-            // calling convention.
-#ifdef __x86_64__
-            Offset = int((Offset + (16 - 1)) / 16) * 16;
-#else
-#error unknown platform
-#endif
-            GetElementPtrInst *GEP = GetElementPtrInst::Create(
-                Int8Ty, SSPtr, {ConstantInt::get(Int32Ty, Offset)}, "", &I);
-            Builder.SetInsertPoint(&I);
-            Builder.CreateStore(GEP, GShadowStackPtr);
-            Builder.SetInsertPoint(I.getNextNonDebugInstruction());
-            Builder.CreateStore(SSPtr, GShadowStackPtr);
-          } else if (isa<CallBase>(I)) {
-            // FIXME: There are other call-like instructions (e.g. `invoke`,
-            // `callbr`) that we will need to think about when they arise.
-            Context.emitError("Unimplemented shadow stack allocation");
-            return false;
-          }
-        }
+      if (F.getName() == MAIN) {
+        // We've handled main already.
+        continue;
       }
-      for (Instruction *I : RemoveAllocas) {
-        I->removeFromParent();
+
+      std::map<AllocaInst *, size_t> Allocas;
+      std::vector<ReturnInst *> Rets;
+      size_t SFrameSize = analyseFunction(F, DL, Allocas, Rets);
+      if (SFrameSize > 0) {
+        Value *InitSSPtr = insertShadowPrologue(F, SSGlobal, SFrameSize);
+        rewriteAllocas(DL, Allocas, InitSSPtr);
+        insertShadowEpilogues(Rets, SSGlobal, InitSSPtr);
       }
-      RemoveAllocas.clear();
     }
 
 #ifndef NDEBUG
diff --git a/llvm/test/Transforms/Yk/ShadowStack.ll b/llvm/test/Transforms/Yk/ShadowStack.ll
new file mode 100644
index 00000000000000..a1250e0ca391df
--- /dev/null
+++ b/llvm/test/Transforms/Yk/ShadowStack.ll
@@ -0,0 +1,116 @@
+; Checks that the shadow stack pass does what it should.
+;
+; RUN: llc -O0 -stop-after yk-shadow-stack-pass -yk-shadow-stack < %s  | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+declare ptr @yk_mt_new();
+declare ptr @yk_location_new();
+%struct.YkLocation = type { i64 }
+
+; The pass should insert a global variable to hold the shadow stack pointer.
+; CHECK: @shadowstack_0 = global ptr null
+
+; Check a non-main function that requires some shadow space.
+;
+; CHECK: define dso_local i32 @f(i32 noundef %x, i32 noundef %y, i32 noundef %z) #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = load ptr, ptr @shadowstack_0, align 8
+; CHECK-NEXT:   %1 = getelementptr i8, ptr %0, i32 16
+; CHECK-NEXT:   store ptr %1, ptr @shadowstack_0, align 8
+; CHECK-NEXT:   %2 = getelementptr i8, ptr %0, i32 0
+; CHECK-NEXT:   %3 = getelementptr i8, ptr %0, i32 4
+; CHECK-NEXT:   %4 = getelementptr i8, ptr %0, i32 8
+; CHECK-NEXT:   %5 = getelementptr i8, ptr %0, i32 12
+; CHECK:       return:
+; CHECK-NEXT:    %11 = load i32, ptr %2, align 4
+; CHECK-NEXT:    store ptr %0, ptr @shadowstack_0, align 8
+; CHECK-NEXT:    ret i32 %11
+; CHECK-NEXT:  }
+define dso_local i32 @f(i32 noundef %x, i32 noundef %y, i32 noundef %z) noinline optnone {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %z.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  store i32 %y, ptr %y.addr, align 4
+  store i32 %z, ptr %z.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  %cmp = icmp sgt i32 %0, 3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %1 = load i32, ptr %y.addr, align 4
+  %2 = load i32, ptr %z.addr, align 4
+  %add = add nsw i32 %1, %2
+  store i32 %add, ptr %retval, align 4
+  br label %return
+
+if.else:
+  %3 = load i32, ptr %x.addr, align 4
+  %4 = load i32, ptr %y.addr, align 4
+  %add1 = add nsw i32 %3, %4
+  store i32 %add1, ptr %retval, align 4
+  br label %return
+
+return:
+  %5 = load i32, ptr %retval, align 4
+  ret i32 %5
+}
+
+; Now let's check that a function requiring no shadow space doesn't load, add 0
+; to, and store back, the shadow stack pointer. To do so would be wasteful. In
+; other words, the function should remain empty.
+;
+; CHECK:       define dso_local void @g() #0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+; CHECK-NEXT:  }
+define dso_local void @g() optnone noinline {
+entry:
+  ret void
+}
+
+; Now a main, which has a slightly different prologue to other functions.
+;
+; We also check that some special values don't end up on the shadow stack.
+;
+; CHECK:  define dso_local i32 @main(i32 noundef %argc, ptr noundef %argv) #0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %0 = call ptr @malloc(i64 1000000)
+; CHECK-NEXT:    %1 = getelementptr i8, ptr %0, i32 32
+; CHECK-NEXT:    store ptr %1, ptr @shadowstack_0, align 8
+; CHECK-NEXT:    %2 = getelementptr i8, ptr %0, i32 0
+; CHECK-NEXT:    %3 = getelementptr i8, ptr %0, i32 4
+; CHECK-NEXT:    %4 = getelementptr i8, ptr %0, i32 8
+; CHECK-NEXT:    %5 = getelementptr i8, ptr %0, i32 16
+; CHECK-NEXT:    %6 = getelementptr i8, ptr %0, i32 28
+; CHECK-NEXT:    %mt_stack = alloca ptr, align 8
+; CHECK-NEXT:    %loc_stack = alloca %struct.YkLocation, align 8
+; CHECK:         %lrv = load i32, ptr %2, align 4
+; --- remember, main() has no shadow epilogue! ---
+; CHECK-NEXT:    ret i32 %lrv
+; CHECK-NEXT:  }
+
+define dso_local i32 @main(i32 noundef %argc, ptr noundef %argv) noinline optnone {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca ptr, align 8
+  %vs = alloca [3 x i32], align 4
+  %i = alloca i32, align 4
+  %mt_stack = alloca ptr, align 8 ; this should not end up on the shadow stack
+  %loc_stack = alloca %struct.YkLocation, align 8 ; nor this.
+  store i32 0, ptr %retval, align 4
+  store i32 %argc, ptr %argc.addr, align 4
+  store ptr %argv, ptr %argv.addr, align 8
+  %mt = call ptr @yk_mt_new()
+  store ptr %mt, ptr %mt_stack
+  %loc = call ptr @yk_location_new()
+  store ptr %loc, ptr %loc_stack
+  %lrv = load i32, ptr %retval, align 4
+  ret i32 %lrv
+}
diff --git a/llvm/test/Transforms/Yk/ShadowStackRecurseMain.ll b/llvm/test/Transforms/Yk/ShadowStackRecurseMain.ll
new file mode 100644
index 00000000000000..6abe5dd81a6b17
--- /dev/null
+++ b/llvm/test/Transforms/Yk/ShadowStackRecurseMain.ll
@@ -0,0 +1,14 @@
+; Checks that a recursive call to main crashes the shadow stack pass.
+;
+; RUN: not llc -O0 -stop-after yk-shadow-stack-pass -yk-shadow-stack < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; CHECK: error: detected recursive call to main!
+define dso_local i32 @main(i32 noundef %argc, ptr noundef %argv) noinline optnone {
+entry:
+  %rv = call i32 @main(i32 %argc, ptr %argv);
+  ret i32 %rv
+}
diff --git a/llvm/test/Transforms/Yk/ShadowStackZeroMain.ll b/llvm/test/Transforms/Yk/ShadowStackZeroMain.ll
new file mode 100644
index 00000000000000..16d5a7d3f965d3
--- /dev/null
+++ b/llvm/test/Transforms/Yk/ShadowStackZeroMain.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O0 -stop-after yk-shadow-stack-pass -yk-shadow-stack < %s  | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+declare ptr @yk_mt_new();
+declare ptr @yk_location_new();
+%struct.YkLocation = type { i64 }
+
+; The pass should insert a global variable to hold the shadow stack pointer.
+; CHECK: @shadowstack_0 = global ptr null
+
+; Check that a main function requiring no shadow space doesn't needlessly
+; fiddle with the shadow stack pointer.
+;
+; It should however, still allocate and initialise the shadow stack pointer.
+;
+; CHECK:       define dso_local i32 @main(i32 noundef %argc, ptr noundef %argv) #0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %0 = call ptr @malloc(i64 1000000)
+; CHECK-NEXT:    store ptr %0, ptr @shadowstack_0, align 8
+; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:  }
+define dso_local i32 @main(i32 noundef %argc, ptr noundef %argv) noinline optnone {
+entry:
+  ret i32 0
+}