Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to the new LLVM pass manager #252

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions anvill/src/Lifters/FunctionLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,20 @@
#include <llvm/IR/Instruction.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Scalar/DCE.h>
#include <llvm/Transforms/Scalar/DeadStoreElimination.h>
#include <llvm/Transforms/Scalar/Reassociate.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include <llvm/Transforms/Utils/Mem2Reg.h>
#include <remill/Arch/Arch.h>
#include <remill/Arch/Instruction.h>
#include <remill/BC/Compat/Error.h>
Expand Down Expand Up @@ -1713,18 +1719,19 @@ void FunctionLifter::RecursivelyInlineLiftedFunctionIntoNativeFunction(void) {
}

// Initialize cleanup optimizations
llvm::legacy::FunctionPassManager fpm(semantics_module.get());
fpm.add(llvm::createCFGSimplificationPass());
fpm.add(llvm::createPromoteMemoryToRegisterPass());
fpm.add(llvm::createReassociatePass());
fpm.add(llvm::createDeadStoreEliminationPass());
fpm.add(llvm::createDeadCodeEliminationPass());
fpm.add(llvm::createSROAPass());
fpm.add(llvm::createDeadCodeEliminationPass());
fpm.add(llvm::createInstructionCombiningPass());
fpm.doInitialization();
fpm.run(*native_func);
fpm.doFinalization();
llvm::PassBuilder pass_builder;
llvm::FunctionPassManager fpm;
llvm::FunctionAnalysisManager fam;
pass_builder.registerFunctionAnalyses(fam);
fpm.addPass(llvm::SimplifyCFGPass());
fpm.addPass(llvm::PromotePass());
fpm.addPass(llvm::ReassociatePass());
fpm.addPass(llvm::DSEPass());
fpm.addPass(llvm::DCEPass());
fpm.addPass(llvm::SROA());
fpm.addPass(llvm::DCEPass());
fpm.addPass(llvm::InstCombinePass());
fpm.run(*native_func, fam);

ClearVariableNames(native_func);
}
Expand Down
122 changes: 73 additions & 49 deletions anvill/src/Optimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,24 @@
#include <llvm/IR/InlineAsm.h>
#include <llvm/IR/Instruction.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/Scalar/DCE.h>
#include <llvm/Transforms/Scalar/Sink.h>
#include <llvm/Transforms/Scalar/NewGVN.h>
#include <llvm/Transforms/Scalar/SCCP.h>
#include <llvm/Transforms/Scalar/DeadStoreElimination.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/BDCE.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/Inliner.h>
#include <llvm/Transforms/IPO/GlobalOpt.h>
#include <llvm/Transforms/IPO/GlobalDCE.h>
// #include <llvm/Transforms/IPO/StripSymbols.h>
#include <llvm/Transforms/Utils/Local.h>

// clang-format on
Expand Down Expand Up @@ -84,63 +97,76 @@ void OptimizeModule(const EntityLifter &lifter_context,
memory_escape->eraseFromParent();
}

llvm::legacy::PassManager mpm;
mpm.add(llvm::createFunctionInliningPass(250));
mpm.add(llvm::createGlobalOptimizerPass());
mpm.add(llvm::createGlobalDCEPass());
mpm.add(llvm::createStripDeadDebugInfoPass());
mpm.run(module);

llvm::legacy::FunctionPassManager fpm(&module);
fpm.add(llvm::createDeadCodeEliminationPass());
fpm.add(llvm::createSinkingPass());
fpm.add(llvm::createNewGVNPass());
fpm.add(llvm::createSCCPPass());
fpm.add(llvm::createDeadStoreEliminationPass());
fpm.add(llvm::createSROAPass());
fpm.add(llvm::createEarlyCSEPass(true));
fpm.add(llvm::createBitTrackingDCEPass());
fpm.add(llvm::createCFGSimplificationPass());
fpm.add(llvm::createSinkingPass());
fpm.add(llvm::createCFGSimplificationPass());
fpm.add(llvm::createInstructionCombiningPass());
llvm::PassBuilder pass_builder;
llvm::LoopAnalysisManager lam;
llvm::FunctionAnalysisManager fam;
llvm::CGSCCAnalysisManager cgam;
llvm::ModuleAnalysisManager mam;
pass_builder.registerLoopAnalyses(lam);
pass_builder.registerFunctionAnalyses(fam);
pass_builder.registerCGSCCAnalyses(cgam);
pass_builder.registerModuleAnalyses(mam);
pass_builder.crossRegisterProxies(lam, fam, cgam, mam);

llvm::ModulePassManager mpm;
llvm::InlineParams inline_params;
inline_params.DefaultThreshold = 250;
mpm.addPass(llvm::ModuleInlinerWrapperPass(inline_params));
mpm.addPass(llvm::GlobalOptPass());
mpm.addPass(llvm::GlobalDCEPass());
// TODO(alex): Figure out what to do here.
// The new version of this pass only seems to have been added in LLVM 12.
// mpm.addPass(llvm::StripDeadDebugInfoPass());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pass was introduced in this commit. It'd be pretty easy to just write the wrapper ourselves if we wanted.

mpm.run(module, mam);

llvm::FunctionPassManager fpm;
fpm.addPass(llvm::DCEPass());
fpm.addPass(llvm::SinkingPass());
fpm.addPass(llvm::NewGVNPass());
fpm.addPass(llvm::SCCPPass());
fpm.addPass(llvm::DSEPass());
fpm.addPass(llvm::SROA());
fpm.addPass(llvm::EarlyCSEPass());
fpm.addPass(llvm::BDCEPass());
fpm.addPass(llvm::SimplifyCFGPass());
fpm.addPass(llvm::SinkingPass());
fpm.addPass(llvm::SimplifyCFGPass());
fpm.addPass(llvm::InstCombinePass());

auto error_manager_ptr = ITransformationErrorManager::Create();
auto &err_man = *error_manager_ptr.get();

fpm.add(CreateSinkSelectionsIntoBranchTargets(err_man));
fpm.add(CreateRemoveUnusedFPClassificationCalls());
fpm.add(CreateRemoveDelaySlotIntrinsics());
fpm.add(CreateRemoveErrorIntrinsics());
fpm.add(CreateLowerRemillMemoryAccessIntrinsics());
fpm.add(CreateRemoveCompilerBarriers());
fpm.add(CreateLowerTypeHintIntrinsics());
fpm.add(CreateInstructionFolderPass(err_man));
fpm.add(llvm::createDeadCodeEliminationPass());
fpm.add(CreateRecoverEntityUseInformation(err_man, lifter_context));
fpm.add(CreateSinkSelectionsIntoBranchTargets(err_man));
fpm.add(CreateRemoveTrivialPhisAndSelects());
fpm.add(llvm::createDeadCodeEliminationPass());
fpm.add(CreateRecoverStackFrameInformation(err_man, options));
fpm.add(llvm::createSROAPass());
fpm.add(CreateSplitStackFrameAtReturnAddress(err_man));
fpm.add(llvm::createSROAPass());
AddSinkSelectionsIntoBranchTargets(fpm, err_man);
Copy link
Contributor Author

@tetsuo-cpp tetsuo-cpp Aug 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new pass manager doesn't take a pointer anymore. So if I want to do:

fpm.addPass(SinkSelectionsIntoBranchTargets(err_man))

Then I'd need to expose the headers for each of the passes and move them to libraries/anvill_passes/include, since we need the actual class definitions to construct them. To avoid reorganising everything, I've exposed these helpers in Transforms.h to add each pass to the pass manager.

AddRemoveUnusedFPClassificationCalls(fpm);
AddRemoveDelaySlotIntrinsics(fpm);
AddRemoveErrorIntrinsics(fpm);
AddLowerRemillMemoryAccessIntrinsics(fpm);
AddRemoveCompilerBarriers(fpm);
AddLowerTypeHintIntrinsics(fpm);
AddInstructionFolderPass(fpm, err_man);
fpm.addPass(llvm::DCEPass());
AddRecoverEntityUseInformation(fpm, err_man, lifter_context);
AddSinkSelectionsIntoBranchTargets(fpm, err_man);
AddRemoveTrivialPhisAndSelects(fpm);
fpm.addPass(llvm::DCEPass());
AddRecoverStackFrameInformation(fpm, err_man, options);
fpm.addPass(llvm::SROA());
AddSplitStackFrameAtReturnAddress(fpm, err_man);
fpm.addPass(llvm::SROA());

// Sometimes we have values in the form of (expr ^ 1) used as branch
// conditions or other targets. Try to fix these to be CMPs, since it
// makes code easier to read and analyze. This is a fairly narrow optimization
// but it comes up often enough for lifted code.
fpm.add(CreateConvertXorToCmp());
AddConvertXorToCmp(fpm);

if (FLAGS_pointer_brighten_gas) {
fpm.add(CreateBrightenPointerOperations(FLAGS_pointer_brighten_gas));
AddBrightenPointerOperations(fpm, FLAGS_pointer_brighten_gas);
}

fpm.doInitialization();
for (auto &func : module) {
fpm.run(func);
fpm.run(func, fam);
}
fpm.doFinalization();

// We can extend error handling here to provide more visibility
// into what has happened
Expand Down Expand Up @@ -183,14 +209,12 @@ void OptimizeModule(const EntityLifter &lifter_context,

CHECK(!err_man.HasFatalError());

fpm.add(CreateTransformRemillJumpIntrinsics(lifter_context));
fpm.add(CreateRemoveRemillFunctionReturns(lifter_context));
fpm.add(CreateLowerRemillUndefinedIntrinsics());
fpm.doInitialization();
AddTransformRemillJumpIntrinsics(fpm, lifter_context);
AddRemoveRemillFunctionReturns(fpm, lifter_context);
AddLowerRemillUndefinedIntrinsics(fpm);
for (auto &func : module) {
fpm.run(func);
fpm.run(func, fam);
}
fpm.doFinalization();

// Get rid of all final uses of `__anvill_pc`.
if (auto anvill_pc = module.getGlobalVariable(::anvill::kSymbolicPCName)) {
Expand Down
55 changes: 28 additions & 27 deletions libraries/anvill_passes/include/anvill/Transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
#include <anvill/ITransformationErrorManager.h>
#include <anvill/Lifters/Options.h>
#include <anvill/Lifters/ValueLifter.h>
#include <llvm/IR/PassManager.h>

namespace llvm {
class Function;
class FunctionPass;
} // namespace llvm
namespace anvill {

Expand Down Expand Up @@ -55,14 +55,14 @@ class EntityLifter;
//
// When this happens, we're better able to fold cross-references at the targets
// of conditional branches.
llvm::FunctionPass *CreateSinkSelectionsIntoBranchTargets(
ITransformationErrorManager &error_manager);
void AddSinkSelectionsIntoBranchTargets(
llvm::FunctionPassManager &fpm, ITransformationErrorManager &error_manager);

// Remill semantics sometimes contain compiler barriers (empty inline assembly
// statements), especially related to floating point code (i.e. preventing
// re-ordering of floating point operations so that we can capture the flags).
// This pass eliminates those empty inline assembly statements.
llvm::FunctionPass *CreateRemoveCompilerBarriers(void);
void AddRemoveCompilerBarriers(llvm::FunctionPassManager &fpm);

// Analyze `func` and determine if the function stores the return value of
// the `llvm.returnaddress` intrinsic into an `alloca` (presumed to be the
Expand All @@ -85,8 +85,8 @@ llvm::FunctionPass *CreateRemoveCompilerBarriers(void);
// frame. This approach is very convenient, but comes at the cost of having
// to do this particular transformation in order to recover more typical stack
// frame structures.
llvm::FunctionPass *CreateSplitStackFrameAtReturnAddress(
ITransformationErrorManager &error_manager);
void AddSplitStackFrameAtReturnAddress(
llvm::FunctionPassManager &fpm, ITransformationErrorManager &error_manager);

// Remove unused calls to floating point classification functions. Calls to
// these functions are present in a bunch of FPU-related instruction semantics
Expand All @@ -99,11 +99,11 @@ llvm::FunctionPass *CreateSplitStackFrameAtReturnAddress(
// NOTE(pag): This pass must be applied before any kind of renaming of lifted
// functions is performed, so that we don't accidentally remove
// calls to classification functions present in the target binary.
llvm::FunctionPass *CreateRemoveUnusedFPClassificationCalls(void);
void AddRemoveUnusedFPClassificationCalls(llvm::FunctionPassManager &fpm);

// Lowers the `__remill_read_memory_NN`, `__remill_write_memory_NN`, and the
// various atomic read-modify-write variants into LLVM loads and stores.
llvm::FunctionPass *CreateLowerRemillMemoryAccessIntrinsics(void);
void AddLowerRemillMemoryAccessIntrinsics(llvm::FunctionPassManager &fpm);

// Type information from prior lifting efforts, or from front-end tools
// (e.g. Binary Ninja) is plumbed through the system by way of calls to
// intrinsic functions such as `__anvill_type<blah>`. These function calls
Expand All @@ -114,7 +114,7 @@ llvm::FunctionPass *CreateLowerRemillMemoryAccessIntrinsics(void);
//
// These function calls need to be removed/lowered into `inttoptr` or `bitcast`
// instructions.
llvm::FunctionPass *CreateLowerTypeHintIntrinsics(void);
void AddLowerTypeHintIntrinsics(llvm::FunctionPassManager &fpm);

// Anvill-lifted bitcode operates at a very low level, swapping between integer
// and pointer representations. It is typical for just-lifted bitcode to
Expand All @@ -134,7 +134,8 @@ llvm::FunctionPass *CreateLowerTypeHintIntrinsics(void);
//
// This function attempts to apply a battery of pattern-based transforms to
// brighten integer operations into pointer operations.
llvm::FunctionPass *CreateBrightenPointerOperations(unsigned max_gas = 250);
void AddBrightenPointerOperations(llvm::FunctionPassManager &fpm,
unsigned max_gas = 250);

// Transforms the bitcode to eliminate calls to `__remill_function_return`,
// where appropriate. This will not succeed for all architectures, but is
Expand Down Expand Up @@ -163,8 +164,8 @@ llvm::FunctionPass *CreateBrightenPointerOperations(unsigned max_gas = 250);
//
// NOTE(pag): This pass should be applied as late as possible, as the call to
// `__remill_function_return` depends upon the memory pointer.
llvm::FunctionPass *
CreateRemoveRemillFunctionReturns(const EntityLifter &lifter);
void AddRemoveRemillFunctionReturns(llvm::FunctionPassManager &fpm,
const EntityLifter &lifter);

// This function pass makes use of the `__anvill_sp` usages to create an
// `llvm::StructType` that acts as a stack frame. This initial stack frame
Expand All @@ -175,9 +176,9 @@ CreateRemoveRemillFunctionReturns(const EntityLifter &lifter);
// to eliminate the stack frame, then to enable splitting of the stack frame
// into components (see `CreateSplitStackFrameAtReturnAddress`) such that
// SROA can apply to the arguments and return address components.
llvm::FunctionPass *
CreateRecoverStackFrameInformation(ITransformationErrorManager &error_manager,
const LifterOptions &options);
void AddRecoverStackFrameInformation(llvm::FunctionPassManager &fpm,
ITransformationErrorManager &error_manager,
const LifterOptions &options);

// Anvill-lifted code is full of references to constant expressions related
// to `__anvill_pc`. These constant expressions exist to "taint" values as
Expand All @@ -189,9 +190,9 @@ CreateRecoverStackFrameInformation(ITransformationErrorManager &error_manager,
// other entities. We say opportunistic because that pass is not guaranteed
// to replace all such references, and will in fact leave references around
// for later passes to benefit from.
llvm::FunctionPass *
CreateRecoverEntityUseInformation(ITransformationErrorManager &error_manager,
const EntityLifter &lifter);
void AddRecoverEntityUseInformation(llvm::FunctionPassManager &fpm,
ITransformationErrorManager &error_manager,
const EntityLifter &lifter);

// Some machine code instructions explicitly introduce undefined values /
// behavior. Often, this is a result of the CPUs of different steppings of
Expand All @@ -208,19 +209,19 @@ CreateRecoverEntityUseInformation(ITransformationErrorManager &error_manager,
//
// This pass exists to do the lowering to `undef` values, and should be run
// as late as possible.
llvm::FunctionPass *CreateLowerRemillUndefinedIntrinsics(void);
void AddLowerRemillUndefinedIntrinsics(llvm::FunctionPassManager &fpm);

// This function pass will attempt to fold the following instruction
// combinations:
// {SelectInst, PHINode}/{BinaryOperator, CastInst, GetElementPtrInst}
llvm::FunctionPass *
CreateInstructionFolderPass(ITransformationErrorManager &error_manager);
void AddInstructionFolderPass(llvm::FunctionPassManager &fpm,
ITransformationErrorManager &error_manager);

// Removes trivial PHI and select nodes. These are PHI and select nodes whose
// incoming values or true/false values match. This can happen as a result of
// the instruction folding pass that hoists and folds values up through selects
// and PHI nodes, followed by the select sinking pass, which pushes values down.
llvm::FunctionPass *CreateRemoveTrivialPhisAndSelects(void);
void AddRemoveTrivialPhisAndSelects(llvm::FunctionPassManager &fpm);

// The pass transforms bitcode to replace the calls to `__remill_jump` into
// `__remill_function_return` if a value returned by `llvm.returnaddress`, or
Expand All @@ -235,8 +236,8 @@ llvm::FunctionPass *CreateRemoveTrivialPhisAndSelects(void);

// NOTE: The pass should be run as late as possible in the list but before
// `RemoveRemillFunctionReturns` transform
llvm::FunctionPass *
CreateTransformRemillJumpIntrinsics(const EntityLifter &lifter);
void AddTransformRemillJumpIntrinsics(llvm::FunctionPassManager &fpm,
const EntityLifter &lifter);

// Finds values in the form of:
// %cmp = icmp eq val1, val2
Expand All @@ -252,14 +253,14 @@ CreateTransformRemillJumpIntrinsics(const EntityLifter &lifter);
// with xors is more difficult to analyze and for a human to read
// This pass should only work on boolean values, and handle when those are used
// in Branches and Selects
llvm::FunctionPass *CreateConvertXorToCmp(void);
void AddConvertXorToCmp(llvm::FunctionPassManager &fpm);

// Removes calls to `__remill_delay_slot_begin` and `__remill_delay_slot_end`.
// These calls surround the lifted versions of delayed instructions, to signal
// their location in the bitcode.
llvm::FunctionPass *CreateRemoveDelaySlotIntrinsics(void);
void AddRemoveDelaySlotIntrinsics(llvm::FunctionPassManager &fpm);

// Removes calls to `__remill_error`.
llvm::FunctionPass *CreateRemoveErrorIntrinsics(void);
void AddRemoveErrorIntrinsics(llvm::FunctionPassManager &fpm);

} // namespace anvill
Loading