diff --git a/src/boomerang-plugins/codegen/CMakeLists.txt b/src/boomerang-plugins/codegen/CMakeLists.txt index bd36b2bef..7c29de720 100644 --- a/src/boomerang-plugins/codegen/CMakeLists.txt +++ b/src/boomerang-plugins/codegen/CMakeLists.txt @@ -15,10 +15,14 @@ include_directories( BOOMERANG_ADD_CODEGEN( NAME "C" SOURCES + c/ASTNode.cpp + c/ASTNode.h c/CCodeGenerator.cpp c/CCodeGenerator.h c/CodeWriter.cpp c/CodeWriter.h c/ControlFlowAnalyzer.cpp c/ControlFlowAnalyzer.h + c/StmtASTNode.cpp + c/StmtASTNode.h ) diff --git a/src/boomerang-plugins/codegen/c/ASTNode.cpp b/src/boomerang-plugins/codegen/c/ASTNode.cpp new file mode 100644 index 000000000..76f0c0990 --- /dev/null +++ b/src/boomerang-plugins/codegen/c/ASTNode.cpp @@ -0,0 +1,21 @@ +#pragma region License +/* + * This file is part of the Boomerang Decompiler. + * + * See the file "LICENSE.TERMS" for information on usage and + * redistribution of this file, and for a DISCLAIMER OF ALL + * WARRANTIES. + */ +#pragma endregion License +#include "ASTNode.h" + + +ASTNode::ASTNode() +{ +} + + +ASTNode::~ASTNode() +{ +} + diff --git a/src/boomerang-plugins/codegen/c/ASTNode.h b/src/boomerang-plugins/codegen/c/ASTNode.h new file mode 100644 index 000000000..8d8db6404 --- /dev/null +++ b/src/boomerang-plugins/codegen/c/ASTNode.h @@ -0,0 +1,36 @@ +#pragma region License +/* + * This file is part of the Boomerang Decompiler. + * + * See the file "LICENSE.TERMS" for information on usage and + * redistribution of this file, and for a DISCLAIMER OF ALL + * WARRANTIES. + */ +#pragma endregion License +#pragma once + + +class BasicBlock; +class OStream; + + +/** + * Base class for all nodes in the Abstract Syntax Tree. + */ +class ASTNode +{ +public: + ASTNode(); + ASTNode(const ASTNode &other) = delete; + ASTNode(ASTNode &&other) = default; + + virtual ~ASTNode(); + + ASTNode &operator=(const ASTNode &other) = delete; + ASTNode &operator=(ASTNode &&other) = default; + +public: + virtual bool isStmt() const { return false; } + + virtual void printAST(OStream &os) const = 0; +}; diff --git a/src/boomerang-plugins/codegen/c/CCodeGenerator.cpp b/src/boomerang-plugins/codegen/c/CCodeGenerator.cpp index be3248123..d76ddb35f 100644 --- a/src/boomerang-plugins/codegen/c/CCodeGenerator.cpp +++ b/src/boomerang-plugins/codegen/c/CCodeGenerator.cpp @@ -11,7 +11,6 @@ #include "boomerang/core/Project.h" #include "boomerang/core/Settings.h" -#include "boomerang/db/BasicBlock.h" #include "boomerang/db/Prog.h" #include "boomerang/db/module/Module.h" #include "boomerang/db/proc/UserProc.h" @@ -25,6 +24,7 @@ #include "boomerang/ssl/exp/Ternary.h" #include "boomerang/ssl/exp/TypedExp.h" #include "boomerang/ssl/statements/BoolAssign.h" +#include "boomerang/ssl/statements/BranchStatement.h" #include "boomerang/ssl/statements/CallStatement.h" #include "boomerang/ssl/statements/CaseStatement.h" #include "boomerang/ssl/statements/ReturnStatement.h" @@ -36,6 +36,8 @@ #include "boomerang/util/ByteUtil.h" #include "boomerang/util/log/Log.h" +#include "boomerang/db/BasicBlock.h" + CCodeGenerator::CCodeGenerator(Project *project) : ICodeGenerator(project) @@ -382,13 +384,13 @@ void CCodeGenerator::addReturnStatement(const StatementList *rets) void CCodeGenerator::removeUnusedLabels() { for (QStringList::iterator it = m_lines.begin(); it != m_lines.end();) { - if (it->startsWith("bb0x") && it->contains(':')) { - QStringRef bbAddrString = it->midRef(4, it->indexOf(':') - 4); - bool ok = false; - Address bbAddr(bbAddrString.toLongLong(&ok, 16)); + if (it->startsWith("lab_") && it->contains(':')) { + QStringRef bbNumberStr = it->midRef(4, it->indexOf(':') - 4); + bool ok = false; + int stmtNumber = bbNumberStr.toInt(&ok, 16); assert(ok); - if (m_usedLabels.find(bbAddr.value()) == m_usedLabels.end()) { + if (m_usedLabels.find(stmtNumber) == m_usedLabels.end()) { it = m_lines.erase(it); continue; } @@ -446,8 +448,8 @@ void CCodeGenerator::generateCode(UserProc *proc) } // Start generating "real" code - std::list followSet, gotoSet; - generateCode(proc->getEntryBB(), nullptr, followSet, gotoSet, proc); + std::list followSet, gotoSet; + generateCode(m_analyzer.findEntryNode(), nullptr, followSet, gotoSet, proc); addProcEnd(); @@ -823,15 +825,15 @@ void CCodeGenerator::addIfElseCondEnd() } -void CCodeGenerator::addGoto(const BasicBlock *bb) +void CCodeGenerator::addGoto(const StmtASTNode *bb) { QString tgt; OStream s(&tgt); indent(s, m_indent); - s << "goto bb0x" << QString::number(bb->getLowAddr().value(), 16) << ";"; + s << "goto lab_" << QString::number(bb->getStatement()->getNumber()) << ";"; appendLine(tgt); - m_usedLabels.insert(bb->getLowAddr().value()); + m_usedLabels.insert(bb->getStatement()->getNumber()); } @@ -857,12 +859,12 @@ void CCodeGenerator::addBreak() } -void CCodeGenerator::addLabel(const BasicBlock *bb) +void CCodeGenerator::addLabel(const StmtASTNode *bb) { QString tgt; OStream s(&tgt); - s << "bb0x" << QString::number(bb->getLowAddr().value(), 16) << ":"; + s << "lab_" << QString::number(bb->getStatement()->getNumber()) << ":"; appendLine(tgt); } @@ -1916,13 +1918,13 @@ void CCodeGenerator::closeParen(OStream &str, OpPrec outer, OpPrec inner) } -void CCodeGenerator::generateCode(const BasicBlock *bb, const BasicBlock *latch, - std::list &followSet, - std::list &gotoSet, UserProc *proc) +void CCodeGenerator::generateCode(const StmtASTNode *bb, const StmtASTNode *latch, + std::list &followSet, + std::list &gotoSet, UserProc *proc) { // If this is the follow for the most nested enclosing conditional, then don't generate // anything. Otherwise if it is in the follow set generate a goto to the follow - const BasicBlock *enclFollow = followSet.empty() ? nullptr : followSet.back(); + const StmtASTNode *enclFollow = followSet.empty() ? nullptr : followSet.back(); if (Util::isContained(gotoSet, bb) && !m_analyzer.isLatchNode(bb) && ((latch && m_analyzer.getLoopHead(latch) && @@ -1998,9 +2000,9 @@ void CCodeGenerator::generateCode(const BasicBlock *bb, const BasicBlock *latch, } -void CCodeGenerator::generateCode_Loop(const BasicBlock *bb, std::list &gotoSet, - UserProc *proc, const BasicBlock *latch, - std::list &followSet) +void CCodeGenerator::generateCode_Loop(const StmtASTNode *bb, std::list &gotoSet, + UserProc *proc, const StmtASTNode *latch, + std::list &followSet) { // add the follow of the loop (if it exists) to the follow set if (m_analyzer.getLoopFollow(bb)) { @@ -2014,7 +2016,7 @@ void CCodeGenerator::generateCode_Loop(const BasicBlock *bb, std::listgetCond(); + SharedExp cond = bb->getStatement()->getCondExpr(); if (bb->getSuccessor(BTHEN) == m_analyzer.getLoopFollow(bb)) { cond = Unary::get(opLNot, cond)->simplify(); @@ -2023,7 +2025,7 @@ void CCodeGenerator::generateCode_Loop(const BasicBlock *bb, std::listgetSuccessor(BELSE) == m_analyzer.getLoopFollow(bb)) + const StmtASTNode *loopBody = (bb->getSuccessor(BELSE) == m_analyzer.getLoopFollow(bb)) ? bb->getSuccessor(BTHEN) : bb->getSuccessor(BELSE); generateCode(loopBody, m_analyzer.getLatchNode(bb), followSet, gotoSet, proc); @@ -2075,11 +2077,11 @@ void CCodeGenerator::generateCode_Loop(const BasicBlock *bb, std::listisType(BBType::Twoway)); + const StmtASTNode *myLatch = m_analyzer.getLatchNode(bb); + const StmtASTNode *myHead = m_analyzer.getLoopHead(myLatch); + assert(myLatch->getStatement()->isBranch()); - SharedExp cond = myLatch->getCond(); + SharedExp cond = myLatch->getStatement()->getCondExpr(); if (myLatch->getSuccessor(BELSE) == myHead) { addPostTestedLoopEnd(Unary::get(opLNot, cond)->simplify()); } @@ -2116,10 +2118,10 @@ void CCodeGenerator::generateCode_Loop(const BasicBlock *bb, std::list &gotoSet, UserProc *proc, - const BasicBlock *latch, - std::list &followSet) +void CCodeGenerator::generateCode_Branch(const StmtASTNode *bb, + std::list &gotoSet, UserProc *proc, + const StmtASTNode *latch, + std::list &followSet) { // reset this back to LoopCond if it was originally of this type if (m_analyzer.getLatchNode(bb) != nullptr) { @@ -2128,7 +2130,7 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, // for 2 way conditional headers that are effectively jumps into // or out of a loop or case body, we will need a new follow node - const BasicBlock *tmpCondFollow = nullptr; + const StmtASTNode *tmpCondFollow = nullptr; // keep track of how many nodes were added to the goto set so that // the correct number are removed @@ -2153,7 +2155,7 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, else { if (m_analyzer.getUnstructType(bb) == UnstructType::JumpInOutLoop) { // define the loop header to be compared against - const BasicBlock *myLoopHead = (m_analyzer.getStructType(bb) == StructType::LoopCond + const StmtASTNode *myLoopHead = (m_analyzer.getStructType(bb) == StructType::LoopCond ? bb : m_analyzer.getLoopHead(bb)); gotoSet.push_back(m_analyzer.getCondFollow(bb)); @@ -2186,19 +2188,17 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, writeBB(bb); // write the conditional header - SwitchInfo *psi = nullptr; // Init to nullptr to suppress a warning + const SwitchInfo *psi = nullptr; // Init to nullptr to suppress a warning if (m_analyzer.getCondType(bb) == CondType::Case) { - // The CaseStatement will be in the last RTL this BB - RTL *last = bb->getRTLs()->back().get(); - CaseStatement *cs = (CaseStatement *)last->getHlStmt(); + const CaseStatement *cs = bb->getStatement(); psi = cs->getSwitchInfo(); // Write the switch header (i.e. "switch (var) {") addCaseCondHeader(psi->switchExp); } else { - SharedExp cond = bb->getCond(); + SharedExp cond = bb->getStatement()->getCondExpr(); if (!cond) { cond = Const::get(Address(0xfeedface)); // hack, but better than a crash @@ -2219,7 +2219,7 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, // write code for the body of the conditional if (m_analyzer.getCondType(bb) != CondType::Case) { - const BasicBlock *succ = bb->getSuccessor( + const StmtASTNode *succ = bb->getSuccessor( (m_analyzer.getCondType(bb) == CondType::IfElse) ? BELSE : BTHEN); assert(succ != nullptr); @@ -2261,12 +2261,12 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, // case header // first, determine the optimal fall-through ordering - std::list> + std::list> switchDests = computeOptimalCaseOrdering(bb, psi); for (auto it = switchDests.begin(); it != switchDests.end(); ++it) { SharedExp caseValue = it->first; - const BasicBlock *succ = it->second; + const StmtASTNode *succ = it->second; addCaseCondOption(*caseValue); if (std::next(it) != switchDests.end() && std::next(it)->second == succ) { @@ -2315,16 +2315,16 @@ void CCodeGenerator::generateCode_Branch(const BasicBlock *bb, } -void CCodeGenerator::generateCode_Seq(const BasicBlock *bb, std::list &gotoSet, - UserProc *proc, const BasicBlock *latch, - std::list &followSet) +void CCodeGenerator::generateCode_Seq(const StmtASTNode *bb, std::list &gotoSet, + UserProc *proc, const StmtASTNode *latch, + std::list &followSet) { // generate code for the body of this block writeBB(bb); // return if this is the 'return' block (i.e. has no out edges) after emitting a 'return' // statement - if (bb->getType() == BBType::Ret) { + if (bb->getStatement()->isReturn()) { // This should be emitted now, like a normal statement // addReturnStatement(getReturnVal()); return; @@ -2332,14 +2332,10 @@ void CCodeGenerator::generateCode_Seq(const BasicBlock *bb, std::listgetNumSuccessors() == 0) { - LOG_WARN("No out edge for BB at address %1, in proc %2", bb->getLowAddr(), proc->getName()); - - if (bb->getType() == BBType::CompJump) { - assert(!bb->getRTLs()->empty()); - RTL *lastRTL = bb->getRTLs()->back().get(); - assert(!lastRTL->empty()); +// LOG_WARN("No out edge for BB at address %1, in proc %2", bb->getLowAddr(), proc->getName()); - GotoStatement *gs = static_cast(lastRTL->back()); + if (bb->getStatement()->isGoto() ) { + const GotoStatement *gs = bb->getStatement(); if (gs && gs->getDest()) { addLineComment("goto " + gs->getDest()->toString()); } @@ -2351,22 +2347,22 @@ void CCodeGenerator::generateCode_Seq(const BasicBlock *bb, std::listgetSuccessor(0); + const StmtASTNode *succ = bb->getSuccessor(0); if (bb->getNumSuccessors() > 1) { - const BasicBlock *other = bb->getSuccessor(1); + const StmtASTNode *other = bb->getSuccessor(1); LOG_MSG("Found seq with more than one outedge!"); - std::shared_ptr constDest = bb->getDest()->access(); + std::shared_ptr constDest = bb->getStatement()->getDest()->access(); - if (constDest && constDest->isIntConst() && (constDest->getAddr() == succ->getLowAddr())) { - std::swap(other, succ); - LOG_MSG("Taken branch is first out edge"); - } +// if (constDest && constDest->isIntConst() && (constDest->getAddr() == succ->getLowAddr())) { +// std::swap(other, succ); +// LOG_MSG("Taken branch is first out edge"); +// } - SharedExp cond = bb->getCond(); +// SharedExp cond = bb->getCond(); - if (cond) { - addIfCondHeader(bb->getCond()); +// if (cond) { +// addIfCondHeader(bb->getCond()); if (isGenerated(other)) { emitGotoAndLabel(bb, other); @@ -2376,10 +2372,10 @@ void CCodeGenerator::generateCode_Seq(const BasicBlock *bb, std::listisType(BBType::Ret)) { + else if (dest->getStatement()->isReturn()) { // a goto to a return -> just emit the return statement writeBB(dest); } @@ -2441,27 +2437,17 @@ void CCodeGenerator::emitGotoAndLabel(const BasicBlock *bb, const BasicBlock *de } -void CCodeGenerator::writeBB(const BasicBlock *bb) +void CCodeGenerator::writeBB(const StmtASTNode *bb) { if (m_proc->getProg()->getProject()->getSettings()->debugGen) { - LOG_MSG("Generating code for BB at address %1", bb->getLowAddr()); + LOG_MSG("Generating code for stmt %1", bb->getStatement()->getNumber()); } // Allocate space for a label to be generated for this node and add this to the generated code. // The actual label can then be generated now or back patched later addLabel(bb); - if (bb->getRTLs()) { - for (const auto &rtl : *(bb->getRTLs())) { - if (m_proc->getProg()->getProject()->getSettings()->debugGen) { - LOG_MSG("%1", rtl->getAddress()); - } - - for (const Statement *st : *rtl) { - emitCodeForStmt(st); - } - } - } + emitCodeForStmt(bb->getStatement()); } @@ -2487,9 +2473,9 @@ void CCodeGenerator::appendLine(const QString &s) } -bool CCodeGenerator::isAllParentsGenerated(const BasicBlock *bb) const +bool CCodeGenerator::isAllParentsGenerated(const StmtASTNode *bb) const { - for (BasicBlock *pred : bb->getPredecessors()) { + for (StmtASTNode *pred : bb->getPredecessors()) { if (!m_analyzer.isBackEdge(pred, bb) && !isGenerated(pred)) { return false; } @@ -2499,7 +2485,7 @@ bool CCodeGenerator::isAllParentsGenerated(const BasicBlock *bb) const } -bool CCodeGenerator::isGenerated(const BasicBlock *bb) const +bool CCodeGenerator::isGenerated(const StmtASTNode *bb) const { return m_generatedBBs.find(bb) != m_generatedBBs.end(); } @@ -2563,14 +2549,14 @@ void CCodeGenerator::emitCodeForStmt(const Statement *st) } -std::list> -CCodeGenerator::computeOptimalCaseOrdering(const BasicBlock *caseHead, SwitchInfo *psi) +std::list> +CCodeGenerator::computeOptimalCaseOrdering(const StmtASTNode *caseHead, const SwitchInfo *psi) { - using CaseEntry = std::pair; + using CaseEntry = std::pair; std::list result; for (int i = 0; i < caseHead->getNumSuccessors(); ++i) { - const BasicBlock *origSucc = caseHead->getSuccessor(i); + const StmtASTNode *origSucc = caseHead->getSuccessor(i); SharedExp caseVal; if (psi->switchType == SwitchType::F) { // "Fortran" style? // Yes, use the table value itself @@ -2581,22 +2567,16 @@ CCodeGenerator::computeOptimalCaseOrdering(const BasicBlock *caseHead, SwitchInf caseVal = Const::get(static_cast(psi->lowerBound + i)); } - const BasicBlock *realSucc = origSucc; - while (realSucc->getNumSuccessors() == 1 && - (realSucc->isEmpty() || realSucc->isEmptyJump())) { - realSucc = realSucc->getSuccessor(0); - } - - result.push_back({ caseVal, realSucc }); + result.push_back({ caseVal, origSucc }); } result.sort([](const CaseEntry &left, const CaseEntry &right) { - const BasicBlock *leftBB = left.second; - const BasicBlock *rightBB = right.second; + const StmtASTNode *leftBB = left.second; + const StmtASTNode *rightBB = right.second; - const BasicBlock *leftSucc = leftBB; + const StmtASTNode *leftSucc = leftBB; - while (leftSucc->getType() != BBType::Ret) { + while (leftSucc->getStatement()->isReturn()) { if (leftSucc == rightBB) { return true; // the left case is a fallthrough to the right case } @@ -2608,7 +2588,7 @@ CCodeGenerator::computeOptimalCaseOrdering(const BasicBlock *caseHead, SwitchInf } // No fallthrough found; compare by address - return leftBB->getLowAddr() < rightBB->getLowAddr(); + return leftBB->getStatement()->getNumber() < rightBB->getStatement()->getNumber(); }); return result; diff --git a/src/boomerang-plugins/codegen/c/CCodeGenerator.h b/src/boomerang-plugins/codegen/c/CCodeGenerator.h index 947a3b7d8..619eb2c0c 100644 --- a/src/boomerang-plugins/codegen/c/CCodeGenerator.h +++ b/src/boomerang-plugins/codegen/c/CCodeGenerator.h @@ -25,7 +25,6 @@ #include -class BasicBlock; class Exp; class LocationSet; class BinaryImage; @@ -209,7 +208,7 @@ class BOOMERANG_PLUGIN_API CCodeGenerator : public ICodeGenerator void addIfElseCondEnd(); // goto, break, continue, etc - void addGoto(const BasicBlock *bb); + void addGoto(const StmtASTNode *bb); /// Adds: continue; void addContinue(); @@ -219,7 +218,7 @@ class BOOMERANG_PLUGIN_API CCodeGenerator : public ICodeGenerator // labels /// Adds: L \a ord : - void addLabel(const BasicBlock *bb); + void addLabel(const StmtASTNode *bb); // proc related /** @@ -278,32 +277,32 @@ class BOOMERANG_PLUGIN_API CCodeGenerator : public ICodeGenerator void closeParen(OStream &str, OpPrec outer, OpPrec inner); - void generateCode(const BasicBlock *bb, const BasicBlock *latch, - std::list &followSet, - std::list &gotoSet, UserProc *proc); - void generateCode_Loop(const BasicBlock *bb, std::list &gotoSet, - UserProc *proc, const BasicBlock *latch, - std::list &followSet); - void generateCode_Branch(const BasicBlock *bb, std::list &gotoSet, - UserProc *proc, const BasicBlock *latch, - std::list &followSet); - void generateCode_Seq(const BasicBlock *bb, std::list &gotoSet, - UserProc *proc, const BasicBlock *latch, - std::list &followSet); + void generateCode(const StmtASTNode *bb, const StmtASTNode *latch, + std::list &followSet, + std::list &gotoSet, UserProc *proc); + void generateCode_Loop(const StmtASTNode *bb, std::list &gotoSet, + UserProc *proc, const StmtASTNode *latch, + std::list &followSet); + void generateCode_Branch(const StmtASTNode *bb, std::list &gotoSet, + UserProc *proc, const StmtASTNode *latch, + std::list &followSet); + void generateCode_Seq(const StmtASTNode *bb, std::list &gotoSet, + UserProc *proc, const StmtASTNode *latch, + std::list &followSet); /// Emits a goto statement (at the correct indentation level) with the destination label for /// dest. Also places the label just before the destination code if it isn't already there. If /// the goto is to the return block, it would be nice to emit a 'return' instead (but would have /// to duplicate the other code in that return BB). Also, 'continue' and 'break' statements /// are used instead if possible - void emitGotoAndLabel(const BasicBlock *bb, const BasicBlock *dest); + void emitGotoAndLabel(const StmtASTNode *bb, const StmtASTNode *dest); /// Generates code for each non-CTI (except procedure calls) statement within the block. - void writeBB(const BasicBlock *bb); + void writeBB(const StmtASTNode *bb); /// \returns true if all predecessors of this BB have had their code generated. - bool isAllParentsGenerated(const BasicBlock *bb) const; - bool isGenerated(const BasicBlock *bb) const; + bool isAllParentsGenerated(const StmtASTNode *bb) const; + bool isGenerated(const StmtASTNode *bb) const; void emitCodeForStmt(const Statement *stmt); @@ -314,8 +313,8 @@ class BOOMERANG_PLUGIN_API CCodeGenerator : public ICodeGenerator * The value or the case label is determined by the value of the first part of the pair, * the jump destination for the case is determined by the second part of the pair. */ - std::list> - computeOptimalCaseOrdering(const BasicBlock *caseHead, SwitchInfo *switchInfo); + std::list> + computeOptimalCaseOrdering(const StmtASTNode *caseHead, const SwitchInfo *switchInfo); private: void print(const Module *module); @@ -330,8 +329,8 @@ class BOOMERANG_PLUGIN_API CCodeGenerator : public ICodeGenerator private: int m_indent = 0; ///< Current indentation depth std::map m_locals; ///< All locals in a Proc - std::unordered_set m_usedLabels; ///< All used goto labels. (lowAddr of BB) - std::unordered_set m_generatedBBs; + std::unordered_set m_usedLabels; ///< All used goto labels. (lowAddr of BB) + std::unordered_set m_generatedBBs; UserProc *m_proc = nullptr; ControlFlowAnalyzer m_analyzer; diff --git a/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.cpp b/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.cpp index 8a4f5b20e..0f3614ac7 100644 --- a/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.cpp +++ b/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.cpp @@ -12,6 +12,15 @@ #include "boomerang/db/BasicBlock.h" #include "boomerang/db/proc/ProcCFG.h" #include "boomerang/util/log/Log.h" +#include "boomerang/ssl/RTL.h" + +#include "boomerang/ssl/statements/CallStatement.h" +#include "boomerang/ssl/statements/CaseStatement.h" +#include "boomerang/ssl/statements/BranchStatement.h" +#include "boomerang/ssl/statements/ReturnStatement.h" +#include "boomerang/db/proc/UserProc.h" + +#include ControlFlowAnalyzer::ControlFlowAnalyzer() @@ -22,6 +31,12 @@ ControlFlowAnalyzer::ControlFlowAnalyzer() void ControlFlowAnalyzer::structureCFG(ProcCFG *cfg) { m_cfg = cfg; + m_nodes.clear(); + m_postOrdering.clear(); + m_revPostOrdering.clear(); + m_info.clear(); + + rebuildASTForest(); if (m_cfg->findRetNode() == nullptr) { return; @@ -45,13 +60,13 @@ void ControlFlowAnalyzer::setTimeStamps() int time = 1; m_postOrdering.clear(); - updateLoopStamps(findEntryBB(), time); + updateLoopStamps(findEntryNode(), time); // set the reverse parenthesis for the nodes time = 1; - updateRevLoopStamps(findEntryBB(), time); + updateRevLoopStamps(findEntryNode(), time); - BasicBlock *retNode = findExitBB(); + StmtASTNode *retNode = findExitNode(); assert(retNode); m_revPostOrdering.clear(); updateRevOrder(retNode); @@ -62,9 +77,9 @@ void ControlFlowAnalyzer::updateImmedPDom() { // traverse the nodes in order (i.e from the bottom up) for (int i = m_revPostOrdering.size() - 1; i >= 0; i--) { - const BasicBlock *bb = m_revPostOrdering[i]; + const StmtASTNode *bb = m_revPostOrdering[i]; - for (BasicBlock *succ : bb->getSuccessors()) { + for (StmtASTNode *succ : bb->getSuccessors()) { if (getRevOrd(succ) > getRevOrd(bb)) { setImmPDom(bb, findCommonPDom(getImmPDom(bb), succ)); } @@ -72,22 +87,22 @@ void ControlFlowAnalyzer::updateImmedPDom() } // make a second pass but consider the original CFG ordering this time - for (const BasicBlock *bb : m_postOrdering) { + for (const StmtASTNode *bb : m_postOrdering) { if (bb->getNumSuccessors() <= 1) { continue; } for (auto &succ : bb->getSuccessors()) { - BasicBlock *succNode = succ; + StmtASTNode *succNode = succ; setImmPDom(bb, findCommonPDom(getImmPDom(bb), succNode)); } } // one final pass to fix up nodes involved in a loop - for (const BasicBlock *bb : m_postOrdering) { + for (const StmtASTNode *bb : m_postOrdering) { if (bb->getNumSuccessors() > 1) { for (auto &succ : bb->getSuccessors()) { - BasicBlock *succNode = succ; + StmtASTNode *succNode = succ; if (isBackEdge(bb, succNode) && (bb->getNumSuccessors() > 1) && getImmPDom(succNode) && @@ -103,8 +118,8 @@ void ControlFlowAnalyzer::updateImmedPDom() } -const BasicBlock *ControlFlowAnalyzer::findCommonPDom(const BasicBlock *currImmPDom, - const BasicBlock *succImmPDom) +const StmtASTNode *ControlFlowAnalyzer::findCommonPDom(const StmtASTNode *currImmPDom, + const StmtASTNode *succImmPDom) { if (!currImmPDom) { return succImmPDom; @@ -118,8 +133,7 @@ const BasicBlock *ControlFlowAnalyzer::findCommonPDom(const BasicBlock *currImmP return currImmPDom; // ordering hasn't been done } - const BasicBlock *oldCurImmPDom = currImmPDom; - const BasicBlock *oldSuccImmPDom = succImmPDom; + const StmtASTNode *oldCurImmPDom = currImmPDom; int giveup = 0; #define GIVEUP 10000 @@ -136,9 +150,6 @@ const BasicBlock *ControlFlowAnalyzer::findCommonPDom(const BasicBlock *currImmP } if (giveup >= GIVEUP) { - LOG_VERBOSE("Failed to find commonPDom for %1 and %2", oldCurImmPDom->getLowAddr(), - oldSuccImmPDom->getLowAddr()); - return oldCurImmPDom; // no change } @@ -149,7 +160,7 @@ const BasicBlock *ControlFlowAnalyzer::findCommonPDom(const BasicBlock *currImmP void ControlFlowAnalyzer::structConds() { // Process the nodes in order - for (const BasicBlock *currNode : m_postOrdering) { + for (const StmtASTNode *currNode : m_postOrdering) { if (currNode->getNumSuccessors() <= 1) { // not an if/case condition continue; @@ -157,7 +168,7 @@ void ControlFlowAnalyzer::structConds() // if the current conditional header is a two way node and has a back edge, then it // won't have a follow - if (hasBackEdge(currNode) && (currNode->getType() == BBType::Twoway)) { + if (hasBackEdge(currNode) && (currNode->getStatement()->isBranch())) { setStructType(currNode, StructType::Cond); continue; } @@ -177,22 +188,22 @@ void ControlFlowAnalyzer::structConds() } -void ControlFlowAnalyzer::determineLoopType(const BasicBlock *header, bool *&loopNodes) +void ControlFlowAnalyzer::determineLoopType(const StmtASTNode *header, bool *&loopNodes) { assert(getLatchNode(header)); // if the latch node is a two way node then this must be a post tested loop - if (getLatchNode(header)->getType() == BBType::Twoway) { + if (getLatchNode(header)->getStatement()->isBranch()) { setLoopType(header, LoopType::PostTested); // if the head of the loop is a two way node and the loop spans more than one block then it // must also be a conditional header - if ((header->getType() == BBType::Twoway) && (header != getLatchNode(header))) { + if (header->getStatement()->isBranch() && (header != getLatchNode(header))) { setStructType(header, StructType::LoopCond); } } // otherwise it is either a pretested or endless loop - else if (header->getType() == BBType::Twoway) { + else if (header->getStatement()->isBranch()) { // if the header is a two way node then it must have a conditional follow (since it can't // have any backedges leading from it). If this follow is within the loop then this must be // an endless loop @@ -213,12 +224,12 @@ void ControlFlowAnalyzer::determineLoopType(const BasicBlock *header, bool *&loo } -void ControlFlowAnalyzer::findLoopFollow(const BasicBlock *header, bool *&loopNodes) +void ControlFlowAnalyzer::findLoopFollow(const StmtASTNode *header, bool *&loopNodes) { assert(getStructType(header) == StructType::Loop || getStructType(header) == StructType::LoopCond); const LoopType loopType = getLoopType(header); - const BasicBlock *latch = getLatchNode(header); + const StmtASTNode *latch = getLatchNode(header); if (loopType == LoopType::PreTested) { // if the 'while' loop's true child is within the loop, then its false child is the loop @@ -242,12 +253,12 @@ void ControlFlowAnalyzer::findLoopFollow(const BasicBlock *header, bool *&loopNo } else { // endless loop - const BasicBlock *follow = nullptr; + const StmtASTNode *follow = nullptr; // traverse the ordering array between the header and latch nodes. - // BasicBlock * latch = header->getLatchNode(); initialized at function start + // StmtASTNode * latch = header->getLatchNode(); initialized at function start for (int i = getPostOrdering(header) - 1; i > getPostOrdering(latch); i--) { - const BasicBlock *&desc = m_postOrdering[i]; + const StmtASTNode *&desc = m_postOrdering[i]; // the follow for an endless loop will have the following // properties: // i) it will have a parent that is a conditional header inside the loop whose follow @@ -273,7 +284,7 @@ void ControlFlowAnalyzer::findLoopFollow(const BasicBlock *header, bool *&loopNo else { // otherwise find the child (if any) of the conditional header that isn't inside // the same loop - const BasicBlock *succ = desc->getSuccessor(BTHEN); + const StmtASTNode *succ = desc->getSuccessor(BTHEN); if (loopNodes[getPostOrdering(succ)]) { if (!loopNodes[getPostOrdering(desc->getSuccessor(BELSE))]) { @@ -302,7 +313,7 @@ void ControlFlowAnalyzer::findLoopFollow(const BasicBlock *header, bool *&loopNo } -void ControlFlowAnalyzer::tagNodesInLoop(const BasicBlock *header, bool *&loopNodes) +void ControlFlowAnalyzer::tagNodesInLoop(const StmtASTNode *header, bool *&loopNodes) { // Traverse the ordering structure from the header to the latch node tagging the nodes // determined to be within the loop. These are nodes that satisfy the following: @@ -314,7 +325,7 @@ void ControlFlowAnalyzer::tagNodesInLoop(const BasicBlock *header, bool *&loopNo // OR // iii) curNode is the latch node - const BasicBlock *latch = getLatchNode(header); + const StmtASTNode *latch = getLatchNode(header); assert(latch); for (int i = getPostOrdering(header) - 1; i >= getPostOrdering(latch); i--) { @@ -331,8 +342,8 @@ void ControlFlowAnalyzer::tagNodesInLoop(const BasicBlock *header, bool *&loopNo void ControlFlowAnalyzer::structLoops() { for (int i = m_postOrdering.size() - 1; i >= 0; i--) { - const BasicBlock *currNode = m_postOrdering[i]; // the current node under investigation - const BasicBlock *latch = nullptr; // the latching node of the loop + const StmtASTNode *currNode = m_postOrdering[i]; // the current node under investigation + const StmtASTNode *latch = nullptr; // the latching node of the loop // If the current node has at least one back edge into it, it is a loop header. If there are // numerous back edges into the header, determine which one comes form the proper latching @@ -345,7 +356,7 @@ void ControlFlowAnalyzer::structLoops() // vi) has a lower ordering than all other suitable candiates // If no nodes meet the above criteria, then the current node is not a loop header - for (const BasicBlock *pred : currNode->getPredecessors()) { + for (const StmtASTNode *pred : currNode->getPredecessors()) { if ((getCaseHead(pred) == getCaseHead(currNode)) && // ii) (getLoopHead(pred) == getLoopHead(currNode)) && // iii) (!latch || (getPostOrdering(latch) > getPostOrdering(pred))) && // vi) @@ -395,18 +406,18 @@ void ControlFlowAnalyzer::structLoops() void ControlFlowAnalyzer::checkConds() { - for (const BasicBlock *currNode : m_postOrdering) { + for (const StmtASTNode *currNode : m_postOrdering) { // consider only conditional headers that have a follow and aren't case headers if (((getStructType(currNode) == StructType::Cond) || (getStructType(currNode) == StructType::LoopCond)) && getCondFollow(currNode) && (getCondType(currNode) != CondType::Case)) { // define convenient aliases for the relevant loop and case heads and the out edges - const BasicBlock *myLoopHead = (getStructType(currNode) == StructType::LoopCond) + const StmtASTNode *myLoopHead = (getStructType(currNode) == StructType::LoopCond) ? currNode : getLoopHead(currNode); - const BasicBlock *follLoopHead = getLoopHead(getCondFollow(currNode)); - const BasicBlock *bbThen = currNode->getSuccessor(BTHEN); - const BasicBlock *bbElse = currNode->getSuccessor(BELSE); + const StmtASTNode *follLoopHead = getLoopHead(getCondFollow(currNode)); + const StmtASTNode *bbThen = currNode->getSuccessor(BTHEN); + const StmtASTNode *bbElse = currNode->getSuccessor(BELSE); // analyse whether this is a jump into/outof a loop if (myLoopHead != follLoopHead) { @@ -448,9 +459,9 @@ void ControlFlowAnalyzer::checkConds() if ((getUnstructType(currNode) == UnstructType::Structured) && ((getCaseHead(currNode) != getCaseHead(bbThen)) || (getCaseHead(currNode) != getCaseHead(bbElse)))) { - const BasicBlock *myCaseHead = getCaseHead(currNode); - const BasicBlock *thenCaseHead = getCaseHead(bbThen); - const BasicBlock *elseCaseHead = getCaseHead(bbElse); + const StmtASTNode *myCaseHead = getCaseHead(currNode); + const StmtASTNode *thenCaseHead = getCaseHead(bbThen); + const StmtASTNode *elseCaseHead = getCaseHead(bbElse); if ((thenCaseHead == myCaseHead) && (!myCaseHead || (elseCaseHead != getCondFollow(myCaseHead)))) { @@ -487,13 +498,13 @@ void ControlFlowAnalyzer::checkConds() } -bool ControlFlowAnalyzer::isBackEdge(const BasicBlock *source, const BasicBlock *dest) const +bool ControlFlowAnalyzer::isBackEdge(const StmtASTNode *source, const StmtASTNode *dest) const { return dest == source || isAncestorOf(dest, source); } -bool ControlFlowAnalyzer::isCaseOption(const BasicBlock *bb) const +bool ControlFlowAnalyzer::isCaseOption(const StmtASTNode *bb) const { if (!getCaseHead(bb)) { return false; @@ -509,7 +520,7 @@ bool ControlFlowAnalyzer::isCaseOption(const BasicBlock *bb) const } -bool ControlFlowAnalyzer::isAncestorOf(const BasicBlock *bb, const BasicBlock *other) const +bool ControlFlowAnalyzer::isAncestorOf(const StmtASTNode *bb, const StmtASTNode *other) const { return (m_info[bb].m_preOrderID < m_info[other].m_preOrderID && m_info[bb].m_postOrderID > m_info[other].m_postOrderID) || @@ -518,7 +529,7 @@ bool ControlFlowAnalyzer::isAncestorOf(const BasicBlock *bb, const BasicBlock *o } -void ControlFlowAnalyzer::updateLoopStamps(const BasicBlock *bb, int &time) +void ControlFlowAnalyzer::updateLoopStamps(const StmtASTNode *bb, int &time) { // timestamp the current node with the current time // and set its traversed flag @@ -526,7 +537,7 @@ void ControlFlowAnalyzer::updateLoopStamps(const BasicBlock *bb, int &time) m_info[bb].m_preOrderID = time; // recurse on unvisited children and set inedges for all children - for (const BasicBlock *succ : bb->getSuccessors()) { + for (const StmtASTNode *succ : bb->getSuccessors()) { // set the in edge from this child to its parent (the current node) // (not done here, might be a problem) // outEdges[i]->inEdges.Add(this); @@ -546,7 +557,7 @@ void ControlFlowAnalyzer::updateLoopStamps(const BasicBlock *bb, int &time) } -void ControlFlowAnalyzer::updateRevLoopStamps(const BasicBlock *bb, int &time) +void ControlFlowAnalyzer::updateRevLoopStamps(const StmtASTNode *bb, int &time) { // timestamp the current node with the current time and set its traversed flag setTravType(bb, TravType::DFS_RNum); @@ -564,13 +575,13 @@ void ControlFlowAnalyzer::updateRevLoopStamps(const BasicBlock *bb, int &time) } -void ControlFlowAnalyzer::updateRevOrder(const BasicBlock *bb) +void ControlFlowAnalyzer::updateRevOrder(const StmtASTNode *bb) { // Set this node as having been traversed during the post domimator DFS ordering traversal setTravType(bb, TravType::DFS_PDom); // recurse on unvisited children - for (const BasicBlock *pred : bb->getPredecessors()) { + for (const StmtASTNode *pred : bb->getPredecessors()) { if (getTravType(pred) != TravType::DFS_PDom) { updateRevOrder(pred); } @@ -583,8 +594,8 @@ void ControlFlowAnalyzer::updateRevOrder(const BasicBlock *bb) } -void ControlFlowAnalyzer::setCaseHead(const BasicBlock *bb, const BasicBlock *head, - const BasicBlock *follow) +void ControlFlowAnalyzer::setCaseHead(const StmtASTNode *bb, const StmtASTNode *head, + const StmtASTNode *follow) { assert(!getCaseHead(bb)); @@ -597,7 +608,7 @@ void ControlFlowAnalyzer::setCaseHead(const BasicBlock *bb, const BasicBlock *he // if this is a nested case header, then it's member nodes // will already have been tagged so skip straight to its follow - if (bb->isType(BBType::Nway) && (bb != head)) { + if (bb->getStatement()->isCase() && (bb != head)) { if (getCondFollow(bb) && (getTravType(getCondFollow(bb)) != TravType::DFS_Case) && (getCondFollow(bb) != follow)) { setCaseHead(bb, head, follow); @@ -608,7 +619,7 @@ void ControlFlowAnalyzer::setCaseHead(const BasicBlock *bb, const BasicBlock *he // i) isn't on a back-edge, // ii) hasn't already been traversed in a case tagging traversal and, // iii) isn't the follow node. - for (BasicBlock *succ : bb->getSuccessors()) { + for (StmtASTNode *succ : bb->getSuccessors()) { if (!isBackEdge(bb, succ) && (getTravType(succ) != TravType::DFS_Case) && (succ != follow)) { setCaseHead(succ, head, follow); @@ -618,12 +629,12 @@ void ControlFlowAnalyzer::setCaseHead(const BasicBlock *bb, const BasicBlock *he } -void ControlFlowAnalyzer::setStructType(const BasicBlock *bb, StructType structType) +void ControlFlowAnalyzer::setStructType(const StmtASTNode *bb, StructType structType) { // if this is a conditional header, determine exactly which type of conditional header it is // (i.e. switch, if-then, if-then-else etc.) if (structType == StructType::Cond) { - if (bb->isType(BBType::Nway)) { + if (bb->getStatement()->isCase()) { m_info[bb].m_conditionHeaderType = CondType::Case; } else if (getCondFollow(bb) == bb->getSuccessor(BELSE)) { @@ -641,7 +652,7 @@ void ControlFlowAnalyzer::setStructType(const BasicBlock *bb, StructType structT } -void ControlFlowAnalyzer::setUnstructType(const BasicBlock *bb, UnstructType unstructType) +void ControlFlowAnalyzer::setUnstructType(const StmtASTNode *bb, UnstructType unstructType) { assert((m_info[bb].m_structuringType == StructType::Cond || m_info[bb].m_structuringType == StructType::LoopCond) && @@ -650,7 +661,7 @@ void ControlFlowAnalyzer::setUnstructType(const BasicBlock *bb, UnstructType uns } -UnstructType ControlFlowAnalyzer::getUnstructType(const BasicBlock *bb) const +UnstructType ControlFlowAnalyzer::getUnstructType(const StmtASTNode *bb) const { assert((m_info[bb].m_structuringType == StructType::Cond || m_info[bb].m_structuringType == StructType::LoopCond)); @@ -661,7 +672,7 @@ UnstructType ControlFlowAnalyzer::getUnstructType(const BasicBlock *bb) const } -void ControlFlowAnalyzer::setLoopType(const BasicBlock *bb, LoopType l) +void ControlFlowAnalyzer::setLoopType(const StmtASTNode *bb, LoopType l) { assert(getStructType(bb) == StructType::Loop || getStructType(bb) == StructType::LoopCond); m_info[bb].m_loopHeaderType = l; @@ -675,29 +686,29 @@ void ControlFlowAnalyzer::setLoopType(const BasicBlock *bb, LoopType l) } -LoopType ControlFlowAnalyzer::getLoopType(const BasicBlock *bb) const +LoopType ControlFlowAnalyzer::getLoopType(const StmtASTNode *bb) const { assert(getStructType(bb) == StructType::Loop || getStructType(bb) == StructType::LoopCond); return m_info[bb].m_loopHeaderType; } -void ControlFlowAnalyzer::setCondType(const BasicBlock *bb, CondType condType) +void ControlFlowAnalyzer::setCondType(const StmtASTNode *bb, CondType condType) { assert(getStructType(bb) == StructType::Cond || getStructType(bb) == StructType::LoopCond); m_info[bb].m_conditionHeaderType = condType; } -CondType ControlFlowAnalyzer::getCondType(const BasicBlock *bb) const +CondType ControlFlowAnalyzer::getCondType(const StmtASTNode *bb) const { assert(getStructType(bb) == StructType::Cond || getStructType(bb) == StructType::LoopCond); return m_info[bb].m_conditionHeaderType; } -bool ControlFlowAnalyzer::isBBInLoop(const BasicBlock *bb, const BasicBlock *header, - const BasicBlock *latch) const +bool ControlFlowAnalyzer::isBBInLoop(const StmtASTNode *bb, const StmtASTNode *header, + const StmtASTNode *latch) const { assert(getLatchNode(header) == latch); assert(header == latch || ((m_info[header].m_preOrderID > m_info[latch].m_preOrderID && @@ -721,10 +732,10 @@ bool ControlFlowAnalyzer::isBBInLoop(const BasicBlock *bb, const BasicBlock *hea } -bool ControlFlowAnalyzer::hasBackEdge(const BasicBlock *bb) const +bool ControlFlowAnalyzer::hasBackEdge(const StmtASTNode *bb) const { return std::any_of(bb->getSuccessors().begin(), bb->getSuccessors().end(), - [this, bb](const BasicBlock *succ) { return isBackEdge(bb, succ); }); + [this, bb](const StmtASTNode *succ) { return isBackEdge(bb, succ); }); } @@ -736,13 +747,182 @@ void ControlFlowAnalyzer::unTraverse() } -BasicBlock *ControlFlowAnalyzer::findEntryBB() const +StmtASTNode *ControlFlowAnalyzer::findEntryNode() const +{ + const BasicBlock *bb = m_cfg->getEntryBB(); + while (bb) { + const auto it = m_nodes.find(bb->getFirstStmt()); + if (it != m_nodes.end()) { + return it->second; + } + bb = bb->getSuccessor(0); + } + + return nullptr; // not found + } + + +StmtASTNode *ControlFlowAnalyzer::findExitNode() const +{ + const BasicBlock *exitBB = m_cfg->findRetNode(); + if (exitBB) { + const auto it = m_nodes.find(exitBB->getLastStmt()); + if (it != m_nodes.end()) { + return it->second; + } + } + + return nullptr; // not found +} + + +void ControlFlowAnalyzer::rebuildASTForest() { - return m_cfg->getEntryBB(); + // Wire up successors within a BB + for (const BasicBlock *bb : *m_cfg) { + const Statement *prev = nullptr; + for (const auto &rtl : *bb->getRTLs()) { + for (const Statement *stmt : *rtl) { + m_nodes[stmt] = new StmtASTNode(stmt); + if (prev != nullptr) { + m_nodes[stmt]->addPredecessor(m_nodes[prev]); + m_nodes[prev]->addSuccessor(m_nodes[stmt]); + } + prev = stmt; + } + } + } + + // wire up successors between BBs + for (const BasicBlock *bb : *m_cfg) { + const Statement *lastStmt = bb->getLastStmt(); + + // it is important to process the nodes in order to preserve ordering for branches + for (const BasicBlock *succ : bb->getSuccessors()) { + const Statement *firstStmt = findSuccessorStmt(lastStmt, succ); + if (!lastStmt || !firstStmt) { + continue; + } + + m_nodes[lastStmt]->addSuccessor(m_nodes[firstStmt]); + m_nodes[firstStmt]->addPredecessor(m_nodes[lastStmt]); + } + } + + + // debug + dumpStmtCFGToFile(); +} + + +void ControlFlowAnalyzer::dumpStmtCFGToFile() const +{ + QFile dest("StmtCFG.dot"); + dest.open(QFile::WriteOnly); + OStream ost(&dest); + + ost << "digraph StmtCFG {\n\n"; + + for (auto &[stmt, node] : m_nodes) { + QString label; + + if (stmt->isCall()) { + const Function *proc = node->getStatement()->getDestProc(); + label = QString("CALL ") + (proc ? proc->getName() : "/* no dest */"); + label += "("; + for (auto &arg : node->getStatement()->getArguments()) { + label += static_cast(arg)->getRight()->toString() + ","; + } + + if (label.endsWith(",")) { + label.chop(1); + } + label += ")"; + } + else if (stmt->isCase()) { + label = QString("CASE "); + if (node->getStatement()->getSwitchInfo()) { + label += node->getStatement()->getSwitchInfo()->switchExp->toString(); + } + } + else if (stmt->isBranch()) { + label = "BRANCH if " + static_cast(stmt)->getCondExpr()->toString(); + } + else if (stmt->isReturn()) { + label = QString("RET "); + for (auto &ret : node->getStatement()->getReturns()) { + label += static_cast(ret)->getRight()->toString() + ","; + } + + if (label.endsWith(",")) { + label.chop(1); + } + } + else { + label = stmt->toString(); + } + + label = label.replace("\n", " "); + label = label.replace("\"", "'"); + + ost << "stmt" << stmt->getNumber() << "[label=\""; + ost << label << "\"];\n"; + } + + ost << "\n"; + + for (auto &[stmt, node] : m_nodes) { + if (stmt->isBranch()) { + ost << "stmt" << stmt->getNumber() << " -> stmt" << node->getSuccessor(BTHEN)->getStatement()->getNumber() << "[color=green];\n"; + ost << "stmt" << stmt->getNumber() << " -> stmt" << node->getSuccessor(BELSE)->getStatement()->getNumber() << "[color=red];\n"; + } + else if (stmt->isCase()) { + for (int i = 0; i < node->getNumSuccessors(); ++i) { + StmtASTNode *succ = node->getSuccessor(i); + ost << "stmt" << stmt->getNumber() << " -> stmt" << succ->getStatement()->getNumber() << "[label=\""; + + const SwitchInfo *psi = node->getStatement()->getSwitchInfo(); + if (psi->switchType == SwitchType::F) { // "Fortran" style? + // Yes, use the table value itself + ost << reinterpret_cast(psi->tableAddr.value())[i]; + } + else { + // Note that uTable has the address of an int array + ost << static_cast(psi->lowerBound + i); + } + ost << "\"]\n"; + } + } + else { + for (auto &succ : node->getSuccessors()) { + ost << "stmt" << stmt->getNumber() << " -> stmt" << succ->getStatement()->getNumber() << ";\n"; + } + } + } + + ost << "}"; + dest.close(); } -BasicBlock *ControlFlowAnalyzer::findExitBB() const +const Statement *ControlFlowAnalyzer::findSuccessorStmt(const Statement *stmt, const BasicBlock *successorBB) const { - return m_cfg->findRetNode(); + if (stmt == nullptr) { + return nullptr; + } + + std::set visitedBBs; + const BasicBlock *currBB = successorBB; + + while (currBB->isEmpty()) { + if (visitedBBs.find(currBB) != visitedBBs.end()) { + return nullptr; // loop with empty BBs + } + + visitedBBs.insert(currBB); + assert(currBB->getNumSuccessors() == 1); + currBB = currBB->getSuccessor(BTHEN); + } + + return currBB->getFirstStmt(); } diff --git a/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.h b/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.h index 011b9c900..a6e86a830 100644 --- a/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.h +++ b/src/boomerang-plugins/codegen/c/ControlFlowAnalyzer.h @@ -10,6 +10,9 @@ #pragma once +#include "StmtASTNode.h" + +#include #include #include @@ -88,7 +91,7 @@ enum class SBBType : uint8_t /// Holds all information about control Flow Structure. -struct BBStructInfo +struct NodeStructInfo { /// Control flow analysis stuff, lifted from Doug Simon's honours thesis. int m_postOrderIndex = -1; ///< node's position within the ordering structure @@ -117,12 +120,12 @@ struct BBStructInfo LoopType m_loopHeaderType = LoopType::Invalid; ///< the loop type of a loop header // analysis information - const BasicBlock *m_immPDom = nullptr; ///< immediate post dominator - const BasicBlock *m_loopHead = nullptr; ///< head of the most nested enclosing loop - const BasicBlock *m_caseHead = nullptr; ///< head of the most nested enclosing case - const BasicBlock *m_condFollow = nullptr; ///< follow of a conditional header - const BasicBlock *m_loopFollow = nullptr; ///< follow of a loop header - const BasicBlock *m_latchNode = nullptr; ///< latching node of a loop header + const StmtASTNode *m_immPDom = nullptr; ///< immediate post dominator + const StmtASTNode *m_loopHead = nullptr; ///< head of the most nested enclosing loop + const StmtASTNode *m_caseHead = nullptr; ///< head of the most nested enclosing case + const StmtASTNode *m_condFollow = nullptr; ///< follow of a conditional header + const StmtASTNode *m_loopFollow = nullptr; ///< follow of a loop header + const StmtASTNode *m_latchNode = nullptr; ///< latching node of a loop header }; @@ -140,95 +143,95 @@ class ControlFlowAnalyzer void structureCFG(ProcCFG *cfg); /// establish if \p source has a back edge to \p dest - bool isBackEdge(const BasicBlock *source, const BasicBlock *dest) const; + bool isBackEdge(const StmtASTNode *source, const StmtASTNode *dest) const; public: - inline bool isLatchNode(const BasicBlock *bb) const + inline bool isLatchNode(const StmtASTNode *n) const { - const BasicBlock *loopHead = getLoopHead(bb); + const StmtASTNode *loopHead = getLoopHead(n); if (!loopHead) { return false; } - return getLatchNode(loopHead) == bb; + return getLatchNode(loopHead) == n; } - inline const BasicBlock *getLatchNode(const BasicBlock *bb) const + inline const StmtASTNode *getLatchNode(const StmtASTNode *bb) const { return m_info[bb].m_latchNode; } - inline const BasicBlock *getLoopHead(const BasicBlock *bb) const + inline const StmtASTNode *getLoopHead(const StmtASTNode *bb) const { return m_info[bb].m_loopHead; } - inline const BasicBlock *getLoopFollow(const BasicBlock *bb) const + inline const StmtASTNode *getLoopFollow(const StmtASTNode *bb) const { return m_info[bb].m_loopFollow; } - inline const BasicBlock *getCondFollow(const BasicBlock *bb) const + inline const StmtASTNode *getCondFollow(const StmtASTNode *bb) const { return m_info[bb].m_condFollow; } - inline const BasicBlock *getCaseHead(const BasicBlock *bb) const + inline const StmtASTNode *getCaseHead(const StmtASTNode *bb) const { return m_info[bb].m_caseHead; } - TravType getTravType(const BasicBlock *bb) const { return m_info[bb].m_travType; } - StructType getStructType(const BasicBlock *bb) const { return m_info[bb].m_structuringType; } - CondType getCondType(const BasicBlock *bb) const; - UnstructType getUnstructType(const BasicBlock *bb) const; - LoopType getLoopType(const BasicBlock *bb) const; + TravType getTravType(const StmtASTNode *bb) const { return m_info[bb].m_travType; } + StructType getStructType(const StmtASTNode *bb) const { return m_info[bb].m_structuringType; } + CondType getCondType(const StmtASTNode *bb) const; + UnstructType getUnstructType(const StmtASTNode *bb) const; + LoopType getLoopType(const StmtASTNode *bb) const; - void setTravType(const BasicBlock *bb, TravType type) { m_info[bb].m_travType = type; } - void setStructType(const BasicBlock *bb, StructType s); + void setTravType(const StmtASTNode *bb, TravType type) { m_info[bb].m_travType = type; } + void setStructType(const StmtASTNode *bb, StructType s); - bool isCaseOption(const BasicBlock *bb) const; + bool isCaseOption(const StmtASTNode *bb) const; private: - void updateLoopStamps(const BasicBlock *bb, int &time); - void updateRevLoopStamps(const BasicBlock *bb, int &time); - void updateRevOrder(const BasicBlock *bb); + void updateLoopStamps(const StmtASTNode *bb, int &time); + void updateRevLoopStamps(const StmtASTNode *bb, int &time); + void updateRevOrder(const StmtASTNode *bb); - void setLoopHead(const BasicBlock *bb, const BasicBlock *head) { m_info[bb].m_loopHead = head; } - void setLatchNode(const BasicBlock *bb, const BasicBlock *latch) + void setLoopHead(const StmtASTNode *bb, const StmtASTNode *head) { m_info[bb].m_loopHead = head; } + void setLatchNode(const StmtASTNode *bb, const StmtASTNode *latch) { m_info[bb].m_latchNode = latch; } - void setCaseHead(const BasicBlock *bb, const BasicBlock *head, const BasicBlock *follow); + void setCaseHead(const StmtASTNode *bb, const StmtASTNode *head, const StmtASTNode *follow); - void setUnstructType(const BasicBlock *bb, UnstructType unstructType); - void setLoopType(const BasicBlock *bb, LoopType loopType); - void setCondType(const BasicBlock *bb, CondType condType); + void setUnstructType(const StmtASTNode *bb, UnstructType unstructType); + void setLoopType(const StmtASTNode *bb, LoopType loopType); + void setCondType(const StmtASTNode *bb, CondType condType); - void setLoopFollow(const BasicBlock *bb, const BasicBlock *follow) + void setLoopFollow(const StmtASTNode *bb, const StmtASTNode *follow) { m_info[bb].m_loopFollow = follow; } - void setCondFollow(const BasicBlock *bb, const BasicBlock *follow) + void setCondFollow(const StmtASTNode *bb, const StmtASTNode *follow) { m_info[bb].m_condFollow = follow; } /// establish if this bb has any back edges leading FROM it - bool hasBackEdge(const BasicBlock *bb) const; + bool hasBackEdge(const StmtASTNode *bb) const; /// \returns true if \p bb is an ancestor of \p other - bool isAncestorOf(const BasicBlock *bb, const BasicBlock *other) const; - bool isBBInLoop(const BasicBlock *bb, const BasicBlock *header, const BasicBlock *latch) const; + bool isAncestorOf(const StmtASTNode *bb, const StmtASTNode *other) const; + bool isBBInLoop(const StmtASTNode *bb, const StmtASTNode *header, const StmtASTNode *latch) const; - int getPostOrdering(const BasicBlock *bb) const { return m_info[bb].m_postOrderIndex; } - int getRevOrd(const BasicBlock *bb) const { return m_info[bb].m_revPostOrderIndex; } + int getPostOrdering(const StmtASTNode *bb) const { return m_info[bb].m_postOrderIndex; } + int getRevOrd(const StmtASTNode *bb) const { return m_info[bb].m_revPostOrderIndex; } - const BasicBlock *getImmPDom(const BasicBlock *bb) const { return m_info[bb].m_immPDom; } + const StmtASTNode *getImmPDom(const StmtASTNode *bb) const { return m_info[bb].m_immPDom; } - void setImmPDom(const BasicBlock *bb, const BasicBlock *immPDom) + void setImmPDom(const StmtASTNode *bb, const StmtASTNode *immPDom) { m_info[bb].m_immPDom = immPDom; } @@ -263,37 +266,47 @@ class ControlFlowAnalyzer /// Finds the common post dominator of the current immediate post dominator and its successor's /// immediate post dominator - const BasicBlock *findCommonPDom(const BasicBlock *curImmPDom, const BasicBlock *succImmPDom); + const StmtASTNode *findCommonPDom(const StmtASTNode *curImmPDom, const StmtASTNode *succImmPDom); /// \pre The loop induced by (head,latch) has already had all its member nodes tagged /// \post The type of loop has been deduced - void determineLoopType(const BasicBlock *header, bool *&loopNodes); + void determineLoopType(const StmtASTNode *header, bool *&loopNodes); /// \pre The loop headed by header has been induced and all it's member nodes have been tagged /// \post The follow of the loop has been determined. - void findLoopFollow(const BasicBlock *header, bool *&loopNodes); + void findLoopFollow(const StmtASTNode *header, bool *&loopNodes); /// \pre header has been detected as a loop header and has the details of the /// latching node /// \post the nodes within the loop have been tagged - void tagNodesInLoop(const BasicBlock *header, bool *&loopNodes); + void tagNodesInLoop(const StmtASTNode *header, bool *&loopNodes); + + void dumpStmtCFGToFile() const; - BasicBlock *findEntryBB() const; - BasicBlock *findExitBB() const; + const Statement *findSuccessorStmt(const Statement *stmt, const BasicBlock *successorBB) const; + +public: + StmtASTNode *findEntryNode() const; + +private: + StmtASTNode *findExitNode() const; + void rebuildASTForest(); private: ProcCFG *m_cfg = nullptr; + std::map m_nodes; + /// Post Ordering according to a DFS starting at the entry BB. - std::vector m_postOrdering; + std::vector m_postOrdering; /// Post Ordering according to a DFS starting at the exit BB (usually the return BB). /// Note that this is not the reverse of m_postOrdering /// for functions containing calls to noreturn functions or infinite loops. - std::vector m_revPostOrdering; + std::vector m_revPostOrdering; private: /// mutable to allow using the map in const methods (might create entries). /// DO NOT change BBStructInfo in const methods! - mutable std::unordered_map m_info; + mutable std::unordered_map m_info; }; diff --git a/src/boomerang-plugins/codegen/c/StmtASTNode.cpp b/src/boomerang-plugins/codegen/c/StmtASTNode.cpp new file mode 100644 index 000000000..5de9953b4 --- /dev/null +++ b/src/boomerang-plugins/codegen/c/StmtASTNode.cpp @@ -0,0 +1,89 @@ +#pragma region License +/* + * This file is part of the Boomerang Decompiler. + * + * See the file "LICENSE.TERMS" for information on usage and + * redistribution of this file, and for a DISCLAIMER OF ALL + * WARRANTIES. + */ +#pragma endregion License +#include "StmtASTNode.h" + +#include "boomerang/ssl/statements/Statement.h" + + +StmtASTNode::StmtASTNode(const Statement *stmt) + : m_stmt(stmt) +{ +} + + +StmtASTNode::~StmtASTNode() +{ +} + + +void StmtASTNode::addPredecessor(StmtASTNode *pred) +{ + m_predecessors.push_back(pred); +} + + +void StmtASTNode::addSuccessor(StmtASTNode *succ) +{ + m_successors.push_back(succ); +} + + +int StmtASTNode::getNumPredecessors() const +{ + return m_predecessors.size(); +} + + +int StmtASTNode::getNumSuccessors() const +{ + return m_successors.size(); +} + + +StmtASTNode *StmtASTNode::getPredecessor(int i) +{ + return m_predecessors.at(i); +} + + +StmtASTNode *StmtASTNode::getSuccessor(int i) +{ + return m_successors.at(i); +} + + +const StmtASTNode *StmtASTNode::getPredecessor(int i) const +{ + return m_predecessors.at(i); +} + + +const StmtASTNode *StmtASTNode::getSuccessor(int i) const +{ + return m_successors.at(i); +} + + +const std::vector &StmtASTNode::getPredecessors() const +{ + return m_predecessors; +} + + +const std::vector &StmtASTNode::getSuccessors() const +{ + return m_successors; +} + + +void StmtASTNode::printAST(OStream &os) const +{ + m_stmt->print(os); +} diff --git a/src/boomerang-plugins/codegen/c/StmtASTNode.h b/src/boomerang-plugins/codegen/c/StmtASTNode.h new file mode 100644 index 000000000..866c0f8f0 --- /dev/null +++ b/src/boomerang-plugins/codegen/c/StmtASTNode.h @@ -0,0 +1,55 @@ +#pragma region License +/* + * This file is part of the Boomerang Decompiler. + * + * See the file "LICENSE.TERMS" for information on usage and + * redistribution of this file, and for a DISCLAIMER OF ALL + * WARRANTIES. + */ +#pragma endregion License +#pragma once + + +#include "ASTNode.h" + +#include + + +class Statement; + + +class StmtASTNode : public ASTNode +{ +public: + StmtASTNode(const Statement *stmt); + virtual ~StmtASTNode() override; + +public: + bool isStmt() const override { return true; } + + /// \copydoc ASTNode::printAST + void printAST(OStream &os) const override; + + template + const T *getStatement() const { return static_cast(m_stmt); } + + void addPredecessor(StmtASTNode *pred); + void addSuccessor(StmtASTNode *succ); + + int getNumPredecessors() const; + int getNumSuccessors() const; + + StmtASTNode *getPredecessor(int i); + StmtASTNode *getSuccessor(int i); + + const StmtASTNode *getPredecessor(int i) const; + const StmtASTNode *getSuccessor(int i) const; + + const std::vector &getPredecessors() const; + const std::vector &getSuccessors() const; + +private: + const Statement *m_stmt; + std::vector m_successors; + std::vector m_predecessors; +}; diff --git a/tests/regression-tests/expected-outputs/elf32-ppc/minmax/minmax/minmax.c b/tests/regression-tests/expected-outputs/elf32-ppc/minmax/minmax/minmax.c index fb66dda00..485adbe8d 100644 --- a/tests/regression-tests/expected-outputs/elf32-ppc/minmax/minmax/minmax.c +++ b/tests/regression-tests/expected-outputs/elf32-ppc/minmax/minmax/minmax.c @@ -10,7 +10,7 @@ int main(int argc, char *argv[]) local0 = argc; if (argc >= 3) { if (argc <= 3) { -bb0x10000428: +lab_4: argc = local0; printf("MinMax adjusted number of arguments is %d\n", argc); } @@ -21,7 +21,7 @@ int main(int argc, char *argv[]) else { g3 = -2; local0 = g3; - goto bb0x10000428; + goto lab_4; } return 0; } diff --git a/tests/regression-tests/expected-outputs/elf32-ppc/switch/switch/switch.c b/tests/regression-tests/expected-outputs/elf32-ppc/switch/switch/switch.c index b8d5c4dbd..4fc5e0bbc 100644 --- a/tests/regression-tests/expected-outputs/elf32-ppc/switch/switch/switch.c +++ b/tests/regression-tests/expected-outputs/elf32-ppc/switch/switch/switch.c @@ -5,14 +5,14 @@ int main(int argc, char *argv[]); int main(int argc, char *argv[]) { if ((unsigned int)argc > 7) { -bb0x10000438: +lab_4: puts("Other!"); break; } switch(argc) { - case 1: case 0: - goto bb0x10000438; + case 1: + goto lab_4; case 7: puts("Seven!"); break;