From cb34f9eb4d12a64185175ff1421c8d66e4fa45b8 Mon Sep 17 00:00:00 2001 From: mohammad Date: Fri, 3 Jul 2020 14:32:18 +0200 Subject: [PATCH 01/31] var def line added in xml file --- CUGeneration/CUGenerationPass.cpp | 56 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 478764303..b6f506fb8 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -50,6 +50,8 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include +#include #include "DPUtils.h" @@ -76,12 +78,15 @@ namespace string name; string type; + string defLine; + string isArray; + string scope; - Variable_struct(const Variable_struct &other) : name(other.name), type(other.type) + Variable_struct(const Variable_struct &other) : name(other.name), type(other.type), defLine(other.defLine) { } - Variable_struct(string n, string t) : name(n), type(t) + Variable_struct(string n, string t, string d) : name(n), type(t), defLine(d) { } @@ -196,6 +201,9 @@ namespace string findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder); //DIGlobalVariable* findDbgGlobalDeclare(GlobalVariable *v); + //29.6.2020 Mohammad + string determineVariableDefLine(Instruction *I); + //functions to get list of global variables bool doInitialization(Module &ThisModule); bool doFinalization(Module &M); @@ -237,6 +245,28 @@ namespace /***************************** DiscoPoP Functions ***********************************/ +string CUGeneration::determineVariableDefLine(Instruction *I){ + string varName = determineVariableName(&*I); + + Function *F = I->getFunction(); + + for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + { + BasicBlock &BB = *FI; + for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) + { + if (DbgDeclareInst *DI = dyn_cast(BI)){ + if(auto *N = 
dyn_cast(DI->getVariable())){ + if(auto *DV = dyn_cast(N)){ + if(DV->getName() == varName) + return to_string(DV->getLine()); + } + } + } + } + } +} + string CUGeneration::determineVariableType(Instruction *I) { string s = ""; @@ -576,14 +606,18 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; for (auto lvi : cu->localVariableNames) { - *outCUs << "\t\t\t" << xmlEscape(lvi.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(lvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto gvi : cu->globalVariableNames) { - *outCUs << "\t\t\t" << xmlEscape(gvi.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(gvi.name) << "" << endl; } *outCUs << "\t\t" << endl; @@ -758,7 +792,6 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - //NOTE: 'instruction' --> '&*instruction' lid = getLID(&*instruction, fileID); basicBlockName = bb->getName(); @@ -778,6 +811,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); + // if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { suspiciousVariables.insert(varName); @@ -894,7 +928,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(string(it->getName()), rso.str()); + Variable v(string(it->getName()), rso.str(), to_string(f->getSubprogram()->getLine())); n->argumentsList.push_back(v); } @@ -936,7 +970,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap) { int lid; - 
string varName, varType; + string varName, varType, varDefLine; // Changed TerminatorInst to Instuction const Instruction *TInst; string successorBB; @@ -957,7 +991,7 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl auto bbCU = BBIDToCUIDsMap[bb->getName()].begin(); for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - + // errs() << varDefLine << "\n"; if (isa(instruction) || isa(instruction)) { // NOTE: changed 'instruction' to '&*instruction' @@ -968,7 +1002,9 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl // NOTE: changed 'instruction' to '&*instruction', next 2 lines varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - Variable v(varName, varType); + varDefLine = determineVariableDefLine(&*instruction); + + Variable v(varName, varType, varDefLine); std::string prefix("ARRAY"); if (!varType.compare(0, prefix.size(), prefix)) @@ -1159,7 +1195,7 @@ bool CUGeneration::runOnFunction(Function &F) string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(it->getName(), rso.str()); + Variable v(it->getName(), rso.str(), to_string(F.getSubprogram()->getLine())); root->argumentsList.push_back(v); } From 8fecd2b14902a27a36f4fc211ee0eeb0366d4816 Mon Sep 17 00:00:00 2001 From: mohammad Date: Wed, 8 Jul 2020 12:21:39 +0200 Subject: [PATCH 02/31] varDefLine added, 'missing node' resolved --- CUGeneration/CUGenerationPass.cpp | 76 +++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index b6f506fb8..3a9f80f93 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -52,6 +52,7 @@ #include "llvm/Analysis/CallGraph.h" #include #include +#include "llvm/Analysis/TargetLibraryInfo.h" #include "DPUtils.h" @@ -247,6 +248,7 @@ namespace 
/***************************** DiscoPoP Functions ***********************************/ string CUGeneration::determineVariableDefLine(Instruction *I){ string varName = determineVariableName(&*I); + varName = refineVarName(varName); Function *F = I->getFunction(); @@ -259,7 +261,7 @@ string CUGeneration::determineVariableDefLine(Instruction *I){ if(auto *N = dyn_cast(DI->getVariable())){ if(auto *DV = dyn_cast(N)){ if(DV->getName() == varName) - return to_string(DV->getLine()); + return to_string(fileID) + ":" + to_string(DV->getLine()); } } } @@ -575,11 +577,12 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << getChildrenNodesString(root) << "" << endl; if (root->type == nodeTypes::func || root->type == nodeTypes::dummy) { - *outCUs << "\t\t" << endl; for (auto ai : root->argumentsList) { - *outCUs << "\t\t\t" << xmlEscape(ai.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(ai.name) << "" << endl; } *outCUs << "\t\t" << endl; } @@ -779,7 +782,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu = new CU; - //errs() << "cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; + // errs() << "==== " << bb->getName() << "\n"; //"cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; if(bb->getName().size() == 0) bb->setName(cu->ID); @@ -799,7 +802,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { cu-> instructionsLineNumbers.insert(lid); cu-> instructionsCount++; - } + //} if(isa < StoreInst >(instruction)) { @@ -877,6 +880,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } + } if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -893,23 +897,38 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, 
CUVector.push_back(cu); suspiciousVariables.clear(); - //errs() << "cu->basicBlockName: " << cu->basicBlockName << "\n"; - //check for call instructions in current basic block for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if(dyn_cast(instruction) || dyn_cast(instruction)) - continue; + //Mohammad 6.7.2020: Don't create nodes for library functions (c++/llvm). + int32_t lid = getLID(&*instruction, fileID); + if(lid > 0){ + if (isa < CallInst >(instruction)) { - Node *n = new Node; - n->type = nodeTypes::dummy; - + + Function *f = (cast(instruction))->getCalledFunction(); //TODO: DO the same for Invoke inst - //get function name and parameters - Function *f = (cast(instruction))->getCalledFunction(); + //Mohammad 6.7.2020 + Function::iterator FI = f->begin(); + bool externalFunction = true; + string lid; + + for (Function::iterator FI = f->begin(), FE = f->end(); FI != FE; ++FI){ + externalFunction = false; + auto tempBI = FI->begin(); + if(DebugLoc dl = tempBI->getDebugLoc()){ + lid = to_string(dl->getLine()); + }else{ + lid = to_string(tempBI->getFunction()->getSubprogram()->getLine()); + } + break; + } + if(externalFunction) continue; + Node *n = new Node; + n->type = nodeTypes::dummy; // For ordinary function calls, F has a name. // However, sometimes the function being called // in IR is encapsulated by "bitcast()" due to @@ -919,6 +938,8 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, if(f) { n->name = f->getName(); + + // @Zia: This for loop appeared after the else part. For some function calls, the value of f is null. // I guess that is why you have checked if f is not null here. Anyway, I (Mohammad) had to bring the // for loop inside to avoid the segmentation fault. If you think it is not appropriate, find a solution for it. 
@@ -928,8 +949,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(string(it->getName()), rso.str(), to_string(f->getSubprogram()->getLine())); - + Variable v(string(it->getName()), rso.str(), lid); n->argumentsList.push_back(v); } } @@ -964,6 +984,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } + } } } @@ -991,7 +1012,6 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl auto bbCU = BBIDToCUIDsMap[bb->getName()].begin(); for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - // errs() << varDefLine << "\n"; if (isa(instruction) || isa(instruction)) { // NOTE: changed 'instruction' to '&*instruction' @@ -1150,8 +1170,8 @@ void CUGeneration::getAnalysisUsage(AnalysisUsage &AU) const bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); - // Avoid functions we don't want to instrument - if (funcName.find("llvm.dbg") != string::npos) // llvm debug calls + // Avoid functions we don't want to analyze + if (funcName.find("llvm.") != string::npos) // llvm debug calls { return false; } @@ -1167,9 +1187,9 @@ bool CUGeneration::runOnFunction(Function &F) { return false; } - // if (funcName.find("_GLOBAL_") != string::npos) { // global init calls (c++) - // return false; - // } + if (funcName.find("_GLOBAL_") != string::npos) { // global init calls (c++) + return false; + } if (funcName.find("pthread_") != string::npos) { return false; @@ -1189,13 +1209,23 @@ bool CUGeneration::runOnFunction(Function &F) //Get list of arguments for this function and store them in root. 
// NOTE: changed the way we get the arguments // for (Function::ArgumentListType::iterator it = F.getArgumentList().begin(); it != F.getArgumentList().end(); it++) { + + BasicBlock *BB = &F.getEntryBlock(); + auto BI = BB->begin(); + string lid; + if(DebugLoc dl = BI->getDebugLoc()){ + lid = to_string(dl->getLine()); + }else{ + lid = to_string(BI->getFunction()->getSubprogram()->getLine()); + } + for ( Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) { string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(it->getName(), rso.str(), to_string(F.getSubprogram()->getLine())); + Variable v(it->getName(), rso.str(), to_string(fileID) + ":" + lid); root->argumentsList.push_back(v); } From c2f6bcc7391a9021c9ffa2742a7a239f5f869206 Mon Sep 17 00:00:00 2001 From: mohammad Date: Tue, 14 Jul 2020 10:31:27 +0200 Subject: [PATCH 03/31] c++ stdlib related error resolved --- CUGeneration/CUGenerationPass.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 3a9f80f93..d16ea8b2b 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -247,14 +247,17 @@ namespace /***************************** DiscoPoP Functions ***********************************/ string CUGeneration::determineVariableDefLine(Instruction *I){ + errs()<< "determineVariableDefLine 1\n"; string varName = determineVariableName(&*I); + errs()<< "determineVariableDefLine 2\n"; varName = refineVarName(varName); - + errs()<< "determineVariableDefLine 3\n"; Function *F = I->getFunction(); - + errs()<< "determineVariableDefLine 4\n"; for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) { BasicBlock &BB = *FI; + errs()<< "determineVariableDefLine 5\n"; for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) { if (DbgDeclareInst *DI = dyn_cast(BI)){ @@ -991,7 +994,7 @@ void 
CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap) { int lid; - string varName, varType, varDefLine; + string varName, varType, varDefLine = "----"; // Changed TerminatorInst to Instuction const Instruction *TInst; string successorBB; @@ -1022,7 +1025,7 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl // NOTE: changed 'instruction' to '&*instruction', next 2 lines varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - varDefLine = determineVariableDefLine(&*instruction); + // varDefLine = determineVariableDefLine(&*instruction); Variable v(varName, varType, varDefLine); @@ -1170,6 +1173,7 @@ void CUGeneration::getAnalysisUsage(AnalysisUsage &AU) const bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); + errs() << funcName << "\n"; // Avoid functions we don't want to analyze if (funcName.find("llvm.") != string::npos) // llvm debug calls { @@ -1240,23 +1244,24 @@ bool CUGeneration::runOnFunction(Function &F) Region *TopRegion = RI->getTopLevelRegion(); populateGlobalVariablesSet(TopRegion, globalVariablesSet); + errs() << "1\n"; createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - + errs() << "2\n"; fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); - + errs() << "3\n"; fillStartEndLineNumbers(root); - + errs() << "4\n"; secureStream(); - + errs() << "5\n"; printOriginalVariables(originalVariablesSet); - + errs() << "6\n"; printData(root); - + errs() << "7\n"; for(auto i : CUVector) { delete(i); } - + errs() << "8\n"; return false; } From acf60dee04846a7b4ccaba04d985cb74673a6a96 Mon Sep 17 00:00:00 2001 From: mohammad Date: Tue, 14 Jul 2020 15:18:58 +0200 Subject: [PATCH 04/31] fixed varDefLine --- CUGeneration/CUGenerationPass.cpp | 39 ++++++++++++++----------------- 1 file changed, 
18 insertions(+), 21 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index d16ea8b2b..7fe1dfb37 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -81,7 +81,6 @@ namespace string type; string defLine; string isArray; - string scope; Variable_struct(const Variable_struct &other) : name(other.name), type(other.type), defLine(other.defLine) { @@ -247,29 +246,30 @@ namespace /***************************** DiscoPoP Functions ***********************************/ string CUGeneration::determineVariableDefLine(Instruction *I){ - errs()<< "determineVariableDefLine 1\n"; + string varDefLine = "LineNotFound"; + string varName = determineVariableName(&*I); - errs()<< "determineVariableDefLine 2\n"; varName = refineVarName(varName); - errs()<< "determineVariableDefLine 3\n"; + Function *F = I->getFunction(); - errs()<< "determineVariableDefLine 4\n"; for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) { BasicBlock &BB = *FI; - errs()<< "determineVariableDefLine 5\n"; for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) { if (DbgDeclareInst *DI = dyn_cast(BI)){ if(auto *N = dyn_cast(DI->getVariable())){ if(auto *DV = dyn_cast(N)){ - if(DV->getName() == varName) - return to_string(fileID) + ":" + to_string(DV->getLine()); + if(DV->getName() == varName){ + varDefLine = to_string(fileID) + ":" + to_string(DV->getLine()); + break; + } } } } } } + return varDefLine; } string CUGeneration::determineVariableType(Instruction *I) @@ -293,13 +293,11 @@ string CUGeneration::determineVariableType(Instruction *I) Type *structType = pointsToStruct(PTy); if (structType && gep->getNumOperands() > 2) { - //errs() << "STRUCT DETECTED!\n"; s = "STRUCT,"; } // we've found an array if (PTy->getElementType()->getTypeID() == Type::ArrayTyID ) { - //errs() << "ARRAY DETECTED!\n"; s = "ARRAY,"; } } @@ -994,7 +992,7 @@ void CUGeneration::createCUs(Region *TopRegion, set 
&globalVariablesSet, void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap) { int lid; - string varName, varType, varDefLine = "----"; + string varName, varType, varDefLine; // Changed TerminatorInst to Instuction const Instruction *TInst; string successorBB; @@ -1025,7 +1023,7 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl // NOTE: changed 'instruction' to '&*instruction', next 2 lines varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - // varDefLine = determineVariableDefLine(&*instruction); + varDefLine = determineVariableDefLine(&*instruction); Variable v(varName, varType, varDefLine); @@ -1173,7 +1171,6 @@ void CUGeneration::getAnalysisUsage(AnalysisUsage &AU) const bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); - errs() << funcName << "\n"; // Avoid functions we don't want to analyze if (funcName.find("llvm.") != string::npos) // llvm debug calls { @@ -1244,24 +1241,24 @@ bool CUGeneration::runOnFunction(Function &F) Region *TopRegion = RI->getTopLevelRegion(); populateGlobalVariablesSet(TopRegion, globalVariablesSet); - errs() << "1\n"; + createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - errs() << "2\n"; + fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); - errs() << "3\n"; + fillStartEndLineNumbers(root); - errs() << "4\n"; + secureStream(); - errs() << "5\n"; + printOriginalVariables(originalVariablesSet); - errs() << "6\n"; + printData(root); - errs() << "7\n"; + for(auto i : CUVector) { delete(i); } - errs() << "8\n"; + return false; } From 70c384b9d47edd500f675d7b2214c32e1fd5af6b Mon Sep 17 00:00:00 2001 From: mohammad Date: Tue, 14 Jul 2020 15:41:41 +0200 Subject: [PATCH 05/31] defLine for global vars marked --- CUGeneration/CUGenerationPass.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 7fe1dfb37..3084eb59d 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -251,6 +251,11 @@ string CUGeneration::determineVariableDefLine(Instruction *I){ string varName = determineVariableName(&*I); varName = refineVarName(varName); + if(programGlobalVariablesSet.count(varName)){ + varDefLine = "GlobalVar"; + //TODO: Find definition line of global variables + } + Function *F = I->getFunction(); for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) { From 7a3693fee67c6378755ddea3f839801bd93f3ab0 Mon Sep 17 00:00:00 2001 From: mohammad Date: Tue, 18 Aug 2020 17:51:33 +0200 Subject: [PATCH 06/31] invalid varDefLine, Commited only to checkout master branch --- CUGeneration/CUGenerationPass.cpp | 299 +++++++++++------------------- 1 file changed, 109 insertions(+), 190 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 3084eb59d..1299988a0 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -50,9 +50,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include -#include -#include "llvm/Analysis/TargetLibraryInfo.h" #include "DPUtils.h" @@ -79,14 +76,12 @@ namespace string name; string type; - string defLine; - string isArray; - Variable_struct(const Variable_struct &other) : name(other.name), type(other.type), defLine(other.defLine) + Variable_struct(const Variable_struct &other) : name(other.name), type(other.type) { } - Variable_struct(string n, string t, string d) : name(n), type(t), defLine(d) + Variable_struct(string n, string t) : name(n), type(t) { } @@ -103,7 +98,13 @@ namespace } Variable; - enum nodeTypes {cu, func, loop, dummy}; + enum nodeTypes + { + cu, + func, + loop, + dummy + }; typedef struct Node_struct { @@ -129,17 +130,17 @@ namespace } } Node; - typedef struct 
CU_struct: Node_struct + typedef struct CU_struct : Node_struct { string BBID; //BasicBlock Id where the CU appears in - unsigned readDataSize; // number of bytes read from memory by the cu + unsigned readDataSize; // number of bytes read from memory by the cu unsigned writeDataSize; // number of bytes written into memory during the cu unsigned instructionsCount; //basic block id & successor basic blocks for control dependence - vector successorCUs;// keeps IDs of control dependent CUs + vector successorCUs; // keeps IDs of control dependent CUs string basicBlockName; set instructionsLineNumbers; @@ -162,10 +163,9 @@ namespace void removeCU() { - CUIDCounter--;//if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. + CUIDCounter--; //if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. } - } CU; struct CUGeneration : public FunctionPass @@ -188,7 +188,7 @@ namespace map VarNames; RegionInfoPass *RIpass; - RegionInfo *RI; + RegionInfo *RI; //DiscoPoP Fields //set programGlobalVariables; @@ -201,9 +201,6 @@ namespace string findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder); //DIGlobalVariable* findDbgGlobalDeclare(GlobalVariable *v); - //29.6.2020 Mohammad - string determineVariableDefLine(Instruction *I); - //functions to get list of global variables bool doInitialization(Module &ThisModule); bool doFinalization(Module &M); @@ -232,7 +229,6 @@ namespace void printNode(Node *node, bool isRoot); void closeOutputFiles(); - virtual bool runOnFunction(Function &F); //changed const char * to Stringref StringRef getPassName() const; @@ -241,42 +237,9 @@ namespace CUGeneration() : FunctionPass(ID) {} }; // end of struct CUGeneration -} // end of anonymous namespace - +} // end of anonymous namespace /***************************** DiscoPoP Functions ***********************************/ -string CUGeneration::determineVariableDefLine(Instruction *I){ - string varDefLine = "LineNotFound"; - - 
string varName = determineVariableName(&*I); - varName = refineVarName(varName); - - if(programGlobalVariablesSet.count(varName)){ - varDefLine = "GlobalVar"; - //TODO: Find definition line of global variables - } - - Function *F = I->getFunction(); - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - { - BasicBlock &BB = *FI; - for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) - { - if (DbgDeclareInst *DI = dyn_cast(BI)){ - if(auto *N = dyn_cast(DI->getVariable())){ - if(auto *DV = dyn_cast(N)){ - if(DV->getName() == varName){ - varDefLine = to_string(fileID) + ":" + to_string(DV->getLine()); - break; - } - } - } - } - } - } - return varDefLine; -} - string CUGeneration::determineVariableType(Instruction *I) { string s = ""; @@ -298,11 +261,13 @@ string CUGeneration::determineVariableType(Instruction *I) Type *structType = pointsToStruct(PTy); if (structType && gep->getNumOperands() > 2) { + //errs() << "STRUCT DETECTED!\n"; s = "STRUCT,"; } // we've found an array - if (PTy->getElementType()->getTypeID() == Type::ArrayTyID ) + if (PTy->getElementType()->getTypeID() == Type::ArrayTyID) { + //errs() << "ARRAY DETECTED!\n"; s = "ARRAY,"; } } @@ -312,7 +277,7 @@ string CUGeneration::determineVariableType(Instruction *I) return s; } -string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable/*=defaultIsGlobalVariableValue*/) +string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable /*=defaultIsGlobalVariableValue*/) { assert(I && "Instruction cannot be NULL \n"); @@ -350,7 +315,8 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl { ConstantInt *idxPtr = cast(gep->getOperand(2)); uint64_t memberIdx = *(idxPtr->getValue().getRawData()); - if(!(cast(structType))->isLiteral()){ + if (!(cast(structType))->isLiteral()) + { string strName(structType->getStructName().data()); map::iterator it = Structs.find(strName); if (it != Structs.end()) @@ -382,10 
+348,9 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl return determineVariableName((Instruction *)(operand), isGlobalVariable); } // if we cannot determine the name, then return * - return "";//getOrInsertVarName("*", builder); + return ""; //getOrInsertVarName("*", builder); } - Type *CUGeneration::pointsToStruct(PointerType *PTy) { assert(PTy); @@ -419,7 +384,6 @@ string CUGeneration::getOrInsertVarName(string varName, IRBuilder<> &builder) return vName; } - string CUGeneration::findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder) { assert(structNode); @@ -448,7 +412,8 @@ string CUGeneration::xmlEscape(string data) for (;;) { pos = data.find_first_of("\"&<>", pos); - if (pos == string::npos) break; + if (pos == string::npos) + break; string replacement; switch (data[pos]) { @@ -464,8 +429,7 @@ string CUGeneration::xmlEscape(string data) case '>': replacement = ">"; break; - default: - ; + default:; } data.replace(pos, 1, replacement); pos += replacement.size(); @@ -483,10 +447,8 @@ void CUGeneration::secureStream() outCUIDCounter = new std::ofstream(); outCUIDCounter->open("DP_CUIDCounter.txt", std::ios_base::out); - } - string CUGeneration::getLineNumbersString(set LineNumbers) { string line = ""; @@ -508,7 +470,6 @@ string CUGeneration::getLineNumbersString(set LineNumbers) return line; } - string CUGeneration::getChildrenNodesString(Node *root) { string childrenIDs = ""; @@ -533,7 +494,7 @@ string CUGeneration::getChildrenNodesString(Node *root) void CUGeneration::printOriginalVariables(set &originalVariablesSet) { - for(auto i : originalVariablesSet) + for (auto i : originalVariablesSet) { *outOriginalVariables << i << endl; } @@ -541,23 +502,23 @@ void CUGeneration::printOriginalVariables(set &originalVariablesSet) void CUGeneration::printData(Node *root) { - *outCUs << "" << endl << endl; + *outCUs << "" << endl + << endl; printTree(root, true); - *outCUs << "" << endl << endl << endl; + *outCUs 
<< "" << endl + << endl + << endl; closeOutputFiles(); - } - void CUGeneration::printTree(Node *root, bool isRoot) { Node *tmp = root; printNode(tmp, isRoot); - for (auto node : tmp->childrenNodes) { if (root->type == nodeTypes::func) @@ -571,7 +532,7 @@ void CUGeneration::printTree(Node *root, bool isRoot) void CUGeneration::printNode(Node *root, bool isRoot) { - if(root->name.find("llvm")) + if (root->name.find("llvm")) { *outCUs << "\tID) << "\"" @@ -583,12 +544,11 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << getChildrenNodesString(root) << "" << endl; if (root->type == nodeTypes::func || root->type == nodeTypes::dummy) { + *outCUs << "\t\t" << endl; for (auto ai : root->argumentsList) { - *outCUs << "\t\t\t" - << xmlEscape(ai.name) << "" << endl; + *outCUs << "\t\t\t" << xmlEscape(ai.name) << "" << endl; } *outCUs << "\t\t" << endl; } @@ -608,32 +568,29 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; for (auto sucCUi : cu->successorCUs) { - *outCUs << "\t\t\t" << sucCUi << "" << endl; + *outCUs << "\t\t\t" << sucCUi << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto lvi : cu->localVariableNames) { - *outCUs << "\t\t\t" - << xmlEscape(lvi.name) << "" << endl; + *outCUs << "\t\t\t" << xmlEscape(lvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto gvi : cu->globalVariableNames) { - *outCUs << "\t\t\t" - << xmlEscape(gvi.name) << "" << endl; + *outCUs << "\t\t\t" << xmlEscape(gvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto i : (cu->callLineTofunctionMap)) { - for (auto ii : i.second){ + for (auto ii : i.second) + { *outCUs << "\t\t\t" << ii->ID << "" << endl; // specifica for recursive fucntions inside loops. 
(Mo 5.11.2019) *outCUs << "\t\t\t\t" << ii->recursiveFunctionCall << "" << endl; @@ -642,7 +599,8 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; } - *outCUs << "\t" << endl << endl; + *outCUs << "\t" << endl + << endl; } } @@ -664,7 +622,6 @@ void CUGeneration::closeOutputFiles() } /*********************************** End of output functions **************************************/ - string CUGeneration::refineVarName(string varName) { @@ -674,7 +631,6 @@ string CUGeneration::refineVarName(string varName) varName.erase(varName.find(".addr"), 5); return varName; - } //recieves the region and outputs all variables and variables crossing basic block boundaries in the region. @@ -687,22 +643,22 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl { for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if(isa(instruction) || isa(instruction) || isa(instruction)) + if (isa(instruction) || isa(instruction) || isa(instruction)) { //string varName = refineVarName(determineVariableName(instruction, isGlobalVariable)); // NOTE: changed 'instruction' to '&*instruction' string varName = determineVariableName(&*instruction, isGlobalVariable); - if(isGlobalVariable) // add it if it is a global variable in the program + if (isGlobalVariable) // add it if it is a global variable in the program { programGlobalVariablesSet.insert(varName); } - if(variableToBBMap.find(varName) != variableToBBMap.end()) + if (variableToBBMap.find(varName) != variableToBBMap.end()) { //this var has already once recordded. check for bb id - if(variableToBBMap[varName] != *bb) + if (variableToBBMap[varName] != *bb) { //global variable found. 
Insert into the globalVariablesSet globalVariablesSet.insert(varName); @@ -719,7 +675,6 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl } } - void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap, Node *root, LoopInfo &LI) { // NOTE: changed 'ThisModule->getDataLayout()' to '&ThisModule->getDataLayout()' @@ -734,7 +689,6 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, map loopToNodeMap; - for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { @@ -748,13 +702,11 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { currentNode = loopToNodeMap[loop]; //errs() << "bb->Name: " << bb->getName() << " , " << "node->ID: " << currentNode->ID << "\n"; - - } //else, create a new Node for the loop, add it as children of currentNode and add it to the map. else { - if(bb->getName().size() != 0) + if (bb->getName().size() != 0) { //errs() << "Name: " << bb->getName() << "\n"; } @@ -771,26 +723,26 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if(bb->getName().size() != 0) + if (bb->getName().size() != 0) { //errs() << "bb Name: " << bb->getName() << "\n"; } //end of loops. go to the parent of the loop. may have to jump several nodes in case of nested loops - for(map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end() ; it++ ) - if (it -> second == currentNode) // current node found in loop map jump to its parent. + for (map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end(); it++) + if (it->second == currentNode) // current node found in loop map jump to its parent. 
{ currentNode = currentNode->parentNode; - it = loopToNodeMap.begin(); // search the whole map again for current node - if(it -> second == currentNode) // due to it++ we need to check first element of map ourself - currentNode = currentNode -> parentNode; + it = loopToNodeMap.begin(); // search the whole map again for current node + if (it->second == currentNode) // due to it++ we need to check first element of map ourself + currentNode = currentNode->parentNode; } } cu = new CU; - // errs() << "==== " << bb->getName() << "\n"; //"cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; + //errs() << "cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; - if(bb->getName().size() == 0) + if (bb->getName().size() == 0) bb->setName(cu->ID); cu->BBID = bb->getName(); @@ -801,26 +753,26 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { + //NOTE: 'instruction' --> '&*instruction' lid = getLID(&*instruction, fileID); basicBlockName = bb->getName(); if (lid > 0) { - cu-> instructionsLineNumbers.insert(lid); - cu-> instructionsCount++; - //} - if(isa < StoreInst >(instruction)) + cu->instructionsLineNumbers.insert(lid); + cu->instructionsCount++; + } + if (isa(instruction)) { // get size of data written into memory by this store instruction Value *operand = instruction->getOperand(1); Type *Ty = operand->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu-> writeDataSize += u; + cu->writeDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - // if(globalVariablesSet.count(varName) || 
programGlobalVariablesSet.count(varName)) { suspiciousVariables.insert(varName); @@ -828,16 +780,16 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->writePhaseLineNumbers.insert(lid); } } - else if(isa < LoadInst >(instruction)) + else if (isa(instruction)) { // get size of data read from memory by this load instruction Type *Ty = instruction->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu-> readDataSize += u; + cu->readDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); - if(suspiciousVariables.count(varName)) + if (suspiciousVariables.count(varName)) { // VIOLATION OF CAUTIOUS PROPERTY //it is a load instruction which read the value of a global variable. @@ -846,7 +798,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->readPhaseLineNumbers.erase(lid); cu->writePhaseLineNumbers.erase(lid); cu->instructionsLineNumbers.erase(lid); - cu-> instructionsCount--; + cu->instructionsCount--; if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -861,7 +813,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->basicBlockName = basicBlockName; CUVector.push_back(cu); suspiciousVariables.clear(); - CU *temp = cu;// keep current CU to make a reference to the successor CU + CU *temp = cu; // keep current CU to make a reference to the successor CU cu = new CU; cu->BBID = bb->getName(); @@ -878,7 +830,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { if (lid > 0) cu->readPhaseLineNumbers.insert(lid); @@ -886,7 +838,6 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } - } if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -903,49 +854,32 @@ void 
CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, CUVector.push_back(cu); suspiciousVariables.clear(); + //errs() << "cu->basicBlockName: " << cu->basicBlockName << "\n"; + //check for call instructions in current basic block for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - //Mohammad 6.7.2020: Don't create nodes for library functions (c++/llvm). - int32_t lid = getLID(&*instruction, fileID); - if(lid > 0){ - - if (isa < CallInst >(instruction)) + if (dyn_cast(instruction) || dyn_cast(instruction)) + continue; + if (isa(instruction)) { - - Function *f = (cast(instruction))->getCalledFunction(); + Node *n = new Node; + n->type = nodeTypes::dummy; + //TODO: DO the same for Invoke inst - //Mohammad 6.7.2020 - Function::iterator FI = f->begin(); - bool externalFunction = true; - string lid; - - for (Function::iterator FI = f->begin(), FE = f->end(); FI != FE; ++FI){ - externalFunction = false; - auto tempBI = FI->begin(); - if(DebugLoc dl = tempBI->getDebugLoc()){ - lid = to_string(dl->getLine()); - }else{ - lid = to_string(tempBI->getFunction()->getSubprogram()->getLine()); - } - break; - } - if(externalFunction) continue; + //get function name and parameters + Function *f = (cast(instruction))->getCalledFunction(); - Node *n = new Node; - n->type = nodeTypes::dummy; // For ordinary function calls, F has a name. // However, sometimes the function being called // in IR is encapsulated by "bitcast()" due to // the way of compiling and linking. In this way, // getCalledFunction() method returns NULL. // Also, getName() returns NULL if this is an indirect function call. - if(f) + if (f) { n->name = f->getName(); - - // @Zia: This for loop appeared after the else part. For some function calls, the value of f is null. // I guess that is why you have checked if f is not null here. Anyway, I (Mohammad) had to bring the // for loop inside to avoid the segmentation fault. 
If you think it is not appropriate, find a solution for it. @@ -955,21 +889,23 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(string(it->getName()), rso.str(), lid); + Variable v(string(it->getName()), rso.str()); + n->argumentsList.push_back(v); } } - else // get name of the indirect function which is called + else // get name of the indirect function which is called { Value *v = (cast(instruction))->getCalledValue(); Value *sv = v->stripPointerCasts(); - StringRef fname = sv->getName(); + StringRef fname = sv->getName(); n->name = fname; } //Recursive functions (Mo 5.11.2019) - CallGraphWrapperPass* CGWP = &(getAnalysis()); - if(isRecursive(*f, CGWP->getCallGraph())){ + CallGraphWrapperPass *CGWP = &(getAnalysis()); + if (isRecursive(*f, CGWP->getCallGraph())) + { int lid = getLID(&*instruction, fileID); n->recursiveFunctionCall = n->name + " " + dputil::decodeLID(lid) + ","; } @@ -979,7 +915,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (auto i : BBCUsVector) { int lid = getLID(&*instruction, fileID); - if(lid >= i->startLine && lid <= i->endLine) + if (lid >= i->startLine && lid <= i->endLine) { i->instructionsLineNumbers.insert(lid); i->childrenNodes.push_back(n); @@ -990,21 +926,20 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } - } } } void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap) { int lid; - string varName, varType, varDefLine; + string varName, varType; // Changed TerminatorInst to Instuction const Instruction *TInst; string successorBB; for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { - CU *lastCU = BBIDToCUIDsMap[bb->getName()].back();//get the last CU in the basic block + CU *lastCU = BBIDToCUIDsMap[bb->getName()].back(); //get the last CU in 
the basic block //get all successor basic blocks for bb TInst = bb->getTerminator(); for (unsigned i = 0, nSucc = TInst->getNumSuccessors(); i < nSucc; ++i) @@ -1018,19 +953,18 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl auto bbCU = BBIDToCUIDsMap[bb->getName()].begin(); for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { + if (isa(instruction) || isa(instruction)) { // NOTE: changed 'instruction' to '&*instruction' lid = getLID(&*instruction, fileID); - if(lid == 0) + if (lid == 0) continue; //varName = refineVarName(determineVariableName(instruction)); // NOTE: changed 'instruction' to '&*instruction', next 2 lines varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - varDefLine = determineVariableDefLine(&*instruction); - - Variable v(varName, varType, varDefLine); + Variable v(varName, varType); std::string prefix("ARRAY"); if (!varType.compare(0, prefix.size(), prefix)) @@ -1040,11 +974,11 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl //errs() << "Name: " << varName << " " << "Type: " << varType << "\n"; - if(lid > (*bbCU)->endLine) + if (lid > (*bbCU)->endLine) { bbCU = next(bbCU, 1); } - if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { (*bbCU)->globalVariableNames.insert(v); originalVariablesSet.insert(varName); @@ -1063,7 +997,7 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { if (root->type == nodeTypes::cu) { - if(start == -1 || start > root->startLine) + if (start == -1 || start > root->startLine) { start = root->startLine; } @@ -1078,11 +1012,8 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { findStartEndLineNumbers(i, start, end); } - } - - void CUGeneration::fillStartEndLineNumbers(Node *root) { if (root->type != 
nodeTypes::cu) @@ -1099,7 +1030,6 @@ void CUGeneration::fillStartEndLineNumbers(Node *root) { fillStartEndLineNumbers(i); } - } bool CUGeneration::doFinalization(Module &M) @@ -1123,16 +1053,15 @@ bool CUGeneration::doInitialization(Module &M) initializeCUIDCounter(); - for(Module::global_iterator I = ThisModule->global_begin(); I != ThisModule->global_end(); I++) + for (Module::global_iterator I = ThisModule->global_begin(); I != ThisModule->global_end(); I++) { Value *globalVariable = dyn_cast(I); string glo = string(globalVariable->getName()); - if(glo.find(".") == glo.npos) + if (glo.find(".") == glo.npos) { programGlobalVariablesSet.insert(glo); originalVariablesSet.insert(glo); } - } return true; @@ -1144,7 +1073,8 @@ void CUGeneration::initializeCUIDCounter() if (dputil::fexists(CUCounterFile)) { - std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in);; + std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in); + ; inCUIDCounter >> CUIDCounter; inCUIDCounter.close(); } @@ -1176,26 +1106,26 @@ void CUGeneration::getAnalysisUsage(AnalysisUsage &AU) const bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); - // Avoid functions we don't want to analyze - if (funcName.find("llvm.") != string::npos) // llvm debug calls + // Avoid functions we don't want to instrument + if (funcName.find("llvm.dbg") != string::npos) // llvm debug calls { return false; } - if (funcName.find("__dp_") != string::npos) // instrumentation calls + if (funcName.find("__dp_") != string::npos) // instrumentation calls { return false; } - if (funcName.find("__cx") != string::npos) // c++ init calls + if (funcName.find("__cx") != string::npos) // c++ init calls { return false; } - if (funcName.find("__clang") != string::npos) // clang helper calls + if (funcName.find("__clang") != string::npos) // clang helper calls { return false; } - if (funcName.find("_GLOBAL_") != string::npos) { // global init calls (c++) - return false; - } + // if 
(funcName.find("_GLOBAL_") != string::npos) { // global init calls (c++) + // return false; + // } if (funcName.find("pthread_") != string::npos) { return false; @@ -1215,23 +1145,13 @@ bool CUGeneration::runOnFunction(Function &F) //Get list of arguments for this function and store them in root. // NOTE: changed the way we get the arguments // for (Function::ArgumentListType::iterator it = F.getArgumentList().begin(); it != F.getArgumentList().end(); it++) { - - BasicBlock *BB = &F.getEntryBlock(); - auto BI = BB->begin(); - string lid; - if(DebugLoc dl = BI->getDebugLoc()){ - lid = to_string(dl->getLine()); - }else{ - lid = to_string(BI->getFunction()->getSubprogram()->getLine()); - } - - for ( Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) + for (Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) { string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(it->getName(), rso.str(), to_string(fileID) + ":" + lid); + Variable v(it->getName(), rso.str()); root->argumentsList.push_back(v); } @@ -1246,7 +1166,6 @@ bool CUGeneration::runOnFunction(Function &F) Region *TopRegion = RI->getTopLevelRegion(); populateGlobalVariablesSet(TopRegion, globalVariablesSet); - createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); @@ -1259,9 +1178,9 @@ bool CUGeneration::runOnFunction(Function &F) printData(root); - for(auto i : CUVector) + for (auto i : CUVector) { - delete(i); + delete (i); } return false; From cd91fdf1343a7cc84eed8c04b30faf98a26c62b6 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Thu, 27 Aug 2020 17:28:31 +0200 Subject: [PATCH 07/31] Add basic CI --- .github/workflows/ci.yml | 38 +++++++++++++++ .github/workflows/discopop_profiler/test.sh | 54 +++++++++++++++++++++ .github/workflows/graph_analyzer/Dockerfile | 14 ++++++ 3 files changed, 106 
insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100755 .github/workflows/discopop_profiler/test.sh create mode 100644 .github/workflows/graph_analyzer/Dockerfile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..afe829044 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,38 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +name: "DiscoPoP CI" +on: [push, pull_request] + +jobs: + + graph_analyzer: + name: "Graph Analyzer" + runs-on: ubuntu-latest + steps: + - name: "Checkout Repository" + uses: actions/checkout@v2 + - name: "Build Image with Dependencies" + run: docker build -f .github/workflows/graph_analyzer/Dockerfile . --tag graph_analyzer + - name: "Run unit_tests.py" + run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop/graph_analyzer graph_analyzer python unit_tests.py + + discopop_profiler: + name: "Profiler" + runs-on: ubuntu-latest + steps: + - name: "Checkout Repository" + uses: actions/checkout@v2 + - name: "Build DiscoPoP Profiler" + run: | + mkdir build + cd build + cmake -DCMAKE_BUILD_TYPE=Debug .. 
+ make -j3 + - name: "Run DiscoPoP Profiler on test/ Targets" + run: .github/workflows/discopop_profiler/test.sh diff --git a/.github/workflows/discopop_profiler/test.sh b/.github/workflows/discopop_profiler/test.sh new file mode 100755 index 000000000..2a86ef5b4 --- /dev/null +++ b/.github/workflows/discopop_profiler/test.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +cd "$(dirname "$0")/../../.." || exit 1 + +DISCOPOP_SRC=$(pwd) +DISCOPOP_INSTALL="$(pwd)/build" +CXX=clang++-8 + +tests="dependence reduction" # TODO: "cu" yet missing because it fails + +function test_cu { + # CU Generation + ${CXX} -g -O0 -fno-discard-value-names -Xclang -load -Xclang "${DISCOPOP_INSTALL}"/libi/LLVMCUGeneration.so -mllvm -fm-path -mllvm ./FileMapping.txt -c "$1" || return 1 +} + +function test_dependence { + # Dependence Profiling + ${CXX} -g -O0 -fno-discard-value-names -Xclang -load -Xclang "${DISCOPOP_INSTALL}"/libi/LLVMDPInstrumentation.so -mllvm -fm-path -mllvm ./FileMapping.txt -c "$1" -o out.o || return 1 + ${CXX} out.o -L"${DISCOPOP_INSTALL}"/rtlib -lDiscoPoP_RT -lpthread || return 1 + ./a.out || return 1 +} + +function test_reduction { + # Identifying Reduction Operations + ${CXX} -g -O0 -fno-discard-value-names -Xclang -load -Xclang "${DISCOPOP_INSTALL}"/libi/LLVMDPReduction.so -mllvm -fm-path -mllvm ./FileMapping.txt -c "$1" -o out.o || return 1 + ${CXX} out.o -L"${DISCOPOP_INSTALL}"/rtlib -lDiscoPoP_RT -lpthread || return 1 + ./a.out || return 1 +} + +exit_code=0 +for target in ./test/*/; do + pushd $target + ${DISCOPOP_SRC}/scripts/dp-fmap + for test in ${tests}; do + echo "###" + echo "### ${target} ${test}" + echo "###" + if ! 
test_$test "$(ls ./*.c ./*.cpp 2>/dev/null)"; then + exit_code=1 + echo -e "\e[31m### ${target} ${test} failed.\e[0m" + fi + done + popd +done + +exit $exit_code diff --git a/.github/workflows/graph_analyzer/Dockerfile b/.github/workflows/graph_analyzer/Dockerfile new file mode 100644 index 000000000..4108e9142 --- /dev/null +++ b/.github/workflows/graph_analyzer/Dockerfile @@ -0,0 +1,14 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +# Dockerfile for an image containing the dependencies of graph_analyzer + +FROM tiagopeixoto/graph-tool +RUN pacman --noconfirm -S python-pip +COPY graph_analyzer/requirements.txt /requirements.txt +RUN pip install -r /requirements.txt From 4c444a8b744e9e3928cedb998dcf23468dfd2d54 Mon Sep 17 00:00:00 2001 From: Ali Jannesari Date: Thu, 27 Aug 2020 14:19:24 -0500 Subject: [PATCH 08/31] Update README.md adding discopop.org --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d9d496f81..fd959eb34 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ In a nutshell, DiscoPoP performs the following steps: * identifies parallel patterns which can be used to parallelize a code region, * and finally suggests corresponding OpenMP parallelization constructs and clauses to programmers. -A more comprehensive overview of DiscoPoP can be found on our [project website](https://www.discopop.tu-darmstadt.de). +A more comprehensive overview of DiscoPoP can be found on our [project website](http://www.discopop.org/). DiscoPoP is built on top of LLVM. Therefore, DiscoPoP can perform the above-mentioned steps on any source code which can be transferred into the LLVM IR. 
From 8fca900593c95ba996d2ed4a0ce349e0a309c361 Mon Sep 17 00:00:00 2001 From: mohammad Date: Fri, 28 Aug 2020 09:55:31 +0200 Subject: [PATCH 09/31] fixed #26 seg fault --- CUGeneration/CUGenerationPass.cpp | 213 ++++++++++++++++-------------- 1 file changed, 111 insertions(+), 102 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index b6f506fb8..42c75fcb7 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -103,7 +103,13 @@ namespace } Variable; - enum nodeTypes {cu, func, loop, dummy}; + enum nodeTypes + { + cu, + func, + loop, + dummy + }; typedef struct Node_struct { @@ -129,17 +135,17 @@ namespace } } Node; - typedef struct CU_struct: Node_struct + typedef struct CU_struct : Node_struct { string BBID; //BasicBlock Id where the CU appears in - unsigned readDataSize; // number of bytes read from memory by the cu + unsigned readDataSize; // number of bytes read from memory by the cu unsigned writeDataSize; // number of bytes written into memory during the cu unsigned instructionsCount; //basic block id & successor basic blocks for control dependence - vector successorCUs;// keeps IDs of control dependent CUs + vector successorCUs; // keeps IDs of control dependent CUs string basicBlockName; set instructionsLineNumbers; @@ -162,10 +168,9 @@ namespace void removeCU() { - CUIDCounter--;//if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. + CUIDCounter--; //if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. 
} - } CU; struct CUGeneration : public FunctionPass @@ -188,7 +193,7 @@ namespace map VarNames; RegionInfoPass *RIpass; - RegionInfo *RI; + RegionInfo *RI; //DiscoPoP Fields //set programGlobalVariables; @@ -232,7 +237,6 @@ namespace void printNode(Node *node, bool isRoot); void closeOutputFiles(); - virtual bool runOnFunction(Function &F); //changed const char * to Stringref StringRef getPassName() const; @@ -241,11 +245,11 @@ namespace CUGeneration() : FunctionPass(ID) {} }; // end of struct CUGeneration -} // end of anonymous namespace - +} // end of anonymous namespace /***************************** DiscoPoP Functions ***********************************/ -string CUGeneration::determineVariableDefLine(Instruction *I){ +string CUGeneration::determineVariableDefLine(Instruction *I) +{ string varName = determineVariableName(&*I); Function *F = I->getFunction(); @@ -255,10 +259,13 @@ string CUGeneration::determineVariableDefLine(Instruction *I){ BasicBlock &BB = *FI; for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) { - if (DbgDeclareInst *DI = dyn_cast(BI)){ - if(auto *N = dyn_cast(DI->getVariable())){ - if(auto *DV = dyn_cast(N)){ - if(DV->getName() == varName) + if (DbgDeclareInst *DI = dyn_cast(BI)) + { + if (auto *N = dyn_cast(DI->getVariable())) + { + if (auto *DV = dyn_cast(N)) + { + if (DV->getName() == varName) return to_string(DV->getLine()); } } @@ -292,7 +299,7 @@ string CUGeneration::determineVariableType(Instruction *I) s = "STRUCT,"; } // we've found an array - if (PTy->getElementType()->getTypeID() == Type::ArrayTyID ) + if (PTy->getElementType()->getTypeID() == Type::ArrayTyID) { //errs() << "ARRAY DETECTED!\n"; s = "ARRAY,"; @@ -304,7 +311,7 @@ string CUGeneration::determineVariableType(Instruction *I) return s; } -string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable/*=defaultIsGlobalVariableValue*/) +string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable 
/*=defaultIsGlobalVariableValue*/) { assert(I && "Instruction cannot be NULL \n"); @@ -342,7 +349,8 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl { ConstantInt *idxPtr = cast(gep->getOperand(2)); uint64_t memberIdx = *(idxPtr->getValue().getRawData()); - if(!(cast(structType))->isLiteral()){ + if (!(cast(structType))->isLiteral()) + { string strName(structType->getStructName().data()); map::iterator it = Structs.find(strName); if (it != Structs.end()) @@ -374,10 +382,9 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl return determineVariableName((Instruction *)(operand), isGlobalVariable); } // if we cannot determine the name, then return * - return "";//getOrInsertVarName("*", builder); + return ""; //getOrInsertVarName("*", builder); } - Type *CUGeneration::pointsToStruct(PointerType *PTy) { assert(PTy); @@ -411,7 +418,6 @@ string CUGeneration::getOrInsertVarName(string varName, IRBuilder<> &builder) return vName; } - string CUGeneration::findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder) { assert(structNode); @@ -440,7 +446,8 @@ string CUGeneration::xmlEscape(string data) for (;;) { pos = data.find_first_of("\"&<>", pos); - if (pos == string::npos) break; + if (pos == string::npos) + break; string replacement; switch (data[pos]) { @@ -456,8 +463,7 @@ string CUGeneration::xmlEscape(string data) case '>': replacement = ">"; break; - default: - ; + default:; } data.replace(pos, 1, replacement); pos += replacement.size(); @@ -475,10 +481,8 @@ void CUGeneration::secureStream() outCUIDCounter = new std::ofstream(); outCUIDCounter->open("DP_CUIDCounter.txt", std::ios_base::out); - } - string CUGeneration::getLineNumbersString(set LineNumbers) { string line = ""; @@ -500,7 +504,6 @@ string CUGeneration::getLineNumbersString(set LineNumbers) return line; } - string CUGeneration::getChildrenNodesString(Node *root) { string childrenIDs = ""; @@ -525,7 +528,7 @@ string 
CUGeneration::getChildrenNodesString(Node *root) void CUGeneration::printOriginalVariables(set &originalVariablesSet) { - for(auto i : originalVariablesSet) + for (auto i : originalVariablesSet) { *outOriginalVariables << i << endl; } @@ -533,23 +536,23 @@ void CUGeneration::printOriginalVariables(set &originalVariablesSet) void CUGeneration::printData(Node *root) { - *outCUs << "" << endl << endl; + *outCUs << "" << endl + << endl; printTree(root, true); - *outCUs << "" << endl << endl << endl; + *outCUs << "" << endl + << endl + << endl; closeOutputFiles(); - } - void CUGeneration::printTree(Node *root, bool isRoot) { Node *tmp = root; printNode(tmp, isRoot); - for (auto node : tmp->childrenNodes) { if (root->type == nodeTypes::func) @@ -563,7 +566,7 @@ void CUGeneration::printTree(Node *root, bool isRoot) void CUGeneration::printNode(Node *root, bool isRoot) { - if(root->name.find("llvm")) + if (root->name.find("llvm")) { *outCUs << "\tID) << "\"" @@ -599,7 +602,7 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; for (auto sucCUi : cu->successorCUs) { - *outCUs << "\t\t\t" << sucCUi << "" << endl; + *outCUs << "\t\t\t" << sucCUi << "" << endl; } *outCUs << "\t\t" << endl; @@ -607,24 +610,25 @@ void CUGeneration::printNode(Node *root, bool isRoot) for (auto lvi : cu->localVariableNames) { *outCUs << "\t\t\t" - << xmlEscape(lvi.name) << "" << endl; + << " defLine=\"" << xmlEscape(lvi.defLine) << "\">" + << xmlEscape(lvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto gvi : cu->globalVariableNames) { - *outCUs << "\t\t\t" - << xmlEscape(gvi.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(gvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto i : (cu->callLineTofunctionMap)) { - for (auto ii : i.second){ + for (auto ii : i.second) + { *outCUs << "\t\t\t" << ii->ID << "" << endl; // specifica for recursive fucntions inside loops. 
(Mo 5.11.2019) *outCUs << "\t\t\t\t" << ii->recursiveFunctionCall << "" << endl; @@ -633,7 +637,8 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; } - *outCUs << "\t" << endl << endl; + *outCUs << "\t" << endl + << endl; } } @@ -655,7 +660,6 @@ void CUGeneration::closeOutputFiles() } /*********************************** End of output functions **************************************/ - string CUGeneration::refineVarName(string varName) { @@ -665,7 +669,6 @@ string CUGeneration::refineVarName(string varName) varName.erase(varName.find(".addr"), 5); return varName; - } //recieves the region and outputs all variables and variables crossing basic block boundaries in the region. @@ -678,22 +681,22 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl { for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if(isa(instruction) || isa(instruction) || isa(instruction)) + if (isa(instruction) || isa(instruction) || isa(instruction)) { //string varName = refineVarName(determineVariableName(instruction, isGlobalVariable)); // NOTE: changed 'instruction' to '&*instruction' string varName = determineVariableName(&*instruction, isGlobalVariable); - if(isGlobalVariable) // add it if it is a global variable in the program + if (isGlobalVariable) // add it if it is a global variable in the program { programGlobalVariablesSet.insert(varName); } - if(variableToBBMap.find(varName) != variableToBBMap.end()) + if (variableToBBMap.find(varName) != variableToBBMap.end()) { //this var has already once recordded. check for bb id - if(variableToBBMap[varName] != *bb) + if (variableToBBMap[varName] != *bb) { //global variable found. 
Insert into the globalVariablesSet globalVariablesSet.insert(varName); @@ -710,7 +713,6 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl } } - void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap, Node *root, LoopInfo &LI) { // NOTE: changed 'ThisModule->getDataLayout()' to '&ThisModule->getDataLayout()' @@ -725,7 +727,6 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, map loopToNodeMap; - for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { @@ -739,13 +740,11 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { currentNode = loopToNodeMap[loop]; //errs() << "bb->Name: " << bb->getName() << " , " << "node->ID: " << currentNode->ID << "\n"; - - } //else, create a new Node for the loop, add it as children of currentNode and add it to the map. else { - if(bb->getName().size() != 0) + if (bb->getName().size() != 0) { //errs() << "Name: " << bb->getName() << "\n"; } @@ -762,18 +761,18 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if(bb->getName().size() != 0) + if (bb->getName().size() != 0) { //errs() << "bb Name: " << bb->getName() << "\n"; } //end of loops. go to the parent of the loop. may have to jump several nodes in case of nested loops - for(map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end() ; it++ ) - if (it -> second == currentNode) // current node found in loop map jump to its parent. + for (map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end(); it++) + if (it->second == currentNode) // current node found in loop map jump to its parent. 
{ currentNode = currentNode->parentNode; - it = loopToNodeMap.begin(); // search the whole map again for current node - if(it -> second == currentNode) // due to it++ we need to check first element of map ourself - currentNode = currentNode -> parentNode; + it = loopToNodeMap.begin(); // search the whole map again for current node + if (it->second == currentNode) // due to it++ we need to check first element of map ourself + currentNode = currentNode->parentNode; } } @@ -781,7 +780,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, //errs() << "cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; - if(bb->getName().size() == 0) + if (bb->getName().size() == 0) bb->setName(cu->ID); cu->BBID = bb->getName(); @@ -797,21 +796,21 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, basicBlockName = bb->getName(); if (lid > 0) { - cu-> instructionsLineNumbers.insert(lid); - cu-> instructionsCount++; + cu->instructionsLineNumbers.insert(lid); + cu->instructionsCount++; } - if(isa < StoreInst >(instruction)) + if (isa(instruction)) { // get size of data written into memory by this store instruction Value *operand = instruction->getOperand(1); Type *Ty = operand->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu-> writeDataSize += u; + cu->writeDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - + // if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { suspiciousVariables.insert(varName); @@ -819,16 +818,16 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->writePhaseLineNumbers.insert(lid); } } - else if(isa < LoadInst >(instruction)) + else if (isa(instruction)) { // get size of data read from memory by this load instruction Type *Ty 
= instruction->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu-> readDataSize += u; + cu->readDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); - if(suspiciousVariables.count(varName)) + if (suspiciousVariables.count(varName)) { // VIOLATION OF CAUTIOUS PROPERTY //it is a load instruction which read the value of a global variable. @@ -837,7 +836,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->readPhaseLineNumbers.erase(lid); cu->writePhaseLineNumbers.erase(lid); cu->instructionsLineNumbers.erase(lid); - cu-> instructionsCount--; + cu->instructionsCount--; if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -852,7 +851,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->basicBlockName = basicBlockName; CUVector.push_back(cu); suspiciousVariables.clear(); - CU *temp = cu;// keep current CU to make a reference to the successor CU + CU *temp = cu; // keep current CU to make a reference to the successor CU cu = new CU; cu->BBID = bb->getName(); @@ -869,7 +868,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { if (lid > 0) cu->readPhaseLineNumbers.insert(lid); @@ -898,9 +897,9 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, //check for call instructions in current basic block for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if(dyn_cast(instruction) || dyn_cast(instruction)) + if (dyn_cast(instruction) || dyn_cast(instruction)) continue; - if (isa < CallInst >(instruction)) + if (isa(instruction)) { Node *n = new Node; n->type = nodeTypes::dummy; @@ -916,7 +915,7 @@ void CUGeneration::createCUs(Region *TopRegion, set 
&globalVariablesSet, // the way of compiling and linking. In this way, // getCalledFunction() method returns NULL. // Also, getName() returns NULL if this is an indirect function call. - if(f) + if (f) { n->name = f->getName(); // @Zia: This for loop appeared after the else part. For some function calls, the value of f is null. @@ -928,22 +927,30 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(string(it->getName()), rso.str(), to_string(f->getSubprogram()->getLine())); + + string line = ""; + if (f->getSubprogram()) + { + if (f->getSubprogram()->getLine()) + line = to_string(f->getSubprogram()->getLine()); + } + Variable v(string(it->getName()), rso.str(), line); n->argumentsList.push_back(v); } } - else // get name of the indirect function which is called + else // get name of the indirect function which is called { Value *v = (cast(instruction))->getCalledValue(); Value *sv = v->stripPointerCasts(); - StringRef fname = sv->getName(); + StringRef fname = sv->getName(); n->name = fname; } //Recursive functions (Mo 5.11.2019) - CallGraphWrapperPass* CGWP = &(getAnalysis()); - if(isRecursive(*f, CGWP->getCallGraph())){ + CallGraphWrapperPass *CGWP = &(getAnalysis()); + if (isRecursive(*f, CGWP->getCallGraph())) + { int lid = getLID(&*instruction, fileID); n->recursiveFunctionCall = n->name + " " + dputil::decodeLID(lid) + ","; } @@ -953,7 +960,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (auto i : BBCUsVector) { int lid = getLID(&*instruction, fileID); - if(lid >= i->startLine && lid <= i->endLine) + if (lid >= i->startLine && lid <= i->endLine) { i->instructionsLineNumbers.insert(lid); i->childrenNodes.push_back(n); @@ -977,7 +984,7 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { - CU 
*lastCU = BBIDToCUIDsMap[bb->getName()].back();//get the last CU in the basic block + CU *lastCU = BBIDToCUIDsMap[bb->getName()].back(); //get the last CU in the basic block //get all successor basic blocks for bb TInst = bb->getTerminator(); for (unsigned i = 0, nSucc = TInst->getNumSuccessors(); i < nSucc; ++i) @@ -996,7 +1003,7 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl { // NOTE: changed 'instruction' to '&*instruction' lid = getLID(&*instruction, fileID); - if(lid == 0) + if (lid == 0) continue; //varName = refineVarName(determineVariableName(instruction)); // NOTE: changed 'instruction' to '&*instruction', next 2 lines @@ -1014,11 +1021,11 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl //errs() << "Name: " << varName << " " << "Type: " << varType << "\n"; - if(lid > (*bbCU)->endLine) + if (lid > (*bbCU)->endLine) { bbCU = next(bbCU, 1); } - if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { (*bbCU)->globalVariableNames.insert(v); originalVariablesSet.insert(varName); @@ -1037,7 +1044,7 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { if (root->type == nodeTypes::cu) { - if(start == -1 || start > root->startLine) + if (start == -1 || start > root->startLine) { start = root->startLine; } @@ -1052,11 +1059,8 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { findStartEndLineNumbers(i, start, end); } - } - - void CUGeneration::fillStartEndLineNumbers(Node *root) { if (root->type != nodeTypes::cu) @@ -1073,7 +1077,6 @@ void CUGeneration::fillStartEndLineNumbers(Node *root) { fillStartEndLineNumbers(i); } - } bool CUGeneration::doFinalization(Module &M) @@ -1097,16 +1100,15 @@ bool CUGeneration::doInitialization(Module &M) initializeCUIDCounter(); - for(Module::global_iterator I = ThisModule->global_begin(); I != 
ThisModule->global_end(); I++) + for (Module::global_iterator I = ThisModule->global_begin(); I != ThisModule->global_end(); I++) { Value *globalVariable = dyn_cast(I); string glo = string(globalVariable->getName()); - if(glo.find(".") == glo.npos) + if (glo.find(".") == glo.npos) { programGlobalVariablesSet.insert(glo); originalVariablesSet.insert(glo); } - } return true; @@ -1118,7 +1120,8 @@ void CUGeneration::initializeCUIDCounter() if (dputil::fexists(CUCounterFile)) { - std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in);; + std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in); + ; inCUIDCounter >> CUIDCounter; inCUIDCounter.close(); } @@ -1151,19 +1154,19 @@ bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); // Avoid functions we don't want to instrument - if (funcName.find("llvm.dbg") != string::npos) // llvm debug calls + if (funcName.find("llvm.dbg") != string::npos) // llvm debug calls { return false; } - if (funcName.find("__dp_") != string::npos) // instrumentation calls + if (funcName.find("__dp_") != string::npos) // instrumentation calls { return false; } - if (funcName.find("__cx") != string::npos) // c++ init calls + if (funcName.find("__cx") != string::npos) // c++ init calls { return false; } - if (funcName.find("__clang") != string::npos) // clang helper calls + if (funcName.find("__clang") != string::npos) // clang helper calls { return false; } @@ -1189,14 +1192,20 @@ bool CUGeneration::runOnFunction(Function &F) //Get list of arguments for this function and store them in root. 
// NOTE: changed the way we get the arguments // for (Function::ArgumentListType::iterator it = F.getArgumentList().begin(); it != F.getArgumentList().end(); it++) { - for ( Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) + for (Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) { string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(it->getName(), rso.str(), to_string(F.getSubprogram()->getLine())); + string line = ""; + if (F.getSubprogram()) + { + if (F.getSubprogram()->getLine()) + line = to_string(F.getSubprogram()->getLine()); + } + Variable v(it->getName(), rso.str(), line); root->argumentsList.push_back(v); } /********************* End of initialize root values ***************************/ @@ -1217,14 +1226,14 @@ bool CUGeneration::runOnFunction(Function &F) fillStartEndLineNumbers(root); secureStream(); - + printOriginalVariables(originalVariablesSet); printData(root); - for(auto i : CUVector) + for (auto i : CUVector) { - delete(i); + delete (i); } return false; From b08ca6cd990f604a4f1f4137e01b8017442c1632 Mon Sep 17 00:00:00 2001 From: mohammad Date: Fri, 28 Aug 2020 12:22:36 +0200 Subject: [PATCH 10/31] pushed changes --- CUGeneration/CUGenerationPass.cpp | 299 +++++++++++++++++++----------- 1 file changed, 190 insertions(+), 109 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 1299988a0..3084eb59d 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -50,6 +50,9 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include +#include +#include "llvm/Analysis/TargetLibraryInfo.h" #include "DPUtils.h" @@ -76,12 +79,14 @@ namespace string name; string type; + string defLine; + string isArray; - Variable_struct(const Variable_struct &other) : name(other.name), type(other.type) + Variable_struct(const Variable_struct &other) : 
name(other.name), type(other.type), defLine(other.defLine) { } - Variable_struct(string n, string t) : name(n), type(t) + Variable_struct(string n, string t, string d) : name(n), type(t), defLine(d) { } @@ -98,13 +103,7 @@ namespace } Variable; - enum nodeTypes - { - cu, - func, - loop, - dummy - }; + enum nodeTypes {cu, func, loop, dummy}; typedef struct Node_struct { @@ -130,17 +129,17 @@ namespace } } Node; - typedef struct CU_struct : Node_struct + typedef struct CU_struct: Node_struct { string BBID; //BasicBlock Id where the CU appears in - unsigned readDataSize; // number of bytes read from memory by the cu + unsigned readDataSize; // number of bytes read from memory by the cu unsigned writeDataSize; // number of bytes written into memory during the cu unsigned instructionsCount; //basic block id & successor basic blocks for control dependence - vector successorCUs; // keeps IDs of control dependent CUs + vector successorCUs;// keeps IDs of control dependent CUs string basicBlockName; set instructionsLineNumbers; @@ -163,9 +162,10 @@ namespace void removeCU() { - CUIDCounter--; //if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. + CUIDCounter--;//if a CU does not contain any instruction, e.g. entry basic blocks, then remove it. 
} + } CU; struct CUGeneration : public FunctionPass @@ -188,7 +188,7 @@ namespace map VarNames; RegionInfoPass *RIpass; - RegionInfo *RI; + RegionInfo *RI; //DiscoPoP Fields //set programGlobalVariables; @@ -201,6 +201,9 @@ namespace string findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder); //DIGlobalVariable* findDbgGlobalDeclare(GlobalVariable *v); + //29.6.2020 Mohammad + string determineVariableDefLine(Instruction *I); + //functions to get list of global variables bool doInitialization(Module &ThisModule); bool doFinalization(Module &M); @@ -229,6 +232,7 @@ namespace void printNode(Node *node, bool isRoot); void closeOutputFiles(); + virtual bool runOnFunction(Function &F); //changed const char * to Stringref StringRef getPassName() const; @@ -237,9 +241,42 @@ namespace CUGeneration() : FunctionPass(ID) {} }; // end of struct CUGeneration -} // end of anonymous namespace +} // end of anonymous namespace + /***************************** DiscoPoP Functions ***********************************/ +string CUGeneration::determineVariableDefLine(Instruction *I){ + string varDefLine = "LineNotFound"; + + string varName = determineVariableName(&*I); + varName = refineVarName(varName); + + if(programGlobalVariablesSet.count(varName)){ + varDefLine = "GlobalVar"; + //TODO: Find definition line of global variables + } + + Function *F = I->getFunction(); + for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + { + BasicBlock &BB = *FI; + for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E; ++BI) + { + if (DbgDeclareInst *DI = dyn_cast(BI)){ + if(auto *N = dyn_cast(DI->getVariable())){ + if(auto *DV = dyn_cast(N)){ + if(DV->getName() == varName){ + varDefLine = to_string(fileID) + ":" + to_string(DV->getLine()); + break; + } + } + } + } + } + } + return varDefLine; +} + string CUGeneration::determineVariableType(Instruction *I) { string s = ""; @@ -261,13 +298,11 @@ string 
CUGeneration::determineVariableType(Instruction *I) Type *structType = pointsToStruct(PTy); if (structType && gep->getNumOperands() > 2) { - //errs() << "STRUCT DETECTED!\n"; s = "STRUCT,"; } // we've found an array - if (PTy->getElementType()->getTypeID() == Type::ArrayTyID) + if (PTy->getElementType()->getTypeID() == Type::ArrayTyID ) { - //errs() << "ARRAY DETECTED!\n"; s = "ARRAY,"; } } @@ -277,7 +312,7 @@ string CUGeneration::determineVariableType(Instruction *I) return s; } -string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable /*=defaultIsGlobalVariableValue*/) +string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariable/*=defaultIsGlobalVariableValue*/) { assert(I && "Instruction cannot be NULL \n"); @@ -315,8 +350,7 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl { ConstantInt *idxPtr = cast(gep->getOperand(2)); uint64_t memberIdx = *(idxPtr->getValue().getRawData()); - if (!(cast(structType))->isLiteral()) - { + if(!(cast(structType))->isLiteral()){ string strName(structType->getStructName().data()); map::iterator it = Structs.find(strName); if (it != Structs.end()) @@ -348,9 +382,10 @@ string CUGeneration::determineVariableName(Instruction *I, bool &isGlobalVariabl return determineVariableName((Instruction *)(operand), isGlobalVariable); } // if we cannot determine the name, then return * - return ""; //getOrInsertVarName("*", builder); + return "";//getOrInsertVarName("*", builder); } + Type *CUGeneration::pointsToStruct(PointerType *PTy) { assert(PTy); @@ -384,6 +419,7 @@ string CUGeneration::getOrInsertVarName(string varName, IRBuilder<> &builder) return vName; } + string CUGeneration::findStructMemberName(MDNode *structNode, unsigned idx, IRBuilder<> &builder) { assert(structNode); @@ -412,8 +448,7 @@ string CUGeneration::xmlEscape(string data) for (;;) { pos = data.find_first_of("\"&<>", pos); - if (pos == string::npos) - break; + if (pos == string::npos) break; 
string replacement; switch (data[pos]) { @@ -429,7 +464,8 @@ string CUGeneration::xmlEscape(string data) case '>': replacement = ">"; break; - default:; + default: + ; } data.replace(pos, 1, replacement); pos += replacement.size(); @@ -447,8 +483,10 @@ void CUGeneration::secureStream() outCUIDCounter = new std::ofstream(); outCUIDCounter->open("DP_CUIDCounter.txt", std::ios_base::out); + } + string CUGeneration::getLineNumbersString(set LineNumbers) { string line = ""; @@ -470,6 +508,7 @@ string CUGeneration::getLineNumbersString(set LineNumbers) return line; } + string CUGeneration::getChildrenNodesString(Node *root) { string childrenIDs = ""; @@ -494,7 +533,7 @@ string CUGeneration::getChildrenNodesString(Node *root) void CUGeneration::printOriginalVariables(set &originalVariablesSet) { - for (auto i : originalVariablesSet) + for(auto i : originalVariablesSet) { *outOriginalVariables << i << endl; } @@ -502,23 +541,23 @@ void CUGeneration::printOriginalVariables(set &originalVariablesSet) void CUGeneration::printData(Node *root) { - *outCUs << "" << endl - << endl; + *outCUs << "" << endl << endl; printTree(root, true); - *outCUs << "" << endl - << endl - << endl; + *outCUs << "" << endl << endl << endl; closeOutputFiles(); + } + void CUGeneration::printTree(Node *root, bool isRoot) { Node *tmp = root; printNode(tmp, isRoot); + for (auto node : tmp->childrenNodes) { if (root->type == nodeTypes::func) @@ -532,7 +571,7 @@ void CUGeneration::printTree(Node *root, bool isRoot) void CUGeneration::printNode(Node *root, bool isRoot) { - if (root->name.find("llvm")) + if(root->name.find("llvm")) { *outCUs << "\tID) << "\"" @@ -544,11 +583,12 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << getChildrenNodesString(root) << "" << endl; if (root->type == nodeTypes::func || root->type == nodeTypes::dummy) { - *outCUs << "\t\t" << endl; for (auto ai : root->argumentsList) { - *outCUs << "\t\t\t" << xmlEscape(ai.name) << "" << endl; + *outCUs << 
"\t\t\t" + << xmlEscape(ai.name) << "" << endl; } *outCUs << "\t\t" << endl; } @@ -568,29 +608,32 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; for (auto sucCUi : cu->successorCUs) { - *outCUs << "\t\t\t" << sucCUi << "" << endl; + *outCUs << "\t\t\t" << sucCUi << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto lvi : cu->localVariableNames) { - *outCUs << "\t\t\t" << xmlEscape(lvi.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(lvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto gvi : cu->globalVariableNames) { - *outCUs << "\t\t\t" << xmlEscape(gvi.name) << "" << endl; + *outCUs << "\t\t\t" + << xmlEscape(gvi.name) << "" << endl; } *outCUs << "\t\t" << endl; *outCUs << "\t\t" << endl; for (auto i : (cu->callLineTofunctionMap)) { - for (auto ii : i.second) - { + for (auto ii : i.second){ *outCUs << "\t\t\t" << ii->ID << "" << endl; // specifica for recursive fucntions inside loops. (Mo 5.11.2019) *outCUs << "\t\t\t\t" << ii->recursiveFunctionCall << "" << endl; @@ -599,8 +642,7 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; } - *outCUs << "\t" << endl - << endl; + *outCUs << "\t" << endl << endl; } } @@ -622,6 +664,7 @@ void CUGeneration::closeOutputFiles() } /*********************************** End of output functions **************************************/ + string CUGeneration::refineVarName(string varName) { @@ -631,6 +674,7 @@ string CUGeneration::refineVarName(string varName) varName.erase(varName.find(".addr"), 5); return varName; + } //recieves the region and outputs all variables and variables crossing basic block boundaries in the region. 
@@ -643,22 +687,22 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl { for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if (isa(instruction) || isa(instruction) || isa(instruction)) + if(isa(instruction) || isa(instruction) || isa(instruction)) { //string varName = refineVarName(determineVariableName(instruction, isGlobalVariable)); // NOTE: changed 'instruction' to '&*instruction' string varName = determineVariableName(&*instruction, isGlobalVariable); - if (isGlobalVariable) // add it if it is a global variable in the program + if(isGlobalVariable) // add it if it is a global variable in the program { programGlobalVariablesSet.insert(varName); } - if (variableToBBMap.find(varName) != variableToBBMap.end()) + if(variableToBBMap.find(varName) != variableToBBMap.end()) { //this var has already once recordded. check for bb id - if (variableToBBMap[varName] != *bb) + if(variableToBBMap[varName] != *bb) { //global variable found. Insert into the globalVariablesSet globalVariablesSet.insert(varName); @@ -675,6 +719,7 @@ void CUGeneration::populateGlobalVariablesSet(Region *TopRegion, set &gl } } + void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap, Node *root, LoopInfo &LI) { // NOTE: changed 'ThisModule->getDataLayout()' to '&ThisModule->getDataLayout()' @@ -689,6 +734,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, map loopToNodeMap; + for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { @@ -702,11 +748,13 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { currentNode = loopToNodeMap[loop]; //errs() << "bb->Name: " << bb->getName() << " , " << "node->ID: " << currentNode->ID << "\n"; + + } //else, create a new Node for the loop, add it as children of currentNode and add it to the map. 
else { - if (bb->getName().size() != 0) + if(bb->getName().size() != 0) { //errs() << "Name: " << bb->getName() << "\n"; } @@ -723,26 +771,26 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if (bb->getName().size() != 0) + if(bb->getName().size() != 0) { //errs() << "bb Name: " << bb->getName() << "\n"; } //end of loops. go to the parent of the loop. may have to jump several nodes in case of nested loops - for (map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end(); it++) - if (it->second == currentNode) // current node found in loop map jump to its parent. + for(map::iterator it = loopToNodeMap.begin(); it != loopToNodeMap.end() ; it++ ) + if (it -> second == currentNode) // current node found in loop map jump to its parent. { currentNode = currentNode->parentNode; - it = loopToNodeMap.begin(); // search the whole map again for current node - if (it->second == currentNode) // due to it++ we need to check first element of map ourself - currentNode = currentNode->parentNode; + it = loopToNodeMap.begin(); // search the whole map again for current node + if(it -> second == currentNode) // due to it++ we need to check first element of map ourself + currentNode = currentNode -> parentNode; } } cu = new CU; - //errs() << "cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; + // errs() << "==== " << bb->getName() << "\n"; //"cu->ID: " << cu->ID << " , " << "node->ID: " << currentNode->ID << " , " << "tmpNode->ID: " << tmpNode->ID << " , " << "bb->Name: " << bb->getName() << "\n"; - if (bb->getName().size() == 0) + if(bb->getName().size() == 0) bb->setName(cu->ID); cu->BBID = bb->getName(); @@ -753,26 +801,26 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - //NOTE: 'instruction' --> 
'&*instruction' lid = getLID(&*instruction, fileID); basicBlockName = bb->getName(); if (lid > 0) { - cu->instructionsLineNumbers.insert(lid); - cu->instructionsCount++; - } - if (isa(instruction)) + cu-> instructionsLineNumbers.insert(lid); + cu-> instructionsCount++; + //} + if(isa < StoreInst >(instruction)) { // get size of data written into memory by this store instruction Value *operand = instruction->getOperand(1); Type *Ty = operand->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu->writeDataSize += u; + cu-> writeDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); + // if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { suspiciousVariables.insert(varName); @@ -780,16 +828,16 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->writePhaseLineNumbers.insert(lid); } } - else if (isa(instruction)) + else if(isa < LoadInst >(instruction)) { // get size of data read from memory by this load instruction Type *Ty = instruction->getType(); unsigned u = DL->getTypeSizeInBits(Ty); - cu->readDataSize += u; + cu-> readDataSize += u; //varName = refineVarName(determineVariableName(instruction)); varName = determineVariableName(&*instruction); - if (suspiciousVariables.count(varName)) + if(suspiciousVariables.count(varName)) { // VIOLATION OF CAUTIOUS PROPERTY //it is a load instruction which read the value of a global variable. 
@@ -798,7 +846,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->readPhaseLineNumbers.erase(lid); cu->writePhaseLineNumbers.erase(lid); cu->instructionsLineNumbers.erase(lid); - cu->instructionsCount--; + cu-> instructionsCount--; if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -813,7 +861,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, cu->basicBlockName = basicBlockName; CUVector.push_back(cu); suspiciousVariables.clear(); - CU *temp = cu; // keep current CU to make a reference to the successor CU + CU *temp = cu;// keep current CU to make a reference to the successor CU cu = new CU; cu->BBID = bb->getName(); @@ -830,7 +878,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } else { - if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { if (lid > 0) cu->readPhaseLineNumbers.insert(lid); @@ -838,6 +886,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } + } if (cu->instructionsLineNumbers.empty()) { cu->removeCU(); @@ -854,32 +903,49 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, CUVector.push_back(cu); suspiciousVariables.clear(); - //errs() << "cu->basicBlockName: " << cu->basicBlockName << "\n"; - //check for call instructions in current basic block for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if (dyn_cast(instruction) || dyn_cast(instruction)) - continue; - if (isa(instruction)) - { - Node *n = new Node; - n->type = nodeTypes::dummy; + //Mohammad 6.7.2020: Don't create nodes for library functions (c++/llvm). 
+ int32_t lid = getLID(&*instruction, fileID); + if(lid > 0){ + if (isa < CallInst >(instruction)) + { + + Function *f = (cast(instruction))->getCalledFunction(); //TODO: DO the same for Invoke inst - //get function name and parameters - Function *f = (cast(instruction))->getCalledFunction(); + //Mohammad 6.7.2020 + Function::iterator FI = f->begin(); + bool externalFunction = true; + string lid; + + for (Function::iterator FI = f->begin(), FE = f->end(); FI != FE; ++FI){ + externalFunction = false; + auto tempBI = FI->begin(); + if(DebugLoc dl = tempBI->getDebugLoc()){ + lid = to_string(dl->getLine()); + }else{ + lid = to_string(tempBI->getFunction()->getSubprogram()->getLine()); + } + break; + } + if(externalFunction) continue; + Node *n = new Node; + n->type = nodeTypes::dummy; // For ordinary function calls, F has a name. // However, sometimes the function being called // in IR is encapsulated by "bitcast()" due to // the way of compiling and linking. In this way, // getCalledFunction() method returns NULL. // Also, getName() returns NULL if this is an indirect function call. - if (f) + if(f) { n->name = f->getName(); + + // @Zia: This for loop appeared after the else part. For some function calls, the value of f is null. // I guess that is why you have checked if f is not null here. Anyway, I (Mohammad) had to bring the // for loop inside to avoid the segmentation fault. If you think it is not appropriate, find a solution for it. 
@@ -889,23 +955,21 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(string(it->getName()), rso.str()); - + Variable v(string(it->getName()), rso.str(), lid); n->argumentsList.push_back(v); } } - else // get name of the indirect function which is called + else // get name of the indirect function which is called { Value *v = (cast(instruction))->getCalledValue(); Value *sv = v->stripPointerCasts(); - StringRef fname = sv->getName(); + StringRef fname = sv->getName(); n->name = fname; } //Recursive functions (Mo 5.11.2019) - CallGraphWrapperPass *CGWP = &(getAnalysis()); - if (isRecursive(*f, CGWP->getCallGraph())) - { + CallGraphWrapperPass* CGWP = &(getAnalysis()); + if(isRecursive(*f, CGWP->getCallGraph())){ int lid = getLID(&*instruction, fileID); n->recursiveFunctionCall = n->name + " " + dputil::decodeLID(lid) + ","; } @@ -915,7 +979,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, for (auto i : BBCUsVector) { int lid = getLID(&*instruction, fileID); - if (lid >= i->startLine && lid <= i->endLine) + if(lid >= i->startLine && lid <= i->endLine) { i->instructionsLineNumbers.insert(lid); i->childrenNodes.push_back(n); @@ -926,20 +990,21 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, } } } + } } } void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map> &BBIDToCUIDsMap) { int lid; - string varName, varType; + string varName, varType, varDefLine; // Changed TerminatorInst to Instuction const Instruction *TInst; string successorBB; for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { - CU *lastCU = BBIDToCUIDsMap[bb->getName()].back(); //get the last CU in the basic block + CU *lastCU = BBIDToCUIDsMap[bb->getName()].back();//get the last CU in the basic block //get all successor basic blocks for bb TInst 
= bb->getTerminator(); for (unsigned i = 0, nSucc = TInst->getNumSuccessors(); i < nSucc; ++i) @@ -953,18 +1018,19 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl auto bbCU = BBIDToCUIDsMap[bb->getName()].begin(); for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { - if (isa(instruction) || isa(instruction)) { // NOTE: changed 'instruction' to '&*instruction' lid = getLID(&*instruction, fileID); - if (lid == 0) + if(lid == 0) continue; //varName = refineVarName(determineVariableName(instruction)); // NOTE: changed 'instruction' to '&*instruction', next 2 lines varName = determineVariableName(&*instruction); varType = determineVariableType(&*instruction); - Variable v(varName, varType); + varDefLine = determineVariableDefLine(&*instruction); + + Variable v(varName, varType, varDefLine); std::string prefix("ARRAY"); if (!varType.compare(0, prefix.size(), prefix)) @@ -974,11 +1040,11 @@ void CUGeneration::fillCUVariables(Region *TopRegion, set &globalVariabl //errs() << "Name: " << varName << " " << "Type: " << varType << "\n"; - if (lid > (*bbCU)->endLine) + if(lid > (*bbCU)->endLine) { bbCU = next(bbCU, 1); } - if (globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) + if(globalVariablesSet.count(varName) || programGlobalVariablesSet.count(varName)) { (*bbCU)->globalVariableNames.insert(v); originalVariablesSet.insert(varName); @@ -997,7 +1063,7 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { if (root->type == nodeTypes::cu) { - if (start == -1 || start > root->startLine) + if(start == -1 || start > root->startLine) { start = root->startLine; } @@ -1012,8 +1078,11 @@ void CUGeneration::findStartEndLineNumbers(Node *root, int &start, int &end) { findStartEndLineNumbers(i, start, end); } + } + + void CUGeneration::fillStartEndLineNumbers(Node *root) { if (root->type != nodeTypes::cu) @@ -1030,6 +1099,7 @@ void 
CUGeneration::fillStartEndLineNumbers(Node *root) { fillStartEndLineNumbers(i); } + } bool CUGeneration::doFinalization(Module &M) @@ -1053,15 +1123,16 @@ bool CUGeneration::doInitialization(Module &M) initializeCUIDCounter(); - for (Module::global_iterator I = ThisModule->global_begin(); I != ThisModule->global_end(); I++) + for(Module::global_iterator I = ThisModule->global_begin(); I != ThisModule->global_end(); I++) { Value *globalVariable = dyn_cast(I); string glo = string(globalVariable->getName()); - if (glo.find(".") == glo.npos) + if(glo.find(".") == glo.npos) { programGlobalVariablesSet.insert(glo); originalVariablesSet.insert(glo); } + } return true; @@ -1073,8 +1144,7 @@ void CUGeneration::initializeCUIDCounter() if (dputil::fexists(CUCounterFile)) { - std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in); - ; + std::fstream inCUIDCounter(CUCounterFile, std::ios_base::in);; inCUIDCounter >> CUIDCounter; inCUIDCounter.close(); } @@ -1106,26 +1176,26 @@ void CUGeneration::getAnalysisUsage(AnalysisUsage &AU) const bool CUGeneration::runOnFunction(Function &F) { StringRef funcName = F.getName(); - // Avoid functions we don't want to instrument - if (funcName.find("llvm.dbg") != string::npos) // llvm debug calls + // Avoid functions we don't want to analyze + if (funcName.find("llvm.") != string::npos) // llvm debug calls { return false; } - if (funcName.find("__dp_") != string::npos) // instrumentation calls + if (funcName.find("__dp_") != string::npos) // instrumentation calls { return false; } - if (funcName.find("__cx") != string::npos) // c++ init calls + if (funcName.find("__cx") != string::npos) // c++ init calls { return false; } - if (funcName.find("__clang") != string::npos) // clang helper calls + if (funcName.find("__clang") != string::npos) // clang helper calls { return false; } - // if (funcName.find("_GLOBAL_") != string::npos) { // global init calls (c++) - // return false; - // } + if (funcName.find("_GLOBAL_") != string::npos) { // 
global init calls (c++) + return false; + } if (funcName.find("pthread_") != string::npos) { return false; @@ -1145,13 +1215,23 @@ bool CUGeneration::runOnFunction(Function &F) //Get list of arguments for this function and store them in root. // NOTE: changed the way we get the arguments // for (Function::ArgumentListType::iterator it = F.getArgumentList().begin(); it != F.getArgumentList().end(); it++) { - for (Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) + + BasicBlock *BB = &F.getEntryBlock(); + auto BI = BB->begin(); + string lid; + if(DebugLoc dl = BI->getDebugLoc()){ + lid = to_string(dl->getLine()); + }else{ + lid = to_string(BI->getFunction()->getSubprogram()->getLine()); + } + + for ( Function::arg_iterator it = F.arg_begin(); it != F.arg_end(); it++) { string type_str; raw_string_ostream rso(type_str); (it->getType())->print(rso); - Variable v(it->getName(), rso.str()); + Variable v(it->getName(), rso.str(), to_string(fileID) + ":" + lid); root->argumentsList.push_back(v); } @@ -1166,6 +1246,7 @@ bool CUGeneration::runOnFunction(Function &F) Region *TopRegion = RI->getTopLevelRegion(); populateGlobalVariablesSet(TopRegion, globalVariablesSet); + createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); @@ -1178,9 +1259,9 @@ bool CUGeneration::runOnFunction(Function &F) printData(root); - for (auto i : CUVector) + for(auto i : CUVector) { - delete (i); + delete(i); } return false; From b05d5bfbd73428cd3df2aef40ef2f10c42ed82e9 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 28 Aug 2020 17:24:53 +0200 Subject: [PATCH 11/31] CI: Enable CUGeneration test --- .github/workflows/discopop_profiler/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/discopop_profiler/test.sh b/.github/workflows/discopop_profiler/test.sh index 2a86ef5b4..afa3fb8e0 100755 --- 
a/.github/workflows/discopop_profiler/test.sh +++ b/.github/workflows/discopop_profiler/test.sh @@ -14,7 +14,7 @@ DISCOPOP_SRC=$(pwd) DISCOPOP_INSTALL="$(pwd)/build" CXX=clang++-8 -tests="dependence reduction" # TODO: "cu" yet missing because it fails +tests="cu dependence reduction" function test_cu { # CU Generation From 233ba3111156e184cb36095e3bcdb18846115ab7 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Thu, 10 Sep 2020 12:19:31 +0200 Subject: [PATCH 12/31] reporting of return instructions in Data.xml --- CUGeneration/CUGenerationPass.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 6737f47e4..2b3fab6f5 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -145,6 +145,7 @@ namespace set instructionsLineNumbers; set readPhaseLineNumbers; set writePhaseLineNumbers; + set returnInstructions; set localVariableNames; set globalVariableNames; @@ -588,7 +589,7 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\t" << endl; for (auto ai : root->argumentsList) { - *outCUs << "\t\t\t" << xmlEscape(ai.name) << "" << endl; } @@ -606,7 +607,7 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\tinstructionsLineNumbers).size() << "\">" << getLineNumbersString(cu->instructionsLineNumbers) << "" << endl; *outCUs << "\t\treadPhaseLineNumbers).size() << "\">" << getLineNumbersString(cu->readPhaseLineNumbers) << "" << endl; *outCUs << "\t\twritePhaseLineNumbers).size() << "\">" << getLineNumbersString(cu->writePhaseLineNumbers) << "" << endl; - + *outCUs << "\t\treturnInstructions).size() << "\"" << getLineNumbersString(cu->returnInstructions) << "" << endl; *outCUs << "\t\t" << endl; for (auto sucCUi : cu->successorCUs) { @@ -810,6 +811,9 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { cu-> instructionsLineNumbers.insert(lid); cu-> instructionsCount++; + 
if(isa(instruction)){ + cu->returnInstructions.insert(lid); + } //} if(isa < StoreInst >(instruction)) { @@ -913,7 +917,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, if (isa < CallInst >(instruction)) { - + Function *f = (cast(instruction))->getCalledFunction(); //TODO: DO the same for Invoke inst @@ -921,7 +925,7 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, Function::iterator FI = f->begin(); bool externalFunction = true; string lid; - + for (Function::iterator FI = f->begin(), FE = f->end(); FI != FE; ++FI){ externalFunction = false; auto tempBI = FI->begin(); @@ -1216,7 +1220,7 @@ bool CUGeneration::runOnFunction(Function &F) //Get list of arguments for this function and store them in root. // NOTE: changed the way we get the arguments // for (Function::ArgumentListType::iterator it = F.getArgumentList().begin(); it != F.getArgumentList().end(); it++) { - + BasicBlock *BB = &F.getEntryBlock(); auto BI = BB->begin(); string lid; From a46106b4b4d3623e059d025dc9939e9c623a97ce Mon Sep 17 00:00:00 2001 From: mohammad Date: Thu, 10 Sep 2020 13:25:49 +0200 Subject: [PATCH 13/31] debug signal (000---) removed --- CUGeneration/CUGenerationPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 6737f47e4..7f4b72344 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -1237,7 +1237,7 @@ bool CUGeneration::runOnFunction(Function &F) root->argumentsList.push_back(v); } /********************* End of initialize root values ***************************/ - +// errs()<< "000---\n"; // NOTE: changed the pass name for loopinfo -- LoopInfo &LI = getAnalysis(); LoopInfo &LI = getAnalysis().getLoopInfo(); From d8bc421495ef6c4e72729f522385b9d12db68553 Mon Sep 17 00:00:00 2001 From: mohammad Date: Thu, 10 Sep 2020 15:02:06 +0200 Subject: [PATCH 14/31] return lines feature added --- 
CUGeneration/CUGenerationPass.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 2b3fab6f5..444921f10 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -115,6 +115,7 @@ namespace //Only for func type string name; vector argumentsList; + set returnLines; vector childrenNodes; Node_struct *parentNode; @@ -204,6 +205,7 @@ namespace //29.6.2020 Mohammad string determineVariableDefLine(Instruction *I); + void getFunctionReturnLines(Region *TopRegion, Node *root); //functions to get list of global variables bool doInitialization(Module &ThisModule); @@ -247,6 +249,26 @@ namespace /***************************** DiscoPoP Functions ***********************************/ +void CUGeneration::getFunctionReturnLines(Region *TopRegion, Node *root){ + int lid = 0; + for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) + { + for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) + { + if(isa(instruction)){ + string varName = determineVariableName(&*instruction); + size_t pos = varName.find("retval"); + if (pos != varName.npos){ + lid = getLID(&*instruction, fileID); + // errs() << "varName: " << varName << " " << dputil::decodeLID(lid) << "\n"; + if(lid > 0) + root->returnLines.insert(lid); + } + } + } + } +} + string CUGeneration::determineVariableDefLine(Instruction *I){ string varDefLine = "LineNotFound"; @@ -594,6 +616,13 @@ void CUGeneration::printNode(Node *root, bool isRoot) << xmlEscape(ai.name) << "" << endl; } *outCUs << "\t\t" << endl; + + string rlVals = ""; + for (auto rl : root->returnLines) + { + rlVals += dputil::decodeLID(rl) + ", "; + } + *outCUs << "\t\t" << rlVals << "" << endl; } if (root->type == nodeTypes::cu) @@ -1250,6 +1279,8 @@ bool CUGeneration::runOnFunction(Function &F) RI = &(RIpass->getRegionInfo()); Region 
*TopRegion = RI->getTopLevelRegion(); + getFunctionReturnLines(TopRegion, root); + populateGlobalVariablesSet(TopRegion, globalVariablesSet); createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); From 12c83987f030a44a47bf80ce0d2ed3231c6c73b1 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 14 Sep 2020 15:59:01 +0200 Subject: [PATCH 15/31] Make graph_analyzer installable with setup.py --- .github/workflows/ci.yml | 2 +- graph_analyzer/PETGraphX.py | 4 +- graph_analyzer/__init__.py | 11 ++ graph_analyzer/__main__.py | 100 ++++++++++++++++++ graph_analyzer/graph_analyzer.py | 95 ++--------------- graph_analyzer/json_serializer.py | 10 +- graph_analyzer/pattern_detection.py | 10 +- .../pattern_detectors/PatternInfo.py | 2 +- graph_analyzer/pattern_detectors/__init__.py | 7 ++ .../pattern_detectors/do_all_detector.py | 6 +- .../geometric_decomposition_detector.py | 8 +- .../pattern_detectors/pipeline_detector.py | 6 +- .../pattern_detectors/reduction_detector.py | 6 +- graph_analyzer/plugins/__init__.py | 7 ++ graph_analyzer/plugins/pipeline.py | 4 +- graph_analyzer/unit_tests.py | 7 +- graph_analyzer/utils.py | 4 +- setup.py | 66 ++++++++++++ 18 files changed, 232 insertions(+), 123 deletions(-) create mode 100644 graph_analyzer/__init__.py create mode 100644 graph_analyzer/__main__.py create mode 100644 graph_analyzer/pattern_detectors/__init__.py create mode 100644 graph_analyzer/plugins/__init__.py create mode 100644 setup.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index afe829044..6f714f91a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - name: "Build Image with Dependencies" run: docker build -f .github/workflows/graph_analyzer/Dockerfile . 
--tag graph_analyzer - name: "Run unit_tests.py" - run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop/graph_analyzer graph_analyzer python unit_tests.py + run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop graph_analyzer python -m graph_analyzer.unit_tests discopop_profiler: name: "Profiler" diff --git a/graph_analyzer/PETGraphX.py b/graph_analyzer/PETGraphX.py index e0fa334f9..b2258e0a1 100644 --- a/graph_analyzer/PETGraphX.py +++ b/graph_analyzer/PETGraphX.py @@ -13,8 +13,8 @@ import networkx as nx from lxml.objectify import ObjectifiedElement -from parser import readlineToCUIdMap, writelineToCUIdMap, DependenceItem -from variable import Variable +from .parser import readlineToCUIdMap, writelineToCUIdMap, DependenceItem +from .variable import Variable node_props = [ ('BasicBlockID', 'string', '\'\''), diff --git a/graph_analyzer/__init__.py b/graph_analyzer/__init__.py new file mode 100644 index 000000000..2c422d358 --- /dev/null +++ b/graph_analyzer/__init__.py @@ -0,0 +1,11 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +from .graph_analyzer import run + +__version__ = "0.1" diff --git a/graph_analyzer/__main__.py b/graph_analyzer/__main__.py new file mode 100644 index 000000000..476106c04 --- /dev/null +++ b/graph_analyzer/__main__.py @@ -0,0 +1,100 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. 
+ +"""Discopop analyzer + +Usage: + graph_analyzer.py [--path ] [--cu-xml ] [--dep-file ] [--plugins ] \ +[--loop-counter ] [--reduction ] [--json ] [--fmap ] + +Options: + --path= Directory with input data [default: ./] + --cu-xml= CU node xml file [default: Data.xml] + --dep-file= Dependencies text file [default: dp_run_dep.txt] + --loop-counter= Loop counter data [default: loop_counter_output.txt] + --reduction= Reduction variables file [default: reduction.txt] + --fmap= File mapping [default: FileMapping.txt] + --json= Json output + --plugins= Plugins to execute + -h --help Show this screen + -v --version Show version +""" + +import json +import os +import sys +import time + +from docopt import docopt # type:ignore +from schema import Schema, Use, SchemaError # type:ignore + +from . import run, __version__ +from .json_serializer import PatternInfoSerializer + +docopt_schema = Schema({ + '--path': Use(str), + '--cu-xml': Use(str), + '--dep-file': Use(str), + '--loop-counter': Use(str), + '--reduction': Use(str), + '--fmap': Use(str), + '--plugins': Use(str), + '--json': Use(str), +}) + + +def get_path(base_path: str, file_name: str) -> str: + """Combines path and filename if it is not absolute + + :param base_path: path + :param file_name: file name + :return: path to file + """ + return file_name if os.path.isabs(file_name) else os.path.join(base_path, file_name) + + +def main(): + arguments = docopt(__doc__, version=f'DiscoPoP analyzer {__version__}') + + try: + arguments = docopt_schema.validate(arguments) + except SchemaError as e: + exit(e) + + path = arguments['--path'] + + cu_xml = get_path(path, arguments['--cu-xml']) + dep_file = get_path(path, arguments['--dep-file']) + loop_counter_file = get_path(path, arguments['--loop-counter']) + reduction_file = get_path(path, arguments['--reduction']) + file_mapping = get_path(path, 'FileMapping.txt') + + for file in [cu_xml, dep_file, loop_counter_file, reduction_file]: + if not os.path.isfile(file): + 
print(f"File not found: \"{file}\"") + sys.exit() + + plugins = [] if arguments['--plugins'] == 'None' else arguments['--plugins'].split(' ') + + start = time.time() + + res = run(cu_xml, dep_file, loop_counter_file, reduction_file, plugins) + + end = time.time() + + if arguments['--json'] == 'None': + print(str(res)) + else: + with open(arguments['--json'], 'w') as f: + json.dump(res, f, indent=2, cls=PatternInfoSerializer) + + print("Time taken for pattern detection: {0}".format(end - start)) + + +if __name__ == "__main__": + main() diff --git a/graph_analyzer/graph_analyzer.py b/graph_analyzer/graph_analyzer.py index e306740c8..ed6a92061 100644 --- a/graph_analyzer/graph_analyzer.py +++ b/graph_analyzer/graph_analyzer.py @@ -6,59 +6,14 @@ # the 3-Clause BSD License. See the LICENSE file in the package base # directory for details. -"""Discopop analyzer - -Usage: - graph_analyzer.py [--path ] [--cu-xml ] [--dep-file ] [--plugins ] \ -[--loop-counter ] [--reduction ] [--json ] [--fmap ] - -Options: - --path= Directory with input data [default: ./] - --cu-xml= CU node xml file [default: Data.xml] - --dep-file= Dependencies text file [default: dp_run_dep.txt] - --loop-counter= Loop counter data [default: loop_counter_output.txt] - --reduction= Reduction variables file [default: reduction.txt] - --fmap= File mapping [default: FileMapping.txt] - --json= Json output - --plugins= Plugins to execute - -h --help Show this screen - -v --version Show version -""" -import json -import os -import sys -import time +from pathlib import Path from typing import List -from docopt import docopt -from pluginbase import PluginBase -from schema import Schema, Use, SchemaError - -from PETGraphX import PETGraphX -from json_serializer import PatternInfoSerializer -from parser import parse_inputs -from pattern_detection import DetectionResult, PatternDetectorX - -docopt_schema = Schema({ - '--path': Use(str), - '--cu-xml': Use(str), - '--dep-file': Use(str), - '--loop-counter': Use(str), 
- '--reduction': Use(str), - '--fmap': Use(str), - '--plugins': Use(str), - '--json': Use(str), -}) - +from pluginbase import PluginBase # type:ignore -def get_path(base_path: str, file_name: str) -> str: - """Combines path and filename if it is not absolute - - :param base_path: path - :param file_name: file name - :return: path to file - """ - return file_name if os.path.isabs(file_name) else os.path.join(base_path, file_name) +from .PETGraphX import PETGraphX +from .parser import parse_inputs +from .pattern_detection import DetectionResult, PatternDetectorX def run(cu_xml: str, dep_file: str, loop_counter_file: str, reduction_file: str, plugins: List[str]) \ @@ -73,7 +28,7 @@ def run(cu_xml: str, dep_file: str, loop_counter_file: str, reduction_file: str, plugin_base = PluginBase(package='plugins') plugin_source = plugin_base.make_plugin_source( - searchpath=['./plugins']) + searchpath=[Path(__file__).parent / 'plugins']) for plugin_name in plugins: p = plugin_source.load_plugin(plugin_name) @@ -90,41 +45,3 @@ def run(cu_xml: str, dep_file: str, loop_counter_file: str, reduction_file: str, pet = p.run_after(pet) return res - - -if __name__ == "__main__": - arguments = docopt(__doc__, version='DiscoPoP analyzer 0.1') - - try: - arguments = docopt_schema.validate(arguments) - except SchemaError as e: - exit(e) - - path = arguments['--path'] - - cu_xml = get_path(path, arguments['--cu-xml']) - dep_file = get_path(path, arguments['--dep-file']) - loop_counter_file = get_path(path, arguments['--loop-counter']) - reduction_file = get_path(path, arguments['--reduction']) - file_mapping = get_path(path, 'FileMapping.txt') - - for file in [cu_xml, dep_file, loop_counter_file, reduction_file]: - if not os.path.isfile(file): - print(f"File not found: \"{file}\"") - sys.exit() - - plugins = [] if arguments['--plugins'] == 'None' else arguments['--plugins'].split(' ') - - start = time.time() - - res = run(cu_xml, dep_file, loop_counter_file, reduction_file, plugins) - - end 
= time.time() - - if arguments['--json'] == 'None': - print(str(res)) - else: - with open(arguments['--json'], 'w') as f: - json.dump(res, f, indent=2, cls=PatternInfoSerializer) - - print("Time taken for pattern detection: {0}".format(end - start)) diff --git a/graph_analyzer/json_serializer.py b/graph_analyzer/json_serializer.py index 41c61e5c3..6614b3ce6 100644 --- a/graph_analyzer/json_serializer.py +++ b/graph_analyzer/json_serializer.py @@ -8,11 +8,11 @@ from json import JSONEncoder -from PETGraphX import CUNode -from pattern_detection import DetectionResult -from pattern_detectors.PatternInfo import PatternInfo -from pattern_detectors.pipeline_detector import PipelineStage -from variable import Variable +from .PETGraphX import CUNode +from .pattern_detection import DetectionResult +from .pattern_detectors.PatternInfo import PatternInfo +from .pattern_detectors.pipeline_detector import PipelineStage +from .variable import Variable def filter_members(d: dict) -> dict: diff --git a/graph_analyzer/pattern_detection.py b/graph_analyzer/pattern_detection.py index 13b163c96..5c1ccb098 100644 --- a/graph_analyzer/pattern_detection.py +++ b/graph_analyzer/pattern_detection.py @@ -7,11 +7,11 @@ # directory for details. 
from typing import List -from PETGraphX import PETGraphX, NodeType, EdgeType -from pattern_detectors.do_all_detector import run_detection as detect_do_all, DoAllInfo -from pattern_detectors.geometric_decomposition_detector import run_detection as detect_gd, GDInfo -from pattern_detectors.pipeline_detector import run_detection as detect_pipeline, PipelineInfo -from pattern_detectors.reduction_detector import run_detection as detect_reduction, ReductionInfo +from .PETGraphX import PETGraphX, NodeType, EdgeType +from .pattern_detectors.do_all_detector import run_detection as detect_do_all, DoAllInfo +from .pattern_detectors.geometric_decomposition_detector import run_detection as detect_gd, GDInfo +from .pattern_detectors.pipeline_detector import run_detection as detect_pipeline, PipelineInfo +from .pattern_detectors.reduction_detector import run_detection as detect_reduction, ReductionInfo class DetectionResult(object): diff --git a/graph_analyzer/pattern_detectors/PatternInfo.py b/graph_analyzer/pattern_detectors/PatternInfo.py index 6aceb5c1b..3199f4c67 100644 --- a/graph_analyzer/pattern_detectors/PatternInfo.py +++ b/graph_analyzer/pattern_detectors/PatternInfo.py @@ -7,7 +7,7 @@ # directory for details. import json -from PETGraphX import CUNode +from ..PETGraphX import CUNode class PatternInfo(object): diff --git a/graph_analyzer/pattern_detectors/__init__.py b/graph_analyzer/pattern_detectors/__init__.py new file mode 100644 index 000000000..c184ac298 --- /dev/null +++ b/graph_analyzer/pattern_detectors/__init__.py @@ -0,0 +1,7 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. 
diff --git a/graph_analyzer/pattern_detectors/do_all_detector.py b/graph_analyzer/pattern_detectors/do_all_detector.py index 4cfae54e8..aaeef7e0b 100644 --- a/graph_analyzer/pattern_detectors/do_all_detector.py +++ b/graph_analyzer/pattern_detectors/do_all_detector.py @@ -9,9 +9,9 @@ from graph_tool import Vertex -from PETGraphX import PETGraphX, CUNode, NodeType, EdgeType -from pattern_detectors.PatternInfo import PatternInfo -from utils import classify_loop_variables +from .PatternInfo import PatternInfo +from ..PETGraphX import PETGraphX, CUNode, NodeType, EdgeType +from ..utils import classify_loop_variables class DoAllInfo(PatternInfo): diff --git a/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py b/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py index 196e0cfc6..132f3db34 100644 --- a/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py +++ b/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py @@ -10,10 +10,10 @@ import math from typing import Dict, List -from PETGraphX import PETGraphX, NodeType, CUNode, EdgeType -from pattern_detectors.PatternInfo import PatternInfo -from utils import classify_task_vars, get_child_loops -from variable import Variable +from .PatternInfo import PatternInfo +from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType +from ..utils import classify_task_vars, get_child_loops +from ..variable import Variable __loop_iterations: Dict[str, int] = {} diff --git a/graph_analyzer/pattern_detectors/pipeline_detector.py b/graph_analyzer/pattern_detectors/pipeline_detector.py index 035601086..ed90a8345 100644 --- a/graph_analyzer/pattern_detectors/pipeline_detector.py +++ b/graph_analyzer/pattern_detectors/pipeline_detector.py @@ -11,9 +11,9 @@ from graph_tool import Vertex -from PETGraphX import PETGraphX, NodeType, CUNode, EdgeType, DepType -from pattern_detectors.PatternInfo import PatternInfo -from utils import correlation_coefficient, classify_task_vars +from 
.PatternInfo import PatternInfo +from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType, DepType +from ..utils import correlation_coefficient, classify_task_vars __pipeline_threshold = 0.9 diff --git a/graph_analyzer/pattern_detectors/reduction_detector.py b/graph_analyzer/pattern_detectors/reduction_detector.py index f8e9a154b..97da959cb 100644 --- a/graph_analyzer/pattern_detectors/reduction_detector.py +++ b/graph_analyzer/pattern_detectors/reduction_detector.py @@ -11,9 +11,9 @@ from graph_tool import Vertex -from PETGraphX import PETGraphX, NodeType -from pattern_detectors.PatternInfo import PatternInfo -from utils import is_reduction_var, classify_loop_variables +from .PatternInfo import PatternInfo +from ..PETGraphX import PETGraphX, NodeType +from ..utils import is_reduction_var, classify_loop_variables class ReductionInfo(PatternInfo): diff --git a/graph_analyzer/plugins/__init__.py b/graph_analyzer/plugins/__init__.py new file mode 100644 index 000000000..c184ac298 --- /dev/null +++ b/graph_analyzer/plugins/__init__.py @@ -0,0 +1,7 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. 
diff --git a/graph_analyzer/plugins/pipeline.py b/graph_analyzer/plugins/pipeline.py index 150ee4451..da8a6a56f 100644 --- a/graph_analyzer/plugins/pipeline.py +++ b/graph_analyzer/plugins/pipeline.py @@ -1,8 +1,8 @@ from copy import deepcopy from typing import List -from PETGraphX import PETGraphX, NodeType, CUNode, EdgeType -from utils import correlation_coefficient +from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType +from ..utils import correlation_coefficient total = 0 before = [] diff --git a/graph_analyzer/unit_tests.py b/graph_analyzer/unit_tests.py index 7a3346c8c..6716ac495 100644 --- a/graph_analyzer/unit_tests.py +++ b/graph_analyzer/unit_tests.py @@ -1,15 +1,16 @@ import json import os import unittest +from pathlib import Path -from graph_analyzer import run -from json_serializer import PatternInfoSerializer +from . import run +from .json_serializer import PatternInfoSerializer class GraphAnalyzerTest(unittest.TestCase): def test_analyzer_end_to_end(self): # TODO upload test data? 
- path = './../test' + path = Path(__file__).parent.parent / 'test' for file in [f.name for f in os.scandir(path) if f.name.endswith('.json')]: with self.subTest(file=file): cu_xml = os.path.join(path, file[:-5], 'data', 'Data.xml') diff --git a/graph_analyzer/utils.py b/graph_analyzer/utils.py index ae13cfb49..81ef046e8 100644 --- a/graph_analyzer/utils.py +++ b/graph_analyzer/utils.py @@ -13,8 +13,8 @@ import numpy as np from graph_tool.all import Edge -from PETGraphX import PETGraphX, NodeType, CUNode, DepType, EdgeType, Dependency -from variable import Variable +from .PETGraphX import PETGraphX, NodeType, CUNode, DepType, EdgeType, Dependency +from .variable import Variable loop_data = {} diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..56cd6a41a --- /dev/null +++ b/setup.py @@ -0,0 +1,66 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +import os +import re +import sys +from pathlib import Path + +from setuptools import setup, find_packages + +os.chdir(Path(__file__).parent) +SRC = Path("graph_analyzer") + + +def get_version(): + with open(SRC / "__init__.py") as f: + for line in f: + m = re.match(r'__version__ = "(.*)"', line) + if m: + return m.group(1) + raise SystemExit("Could not find version string.") + + +if sys.version_info < (3, 6): + raise SystemExit("Discopop analyzer requires Python >= 3.6.") + +setup( + name="discopop", + version=get_version(), + packages=find_packages(), + url="https://www.discopop.tu-darmstadt.de/", + author="TU Darmstadt and Iowa State University", + author_email="discopop@lists.parallel.informatik.tu-darmstadt.de", + description="DiscoPoP is a tool that helps software developers parallelize their " + "programs with threads. 
It discovers potential parallelism in a " + "sequential program and makes recommendations on how to exploit it.", + long_description=open(SRC / "README.md").read(), + long_description_content_type="text/markdown", + install_requires=[ + "matplotlib", + "networkx", + "pluginbase>=1.0.0", + "contextlib2>=0.5.5", + "docopt>=0.6.2", + "lxml>=4.3.3", + "numpy>=1.16.3", + "schema>=0.7.0", + ], + python_requires=">=3.6", + entry_points={"console_scripts": ["graph_analyzer=graph_analyzer.__main__:main"]}, + zip_safe=True, + classifiers=[ + "Development Status :: 4 - Beta" + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Software Development", + ], +) From fb87cf64a77c154283325c06a1025c8e60cb7a95 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Mon, 21 Sep 2020 09:49:25 +0200 Subject: [PATCH 16/31] hotfix: missing bracket in CUGeneration --- CUGeneration/CUGenerationPass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index d2bac1ec1..66be38fa0 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -264,7 +264,7 @@ void CUGeneration::getFunctionReturnLines(Region *TopRegion, Node *root){ if(lid > 0) root->returnLines.insert(lid); } - } + } } } } @@ -636,7 +636,7 @@ void CUGeneration::printNode(Node *root, bool isRoot) *outCUs << "\t\tinstructionsLineNumbers).size() << "\">" << getLineNumbersString(cu->instructionsLineNumbers) << "" << endl; *outCUs << "\t\treadPhaseLineNumbers).size() << "\">" << getLineNumbersString(cu->readPhaseLineNumbers) << "" << endl; *outCUs << "\t\twritePhaseLineNumbers).size() << "\">" << getLineNumbersString(cu->writePhaseLineNumbers) << "" << endl; - *outCUs << "\t\treturnInstructions).size() << "\"" << getLineNumbersString(cu->returnInstructions) << "" << 
endl; + *outCUs << "\t\treturnInstructions).size() << "\">" << getLineNumbersString(cu->returnInstructions) << "" << endl; *outCUs << "\t\t" << endl; for (auto sucCUi : cu->successorCUs) { From f8c3740c819461e47c89d099fccb051502d1ab44 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Mon, 21 Sep 2020 13:19:04 +0200 Subject: [PATCH 17/31] detection of return statements in addition to return instructions --- CUGeneration/CUGenerationPass.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CUGeneration/CUGenerationPass.cpp b/CUGeneration/CUGenerationPass.cpp index 66be38fa0..4bc5d6c5b 100644 --- a/CUGeneration/CUGenerationPass.cpp +++ b/CUGeneration/CUGenerationPass.cpp @@ -840,9 +840,25 @@ void CUGeneration::createCUs(Region *TopRegion, set &globalVariablesSet, { cu-> instructionsLineNumbers.insert(lid); cu-> instructionsCount++; + // find return instructions if(isa(instruction)){ cu->returnInstructions.insert(lid); } + // find branches to return instructions, i.e. 
return statements + // Lukas 21.09.20 + else if(isa(instruction)){ + if((cast(instruction))->isUnconditional()){ + if((cast(instruction))->getNumSuccessors() == 1){ + BasicBlock* successorBB = (cast(instruction))->getSuccessor(0); + for (BasicBlock::iterator innerInstruction = successorBB->begin(); innerInstruction != successorBB->end(); ++innerInstruction){ + if(isa(innerInstruction)){ + cu->returnInstructions.insert(lid); + break; + } + } + } + } + } //} if(isa < StoreInst >(instruction)) { From 70f7f5a995581a9dfc197a52442f47ce3588eef6 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 21 Sep 2020 15:35:39 +0200 Subject: [PATCH 18/31] setup.py: read install_requires from requirements.txt --- setup.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 56cd6a41a..6a32f8fb5 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,11 @@ def get_version(): raise SystemExit("Could not find version string.") +def get_requirements(): + with open(SRC / "requirements.txt") as f: + return [line.rstrip() for line in f] + + if sys.version_info < (3, 6): raise SystemExit("Discopop analyzer requires Python >= 3.6.") @@ -41,16 +46,7 @@ def get_version(): "sequential program and makes recommendations on how to exploit it.", long_description=open(SRC / "README.md").read(), long_description_content_type="text/markdown", - install_requires=[ - "matplotlib", - "networkx", - "pluginbase>=1.0.0", - "contextlib2>=0.5.5", - "docopt>=0.6.2", - "lxml>=4.3.3", - "numpy>=1.16.3", - "schema>=0.7.0", - ], + install_requires=get_requirements(), python_requires=">=3.6", entry_points={"console_scripts": ["graph_analyzer=graph_analyzer.__main__:main"]}, zip_safe=True, From 11a5db770d8798511d4b7ff90161d214dfc21b15 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 21 Sep 2020 15:45:25 +0200 Subject: [PATCH 19/31] setup.py: read version from 
top-level version.txt --- graph_analyzer/__init__.py | 2 -- graph_analyzer/__main__.py | 5 ++--- setup.py | 8 ++------ version.txt | 1 + 4 files changed, 5 insertions(+), 11 deletions(-) create mode 100644 version.txt diff --git a/graph_analyzer/__init__.py b/graph_analyzer/__init__.py index 2c422d358..404add9fc 100644 --- a/graph_analyzer/__init__.py +++ b/graph_analyzer/__init__.py @@ -7,5 +7,3 @@ # directory for details. from .graph_analyzer import run - -__version__ = "0.1" diff --git a/graph_analyzer/__main__.py b/graph_analyzer/__main__.py index 476106c04..10c2e0ced 100644 --- a/graph_analyzer/__main__.py +++ b/graph_analyzer/__main__.py @@ -22,7 +22,6 @@ --json= Json output --plugins= Plugins to execute -h --help Show this screen - -v --version Show version """ import json @@ -33,7 +32,7 @@ from docopt import docopt # type:ignore from schema import Schema, Use, SchemaError # type:ignore -from . import run, __version__ +from . import run from .json_serializer import PatternInfoSerializer docopt_schema = Schema({ @@ -59,7 +58,7 @@ def get_path(base_path: str, file_name: str) -> str: def main(): - arguments = docopt(__doc__, version=f'DiscoPoP analyzer {__version__}') + arguments = docopt(__doc__) try: arguments = docopt_schema.validate(arguments) diff --git a/setup.py b/setup.py index 6a32f8fb5..31b33722d 100644 --- a/setup.py +++ b/setup.py @@ -18,12 +18,8 @@ def get_version(): - with open(SRC / "__init__.py") as f: - for line in f: - m = re.match(r'__version__ = "(.*)"', line) - if m: - return m.group(1) - raise SystemExit("Could not find version string.") + with open("version.txt") as f: + return f.read().rstrip() def get_requirements(): diff --git a/version.txt b/version.txt new file mode 100644 index 000000000..49d59571f --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +0.1 From c2f02e4145e451bba5ff03abb1be4fb388539e8c Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 21 Sep 2020 15:47:39 +0200 Subject: 
[PATCH 20/31] Move graph_analyzer/{graph_analyzer.py -> __init__.py} --- graph_analyzer/__init__.py | 40 ++++++++++++++++++++++++++- graph_analyzer/graph_analyzer.py | 47 -------------------------------- 2 files changed, 39 insertions(+), 48 deletions(-) delete mode 100644 graph_analyzer/graph_analyzer.py diff --git a/graph_analyzer/__init__.py b/graph_analyzer/__init__.py index 404add9fc..ed6a92061 100644 --- a/graph_analyzer/__init__.py +++ b/graph_analyzer/__init__.py @@ -6,4 +6,42 @@ # the 3-Clause BSD License. See the LICENSE file in the package base # directory for details. -from .graph_analyzer import run +from pathlib import Path +from typing import List + +from pluginbase import PluginBase # type:ignore + +from .PETGraphX import PETGraphX +from .parser import parse_inputs +from .pattern_detection import DetectionResult, PatternDetectorX + + +def run(cu_xml: str, dep_file: str, loop_counter_file: str, reduction_file: str, plugins: List[str]) \ + -> DetectionResult: + cu_dict, dependencies, loop_data, reduction_vars = parse_inputs(cu_xml, dep_file, + loop_counter_file, reduction_file) + + pet = PETGraphX(cu_dict, dependencies, loop_data, reduction_vars) + # TODO add visualization + # pet.show() + + plugin_base = PluginBase(package='plugins') + + plugin_source = plugin_base.make_plugin_source( + searchpath=[Path(__file__).parent / 'plugins']) + + for plugin_name in plugins: + p = plugin_source.load_plugin(plugin_name) + print("executing plugin before: " + plugin_name) + pet = p.run_before(pet) + + pattern_detector = PatternDetectorX(pet) + + res: DetectionResult = pattern_detector.detect_patterns() + + for plugin_name in plugins: + p = plugin_source.load_plugin(plugin_name) + print("executing plugin after: " + plugin_name) + pet = p.run_after(pet) + + return res diff --git a/graph_analyzer/graph_analyzer.py b/graph_analyzer/graph_analyzer.py deleted file mode 100644 index ed6a92061..000000000 --- a/graph_analyzer/graph_analyzer.py +++ /dev/null @@ -1,47 +0,0 
@@ -# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) -# -# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany -# -# This software may be modified and distributed under the terms of -# the 3-Clause BSD License. See the LICENSE file in the package base -# directory for details. - -from pathlib import Path -from typing import List - -from pluginbase import PluginBase # type:ignore - -from .PETGraphX import PETGraphX -from .parser import parse_inputs -from .pattern_detection import DetectionResult, PatternDetectorX - - -def run(cu_xml: str, dep_file: str, loop_counter_file: str, reduction_file: str, plugins: List[str]) \ - -> DetectionResult: - cu_dict, dependencies, loop_data, reduction_vars = parse_inputs(cu_xml, dep_file, - loop_counter_file, reduction_file) - - pet = PETGraphX(cu_dict, dependencies, loop_data, reduction_vars) - # TODO add visualization - # pet.show() - - plugin_base = PluginBase(package='plugins') - - plugin_source = plugin_base.make_plugin_source( - searchpath=[Path(__file__).parent / 'plugins']) - - for plugin_name in plugins: - p = plugin_source.load_plugin(plugin_name) - print("executing plugin before: " + plugin_name) - pet = p.run_before(pet) - - pattern_detector = PatternDetectorX(pet) - - res: DetectionResult = pattern_detector.detect_patterns() - - for plugin_name in plugins: - p = plugin_source.load_plugin(plugin_name) - print("executing plugin after: " + plugin_name) - pet = p.run_after(pet) - - return res From f630ba8959a9d2b616e4e4e1952d4cc4729b27d8 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 21 Sep 2020 15:55:23 +0200 Subject: [PATCH 21/31] setup.py: Remove now-uneeded import --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 31b33722d..c219ea99f 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,6 @@ # directory for details. 
import os -import re import sys from pathlib import Path From 5f637ae7b37d5e4b0d4104f8d754d39470bc57bf Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Wed, 23 Sep 2020 10:37:51 +0200 Subject: [PATCH 22/31] setup.py: Add missing files to generated source package --- MANIFEST.in | 14 ++++++++++++++ version.txt => VERSION | 0 setup.py | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 MANIFEST.in rename version.txt => VERSION (100%) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..86a0e9b4f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,14 @@ +# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) +# +# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany +# +# This software may be modified and distributed under the terms of +# the 3-Clause BSD License. See the LICENSE file in the package base +# directory for details. + +# This file specifies which additional files to include in the Python package. 
+# https://packaging.python.org/guides/using-manifest-in/ + +include VERSION +include graph_analyzer/README.md +include graph_analyzer/requirements.txt diff --git a/version.txt b/VERSION similarity index 100% rename from version.txt rename to VERSION diff --git a/setup.py b/setup.py index c219ea99f..e475704cd 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def get_version(): - with open("version.txt") as f: + with open("VERSION") as f: return f.read().rstrip() @@ -54,4 +54,5 @@ def get_requirements(): "Programming Language :: Python :: 3", "Topic :: Software Development", ], + license_files=["LICENSE"], ) From 1b4d958fca90788fdac57fa4bb39f0dd3738b8ca Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Wed, 23 Sep 2020 12:00:31 +0200 Subject: [PATCH 23/31] setup.py: Fix missing comma --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e475704cd..2bceb48de 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ def get_requirements(): entry_points={"console_scripts": ["graph_analyzer=graph_analyzer.__main__:main"]}, zip_safe=True, classifiers=[ - "Development Status :: 4 - Beta" + "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", From e204477716ce8fdab4ee901872c8f8f09a053ad2 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Wed, 23 Sep 2020 12:34:55 +0200 Subject: [PATCH 24/31] Add CI Workflow to Publish Releases to PyPI --- .github/workflows/publish.yml | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..432db06cd --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,41 @@ +name: "Publish Package" + +on: + push: + tags: + - v* + +jobs: + 
publish: + name: "Publish Package" + runs-on: ubuntu-latest + steps: + - name: "Checkout Repository" + uses: actions/checkout@v2 + - name: "Setup Python" + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: "Get Version String" + id: get_version + env: + GITHUB_REF: ${{ github.ref }} + run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\/v/} + shell: bash + - name: "Generate Python Package" + run: python setup.py sdist + - name: "Create Draft Release on GitHub" + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: v${{ steps.get_version.outputs.VERSION }} + release_name: Version ${{ steps.get_version.outputs.VERSION }} + draft: true + - name: "Publish Distribution to PyPI" + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.pypi_password }} From 759694b1c8ea23e3e4e6052d21ced6091da5e102 Mon Sep 17 00:00:00 2001 From: kirill olokin Date: Mon, 28 Sep 2020 15:21:55 +0200 Subject: [PATCH 25/31] imports from graphtool removed --- graph_analyzer/pattern_detectors/do_all_detector.py | 4 +--- graph_analyzer/pattern_detectors/pipeline_detector.py | 4 +--- graph_analyzer/pattern_detectors/reduction_detector.py | 8 +++----- graph_analyzer/utils.py | 4 ++-- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/graph_analyzer/pattern_detectors/do_all_detector.py b/graph_analyzer/pattern_detectors/do_all_detector.py index 4cfae54e8..b1dd9b280 100644 --- a/graph_analyzer/pattern_detectors/do_all_detector.py +++ b/graph_analyzer/pattern_detectors/do_all_detector.py @@ -7,8 +7,6 @@ # directory for details. 
from typing import List -from graph_tool import Vertex - from PETGraphX import PETGraphX, CUNode, NodeType, EdgeType from pattern_detectors.PatternInfo import PatternInfo from utils import classify_loop_variables @@ -62,7 +60,7 @@ def run_detection(pet: PETGraphX) -> List[DoAllInfo]: return result -def __detect_do_all(pet: PETGraphX, root: Vertex) -> bool: +def __detect_do_all(pet: PETGraphX, root: CUNode) -> bool: """Calculate do-all value for node :param pet: PET graph diff --git a/graph_analyzer/pattern_detectors/pipeline_detector.py b/graph_analyzer/pattern_detectors/pipeline_detector.py index 035601086..5aee45cf6 100644 --- a/graph_analyzer/pattern_detectors/pipeline_detector.py +++ b/graph_analyzer/pattern_detectors/pipeline_detector.py @@ -9,8 +9,6 @@ from typing import List -from graph_tool import Vertex - from PETGraphX import PETGraphX, NodeType, CUNode, EdgeType, DepType from pattern_detectors.PatternInfo import PatternInfo from utils import correlation_coefficient, classify_task_vars @@ -93,7 +91,7 @@ def __out_dep(self, node: CUNode): return [dep for dep in raw if dep[0] in nodes_after] - def __output_stage(self, node: Vertex) -> PipelineStage: + def __output_stage(self, node: CUNode) -> PipelineStage: in_d = self.__in_dep(node) out_d = self.__out_dep(node) diff --git a/graph_analyzer/pattern_detectors/reduction_detector.py b/graph_analyzer/pattern_detectors/reduction_detector.py index f8e9a154b..36549fceb 100644 --- a/graph_analyzer/pattern_detectors/reduction_detector.py +++ b/graph_analyzer/pattern_detectors/reduction_detector.py @@ -9,9 +9,7 @@ from typing import List -from graph_tool import Vertex - -from PETGraphX import PETGraphX, NodeType +from PETGraphX import PETGraphX, NodeType, CUNode from pattern_detectors.PatternInfo import PatternInfo from utils import is_reduction_var, classify_loop_variables @@ -20,7 +18,7 @@ class ReductionInfo(PatternInfo): """Class, that contains reduction detection result """ - def __init__(self, pet: PETGraphX, 
node: Vertex): + def __init__(self, pet: PETGraphX, node: CUNode): """ :param pet: PET graph :param node: node, where reduction was detected @@ -64,7 +62,7 @@ def run_detection(pet: PETGraphX) -> List[ReductionInfo]: return result -def __detect_reduction(pet: PETGraphX, root: Vertex) -> bool: +def __detect_reduction(pet: PETGraphX, root: CUNode) -> bool: """Detects reduction pattern in loop :param pet: PET graph diff --git a/graph_analyzer/utils.py b/graph_analyzer/utils.py index ae13cfb49..cebcc41c8 100644 --- a/graph_analyzer/utils.py +++ b/graph_analyzer/utils.py @@ -11,7 +11,6 @@ from typing import List, Set, Dict, Tuple import numpy as np -from graph_tool.all import Edge from PETGraphX import PETGraphX, NodeType, CUNode, DepType, EdgeType, Dependency from variable import Variable @@ -423,7 +422,8 @@ def classify_loop_variables(pet: PETGraphX, loop: CUNode) -> (List[Variable], Li return first_private, private, last_private, shared, reduction -def classify_task_vars(pet: PETGraphX, task: CUNode, type: str, in_deps: List[Edge], out_deps: List[Edge]): +def classify_task_vars(pet: PETGraphX, task: CUNode, type: str, in_deps: List[Tuple[str, str, Dependency]], + out_deps: List[Tuple[str, str, Dependency]]): """Classify task variables :param pet: CU graph From 09bf93edc9da6ca63204fb3bd33f253c7051a3ca Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Mon, 14 Sep 2020 16:35:35 +0200 Subject: [PATCH 26/31] Add MyPy type checking to the CI workflow --- .github/workflows/ci.yml | 2 ++ .github/workflows/graph_analyzer/Dockerfile | 1 + graph_analyzer/PETGraphX.py | 22 ++++++++-------- graph_analyzer/parser.py | 2 +- .../geometric_decomposition_detector.py | 15 ++++++----- .../pattern_detectors/pipeline_detector.py | 26 +++++++++---------- graph_analyzer/plugins/pipeline.py | 4 +-- graph_analyzer/utils.py | 14 +++++----- 8 files changed, 45 insertions(+), 41 deletions(-) diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index 6f714f91a..b191e8a03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,8 @@ jobs: run: docker build -f .github/workflows/graph_analyzer/Dockerfile . --tag graph_analyzer - name: "Run unit_tests.py" run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop graph_analyzer python -m graph_analyzer.unit_tests + - name: "Run MyPy Type Checker" + run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop graph_analyzer python -m mypy --warn-unused-ignores -p graph_analyzer discopop_profiler: name: "Profiler" diff --git a/.github/workflows/graph_analyzer/Dockerfile b/.github/workflows/graph_analyzer/Dockerfile index 4108e9142..a79ebb4d7 100644 --- a/.github/workflows/graph_analyzer/Dockerfile +++ b/.github/workflows/graph_analyzer/Dockerfile @@ -12,3 +12,4 @@ FROM tiagopeixoto/graph-tool RUN pacman --noconfirm -S python-pip COPY graph_analyzer/requirements.txt /requirements.txt RUN pip install -r /requirements.txt +RUN pip install mypy data-science-types diff --git a/graph_analyzer/PETGraphX.py b/graph_analyzer/PETGraphX.py index b2258e0a1..baf63c4cd 100644 --- a/graph_analyzer/PETGraphX.py +++ b/graph_analyzer/PETGraphX.py @@ -7,11 +7,11 @@ # directory for details. 
from enum import IntEnum, Enum -from typing import Dict, List, Tuple, Set +from typing import Dict, List, Tuple, Set, Optional import matplotlib.pyplot as plt -import networkx as nx -from lxml.objectify import ObjectifiedElement +import networkx as nx # type:ignore +from lxml.objectify import ObjectifiedElement # type:ignore from .parser import readlineToCUIdMap, writelineToCUIdMap, DependenceItem from .variable import Variable @@ -38,7 +38,7 @@ ] -def parse_id(node_id: str) -> (int, int): +def parse_id(node_id: str) -> Tuple[int, int]: split = node_id.split(':') return int(split[0]), int(split[1]) @@ -64,10 +64,10 @@ class NodeType(IntEnum): class Dependency: etype: EdgeType - dtype: DepType = None - var_name: str = None - source: str = None - sink: str = None + dtype: Optional[DepType] = None + var_name: Optional[str] = None + source: Optional[str] = None + sink: Optional[str] = None def __init__(self, type: EdgeType): self.etype = type @@ -304,7 +304,7 @@ def __subtree_of_type_rec(self, root: CUNode, type: NodeType, visited: Set[CUNod :param visited: set of visited nodes :return: list of nodes in subtree """ - res = [] + res: List[CUNode] = [] if root in visited: return res visited.add(root) @@ -380,7 +380,7 @@ def get_all_dependencies(self, node: CUNode, root_loop: CUNode) -> Set[CUNode]: return dep_set - def is_loop_index(self, var_name: str, loops_start_lines: List[str], children: List[CUNode]) -> bool: + def is_loop_index(self, var_name: Optional[str], loops_start_lines: List[str], children: List[CUNode]) -> bool: """Checks, whether the variable is a loop index. 
:param var_name: name of the variable @@ -438,7 +438,7 @@ def get_left_right_subtree(self, target: CUNode, right_subtree: bool) -> List[CU :return: list of nodes in the subtree """ stack: List[CUNode] = [self.main] - res = [] + res: List[CUNode] = [] visited = [] while stack: diff --git a/graph_analyzer/parser.py b/graph_analyzer/parser.py index 1712228a5..07c66c586 100644 --- a/graph_analyzer/parser.py +++ b/graph_analyzer/parser.py @@ -10,7 +10,7 @@ import os from collections import defaultdict -from lxml import objectify +from lxml import objectify # type:ignore readlineToCUIdMap = defaultdict(set) # Map to record which line belongs to read set of nodes. LID -> NodeIds writelineToCUIdMap = defaultdict(set) # Map to record which line belongs to write set of nodes. LID -> NodeIds diff --git a/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py b/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py index 132f3db34..b95f9a7a5 100644 --- a/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py +++ b/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py @@ -8,7 +8,7 @@ import math -from typing import Dict, List +from typing import Dict, List, Tuple, Optional from .PatternInfo import PatternInfo from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType @@ -46,7 +46,7 @@ def __init__(self, pet: PETGraphX, node: CUNode, min_iter: int): self.num_tasks = math.floor(nt) self.pragma = "for (i = 0; i < num-tasks; i++) #pragma omp task" - lp = [] + lp: List = [] fp, p, s, in_dep, out_dep, in_out_dep, r = classify_task_vars(pet, node, "GeometricDecomposition", [], []) fp.append(Variable('int', 'i')) @@ -86,21 +86,21 @@ def run_detection(pet: PETGraphX) -> List[GDInfo]: if __detect_geometric_decomposition(pet, node): node.geometric_decomposition = True test, min_iter = __test_chunk_limit(pet, node) - if test: + if test and min_iter is not None: result.append(GDInfo(pet, node, min_iter)) # result.append(node.id) 
return result -def __test_chunk_limit(pet: PETGraphX, node: CUNode) -> (bool, int): +def __test_chunk_limit(pet: PETGraphX, node: CUNode) -> Tuple[bool, Optional[int]]: """Tests, whether or not the node has inner loops with and none of them have 0 iterations :param pet: PET graph :param node: the node :return: true if node satisfies condition, min iteration number """ - min_iterations_count = math.inf + min_iterations_count = None inner_loop_iter = {} children = pet.direct_children_of_type(node, NodeType.LOOP) @@ -112,8 +112,9 @@ def __test_chunk_limit(pet: PETGraphX, node: CUNode) -> (bool, int): inner_loop_iter[child.start_position()] = __iterations_count(pet, child) for k, v in inner_loop_iter.items(): - min_iterations_count = min(min_iterations_count, v) - return inner_loop_iter and min_iterations_count > 0, min_iterations_count + if min_iterations_count is None or v < min_iterations_count: + min_iterations_count = v + return bool(inner_loop_iter) and (min_iterations_count is None or min_iterations_count > 0), min_iterations_count def __iterations_count(pet: PETGraphX, node: CUNode) -> int: diff --git a/graph_analyzer/pattern_detectors/pipeline_detector.py b/graph_analyzer/pattern_detectors/pipeline_detector.py index 98858125a..70434045b 100644 --- a/graph_analyzer/pattern_detectors/pipeline_detector.py +++ b/graph_analyzer/pattern_detectors/pipeline_detector.py @@ -7,10 +7,10 @@ # directory for details. 
-from typing import List +from typing import List, Tuple from .PatternInfo import PatternInfo -from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType, DepType +from ..PETGraphX import PETGraphX, NodeType, CUNode, EdgeType, DepType, Dependency from ..utils import correlation_coefficient, classify_task_vars __pipeline_threshold = 0.9 @@ -68,28 +68,26 @@ def __init__(self, pet: PETGraphX, node: CUNode): self.stages = [self.__output_stage(s) for s in self._stages] def __in_dep(self, node: CUNode): - raw = [] + raw: List[Tuple[str, str, Dependency]] = [] for n in self._pet.subtree_of_type(node, NodeType.CU): raw.extend((s, t, d) for s, t, d in self._pet.out_edges(n.id, EdgeType.DATA) if d.dtype == DepType.RAW) nodes_before = [node] for i in range(self._stages.index(node)): nodes_before.extend(self._pet.subtree_of_type(self._stages[i], NodeType.CU)) - nodes_before = [n.id for n in nodes_before] - return [dep for dep in raw if dep[1] in nodes_before] + return [dep for dep in raw if dep[1] in [n.id for n in nodes_before]] def __out_dep(self, node: CUNode): - raw = [] + raw: List[Tuple[str, str, Dependency]] = [] for n in self._pet.subtree_of_type(node, NodeType.CU): raw.extend((s, t, d) for s, t, d in self._pet.in_edges(n.id, EdgeType.DATA) if d.dtype == DepType.RAW) nodes_after = [node] for i in range(self._stages.index(node) + 1, len(self._stages)): nodes_after.extend(self._pet.subtree_of_type(self._stages[i], NodeType.CU)) - nodes_after = [n.id for n in nodes_after] - return [dep for dep in raw if dep[0] in nodes_after] + return [dep for dep in raw if dep[0] in [n.id for n in nodes_after]] def __output_stage(self, node: CUNode) -> PipelineStage: in_d = self.__in_dep(node) @@ -157,13 +155,13 @@ def __detect_pipeline(pet: PETGraphX, root: CUNode) -> float: graph_vector = [] for i in range(0, len(loop_subnodes) - 1): - graph_vector.append(1 if pet.depends_ignore_readonly(loop_subnodes[i + 1], loop_subnodes[i], root) else 0) + graph_vector.append(1.0 if 
pet.depends_ignore_readonly(loop_subnodes[i + 1], loop_subnodes[i], root) else 0.0) pipeline_vector = [] for i in range(0, len(loop_subnodes) - 1): - pipeline_vector.append(1) + pipeline_vector.append(1.0) - min_weight = 1 + min_weight = 1.0 for i in range(0, len(loop_subnodes) - 1): for j in range(i + 1, len(loop_subnodes)): if pet.depends_ignore_readonly(loop_subnodes[i], loop_subnodes[j], root): @@ -172,11 +170,11 @@ def __detect_pipeline(pet: PETGraphX, root: CUNode) -> float: if min_weight > node_weight > 0: min_weight = node_weight - if min_weight == 1: - graph_vector.append(0) + if min_weight == 1.0: + graph_vector.append(0.0) pipeline_vector.append(0) else: - graph_vector.append(1) + graph_vector.append(1.0) pipeline_vector.append(min_weight) return correlation_coefficient(graph_vector, pipeline_vector) diff --git a/graph_analyzer/plugins/pipeline.py b/graph_analyzer/plugins/pipeline.py index da8a6a56f..1c61e7094 100644 --- a/graph_analyzer/plugins/pipeline.py +++ b/graph_analyzer/plugins/pipeline.py @@ -5,8 +5,8 @@ from ..utils import correlation_coefficient total = 0 -before = [] -after = [] +before: List[float] = [] +after: List[float] = [] def run_before(pet: PETGraphX): diff --git a/graph_analyzer/utils.py b/graph_analyzer/utils.py index b76c75506..3a27dd2ee 100644 --- a/graph_analyzer/utils.py +++ b/graph_analyzer/utils.py @@ -15,7 +15,7 @@ from .PETGraphX import PETGraphX, NodeType, CUNode, DepType, EdgeType, Dependency from .variable import Variable -loop_data = {} +loop_data: Dict[str, int] = {} def correlation_coefficient(v1: List[float], v2: List[float]) -> float: @@ -25,8 +25,8 @@ def correlation_coefficient(v1: List[float], v2: List[float]) -> float: :param v2: second vector :return: correlation coefficient, 0 if one of the norms is 0 """ - norm_product = np.linalg.norm(v1) * np.linalg.norm(v2) - return 0 if norm_product == 0 else np.dot(v1, v2) / norm_product + norm_product = np.linalg.norm(v1) * np.linalg.norm(v2) # type:ignore + return 0 if 
norm_product == 0 else np.dot(v1, v2) / norm_product # type:ignore def is_loop_index2(pet: PETGraphX, root_loop: CUNode, var_name: str) -> bool: @@ -233,9 +233,11 @@ def is_first_written_new(var: Variable, raw_deps: Set[Tuple[str, str, Dependency print("Empty var.name found. Skipping.") return False for dep in raw_deps: + assert dep[2].var_name is not None if var.name in dep[2].var_name and any([n.id == dep[1] for n in tree]): result = True for warDep in war_deps: + assert warDep[2].var_name is not None if (var.name in warDep[2].var_name and any([n.id == dep[1] for n in tree]) and dep[2].source == warDep[2].sink): @@ -337,7 +339,7 @@ def is_read_in(var: Variable, raw_deps_on: Set[Tuple[str, str, Dependency]], return False -def get_child_loops(pet: PETGraphX, node: CUNode) -> (List[CUNode], List[CUNode]): +def get_child_loops(pet: PETGraphX, node: CUNode) -> Tuple[List[CUNode], List[CUNode]]: """Gets all do-all and reduction subloops :param pet: CU graph @@ -363,8 +365,8 @@ def get_child_loops(pet: PETGraphX, node: CUNode) -> (List[CUNode], List[CUNode] return do_all, reduction -def classify_loop_variables(pet: PETGraphX, loop: CUNode) -> (List[Variable], List[Variable], List[Variable], - List[Variable], List[Variable]): +def classify_loop_variables(pet: PETGraphX, loop: CUNode) -> Tuple[List[Variable], List[Variable], List[Variable], + List[Variable], List[Variable]]: """Classifies variables inside the loop :param pet: CU graph From b496ff37a984abc562284967f9f219b974704db1 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Tue, 29 Sep 2020 16:45:03 +0200 Subject: [PATCH 27/31] Speed-up CI workflow for graph_analyzer --- .github/workflows/ci.yml | 17 +++++++++++++---- .github/workflows/graph_analyzer/Dockerfile | 15 --------------- 2 files changed, 13 insertions(+), 19 deletions(-) delete mode 100644 .github/workflows/graph_analyzer/Dockerfile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
b191e8a03..bdcd0f2b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,15 +14,24 @@ jobs: graph_analyzer: name: "Graph Analyzer" runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ 3.6 ] steps: - name: "Checkout Repository" uses: actions/checkout@v2 - - name: "Build Image with Dependencies" - run: docker build -f .github/workflows/graph_analyzer/Dockerfile . --tag graph_analyzer + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: "Install Dependencies" + run: | + python -m pip install -r graph_analyzer/requirements.txt + python -m pip install mypy data-science-types - name: "Run unit_tests.py" - run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop graph_analyzer python -m graph_analyzer.unit_tests + run: python -m graph_analyzer.unit_tests - name: "Run MyPy Type Checker" - run: docker run --mount type=bind,src=`pwd`,dst=/discopop --workdir=/discopop graph_analyzer python -m mypy --warn-unused-ignores -p graph_analyzer + run: python -m mypy --warn-unused-ignores -p graph_analyzer discopop_profiler: name: "Profiler" diff --git a/.github/workflows/graph_analyzer/Dockerfile b/.github/workflows/graph_analyzer/Dockerfile deleted file mode 100644 index a79ebb4d7..000000000 --- a/.github/workflows/graph_analyzer/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) -# -# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany -# -# This software may be modified and distributed under the terms of -# the 3-Clause BSD License. See the LICENSE file in the package base -# directory for details. 
- -# Dockerfile for an image containing the dependencies of graph_analyzer - -FROM tiagopeixoto/graph-tool -RUN pacman --noconfirm -S python-pip -COPY graph_analyzer/requirements.txt /requirements.txt -RUN pip install -r /requirements.txt -RUN pip install mypy data-science-types From 6faa095e4ea66e87470d905b212a08258be48a0f Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Thu, 1 Oct 2020 12:32:13 +0200 Subject: [PATCH 28/31] Rename graph_analyzer -> discopop_explorer Also update descriptions regarding how to start the application. --- .github/workflows/ci.yml | 10 +++++----- MANIFEST.in | 4 ++-- README.md | 2 +- {graph_analyzer => discopop_explorer}/.gitignore | 0 {graph_analyzer => discopop_explorer}/LICENSE | 0 {graph_analyzer => discopop_explorer}/PETGraphX.py | 0 {graph_analyzer => discopop_explorer}/README.md | 6 +++--- {graph_analyzer => discopop_explorer}/__init__.py | 0 {graph_analyzer => discopop_explorer}/__main__.py | 4 ++-- .../json_serializer.py | 0 {graph_analyzer => discopop_explorer}/parser.py | 0 .../pattern_detection.py | 0 .../pattern_detectors/PatternInfo.py | 0 .../pattern_detectors/__init__.py | 0 .../pattern_detectors/do_all_detector.py | 0 .../geometric_decomposition_detector.py | 0 .../pattern_detectors/pipeline_detector.py | 0 .../pattern_detectors/reduction_detector.py | 0 .../plugins/__init__.py | 0 .../plugins/pipeline.py | 0 {graph_analyzer => discopop_explorer}/requirements.txt | 0 .../test/atax/Data.xml | 0 .../test/atax/dp_run_dep.txt | 0 .../test/atax/loop_counter_output.txt | 0 .../test/atax/reduction.txt | 0 .../test/reduction/Data.xml | 0 .../test/reduction/dep.txt | 0 .../test/reduction/loop_counter_output.txt | 0 .../test/reduction/main.c | 0 .../test/reduction/mainp.c | 0 .../test/reduction/reduction.txt | 0 {graph_analyzer => discopop_explorer}/unit_tests.py | 0 {graph_analyzer => discopop_explorer}/utils.py | 0 {graph_analyzer => discopop_explorer}/variable.py | 0 
docs/DPGuide.md | 4 ++-- setup.py | 6 +++--- test/update_test_output.sh | 4 +--- 37 files changed, 19 insertions(+), 21 deletions(-) rename {graph_analyzer => discopop_explorer}/.gitignore (100%) rename {graph_analyzer => discopop_explorer}/LICENSE (100%) rename {graph_analyzer => discopop_explorer}/PETGraphX.py (100%) rename {graph_analyzer => discopop_explorer}/README.md (92%) rename {graph_analyzer => discopop_explorer}/__init__.py (100%) rename {graph_analyzer => discopop_explorer}/__main__.py (97%) rename {graph_analyzer => discopop_explorer}/json_serializer.py (100%) rename {graph_analyzer => discopop_explorer}/parser.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detection.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/PatternInfo.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/__init__.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/do_all_detector.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/geometric_decomposition_detector.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/pipeline_detector.py (100%) rename {graph_analyzer => discopop_explorer}/pattern_detectors/reduction_detector.py (100%) rename {graph_analyzer => discopop_explorer}/plugins/__init__.py (100%) rename {graph_analyzer => discopop_explorer}/plugins/pipeline.py (100%) rename {graph_analyzer => discopop_explorer}/requirements.txt (100%) rename {graph_analyzer => discopop_explorer}/test/atax/Data.xml (100%) rename {graph_analyzer => discopop_explorer}/test/atax/dp_run_dep.txt (100%) rename {graph_analyzer => discopop_explorer}/test/atax/loop_counter_output.txt (100%) rename {graph_analyzer => discopop_explorer}/test/atax/reduction.txt (100%) rename {graph_analyzer => discopop_explorer}/test/reduction/Data.xml (100%) rename {graph_analyzer => discopop_explorer}/test/reduction/dep.txt (100%) rename {graph_analyzer => 
discopop_explorer}/test/reduction/loop_counter_output.txt (100%) rename {graph_analyzer => discopop_explorer}/test/reduction/main.c (100%) rename {graph_analyzer => discopop_explorer}/test/reduction/mainp.c (100%) rename {graph_analyzer => discopop_explorer}/test/reduction/reduction.txt (100%) rename {graph_analyzer => discopop_explorer}/unit_tests.py (100%) rename {graph_analyzer => discopop_explorer}/utils.py (100%) rename {graph_analyzer => discopop_explorer}/variable.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bdcd0f2b1..d68c53d84 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,8 +11,8 @@ on: [push, pull_request] jobs: - graph_analyzer: - name: "Graph Analyzer" + discopop_explorer: + name: "Explorer" runs-on: ubuntu-latest strategy: matrix: @@ -26,12 +26,12 @@ jobs: python-version: ${{ matrix.python-version }} - name: "Install Dependencies" run: | - python -m pip install -r graph_analyzer/requirements.txt + python -m pip install -r discopop_explorer/requirements.txt python -m pip install mypy data-science-types - name: "Run unit_tests.py" - run: python -m graph_analyzer.unit_tests + run: python -m discopop_explorer.unit_tests - name: "Run MyPy Type Checker" - run: python -m mypy --warn-unused-ignores -p graph_analyzer + run: python -m mypy --warn-unused-ignores -p discopop_explorer discopop_profiler: name: "Profiler" diff --git a/MANIFEST.in b/MANIFEST.in index 86a0e9b4f..74cbfafde 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,5 +10,5 @@ # https://packaging.python.org/guides/using-manifest-in/ include VERSION -include graph_analyzer/README.md -include graph_analyzer/requirements.txt +include discopop_explorer/README.md +include discopop_explorer/requirements.txt diff --git a/README.md b/README.md index fd959eb34..e1161e6fa 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ To obtain the list of reduction operations in the target application, we need to *NOTE:* Please use the exact 
compiler flags that we used. Otherwise, you might not get the correct results, or the analysis might fail. #### Pattern identfication -Once you have all the results generated by DiscoPoP passes, you can use them to identify possible parallel design patterns. To learn more, please read the pattern detection [README](/graph_analyzer/README.md), which explains how to run pattern identification in detail. +Once you have all the results generated by DiscoPoP passes, you can use them to identify possible parallel design patterns. To learn more, please read the pattern detection [README](/discopop_explorer/README.md), which explains how to run pattern identification in detail. ## Walk-through example In the `test/` folder, we have provided sample programs to help you start using DiscoPoP. You can find the walk-through example [here](/docs/DPGuide.md). diff --git a/graph_analyzer/.gitignore b/discopop_explorer/.gitignore similarity index 100% rename from graph_analyzer/.gitignore rename to discopop_explorer/.gitignore diff --git a/graph_analyzer/LICENSE b/discopop_explorer/LICENSE similarity index 100% rename from graph_analyzer/LICENSE rename to discopop_explorer/LICENSE diff --git a/graph_analyzer/PETGraphX.py b/discopop_explorer/PETGraphX.py similarity index 100% rename from graph_analyzer/PETGraphX.py rename to discopop_explorer/PETGraphX.py diff --git a/graph_analyzer/README.md b/discopop_explorer/README.md similarity index 92% rename from graph_analyzer/README.md rename to discopop_explorer/README.md index 7423370bb..a1d553f29 100644 --- a/graph_analyzer/README.md +++ b/discopop_explorer/README.md @@ -21,11 +21,11 @@ To use the graph analyzer tool, you need to have Python 3.6+ installed on your s ### Usage To run the graph analyzer, you can use the following command: -`python3 graph_analyzer.py --path ` +`python3 -m discopop_explorer --path ` You can specify the path to DiscoPoP output files. 
Then, the Python script searches within this path to find the required files. Nevertheless, if you are interested in passing a specific location to each file, here is the detailed usage: - `graph_analyzer.py [--path ] [--cu-xml ] [--dep-file ] [--plugins ] [--loop-counter ] [--reduction ] [--json ]` + `discopop_explorer [--path ] [--cu-xml ] [--dep-file ] [--plugins ] [--loop-counter ] [--reduction ] [--json ]` Options: ``` @@ -51,7 +51,7 @@ The loop itself sums up all numbers from 1 to n. You can run DiscoPoP on **main.c** or just use included output. -After that, you can run **graph_analyzer.py** from **graph_analyzer**. The **--path** argument should point to the output of the DiscoPoP. +After that, you can run **discopop_explorer**. The **--path** argument should point to the output of the DiscoPoP. In this example, the output for reduction will point to the lines 6-9. And it will suggest **pragma omp parallel for** OpenMP directive for parallizing the loop. You will also find **i** classified as a private variable and **sum** as a reduction variable. Thus, the parallelization directive would be suggested as following: diff --git a/graph_analyzer/__init__.py b/discopop_explorer/__init__.py similarity index 100% rename from graph_analyzer/__init__.py rename to discopop_explorer/__init__.py diff --git a/graph_analyzer/__main__.py b/discopop_explorer/__main__.py similarity index 97% rename from graph_analyzer/__main__.py rename to discopop_explorer/__main__.py index 10c2e0ced..232b0e948 100644 --- a/graph_analyzer/__main__.py +++ b/discopop_explorer/__main__.py @@ -6,10 +6,10 @@ # the 3-Clause BSD License. See the LICENSE file in the package base # directory for details. 
-"""Discopop analyzer +"""Discopop explorer Usage: - graph_analyzer.py [--path ] [--cu-xml ] [--dep-file ] [--plugins ] \ + discopop_explorer [--path ] [--cu-xml ] [--dep-file ] [--plugins ] \ [--loop-counter ] [--reduction ] [--json ] [--fmap ] Options: diff --git a/graph_analyzer/json_serializer.py b/discopop_explorer/json_serializer.py similarity index 100% rename from graph_analyzer/json_serializer.py rename to discopop_explorer/json_serializer.py diff --git a/graph_analyzer/parser.py b/discopop_explorer/parser.py similarity index 100% rename from graph_analyzer/parser.py rename to discopop_explorer/parser.py diff --git a/graph_analyzer/pattern_detection.py b/discopop_explorer/pattern_detection.py similarity index 100% rename from graph_analyzer/pattern_detection.py rename to discopop_explorer/pattern_detection.py diff --git a/graph_analyzer/pattern_detectors/PatternInfo.py b/discopop_explorer/pattern_detectors/PatternInfo.py similarity index 100% rename from graph_analyzer/pattern_detectors/PatternInfo.py rename to discopop_explorer/pattern_detectors/PatternInfo.py diff --git a/graph_analyzer/pattern_detectors/__init__.py b/discopop_explorer/pattern_detectors/__init__.py similarity index 100% rename from graph_analyzer/pattern_detectors/__init__.py rename to discopop_explorer/pattern_detectors/__init__.py diff --git a/graph_analyzer/pattern_detectors/do_all_detector.py b/discopop_explorer/pattern_detectors/do_all_detector.py similarity index 100% rename from graph_analyzer/pattern_detectors/do_all_detector.py rename to discopop_explorer/pattern_detectors/do_all_detector.py diff --git a/graph_analyzer/pattern_detectors/geometric_decomposition_detector.py b/discopop_explorer/pattern_detectors/geometric_decomposition_detector.py similarity index 100% rename from graph_analyzer/pattern_detectors/geometric_decomposition_detector.py rename to discopop_explorer/pattern_detectors/geometric_decomposition_detector.py diff --git 
a/graph_analyzer/pattern_detectors/pipeline_detector.py b/discopop_explorer/pattern_detectors/pipeline_detector.py similarity index 100% rename from graph_analyzer/pattern_detectors/pipeline_detector.py rename to discopop_explorer/pattern_detectors/pipeline_detector.py diff --git a/graph_analyzer/pattern_detectors/reduction_detector.py b/discopop_explorer/pattern_detectors/reduction_detector.py similarity index 100% rename from graph_analyzer/pattern_detectors/reduction_detector.py rename to discopop_explorer/pattern_detectors/reduction_detector.py diff --git a/graph_analyzer/plugins/__init__.py b/discopop_explorer/plugins/__init__.py similarity index 100% rename from graph_analyzer/plugins/__init__.py rename to discopop_explorer/plugins/__init__.py diff --git a/graph_analyzer/plugins/pipeline.py b/discopop_explorer/plugins/pipeline.py similarity index 100% rename from graph_analyzer/plugins/pipeline.py rename to discopop_explorer/plugins/pipeline.py diff --git a/graph_analyzer/requirements.txt b/discopop_explorer/requirements.txt similarity index 100% rename from graph_analyzer/requirements.txt rename to discopop_explorer/requirements.txt diff --git a/graph_analyzer/test/atax/Data.xml b/discopop_explorer/test/atax/Data.xml similarity index 100% rename from graph_analyzer/test/atax/Data.xml rename to discopop_explorer/test/atax/Data.xml diff --git a/graph_analyzer/test/atax/dp_run_dep.txt b/discopop_explorer/test/atax/dp_run_dep.txt similarity index 100% rename from graph_analyzer/test/atax/dp_run_dep.txt rename to discopop_explorer/test/atax/dp_run_dep.txt diff --git a/graph_analyzer/test/atax/loop_counter_output.txt b/discopop_explorer/test/atax/loop_counter_output.txt similarity index 100% rename from graph_analyzer/test/atax/loop_counter_output.txt rename to discopop_explorer/test/atax/loop_counter_output.txt diff --git a/graph_analyzer/test/atax/reduction.txt b/discopop_explorer/test/atax/reduction.txt similarity index 100% rename from 
graph_analyzer/test/atax/reduction.txt rename to discopop_explorer/test/atax/reduction.txt diff --git a/graph_analyzer/test/reduction/Data.xml b/discopop_explorer/test/reduction/Data.xml similarity index 100% rename from graph_analyzer/test/reduction/Data.xml rename to discopop_explorer/test/reduction/Data.xml diff --git a/graph_analyzer/test/reduction/dep.txt b/discopop_explorer/test/reduction/dep.txt similarity index 100% rename from graph_analyzer/test/reduction/dep.txt rename to discopop_explorer/test/reduction/dep.txt diff --git a/graph_analyzer/test/reduction/loop_counter_output.txt b/discopop_explorer/test/reduction/loop_counter_output.txt similarity index 100% rename from graph_analyzer/test/reduction/loop_counter_output.txt rename to discopop_explorer/test/reduction/loop_counter_output.txt diff --git a/graph_analyzer/test/reduction/main.c b/discopop_explorer/test/reduction/main.c similarity index 100% rename from graph_analyzer/test/reduction/main.c rename to discopop_explorer/test/reduction/main.c diff --git a/graph_analyzer/test/reduction/mainp.c b/discopop_explorer/test/reduction/mainp.c similarity index 100% rename from graph_analyzer/test/reduction/mainp.c rename to discopop_explorer/test/reduction/mainp.c diff --git a/graph_analyzer/test/reduction/reduction.txt b/discopop_explorer/test/reduction/reduction.txt similarity index 100% rename from graph_analyzer/test/reduction/reduction.txt rename to discopop_explorer/test/reduction/reduction.txt diff --git a/graph_analyzer/unit_tests.py b/discopop_explorer/unit_tests.py similarity index 100% rename from graph_analyzer/unit_tests.py rename to discopop_explorer/unit_tests.py diff --git a/graph_analyzer/utils.py b/discopop_explorer/utils.py similarity index 100% rename from graph_analyzer/utils.py rename to discopop_explorer/utils.py diff --git a/graph_analyzer/variable.py b/discopop_explorer/variable.py similarity index 100% rename from graph_analyzer/variable.py rename to discopop_explorer/variable.py 
diff --git a/docs/DPGuide.md b/docs/DPGuide.md index 0c8e5516e..5ba4e2ccb 100644 --- a/docs/DPGuide.md +++ b/docs/DPGuide.md @@ -32,8 +32,8 @@ A data dependence is represented as a triple ``. `type` is t ``` Besides the list of reduction loops, this step generates two important files named `loop_counter_output.txt` and `loop_meta.txt`. These files along with CU graph and dependences are required for the pattern analysis in the next step. -5) To obtain the list of patterns and OpenMP parallelization suggestions, run the Python script in the `/graph_analyzer` directory: +5) To obtain the list of patterns and OpenMP parallelization suggestions, run the Python application `discopop_explorer`: - `python3 graph_analyzer.py --cu-xml=Data.xml --dep-file=dp_run_dep.txt` + `python3 -m discopop_explorer --cu-xml=Data.xml --dep-file=dp_run_dep.txt` You should now be able to see the pipeline pattern found in the target application along with its stages plus suitable OpenMP constructs for parallelization. You can access a sample output in [simple_pipeline.json](/test/simple_pipeline.json). Using these hints you can start parallelizing the target application. 
diff --git a/setup.py b/setup.py index 2bceb48de..80c812c84 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages os.chdir(Path(__file__).parent) -SRC = Path("graph_analyzer") +SRC = Path("discopop_explorer") def get_version(): @@ -27,7 +27,7 @@ def get_requirements(): if sys.version_info < (3, 6): - raise SystemExit("Discopop analyzer requires Python >= 3.6.") + raise SystemExit("Discopop explorer requires Python >= 3.6.") setup( name="discopop", @@ -43,7 +43,7 @@ def get_requirements(): long_description_content_type="text/markdown", install_requires=get_requirements(), python_requires=">=3.6", - entry_points={"console_scripts": ["graph_analyzer=graph_analyzer.__main__:main"]}, + entry_points={"console_scripts": ["discopop_explorer=discopop_explorer.__main__:main"]}, zip_safe=True, classifiers=[ "Development Status :: 4 - Beta", diff --git a/test/update_test_output.sh b/test/update_test_output.sh index 4c93f8943..8625a2b9d 100755 --- a/test/update_test_output.sh +++ b/test/update_test_output.sh @@ -1,10 +1,8 @@ #!/bin/bash -ANALYZER='./../graph_analyzer/graph_analyzer.py' - for d in */ ; do basename $d - python3 $ANALYZER --path "$PWD/$d/data" --json "$PWD/${d::-1}.json" + PYTHONPATH=.. 
python3 -m discopop_explorer --path "$PWD/$d/data" --json "$PWD/${d::-1}.json" done From 7de516c815857827863e5070dff3b9a868416c1c Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Thu, 1 Oct 2020 20:18:23 +0200 Subject: [PATCH 29/31] GitHub Action for managing version files and tags --- .github/workflows/publish.yml | 20 ++++++++++++++++++++ VERSION | 2 +- discopop_explorer/__init__.py | 1 + discopop_explorer/__main__.py | 4 ++-- discopop_explorer/_version.py | 1 + 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 discopop_explorer/_version.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 432db06cd..f3188d08b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,9 +9,12 @@ jobs: publish: name: "Publish Package" runs-on: ubuntu-latest + if: github.actor != 'github-actions' steps: - name: "Checkout Repository" uses: actions/checkout@v2 + with: + fetch-depth: 0 - name: "Setup Python" uses: actions/setup-python@v2 with: @@ -22,6 +25,23 @@ jobs: GITHUB_REF: ${{ github.ref }} run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\/v/} shell: bash + - name: "Create Version Files and Move Annotated Tag" + env: + GITHUB_REF: ${{ github.ref }} + VERSION: ${{ steps.get_version.outputs.VERSION }} + shell: bash + run: | + git fetch --tags --force origin ${GITHUB_REF} + git config user.name "$(git for-each-ref --format='%(taggername)' ${GITHUB_REF})" + git config user.email $(git for-each-ref --format='%(taggeremail)' ${GITHUB_REF}) + git checkout $(git branch -a --format="%(refname:lstrip=3)" --contains ${GITHUB_REF} | tail -n1) + echo "${VERSION}" > VERSION + echo "__version__ = \"${VERSION}\"" > discopop_explorer/_version.py + git add VERSION discopop_explorer/_version.py + git commit -m "Release of Version ${VERSION}" + git tag -a -f -m "Version ${VERSION}" v${VERSION} + git push + git push --tags --force - name: "Generate Python Package" 
run: python setup.py sdist - name: "Create Draft Release on GitHub" diff --git a/VERSION b/VERSION index 49d59571f..8acdd82b7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1 +0.0.1 diff --git a/discopop_explorer/__init__.py b/discopop_explorer/__init__.py index ed6a92061..49fdbd1e0 100644 --- a/discopop_explorer/__init__.py +++ b/discopop_explorer/__init__.py @@ -12,6 +12,7 @@ from pluginbase import PluginBase # type:ignore from .PETGraphX import PETGraphX +from ._version import __version__ from .parser import parse_inputs from .pattern_detection import DetectionResult, PatternDetectorX diff --git a/discopop_explorer/__main__.py b/discopop_explorer/__main__.py index 232b0e948..08f8bc38d 100644 --- a/discopop_explorer/__main__.py +++ b/discopop_explorer/__main__.py @@ -32,7 +32,7 @@ from docopt import docopt # type:ignore from schema import Schema, Use, SchemaError # type:ignore -from . import run +from . import run, __version__ from .json_serializer import PatternInfoSerializer docopt_schema = Schema({ @@ -58,7 +58,7 @@ def get_path(base_path: str, file_name: str) -> str: def main(): - arguments = docopt(__doc__) + arguments = docopt(__doc__, version=f"DiscoPoP Version {__version__}") try: arguments = docopt_schema.validate(arguments) diff --git a/discopop_explorer/_version.py b/discopop_explorer/_version.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/discopop_explorer/_version.py @@ -0,0 +1 @@ +__version__ = "0.0.1" From 8cc5c01c5bd612683e08d978b95adb5c3410e0c8 Mon Sep 17 00:00:00 2001 From: Arya Mazaheri Date: Fri, 2 Oct 2020 11:07:14 +0200 Subject: [PATCH 30/31] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e1161e6fa..1525aa394 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,10 @@ In a nutshell, DiscoPoP performs the following steps: * identifies parallel patterns which can be used to parallelize a code region, * and finally suggests corresponding 
OpenMP parallelization constructs and clauses to programmers. -A more comprehensive overview of DiscoPoP can be found on our [project website](http://www.discopop.org/). - DiscoPoP is built on top of LLVM. Therefore, DiscoPoP can perform the above-mentioned steps on any source code which can be transferred into the LLVM IR. +A more comprehensive overview of DiscoPoP can be found on our [project website](http://www.discopop.org/). + ## Getting started ### Pre-requisites Before doing anything, you need a basic development setup. We have tested DiscoPoP on Ubuntu, and the prerequisite packages should be installed using the following command: @@ -76,5 +76,7 @@ cmake -DCMAKE_CXX_COMPILER= -DCMAKE_CXX_FLAGS="-g -O0 -fno-discar ``` 3. Running `make` will build the project with DiscoPoP instrumentation applied on the code. +You may use Github issues to report potential bugs or ask your questions. In case you need individual support, please contact us using discopop[at]lists.parallel.informatik.tu-darmstadt.de. + ## License © DiscoPoP is available under the terms of the BSD-3-Clause license, as specified in the LICENSE file. From 206abb98235a3454d9937bd8557d2ce3d175ec74 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 2 Oct 2020 11:48:01 +0200 Subject: [PATCH 31/31] Resolve README.md conflicts for merge to master --- README.md | 14 +++++++------- discopop_explorer/README.md | 17 +++++++++-------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 1525aa394..3a0d67cc1 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ DiscoPoP is an open-source tool that helps software developers parallelize their programs with threads. It is a joint project of Technical University of Darmstadt and Iowa State University. 
In a nutshell, DiscoPoP performs the following steps: -* detects parts of the code (computational units or CUs) with little to no internal parallelization potential, -* finds data dependences among them, -* identifies parallel patterns which can be used to parallelize a code region, -* and finally suggests corresponding OpenMP parallelization constructs and clauses to programmers. +* detect parts of the code (computational units or CUs) with little to no internal parallelization potential, +* find data dependences among them, +* identify parallel patterns that can be used to parallelize a code region, +* and finally suggest corresponding OpenMP parallelization constructs and clauses to programmers. DiscoPoP is built on top of LLVM. Therefore, DiscoPoP can perform the above-mentioned steps on any source code which can be transferred into the LLVM IR. @@ -17,7 +17,7 @@ Before doing anything, you need a basic development setup. We have tested DiscoP sudo apt-get install git build-essential cmake -Additionally, you need LLVM installed on your system. Currently, DiscoPoP supports LLVM 8.0. Lower and higher versions are not supported, due to API changes which lead to compilation failures. Please follow the installation tutorial [here](https://llvm.org/docs/GettingStarted.html), if you have not installed LLVM before. +Additionally, you need to install LLVM on your system. Currently, DiscoPoP only supports LLVM 8.0. Due to API changes, which lead to compilation failures, it does not support lower and higher versions. Please follow the [installation tutorial](https://llvm.org/docs/GettingStarted.html), if you have not installed LLVM yet. ### DiscoPoP profiler installation First, clone the source code into the designated folder. 
Then, create a build directory: @@ -42,14 +42,14 @@ To obtain the computational unit (CU) graph of the target application, please ru clang++ -g -O0 -fno-discard-value-names -Xclang -load -Xclang /libi/LLVMCUGeneration.so -mllvm -fm-path -mllvm ./FileMapping.txt -c #### Dependence profiling -To obtain data dependences, we need to instrument the target application. Running the instrumented application will result in a text file containing all the dependences located in the present working directory. +To obtain data dependences, we need to instrument the target application. Running the instrumented application will result in a text file, located in the present working directory, that contains all the dependences. clang++ -g -O0 -fno-discard-value-names -Xclang -load -Xclang /libi/LLVMDPInstrumentation.so -mllvm -fm-path -mllvm ./FileMapping.txt -c -o out.o clang++ out.o -L/rtlib -lDiscoPoP_RT -lpthread ./ #### Identifying reduction operations -To obtain the list of reduction operations in the target application, we need to instrument the target application. Running the instrumented application will result in a text file containing all the reductions located in the present working directory. +To obtain the list of reduction operations in the target application, we need to instrument the target application. Running the instrumented application will result in a text file, located in the present working directory, that contains all the reductions.
clang++ -g -O0 -fno-discard-value-names -Xclang -load -Xclang /libi/LLVMDPReduction.so -mllvm -fm-path -mllvm ./FileMapping.txt -c -o out.o clang++ out.o -L/rtlib -lDiscoPoP_RT -lpthread diff --git a/discopop_explorer/README.md b/discopop_explorer/README.md index a1d553f29..30b5da53f 100644 --- a/discopop_explorer/README.md +++ b/discopop_explorer/README.md @@ -1,4 +1,4 @@ -# DiscoPoP Graph Analyzer +# DiscoPoP graph analyzer DiscoPoP profiler is accompanied by a Python framework, specifically designed to analyze the profiler output files, generate a CU graph, detect potential parallel patterns, and suggest OpenMP parallelizations. Currently, the following five patterns can be detected: * Reduction @@ -8,14 +8,15 @@ Currently, the following five patterns can be detected: * Task Parallelism ## Getting started -We assume that you have already run DiscoPoP profiler on the target sequential application, and the following files are created in the current working directory: +We assume that you have already run the DiscoPoP profiler on the target sequential application, and the following files are created in the current working directory: * `Data.xml` (CU information in XML format created by *CUGeneration* pass) * `_dep.txt` (Data dependences created by *DPInstrumentation* pass) * `reduction.txt` and `loop_counter_output.txt` (Reduction operations and loop iteration data identified by *DPReduction* pass) + In case any of the files mentioned above are missing, please follow the [DiscoPoP manual](../README.md) to generate them. ### Pre-requisites -To use the graph analyzer tool, you need to have Python 3.6+ installed on your system. Further python dependencies can be installed using the following command: +To use the graph analyzer tool, you need to have Python 3.6+ installed on your system. 
Further Python dependencies can be installed using the following command: `pip install -r requirements.txt` ### Usage @@ -35,16 +36,16 @@ Options: --loop-counter= Loop counter data [default: loop_counter_output.txt]. --reduction= Reduction variables file [default: reduction.txt]. --plugins= Plugins to execute - --json Output result as a json file to spicified path + --json Output result as a json file to specified path -h --help Show this screen. --version Show version. ``` By default, running the graph analyzer will print out the list of patterns along with OpenMP parallelization suggestions to the standard output. You can also obtain the results in JSON format by passing `--json` argument to the Python script. -### Walkthrough Example +### Walkthrough example The **test/** folder contains a number of precomputed inputs for testing the tool, e.g., *atax* from Polybench benchmark suite. -Here is an example workflow that you can try it out by yourself. +You can try out this example workflow. **test/reduction/** contains source code and precomputed DiscoPoP output for a simple reduction loop. The loop itself sums up all numbers from 1 to n. @@ -53,8 +54,8 @@ You can run DiscoPoP on **main.c** or just use included output. After that, you can run **discopop_explorer**. The **--path** argument should point to the output of the DiscoPoP. -In this example, the output for reduction will point to the lines 6-9. And it will suggest **pragma omp parallel for** OpenMP directive for parallizing the loop. -You will also find **i** classified as a private variable and **sum** as a reduction variable. Thus, the parallelization directive would be suggested as following: +In this example, the output for reduction will point to the lines 6-9, and it will suggest **pragma omp parallel for** OpenMP directive for parallelizing the loop. +You will also find **i** classified as a private variable and **sum** as a reduction variable. 
Thus, the parallelization directive would be suggested as follows: ```#pragma omp parallel for private(i) reduction(+:sum)```