From 05bb910dafc7c2c9fadf9d17f1d2ef933a9f46a2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 13:01:21 +0100 Subject: [PATCH 01/20] JIT: Port loop cloning to the new loop representation Run the new loop recognition at the same time as old loop recognition and cross validate that the information matches the old loop information. Implement induction analysis on the new representation. Validate that it matches the old loop induction analysis. Add a few quirks to ensure this. Port loop cloning to run on the new loop representation. Add a number of quirks to keep the logic close to the old cloning. One notable difference is that the old loop recognition considers some blocks to be part of the loops that actually are not; for example, blocks that return or break out from the loop are not part of the loop's SCC, but can be part of the lexical range. This means that the old loop cloning will clone those blocks and apply static optimizations to them, while the new loop cloning won't. However, this does not seem to cause a significant number of regressions. A few diffs are expected due to considering fewer blocks to be part of the loop and thus cloning fewer blocks. Also, throughput regressions are expected since we are doing both loop identifications for now. I expect to incrementally work towards replacing everything with the new representation, at which point we can get rid of the old loop identification and hopefully most of the reachability/dominator computations. I expect us to make back the TP at that point. 
--- src/coreclr/jit/compiler.h | 100 ++++-- src/coreclr/jit/fgbasic.cpp | 3 +- src/coreclr/jit/flowgraph.cpp | 620 ++++++++++++++++++++++++++++++++ src/coreclr/jit/loopcloning.cpp | 523 ++++++++++++++++----------- src/coreclr/jit/loopcloning.h | 15 +- src/coreclr/jit/optimizer.cpp | 48 +++ 6 files changed, 1068 insertions(+), 241 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 745372a33b02e..03e1a22eed5fb 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1996,6 +1996,42 @@ class FlowGraphDfsTree bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const; }; +struct NaturalLoopIterInfo +{ + unsigned IterVar = BAD_VAR_NUM; + int ConstInitValue = 0; + BasicBlock* InitBlock = nullptr; + GenTree* TestTree = nullptr; + GenTree* IterTree = nullptr; + bool HasConstInit : 1; + bool HasConstLimit : 1; + bool HasSimdLimit : 1; + bool HasInvariantLocalLimit : 1; + bool HasArrayLengthLimit : 1; + + NaturalLoopIterInfo() + : HasConstInit(false) + , HasConstLimit(false) + , HasSimdLimit(false) + , HasInvariantLocalLimit(false) + , HasArrayLengthLimit(false) + { + } + + int IterConst(); + genTreeOps IterOper(); + var_types IterOperType(); + bool IsReversed(); + genTreeOps TestOper(); + bool IsIncreasingLoop(); + bool IsDecreasingLoop(); + GenTree* Iterator(); + GenTree* Limit(); + int ConstLimit(); + unsigned VarLimit(); + bool ArrLenLimit(Compiler* comp, ArrIndex* index); +}; + class FlowGraphNaturalLoop { friend class FlowGraphNaturalLoops; @@ -2018,6 +2054,14 @@ class FlowGraphNaturalLoop bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); BitVecTraits LoopBlockTraits(); + + template + bool VisitDefs(TFunc func); + + GenTreeLclVarCommon* FindDef(unsigned lclNum); + + void MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init); + bool MatchLimit(NaturalLoopIterInfo* info, GenTree* test); public: BasicBlock* GetHeader() const { @@ -2064,6 +2108,12 @@ class 
FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocks(TFunc func); + + BasicBlock* GetLexicallyBottomMostBlock(); + + bool AnalyzeIteration(NaturalLoopIterInfo* info); + + bool HasDef(unsigned lclNum); }; class FlowGraphNaturalLoops @@ -4686,6 +4736,10 @@ class Compiler unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder FlowGraphDfsTree* m_dfs; + FlowGraphNaturalLoops* m_loops; + struct LoopDsc; + LoopDsc** m_newToOldLoop; + FlowGraphNaturalLoop** m_oldToNewLoop; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -6532,7 +6586,7 @@ class Compiler void optFindLoops(); PhaseStatus optCloneLoops(); - void optCloneLoop(unsigned loopInd, LoopCloneContext* context); + void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) void optRemoveRedundantZeroInits(); PhaseStatus optIfConversion(); // If conversion @@ -6844,7 +6898,7 @@ class Compiler bool optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar); bool optPopulateInitInfo(unsigned loopInd, BasicBlock* initBlock, GenTree* init, unsigned iterVar); bool optExtractInitTestIncr( - BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* exit, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); + BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); void optFindNaturalLoops(); @@ -7963,34 +8017,34 @@ class Compiler public: struct LoopCloneVisitorInfo { - LoopCloneContext* context; - Statement* stmt; - const unsigned loopNum; - const bool cloneForArrayBounds; - const bool cloneForGDVTests; - LoopCloneVisitorInfo(LoopCloneContext* context, - unsigned 
loopNum, - Statement* stmt, - bool cloneForArrayBounds, - bool cloneForGDVTests) + LoopCloneContext* context; + Statement* stmt; + FlowGraphNaturalLoop* loop; + const bool cloneForArrayBounds; + const bool cloneForGDVTests; + LoopCloneVisitorInfo(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + Statement* stmt, + bool cloneForArrayBounds, + bool cloneForGDVTests) : context(context) , stmt(nullptr) - , loopNum(loopNum) + , loop(loop) , cloneForArrayBounds(cloneForArrayBounds) , cloneForGDVTests(cloneForGDVTests) { } }; - bool optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum); + bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum); bool optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndex(GenTree* tree, ArrIndex* result); - bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context); + bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context); static fgWalkPreFn optCanOptimizeByLoopCloningVisitor; fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info); bool optObtainLoopCloningOpts(LoopCloneContext* context); - bool optIsLoopClonable(unsigned loopInd); + bool optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context); bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info); bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType); @@ -7999,13 +8053,13 @@ class Compiler #ifdef DEBUG void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore); #endif - void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath)); - bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context); - bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context); - 
BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter); + void optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, LoopCloneContext* context DEBUGARG(bool fastPath)); + bool optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + bool optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter); protected: ssize_t optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 917fd55ca5a95..48accd29435b8 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -68,7 +68,8 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; - m_dfs = nullptr; + m_dfs = nullptr; + m_loops = nullptr; // Initialize BlockSet data. fgCurBBEpoch = 0; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index f81c55b004a04..9214addb38bf2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4640,3 +4640,623 @@ bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ji return true; } + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitDefs: Visit all definitions contained in the +// loop. +// +// Type parameters: +// TFunc - Callback functor type +// +// Parameters: +// func - Callback functor that accepts a GenTreeLclVarCommon* and returns a +// bool. On true, continue looking for defs; on false, abort. +// +// Returns: +// True if all defs were visited and the functor never returned false; otherwise false. 
+// +template +bool FlowGraphNaturalLoop::VisitDefs(TFunc func) +{ + class VisitDefsVisitor : public GenTreeVisitor + { + using GenTreeVisitor::m_compiler; + + TFunc& m_func; + + public: + enum + { + DoPreOrder = true, + }; + + VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) + { + } + + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + if ((tree->gtFlags & GTF_ASG) == 0) + { + return Compiler::WALK_SKIP_SUBTREES; + } + + GenTreeLclVarCommon* lclDef; + if (tree->DefinesLocal(m_compiler, &lclDef)) + { + if (!m_func(lclDef)) + return Compiler::WALK_ABORT; + } + + return Compiler::WALK_CONTINUE; + } + }; + + VisitDefsVisitor visitor(m_tree->GetCompiler(), func); + + BasicBlockVisit result = VisitLoopBlocks([&](BasicBlock* loopBlock) { + for (Statement* stmt : loopBlock->Statements()) + { + if (visitor.WalkTree(stmt->GetRootNodePointer(), nullptr) == Compiler::WALK_ABORT) + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + + return result == BasicBlockVisit::Continue; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::FindDef: Find a def of the specified local number. +// +// Parameters: +// lclNum - The local. +// +// Returns: +// Tree that represents a def of the local; nullptr if no def was found. +// +// Remarks: +// Does not take promotion into account. +// +GenTreeLclVarCommon* FlowGraphNaturalLoop::FindDef(unsigned lclNum) +{ + GenTreeLclVarCommon* result = nullptr; + VisitDefs([&result, lclNum](GenTreeLclVarCommon* def) { + if (def->GetLclNum() == lclNum) + { + result = def; + return false; + } + + return true; + }); + + return result; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::AnalyzeIteration: Analyze the induction structure of +// the loop. 
+// +// Parameters: +// info - [out] Loop information +// +// Returns: +// True if the structure was analyzed and we can make guarantees about it; +// otherwise false. +// +// Remarks: +// The function guarantees that at all definitions of the iteration local, +// the loop condition is reestablished before iteration continues. In other +// words, if this function returns true the loop invariant is guaranteed to +// be upheld in all blocks in the loop (but see below for establishing the +// base case). +// +// Currently we do not validate that there is a zero-trip test ensuring the +// condition is true, so it is up to the user to validate that. This is +// normally done via GTF_RELOP_ZTT set by loop inversion. +// +bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info) +{ + JITDUMP("Analyzing iteration for " FMT_LP " with header " FMT_BB "\n", m_index, m_header->bbNum); + + const FlowGraphDfsTree* dfs = m_tree; + Compiler* comp = dfs->GetCompiler(); + assert((m_entryEdges.size() == 1) && "Expected preheader"); + + BasicBlock* preheader = m_entryEdges[0]->getSourceBlock(); + + JITDUMP(" Preheader = " FMT_BB "\n", preheader->bbNum); + + // TODO-Quirk: For backwards compatibility always try the lexically + // bottom-most block for the loop variable. + BasicBlock* bottom = GetLexicallyBottomMostBlock(); + JITDUMP(" Bottom = " FMT_BB "\n", bottom->bbNum); + + BasicBlock* cond = bottom; + BasicBlock* initBlock = preheader; + GenTree* init; + GenTree* test; + if (!cond->KindIs(BBJ_COND) || + !comp->optExtractInitTestIncr(&initBlock, bottom, m_header, &init, &test, &info->IterTree)) + { + // TODO-CQ: Try all exit edges here to see if we can find an induction variable. 
+ JITDUMP(" Could not extract induction variable from bottom\n"); + return false; + } + + info->IterVar = comp->optIsLoopIncrTree(info->IterTree); + + assert(info->IterVar != BAD_VAR_NUM); + LclVarDsc* const iterVarDsc = comp->lvaGetDesc(info->IterVar); + + // Bail on promoted case, otherwise we'd have to search the loop + // for both iterVar and its parent. + // TODO-CQ: Fix this + // + if (iterVarDsc->lvIsStructField) + { + JITDUMP(" iterVar V%02u is a promoted field\n", info->IterVar); + return false; + } + + // Bail on the potentially aliased case. + // + if (iterVarDsc->IsAddressExposed()) + { + JITDUMP(" iterVar V%02u is address exposed\n", info->IterVar); + return false; + } + + if (init == nullptr) + { + JITDUMP(" Init = , test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(test), + Compiler::dspTreeID(info->IterTree)); + } + else + { + JITDUMP(" Init = [%06u], test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(init), Compiler::dspTreeID(test), + Compiler::dspTreeID(info->IterTree)); + } + + if (!MatchLimit(info, test)) + { + return false; + } + + MatchInit(info, initBlock, init); + + bool result = VisitDefs([=](GenTreeLclVarCommon* def) { + if ((def->GetLclNum() != info->IterVar) || (def == info->IterTree)) + return true; + + JITDUMP(" Loop has extraneous def [%06u]\n", Compiler::dspTreeID(def)); + return false; + }); + + if (!result) + { + return false; + } + +#ifdef DEBUG + if (comp->verbose) + { + printf(" IterVar = V%02u\n", info->IterVar); + + if (info->HasConstInit) + printf(" Const init with value %d in " FMT_BB "\n", info->ConstInitValue, info->InitBlock->bbNum); + + printf(" Test is [%06u] (", Compiler::dspTreeID(info->TestTree)); + if (info->HasConstLimit) + printf("const limit "); + if (info->HasSimdLimit) + printf("simd limit "); + if (info->HasInvariantLocalLimit) + printf("invariant local limit "); + if (info->HasArrayLengthLimit) + printf("array length limit "); + printf(")\n"); + } +#endif + + return result; +} + 
+//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchInit: Try to pattern match the initialization of +// an induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// initBlock - Block containing the initialization tree +// init - Initialization tree +// +// Remarks: +// We do not necessarily guarantee or require to be able to find any +// initialization. +// +void FlowGraphNaturalLoop::MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init) +{ + if ((init == nullptr) || !init->OperIs(GT_STORE_LCL_VAR) || (init->AsLclVarCommon()->GetLclNum() != info->IterVar)) + return; + + GenTree* initValue = init->AsLclVar()->Data(); + if (!initValue->IsCnsIntOrI() || !initValue->TypeIs(TYP_INT)) + return; + + info->HasConstInit = true; + info->ConstInitValue = (int)initValue->AsIntCon()->IconValue(); + info->InitBlock = initBlock; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchLimit: Try to pattern match the loop test of an +// induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// test - Loop condition test +// +// Returns: +// True if the loop condition was recognized and "info" was filled out. +// +// Remarks: +// Unlike the initialization, we do require that we are able to match the +// loop condition. +// +bool FlowGraphNaturalLoop::MatchLimit(NaturalLoopIterInfo* info, GenTree* test) +{ + // Obtain the relop from the "test" tree. + GenTree* relop; + if (test->OperIs(GT_JTRUE)) + { + relop = test->gtGetOp1(); + } + else + { + assert(test->OperIs(GT_STORE_LCL_VAR)); + relop = test->AsLclVar()->Data(); + } + + noway_assert(relop->OperIsCompare()); + + GenTree* opr1 = relop->AsOp()->gtOp1; + GenTree* opr2 = relop->AsOp()->gtOp2; + + GenTree* iterOp; + GenTree* limitOp; + + // Make sure op1 or op2 is the iterVar. 
+ if (opr1->gtOper == GT_LCL_VAR && opr1->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr1; + limitOp = opr2; + } + else if (opr2->gtOper == GT_LCL_VAR && opr2->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr2; + limitOp = opr1; + } + else + { + return false; + } + + if (iterOp->gtType != TYP_INT) + { + return false; + } + + // Mark the iterator node. + iterOp->gtFlags |= GTF_VAR_ITERATOR; + + // Check what type of limit we have - constant, variable or arr-len. + if (limitOp->gtOper == GT_CNS_INT) + { + info->HasConstLimit = true; + if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) + { + info->HasSimdLimit = true; + } + } + else if (limitOp->OperIs(GT_LCL_VAR)) + { + // See if limit var is a loop invariant + // + GenTreeLclVarCommon* def = FindDef(limitOp->AsLclVarCommon()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Limit var V%02u modified by [%06u]\n", limitOp->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasInvariantLocalLimit = true; + } + else if (limitOp->OperIs(GT_ARR_LENGTH)) + { + // See if limit array is a loop invariant + // + GenTree* const array = limitOp->AsArrLen()->ArrRef(); + + if (!array->OperIs(GT_LCL_VAR)) + { + JITDUMP(" Array limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + GenTreeLclVarCommon* def = FindDef(array->AsLclVar()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Array limit var V%02u modified by [%06u]\n", array->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasArrayLengthLimit = true; + } + else + { + JITDUMP(" Loop limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + // Were we able to successfully analyze the limit? 
+ // + assert(info->HasConstLimit || info->HasInvariantLocalLimit || info->HasArrayLengthLimit); + + info->TestTree = relop; + return true; +} + +//------------------------------------------------------------------------ +// GetLexicallyBottomMostBlock: Get the lexically bottom-most block contained +// within the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyBottomMostBlock() +{ + BasicBlock* bottom = m_header; + VisitLoopBlocks([&bottom](BasicBlock* loopBlock) { + if (loopBlock->bbNum > bottom->bbNum) + bottom = loopBlock; + return BasicBlockVisit::Continue; + }); + + return bottom; +} + +//------------------------------------------------------------------------ +// HasDef: Check if a local is defined anywhere in the loop. +// +// Parameters: +// lclNum - Local to check for a def for. +// +// Returns: +// True if the local has any def. +// +// Remarks: +// +bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) +{ + Compiler* comp = m_tree->GetCompiler(); + LclVarDsc* dsc = comp->lvaGetDesc(lclNum); + + assert(!comp->lvaVarAddrExposed(lclNum)); + // Currently does not handle promoted locals, only fields. + assert(!dsc->lvPromoted); + + unsigned defLclNum1 = lclNum; + unsigned defLclNum2 = BAD_VAR_NUM; + if (dsc->lvIsStructField) + { + defLclNum2 = dsc->lvParentLcl; + } + + bool result = VisitDefs([=](GenTreeLclVarCommon* lcl) { + if ((lcl->GetLclNum() == defLclNum1) || (lcl->GetLclNum() == defLclNum2)) + { + return false; + } + + return true; + }); + + // If we stopped early we found a def. + return !result; +} + +//------------------------------------------------------------------------ +// IterConst: Get the constant with which the iterator is modified +// +// Returns: +// Constant value. 
+// +int NaturalLoopIterInfo::IterConst() +{ + GenTree* value = IterTree->AsLclVar()->Data(); + return (int)value->gtGetOp2()->AsIntCon()->IconValue(); +} + +//------------------------------------------------------------------------ +// IterOper: Get the type of the operation on the iterator +// +// Returns: +// Oper +// +genTreeOps NaturalLoopIterInfo::IterOper() +{ + return IterTree->AsLclVar()->Data()->OperGet(); +} + +//------------------------------------------------------------------------ +// IterOperType: Get the type of the operation on the iterator. +// +// Returns: +// Type, used for overflow instructions. +// +var_types NaturalLoopIterInfo::IterOperType() +{ + assert(genActualType(IterTree) == TYP_INT); + return IterTree->TypeGet(); +} + +//------------------------------------------------------------------------ +// IsReversed: Returns true if the iterator node is the second operand in the +// loop condition. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsReversed() +{ + return TestTree->gtGetOp2()->OperIs(GT_LCL_VAR) && ((TestTree->gtGetOp2()->gtFlags & GTF_VAR_ITERATOR) != 0); +} + +//------------------------------------------------------------------------ +// TestOper: The type of the comparison between the iterator and the limit +// (GT_LE, GT_GE, etc.) +// +// Returns: +// Oper. +// +genTreeOps NaturalLoopIterInfo::TestOper() +{ + genTreeOps op = TestTree->OperGet(); + return IsReversed() ? GenTree::SwapRelop(op) : op; +} + +//------------------------------------------------------------------------ +// IsIncreasingLoop: Returns true if the loop iterator increases from low to +// high value. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsIncreasingLoop() +{ + // Increasing loop is the one that has "+=" increment operation and "< or <=" limit check. 
+ bool isLessThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_LT, GT_LE); + return (isLessThanLimitCheck && + (((IterOper() == GT_ADD) && (IterConst() > 0)) || ((IterOper() == GT_SUB) && (IterConst() < 0)))); +} + +//------------------------------------------------------------------------ +// IsDecreasingLoop: Returns true if the loop iterator decreases from high to +// low value. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsDecreasingLoop() +{ + // Decreasing loop is the one that has "-=" decrement operation and "> or >=" limit check. If the operation is + // "+=", make sure the constant is negative to give an effect of decrementing the iterator. + bool isGreaterThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_GT, GT_GE); + return (isGreaterThanLimitCheck && + (((IterOper() == GT_ADD) && (IterConst() < 0)) || ((IterOper() == GT_SUB) && (IterConst() > 0)))); +} + +//------------------------------------------------------------------------ +// Iterator: Get the iterator node in the loop test +// +// Returns: +// Iterator node. +// +GenTree* NaturalLoopIterInfo::Iterator() +{ + return IsReversed() ? TestTree->gtGetOp2() : TestTree->gtGetOp1(); +} + +//------------------------------------------------------------------------ +// Limit: Get the limit node in the loop test. +// +// Returns: +// Limit node. +// +GenTree* NaturalLoopIterInfo::Limit() +{ + return IsReversed() ? TestTree->gtGetOp1() : TestTree->gtGetOp2(); +} + +//------------------------------------------------------------------------ +// ConstLimit: Get the constant value of the iterator limit, i.e. when the loop +// condition is "i RELOP const". +// +// Returns: +// Limit constant. +// +// Remarks: +// Only valid if HasConstLimit is true. 
+// +int NaturalLoopIterInfo::ConstLimit() +{ + assert(HasConstLimit); + GenTree* limit = Limit(); + assert(limit->OperIsConst()); + return (int)limit->AsIntCon()->gtIconVal; +} + +//------------------------------------------------------------------------ +// VarLimit: Get the local var num used in the loop condition, i.e. when the +// loop condition is "i RELOP lclVar" with a loop invariant local. +// +// Returns: +// Local var number. +// +// Remarks: +// Only valid if HasInvariantLocalLimit is true. +// +unsigned NaturalLoopIterInfo::VarLimit() +{ + assert(HasInvariantLocalLimit); + + GenTree* limit = Limit(); + assert(limit->OperGet() == GT_LCL_VAR); + return limit->AsLclVarCommon()->GetLclNum(); +} + +//------------------------------------------------------------------------ +// ArrLenLimit: Get the array length used in the loop condition, i.e. when the +// loop condition is "i RELOP arr.len". +// +// Parameters: +// comp - Compiler instance +// index - [out] Array index information +// +// Returns: +// True if the array length was extracted. +// +// Remarks: +// Only valid if HasArrayLengthLimit is true. +// +bool NaturalLoopIterInfo::ArrLenLimit(Compiler* comp, ArrIndex* index) +{ + assert(HasArrayLengthLimit); + + GenTree* limit = Limit(); + assert(limit->OperIs(GT_ARR_LENGTH)); + + // Check if we have a.length or a[i][j].length + if (limit->AsArrLen()->ArrRef()->OperIs(GT_LCL_VAR)) + { + index->arrLcl = limit->AsArrLen()->ArrRef()->AsLclVarCommon()->GetLclNum(); + index->rank = 0; + return true; + } + // We have a[i].length, extract a[i] pattern. 
+ else if (limit->AsArrLen()->ArrRef()->OperIs(GT_COMMA)) + { + return comp->optReconstructArrIndex(limit->AsArrLen()->ArrRef(), index); + } + return false; +} diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index b7c0facd5065c..04858ee63252b 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -11,6 +11,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" +#include "jitstd/algorithm.h" #ifdef DEBUG @@ -783,6 +784,33 @@ void LoopCloneContext::PrintConditions(unsigned loopNum) } #endif +//-------------------------------------------------------------------------------------------------- +// GetLoopIterInfo: Get the analyzed loop iteration for a loop. +// +// Arguments: +// loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex(). +// +// Returns: +// The info, or nullptr if the loop iteration structure could not be +// analyzed. +// +NaturalLoopIterInfo* LoopCloneContext::GetLoopIterInfo(unsigned loopNum) +{ + return iterInfo[loopNum]; +} + +//-------------------------------------------------------------------------------------------------- +// SetLoopIterInfo: Set the analyzed loop iteration for a loop. +// +// Arguments: +// loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex(). +// info - Info to store +// +void LoopCloneContext::SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info) +{ + iterInfo[loopNum] = info; +} + //-------------------------------------------------------------------------------------------------- // CondToStmtInBlock: Convert an array of conditions to IR. Evaluate them into a JTRUE stmt and add it to // a new block after `insertAfter`. @@ -1075,13 +1103,12 @@ LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children // Callers should assume AND operation is used i.e., if all conditions are // true, then take the fast path. 
// -bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { JITDUMP("------------------------------------------------------------\n"); - JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loopNum); + JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loop->GetIndex()); - LoopDsc* loop = &optLoopTable[loopNum]; - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos->Size() > 0); // We only need to check for iteration behavior if we have array checks. @@ -1104,8 +1131,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident objDeref = LC_Ident::CreateIndirOfLocal(ttInfo->lclNum, 0); LC_Ident methodTable = LC_Ident::CreateClassHandle(ttInfo->clsHnd); LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1126,8 +1153,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1142,27 +1169,28 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // No array conditions here, so we're done // JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; } + NaturalLoopIterInfo* 
iterInfo = context->GetLoopIterInfo(loop->GetIndex()); // Note we see cases where the test oper is NE (array.Len) which we could handle // with some extra care. // - if (!GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + if (!GenTree::StaticOperIs(iterInfo->TestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) { // We can't reason about how this loop iterates return false; } - const bool isIncreasingLoop = loop->lpIsIncreasingLoop(); - assert(isIncreasingLoop || loop->lpIsDecreasingLoop()); + const bool isIncreasingLoop = iterInfo->IsIncreasingLoop(); + assert(isIncreasingLoop || iterInfo->IsDecreasingLoop()); // We already know that this is either increasing or decreasing loop and the // stride is (> 0) or (< 0). Here, just take the abs() value and check if it // is beyond the limit. - int stride = abs(loop->lpIterConst()); + int stride = abs(iterInfo->IterConst()); if (stride >= 58) { @@ -1176,27 +1204,27 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident ident; // Init conditions - if (loop->lpFlags & LPFLG_CONST_INIT) + if (iterInfo->HasConstInit) { // Only allowing non-negative const init at this time. // This is because the variable initialized with this constant will be used as an array index, // and array indices must be non-negative. 
- if (loop->lpConstInit < 0) + if (iterInfo->ConstInitValue < 0) { - JITDUMP("> Init %d is invalid\n", loop->lpConstInit); + JITDUMP("> Init %d is invalid\n", iterInfo->ConstInitValue); return false; } if (!isIncreasingLoop) { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident::CreateConst(static_cast(loop->lpConstInit)); + ident = LC_Ident::CreateConst(static_cast(iterInfo->ConstInitValue)); } } else { // iterVar >= 0 - const unsigned initLcl = loop->lpIterVar(); + const unsigned initLcl = iterInfo->IterVar; if (!genActualTypeIsInt(lvaGetDesc(initLcl))) { JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); @@ -1214,13 +1242,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateVar(initLcl); geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + context->EnsureConditions(loop->GetIndex())->Push(geZero); } // Limit Conditions - if (loop->lpFlags & LPFLG_CONST_LIMIT) + if (iterInfo->HasConstLimit) { - int limit = loop->lpConstLimit(); + int limit = iterInfo->ConstLimit(); if (limit < 0) { JITDUMP("> limit %d is invalid\n", limit); @@ -1233,9 +1261,9 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateConst(static_cast(limit)); } } - else if (loop->lpFlags & LPFLG_VAR_LIMIT) + else if (iterInfo->HasInvariantLocalLimit) { - const unsigned limitLcl = loop->lpVarLimit(); + const unsigned limitLcl = iterInfo->VarLimit(); if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); @@ -1254,12 +1282,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(limitLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + 
context->EnsureConditions(loop->GetIndex())->Push(geZero); } - else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + else if (iterInfo->HasArrayLengthLimit) { ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!loop->lpArrLenLimit(this, index)) + if (!iterInfo->ArrLenLimit(this, index)) { JITDUMP("> ArrLen not matching\n"); return false; @@ -1268,7 +1296,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } else { @@ -1284,7 +1312,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // GT_GT loop test: (end > start) ==> (end <= arrLen) // GT_GE loop test: (end >= start) ==> (end < arrLen) genTreeOps opLimitCondition; - switch (loop->lpTestOper()) + switch (iterInfo->TestOper()) { case GT_LT: case GT_GT: @@ -1309,11 +1337,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // Ensure that this array must be dereference-able, before executing the actual condition. 
LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } break; case LcOptInfo::LcMdArray: @@ -1323,7 +1351,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext mdArrInfo->dim, LC_Array::None)); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // TODO: ensure array is dereference-able? } @@ -1339,7 +1367,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext } JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; @@ -1450,11 +1478,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // // and so on. // -bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { // Get the dereference-able arrays and objects. - JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loopNum); - JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loopNum); + JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex()); + JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex()); // We currently expect to have at least one of these. 
// @@ -1545,7 +1573,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] JitExpandArrayStack*>* levelCond = - context->EnsureBlockConditions(loopNum, condBlocks); + context->EnsureBlockConditions(loop->GetIndex(), condBlocks); for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) { arrayDerefNodes[i]->DeriveLevelConditions(levelCond); @@ -1554,7 +1582,8 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con if (objDeref->Size() > 0) { - JitExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, 1); + JitExpandArrayStack*>* levelCond = + context->EnsureBlockConditions(loop->GetIndex(), 1); for (unsigned i = 0; i < objDeref->Size(); ++i) { @@ -1566,7 +1595,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con } } - DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + DBEXEC(verbose, context->PrintBlockConditions(loop->GetIndex())); return true; } @@ -1615,9 +1644,10 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore // performs the optimizations assuming that the path in which the candidates // were collected is the fast path in which the optimizations will be performed. // -void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) +void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, + LoopCloneContext* context DEBUGARG(bool dynamicPath)) { - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos != nullptr); for (unsigned i = 0; i < optInfos->Size(); ++i) { @@ -1735,20 +1765,14 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // return blocks in the loop that will be cloned. 
(REVIEW: this 'predicate' function // doesn't seem like the right place to do this change.) // -bool Compiler::optIsLoopClonable(unsigned loopInd) +bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopInd]; - const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); - - if (requireIterable && !(loop.lpFlags & LPFLG_ITER)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". No LPFLG_ITER flag.\n", loopInd); - return false; - } + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); - if (loop.lpIsRemoved()) + if (requireIterable && (iterInfo == nullptr)) { - JITDUMP("Loop cloning: rejecting removed loop " FMT_LP ".\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Could not analyze iteration.\n", loop->GetIndex()); return false; } @@ -1761,34 +1785,39 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // TODO: this limitation could be removed if we do the work to insert new EH regions in the exception table, // for the cloned loop (and its embedded EH regions). // - // Also, count the number of return blocks within the loop for future use. - unsigned loopRetCount = 0; - for (BasicBlock* const blk : loop.LoopBlocks()) - { - if (blk->KindIs(BBJ_RETURN)) - { - loopRetCount++; - } + BasicBlockVisit result = loop->VisitLoopBlocks([=](BasicBlock* blk) { + assert(!blk->KindIs(BBJ_RETURN)); if (bbIsTryBeg(blk)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loopInd); - return false; + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loop->GetIndex()); + return BasicBlockVisit::Abort; } + + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + return false; } // Is the entry block a handler or filter start? 
If so, then if we cloned, we could create a jump // into the middle of a handler (to go to the cloned copy.) Reject. - if (bbIsHandlerBeg(loop.lpEntry)) + if (bbIsHandlerBeg(loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Entry block is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Header block is a handler start.\n", loop->GetIndex()); return false; } + // Loop canonicalization should have ensured that there is a unique preheader. + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // If the head and entry are in different EH regions, reject. - if (!BasicBlock::sameEHRegion(loop.lpHead, loop.lpEntry)) + if (!BasicBlock::sameEHRegion(preheader, loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Head and entry blocks are in different EH regions.\n", - loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Preheader and header blocks are in different EH regions.\n", + loop->GetIndex()); return false; } @@ -1798,29 +1827,28 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // that block; this is one of those cases. This could be fixed fairly easily; for example, // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the // loop. This is just a corner to cut to get this working faster. - BasicBlock* bbAfterLoop = loop.lpBottom->Next(); + // TODO-Quirk: Should rework this to avoid the lexically bottom most block here. + BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); + + BasicBlock* bbAfterLoop = bottom->Next(); if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Next block after bottom is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Next block after bottom is a handler start.\n", + loop->GetIndex()); return false; } - // Reject cloning if this is a mid-entry loop and the entry has non-loop predecessors other than its head. - // This loop may be part of a larger looping construct that we didn't recognize. - // - // We should really fix this in optCanonicalizeLoop. - // - if (!loop.lpIsTopEntry()) + // TODO-Quirk: Reject loops that with the old loop cloning would put us + // above max number of returns. Return blocks are not considered part of + // loops in the new loop finding since they are never part of the SCC. + // (Is that true? What about due to EH successors?) + LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; + unsigned loopRetCount = 0; + for (BasicBlock* const blk : oldLoop->LoopBlocks()) { - for (BasicBlock* const entryPred : loop.lpEntry->PredBlocks()) + if (blk->KindIs(BBJ_RETURN)) { - if ((entryPred != loop.lpHead) && !loop.lpContains(entryPred)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Is not top entry, and entry has multiple non-loop preds.\n", - loopInd); - return false; - } + loopRetCount++; } } @@ -1836,78 +1864,65 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has %d returns;" " if added to previously existing %d returns, it would exceed the limit of %d.\n", - loopInd, loopRetCount, fgReturnCount, epilogLimit); + loop->GetIndex(), loopRetCount, fgReturnCount, epilogLimit); return false; } - if (requireIterable) - { - const unsigned ivLclNum = loop.lpIterVar(); - if (lvaVarAddrExposed(ivLclNum)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", - loopInd, ivLclNum); - return false; - } - } + assert(!requireIterable || !lvaVarAddrExposed(iterInfo->IterVar)); - BasicBlock* top = loop.lpTop; - BasicBlock* bottom = loop.lpBottom; + // TODO-Quirk: These conditions are unnecessary. 
+ BasicBlock* oldLoopTop = oldLoop->lpTop; + BasicBlock* oldLoopBottom = oldLoop->lpBottom; - if (!bottom->KindIs(BBJ_COND)) + if (!oldLoopBottom->KindIs(BBJ_COND)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Couldn't find termination test.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Couldn't find termination test.\n", loop->GetIndex()); return false; } - if (!bottom->HasJumpTo(top)) + if (!oldLoopBottom->HasJumpTo(oldLoopTop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", + loop->GetIndex()); return false; } if (requireIterable) { - if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && - (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop limit is neither constant, variable or array length.\n", - loopInd); - return false; - } + assert(iterInfo->HasConstLimit || iterInfo->HasInvariantLocalLimit || iterInfo->HasArrayLengthLimit); // TODO-CQ: Handle other loops like: // - The ones whose limit operator is "==" or "!=" // - The incrementing operator is multiple and divide // - The ones that are inverted are not handled here for cases like "i *= 2" because // they are converted to "i + i". - if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) + if (!iterInfo->IsIncreasingLoop() && !iterInfo->IsDecreasingLoop()) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", - loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); + loop->GetIndex(), GenTree::OpName(iterInfo->TestOper()), GenTree::OpName(iterInfo->IterOper())); return false; } - if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) + if (!iterInfo->TestTree->OperIsCompare() || ((iterInfo->TestTree->gtFlags & GTF_RELOP_ZTT) == 0)) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop inversion NOT present, loop test [%06u] may not protect " "entry from head.\n", - loopInd, loop.lpTestTree->gtTreeID); + loop->GetIndex(), iterInfo->TestTree->gtTreeID); return false; } #ifdef DEBUG - const unsigned ivLclNum = loop.lpIterVar(); - GenTree* const op1 = loop.lpIterator(); + const unsigned ivLclNum = iterInfo->IterVar; + GenTree* const op1 = iterInfo->Iterator(); assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; + assert(loopRetCount == 0); return true; } @@ -1916,10 +1931,10 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // optInsertLoopChoiceConditions: Insert the loop conditions for a loop after the loop head. // // Arguments: -// context loop cloning context variable -// loopNum the loop index -// slowHead the slow path loop head, where the condition failures branch -// insertAfter insert the conditions after this block +// context - loop cloning context variable +// loop - the loop +// slowHead - the slow path loop head, where the condition failures branch +// insertAfter - insert the conditions after this block // // Return Value: // The last condition block added. @@ -1936,32 +1951,33 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // ... 
// slowHead -?> e2 (slowHead) branch or fall-through to e2 // -BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter) +BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter) { - JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loopNum); - assert(context->HasBlockConditions(loopNum)); + JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); + assert(context->HasBlockConditions(loop->GetIndex())); assert(slowHead != nullptr); assert(insertAfter->KindIs(BBJ_NONE)); - if (context->HasBlockConditions(loopNum)) + if (context->HasBlockConditions(loop->GetIndex())) { - JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); + JitExpandArrayStack*>* levelCond = + context->GetBlockConditions(loop->GetIndex()); for (unsigned i = 0; i < levelCond->Size(); ++i) { - JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); + JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loop->GetIndex(), i); DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); } } // Finally insert cloning conditions after all deref conditions have been inserted. 
- JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loopNum); - DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loop->GetIndex()); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); - insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), slowHead, insertAfter); + insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowHead, insertAfter); return insertAfter; } @@ -1970,21 +1986,25 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, // optCloneLoop: Perform the mechanical cloning of the specified loop // // Arguments: -// loopInd - loop index of loop to clone +// loop - The loop to clone // context - data structure where all loop cloning info is kept. // -void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) +void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - assert(loopInd < optLoopCount); - - LoopDsc& loop = optLoopTable[loopInd]; +#ifdef DEBUG + if (verbose) + { + printf("\nCloning loop " FMT_LP " with blocks:\n", loop->GetIndex()); - JITDUMP("\nCloning loop " FMT_LP ": [head: " FMT_BB ", top: " FMT_BB ", entry: " FMT_BB ", bottom: " FMT_BB - ", child: " FMT_LP "].\n", - loopInd, loop.lpHead->bbNum, loop.lpTop->bbNum, loop.lpEntry->bbNum, loop.lpBottom->bbNum, loop.lpChild); + loop->VisitLoopBlocksReversePostOrder([](BasicBlock* block) { + printf(" " FMT_BB "\n", block->bbNum); + return BasicBlockVisit::Continue; + }); + } +#endif // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). 
- unsigned depth = optLoopDepth(loopInd); + unsigned depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); weight_t ambientWeight = 1; for (unsigned j = 0; j < depth; j++) { @@ -1993,9 +2013,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) assert(ambientWeight > lastWeight); } - // If we're in a non-natural loop, the ambient weight might be higher than we computed above. - // Be safe by taking the max with the head block's weight. - ambientWeight = max(ambientWeight, loop.lpHead->bbWeight); + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // The ambient weight might be higher than we computed above. Be safe by + // taking the max with the head block's weight. + ambientWeight = max(ambientWeight, preheader->bbWeight); // We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights. // The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should @@ -2006,8 +2028,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // This is the containing loop, if any -- to label any blocks we create that are outside // the loop being cloned. - unsigned char ambientLoop = loop.lpParent; + unsigned char ambientLoop = m_newToOldLoop[loop->GetIndex()]->lpParent; + BasicBlock* h = preheader; // We're going to transform this loop: // // H --> E (if T == E, H falls through to T/E) @@ -2030,7 +2053,6 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // B2 ?-> T2 // X - BasicBlock* h = loop.lpHead; assert((h->bbFlags & BBF_LOOP_PREHEADER) != 0); // Make a new pre-header block 'h2' for the loop. 'h' will fall through to 'h2'. 
@@ -2045,17 +2067,17 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) if (!h->KindIs(BBJ_NONE)) { assert(h->KindIs(BBJ_ALWAYS)); - assert(h->HasJumpTo(loop.lpEntry)); - h2->SetJumpKindAndTarget(BBJ_ALWAYS, loop.lpEntry); + assert(h->HasJumpTo(loop->GetHeader())); + h2->SetJumpKindAndTarget(BBJ_ALWAYS, loop->GetHeader()); } - fgReplacePred(loop.lpEntry, h, h2); - JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop.lpEntry->bbNum, - h2->bbNum, loop.lpEntry->bbNum); + fgReplacePred(loop->GetHeader(), h, h2); + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop->GetHeader()->bbNum, + h2->bbNum, loop->GetHeader()->bbNum); // 'h' is no longer the loop head; 'h2' is! h->bbFlags &= ~BBF_LOOP_PREHEADER; - optUpdateLoopHead(loopInd, h, h2); + optUpdateLoopHead((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable), h, h2); // Make 'h' fall through to 'h2' (if it didn't already). // Don't add the h->h2 edge because we're going to insert the cloning conditions between 'h' and 'h2', and @@ -2064,7 +2086,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. - BasicBlock* b = loop.lpBottom; + BasicBlock* b = loop->GetLexicallyBottomMostBlock(); BasicBlock* newPred = b; if (!b->KindIs(BBJ_ALWAYS)) { @@ -2109,9 +2131,26 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
+ unsigned numBlocks = 0; + loop->VisitLoopBlocks([&numBlocks](BasicBlock* block) { + numBlocks++; + return BasicBlockVisit::Continue; + }); + + BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; + unsigned index = 0; + loop->VisitLoopBlocks([=, &index](BasicBlock* block) { + lexicalBlocks[index++] = block; + return BasicBlockVisit::Continue; + }); + + jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, + [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - for (BasicBlock* const blk : loop.LoopBlocks()) + for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; // Initialize newBlk as BBJ_NONE, and fix up jump kind/target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_NONE, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2155,19 +2194,45 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding // loop, if one exists -- the parent of the loop we're cloning. - newBlk->bbNatLoopNum = loop.lpParent; + newBlk->bbNatLoopNum = ambientLoop; newPred = newBlk; blockMap->Set(blk, newBlk); + + // If the block falls through to a block outside the loop then we may + // need to insert a new block to redirect. + if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) + { + if (blk->KindIs(BBJ_NONE)) + { + // Changed to BBJ_ALWAYS in below loop. + } + else if (blk->KindIs(BBJ_COND)) + { + // Need to insert a block. 
+ BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); + newRedirBlk->copyEHRegion(newPred); + newRedirBlk->bbNatLoopNum = ambientLoop; + // This block isn't part of the loop, so below loop won't add + // refs for it. + fgAddRefPred(blk->Next(), newRedirBlk); + newPred = newRedirBlk; + } + else + { + assert(!"Cannot handle fallthrough"); + } + } } // Perform the static optimizations on the fast path. - optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); + optPerformStaticOptimizations(loop, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. - for (BasicBlock* const blk : loop.LoopBlocks()) + for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2185,7 +2250,15 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) switch (newblk->GetJumpKind()) { case BBJ_NONE: - fgAddRefPred(newblk->Next(), newblk); + if ((i < numBlocks - 1) && !blk->NextIs(lexicalBlocks[i + 1])) + { + newblk->SetJumpKindAndTarget(BBJ_ALWAYS, blk->Next()); + fgAddRefPred(newblk->GetJumpDest(), newblk); + } + else + { + fgAddRefPred(newblk->Next(), newblk); + } break; case BBJ_ALWAYS: @@ -2213,8 +2286,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) #ifdef DEBUG // Display the preds for the new blocks, after all the new blocks have been redirected. 
JITDUMP("Preds after loop copy:\n"); - for (BasicBlock* const blk : loop.LoopBlocks()) - { + loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2224,7 +2296,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) JITDUMP(" " FMT_BB, predBlock->bbNum); } JITDUMP("\n"); - } + + return BasicBlockVisit::Continue; + }); #endif // DEBUG // Insert the loop choice conditions. We will create the following structure: @@ -2240,13 +2314,13 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // // We should always have block conditions. - assert(context->HasBlockConditions(loopInd)); + assert(context->HasBlockConditions(loop->GetIndex())); assert(h->KindIs(BBJ_NONE)); assert(h->NextIs(h2)); // If any condition is false, go to slowHead (which branches or falls through to e2). BasicBlock* e2 = nullptr; - bool foundIt = blockMap->Lookup(loop.lpEntry, &e2); + bool foundIt = blockMap->Lookup(loop->GetHeader(), &e2); assert(foundIt && e2 != nullptr); if (!slowHead->NextIs(e2)) @@ -2259,7 +2333,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) fgAddRefPred(e2, slowHead); JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowHead->bbNum, e2->bbNum); - BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, slowHead, h); + BasicBlock* condLast = optInsertLoopChoiceConditions(context, loop, slowHead, h); // Add the fall-through path pred (either to T/E for fall-through from conditions to fast path, // or H2 if branch to E of fast path). @@ -2271,7 +2345,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // initialize the loop counter immediately before entering the loop, but we've left a shared // initialization of the loop counter up above the test that determines which version of the // loop to take. 
- loop.lpFlags |= LPFLG_DONT_UNROLL; + m_newToOldLoop[loop->GetIndex()]->lpFlags |= LPFLG_DONT_UNROLL; } //------------------------------------------------------------------------- @@ -2284,16 +2358,19 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Return Value: // Returns true if the variable is loop invariant in loopNum. // -bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) +bool Compiler::optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum) { if (lvaVarAddrExposed(lclNum)) { return false; } - if (optIsVarAssgLoop(loopNum, lclNum)) + + // TODO: Cache this invariance information + if (loop->HasDef(lclNum)) { return false; } + return true; } @@ -2637,22 +2714,24 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #endif // Check that the array object local variable is invariant within the loop body. - if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) + if (!optIsStackLocalInvariant(info->loop, arrIndex.arrLcl)) { JITDUMP("V%02d is not loop invariant\n", arrIndex.arrLcl); return WALK_SKIP_SUBTREES; } + NaturalLoopIterInfo* iterInfo = info->context->GetLoopIterInfo(info->loop->GetIndex()); + // Walk the dimensions and see if iterVar of the loop is used as index. for (unsigned dim = 0; dim < arrIndex.rank; ++dim) { // Is index variable also used as the loop iter var? - if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) + if (arrIndex.indLcls[dim] == iterInfo->IterVar) { // Check the previous indices are all loop invariant. 
for (unsigned dim2 = 0; dim2 < dim; ++dim2) { - if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) + if (!optIsStackLocalInvariant(info->loop, arrIndex.indLcls[dim2])) { JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); return WALK_SKIP_SUBTREES; @@ -2661,19 +2740,18 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #ifdef DEBUG if (verbose) { - printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loopNum); + printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loop->GetIndex()); arrIndex.Print(); printf(" on dim %d\n", dim); } #endif // Update the loop context. - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); } else { - JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), - dim); + JITDUMP("Induction V%02d is not used as index on dim %d\n", iterInfo->IterVar, dim); } } return WALK_SKIP_SUBTREES; @@ -2735,7 +2813,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop JITDUMP("... right form for type test with local V%02u\n", lclNum); - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; @@ -2743,8 +2821,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop // Looks like we found an invariant type test. 
// - JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loopNum, dspTreeID(tree), - lclNum); + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loop->GetIndex(), + dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) { @@ -2754,7 +2832,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); assert(compCurBB->lastStmt() == info->stmt); - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd)); } } @@ -2818,13 +2896,13 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; } - JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loopNum, + JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loop->GetIndex(), dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) @@ -2839,7 +2917,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(), relopOp2 != iconHandle DEBUG_ARG( (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); - info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); + info->context->EnsureLoopOptInfo(info->loop->GetIndex())->Push(optInfo); } } } @@ -2884,16 +2962,15 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // (3) the test is frequently hit during the loop iteration // (4) the test is biased and highly likely to succeed // - const LoopDsc& 
loopDsc = optLoopTable[info->loopNum]; - BasicBlock* const loopEntry = loopDsc.lpEntry; - BasicBlock* const typeTestBlock = compCurBB; - double const loopFrequency = 0.50; - double const typeTestFrequency = 0.50; - double const typeTestBias = 0.05; + FlowGraphNaturalLoop* loop = info->loop; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; // Check for (1) // - if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + if (!loop->GetHeader()->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) { JITDUMP(" No; loop does not have profile data.\n"); return WALK_CONTINUE; @@ -2901,7 +2978,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (2) // - if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + if (loop->GetHeader()->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) { JITDUMP(" No; loop does not iterate often enough.\n"); return WALK_CONTINUE; @@ -2909,7 +2986,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (3) // - if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + if (typeTestBlock->bbWeight < (typeTestFrequency * loop->GetHeader()->bbWeight)) { JITDUMP(" No; guard does not execute often enough within the loop.\n"); return WALK_CONTINUE; @@ -2959,7 +3036,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // Also, check if the loop is suitable for the optimizations performed. // // Arguments: -// loopNum - the current loop index for which conditions are derived. +// loop - Loop being analyzed // context - data structure where all loop cloning candidates will be updated. 
// // Return Value: @@ -2972,16 +3049,15 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // optimization candidates and update the "context" parameter with all the // contextual information necessary to perform the optimization later. // -bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopNum]; - const bool canCloneForArrayBounds = - ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && ((loop.lpFlags & LPFLG_ITER) != 0); - const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); + const bool canCloneForArrayBounds = ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && (iterInfo != nullptr); + const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); if (!canCloneForArrayBounds && !canCloneForTypeTests) { - JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loopNum); + JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loop->GetIndex()); return false; } @@ -2992,12 +3068,11 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex shouldCloneForGdvTests &= JitConfig.JitCloneLoopsWithGdvTests() != 0; #endif - JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, + JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loop->GetIndex(), shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); - for (BasicBlock* const block : loop.LoopBlocks()) - { + LoopCloneVisitorInfo info(context, loop, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); + loop->VisitLoopBlocks([=, &info](BasicBlock* block) { compCurBB = block; for (Statement* const stmt : block->Statements()) { @@ -3007,7 +3082,9 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex fgWalkTreePre(stmt->GetRootNodePointer(), optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, computeStack); } - } + + return BasicBlockVisit::Continue; + }); return true; } @@ -3029,14 +3106,22 @@ bool Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) bool result = false; for (unsigned i = 0; i < optLoopCount; i++) { - JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", i); - if (optIsLoopClonable(i)) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", loop->GetIndex()); + NaturalLoopIterInfo iterInfo; + // TODO-Quirk: Remove + if ((optLoopTable[i].lpFlags & LPFLG_ITER) != 0) { - if (optIdentifyLoopOptInfo(i, context)) + if (loop->AnalyzeIteration(&iterInfo)) { - result = true; + context->SetLoopIterInfo(loop->GetIndex(), new (this, CMK_LoopClone) NaturalLoopIterInfo(iterInfo)); } } + + if (optIsLoopClonable(loop, context) && optIdentifyLoopOptInfo(loop, context)) + { + result = true; + } JITDUMP("------------------------------------------------------------\n"); } JITDUMP("\n"); @@ -3085,7 +3170,7 @@ PhaseStatus Compiler::optCloneLoops() return PhaseStatus::MODIFIED_NOTHING; } - LoopCloneContext context(optLoopCount, getAllocator(CMK_LoopClone)); + LoopCloneContext context((unsigned)m_loops->NumLoops(), getAllocator(CMK_LoopClone)); // Obtain array optimization candidates in the context. 
if (!optObtainLoopCloningOpts(&context)) @@ -3098,38 +3183,40 @@ PhaseStatus Compiler::optCloneLoops() unsigned optStaticallyOptimizedLoops = 0; // For each loop, derive cloning conditions for the optimization candidates. - for (unsigned i = 0; i < optLoopCount; ++i) + for (unsigned i = 0; i < optLoopCount; i++) { - JitExpandArrayStack* optInfos = context.GetLoopOptInfo(i); + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + + JitExpandArrayStack* optInfos = context.GetLoopOptInfo(loop->GetIndex()); if (optInfos == nullptr) { continue; } - if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) + if (!optDeriveLoopCloningConditions(loop, &context) || !optComputeDerefConditions(loop, &context)) { JITDUMP("> Conditions could not be obtained\n"); - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else { bool allTrue = false; bool anyFalse = false; - context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); + context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else if (allTrue) { // Perform static optimizations on the fast path since we always // have to take the cloned path. - optPerformStaticOptimizations(i, &context DEBUGARG(false)); + optPerformStaticOptimizations(loop, &context DEBUGARG(false)); ++optStaticallyOptimizedLoops; // No need to clone. - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } } } @@ -3164,12 +3251,13 @@ PhaseStatus Compiler::optCloneLoops() assert(optLoopsCloned == 0); // It should be initialized, but not yet changed. 
for (unsigned i = 0; i < optLoopCount; ++i) { - if (context.GetLoopOptInfo(i) != nullptr) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + if (context.GetLoopOptInfo(loop->GetIndex()) != nullptr) { optLoopsCloned++; - context.OptimizeConditions(i DEBUGARG(verbose)); - context.OptimizeBlockConditions(i DEBUGARG(verbose)); - optCloneLoop(i, &context); + context.OptimizeConditions(loop->GetIndex() DEBUGARG(verbose)); + context.OptimizeBlockConditions(loop->GetIndex() DEBUGARG(verbose)); + optCloneLoop(loop, &context); } } @@ -3178,6 +3266,9 @@ PhaseStatus Compiler::optCloneLoops() JITDUMP("Recompute reachability and dominators after loop cloning\n"); // TODO: recompute the loop table, to include the slow loop path in the table? fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); } #ifdef DEBUG diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 81cd13354b11a..9279bc783a9ec 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -807,6 +807,7 @@ struct LC_ArrayDeref #endif }; +struct NaturalLoopIterInfo; /** * * The "context" represents data that is used for making loop-cloning decisions. @@ -841,16 +842,28 @@ struct LoopCloneContext // The array of block levels of conditions for each loop. 
(loop x level x conditions) jitstd::vector*>*> blockConditions; + jitstd::vector iterInfo; + LoopCloneContext(unsigned loopCount, CompAllocator alloc) - : alloc(alloc), optInfo(alloc), conditions(alloc), arrayDerefs(alloc), objDerefs(alloc), blockConditions(alloc) + : alloc(alloc) + , optInfo(alloc) + , conditions(alloc) + , arrayDerefs(alloc) + , objDerefs(alloc) + , blockConditions(alloc) + , iterInfo(alloc) { optInfo.resize(loopCount, nullptr); conditions.resize(loopCount, nullptr); arrayDerefs.resize(loopCount, nullptr); objDerefs.resize(loopCount, nullptr); blockConditions.resize(loopCount, nullptr); + iterInfo.resize(loopCount, nullptr); } + NaturalLoopIterInfo* GetLoopIterInfo(unsigned loopNum); + void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info); + // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`. BasicBlock* CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6ddb815fd2953..7fbd2c993b8bf 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5613,6 +5613,54 @@ void Compiler::optFindLoops() PhaseStatus Compiler::optFindLoopsPhase() { optFindLoops(); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); + + m_newToOldLoop = m_loops->NumLoops() == 0 ? 
nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); + m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; + + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) + { + BasicBlock* head = loop->GetHeader(); + if (head->bbNatLoopNum == BasicBlock::NOT_IN_LOOP) + continue; + + LoopDsc* dsc = &optLoopTable[head->bbNatLoopNum]; + if (dsc->lpEntry != head) + continue; + + assert(m_oldToNewLoop[head->bbNatLoopNum] == nullptr); + assert(m_newToOldLoop[loop->GetIndex()] == nullptr); + m_oldToNewLoop[head->bbNatLoopNum] = loop; + m_newToOldLoop[loop->GetIndex()] = dsc; + + if ((dsc->lpFlags & LPFLG_ITER) == 0) + continue; + + NaturalLoopIterInfo iter; + bool analyzed = loop->AnalyzeIteration(&iter); + assert(analyzed); + + assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); + assert(iter.HasConstLimit == ((dsc->lpFlags & LPFLG_CONST_LIMIT) != 0)); + assert(iter.HasSimdLimit == ((dsc->lpFlags & LPFLG_SIMD_LIMIT) != 0)); + assert(iter.HasInvariantLocalLimit == ((dsc->lpFlags & LPFLG_VAR_LIMIT) != 0)); + assert(iter.HasArrayLengthLimit == ((dsc->lpFlags & LPFLG_ARRLEN_LIMIT) != 0)); + if (iter.HasConstInit) + { + assert(iter.ConstInitValue == dsc->lpConstInit); + assert(iter.InitBlock == dsc->lpInitBlock); + } + assert(iter.TestTree == dsc->lpTestTree); + } + + // New loop finding should be strictly more general. 
+ for (unsigned i = 0; i < optLoopCount; i++) + { + assert(m_oldToNewLoop[i] != nullptr); + } + return PhaseStatus::MODIFIED_EVERYTHING; } From 1f115e01a86cdd1d2ab49c80e643cb979b49372f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 15:26:54 +0100 Subject: [PATCH 02/20] Fix clang build --- src/coreclr/jit/flowgraph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 9214addb38bf2..484116aebf0bd 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4660,7 +4660,7 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func) { class VisitDefsVisitor : public GenTreeVisitor { - using GenTreeVisitor::m_compiler; + using GenTreeVisitor::m_compiler; TFunc& m_func; @@ -4670,7 +4670,7 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func) DoPreOrder = true, }; - VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) + VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) { } From 1893d7c0daef1945ea2a6d304a8a9266ac9dfda0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 15:41:53 +0100 Subject: [PATCH 03/20] Fix after merge --- src/coreclr/jit/loopcloning.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 352fd37b43c8c..40275645ee89f 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2072,7 +2072,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Make a new pre-header block 'h2' for the loop. 'h' will fall through to 'h2'. 
JITDUMP("Create new header block for loop\n"); - BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop.lpEntry); + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop->GetHeader()); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", h2->bbNum, h->bbNum); h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; h2->bbNatLoopNum = ambientLoop; @@ -2168,6 +2168,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2220,11 +2221,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // need to insert a new block to redirect. if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) { - if (blk->KindIs(BBJ_NONE)) - { - // Changed to BBJ_ALWAYS in below loop. - } - else if (blk->KindIs(BBJ_COND)) + if (blk->KindIs(BBJ_COND)) { // Need to insert a block. 
BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); From 8be981e86b84a17e0b2830dde1d6cd0f3da3482c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 18:10:08 +0100 Subject: [PATCH 04/20] Clean up --- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/flowgraph.cpp | 15 +++++++++++++-- src/coreclr/jit/loopcloning.cpp | 18 +++++++----------- src/coreclr/jit/optimizer.cpp | 15 ++++++++------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9f0323c0ea7b1..057f5b5aeca2c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2100,6 +2100,8 @@ class FlowGraphNaturalLoop bool ContainsBlock(BasicBlock* block); + unsigned NumLoopBlocks(); + template BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 3206f6d7a6a13..a0473d7cd54a2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4289,8 +4289,7 @@ BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits() // Remarks: // Containment here means that the block is in the SCC of the loop; i.e. it // is in a cycle with the header block. Note that EH successors are taken -// into acount; for example, a BBJ_RETURN may still be a loop block provided -// that its handler can reach the loop header. +// into account. // bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) { @@ -4304,6 +4303,18 @@ bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) return BitVecOps::IsMember(&traits, m_blocks, index); } +//------------------------------------------------------------------------ +// NumLoopBlocks: Get the number of blocks in the SCC of the loop. +// +// Returns: +// Count of blocks. 
+// +unsigned FlowGraphNaturalLoop::NumLoopBlocks() +{ + BitVecTraits loopTraits = LoopBlockTraits(); + return BitVecOps::Count(&loopTraits, m_blocks); +} + //------------------------------------------------------------------------ // FlowGraphNaturalLoops::FlowGraphNaturalLoops: Initialize a new instance to // track a set of loops over the flow graph. diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 40275645ee89f..34f8ad0041e9b 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1853,8 +1853,9 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c // TODO-Quirk: Reject loops that with the old loop cloning would put us // above max number of returns. Return blocks are not considered part of - // loops in the new loop finding since they are never part of the SCC. - // (Is that true? What about due to EH successors?) + // loops in the new loop finding since they are never part of the SCC (and + // they aren't present inside try-regions). + // LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; unsigned loopRetCount = 0; for (BasicBlock* const blk : oldLoop->LoopBlocks()) @@ -1933,10 +1934,6 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c #endif } - // Otherwise, we're going to add those return blocks. - fgReturnCount += loopRetCount; - assert(loopRetCount == 0); - return true; } @@ -2149,19 +2146,18 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
- unsigned numBlocks = 0; - loop->VisitLoopBlocks([&numBlocks](BasicBlock* block) { - numBlocks++; - return BasicBlockVisit::Continue; - }); + unsigned numBlocks = loop->NumLoopBlocks(); BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; unsigned index = 0; loop->VisitLoopBlocks([=, &index](BasicBlock* block) { + assert(index < numBlocks); lexicalBlocks[index++] = block; return BasicBlockVisit::Continue; }); + assert(index == numBlocks); + jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 921a1f91a57c0..ac95c6b31f5d8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5646,12 +5646,18 @@ PhaseStatus Compiler::optFindLoopsPhase() assert(m_newToOldLoop[loop->GetIndex()] == nullptr); m_oldToNewLoop[head->bbNatLoopNum] = loop; m_newToOldLoop[loop->GetIndex()] = dsc; + } +#ifdef DEBUG + for (unsigned i = 0; i < optLoopCount; i++) + { + assert(m_oldToNewLoop[i] != nullptr); + LoopDsc* dsc = &optLoopTable[i]; if ((dsc->lpFlags & LPFLG_ITER) == 0) continue; NaturalLoopIterInfo iter; - bool analyzed = loop->AnalyzeIteration(&iter); + bool analyzed = m_oldToNewLoop[i]->AnalyzeIteration(&iter); assert(analyzed); assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); @@ -5666,12 +5672,7 @@ PhaseStatus Compiler::optFindLoopsPhase() } assert(iter.TestTree == dsc->lpTestTree); } - - // New loop finding should be strictly more general. 
- for (unsigned i = 0; i < optLoopCount; i++) - { - assert(m_oldToNewLoop[i] != nullptr); - } +#endif return PhaseStatus::MODIFIED_EVERYTHING; } From d9cd2134f72132660def66a92b917e4038cd9b19 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 18:51:59 +0100 Subject: [PATCH 05/20] Set redirection block weight correctly --- src/coreclr/jit/loopcloning.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 34f8ad0041e9b..e64a2349f6a64 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2223,6 +2223,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; + newRedirBlk->inheritWeight(blk->Next()); + newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); // This block isn't part of the loop, so below loop won't add // refs for it. 
fgAddRefPred(blk->Next(), newRedirBlk); From 587a22b3e2ba4f5828e9c8200c63bf882855d837 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 20:19:04 +0100 Subject: [PATCH 06/20] Further quirking --- src/coreclr/jit/compiler.h | 4 ++++ src/coreclr/jit/compiler.hpp | 41 +++++++++++++++++++++++++++++++++ src/coreclr/jit/flowgraph.cpp | 23 ++++++++++++++++++ src/coreclr/jit/loopcloning.cpp | 37 ++++++++++------------------- 4 files changed, 80 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 057f5b5aeca2c..4a55dfeabc616 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2111,6 +2111,10 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocks(TFunc func); + template + BasicBlockVisit VisitLoopBlocksLexical(TFunc func); + + BasicBlock* GetLexicallyTopMostBlock(); BasicBlock* GetLexicallyBottomMostBlock(); bool AnalyzeIteration(NaturalLoopIterInfo* info); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 90e490fc94fce..52227bb973de4 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5019,6 +5019,47 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) return VisitLoopBlocksReversePostOrder(func); } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksLexical: Visit the loop's blocks in +// lexical order. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. 
+// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) +{ + BasicBlock* top = m_header; + BasicBlock* bottom = m_header; + VisitLoopBlocks([&](BasicBlock* block) { + if (block->bbNum < top->bbNum) + top = block; + if (block->bbNum > bottom->bbNum) + bottom = block; + return BasicBlockVisit::Continue; + }); + + BasicBlock* block = top; + while (true) + { + if (ContainsBlock(block) && (func(block) == BasicBlockVisit::Abort)) + return BasicBlockVisit::Abort; + + if (block == bottom) + return BasicBlockVisit::Continue; + + block = block->Next(); + } +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index a0473d7cd54a2..b1ee159179bf5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5018,6 +5018,29 @@ bool FlowGraphNaturalLoop::MatchLimit(NaturalLoopIterInfo* info, GenTree* test) return true; } +//------------------------------------------------------------------------ +// GetLexicallyTopMostBlock: Get the lexically top-most block contained within +// the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyTopMostBlock() +{ + BasicBlock* top = m_header; + VisitLoopBlocks([&top](BasicBlock* loopBlock) { + if (loopBlock->bbNum < top->bbNum) + top = loopBlock; + return BasicBlockVisit::Continue; + }); + + return top; +} + //------------------------------------------------------------------------ // GetLexicallyBottomMostBlock: Get the lexically bottom-most block contained // within the loop. 
diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index e64a2349f6a64..33d69141b1c48 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2146,25 +2146,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. - unsigned numBlocks = loop->NumLoopBlocks(); - - BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; - unsigned index = 0; - loop->VisitLoopBlocks([=, &index](BasicBlock* block) { - assert(index < numBlocks); - lexicalBlocks[index++] = block; - return BasicBlockVisit::Continue; - }); - - assert(index == numBlocks); - - jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, - [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); - BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - for (unsigned i = 0; i < numBlocks; i++) - { - BasicBlock* blk = lexicalBlocks[i]; + loop->VisitLoopBlocksLexical([=, &newPred](BasicBlock* blk) { // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2215,7 +2198,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // If the block falls through to a block outside the loop then we may // need to insert a new block to redirect. 
- if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) + if (blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) { if (blk->KindIs(BBJ_COND)) { @@ -2235,16 +2218,16 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex assert(!"Cannot handle fallthrough"); } } - } + + return BasicBlockVisit::Continue; + }); // Perform the static optimizations on the fast path. optPerformStaticOptimizations(loop, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. - for (unsigned i = 0; i < numBlocks; i++) - { - BasicBlock* blk = lexicalBlocks[i]; + loop->VisitLoopBlocks([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2281,7 +2264,9 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex default: break; } - } + + return BasicBlockVisit::Continue; + }); #ifdef DEBUG // Display the preds for the new blocks, after all the new blocks have been redirected. @@ -3070,7 +3055,9 @@ bool Compiler::optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneConte shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); LoopCloneVisitorInfo info(context, loop, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); - loop->VisitLoopBlocks([=, &info](BasicBlock* block) { + + // TODO-Quirk: Switch this to VisitLoopBlocks + loop->VisitLoopBlocksLexical([=, &info](BasicBlock* block) { compCurBB = block; for (Statement* const stmt : block->Statements()) { From baaeccbd1eace0e8ee5fdaaf4cdc34e014b2ce93 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 20:49:20 +0100 Subject: [PATCH 07/20] Fix a bug that crept in --- src/coreclr/jit/loopcloning.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 33d69141b1c48..2bb8cb5377277 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2146,8 +2146,10 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
- BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->VisitLoopBlocksLexical([=, &newPred](BasicBlock* blk) { + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); + unsigned numBlocksLeft = loop->NumLoopBlocks(); + + loop->VisitLoopBlocksLexical([=, &newPred, &numBlocksLeft](BasicBlock* blk) { // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2196,9 +2198,12 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex newPred = newBlk; blockMap->Set(blk, newBlk); + numBlocksLeft--; + assert(numBlocksLeft >= 0); + // If the block falls through to a block outside the loop then we may // need to insert a new block to redirect. - if (blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) + if ((numBlocksLeft > 0) && blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) { if (blk->KindIs(BBJ_COND)) { From 39bf71c3044235ef2e983341468537c13e6853ff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 21:25:23 +0100 Subject: [PATCH 08/20] Final quirks --- src/coreclr/jit/loopcloning.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 2bb8cb5377277..300f1284dd612 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2207,15 +2207,26 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex { if (blk->KindIs(BBJ_COND)) { + // TODO-Quirk: We see a lot of these cases and some of them cause diffs. 
+ BasicBlock* targetBlk = blk->Next(); + if (targetBlk->KindIs(BBJ_ALWAYS) && targetBlk->isEmpty()) + targetBlk = targetBlk->GetJumpDest(); + // Need to insert a block. - BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); + BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, targetBlk); newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; - newRedirBlk->inheritWeight(blk->Next()); + newRedirBlk->bbWeight = blk->Next()->bbWeight; newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); + + // TODO-Quirk: This next block is not part of the loop and + // should not be scaled down, especially once we get rid of the + // other quirk above. + blk->Next()->scaleBBWeight(fastPathWeightScaleFactor); + // This block isn't part of the loop, so below loop won't add // refs for it. - fgAddRefPred(blk->Next(), newRedirBlk); + fgAddRefPred(targetBlk, newRedirBlk); newPred = newRedirBlk; } else From 1235a0fbaa743cd438cd1f55ca9dfeaac9b90000 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Nov 2023 11:11:01 +0100 Subject: [PATCH 09/20] Actual final quirking --- src/coreclr/jit/loopcloning.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 300f1284dd612..af949e0a8de01 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2217,6 +2217,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; newRedirBlk->bbWeight = blk->Next()->bbWeight; + newRedirBlk->bbFlags |= blk->Next()->bbFlags & (BBF_RUN_RARELY | BBF_PROF_WEIGHT); newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); // TODO-Quirk: This next block is not part of the loop and From 228ea40e2343d7b5c1acd36be8734720b42c4618 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 3 Dec 2023 
19:15:45 +0100 Subject: [PATCH 10/20] WIP --- src/coreclr/jit/compiler.h | 108 +++++++--- src/coreclr/jit/flowgraph.cpp | 13 ++ src/coreclr/jit/optimizer.cpp | 359 +++++++++++++++------------------- 3 files changed, 249 insertions(+), 231 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a6092860c71cb..9299214f2140b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2039,6 +2039,8 @@ class FlowGraphNaturalLoop const FlowGraphDfsTree* m_tree; BasicBlock* m_header; FlowGraphNaturalLoop* m_parent = nullptr; + FlowGraphNaturalLoop* m_child = nullptr; + FlowGraphNaturalLoop* m_sibling = nullptr; // Bit vector of blocks in the loop; each index is the RPO index a block, // with the head block's RPO index subtracted. BitVec m_blocks; @@ -2078,6 +2080,16 @@ class FlowGraphNaturalLoop return m_parent; } + FlowGraphNaturalLoop* GetChild() const + { + return m_child; + } + + FlowGraphNaturalLoop* GetSibling() const + { + return m_sibling; + } + unsigned GetIndex() const { return m_index; @@ -2238,6 +2250,12 @@ enum class NodeThreading LIR, // Nodes are in LIR form (after rationalization) }; +enum class FieldKindForVN +{ + SimpleStatic, + WithBaseAddr +}; + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -4746,6 +4764,8 @@ class Compiler struct LoopDsc; LoopDsc** m_newToOldLoop; FlowGraphNaturalLoop** m_oldToNewLoop; + struct LoopSideEffects* m_loopSideEffects; + struct HoistCounts* m_loopHoistCounts; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -6493,6 +6513,13 @@ class Compiler // Previous decisions on loop-invariance of value numbers in the current loop. 
VNSet m_curLoopVnInvariantCache; + int m_loopVarInOutCount; + int m_loopVarCount; + int m_hoistedExprCount; + int m_loopVarFPCount; + int m_loopVarInOutFPCount; + int m_hoistedFPExprCount; + // Get the VN cache for current loop VNSet* GetHoistedInCurLoop(Compiler* comp) { @@ -6520,26 +6547,26 @@ class Compiler // by the loop "lnum" itself. bool optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); - // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) - bool optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt); + // Do hoisting for a particular loop + bool optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); - // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) + // Hoist all expressions in "blocks" that are invariant in "loop" // outside of that loop. - void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); + void optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack* blocks, LoopHoistContext* hoistContext); - // Return true if the tree looks profitable to hoist out of loop 'lnum'. - bool optIsProfitableToHoistTree(GenTree* tree, unsigned lnum); + // Return true if the tree looks profitable to hoist out of "loop" + bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop); - // Performs the hoisting 'tree' into the PreHeader for loop 'lnum' - void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt); + // Performs the hoisting 'tree' into the PreHeader for "loop" + void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); // Note the new SSA uses in tree void optRecordSsaUses(GenTree* tree, BasicBlock* block); - // Returns true iff the ValueNum "vn" represents a value that is loop-invariant in "lnum". 
+ // Returns true iff the ValueNum "vn" represents a value that is loop-invariant in "loop". // Constants and init values are always loop invariant. // VNPhi's connect VN's to the SSA definition, so we can know if the SSA def occurs in the loop. - bool optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* recordedVNs); + bool optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNSet* recordedVNs); // If "blk" is the entry block of a natural loop, returns true and sets "*pLnum" to the index of the loop // in the loop table. @@ -6560,21 +6587,14 @@ class Compiler void optMarkLoopRemoved(unsigned loopNum); private: - // Requires "lnum" to be the index of an outermost loop in the loop table. Traverses the body of that loop, - // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and - // static) written to, and SZ-array element type equivalence classes updated. - void optComputeLoopNestSideEffects(unsigned lnum); - - // Given a loop number 'lnum' mark it and any nested loops as having 'memoryHavoc' - void optRecordLoopNestsMemoryHavoc(unsigned lnum, MemoryKindSet memoryHavoc); + // Given a loop mark it and any nested loops as having 'memoryHavoc' + void optRecordLoopNestsMemoryHavoc(FlowGraphNaturalLoop* loop, MemoryKindSet memoryHavoc); // Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part. - // Returns false if we encounter a block that is not marked as being inside a loop. - // - bool optComputeLoopSideEffectsOfBlock(BasicBlock* blk); + void optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNaturalLoop* mostNestedLoop); // Hoist the expression "expr" out of loop "lnum". 
- void optPerformHoistExpr(GenTree* expr, BasicBlock* exprBb, unsigned lnum); + void optPerformHoistExpr(GenTree* expr, BasicBlock* exprBb, FlowGraphNaturalLoop* loop); public: PhaseStatus optOptimizeBools(); @@ -6608,12 +6628,6 @@ class Compiler CALLINT_ALL, // kills everything (normal method call) }; - enum class FieldKindForVN - { - SimpleStatic, - WithBaseAddr - }; - public: // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered" @@ -6951,16 +6965,16 @@ class Compiler const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists); // Marks the containsCall information to "lnum" and any parent loops. - void AddContainsCallAllContainingLoops(unsigned lnum); + void AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop); // Adds the variable liveness information from 'blk' to "lnum" and any parent loops. - void AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk); + void AddVariableLivenessAllContainingLoops(FlowGraphNaturalLoop* loop, BasicBlock* blk); // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. - void AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); + void AddModifiedFieldAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. - void AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemType); + void AddModifiedElemTypeAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_CLASS_HANDLE elemType); // Requires that "from" and "to" have the same "bbJumpKind" (perhaps because "to" is a clone // of "from".) Copies the jump destination from "from" to "to". 
@@ -11980,6 +11994,38 @@ class StringPrinter void Append(char chr); }; +typedef JitHashTable, FieldKindForVN> + FieldHandleSet; + +typedef JitHashTable, bool> ClassHandleSet; + +struct LoopSideEffects +{ + // The loop contains an operation that we assume has arbitrary memory side + // effects. If this is set, the fields below may not be accurate (since + // they become irrelevant.) + bool HasMemoryHavoc[MemoryKindCount]; + // The set of variables that are IN or OUT during the execution of this loop + VARSET_TP VarInOut; + // The set of variables that are USE or DEF during the execution of this loop. + VARSET_TP VarUseDef; + // This has entries for all static field and object instance fields modified + // in the loop. + FieldHandleSet* FieldsModified = nullptr; + // Bits set indicate the set of sz array element types such that + // arrays of that type are modified + // in the loop. + ClassHandleSet* ArrayElemTypesModified = nullptr; + bool ContainsCall = false; + bool HasNestedLoops = false; + + LoopSideEffects(); + + void AddVariableLiveness(Compiler* comp, BasicBlock* block); + void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); + void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd); +}; + /***************************************************************************** * * Variables to keep track of total code amounts. diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 34e4b3dd2e2a6..1e0a411926534 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4471,6 +4471,18 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) JITDUMP("Added loop " FMT_LP " with header " FMT_BB "\n", loop->GetIndex(), loop->GetHeader()->bbNum); } + // Now build sibling/child links by iterating loops in post order. This + // makes us end up with sibling links in reverse post order. 
+ for (FlowGraphNaturalLoop* loop : loops->InPostOrder()) + { + if (loop->m_parent != nullptr) + { + loop->m_sibling = loop->m_parent->m_child; + loop->m_parent->m_child = loop; + } + } + +#ifdef DEBUG if (loops->m_loops.size() > 0) { JITDUMP("\nFound %zu loops\n", loops->m_loops.size()); @@ -4480,6 +4492,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) { JITDUMP("Rejected %u loop headers\n", loops->m_improperLoopHeaders); } +#endif return loops; } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6879efe98214e..78d6a22cc52dc 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6367,21 +6367,22 @@ void Compiler::optRecordSsaUses(GenTree* tree, BasicBlock* block) // Arguments: // origExpr - tree to hoist // exprBb - block containing the tree -// lnum - loop that we're hoisting origExpr out of +// loop - loop that we're hoisting origExpr out of // -void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsigned lnum) +void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGraphNaturalLoop* loop) { assert(exprBb != nullptr); + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); #ifdef DEBUG if (verbose) { printf("\nHoisting a copy of "); printTreeID(origExpr); printf(" " FMT_VN, origExpr->gtVNPair.GetLiberal()); - printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " <" FMT_BB ".." 
FMT_BB ">:\n", - exprBb->bbNum, optLoopTable[lnum].lpHead->bbNum, lnum, optLoopTable[lnum].lpTop->bbNum, - optLoopTable[lnum].lpBottom->bbNum); + printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " (head: " FMT_BB "):\n", + exprBb->bbNum, preheader->bbNum, loop->GetIndex(), loop->GetHeader()->bbNum); gtDispTree(origExpr); printf("\n"); } @@ -6404,27 +6405,21 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign // The value of the expression isn't used. GenTree* hoist = gtUnusedValNode(hoistExpr); - /* Put the statement in the preheader */ - - INDEBUG(optLoopTable[lnum].lpValidatePreHeader()); - - BasicBlock* preHead = optLoopTable[lnum].lpHead; - // Scan the tree for any new SSA uses. // - optRecordSsaUses(hoist, preHead); + optRecordSsaUses(hoist, preheader); - preHead->bbFlags |= exprBb->bbFlags & BBF_COPY_PROPAGATE; + preheader->bbFlags |= exprBb->bbFlags & BBF_COPY_PROPAGATE; Statement* hoistStmt = gtNewStmt(hoist); // Simply append the statement at the end of the preHead's list. 
- Statement* firstStmt = preHead->firstStmt(); + Statement* firstStmt = preheader->firstStmt(); if (firstStmt != nullptr) { /* append after last statement */ - Statement* lastStmt = preHead->lastStmt(); + Statement* lastStmt = preheader->lastStmt(); assert(lastStmt->GetNextStmt() == nullptr); lastStmt->SetNextStmt(hoistStmt); @@ -6435,7 +6430,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { /* Empty pre-header - store the single statement in the block */ - preHead->bbStmtList = hoistStmt; + preheader->bbStmtList = hoistStmt; hoistStmt->SetPrevStmt(hoistStmt); } @@ -6444,7 +6439,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign #ifdef DEBUG if (verbose) { - printf("This hoisted copy placed in PreHeader (" FMT_BB "):\n", preHead->bbNum); + printf("This hoisted copy placed in PreHeader (" FMT_BB "):\n", preheader->bbNum); gtDispTree(hoist); printf("\n"); } @@ -6461,13 +6456,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { // What is the depth of the loop "lnum"? - ssize_t depth = 0; - unsigned lnumIter = lnum; - while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP) - { - depth++; - lnumIter = optLoopTable[lnumIter].lpParent; - } + ssize_t depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); NodeToTestDataMap* testData = GetNodeTestData(); @@ -6579,6 +6568,10 @@ PhaseStatus Compiler::optHoistLoopCode() // Consider all the loop nests, in inner-to-outer order // + // TODO-Quirk: Switch this to postorder over the loops, instead of this + // loop tree based thing. It is not the exact same order, but it will still + // process child loops before parent loops. 
+ bool modified = false; LoopHoistContext hoistCtxt(this); for (unsigned lnum = 0; lnum < optLoopCount; lnum++) @@ -6659,7 +6652,7 @@ bool Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } - modified |= optHoistThisLoop(lnum, hoistCtxt); + modified |= optHoistThisLoop(m_oldToNewLoop[lnum], hoistCtxt); return modified; } @@ -6668,42 +6661,33 @@ bool Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) // optHoistThisLoop: run loop hoisting for the indicated loop // // Arguments: -// lnum - loop to process +// loop - loop to process // hoistCtxt - context for the hoisting // // Returns: // true if any hoisting was done // -bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) +bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt) { - LoopDsc* pLoopDsc = &optLoopTable[lnum]; - - /* If loop was removed continue */ - - if (pLoopDsc->lpIsRemoved()) - { - JITDUMP(" ... not hoisting " FMT_LP ": removed\n", lnum); - return false; - } - // Ensure the per-loop sets/tables are empty. hoistCtxt->m_curLoopVnInvariantCache.RemoveAll(); + const LoopSideEffects& sideEffs = m_loopSideEffects[loop->GetIndex()]; + #ifdef DEBUG if (verbose) { - printf("optHoistThisLoop for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", lnum, pLoopDsc->lpTop->bbNum, - pLoopDsc->lpBottom->bbNum); - printf(" Loop body %s a call\n", (pLoopDsc->lpFlags & LPFLG_CONTAINS_CALL) ? "contains" : "does not contain"); - printf(" Loop has %s\n", (pLoopDsc->lpExitCnt == 1) ? "single exit" : "multiple exits"); + printf("optHoistThisLoop for loop " FMT_LP " (head: " FMT_BB "):\n", loop->GetIndex(), loop->GetHeader()->bbNum); + printf(" Loop body %s a call\n", sideEffs.ContainsCall ? "contains" : "does not contain"); + printf(" Loop has %s\n", (loop->ExitEdges().size() == 1) ? 
"single exit" : "multiple exits"); } #endif - VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); + VARSET_TP loopVars(VarSetOps::Intersection(this, sideEffs.VarInOut, sideEffs.VarUseDef)); - pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); - pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); - pLoopDsc->lpHoistedExprCount = 0; + hoistCtxt->m_loopVarInOutCount = VarSetOps::Count(this, sideEffs.VarInOut); + hoistCtxt->m_loopVarCount = VarSetOps::Count(this, loopVars); + hoistCtxt->m_hoistedExprCount = 0; #ifndef TARGET_64BIT @@ -6713,7 +6697,7 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) // the Counts such that each TYP_LONG variable counts twice. // VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars)); - VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars)); + VARSET_TP inOutLongVars(VarSetOps::Intersection(this, sideEffs.VarInOut, lvaLongVars)); #ifdef DEBUG if (verbose) @@ -6722,21 +6706,21 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) dumpConvertedVarSet(this, lvaLongVars); } #endif - pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars); - pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars); + hoistedCtxt->m_loopVarCount += VarSetOps::Count(this, loopLongVars); + hoistedCtxt->m_loopVarInOutCount += VarSetOps::Count(this, inOutLongVars); } #endif // !TARGET_64BIT #ifdef DEBUG if (verbose) { - printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef)); - dumpConvertedVarSet(this, pLoopDsc->lpVarUseDef); + printf("\n USEDEF (%d)=", VarSetOps::Count(this, sideEffs.VarUseDef)); + dumpConvertedVarSet(this, sideEffs.VarUseDef); - printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount); - dumpConvertedVarSet(this, pLoopDsc->lpVarInOut); + printf("\n INOUT (%d)=", hoistCtxt->m_loopVarInOutCount); + dumpConvertedVarSet(this, 
sideEffs.VarInOut); - printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount); + printf("\n LOOPVARS(%d)=", hoistCtxt->m_loopVarCount); dumpConvertedVarSet(this, loopVars); printf("\n"); } @@ -6745,22 +6729,21 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) if (!VarSetOps::IsEmpty(this, lvaFloatVars)) { VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars)); - VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars)); - - pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars); - pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); - pLoopDsc->lpHoistedFPExprCount = 0; + VARSET_TP inOutFPVars(VarSetOps::Intersection(this, sideEffs.VarInOut, lvaFloatVars)); - pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount; - pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount; + hoistCtxt->m_loopVarFPCount = VarSetOps::Count(this, loopFPVars); + hoistCtxt->m_loopVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); + hoistCtxt->m_hoistedFPExprCount = 0; + hoistCtxt->m_loopVarCount -= hoistCtxt->m_loopVarFPCount; + hoistCtxt->m_loopVarInOutCount -= hoistCtxt->m_loopVarInOutFPCount; #ifdef DEBUG if (verbose) { - printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount); + printf(" INOUT-FP(%d)=", hoistCtxt->m_loopVarInOutFPCount); dumpConvertedVarSet(this, inOutFPVars); - printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount); + printf("\n LOOPV-FP(%d)=", hoistCtxt->m_loopVarFPCount); dumpConvertedVarSet(this, loopFPVars); printf("\n"); @@ -6769,9 +6752,9 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) } else // lvaFloatVars is empty { - pLoopDsc->lpLoopVarFPCount = 0; - pLoopDsc->lpVarInOutFPCount = 0; - pLoopDsc->lpHoistedFPExprCount = 0; + hoistCtxt->m_loopVarFPCount = 0; + hoistCtxt->m_loopVarInOutFPCount = 0; + hoistCtxt->m_hoistedFPExprCount = 0; } // Find the set of definitely-executed blocks. 
@@ -6830,17 +6813,19 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) // convenient). But note that it is arbitrary because there is not guaranteed execution order amongst // the child loops. - for (BasicBlock::loopNumber childLoop = pLoopDsc->lpChild; // - childLoop != BasicBlock::NOT_IN_LOOP; // - childLoop = optLoopTable[childLoop].lpSibling) + for (FlowGraphNaturalLoop* childLoop = loop->GetChild(); + childLoop != nullptr; + childLoop = childLoop->GetSibling()) { - if (optLoopTable[childLoop].lpIsRemoved()) + // TODO-Quirk: Remove + if (m_oldToNewLoop[childLoop->GetIndex()] == nullptr) { continue; } - INDEBUG(optLoopTable[childLoop].lpValidatePreHeader()); - BasicBlock* childPreHead = optLoopTable[childLoop].lpHead; - if (pLoopDsc->lpExitCnt == 1) + + assert(childLoop->EntryEdges().size() == 1); + BasicBlock* childPreHead = childLoop->EntryEdges()[0]->getSourceBlock(); + if (loop->ExitEdges().size() == 1) { if (fgDominate(childPreHead, pLoopDsc->lpExit)) { @@ -6892,17 +6877,15 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); defExec.Push(pLoopDsc->lpEntry); - optHoistLoopBlocks(lnum, &defExec, hoistCtxt); + optHoistLoopBlocks(loop, &defExec, hoistCtxt); const unsigned numHoisted = pLoopDsc->lpHoistedFPExprCount + pLoopDsc->lpHoistedExprCount; return numHoisted > 0; } -bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) +bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop) { - LoopDsc* pLoopDsc = &optLoopTable[lnum]; - - bool loopContainsCall = (pLoopDsc->lpFlags & LPFLG_CONTAINS_CALL) != 0; + bool loopContainsCall = m_loopSideEffects[loop->GetIndex()].ContainsCall; int availRegCount; int hoistedExprCount; @@ -7126,7 +7109,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // optHoistLoopBlocks: Hoist invariant expression out of the loop. 
// // Arguments: -// loopNum - The number of the loop +// loop - The loop // blocks - A stack of blocks belonging to the loop // hoistContext - The loop hoist context // @@ -7135,7 +7118,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext) +void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack* blocks, LoopHoistContext* hoistContext) { class HoistVisitor : public GenTreeVisitor { @@ -7167,7 +7150,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo ArrayStack m_valueStack; bool m_beforeSideEffect; - unsigned m_loopNum; + FlowGraphNaturalLoop* m_loop; LoopHoistContext* m_hoistContext; BasicBlock* m_currentBlock; @@ -7262,7 +7245,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo ValueNum loopMemoryVN = m_compiler->GetMemoryPerSsaData(loopEntryBlock->bbMemorySsaNumIn[memoryKind]) ->m_vnPair.GetLiberal(); - if (!m_compiler->optVNIsLoopInvariant(loopMemoryVN, m_loopNum, + if (!m_compiler->optVNIsLoopInvariant(loopMemoryVN, m_loop, &m_hoistContext->m_curLoopVnInvariantCache)) { return false; @@ -7283,11 +7266,11 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo UseExecutionOrder = true, }; - HoistVisitor(Compiler* compiler, unsigned loopNum, LoopHoistContext* hoistContext) + HoistVisitor(Compiler* compiler, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistContext) : GenTreeVisitor(compiler) , m_valueStack(compiler->getAllocator(CMK_LoopHoist)) , m_beforeSideEffect(true) - , m_loopNum(loopNum) + , m_loop(loop) , m_hoistContext(hoistContext) , m_currentBlock(nullptr) { @@ -7305,7 +7288,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo // hoist the top node? 
if (top.m_hoistable) { - m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); + m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loop, m_hoistContext); } else { @@ -7344,8 +7327,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo bool isInvariant = lclVar->HasSsaName(); // and the SSA definition must be outside the loop we're hoisting from ... isInvariant = isInvariant && - !m_compiler->optLoopTable[m_loopNum].lpContains( - m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock()); + !m_loop->ContainsBlock(m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock()); + // and the VN of the tree is considered invariant as well. // // TODO-CQ: This VN invariance check should not be necessary and in some cases it is conservative - it @@ -7654,7 +7637,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo if (IsHoistableOverExcepSibling(value.Node(), hasExcep)) { - m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); + m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loop, m_hoistContext); } // Don't hoist this tree again. @@ -7700,19 +7683,16 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo } }; - LoopDsc* loopDsc = &optLoopTable[loopNum]; - assert(blocks->Top() == loopDsc->lpEntry); - - HoistVisitor visitor(this, loopNum, hoistContext); + HoistVisitor visitor(this, loop, hoistContext); while (!blocks->Empty()) { BasicBlock* block = blocks->Pop(); weight_t blockWeight = block->getBBWeight(this); - JITDUMP("\n optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop " FMT_LP " <" FMT_BB ".." 
FMT_BB ">\n", - block->bbNum, refCntWtd2str(blockWeight, /* padForDecimalPlaces */ true), loopNum, - loopDsc->lpTop->bbNum, loopDsc->lpBottom->bbNum); + JITDUMP("\n optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop " FMT_LP " (head: " FMT_BB ")\n", + block->bbNum, refCntWtd2str(blockWeight, /* padForDecimalPlaces */ true), loop->GetIndex(), + loop->GetHeader()->bbNum); if (blockWeight < (BB_UNITY_WEIGHT / 10)) { @@ -7726,12 +7706,10 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo hoistContext->ResetHoistedInCurLoop(); } -void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) +void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt) { - assert(lnum != BasicBlock::NOT_IN_LOOP); - // It must pass the hoistable profitablity tests for this loop level - if (!optIsProfitableToHoistTree(tree, lnum)) + if (!optIsProfitableToHoistTree(tree, loop)) { JITDUMP(" ... not profitable to hoist\n"); return; @@ -7742,25 +7720,26 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu // already hoisted this expression in the current loop, so don't hoist this expression. JITDUMP(" [%06u] ... already hoisted " FMT_VN " in " FMT_LP "\n ", dspTreeID(tree), - tree->gtVNPair.GetLiberal(), lnum); + tree->gtVNPair.GetLiberal(), loop->GetIndex()); return; } // We should already have a pre-header for the loop. - INDEBUG(optLoopTable[lnum].lpValidatePreHeader()); + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); // If the block we're hoisting from and the pre-header are in different EH regions, don't hoist. // TODO: we could probably hoist things that won't raise exceptions, such as constants. - if (!BasicBlock::sameTryRegion(optLoopTable[lnum].lpHead, treeBb)) + if (!BasicBlock::sameTryRegion(preheader, treeBb)) { JITDUMP(" ... 
not hoisting in " FMT_LP ", eh region constraint (pre-header try index %d, candidate " FMT_BB " try index %d\n", - lnum, optLoopTable[lnum].lpHead->bbTryIndex, treeBb->bbNum, treeBb->bbTryIndex); + loop->GetIndex(), preheader->bbTryIndex, treeBb->bbNum, treeBb->bbTryIndex); return; } // Expression can be hoisted - optPerformHoistExpr(tree, treeBb, lnum); + optPerformHoistExpr(tree, treeBb, loop); // Increment lpHoistedExprCount or lpHoistedFPExprCount if (!varTypeIsFloating(tree->TypeGet())) @@ -7783,7 +7762,7 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); } -bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInvariantCache) +bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNSet* loopVnInvariantCache) { // If it is not a VN, is not loop-invariant. if (vn == ValueNumStore::NoVN) @@ -7814,27 +7793,29 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInv unsigned lclNum = funcApp.m_args[0]; unsigned ssaNum = funcApp.m_args[1]; LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum); - res = !optLoopContains(lnum, ssaDef->GetBlock()->bbNatLoopNum); + res = !loop->ContainsBlock(ssaDef->GetBlock()); } else if (funcApp.m_func == VNF_PhiMemoryDef) { BasicBlock* defnBlk = reinterpret_cast(vnStore->ConstantValue(funcApp.m_args[0])); - res = !optLoopContains(lnum, defnBlk->bbNatLoopNum); + res = !loop->ContainsBlock(defnBlk); } else if (funcApp.m_func == VNF_MemOpaque) { - const unsigned vnLoopNum = funcApp.m_args[0]; + const unsigned blockNum = funcApp.m_args[0]; - // Check for the special "ambiguous" loop MemOpaque VN. + // Check for the special "ambiguous" block VN. // This is considered variant in every loop. 
// - if (vnLoopNum == BasicBlock::MAX_LOOP_NUM) + if (blockNum == UINT_MAX) { res = false; } else { - res = !optLoopContains(lnum, vnLoopNum); + assert(blockNum < m_dfs->GetPostOrderCount()); + BasicBlock* block = m_dfs->GetPostOrder()[blockNum]; + res = !loop->ContainsBlock(block); } } else @@ -7849,8 +7830,9 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInv if (i == 3) { - const unsigned vnLoopNum = funcApp.m_args[3]; - res = !optLoopContains(lnum, vnLoopNum); + const unsigned blockNum = funcApp.m_args[3]; + assert(blockNum < m_dfs->GetPostOrderCount()); + res = !loop->ContainsBlock(m_dfs->GetPostOrder()[blockNum]); break; } } @@ -7858,7 +7840,7 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInv // TODO-CQ: We need to either make sure that *all* VN functions // always take VN args, or else have a list of arg positions to exempt, as implicitly // constant. - if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache)) + if (!optVNIsLoopInvariant(funcApp.m_args[i], loop, loopVnInvariantCache)) { res = false; break; @@ -8345,27 +8327,42 @@ bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) return false; } +LoopSideEffects::LoopSideEffects() + : VarInOut(VarSetOps::UninitVal()) + , VarUseDef(VarSetOps::UninitVal()) +{ + for (MemoryKind mk : allMemoryKinds()) + { + HasMemoryHavoc[mk] = false; + } +} + void Compiler::optComputeLoopSideEffects() { - unsigned lnum; - for (lnum = 0; lnum < optLoopCount; lnum++) + m_loopSideEffects = new (this, CMK_LoopOpt) LoopSideEffects[m_loops->NumLoops()]; + + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { - VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this)); - VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this)); - optLoopTable[lnum].lpFlags &= ~LPFLG_CONTAINS_CALL; + m_loopSideEffects[loop->GetIndex()].VarInOut = VarSetOps::MakeEmpty(this); + 
m_loopSideEffects[loop->GetIndex()].VarUseDef = VarSetOps::MakeEmpty(this); } - for (lnum = 0; lnum < optLoopCount; lnum++) + // Now visit all loops in post order, meaning we see child loops first. + // Mark side effects into the loop and all ancestor loops the first time we + // see it. + BitVecTraits postOrderTraits = m_dfs->PostOrderTraits(); + BitVec visited(BitVecOps::MakeEmpty(&postOrderTraits)); + + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - if (optLoopTable[lnum].lpIsRemoved()) - { - continue; - } + loop->VisitLoopBlocks([&](BasicBlock* block) { + if (BitVecOps::TryAddElemD(&postOrderTraits, visited, block->bbPostorderNum)) + { + optComputeLoopSideEffectsOfBlock(block, loop); + } - if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) - { // Is outermost... - optComputeLoopNestSideEffects(lnum); - } + return BasicBlockVisit::Continue; + }); } } @@ -8395,58 +8392,25 @@ void Compiler::optComputeInterestingVarSets() } } -void Compiler::optComputeLoopNestSideEffects(unsigned lnum) +void Compiler::optRecordLoopNestsMemoryHavoc(FlowGraphNaturalLoop* loop, MemoryKindSet memoryHavoc) { - JITDUMP("optComputeLoopNestSideEffects for " FMT_LP "\n", lnum); - assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost. - for (BasicBlock* const bbInLoop : optLoopTable[lnum].LoopBlocks()) - { - if (!optComputeLoopSideEffectsOfBlock(bbInLoop)) - { - // When optComputeLoopSideEffectsOfBlock returns false, we encountered - // a block that was moved into the loop range (by fgReorderBlocks), - // but not marked correctly as being inside the loop. - // We conservatively mark this loop (and any outer loops) - // as having memory havoc side effects. - // - // Record that all loops containing this block have memory havoc effects. 
- // - optRecordLoopNestsMemoryHavoc(lnum, fullMemoryKindSet); - - // All done, no need to keep visiting more blocks - break; - } - } -} - -void Compiler::optRecordLoopNestsMemoryHavoc(unsigned lnum, MemoryKindSet memoryHavoc) -{ - // We should start out with 'lnum' set to a valid natural loop index - assert(lnum != BasicBlock::NOT_IN_LOOP); - - while (lnum != BasicBlock::NOT_IN_LOOP) + do { for (MemoryKind memoryKind : allMemoryKinds()) { if ((memoryHavoc & memoryKindSet(memoryKind)) != 0) { - optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true; + m_loopSideEffects[loop->GetIndex()].HasMemoryHavoc[memoryKind] = true; } } - // Move lnum to the next outtermost loop that we need to mark - lnum = optLoopTable[lnum].lpParent; - } + loop = loop->GetParent(); + } while (loop != nullptr); } -bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) +void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNaturalLoop* mostNestedLoop) { - unsigned mostNestedLoop = blk->bbNatLoopNum; - JITDUMP("optComputeLoopSideEffectsOfBlock " FMT_BB ", mostNestedLoop %d\n", blk->bbNum, mostNestedLoop); - if (mostNestedLoop == BasicBlock::NOT_IN_LOOP) - { - return false; - } + JITDUMP("optComputeLoopSideEffectsOfBlock " FMT_BB ", mostNestedLoop " FMT_LP "\n", blk->bbNum, mostNestedLoop->GetIndex()); AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); // MemoryKinds for which an in-loop call or store has arbitrary effects. @@ -8468,10 +8432,10 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) AddContainsCallAllContainingLoops(mostNestedLoop); } - // If we just set LPFLG_CONTAINS_CALL or it was previously set - if (optLoopTable[mostNestedLoop].lpFlags & LPFLG_CONTAINS_CALL) + // If we just marked it as containing a call or it was previously set + if (m_loopSideEffects[mostNestedLoop->GetIndex()].ContainsCall) { - // We can early exit after both memoryHavoc and LPFLG_CONTAINS_CALL are both set to true. 
+ // We can early exit after both memoryHavoc and ContainsCall are both set to true. break; } @@ -8681,31 +8645,29 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) // Record that all loops containing this block have this kind of memoryHavoc effects. optRecordLoopNestsMemoryHavoc(mostNestedLoop, memoryHavoc); } - return true; } -// Marks the containsCall information to "lnum" and any parent loops. -void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) +// Marks the containsCall information to "loop" and any parent loops. +void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) { #if FEATURE_LOOP_ALIGN // If this is the inner most loop, reset the LOOP_ALIGN flag // because a loop having call will not likely to benefit from // alignment - if (optLoopTable[lnum].lpChild == BasicBlock::NOT_IN_LOOP) + if (!m_loopSideEffects[loop->GetIndex()].HasNestedLoops) { - BasicBlock* top = optLoopTable[lnum].lpTop; + BasicBlock* top = loop->GetLexicallyTopMostBlock(); top->unmarkLoopAlign(this DEBUG_ARG("Loop with call")); } #endif - assert(0 <= lnum && lnum < optLoopCount); - while (lnum != BasicBlock::NOT_IN_LOOP) + do { - optLoopTable[lnum].lpFlags |= LPFLG_CONTAINS_CALL; - lnum = optLoopTable[lnum].lpParent; - } + m_loopSideEffects[loop->GetIndex()].ContainsCall = true; + loop = loop->GetParent(); + } while (loop != nullptr); } // Adds the variable liveness information for 'blk' to 'this' LoopDsc @@ -8719,36 +8681,33 @@ void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) } // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. 
-void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk) +void Compiler::AddVariableLivenessAllContainingLoops(FlowGraphNaturalLoop* loop, BasicBlock* blk) { - assert(0 <= lnum && lnum < optLoopCount); - while (lnum != BasicBlock::NOT_IN_LOOP) + do { - optLoopTable[lnum].AddVariableLiveness(this, blk); - lnum = optLoopTable[lnum].lpParent; - } + m_loopSideEffects[loop->GetIndex()].AddVariableLiveness(this, blk); + loop = loop->GetParent(); + } while (loop != nullptr); } -// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. -void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) +// Adds "fldHnd" to the set of modified fields of "loop" and any parent loops. +void Compiler::AddModifiedFieldAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) { - assert(0 <= lnum && lnum < optLoopCount); - while (lnum != BasicBlock::NOT_IN_LOOP) + do { - optLoopTable[lnum].AddModifiedField(this, fldHnd, fieldKind); - lnum = optLoopTable[lnum].lpParent; - } + m_loopSideEffects[loop->GetIndex()].AddModifiedField(this, fldHnd, fieldKind); + loop = loop->GetParent(); + } while (loop != nullptr); } -// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. -void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd) +// Adds "elemType" to the set of modified array element types of "loop" and any parent loops. 
+void Compiler::AddModifiedElemTypeAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_CLASS_HANDLE elemClsHnd) { - assert(0 <= lnum && lnum < optLoopCount); - while (lnum != BasicBlock::NOT_IN_LOOP) + do { - optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd); - lnum = optLoopTable[lnum].lpParent; - } + m_loopSideEffects[loop->GetIndex()].AddModifiedElemType(this, elemClsHnd); + loop = loop->GetParent(); + } while (loop != nullptr); } //------------------------------------------------------------------------------ From a078d180e52732db3e7cff3595cb073282010076 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 4 Dec 2023 17:12:49 +0100 Subject: [PATCH 11/20] JIT: Port VN and loop hoisting to new loop representation Switch VN to use the new loop representation for the limited amount of stuff it does. This is mainly computing loop side effects and using it for some more precision, in addition to storing some memory dependencies around loops. It requires us to have a block -> loop mapping, so add a `BlockToNaturalLoopMap` class to do this. We really do not need this functionality for much, so we may consider seeing if we can remove it in the future (and remove `BasicBlock::bbNatLoopNum`). In loop hoisting move a few members out of `LoopDsc` and into `LoopHoistContext`; in particular the counts of hoisted variables, which we use for profitability checks and which gets updated while hoisting is going on for a single loop. We do not need to refer to the information from other loops. Record separate postorder numbers for the old and new DFS since we need to use both simultaneously after loop unrolling. We will be able to get rid of this again soon. A small number of diffs are expected because the loop side effects computation is now more precise, since the old loop code includes some blocks in loops that are not actually part of the loop. 
For example, blocks that always throw can be in the lexical range and would previously cause the side effect computation to believe there was a memory havoc. Also, the side effect computation does some limited value tracking of assignment, which is more precise now since it is running in RPO instead of based on loop order before. --- src/coreclr/jit/block.cpp | 5 +- src/coreclr/jit/block.h | 5 +- src/coreclr/jit/compiler.h | 58 ++-- src/coreclr/jit/compiler.hpp | 24 +- src/coreclr/jit/fgbasic.cpp | 9 +- src/coreclr/jit/fgprofilesynthesis.cpp | 4 +- src/coreclr/jit/flowgraph.cpp | 129 +++++++-- src/coreclr/jit/loopcloning.cpp | 5 +- src/coreclr/jit/optimizer.cpp | 384 ++++++++++++++----------- src/coreclr/jit/valuenum.cpp | 145 +++++----- src/coreclr/jit/valuenum.h | 10 +- 11 files changed, 478 insertions(+), 300 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index e1f50e2fb1be8..b10392bd168c7 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1586,8 +1586,9 @@ BasicBlock* BasicBlock::New(Compiler* compiler) block->bbNatLoopNum = BasicBlock::NOT_IN_LOOP; - block->bbPreorderNum = 0; - block->bbPostorderNum = 0; + block->bbPreorderNum = 0; + block->bbPostorderNum = 0; + block->bbNewPostorderNum = 0; return block; } diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index ab7efe02cadce..2b956e3d317ce 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -1146,8 +1146,9 @@ struct BasicBlock : private LIR::Range void* bbSparseCountInfo; // Used early on by fgIncorporateEdgeCounts - unsigned bbPreorderNum; // the block's preorder number in the graph (1...fgMaxBBNum] - unsigned bbPostorderNum; // the block's postorder number in the graph (1...fgMaxBBNum] + unsigned bbPreorderNum; // the block's preorder number in the graph (1...fgMaxBBNum] + unsigned bbPostorderNum; // the block's postorder number in the graph (1...fgMaxBBNum] + unsigned bbNewPostorderNum; // the block's postorder number 
in the graph [0...postOrderCount) IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3dfa2e5559e11..4c20596924d1f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2111,6 +2111,7 @@ class FlowGraphNaturalLoop } bool ContainsBlock(BasicBlock* block); + bool ContainsLoop(FlowGraphNaturalLoop* childLoop); unsigned NumLoopBlocks(); @@ -2144,6 +2145,11 @@ class FlowGraphNaturalLoops static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist); public: + const FlowGraphDfsTree* GetDfsTree() + { + return m_dfs; + } + size_t NumLoops() { return m_loops.size(); @@ -2154,7 +2160,8 @@ class FlowGraphNaturalLoops return m_improperLoopHeaders > 0; } - FlowGraphNaturalLoop* GetLoopFromHeader(BasicBlock* header); + FlowGraphNaturalLoop* GetLoopByIndex(unsigned index); + FlowGraphNaturalLoop* GetLoopByHeader(BasicBlock* header); bool IsLoopBackEdge(FlowEdge* edge); bool IsLoopExitEdge(FlowEdge* edge); @@ -2242,6 +2249,22 @@ class FlowGraphDominatorTree static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfs); }; +// Represents a reverse mapping from block back to its (most nested) containing loop. +class BlockToNaturalLoopMap +{ + FlowGraphNaturalLoops* m_loops; + unsigned* m_indices; + + BlockToNaturalLoopMap(FlowGraphNaturalLoops* loops, unsigned* indices) + : m_loops(loops), m_indices(indices) + { + } + +public: + FlowGraphNaturalLoop* GetLoop(BasicBlock* block); + + static BlockToNaturalLoopMap* Build(FlowGraphNaturalLoops* loops); +}; // The following holds information about instr offsets in terms of generated code. 
@@ -4787,12 +4810,17 @@ class Compiler unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder FlowGraphDfsTree* m_dfs; + + // The next members are annotations on the flow graph used during the + // optimization phases. They are invalidated once RBO runs and modifies the + // flow graph. FlowGraphNaturalLoops* m_loops; struct LoopDsc; LoopDsc** m_newToOldLoop; FlowGraphNaturalLoop** m_oldToNewLoop; struct LoopSideEffects* m_loopSideEffects; struct HoistCounts* m_loopHoistCounts; + BlockToNaturalLoopMap* m_blockToLoop; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -5380,10 +5408,10 @@ class Compiler // Perform value-numbering for the trees in "blk". void fgValueNumberBlock(BasicBlock* blk); - // Requires that "entryBlock" is the entry block of loop "loopNum", and that "loopNum" is the + // Requires that "entryBlock" is the header block of "loop" and that "loop" is the // innermost loop of which "entryBlock" is the entry. Returns the value number that should be // assumed for the memoryKind at the start "entryBlk". - ValueNum fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, BasicBlock* entryBlock, unsigned loopNum); + ValueNum fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, BasicBlock* entryBlock, FlowGraphNaturalLoop* loop); // Called when an operation (performed by "tree", described by "msg") may cause the GcHeap to be mutated. // As GcHeap is a subset of ByrefExposed, this will also annotate the ByrefExposed mutation. 
@@ -6582,7 +6610,7 @@ class Compiler void optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack* blocks, LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of "loop" - bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop); + bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); // Performs the hoisting 'tree' into the PreHeader for "loop" void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); @@ -6636,6 +6664,8 @@ class Compiler PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table void optFindLoops(); + void optFindNewLoops(); + void optCrossCheckIterInfo(const NaturalLoopIterInfo& iterInfo, const LoopDsc& dsc); PhaseStatus optCloneLoops(); void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); @@ -6719,15 +6749,6 @@ class Compiler // arrays of that type are modified // in the loop. - // Adds the variable liveness information for 'blk' to 'this' LoopDsc - void AddVariableLiveness(Compiler* comp, BasicBlock* blk); - - inline void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); - // This doesn't *always* take a class handle -- it can also take primitive types, encoded as class handles - // (shifted left, with a low-order bit set to distinguish.) - // Use the {Encode/Decode}ElemType methods to construct/destruct these. - inline void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd); - /* The following values are set only for iterator loops, i.e. 
has the flag LPFLG_ITER set */ GenTree* lpIterTree; // The "i = i const" tree @@ -11956,7 +11977,7 @@ class NewDomTreeVisitor { static_cast(this)->PreOrderVisit(block); - next = tree[block->bbPostorderNum].firstChild; + next = tree[block->bbNewPostorderNum].firstChild; if (next != nullptr) { @@ -11968,7 +11989,7 @@ class NewDomTreeVisitor { static_cast(this)->PostOrderVisit(block); - next = tree[block->bbPostorderNum].nextSibling; + next = tree[block->bbNewPostorderNum].nextSibling; if (next != nullptr) { @@ -12107,8 +12128,7 @@ class StringPrinter void Append(char chr); }; -typedef JitHashTable, FieldKindForVN> - FieldHandleSet; +typedef JitHashTable, FieldKindForVN> FieldHandleSet; typedef JitHashTable, bool> ClassHandleSet; @@ -12129,8 +12149,8 @@ struct LoopSideEffects // arrays of that type are modified // in the loop. ClassHandleSet* ArrayElemTypesModified = nullptr; - bool ContainsCall = false; - bool HasNestedLoops = false; + bool ContainsCall = false; + bool HasNestedLoops = false; LoopSideEffects(); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 640fee9c0ef97..fb917f6dbaf39 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -3624,26 +3624,6 @@ inline void Compiler::optAssertionRemove(AssertionIndex index) } } -inline void Compiler::LoopDsc::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) -{ - if (lpFieldsModified == nullptr) - { - lpFieldsModified = - new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::FieldHandleSet(comp->getAllocatorLoopHoist()); - } - lpFieldsModified->Set(fldHnd, fieldKind, FieldHandleSet::Overwrite); -} - -inline void Compiler::LoopDsc::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd) -{ - if (lpArrayElemTypesModified == nullptr) - { - lpArrayElemTypesModified = - new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::ClassHandleSet(comp->getAllocatorLoopHoist()); - } - lpArrayElemTypesModified->Set(structHnd, 
true, ClassHandleSet::Overwrite); -} - inline void Compiler::LoopDsc::VERIFY_lpIterTree() const { #ifdef DEBUG @@ -4959,7 +4939,7 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func // loop block rpo index = head block rpoIndex + index // loop block po index = PostOrderCount - 1 - loop block rpo index // = headPreOrderIndex - index - unsigned poIndex = m_header->bbPostorderNum - index; + unsigned poIndex = m_header->bbNewPostorderNum - index; assert(poIndex < m_tree->GetPostOrderCount()); return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; }); @@ -4987,7 +4967,7 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksPostOrder(TFunc func) { BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); bool result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) { - unsigned poIndex = m_header->bbPostorderNum - index; + unsigned poIndex = m_header->bbNewPostorderNum - index; assert(poIndex < m_tree->GetPostOrderCount()); return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; }); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 1971048b7e85b..dcb8f9231b36a 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -65,8 +65,13 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; - m_dfs = nullptr; - m_loops = nullptr; + m_dfs = nullptr; + m_loops = nullptr; + m_newToOldLoop = nullptr; + m_oldToNewLoop = nullptr; + m_loopSideEffects = nullptr; + m_loopHoistCounts = nullptr; + m_blockToLoop = nullptr; // Initialize BlockSet data. 
fgCurBBEpoch = 0; diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 63d3ba814728b..461dd9758c959 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -721,7 +721,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) } else { - FlowGraphNaturalLoop* const nestedLoop = m_loops->GetLoopFromHeader(block); + FlowGraphNaturalLoop* const nestedLoop = m_loops->GetLoopByHeader(block); if (nestedLoop != nullptr) { @@ -1015,7 +1015,7 @@ void ProfileSynthesis::ComputeBlockWeights() // void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block) { - FlowGraphNaturalLoop* const loop = m_loops->GetLoopFromHeader(block); + FlowGraphNaturalLoop* const loop = m_loops->GetLoopByHeader(block); weight_t newWeight = block->bbWeight; const char* kind = ""; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 467d608ed1188..c74d17c904438 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3999,7 +3999,7 @@ void Compiler::fgLclFldAssign(unsigned lclNum) // bool FlowGraphDfsTree::Contains(BasicBlock* block) const { - return (block->bbPostorderNum < m_postOrderCount) && (m_postOrder[block->bbPostorderNum] == block); + return (block->bbNewPostorderNum < m_postOrderCount) && (m_postOrder[block->bbNewPostorderNum] == block); } //------------------------------------------------------------------------ @@ -4021,7 +4021,7 @@ bool FlowGraphDfsTree::IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) { assert(Contains(ancestor) && Contains(descendant)); return (ancestor->bbPreorderNum <= descendant->bbPreorderNum) && - (descendant->bbPostorderNum <= ancestor->bbPostorderNum); + (descendant->bbNewPostorderNum <= ancestor->bbNewPostorderNum); } //------------------------------------------------------------------------ @@ -4069,7 +4069,7 @@ FlowGraphDfsTree* Compiler::fgComputeDfs() { blocks.Pop(); 
postOrder[postOrderIndex] = block; - block->bbPostorderNum = postOrderIndex++; + block->bbNewPostorderNum = postOrderIndex++; } } @@ -4136,7 +4136,7 @@ unsigned FlowGraphNaturalLoop::LoopBlockBitVecIndex(BasicBlock* block) { assert(m_tree->Contains(block)); - unsigned index = m_header->bbPostorderNum - block->bbPostorderNum; + unsigned index = m_header->bbNewPostorderNum - block->bbNewPostorderNum; assert(index < m_blocksSize); return index; } @@ -4159,12 +4159,12 @@ unsigned FlowGraphNaturalLoop::LoopBlockBitVecIndex(BasicBlock* block) // bool FlowGraphNaturalLoop::TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex) { - if (block->bbPostorderNum > m_header->bbPostorderNum) + if (block->bbNewPostorderNum > m_header->bbNewPostorderNum) { return false; } - unsigned index = m_header->bbPostorderNum - block->bbPostorderNum; + unsigned index = m_header->bbNewPostorderNum - block->bbNewPostorderNum; if (index >= m_blocksSize) { return false; @@ -4211,6 +4211,20 @@ bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) return BitVecOps::IsMember(&traits, m_blocks, index); } +//------------------------------------------------------------------------ +// ContainsLoop: Returns true if this loop contains the specified other loop. +// +// Parameters: +// childLoop - The potential candidate child loop +// +// Returns: +// True if the child loop is contained in this loop. +// +bool FlowGraphNaturalLoop::ContainsLoop(FlowGraphNaturalLoop* childLoop) +{ + return ContainsBlock(childLoop->GetHeader()); +} + //------------------------------------------------------------------------ // NumLoopBlocks: Get the number of blocks in the SCC of the loop. // @@ -4235,7 +4249,21 @@ FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs) { } -// GetLoopFromHeader: See if a block is a loop header, and if so return the +// GetLoopByIndex: Get loop by a specified index. +// +// Parameters: +// index - Index of loop. 
Must be less than NumLoops() +// +// Returns: +// Loop with the specified index. +// +FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopByIndex(unsigned index) +{ + assert(index < m_loops.size()); + return m_loops[index]; +} + +// GetLoopByHeader: See if a block is a loop header, and if so return the // associated loop. // // Parameters: @@ -4244,7 +4272,7 @@ FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs) // Returns: // Loop headed by block, or nullptr // -FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopFromHeader(BasicBlock* block) +FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopByHeader(BasicBlock* block) { // TODO-TP: This can use binary search based on post order number. for (FlowGraphNaturalLoop* loop : m_loops) @@ -4331,7 +4359,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) unsigned rpoNum = dfs->GetPostOrderCount() - i; BasicBlock* const block = dfs->GetPostOrder()[i - 1]; JITDUMP("%02u -> " FMT_BB "[%u, %u]\n", rpoNum + 1, block->bbNum, block->bbPreorderNum + 1, - block->bbPostorderNum + 1); + block->bbNewPostorderNum + 1); } #endif @@ -4375,7 +4403,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) // worklist.clear(); - loop->m_blocksSize = loop->m_header->bbPostorderNum + 1; + loop->m_blocksSize = loop->m_header->bbNewPostorderNum + 1; BitVecTraits loopTraits = loop->LoopBlockTraits(); loop->m_blocks = BitVecOps::MakeEmpty(&loopTraits); @@ -4477,7 +4505,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) { if (loop->m_parent != nullptr) { - loop->m_sibling = loop->m_parent->m_child; + loop->m_sibling = loop->m_parent->m_child; loop->m_parent->m_child = loop; } } @@ -5240,7 +5268,7 @@ BasicBlock* FlowGraphDominatorTree::IntersectDom(BasicBlock* finger1, BasicBlock { return nullptr; } - while (finger1 != nullptr && finger1->bbPostorderNum < finger2->bbPostorderNum) + while (finger1 != nullptr && 
finger1->bbNewPostorderNum < finger2->bbNewPostorderNum) { finger1 = finger1->bbIDom; } @@ -5248,7 +5276,7 @@ BasicBlock* FlowGraphDominatorTree::IntersectDom(BasicBlock* finger1, BasicBlock { return nullptr; } - while (finger2 != nullptr && finger2->bbPostorderNum < finger1->bbPostorderNum) + while (finger2 != nullptr && finger2->bbNewPostorderNum < finger1->bbNewPostorderNum) { finger2 = finger2->bbIDom; } @@ -5288,8 +5316,8 @@ bool FlowGraphDominatorTree::Dominates(BasicBlock* dominator, BasicBlock* domina // // where the equality holds when you ask if A dominates itself. // - return (m_preorderNum[dominator->bbPostorderNum] <= m_preorderNum[dominated->bbPostorderNum]) && - (m_postorderNum[dominator->bbPostorderNum] >= m_postorderNum[dominated->bbPostorderNum]); + return (m_preorderNum[dominator->bbNewPostorderNum] <= m_preorderNum[dominated->bbNewPostorderNum]) && + (m_postorderNum[dominator->bbNewPostorderNum] >= m_postorderNum[dominated->bbNewPostorderNum]); } //------------------------------------------------------------------------ @@ -5342,7 +5370,7 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df continue; // Unreachable pred } - if ((numIters <= 0) && (domPred->bbPostorderNum <= poNum)) + if ((numIters <= 0) && (domPred->bbNewPostorderNum <= poNum)) { continue; // Pred not yet visited } @@ -5382,11 +5410,11 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df continue; } - unsigned poNum = block->bbPostorderNum; + unsigned poNum = block->bbNewPostorderNum; BasicBlock* parent = block->bbIDom; - domTree[poNum].nextSibling = domTree[parent->bbPostorderNum].firstChild; - domTree[parent->bbPostorderNum].firstChild = block; + domTree[poNum].nextSibling = domTree[parent->bbNewPostorderNum].firstChild; + domTree[parent->bbNewPostorderNum].firstChild = block; } //// Skip the root since it has no siblings. 
@@ -5414,7 +5442,7 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df printf(FMT_BB " :", postOrder[poNum]->bbNum); for (BasicBlock* child = domTree[poNum].firstChild; child != nullptr; - child = domTree[child->bbPostorderNum].nextSibling) + child = domTree[child->bbNewPostorderNum].nextSibling) { printf(" " FMT_BB, child->bbNum); } @@ -5440,12 +5468,12 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df void PreOrderVisit(BasicBlock* block) { - m_preorderNums[block->bbPostorderNum] = m_preNum++; + m_preorderNums[block->bbNewPostorderNum] = m_preNum++; } void PostOrderVisit(BasicBlock* block) { - m_postorderNums[block->bbPostorderNum] = m_postNum++; + m_postorderNums[block->bbNewPostorderNum] = m_postNum++; } }; @@ -5457,3 +5485,60 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfs, domTree, preorderNums, postorderNums); } + +//------------------------------------------------------------------------ +// BlockToNaturalLoopMap::GetLoop: Map a block back to its most nested +// containing loop. +// +// Parameters: +// block - The block +// +// Returns: +// Loop or nullptr if the block is not contained in any loop. +// +FlowGraphNaturalLoop* BlockToNaturalLoopMap::GetLoop(BasicBlock* block) +{ + const FlowGraphDfsTree* dfs = m_loops->GetDfsTree(); + if (!dfs->Contains(block)) + { + return nullptr; + } + + unsigned index = m_indices[block->bbNewPostorderNum]; + if (index == 0) + { + return nullptr; + } + + return m_loops->GetLoopByIndex(index - 1); +} + +//------------------------------------------------------------------------ +// BlockToNaturalLoopMap::Build: Build the map. +// +// Parameters: +// loops - Data structure describing loops +// +// Returns: +// The map. 
+// +BlockToNaturalLoopMap* BlockToNaturalLoopMap::Build(FlowGraphNaturalLoops* loops) +{ + const FlowGraphDfsTree* dfs = loops->GetDfsTree(); + Compiler* comp = dfs->GetCompiler(); + // Indices are 1-based, with 0 meaning "no loop". + unsigned* indices = + dfs->GetPostOrderCount() == 0 ? nullptr : (new (comp, CMK_Loops) unsigned[dfs->GetPostOrderCount()]{}); + + // Now visit all loops in reverse post order, meaning that we see inner + // loops last and thus write their indices into the map last. + for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder()) + { + loop->VisitLoopBlocks([=](BasicBlock* block) { + indices[block->bbNewPostorderNum] = loop->GetIndex() + 1; + return BasicBlockVisit::Continue; + }); + } + + return new (comp, CMK_Loops) BlockToNaturalLoopMap(loops, indices); +} diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index af949e0a8de01..49cfd70a161e2 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -3116,6 +3116,7 @@ bool Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) { if (loop->AnalyzeIteration(&iterInfo)) { + INDEBUG(optCrossCheckIterInfo(iterInfo, optLoopTable[i])); context->SetLoopIterInfo(loop->GetIndex(), new (this, CMK_LoopClone) NaturalLoopIterInfo(iterInfo)); } } @@ -3269,8 +3270,8 @@ PhaseStatus Compiler::optCloneLoops() // TODO: recompute the loop table, to include the slow loop path in the table? 
fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); - m_dfs = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfs); + m_dfs = fgComputeDfs(); + optFindNewLoops(); } #ifdef DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 807ef53f26391..73f3f39363f3a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4637,6 +4637,9 @@ PhaseStatus Compiler::optUnrollLoops() fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } + m_dfs = fgComputeDfs(); + optFindNewLoops(); + DBEXEC(verbose, fgDispBasicBlocks()); } else @@ -5540,7 +5543,17 @@ PhaseStatus Compiler::optFindLoopsPhase() { optFindLoops(); - m_dfs = fgComputeDfs(); + m_dfs = fgComputeDfs(); + optFindNewLoops(); + + return PhaseStatus::MODIFIED_EVERYTHING; +} + +//----------------------------------------------------------------------------- +// optFindNewLoops: Compute new loops and cross validate with old loop table. +// +void Compiler::optFindNewLoops() +{ m_loops = FlowGraphNaturalLoops::Find(m_dfs); m_newToOldLoop = m_loops->NumLoops() == 0 ? nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); @@ -5561,34 +5574,28 @@ PhaseStatus Compiler::optFindLoopsPhase() m_oldToNewLoop[head->bbNatLoopNum] = loop; m_newToOldLoop[loop->GetIndex()] = dsc; } +} -#ifdef DEBUG - for (unsigned i = 0; i < optLoopCount; i++) +//----------------------------------------------------------------------------- +// optCrossCheckIterInfo: Validate that new IV analysis matches the old one. 
+// +// Parameters: +// iterInfo - New IV information +// dsc - Old loop structure containing IV analysis information +// +void Compiler::optCrossCheckIterInfo(const NaturalLoopIterInfo& iterInfo, const LoopDsc& dsc) +{ + assert(iterInfo.HasConstInit == ((dsc.lpFlags & LPFLG_CONST_INIT) != 0)); + assert(iterInfo.HasConstLimit == ((dsc.lpFlags & LPFLG_CONST_LIMIT) != 0)); + assert(iterInfo.HasSimdLimit == ((dsc.lpFlags & LPFLG_SIMD_LIMIT) != 0)); + assert(iterInfo.HasInvariantLocalLimit == ((dsc.lpFlags & LPFLG_VAR_LIMIT) != 0)); + assert(iterInfo.HasArrayLengthLimit == ((dsc.lpFlags & LPFLG_ARRLEN_LIMIT) != 0)); + if (iterInfo.HasConstInit) { - assert(m_oldToNewLoop[i] != nullptr); - LoopDsc* dsc = &optLoopTable[i]; - if ((dsc->lpFlags & LPFLG_ITER) == 0) - continue; - - NaturalLoopIterInfo iter; - bool analyzed = m_oldToNewLoop[i]->AnalyzeIteration(&iter); - assert(analyzed); - - assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); - assert(iter.HasConstLimit == ((dsc->lpFlags & LPFLG_CONST_LIMIT) != 0)); - assert(iter.HasSimdLimit == ((dsc->lpFlags & LPFLG_SIMD_LIMIT) != 0)); - assert(iter.HasInvariantLocalLimit == ((dsc->lpFlags & LPFLG_VAR_LIMIT) != 0)); - assert(iter.HasArrayLengthLimit == ((dsc->lpFlags & LPFLG_ARRLEN_LIMIT) != 0)); - if (iter.HasConstInit) - { - assert(iter.ConstInitValue == dsc->lpConstInit); - assert(iter.InitBlock == dsc->lpInitBlock); - } - assert(iter.TestTree == dsc->lpTestTree); + assert(iterInfo.ConstInitValue == dsc.lpConstInit); + assert(iterInfo.InitBlock == dsc.lpInitBlock); } -#endif - - return PhaseStatus::MODIFIED_EVERYTHING; + assert(iterInfo.TestTree == dsc.lpTestTree); } /***************************************************************************** @@ -6388,8 +6395,8 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGr printf("\nHoisting a copy of "); printTreeID(origExpr); printf(" " FMT_VN, origExpr->gtVNPair.GetLiberal()); - printf(" from " FMT_BB " into PreHeader " FMT_BB 
" for loop " FMT_LP " (head: " FMT_BB "):\n", - exprBb->bbNum, preheader->bbNum, loop->GetIndex(), loop->GetHeader()->bbNum); + printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " (head: " FMT_BB "):\n", exprBb->bbNum, + preheader->bbNum, loop->GetIndex(), loop->GetHeader()->bbNum); gtDispTree(origExpr); printf("\n"); } @@ -6463,7 +6470,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGr { // What is the depth of the loop "lnum"? - ssize_t depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); + ssize_t depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); NodeToTestDataMap* testData = GetNodeTestData(); @@ -6684,17 +6691,23 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho #ifdef DEBUG if (verbose) { - printf("optHoistThisLoop for loop " FMT_LP " (head: " FMT_BB "):\n", loop->GetIndex(), loop->GetHeader()->bbNum); + printf("optHoistThisLoop for loop " FMT_LP " (head: " FMT_BB "):\n", loop->GetIndex(), + loop->GetHeader()->bbNum); printf(" Loop body %s a call\n", sideEffs.ContainsCall ? "contains" : "does not contain"); printf(" Loop has %s\n", (loop->ExitEdges().size() == 1) ? 
"single exit" : "multiple exits"); + printf(" Blocks:\n"); + loop->VisitLoopBlocksReversePostOrder([](BasicBlock* bb) { + printf(" " FMT_BB "\n", bb->bbNum); + return BasicBlockVisit::Continue; + }); } #endif VARSET_TP loopVars(VarSetOps::Intersection(this, sideEffs.VarInOut, sideEffs.VarUseDef)); hoistCtxt->m_loopVarInOutCount = VarSetOps::Count(this, sideEffs.VarInOut); - hoistCtxt->m_loopVarCount = VarSetOps::Count(this, loopVars); - hoistCtxt->m_hoistedExprCount = 0; + hoistCtxt->m_loopVarCount = VarSetOps::Count(this, loopVars); + hoistCtxt->m_hoistedExprCount = 0; #ifndef TARGET_64BIT @@ -6738,9 +6751,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars)); VARSET_TP inOutFPVars(VarSetOps::Intersection(this, sideEffs.VarInOut, lvaFloatVars)); - hoistCtxt->m_loopVarFPCount = VarSetOps::Count(this, loopFPVars); - hoistCtxt->m_loopVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); - hoistCtxt->m_hoistedFPExprCount = 0; + hoistCtxt->m_loopVarFPCount = VarSetOps::Count(this, loopFPVars); + hoistCtxt->m_loopVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); + hoistCtxt->m_hoistedFPExprCount = 0; hoistCtxt->m_loopVarCount -= hoistCtxt->m_loopVarFPCount; hoistCtxt->m_loopVarInOutCount -= hoistCtxt->m_loopVarInOutFPCount; @@ -6759,9 +6772,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho } else // lvaFloatVars is empty { - hoistCtxt->m_loopVarFPCount = 0; - hoistCtxt->m_loopVarInOutFPCount = 0; - hoistCtxt->m_hoistedFPExprCount = 0; + hoistCtxt->m_loopVarFPCount = 0; + hoistCtxt->m_loopVarInOutFPCount = 0; + hoistCtxt->m_hoistedFPExprCount = 0; } // Find the set of definitely-executed blocks. @@ -6820,21 +6833,24 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho // convenient). But note that it is arbitrary because there is not guaranteed execution order amongst // the child loops. 
- for (FlowGraphNaturalLoop* childLoop = loop->GetChild(); - childLoop != nullptr; - childLoop = childLoop->GetSibling()) + // TODO-Quirk: Switch this order off the old loop table + LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; + + for (BasicBlock::loopNumber childLoopNum = oldLoop->lpChild; childLoopNum != BasicBlock::NOT_IN_LOOP; + childLoopNum = optLoopTable[childLoopNum].lpSibling) { - // TODO-Quirk: Remove - if (m_oldToNewLoop[childLoop->GetIndex()] == nullptr) + if (optLoopTable[childLoopNum].lpIsRemoved()) { continue; } + FlowGraphNaturalLoop* childLoop = m_oldToNewLoop[childLoopNum]; + assert(childLoop->EntryEdges().size() == 1); - BasicBlock* preheader = childLoop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* childPreHead = childLoop->EntryEdges()[0]->getSourceBlock(); if (loop->ExitEdges().size() == 1) { - if (fgSsaDomTree->Dominates(childPreHead, pLoopDsc->lpExit)) + if (fgSsaDomTree->Dominates(childPreHead, loop->ExitEdges()[0]->getSourceBlock())) { // If the child loop pre-header dominates the exit, it will get added in the dominator tree // loop below. @@ -6844,7 +6860,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho else { // If the child loop pre-header is the loop entry for a multi-exit loop, it will get added below. - if (childPreHead == pLoopDsc->lpEntry) + if (childPreHead == loop->GetHeader()) { continue; } @@ -6853,24 +6869,24 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho defExec.Push(childPreHead); } - if (pLoopDsc->lpExitCnt == 1) + if (loop->ExitEdges().size() == 1) { - assert(pLoopDsc->lpExit != nullptr); + BasicBlock* exiting = loop->ExitEdges()[0]->getSourceBlock(); JITDUMP(" Considering hoisting in blocks that either dominate exit block " FMT_BB ", or pre-headers of nested loops, if any:\n", - pLoopDsc->lpExit->bbNum); + exiting->bbNum); // Push dominators, until we reach "entry" or exit the loop. 
- BasicBlock* cur = pLoopDsc->lpExit; - while ((cur != nullptr) && (cur != pLoopDsc->lpEntry)) + BasicBlock* cur = exiting; + while ((cur != nullptr) && (cur != loop->GetHeader())) { JITDUMP(" -- " FMT_BB " (dominate exit block)\n", cur->bbNum); - assert(pLoopDsc->lpContains(cur)); + assert(loop->ContainsBlock(cur)); defExec.Push(cur); cur = cur->bbIDom; } - noway_assert(cur == pLoopDsc->lpEntry); + noway_assert(cur == loop->GetHeader()); } else // More than one exit { @@ -6878,19 +6894,19 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho // We could in the future do better. JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", - pLoopDsc->lpEntry->bbNum, lnum); + loop->GetHeader()->bbNum, loop->GetIndex()); } - JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); - defExec.Push(pLoopDsc->lpEntry); + JITDUMP(" -- " FMT_BB " (entry block)\n", loop->GetHeader()->bbNum); + defExec.Push(loop->GetHeader()); optHoistLoopBlocks(loop, &defExec, hoistCtxt); - const unsigned numHoisted = pLoopDsc->lpHoistedFPExprCount + pLoopDsc->lpHoistedExprCount; + const unsigned numHoisted = hoistCtxt->m_hoistedFPExprCount + hoistCtxt->m_hoistedExprCount; return numHoisted > 0; } -bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop) +bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt) { bool loopContainsCall = m_loopSideEffects[loop->GetIndex()].ContainsCall; @@ -6901,9 +6917,9 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l if (varTypeUsesIntReg(tree)) { - hoistedExprCount = pLoopDsc->lpHoistedExprCount; - loopVarCount = pLoopDsc->lpLoopVarCount; - varInOutCount = pLoopDsc->lpVarInOutCount; + hoistedExprCount = hoistCtxt->m_hoistedExprCount; + loopVarCount = hoistCtxt->m_loopVarCount; + varInOutCount = hoistCtxt->m_loopVarInOutCount; availRegCount = 
CNT_CALLEE_SAVED - 1; if (!loopContainsCall) @@ -6922,9 +6938,9 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l { assert(varTypeUsesFloatReg(tree)); - hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; - loopVarCount = pLoopDsc->lpLoopVarFPCount; - varInOutCount = pLoopDsc->lpVarInOutFPCount; + hoistedExprCount = hoistCtxt->m_hoistedFPExprCount; + loopVarCount = hoistCtxt->m_loopVarFPCount; + varInOutCount = hoistCtxt->m_loopVarInOutFPCount; availRegCount = CNT_CALLEE_SAVED_FLOAT; if (!loopContainsCall) @@ -7006,22 +7022,12 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l // void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, ValueNum memoryVN) { - // If tree is not in a loop, we don't need to track its loop dependence. - // - unsigned const loopNum = block->bbNatLoopNum; - - assert(loopNum != BasicBlock::NOT_IN_LOOP); - // Find the loop associated with this memory VN. // - unsigned updateLoopNum = vnStore->LoopOfVN(memoryVN); + FlowGraphNaturalLoop* updateLoop = vnStore->LoopOfVN(memoryVN); - if (updateLoopNum >= BasicBlock::MAX_LOOP_NUM) + if (updateLoop == nullptr) { - // There should be only two special non-loop loop nums. - // - assert((updateLoopNum == BasicBlock::MAX_LOOP_NUM) || (updateLoopNum == BasicBlock::NOT_IN_LOOP)); - // memoryVN defined outside of any loop, we can ignore. // JITDUMP(" ==> Not updating loop memory dependence of [%06u], memory " FMT_VN " not defined in a loop\n", @@ -7029,36 +7035,26 @@ void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, V return; } - // If the loop was removed, then record the dependence in the nearest enclosing loop, if any. 
- // - while (optLoopTable[updateLoopNum].lpIsRemoved()) + // TODO-Quirk: Remove + if (m_newToOldLoop[updateLoop->GetIndex()] == nullptr) { - unsigned const updateParentLoopNum = optLoopTable[updateLoopNum].lpParent; - - if (updateParentLoopNum == BasicBlock::NOT_IN_LOOP) - { - // Memory VN was defined in a loop, but no longer. - // - JITDUMP(" ==> Not updating loop memory dependence of [%06u], memory " FMT_VN - " no longer defined in a loop\n", - dspTreeID(tree), memoryVN); - break; - } - - JITDUMP(" ==> " FMT_LP " removed, updating dependence to parent " FMT_LP "\n", updateLoopNum, - updateParentLoopNum); - - updateLoopNum = updateParentLoopNum; + return; } + assert(!m_newToOldLoop[updateLoop->GetIndex()]->lpIsRemoved()); + // If the update block is not the header of a loop containing // block, we can also ignore the update. // - if (!optLoopContains(updateLoopNum, loopNum)) + if (!updateLoop->ContainsBlock(block)) { +#ifdef DEBUG + FlowGraphNaturalLoop* blockLoop = m_blockToLoop->GetLoop(block); + JITDUMP(" ==> Not updating loop memory dependence of [%06u]/" FMT_LP ", memory " FMT_VN "/" FMT_LP - " is not defined in an enclosing loop\n", - dspTreeID(tree), loopNum, memoryVN, updateLoopNum); + " is not defined in a contained block\n", + dspTreeID(tree), blockLoop->GetIndex(), memoryVN, updateLoop->GetIndex()); +#endif return; } @@ -7071,17 +7067,19 @@ void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, V if (map->Lookup(tree, &mapBlock)) { - unsigned const mapLoopNum = mapBlock->bbNatLoopNum; - - // If the update loop contains the existing map loop, - // the existing map loop is more constraining. So no + // If the update loop contains the existing map block, + // the existing entry is more constraining. So no // update needed. 
// - if (optLoopContains(updateLoopNum, mapLoopNum)) + if (updateLoop->ContainsBlock(mapBlock)) { +#ifdef DEBUG + FlowGraphNaturalLoop* mapLoop = m_blockToLoop->GetLoop(mapBlock); + JITDUMP(" ==> Not updating loop memory dependence of [%06u]; alrady constrained to " FMT_LP " nested in " FMT_LP "\n", - dspTreeID(tree), mapLoopNum, updateLoopNum); + dspTreeID(tree), mapLoop->GetIndex(), updateLoop->GetIndex()); +#endif return; } } @@ -7089,8 +7087,9 @@ void Compiler::optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, V // MemoryVN now describes the most constraining loop memory dependence // we know of. Update the map. // - JITDUMP(" ==> Updating loop memory dependence of [%06u] to " FMT_LP "\n", dspTreeID(tree), updateLoopNum); - map->Set(tree, optLoopTable[updateLoopNum].lpEntry, NodeToLoopMemoryBlockMap::Overwrite); + JITDUMP(" ==> Updating loop memory dependence of [%06u] to " FMT_LP "\n", dspTreeID(tree), + updateLoop->GetIndex()); + map->Set(tree, updateLoop->GetHeader(), NodeToLoopMemoryBlockMap::Overwrite); } //------------------------------------------------------------------------ @@ -7112,6 +7111,57 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) } } +//------------------------------------------------------------------------ +// AddVariableLiveness: Adds the variable liveness information for 'blk' to 'this' LoopDsc +// +// Arguments: +// comp - Compiler instance +// blk - Block whose liveness is to be added +// +void LoopSideEffects::AddVariableLiveness(Compiler* comp, BasicBlock* blk) +{ + VarSetOps::UnionD(comp, VarInOut, blk->bbLiveIn); + VarSetOps::UnionD(comp, VarInOut, blk->bbLiveOut); + + VarSetOps::UnionD(comp, VarUseDef, blk->bbVarUse); + VarSetOps::UnionD(comp, VarUseDef, blk->bbVarDef); +} + +//------------------------------------------------------------------------ +// AddModifiedField: Record that a field is modified in the loop. 
+// +// Arguments: +// comp - Compiler instance +// fldHnd - Field handle being modified +// fieldKind - Kind of field +// +void LoopSideEffects::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) +{ + if (FieldsModified == nullptr) + { + FieldsModified = new (comp->getAllocatorLoopHoist()) FieldHandleSet(comp->getAllocatorLoopHoist()); + } + FieldsModified->Set(fldHnd, fieldKind, FieldHandleSet::Overwrite); +} + +//------------------------------------------------------------------------ +// AddModifiedElemType: Record that an array with the specified element type is +// being modified. +// +// Arguments: +// comp - Compiler instance +// structHnd - Handle for struct. Can also be an encoding of a primitive +// handle, see {Encode/Decode}ElemType. +// +void LoopSideEffects::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd) +{ + if (ArrayElemTypesModified == nullptr) + { + ArrayElemTypesModified = new (comp->getAllocatorLoopHoist()) ClassHandleSet(comp->getAllocatorLoopHoist()); + } + ArrayElemTypesModified->Set(structHnd, true, ClassHandleSet::Overwrite); +} + //------------------------------------------------------------------------ // optHoistLoopBlocks: Hoist invariant expression out of the loop. // @@ -7125,7 +7175,9 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. 
// -void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack* blocks, LoopHoistContext* hoistContext) +void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, + ArrayStack* blocks, + LoopHoistContext* hoistContext) { class HoistVisitor : public GenTreeVisitor { @@ -7155,11 +7207,11 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack m_valueStack; - bool m_beforeSideEffect; + ArrayStack m_valueStack; + bool m_beforeSideEffect; FlowGraphNaturalLoop* m_loop; - LoopHoistContext* m_hoistContext; - BasicBlock* m_currentBlock; + LoopHoistContext* m_hoistContext; + BasicBlock* m_currentBlock; bool IsNodeHoistable(GenTree* node) { @@ -7187,7 +7239,7 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStackgtVNPair.GetLiberal(); bool vnIsInvariant = - m_compiler->optVNIsLoopInvariant(vn, m_loopNum, &m_hoistContext->m_curLoopVnInvariantCache); + m_compiler->optVNIsLoopInvariant(vn, m_loop, &m_hoistContext->m_curLoopVnInvariantCache); // Even though VN is invariant in the loop (say a constant) its value may depend on position // of tree, so for loop hoisting we must also check that any memory read by tree @@ -7334,7 +7386,8 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStackHasSsaName(); // and the SSA definition must be outside the loop we're hoisting from ... isInvariant = isInvariant && - !m_loop->ContainsBlock(m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock()); + !m_loop->ContainsBlock( + m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock()); // and the VN of the tree is considered invariant as well. 
// @@ -7713,10 +7766,13 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStackResetHoistedInCurLoop(); } -void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt) +void Compiler::optHoistCandidate(GenTree* tree, + BasicBlock* treeBb, + FlowGraphNaturalLoop* loop, + LoopHoistContext* hoistCtxt) { // It must pass the hoistable profitablity tests for this loop level - if (!optIsProfitableToHoistTree(tree, loop)) + if (!optIsProfitableToHoistTree(tree, loop, hoistCtxt)) { JITDUMP(" ... not profitable to hoist\n"); return; @@ -7751,18 +7807,18 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNat // Increment lpHoistedExprCount or lpHoistedFPExprCount if (!varTypeIsFloating(tree->TypeGet())) { - optLoopTable[lnum].lpHoistedExprCount++; + hoistCtxt->m_hoistedExprCount++; #ifndef TARGET_64BIT // For our 32-bit targets Long types take two registers. if (varTypeIsLong(tree->TypeGet())) { - optLoopTable[lnum].lpHoistedExprCount++; + hoistCtxt->m_hoistedExprCount++; } #endif } else // Floating point expr hoisted { - optLoopTable[lnum].lpHoistedFPExprCount++; + hoistCtxt->m_hoistedFPExprCount++; } // Record the hoisted expression in hoistCtxt @@ -7800,29 +7856,33 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNS unsigned lclNum = funcApp.m_args[0]; unsigned ssaNum = funcApp.m_args[1]; LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum); - res = !loop->ContainsBlock(ssaDef->GetBlock()); + res = !loop->ContainsBlock(ssaDef->GetBlock()); } else if (funcApp.m_func == VNF_PhiMemoryDef) { BasicBlock* defnBlk = reinterpret_cast(vnStore->ConstantValue(funcApp.m_args[0])); - res = !loop->ContainsBlock(defnBlk); + res = !loop->ContainsBlock(defnBlk); } else if (funcApp.m_func == VNF_MemOpaque) { - const unsigned blockNum = funcApp.m_args[0]; + const unsigned loopIndex = funcApp.m_args[0]; - // Check for the special "ambiguous" 
block VN. + // Check for the special "ambiguous" loop index. // This is considered variant in every loop. // - if (blockNum == UINT_MAX) + if (loopIndex == ValueNumStore::UnknownLoop) { res = false; } + else if (loopIndex == ValueNumStore::NoLoop) + { + res = true; + } else { - assert(blockNum < m_dfs->GetPostOrderCount()); - BasicBlock* block = m_dfs->GetPostOrder()[blockNum]; - res = !loop->ContainsBlock(block); + FlowGraphNaturalLoop* otherLoop = m_loops->GetLoopByIndex(loopIndex); + assert(otherLoop != nullptr); + res = !loop->ContainsLoop(otherLoop); } } else @@ -7837,9 +7897,17 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNS if (i == 3) { - const unsigned blockNum = funcApp.m_args[3]; - assert(blockNum < m_dfs->GetPostOrderCount()); - res = !loop->ContainsBlock(m_dfs->GetPostOrder()[blockNum]); + const unsigned loopIndex = funcApp.m_args[3]; + assert((loopIndex == ValueNumStore::NoLoop) || (loopIndex < m_loops->NumLoops())); + if (loopIndex == ValueNumStore::NoLoop) + { + res = true; + } + else + { + FlowGraphNaturalLoop* otherLoop = m_loops->GetLoopByIndex(loopIndex); + res = !loop->ContainsLoop(otherLoop); + } break; } } @@ -8334,9 +8402,7 @@ bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) return false; } -LoopSideEffects::LoopSideEffects() - : VarInOut(VarSetOps::UninitVal()) - , VarUseDef(VarSetOps::UninitVal()) +LoopSideEffects::LoopSideEffects() : VarInOut(VarSetOps::UninitVal()), VarUseDef(VarSetOps::UninitVal()) { for (MemoryKind mk : allMemoryKinds()) { @@ -8346,30 +8412,35 @@ LoopSideEffects::LoopSideEffects() void Compiler::optComputeLoopSideEffects() { - m_loopSideEffects = new (this, CMK_LoopOpt) LoopSideEffects[m_loops->NumLoops()]; + m_loopSideEffects = + m_loops->NumLoops() == 0 ? 
nullptr : (new (this, CMK_LoopOpt) LoopSideEffects[m_loops->NumLoops()]); for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { - m_loopSideEffects[loop->GetIndex()].VarInOut = VarSetOps::MakeEmpty(this); + m_loopSideEffects[loop->GetIndex()].VarInOut = VarSetOps::MakeEmpty(this); m_loopSideEffects[loop->GetIndex()].VarUseDef = VarSetOps::MakeEmpty(this); } - // Now visit all loops in post order, meaning we see child loops first. - // Mark side effects into the loop and all ancestor loops the first time we - // see it. - BitVecTraits postOrderTraits = m_dfs->PostOrderTraits(); - BitVec visited(BitVecOps::MakeEmpty(&postOrderTraits)); + BasicBlock** postOrder = m_dfs->GetPostOrder(); + unsigned postOrderCount = m_dfs->GetPostOrderCount(); - for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) + // The side effect code benefits from seeing things in RPO as it has some + // limited treatment assignments it has seen the value of. + for (unsigned i = postOrderCount; i != 0; i--) { - loop->VisitLoopBlocks([&](BasicBlock* block) { - if (BitVecOps::TryAddElemD(&postOrderTraits, visited, block->bbPostorderNum)) - { - optComputeLoopSideEffectsOfBlock(block, loop); - } + BasicBlock* block = postOrder[i - 1]; + FlowGraphNaturalLoop* loop = m_blockToLoop->GetLoop(block); - return BasicBlockVisit::Continue; - }); + // TODO-Quirk: Remove + while ((loop != nullptr) && (m_newToOldLoop[loop->GetIndex()] == nullptr)) + { + loop = loop->GetParent(); + } + + if (loop != nullptr) + { + optComputeLoopSideEffectsOfBlock(block, loop); + } } } @@ -8417,7 +8488,8 @@ void Compiler::optRecordLoopNestsMemoryHavoc(FlowGraphNaturalLoop* loop, MemoryK void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNaturalLoop* mostNestedLoop) { - JITDUMP("optComputeLoopSideEffectsOfBlock " FMT_BB ", mostNestedLoop " FMT_LP "\n", blk->bbNum, mostNestedLoop->GetIndex()); + JITDUMP("optComputeLoopSideEffectsOfBlock " FMT_BB ", mostNestedLoop " FMT_LP "\n", blk->bbNum, + 
mostNestedLoop->GetIndex()); AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); // MemoryKinds for which an in-loop call or store has arbitrary effects. @@ -8673,20 +8745,10 @@ void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) do { m_loopSideEffects[loop->GetIndex()].ContainsCall = true; - loop = loop->GetParent(); + loop = loop->GetParent(); } while (loop != nullptr); } -// Adds the variable liveness information for 'blk' to 'this' LoopDsc -void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) -{ - VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); - VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); - - VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); - VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); -} - // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. void Compiler::AddVariableLivenessAllContainingLoops(FlowGraphNaturalLoop* loop, BasicBlock* blk) { @@ -8698,7 +8760,9 @@ void Compiler::AddVariableLivenessAllContainingLoops(FlowGraphNaturalLoop* loop, } // Adds "fldHnd" to the set of modified fields of "loop" and any parent loops. 
-void Compiler::AddModifiedFieldAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) +void Compiler::AddModifiedFieldAllContainingLoops(FlowGraphNaturalLoop* loop, + CORINFO_FIELD_HANDLE fldHnd, + FieldKindForVN fieldKind) { do { diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e6ea3c49ea594..989d6848e90f9 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2716,7 +2716,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ValueNum ValueNumStore::VNForFunc( var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN, ValueNum arg3VN) { - assert(arg0VN != NoVN && arg1VN != NoVN && arg2VN != NoVN && arg3VN != NoVN); + assert(arg0VN != NoVN && arg1VN != NoVN && arg2VN != NoVN && ((arg3VN != NoVN) || func == VNF_MapStore)); // Function arguments carry no exceptions. assert(arg0VN == VNNormalValue(arg0VN)); @@ -2765,9 +2765,18 @@ ValueNum ValueNumStore::VNForMapStore(ValueNum map, ValueNum index, ValueNum val { assert(MapIsPrecise(map)); - BasicBlock* const bb = m_pComp->compCurBB; - BasicBlock::loopNumber const loopNum = bb->bbNatLoopNum; - ValueNum const result = VNForFunc(TypeOfVN(map), VNF_MapStore, map, index, value, loopNum); + BasicBlock* const bb = m_pComp->compCurBB; + FlowGraphNaturalLoop* bbLoop = m_pComp->m_blockToLoop->GetLoop(bb); + + // TODO-Quirk: Remove + while ((bbLoop != nullptr) && (m_pComp->m_newToOldLoop[bbLoop->GetIndex()] == nullptr)) + { + bbLoop = bbLoop->GetParent(); + } + + unsigned loopIndex = bbLoop == nullptr ? UINT_MAX : bbLoop->GetIndex(); + + ValueNum const result = VNForFunc(TypeOfVN(map), VNF_MapStore, map, index, value, loopIndex); #ifdef DEBUG if (m_pComp->verbose) @@ -2964,12 +2973,15 @@ ValueNum ValueNumStore::VNForMapSelectInner(ValueNumKind vnk, var_types type, Va // If the current tree is in a loop then record memory dependencies for // hoisting. 
Note that this function may be called by other phases than VN // (such as VN-based dead store removal). - if ((m_pComp->compCurBB != nullptr) && (m_pComp->compCurTree != nullptr) && - m_pComp->compCurBB->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) + if ((m_pComp->compCurBB != nullptr) && (m_pComp->compCurTree != nullptr)) { - memoryDependencies.ForEach([this](ValueNum vn) { - m_pComp->optRecordLoopMemoryDependence(m_pComp->compCurTree, m_pComp->compCurBB, vn); - }); + FlowGraphNaturalLoop* loop = m_pComp->m_blockToLoop->GetLoop(m_pComp->compCurBB); + if (loop != nullptr) + { + memoryDependencies.ForEach([this](ValueNum vn) { + m_pComp->optRecordLoopMemoryDependence(m_pComp->compCurTree, m_pComp->compCurBB, vn); + }); + } } return result; @@ -5213,14 +5225,18 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN // ValueNum ValueNumStore::VNForExpr(BasicBlock* block, var_types type) { - BasicBlock::loopNumber loopNum; - if (block == nullptr) + unsigned loopIndex = ValueNumStore::UnknownLoop; + if (block != nullptr) { - loopNum = BasicBlock::MAX_LOOP_NUM; - } - else - { - loopNum = block->bbNatLoopNum; + FlowGraphNaturalLoop* loop = m_pComp->m_blockToLoop->GetLoop(block); + + // TODO-Quirk: Remove + while ((loop != nullptr) && (m_pComp->m_newToOldLoop[loop->GetIndex()] == nullptr)) + { + loop = loop->GetParent(); + } + + loopIndex = loop == nullptr ? 
ValueNumStore::NoLoop : loop->GetIndex(); } // VNForFunc(typ, func, vn) but bypasses looking in the cache @@ -5229,7 +5245,7 @@ ValueNum ValueNumStore::VNForExpr(BasicBlock* block, var_types type) unsigned const offsetWithinChunk = c->AllocVN(); VNDefFuncAppFlexible* fapp = c->PointerToFuncApp(offsetWithinChunk, 1); fapp->m_func = VNF_MemOpaque; - fapp->m_args[0] = loopNum; + fapp->m_args[0] = loopIndex; ValueNum resultVN = c->m_baseVN + offsetWithinChunk; return resultVN; @@ -5915,37 +5931,48 @@ var_types ValueNumStore::TypeOfVN(ValueNum vn) const //------------------------------------------------------------------------ // LoopOfVN: If the given value number is VNF_MemOpaque, VNF_MapStore, or -// VNF_MemoryPhiDef, return the loop number where the memory update occurs, -// otherwise returns MAX_LOOP_NUM. +// VNF_MemoryPhiDef, return the loop where the memory update occurs, +// otherwise returns nullptr // // Arguments: // vn - Value number to query // // Return Value: -// The memory loop number, which may be BasicBlock::NOT_IN_LOOP. -// Returns BasicBlock::MAX_LOOP_NUM if this VN is not a memory value number. +// The memory loop. 
// -BasicBlock::loopNumber ValueNumStore::LoopOfVN(ValueNum vn) +FlowGraphNaturalLoop* ValueNumStore::LoopOfVN(ValueNum vn) { VNFuncApp funcApp; if (GetVNFunc(vn, &funcApp)) { if (funcApp.m_func == VNF_MemOpaque) { - return (BasicBlock::loopNumber)funcApp.m_args[0]; + unsigned index = (unsigned)funcApp.m_args[0]; + if ((index == ValueNumStore::NoLoop) || (index == ValueNumStore::UnknownLoop)) + { + return nullptr; + } + + return m_pComp->m_loops->GetLoopByIndex(index); } else if (funcApp.m_func == VNF_MapStore) { - return (BasicBlock::loopNumber)funcApp.m_args[3]; + unsigned index = (unsigned)funcApp.m_args[3]; + if (index == ValueNumStore::NoLoop) + { + return nullptr; + } + + return m_pComp->m_loops->GetLoopByIndex(index); } else if (funcApp.m_func == VNF_PhiMemoryDef) { BasicBlock* const block = reinterpret_cast(ConstantValue(funcApp.m_args[0])); - return block->bbNatLoopNum; + return m_pComp->m_blockToLoop->GetLoop(block); } } - return BasicBlock::MAX_LOOP_NUM; + return nullptr; } bool ValueNumStore::IsVNConstant(ValueNum vn) @@ -9203,7 +9230,7 @@ void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore) printf(" := "); comp->vnPrint(newValVN, 0); printf("]"); - if (loopNum != BasicBlock::NOT_IN_LOOP) + if (loopNum != ValueNumStore::NoLoop) { printf("@" FMT_LP, loopNum); } @@ -9246,17 +9273,17 @@ void ValueNumStore::vnDumpMemOpaque(Compiler* comp, VNFuncApp* memOpaque) assert(memOpaque->m_func == VNF_MemOpaque); // Precondition. 
const unsigned loopNum = memOpaque->m_args[0]; - if (loopNum == BasicBlock::NOT_IN_LOOP) + if (loopNum == ValueNumStore::NoLoop) { printf("MemOpaque:NotInLoop"); } - else if (loopNum == BasicBlock::MAX_LOOP_NUM) + else if (loopNum == ValueNumStore::UnknownLoop) { printf("MemOpaque:Indeterminate"); } else { - printf("MemOpaque:L%02u", loopNum); + printf("MemOpaque:" FMT_LP, loopNum); } } @@ -9765,6 +9792,7 @@ PhaseStatus Compiler::fgValueNumber() } } + m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops); // Compute the side effects of loops. optComputeLoopSideEffects(); @@ -10008,11 +10036,11 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk) continue; } - unsigned loopNum; - ValueNum newMemoryVN; - if (optBlockIsLoopEntry(blk, &loopNum)) + ValueNum newMemoryVN; + FlowGraphNaturalLoop* loop = m_blockToLoop->GetLoop(blk); + if ((loop != nullptr) && (loop->GetHeader() == blk) && (m_newToOldLoop[loop->GetIndex()] != nullptr)) { - newMemoryVN = fgMemoryVNForLoopSideEffects(memoryKind, blk, loopNum); + newMemoryVN = fgMemoryVNForLoopSideEffects(memoryKind, blk, loop); } else { @@ -10133,40 +10161,28 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk) compCurBB = nullptr; } -ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, - BasicBlock* entryBlock, - unsigned innermostLoopNum) +ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, + BasicBlock* entryBlock, + FlowGraphNaturalLoop* loop) { - // "loopNum" is the innermost loop for which "blk" is the entry; find the outermost one. 
- assert(innermostLoopNum != BasicBlock::NOT_IN_LOOP); - unsigned loopsInNest = innermostLoopNum; - unsigned loopNum = innermostLoopNum; - while (loopsInNest != BasicBlock::NOT_IN_LOOP) - { - if (optLoopTable[loopsInNest].lpEntry != entryBlock) - { - break; - } - loopNum = loopsInNest; - loopsInNest = optLoopTable[loopsInNest].lpParent; - } - #ifdef DEBUG if (verbose) { - printf("Computing %s state for block " FMT_BB ", entry block for loops %d to %d:\n", - memoryKindNames[memoryKind], entryBlock->bbNum, innermostLoopNum, loopNum); + printf("Computing %s state for block " FMT_BB ", entry block for loop " FMT_LP ":\n", + memoryKindNames[memoryKind], entryBlock->bbNum, loop->GetIndex()); } #endif // DEBUG + const LoopSideEffects& sideEffs = m_loopSideEffects[loop->GetIndex()]; + // If this loop has memory havoc effects, just use a new, unique VN. - if (optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind]) + if (sideEffs.HasMemoryHavoc[memoryKind]) { ValueNum res = vnStore->VNForExpr(entryBlock, TYP_HEAP); #ifdef DEBUG if (verbose) { - printf(" Loop %d has memory havoc effect; heap state is new unique $%x.\n", loopNum, res); + printf(" Loop " FMT_LP " has memory havoc effect; heap state is new unique $%x.\n", loop->GetIndex(), res); } #endif // DEBUG return res; @@ -10175,12 +10191,14 @@ ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, // Otherwise, find the predecessors of the entry block that are not in the loop. // If there is only one such, use its memory value as the "base." If more than one, // use a new unique VN. + // TODO-Cleanup: Ensure canonicalization creates loop preheaders properly for handlers + // and simplify this logic. 
BasicBlock* nonLoopPred = nullptr; bool multipleNonLoopPreds = false; for (FlowEdge* pred = BlockPredsWithEH(entryBlock); pred != nullptr; pred = pred->getNextPredEdge()) { BasicBlock* predBlock = pred->getSourceBlock(); - if (!optLoopTable[loopNum].lpContains(predBlock)) + if (!loop->ContainsBlock(predBlock)) { if (nonLoopPred == nullptr) { @@ -10229,11 +10247,10 @@ ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, if (memoryKind == GcHeap) { // First the fields/field maps. - Compiler::LoopDsc::FieldHandleSet* fieldsMod = optLoopTable[loopNum].lpFieldsModified; + FieldHandleSet* fieldsMod = sideEffs.FieldsModified; if (fieldsMod != nullptr) { - for (Compiler::LoopDsc::FieldHandleSet::Node* const ki : - Compiler::LoopDsc::FieldHandleSet::KeyValueIteration(fieldsMod)) + for (FieldHandleSet::Node* const ki : FieldHandleSet::KeyValueIteration(fieldsMod)) { CORINFO_FIELD_HANDLE fldHnd = ki->GetKey(); FieldKindForVN fieldKind = ki->GetValue(); @@ -10256,10 +10273,10 @@ ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, } } // Now do the array maps. - Compiler::LoopDsc::ClassHandleSet* elemTypesMod = optLoopTable[loopNum].lpArrayElemTypesModified; + ClassHandleSet* elemTypesMod = sideEffs.ArrayElemTypesModified; if (elemTypesMod != nullptr) { - for (const CORINFO_CLASS_HANDLE elemClsHnd : Compiler::LoopDsc::ClassHandleSet::KeyIteration(elemTypesMod)) + for (const CORINFO_CLASS_HANDLE elemClsHnd : ClassHandleSet::KeyIteration(elemTypesMod)) { #ifdef DEBUG if (verbose) @@ -10290,10 +10307,8 @@ ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, // If there were any fields/elements modified, this should have been recorded as havoc // for ByrefExposed. 
assert(memoryKind == ByrefExposed); - assert((optLoopTable[loopNum].lpFieldsModified == nullptr) || - optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind]); - assert((optLoopTable[loopNum].lpArrayElemTypesModified == nullptr) || - optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind]); + assert((sideEffs.FieldsModified == nullptr) || sideEffs.HasMemoryHavoc[memoryKind]); + assert((sideEffs.ArrayElemTypesModified == nullptr) || sideEffs.HasMemoryHavoc[memoryKind]); } #ifdef DEBUG diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index e6ffb137584bb..f595d439ff37f 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -231,6 +231,12 @@ class ValueNumStore // A second special value, used to indicate that a function evaluation would cause infinite recursion. static const ValueNum RecursiveVN = UINT32_MAX - 1; + // Special value used to represent something that isn't in a loop for VN functions that take loop parameters. + static const unsigned NoLoop = UINT32_MAX; + // Special value used to represent something that may or may not be in a loop, so needs to be handled + // conservatively. + static const unsigned UnknownLoop = UINT32_MAX - 1; + // ================================================================================================== // VNMap - map from something to ValueNum, where something is typically a constant value or a VNFunc // This class has two purposes - to abstract the implementation and to validate the ValueNums @@ -875,8 +881,8 @@ class ValueNumStore // Returns TYP_UNKNOWN if the given value number has not been given a type. var_types TypeOfVN(ValueNum vn) const; - // Returns BasicBlock::MAX_LOOP_NUM if the given value number's loop nest is unknown or ill-defined. - BasicBlock::loopNumber LoopOfVN(ValueNum vn); + // Returns nullptr if the given value number is not dependent on memory defined in a loop. + class FlowGraphNaturalLoop* LoopOfVN(ValueNum vn); // Returns true iff the VN represents a constant. 
bool IsVNConstant(ValueNum vn); From 39e6bf5ae8eb7d792ecbb440becbad9ad448cf2f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 4 Dec 2023 17:24:25 +0100 Subject: [PATCH 12/20] Fix after merge --- src/coreclr/jit/optimizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 73f3f39363f3a..00d52a2dce247 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7117,7 +7117,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // Arguments: // comp - Compiler instance // blk - Block whose liveness is to be added -// +// void LoopSideEffects::AddVariableLiveness(Compiler* comp, BasicBlock* blk) { VarSetOps::UnionD(comp, VarInOut, blk->bbLiveIn); @@ -7134,7 +7134,7 @@ void LoopSideEffects::AddVariableLiveness(Compiler* comp, BasicBlock* blk) // comp - Compiler instance // fldHnd - Field handle being modified // fieldKind - Kind of field -// +// void LoopSideEffects::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind) { if (FieldsModified == nullptr) @@ -7152,7 +7152,7 @@ void LoopSideEffects::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldH // comp - Compiler instance // structHnd - Handle for struct. Can also be an encoding of a primitive // handle, see {Encode/Decode}ElemType. 
-// +// void LoopSideEffects::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd) { if (ArrayElemTypesModified == nullptr) From cf8258ab0c3ca851aafad4e4cd59e38f3acb9578 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 4 Dec 2023 17:38:39 +0100 Subject: [PATCH 13/20] Fixes after merge, compiler fixes --- src/coreclr/jit/flowgraph.cpp | 4 ++-- src/coreclr/jit/optimizer.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 505f61f4ad614..1e360fd2e1d89 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5408,8 +5408,8 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df BasicBlock* parent = block->bbIDom; assert(dfs->Contains(block) && dfs->Contains(parent)); - domTree[i].nextSibling = domTree[parent->bbPostorderNum].firstChild; - domTree[parent->bbPostorderNum].firstChild = block; + domTree[i].nextSibling = domTree[parent->bbNewPostorderNum].firstChild; + domTree[parent->bbNewPostorderNum].firstChild = block; } #ifdef DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 00d52a2dce247..63cb2c2a65b5a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6726,8 +6726,8 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho dumpConvertedVarSet(this, lvaLongVars); } #endif - hoistedCtxt->m_loopVarCount += VarSetOps::Count(this, loopLongVars); - hoistedCtxt->m_loopVarInOutCount += VarSetOps::Count(this, inOutLongVars); + hoistCtxt->m_loopVarCount += VarSetOps::Count(this, loopLongVars); + hoistCtxt->m_loopVarInOutCount += VarSetOps::Count(this, inOutLongVars); } #endif // !TARGET_64BIT From 800e44686b6260d3eb4ac4f58a58abf672e553c3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 5 Dec 2023 19:52:30 +0100 Subject: [PATCH 14/20] Fix after merge --- src/coreclr/jit/compiler.h | 15 
++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4d71f96def98b..f266c25a8280f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2184,8 +2184,6 @@ class FlowGraphNaturalLoop unsigned NumLoopBlocks(); - unsigned NumLoopBlocks(); - template BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); @@ -4893,12 +4891,8 @@ class Compiler unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder - FlowGraphDfsTree* m_dfs; - FlowGraphNaturalLoops* m_loops; - struct LoopDsc; - LoopDsc** m_newToOldLoop; - FlowGraphNaturalLoop** m_oldToNewLoop; + FlowGraphDfsTree* m_dfs; // The next members are annotations on the flow graph used during the // optimization phases. They are invalidated once RBO runs and modifies the // flow graph. @@ -4909,6 +4903,9 @@ class Compiler struct LoopSideEffects* m_loopSideEffects; struct HoistCounts* m_loopHoistCounts; BlockToNaturalLoopMap* m_blockToLoop; + // Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree + // in order to avoid the need for SSA reconstruction and an "out of SSA" phase). + FlowGraphDominatorTree* fgSsaDomTree; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -4919,10 +4916,6 @@ class Compiler unsigned* fgDomTreePreOrder; unsigned* fgDomTreePostOrder; - // Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree - // in order to avoid the need for SSA reconstruction and an "out of SSA" phase). 
- FlowGraphDominatorTree* fgSsaDomTree; - bool fgBBVarSetsInited; // Track how many artificial ref counts we've added to fgEntryBB (for OSR) From 3b813ba79d993d6bd07813e907b8d9c883f9b70a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 5 Dec 2023 20:11:20 +0100 Subject: [PATCH 15/20] Rename DFS tree members --- src/coreclr/jit/compiler.h | 26 ++++----- src/coreclr/jit/compiler.hpp | 12 ++-- src/coreclr/jit/fgbasic.cpp | 2 +- src/coreclr/jit/fgprofilesynthesis.cpp | 8 +-- src/coreclr/jit/fgprofilesynthesis.h | 2 +- src/coreclr/jit/flowgraph.cpp | 74 ++++++++++++------------- src/coreclr/jit/loopcloning.cpp | 2 +- src/coreclr/jit/optimizer.cpp | 10 ++-- src/coreclr/jit/redundantbranchopts.cpp | 2 +- src/coreclr/jit/ssabuilder.cpp | 10 ++-- src/coreclr/jit/valuenum.cpp | 6 +- 11 files changed, 77 insertions(+), 77 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f266c25a8280f..1bcbd145c9b45 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2085,7 +2085,7 @@ class FlowGraphNaturalLoop friend class FlowGraphNaturalLoops; // The DFS tree that contains the loop blocks. - const FlowGraphDfsTree* m_tree; + const FlowGraphDfsTree* m_dfsTree; // The header block; dominates all other blocks in the loop, and is the // only block branched to from outside the loop. @@ -2119,7 +2119,7 @@ class FlowGraphNaturalLoop // Can be used to store additional annotations for this loop on the side. 
unsigned m_index = 0; - FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head); + FlowGraphNaturalLoop(const FlowGraphDfsTree* dfsTree, BasicBlock* head); unsigned LoopBlockBitVecIndex(BasicBlock* block); bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); @@ -2141,7 +2141,7 @@ class FlowGraphNaturalLoop const FlowGraphDfsTree* GetDfsTree() const { - return m_tree; + return m_dfsTree; } FlowGraphNaturalLoop* GetParent() const @@ -2213,7 +2213,7 @@ class FlowGraphNaturalLoop // class FlowGraphNaturalLoops { - const FlowGraphDfsTree* m_dfs; + const FlowGraphDfsTree* m_dfsTree; // Collection of loops that were found. jitstd::vector m_loops; @@ -2228,7 +2228,7 @@ class FlowGraphNaturalLoops public: const FlowGraphDfsTree* GetDfsTree() { - return m_dfs; + return m_dfsTree; } size_t NumLoops() @@ -2310,14 +2310,14 @@ class FlowGraphDominatorTree template friend class NewDomTreeVisitor; - const FlowGraphDfsTree* m_dfs; - const DomTreeNode* m_tree; + const FlowGraphDfsTree* m_dfsTree; + const DomTreeNode* m_domTree; const unsigned* m_preorderNum; const unsigned* m_postorderNum; - FlowGraphDominatorTree(const FlowGraphDfsTree* dfs, const DomTreeNode* tree, const unsigned* preorderNum, const unsigned* postorderNum) - : m_dfs(dfs) - , m_tree(tree) + FlowGraphDominatorTree(const FlowGraphDfsTree* dfsTree, const DomTreeNode* domTree, const unsigned* preorderNum, const unsigned* postorderNum) + : m_dfsTree(dfsTree) + , m_domTree(domTree) , m_preorderNum(preorderNum) , m_postorderNum(postorderNum) { @@ -2328,7 +2328,7 @@ class FlowGraphDominatorTree BasicBlock* Intersect(BasicBlock* block, BasicBlock* block2); bool Dominates(BasicBlock* dominator, BasicBlock* dominated); - static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfs); + static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfsTree); }; // Represents a reverse mapping from block back to its (most nested) containing loop. 
@@ -4892,7 +4892,7 @@ class Compiler unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder - FlowGraphDfsTree* m_dfs; + FlowGraphDfsTree* m_dfsTree; // The next members are annotations on the flow graph used during the // optimization phases. They are invalidated once RBO runs and modifies the // flow graph. @@ -12125,7 +12125,7 @@ class NewDomTreeVisitor // void WalkTree(const FlowGraphDominatorTree* domTree) { - WalkTree(domTree->m_tree); + WalkTree(domTree->m_domTree); } }; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index fb917f6dbaf39..2105a29df1381 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4933,15 +4933,15 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason) template BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func) { - BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + BitVecTraits traits(m_blocksSize, m_dfsTree->GetCompiler()); bool result = BitVecOps::VisitBits(&traits, m_blocks, [=](unsigned index) { // head block rpo index = PostOrderCount - 1 - headPreOrderIndex // loop block rpo index = head block rpoIndex + index // loop block po index = PostOrderCount - 1 - loop block rpo index // = headPreOrderIndex - index unsigned poIndex = m_header->bbNewPostorderNum - index; - assert(poIndex < m_tree->GetPostOrderCount()); - return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + assert(poIndex < m_dfsTree->GetPostOrderCount()); + return func(m_dfsTree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; }); return result ? 
BasicBlockVisit::Continue : BasicBlockVisit::Abort; @@ -4965,11 +4965,11 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func template BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksPostOrder(TFunc func) { - BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + BitVecTraits traits(m_blocksSize, m_dfsTree->GetCompiler()); bool result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) { unsigned poIndex = m_header->bbNewPostorderNum - index; - assert(poIndex < m_tree->GetPostOrderCount()); - return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + assert(poIndex < m_dfsTree->GetPostOrderCount()); + return func(m_dfsTree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; }); return result ? BasicBlockVisit::Continue : BasicBlockVisit::Abort; diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 6e9f561a3b14d..a900ccb58e7f3 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -65,7 +65,7 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; - m_dfs = nullptr; + m_dfsTree = nullptr; m_loops = nullptr; m_newToOldLoop = nullptr; m_oldToNewLoop = nullptr; diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 461dd9758c959..cd9d872d7ea17 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -33,8 +33,8 @@ // void ProfileSynthesis::Run(ProfileSynthesisOption option) { - m_dfs = m_comp->fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfs); + m_dfsTree = m_comp->fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); // Retain or compute edge likelihood information // @@ -1000,9 +1000,9 @@ void ProfileSynthesis::ComputeBlockWeights() { JITDUMP("Computing block weights\n"); - for (unsigned i = m_dfs->GetPostOrderCount(); i != 0; i--) + for (unsigned i = m_dfsTree->GetPostOrderCount(); i != 0; i--) { - BasicBlock* 
block = m_dfs->GetPostOrder()[i - 1]; + BasicBlock* block = m_dfsTree->GetPostOrder()[i - 1]; ComputeBlockWeight(block); } } diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index ab82fffe5e37c..9297357049e8a 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -78,7 +78,7 @@ class ProfileSynthesis private: Compiler* const m_comp; - FlowGraphDfsTree* m_dfs; + FlowGraphDfsTree* m_dfsTree; FlowGraphNaturalLoops* m_loops; weight_t* m_cyclicProbabilities; unsigned m_improperLoopHeaders; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index b1a81dea5d7c4..bc5fd37686cbd 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4116,13 +4116,13 @@ FlowGraphDfsTree* Compiler::fgComputeDfs() // tree - The DFS tree // header - The loop header // -FlowGraphNaturalLoop::FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* header) - : m_tree(tree) +FlowGraphNaturalLoop::FlowGraphNaturalLoop(const FlowGraphDfsTree* dfsTree, BasicBlock* header) + : m_dfsTree(dfsTree) , m_header(header) , m_blocks(BitVecOps::UninitVal()) - , m_backEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) - , m_entryEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) - , m_exitEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) + , m_backEdges(dfsTree->GetCompiler()->getAllocator(CMK_Loops)) + , m_entryEdges(dfsTree->GetCompiler()->getAllocator(CMK_Loops)) + , m_exitEdges(dfsTree->GetCompiler()->getAllocator(CMK_Loops)) { } @@ -4146,7 +4146,7 @@ FlowGraphNaturalLoop::FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBl // unsigned FlowGraphNaturalLoop::LoopBlockBitVecIndex(BasicBlock* block) { - assert(m_tree->Contains(block)); + assert(m_dfsTree->Contains(block)); unsigned index = m_header->bbNewPostorderNum - block->bbNewPostorderNum; assert(index < m_blocksSize); @@ -4194,7 +4194,7 @@ bool FlowGraphNaturalLoop::TryGetLoopBlockBitVecIndex(BasicBlock* 
block, unsigne // BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits() { - return BitVecTraits(m_blocksSize, m_tree->GetCompiler()); + return BitVecTraits(m_blocksSize, m_dfsTree->GetCompiler()); } //------------------------------------------------------------------------ @@ -4256,8 +4256,8 @@ unsigned FlowGraphNaturalLoop::NumLoopBlocks() // Parameters: // dfs - A DFS tree. // -FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs) - : m_dfs(dfs), m_loops(m_dfs->GetCompiler()->getAllocator(CMK_Loops)) +FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfsTree) + : m_dfsTree(dfsTree), m_loops(m_dfsTree->GetCompiler()->getAllocator(CMK_Loops)) { } @@ -4354,34 +4354,34 @@ bool FlowGraphNaturalLoops::IsLoopExitEdge(FlowEdge* edge) // constructed for the flow graph. // // Parameters: -// dfs - The DFS tree +// dfsTree - The DFS tree // // Returns: // Identified natural loops. // -FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) +FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTree) { - Compiler* comp = dfs->GetCompiler(); + Compiler* comp = dfsTree->GetCompiler(); comp->m_blockToEHPreds = nullptr; #ifdef DEBUG JITDUMP("Identifying loops in DFS tree with following reverse post order:\n"); - for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--) + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) { - unsigned rpoNum = dfs->GetPostOrderCount() - i; - BasicBlock* const block = dfs->GetPostOrder()[i - 1]; + unsigned rpoNum = dfsTree->GetPostOrderCount() - i; + BasicBlock* const block = dfsTree->GetPostOrder()[i - 1]; JITDUMP("%02u -> " FMT_BB "[%u, %u]\n", rpoNum + 1, block->bbNum, block->bbPreorderNum + 1, block->bbNewPostorderNum + 1); } #endif - FlowGraphNaturalLoops* loops = new (comp, CMK_Loops) FlowGraphNaturalLoops(dfs); + FlowGraphNaturalLoops* loops = new (comp, CMK_Loops) FlowGraphNaturalLoops(dfsTree); jitstd::list 
worklist(comp->getAllocator(CMK_Loops)); - for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--) + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) { - BasicBlock* const header = dfs->GetPostOrder()[i - 1]; + BasicBlock* const header = dfsTree->GetPostOrder()[i - 1]; // If a block is a DFS ancestor of one if its predecessors then the block is a loop header. // @@ -4390,11 +4390,11 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) for (FlowEdge* predEdge : header->PredEdges()) { BasicBlock* predBlock = predEdge->getSourceBlock(); - if (dfs->Contains(predBlock) && dfs->IsAncestor(header, predBlock)) + if (dfsTree->Contains(predBlock) && dfsTree->IsAncestor(header, predBlock)) { if (loop == nullptr) { - loop = new (comp, CMK_Loops) FlowGraphNaturalLoop(dfs, header); + loop = new (comp, CMK_Loops) FlowGraphNaturalLoop(dfsTree, header); JITDUMP("\n"); } @@ -4454,7 +4454,7 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) for (FlowEdge* const predEdge : loop->m_header->PredEdges()) { BasicBlock* predBlock = predEdge->getSourceBlock(); - if (dfs->Contains(predBlock) && !dfs->IsAncestor(header, predEdge->getSourceBlock())) + if (dfsTree->Contains(predBlock) && !dfsTree->IsAncestor(header, predEdge->getSourceBlock())) { JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, loop->m_header->bbNum); @@ -4551,8 +4551,8 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) // bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist) { - const FlowGraphDfsTree* tree = loop->m_tree; - Compiler* comp = tree->GetCompiler(); + const FlowGraphDfsTree* dfsTree = loop->m_dfsTree; + Compiler* comp = dfsTree->GetCompiler(); BitVecTraits loopTraits = loop->LoopBlockTraits(); BitVecOps::AddElemD(&loopTraits, loop->m_blocks, 0); @@ -4585,14 +4585,14 @@ bool 
FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ji { BasicBlock* const predBlock = predEdge->getSourceBlock(); - if (!tree->Contains(predBlock)) + if (!dfsTree->Contains(predBlock)) { continue; } // Head cannot dominate `predBlock` unless it is a DFS ancestor. // - if (!tree->IsAncestor(loop->GetHeader(), predBlock)) + if (!dfsTree->IsAncestor(loop->GetHeader(), predBlock)) { JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, loopBlock->bbNum); return false; @@ -4660,7 +4660,7 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func) } }; - VisitDefsVisitor visitor(m_tree->GetCompiler(), func); + VisitDefsVisitor visitor(m_dfsTree->GetCompiler(), func); BasicBlockVisit result = VisitLoopBlocks([&](BasicBlock* loopBlock) { for (Statement* stmt : loopBlock->Statements()) @@ -4729,7 +4729,7 @@ bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info) { JITDUMP("Analyzing iteration for " FMT_LP " with header " FMT_BB "\n", m_index, m_header->bbNum); - const FlowGraphDfsTree* dfs = m_tree; + const FlowGraphDfsTree* dfs = m_dfsTree; Compiler* comp = dfs->GetCompiler(); assert((m_entryEdges.size() == 1) && "Expected preheader"); @@ -5036,7 +5036,7 @@ BasicBlock* FlowGraphNaturalLoop::GetLexicallyBottomMostBlock() // bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) { - Compiler* comp = m_tree->GetCompiler(); + Compiler* comp = m_dfsTree->GetCompiler(); LclVarDsc* dsc = comp->lvaGetDesc(lclNum); assert(!comp->lvaVarAddrExposed(lclNum)); @@ -5316,7 +5316,7 @@ BasicBlock* FlowGraphDominatorTree::Intersect(BasicBlock* block1, BasicBlock* bl // bool FlowGraphDominatorTree::Dominates(BasicBlock* dominator, BasicBlock* dominated) { - assert(m_dfs->Contains(dominator) && m_dfs->Contains(dominated)); + assert(m_dfsTree->Contains(dominator) && m_dfsTree->Contains(dominated)); // What we want to ask here is basically if A is in the middle of the path // from B to the root (the entry node) in the dominator tree. 
Turns out @@ -5335,7 +5335,7 @@ bool FlowGraphDominatorTree::Dominates(BasicBlock* dominator, BasicBlock* domina // the DFS tree. // // Parameters: -// dfs - DFS tree. +// dfsTree - DFS tree. // // Returns: // Data structure representing dominator tree. Immediate dominators are @@ -5347,11 +5347,11 @@ bool FlowGraphDominatorTree::Dominates(BasicBlock* dominator, BasicBlock* domina // This might require creating a scratch root block in case the first block // has backedges or is in a try region. // -FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* dfs) +FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* dfsTree) { - Compiler* comp = dfs->GetCompiler(); - BasicBlock** postOrder = dfs->GetPostOrder(); - unsigned count = dfs->GetPostOrderCount(); + Compiler* comp = dfsTree->GetCompiler(); + BasicBlock** postOrder = dfsTree->GetPostOrder(); + unsigned count = dfsTree->GetPostOrderCount(); assert((comp->fgFirstBB->bbPreds == nullptr) && !comp->fgFirstBB->hasTryIndex()); assert(postOrder[count - 1] == comp->fgFirstBB); @@ -5375,7 +5375,7 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df for (FlowEdge* pred = comp->BlockDominancePreds(block); pred; pred = pred->getNextPredEdge()) { BasicBlock* domPred = pred->getSourceBlock(); - if (!dfs->Contains(domPred)) + if (!dfsTree->Contains(domPred)) { continue; // Unreachable pred } @@ -5416,7 +5416,7 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df { BasicBlock* block = postOrder[i]; BasicBlock* parent = block->bbIDom; - assert(dfs->Contains(block) && dfs->Contains(parent)); + assert(dfsTree->Contains(block) && dfsTree->Contains(parent)); domTree[i].nextSibling = domTree[parent->bbNewPostorderNum].firstChild; domTree[parent->bbNewPostorderNum].firstChild = block; @@ -5477,7 +5477,7 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df NumberDomTreeVisitor number(comp, 
preorderNums, postorderNums); number.WalkTree(domTree); - return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfs, domTree, preorderNums, postorderNums); + return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfsTree, domTree, preorderNums, postorderNums); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index dc55ada25f6f7..1ba2dfb91b369 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -3270,7 +3270,7 @@ PhaseStatus Compiler::optCloneLoops() // TODO: recompute the loop table, to include the slow loop path in the table? fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); - m_dfs = fgComputeDfs(); + m_dfsTree = fgComputeDfs(); optFindNewLoops(); } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 63cb2c2a65b5a..59da8e409a8eb 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4637,7 +4637,7 @@ PhaseStatus Compiler::optUnrollLoops() fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); } - m_dfs = fgComputeDfs(); + m_dfsTree = fgComputeDfs(); optFindNewLoops(); DBEXEC(verbose, fgDispBasicBlocks()); @@ -5543,7 +5543,7 @@ PhaseStatus Compiler::optFindLoopsPhase() { optFindLoops(); - m_dfs = fgComputeDfs(); + m_dfsTree = fgComputeDfs(); optFindNewLoops(); return PhaseStatus::MODIFIED_EVERYTHING; @@ -5554,7 +5554,7 @@ PhaseStatus Compiler::optFindLoopsPhase() // void Compiler::optFindNewLoops() { - m_loops = FlowGraphNaturalLoops::Find(m_dfs); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); m_newToOldLoop = m_loops->NumLoops() == 0 ? 
nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; @@ -8421,8 +8421,8 @@ void Compiler::optComputeLoopSideEffects() m_loopSideEffects[loop->GetIndex()].VarUseDef = VarSetOps::MakeEmpty(this); } - BasicBlock** postOrder = m_dfs->GetPostOrder(); - unsigned postOrderCount = m_dfs->GetPostOrderCount(); + BasicBlock** postOrder = m_dfsTree->GetPostOrder(); + unsigned postOrderCount = m_dfsTree->GetPostOrderCount(); // The side effect code benefits from seeing things in RPO as it has some // limited treatment assignments it has seen the value of. diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index c542c99cb5238..29dbb8778062b 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -818,7 +818,7 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom { for (BasicBlock* const predBlock : block->PredBlocks()) { - if (m_dfs->Contains(predBlock) && !fgSsaDomTree->Dominates(domBlock, predBlock)) + if (m_dfsTree->Contains(predBlock) && !fgSsaDomTree->Dominates(domBlock, predBlock)) { JITDUMP("Dom " FMT_BB " is stale (does not dominate pred " FMT_BB "); no threading\n", domBlock->bbNum, predBlock->bbNum); diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 0a26f321899e3..8b67a6ee049bb 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -415,9 +415,9 @@ void SsaBuilder::InsertPhiFunctions() { JITDUMP("*************** In SsaBuilder::InsertPhiFunctions()\n"); - FlowGraphDfsTree* dfs = m_pCompiler->m_dfs; - BasicBlock** postOrder = dfs->GetPostOrder(); - unsigned count = dfs->GetPostOrderCount(); + FlowGraphDfsTree* dfsTree = m_pCompiler->m_dfsTree; + BasicBlock** postOrder = dfsTree->GetPostOrder(); + unsigned count = dfsTree->GetPostOrderCount(); // Compute dominance frontier. 
BlkToBlkVectorMap mapDF(m_allocator); @@ -1304,8 +1304,8 @@ void SsaBuilder::Build() m_visitedTraits = BitVecTraits(blockCount, m_pCompiler); m_visited = BitVecOps::MakeEmpty(&m_visitedTraits); - m_pCompiler->m_dfs = m_pCompiler->fgComputeDfs(); - m_pCompiler->fgSsaDomTree = FlowGraphDominatorTree::Build(m_pCompiler->m_dfs); + m_pCompiler->m_dfsTree = m_pCompiler->fgComputeDfs(); + m_pCompiler->fgSsaDomTree = FlowGraphDominatorTree::Build(m_pCompiler->m_dfsTree); EndPhase(PHASE_BUILD_SSA_DOMS); // Compute liveness on the graph. diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 989d6848e90f9..773f51f988808 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9696,7 +9696,7 @@ class ValueNumberState // bool IsReachable(BasicBlock* bb) { - return m_comp->m_dfs->Contains(bb) && + return m_comp->m_dfsTree->Contains(bb) && !BitVecOps::IsMember(&m_blockTraits, m_provenUnreachableBlocks, bb->bbNum); } @@ -9878,8 +9878,8 @@ PhaseStatus Compiler::fgValueNumber() // SSA has already computed a post-order taking EH successors into account. // Visiting that in reverse will ensure we visit a block's predecessors // before itself whenever possible. 
- BasicBlock** postOrder = m_dfs->GetPostOrder(); - unsigned postOrderCount = m_dfs->GetPostOrderCount(); + BasicBlock** postOrder = m_dfsTree->GetPostOrder(); + unsigned postOrderCount = m_dfsTree->GetPostOrderCount(); for (unsigned i = postOrderCount; i != 0; i--) { BasicBlock* block = postOrder[i - 1]; From fb0ffeab3e93fdbe17828c6e32b84538bdb15a4a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Dec 2023 09:52:15 +0100 Subject: [PATCH 16/20] Fix nested loop check; iterate blocks in loops directly --- src/coreclr/jit/compiler.h | 3 ++- src/coreclr/jit/flowgraph.cpp | 16 +++++++++------ src/coreclr/jit/optimizer.cpp | 37 ++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1bcbd145c9b45..7a5332ac1959e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2335,6 +2335,8 @@ class FlowGraphDominatorTree class BlockToNaturalLoopMap { FlowGraphNaturalLoops* m_loops; + // Array from postorder num -> index of most-nested loop containing the + // block, or UINT_MAX if no loop contains it. unsigned* m_indices; BlockToNaturalLoopMap(FlowGraphNaturalLoops* loops, unsigned* indices) @@ -12257,7 +12259,6 @@ struct LoopSideEffects // in the loop. 
ClassHandleSet* ArrayElemTypesModified = nullptr; bool ContainsCall = false; - bool HasNestedLoops = false; LoopSideEffects(); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index bc5fd37686cbd..e1dda3ebaf82d 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5499,12 +5499,12 @@ FlowGraphNaturalLoop* BlockToNaturalLoopMap::GetLoop(BasicBlock* block) } unsigned index = m_indices[block->bbNewPostorderNum]; - if (index == 0) + if (index == UINT_MAX) { return nullptr; } - return m_loops->GetLoopByIndex(index - 1); + return m_loops->GetLoopByIndex(index); } //------------------------------------------------------------------------ @@ -5520,16 +5520,20 @@ BlockToNaturalLoopMap* BlockToNaturalLoopMap::Build(FlowGraphNaturalLoops* loops { const FlowGraphDfsTree* dfs = loops->GetDfsTree(); Compiler* comp = dfs->GetCompiler(); - // Indices are 1-based, with 0 meaning "no loop". - unsigned* indices = - dfs->GetPostOrderCount() == 0 ? nullptr : (new (comp, CMK_Loops) unsigned[dfs->GetPostOrderCount()]{}); + unsigned* indices = + dfs->GetPostOrderCount() == 0 ? nullptr : (new (comp, CMK_Loops) unsigned[dfs->GetPostOrderCount()]); + + for (unsigned i = 0; i < dfs->GetPostOrderCount(); i++) + { + indices[i] = UINT_MAX; + } // Now visit all loops in reverse post order, meaning that we see inner // loops last and thus write their indices into the map last. 
for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder()) { loop->VisitLoopBlocks([=](BasicBlock* block) { - indices[block->bbNewPostorderNum] = loop->GetIndex() + 1; + indices[block->bbNewPostorderNum] = loop->GetIndex(); return BasicBlockVisit::Continue; }); } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 59da8e409a8eb..c1c6b1e2db7d2 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -8424,23 +8424,32 @@ void Compiler::optComputeLoopSideEffects() BasicBlock** postOrder = m_dfsTree->GetPostOrder(); unsigned postOrderCount = m_dfsTree->GetPostOrderCount(); - // The side effect code benefits from seeing things in RPO as it has some - // limited treatment assignments it has seen the value of. - for (unsigned i = postOrderCount; i != 0; i--) + // Iterate all blocks in loops. + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { - BasicBlock* block = postOrder[i - 1]; - FlowGraphNaturalLoop* loop = m_blockToLoop->GetLoop(block); - - // TODO-Quirk: Remove - while ((loop != nullptr) && (m_newToOldLoop[loop->GetIndex()] == nullptr)) + if (loop->GetParent() != nullptr) { - loop = loop->GetParent(); + continue; } - if (loop != nullptr) - { - optComputeLoopSideEffectsOfBlock(block, loop); - } + // The side effect code benefits from seeing things in RPO as it has some + // limited treatment assignments it has seen the value of. 
+ loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* loopBlock) { + FlowGraphNaturalLoop* loop = m_blockToLoop->GetLoop(loopBlock); + + // TODO-Quirk: Remove + while ((loop != nullptr) && (m_newToOldLoop[loop->GetIndex()] == nullptr)) + { + loop = loop->GetParent(); + } + + if (loop != nullptr) + { + optComputeLoopSideEffectsOfBlock(loopBlock, loop); + } + + return BasicBlockVisit::Continue; + }); } } @@ -8734,7 +8743,7 @@ void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) // If this is the inner most loop, reset the LOOP_ALIGN flag // because a loop having call will not likely to benefit from // alignment - if (!m_loopSideEffects[loop->GetIndex()].HasNestedLoops) + if (loop->GetChild() == nullptr) { BasicBlock* top = loop->GetLexicallyTopMostBlock(); From f1e1c59776be5b8b5ac2dbe8ab669eb13df5bcd8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Dec 2023 10:26:24 +0100 Subject: [PATCH 17/20] Recompute new loops in RecomputeLoopInfo --- src/coreclr/jit/compiler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 076daad229f97..b9c49d7b458f3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5765,6 +5765,9 @@ void Compiler::RecomputeLoopInfo() optSetBlockWeights(); // Rebuild the loop tree annotations themselves optFindLoops(); + + m_dfsTree = fgComputeDfs(); + optFindNewLoops(); } /*****************************************************************************/ From dd8f1fd77f31d811ded55977335a80e2894e5030 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Dec 2023 13:36:43 +0100 Subject: [PATCH 18/20] Quirk some more --- src/coreclr/jit/optimizer.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index c1c6b1e2db7d2..d6f834cabe569 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ 
-8735,6 +8735,21 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNatura } } +// TODO-Quirk: Remove +static bool HasOldChildLoop(Compiler* comp, FlowGraphNaturalLoop* loop) +{ + for (FlowGraphNaturalLoop* child = loop->GetChild(); child != nullptr; child = child->GetSibling()) + { + if (comp->m_newToOldLoop[child->GetIndex()] != nullptr) + return true; + + if (HasOldChildLoop(comp, child)) + return true; + } + + return false; +} + // Marks the containsCall information to "loop" and any parent loops. void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) { @@ -8743,7 +8758,7 @@ void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) // If this is the inner most loop, reset the LOOP_ALIGN flag // because a loop having call will not likely to benefit from // alignment - if (loop->GetChild() == nullptr) + if (!HasOldChildLoop(this, loop)) { BasicBlock* top = loop->GetLexicallyTopMostBlock(); From 864b7342d3c4b34fbe338bc8e02fde1a39549547 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 7 Dec 2023 08:51:36 +0100 Subject: [PATCH 19/20] Address some feedback; update "lnum" --- src/coreclr/jit/compiler.h | 77 ++++++++++++++++++----------------- src/coreclr/jit/optimizer.cpp | 2 +- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 7a5332ac1959e..24d33e19bdbf2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2350,6 +2350,38 @@ class BlockToNaturalLoopMap static BlockToNaturalLoopMap* Build(FlowGraphNaturalLoops* loops); }; +typedef JitHashTable, FieldKindForVN> FieldHandleSet; + +typedef JitHashTable, bool> ClassHandleSet; + +// Represents a distillation of the useful side effects that occur inside a loop. +// Used by VN to be able to reason more precisely when entering loops. 
+struct LoopSideEffects +{ + // The loop contains an operation that we assume has arbitrary memory side + // effects. If this is set, the fields below may not be accurate (since + // they become irrelevant.) + bool HasMemoryHavoc[MemoryKindCount]; + // The set of variables that are IN or OUT during the execution of this loop + VARSET_TP VarInOut; + // The set of variables that are USE or DEF during the execution of this loop. + VARSET_TP VarUseDef; + // This has entries for all static field and object instance fields modified + // in the loop. + FieldHandleSet* FieldsModified = nullptr; + // Bits set indicate the set of sz array element types such that + // arrays of that type are modified + // in the loop. + ClassHandleSet* ArrayElemTypesModified = nullptr; + bool ContainsCall = false; + + LoopSideEffects(); + + void AddVariableLiveness(Compiler* comp, BasicBlock* block); + void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); + void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd); +}; + // The following holds information about instr offsets in terms of generated code. enum class IPmappingDscKind @@ -4902,8 +4934,7 @@ class Compiler struct LoopDsc; LoopDsc** m_newToOldLoop; FlowGraphNaturalLoop** m_oldToNewLoop; - struct LoopSideEffects* m_loopSideEffects; - struct HoistCounts* m_loopHoistCounts; + LoopSideEffects* m_loopSideEffects; BlockToNaturalLoopMap* m_blockToLoop; // Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree // in order to avoid the need for SSA reconstruction and an "out of SSA" phase). 
@@ -6721,7 +6752,7 @@ class Compiler // Return true if the tree looks profitable to hoist out of "loop" bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); - // Performs the hoisting 'tree' into the PreHeader for "loop" + // Performs the hoisting "tree" into the PreHeader for "loop" void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt); // Note the new SSA uses in tree @@ -6757,7 +6788,7 @@ class Compiler // Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part. void optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNaturalLoop* mostNestedLoop); - // Hoist the expression "expr" out of loop "lnum". + // Hoist the expression "expr" out of "loop" void optPerformHoistExpr(GenTree* expr, BasicBlock* exprBb, FlowGraphNaturalLoop* loop); public: @@ -7121,16 +7152,16 @@ class Compiler BlockToBlockMap* redirectMap, const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists); - // Marks the containsCall information to "lnum" and any parent loops. + // Marks the containsCall information to "loop" and any parent loops. void AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop); - // Adds the variable liveness information from 'blk' to "lnum" and any parent loops. + // Adds the variable liveness information from 'blk' to "loop" and any parent loops. void AddVariableLivenessAllContainingLoops(FlowGraphNaturalLoop* loop, BasicBlock* blk); - // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. + // Adds "fldHnd" to the set of modified fields of "loop" and any parent loops. void AddModifiedFieldAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); - // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. 
+ // Adds "elemType" to the set of modified array element types of "loop" and any parent loops. void AddModifiedElemTypeAllContainingLoops(FlowGraphNaturalLoop* loop, CORINFO_CLASS_HANDLE elemType); // Requires that "from" and "to" have the same "bbJumpKind" (perhaps because "to" is a clone @@ -12237,36 +12268,6 @@ class StringPrinter void Append(char chr); }; -typedef JitHashTable, FieldKindForVN> FieldHandleSet; - -typedef JitHashTable, bool> ClassHandleSet; - -struct LoopSideEffects -{ - // The loop contains an operation that we assume has arbitrary memory side - // effects. If this is set, the fields below may not be accurate (since - // they become irrelevant.) - bool HasMemoryHavoc[MemoryKindCount]; - // The set of variables that are IN or OUT during the execution of this loop - VARSET_TP VarInOut; - // The set of variables that are USE or DEF during the execution of this loop. - VARSET_TP VarUseDef; - // This has entries for all static field and object instance fields modified - // in the loop. - FieldHandleSet* FieldsModified = nullptr; - // Bits set indicate the set of sz array element types such that - // arrays of that type are modified - // in the loop. - ClassHandleSet* ArrayElemTypesModified = nullptr; - bool ContainsCall = false; - - LoopSideEffects(); - - void AddVariableLiveness(Compiler* comp, BasicBlock* block); - void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd, FieldKindForVN fieldKind); - void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd); -}; - /***************************************************************************** * * Variables to keep track of total code amounts. 
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index d6f834cabe569..4950ac760873c 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7112,7 +7112,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) } //------------------------------------------------------------------------ -// AddVariableLiveness: Adds the variable liveness information for 'blk' to 'this' LoopDsc +// AddVariableLiveness: Adds the variable liveness information for 'blk' // // Arguments: // comp - Compiler instance From d0ba64dcd5ce1eb5f4ecf152918c08078f8b9eaa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 7 Dec 2023 09:00:28 +0100 Subject: [PATCH 20/20] Convenience edge accessor functions --- src/coreclr/jit/compiler.h | 30 ++++++++++++++++++++++++------ src/coreclr/jit/fgbasic.cpp | 1 - src/coreclr/jit/loopcloning.cpp | 4 ++-- src/coreclr/jit/optimizer.cpp | 6 +++--- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a9b9536dbcaf8..315fd2d0988e2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2179,6 +2179,24 @@ class FlowGraphNaturalLoop return m_exitEdges; } + FlowEdge* BackEdge(unsigned index) + { + assert(index < m_backEdges.size()); + return m_backEdges[index]; + } + + FlowEdge* EntryEdge(unsigned index) + { + assert(index < m_entryEdges.size()); + return m_entryEdges[index]; + } + + FlowEdge* ExitEdge(unsigned index) + { + assert(index < m_exitEdges.size()); + return m_exitEdges[index]; + } + bool ContainsBlock(BasicBlock* block); bool ContainsLoop(FlowGraphNaturalLoop* childLoop); @@ -2350,6 +2368,12 @@ class BlockToNaturalLoopMap static BlockToNaturalLoopMap* Build(FlowGraphNaturalLoops* loops); }; +enum class FieldKindForVN +{ + SimpleStatic, + WithBaseAddr +}; + typedef JitHashTable, FieldKindForVN> FieldHandleSet; typedef JitHashTable, bool> ClassHandleSet; @@ -2416,12 +2440,6 @@ 
enum class NodeThreading LIR, // Nodes are in LIR form (after rationalization) }; -enum class FieldKindForVN -{ - SimpleStatic, - WithBaseAddr -}; - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 9fd2bd3923a16..d701a9b4b4bec 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -70,7 +70,6 @@ void Compiler::fgInit() m_newToOldLoop = nullptr; m_oldToNewLoop = nullptr; m_loopSideEffects = nullptr; - m_loopHoistCounts = nullptr; m_blockToLoop = nullptr; // Initialize BlockSet data. diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 22a2ed340fbb6..70b4d6f138b22 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1824,7 +1824,7 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c // Loop canonicalization should have ensured that there is a unique preheader. assert(loop->EntryEdges().size() == 1); - BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); // If the head and entry are in different EH regions, reject. if (!BasicBlock::sameEHRegion(preheader, loop->GetHeader())) @@ -2025,7 +2025,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex } assert(loop->EntryEdges().size() == 1); - BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); // The ambient weight might be higher than we computed above. Be safe by // taking the max with the head block's weight. 
ambientWeight = max(ambientWeight, preheader->bbWeight); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index cfea1b53ba5e6..2a546d4d40cb5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6388,7 +6388,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, FlowGr assert(exprBb != nullptr); assert(loop->EntryEdges().size() == 1); - BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); #ifdef DEBUG if (verbose) { @@ -6847,7 +6847,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho FlowGraphNaturalLoop* childLoop = m_oldToNewLoop[childLoopNum]; assert(childLoop->EntryEdges().size() == 1); - BasicBlock* childPreHead = childLoop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* childPreHead = childLoop->EntryEdge(0)->getSourceBlock(); if (loop->ExitEdges().size() == 1) { if (fgSsaDomTree->Dominates(childPreHead, loop->ExitEdges()[0]->getSourceBlock())) @@ -7789,7 +7789,7 @@ void Compiler::optHoistCandidate(GenTree* tree, // We should already have a pre-header for the loop. assert(loop->EntryEdges().size() == 1); - BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); // If the block we're hoisting from and the pre-header are in different EH regions, don't hoist. // TODO: we could probably hoist things that won't raise exceptions, such as constants.