From 05bb910dafc7c2c9fadf9d17f1d2ef933a9f46a2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 13:01:21 +0100 Subject: [PATCH 01/11] JIT: Port loop cloning to the new loop representation Run the new loop recognition at the same time as old loop recognition and cross validate that the information matches the old loop information. Implement induction analysis on the new representation. Validate that it matches the old loop induction analysis. Add a few quirks to ensure this. Port loop cloning to run on the new loop representation. Add a number of quirks to keep the logic close to the old cloning. One notable difference is that the old loop recognition considers some blocks to be part of the loops that actually are not; for example, blocks that return or break out from the loop are not part of the loop's SCC, but can be part of the lexical range. This means that the old loop cloning will clone those blocks and apply static optimizations to them, while the new loop cloning won't. However, this does not seem to cause a significant number of regressions. A few diffs are expected due to considering fewer blocks to be part of the loop and thus cloning fewer blocks. Also, throughput regressions are expected since we are doing both loop identifications for now. I expect to incrementally work towards replacing everything with the new representation, at which point we can get rid of the old loop identification and hopefully most of the reachability/dominator computations. I expect us to make back the TP at that point. 
--- src/coreclr/jit/compiler.h | 100 ++++-- src/coreclr/jit/fgbasic.cpp | 3 +- src/coreclr/jit/flowgraph.cpp | 620 ++++++++++++++++++++++++++++++++ src/coreclr/jit/loopcloning.cpp | 523 ++++++++++++++++----------- src/coreclr/jit/loopcloning.h | 15 +- src/coreclr/jit/optimizer.cpp | 48 +++ 6 files changed, 1068 insertions(+), 241 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 745372a33b02e..03e1a22eed5fb 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1996,6 +1996,42 @@ class FlowGraphDfsTree bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const; }; +struct NaturalLoopIterInfo +{ + unsigned IterVar = BAD_VAR_NUM; + int ConstInitValue = 0; + BasicBlock* InitBlock = nullptr; + GenTree* TestTree = nullptr; + GenTree* IterTree = nullptr; + bool HasConstInit : 1; + bool HasConstLimit : 1; + bool HasSimdLimit : 1; + bool HasInvariantLocalLimit : 1; + bool HasArrayLengthLimit : 1; + + NaturalLoopIterInfo() + : HasConstInit(false) + , HasConstLimit(false) + , HasSimdLimit(false) + , HasInvariantLocalLimit(false) + , HasArrayLengthLimit(false) + { + } + + int IterConst(); + genTreeOps IterOper(); + var_types IterOperType(); + bool IsReversed(); + genTreeOps TestOper(); + bool IsIncreasingLoop(); + bool IsDecreasingLoop(); + GenTree* Iterator(); + GenTree* Limit(); + int ConstLimit(); + unsigned VarLimit(); + bool ArrLenLimit(Compiler* comp, ArrIndex* index); +}; + class FlowGraphNaturalLoop { friend class FlowGraphNaturalLoops; @@ -2018,6 +2054,14 @@ class FlowGraphNaturalLoop bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); BitVecTraits LoopBlockTraits(); + + template + bool VisitDefs(TFunc func); + + GenTreeLclVarCommon* FindDef(unsigned lclNum); + + void MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init); + bool MatchLimit(NaturalLoopIterInfo* info, GenTree* test); public: BasicBlock* GetHeader() const { @@ -2064,6 +2108,12 @@ class 
FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocks(TFunc func); + + BasicBlock* GetLexicallyBottomMostBlock(); + + bool AnalyzeIteration(NaturalLoopIterInfo* info); + + bool HasDef(unsigned lclNum); }; class FlowGraphNaturalLoops @@ -4686,6 +4736,10 @@ class Compiler unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder FlowGraphDfsTree* m_dfs; + FlowGraphNaturalLoops* m_loops; + struct LoopDsc; + LoopDsc** m_newToOldLoop; + FlowGraphNaturalLoop** m_oldToNewLoop; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -6532,7 +6586,7 @@ class Compiler void optFindLoops(); PhaseStatus optCloneLoops(); - void optCloneLoop(unsigned loopInd, LoopCloneContext* context); + void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) void optRemoveRedundantZeroInits(); PhaseStatus optIfConversion(); // If conversion @@ -6844,7 +6898,7 @@ class Compiler bool optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar); bool optPopulateInitInfo(unsigned loopInd, BasicBlock* initBlock, GenTree* init, unsigned iterVar); bool optExtractInitTestIncr( - BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* exit, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); + BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); void optFindNaturalLoops(); @@ -7963,34 +8017,34 @@ class Compiler public: struct LoopCloneVisitorInfo { - LoopCloneContext* context; - Statement* stmt; - const unsigned loopNum; - const bool cloneForArrayBounds; - const bool cloneForGDVTests; - LoopCloneVisitorInfo(LoopCloneContext* context, - unsigned 
loopNum, - Statement* stmt, - bool cloneForArrayBounds, - bool cloneForGDVTests) + LoopCloneContext* context; + Statement* stmt; + FlowGraphNaturalLoop* loop; + const bool cloneForArrayBounds; + const bool cloneForGDVTests; + LoopCloneVisitorInfo(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + Statement* stmt, + bool cloneForArrayBounds, + bool cloneForGDVTests) : context(context) , stmt(nullptr) - , loopNum(loopNum) + , loop(loop) , cloneForArrayBounds(cloneForArrayBounds) , cloneForGDVTests(cloneForGDVTests) { } }; - bool optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum); + bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum); bool optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndex(GenTree* tree, ArrIndex* result); - bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context); + bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context); static fgWalkPreFn optCanOptimizeByLoopCloningVisitor; fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info); bool optObtainLoopCloningOpts(LoopCloneContext* context); - bool optIsLoopClonable(unsigned loopInd); + bool optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context); bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info); bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType); @@ -7999,13 +8053,13 @@ class Compiler #ifdef DEBUG void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore); #endif - void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath)); - bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context); - bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context); - 
BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter); + void optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, LoopCloneContext* context DEBUGARG(bool fastPath)); + bool optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + bool optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter); protected: ssize_t optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 917fd55ca5a95..48accd29435b8 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -68,7 +68,8 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; - m_dfs = nullptr; + m_dfs = nullptr; + m_loops = nullptr; // Initialize BlockSet data. fgCurBBEpoch = 0; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index f81c55b004a04..9214addb38bf2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4640,3 +4640,623 @@ bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ji return true; } + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitDefs: Visit all definitions contained in the +// loop. +// +// Type parameters: +// TFunc - Callback functor type +// +// Parameters: +// func - Callback functor that accepts a GenTreeLclVarCommon* and returns a +// bool. On true, continue looking for defs; on false, abort. +// +// Returns: +// True if all defs were visited and the functor never returned false; otherwise false. 
+// +template +bool FlowGraphNaturalLoop::VisitDefs(TFunc func) +{ + class VisitDefsVisitor : public GenTreeVisitor + { + using GenTreeVisitor::m_compiler; + + TFunc& m_func; + + public: + enum + { + DoPreOrder = true, + }; + + VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) + { + } + + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + if ((tree->gtFlags & GTF_ASG) == 0) + { + return Compiler::WALK_SKIP_SUBTREES; + } + + GenTreeLclVarCommon* lclDef; + if (tree->DefinesLocal(m_compiler, &lclDef)) + { + if (!m_func(lclDef)) + return Compiler::WALK_ABORT; + } + + return Compiler::WALK_CONTINUE; + } + }; + + VisitDefsVisitor visitor(m_tree->GetCompiler(), func); + + BasicBlockVisit result = VisitLoopBlocks([&](BasicBlock* loopBlock) { + for (Statement* stmt : loopBlock->Statements()) + { + if (visitor.WalkTree(stmt->GetRootNodePointer(), nullptr) == Compiler::WALK_ABORT) + return BasicBlockVisit::Abort; + } + + return BasicBlockVisit::Continue; + }); + + return result == BasicBlockVisit::Continue; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::FindDef: Find a def of the specified local number. +// +// Parameters: +// lclNum - The local. +// +// Returns: +// Tree that represents a def of the local; nullptr if no def was found. +// +// Remarks: +// Does not take promotion into account. +// +GenTreeLclVarCommon* FlowGraphNaturalLoop::FindDef(unsigned lclNum) +{ + GenTreeLclVarCommon* result = nullptr; + VisitDefs([&result, lclNum](GenTreeLclVarCommon* def) { + if (def->GetLclNum() == lclNum) + { + result = def; + return false; + } + + return true; + }); + + return result; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::AnalyzeIteration: Analyze the induction structure of +// the loop. 
+// +// Parameters: +// info - [out] Loop information +// +// Returns: +// True if the structure was analyzed and we can make guarantees about it; +// otherwise false. +// +// Remarks: +// The function guarantees that at all definitions of the iteration local, +// the loop condition is reestablished before iteration continues. In other +// words, if this function returns true the loop invariant is guaranteed to +// be upheld in all blocks in the loop (but see below for establishing the +// base case). +// +// Currently we do not validate that there is a zero-trip test ensuring the +// condition is true, so it is up to the user to validate that. This is +// normally done via GTF_RELOP_ZTT set by loop inversion. +// +bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info) +{ + JITDUMP("Analyzing iteration for " FMT_LP " with header " FMT_BB "\n", m_index, m_header->bbNum); + + const FlowGraphDfsTree* dfs = m_tree; + Compiler* comp = dfs->GetCompiler(); + assert((m_entryEdges.size() == 1) && "Expected preheader"); + + BasicBlock* preheader = m_entryEdges[0]->getSourceBlock(); + + JITDUMP(" Preheader = " FMT_BB "\n", preheader->bbNum); + + // TODO-Quirk: For backwards compatibility always try the lexically + // bottom-most block for the loop variable. + BasicBlock* bottom = GetLexicallyBottomMostBlock(); + JITDUMP(" Bottom = " FMT_BB "\n", bottom->bbNum); + + BasicBlock* cond = bottom; + BasicBlock* initBlock = preheader; + GenTree* init; + GenTree* test; + if (!cond->KindIs(BBJ_COND) || + !comp->optExtractInitTestIncr(&initBlock, bottom, m_header, &init, &test, &info->IterTree)) + { + // TODO-CQ: Try all exit edges here to see if we can find an induction variable. 
+ JITDUMP(" Could not extract induction variable from bottom\n"); + return false; + } + + info->IterVar = comp->optIsLoopIncrTree(info->IterTree); + + assert(info->IterVar != BAD_VAR_NUM); + LclVarDsc* const iterVarDsc = comp->lvaGetDesc(info->IterVar); + + // Bail on promoted case, otherwise we'd have to search the loop + // for both iterVar and its parent. + // TODO-CQ: Fix this + // + if (iterVarDsc->lvIsStructField) + { + JITDUMP(" iterVar V%02u is a promoted field\n", info->IterVar); + return false; + } + + // Bail on the potentially aliased case. + // + if (iterVarDsc->IsAddressExposed()) + { + JITDUMP(" iterVar V%02u is address exposed\n", info->IterVar); + return false; + } + + if (init == nullptr) + { + JITDUMP(" Init = , test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(test), + Compiler::dspTreeID(info->IterTree)); + } + else + { + JITDUMP(" Init = [%06u], test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(init), Compiler::dspTreeID(test), + Compiler::dspTreeID(info->IterTree)); + } + + if (!MatchLimit(info, test)) + { + return false; + } + + MatchInit(info, initBlock, init); + + bool result = VisitDefs([=](GenTreeLclVarCommon* def) { + if ((def->GetLclNum() != info->IterVar) || (def == info->IterTree)) + return true; + + JITDUMP(" Loop has extraneous def [%06u]\n", Compiler::dspTreeID(def)); + return false; + }); + + if (!result) + { + return false; + } + +#ifdef DEBUG + if (comp->verbose) + { + printf(" IterVar = V%02u\n", info->IterVar); + + if (info->HasConstInit) + printf(" Const init with value %d in " FMT_BB "\n", info->ConstInitValue, info->InitBlock->bbNum); + + printf(" Test is [%06u] (", Compiler::dspTreeID(info->TestTree)); + if (info->HasConstLimit) + printf("const limit "); + if (info->HasSimdLimit) + printf("simd limit "); + if (info->HasInvariantLocalLimit) + printf("invariant local limit "); + if (info->HasArrayLengthLimit) + printf("array length limit "); + printf(")\n"); + } +#endif + + return result; +} + 
+//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchInit: Try to pattern match the initialization of +// an induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// initBlock - Block containing the initialization tree +// init - Initialization tree +// +// Remarks: +// We do not necessarily guarantee or require to be able to find any +// initialization. +// +void FlowGraphNaturalLoop::MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init) +{ + if ((init == nullptr) || !init->OperIs(GT_STORE_LCL_VAR) || (init->AsLclVarCommon()->GetLclNum() != info->IterVar)) + return; + + GenTree* initValue = init->AsLclVar()->Data(); + if (!initValue->IsCnsIntOrI() || !initValue->TypeIs(TYP_INT)) + return; + + info->HasConstInit = true; + info->ConstInitValue = (int)initValue->AsIntCon()->IconValue(); + info->InitBlock = initBlock; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchLimit: Try to pattern match the loop test of an +// induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// test - Loop condition test +// +// Returns: +// True if the loop condition was recognized and "info" was filled out. +// +// Remarks: +// Unlike the initialization, we do require that we are able to match the +// loop condition. +// +bool FlowGraphNaturalLoop::MatchLimit(NaturalLoopIterInfo* info, GenTree* test) +{ + // Obtain the relop from the "test" tree. + GenTree* relop; + if (test->OperIs(GT_JTRUE)) + { + relop = test->gtGetOp1(); + } + else + { + assert(test->OperIs(GT_STORE_LCL_VAR)); + relop = test->AsLclVar()->Data(); + } + + noway_assert(relop->OperIsCompare()); + + GenTree* opr1 = relop->AsOp()->gtOp1; + GenTree* opr2 = relop->AsOp()->gtOp2; + + GenTree* iterOp; + GenTree* limitOp; + + // Make sure op1 or op2 is the iterVar. 
+ if (opr1->gtOper == GT_LCL_VAR && opr1->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr1; + limitOp = opr2; + } + else if (opr2->gtOper == GT_LCL_VAR && opr2->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr2; + limitOp = opr1; + } + else + { + return false; + } + + if (iterOp->gtType != TYP_INT) + { + return false; + } + + // Mark the iterator node. + iterOp->gtFlags |= GTF_VAR_ITERATOR; + + // Check what type of limit we have - constant, variable or arr-len. + if (limitOp->gtOper == GT_CNS_INT) + { + info->HasConstLimit = true; + if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) + { + info->HasSimdLimit = true; + } + } + else if (limitOp->OperIs(GT_LCL_VAR)) + { + // See if limit var is a loop invariant + // + GenTreeLclVarCommon* def = FindDef(limitOp->AsLclVarCommon()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Limit var V%02u modified by [%06u]\n", limitOp->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasInvariantLocalLimit = true; + } + else if (limitOp->OperIs(GT_ARR_LENGTH)) + { + // See if limit array is a loop invariant + // + GenTree* const array = limitOp->AsArrLen()->ArrRef(); + + if (!array->OperIs(GT_LCL_VAR)) + { + JITDUMP(" Array limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + GenTreeLclVarCommon* def = FindDef(array->AsLclVar()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Array limit var V%02u modified by [%06u]\n", array->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasArrayLengthLimit = true; + } + else + { + JITDUMP(" Loop limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + // Were we able to successfully analyze the limit? 
+ // + assert(info->HasConstLimit || info->HasInvariantLocalLimit || info->HasArrayLengthLimit); + + info->TestTree = relop; + return true; +} + +//------------------------------------------------------------------------ +// GetLexicallyBottomMostBlock: Get the lexically bottom-most block contained +// within the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyBottomMostBlock() +{ + BasicBlock* bottom = m_header; + VisitLoopBlocks([&bottom](BasicBlock* loopBlock) { + if (loopBlock->bbNum > bottom->bbNum) + bottom = loopBlock; + return BasicBlockVisit::Continue; + }); + + return bottom; +} + +//------------------------------------------------------------------------ +// HasDef: Check if a local is defined anywhere in the loop. +// +// Parameters: +// lclNum - Local to check for a def for. +// +// Returns: +// True if the local has any def. +// +// Remarks: +// +bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) +{ + Compiler* comp = m_tree->GetCompiler(); + LclVarDsc* dsc = comp->lvaGetDesc(lclNum); + + assert(!comp->lvaVarAddrExposed(lclNum)); + // Currently does not handle promoted locals, only fields. + assert(!dsc->lvPromoted); + + unsigned defLclNum1 = lclNum; + unsigned defLclNum2 = BAD_VAR_NUM; + if (dsc->lvIsStructField) + { + defLclNum2 = dsc->lvParentLcl; + } + + bool result = VisitDefs([=](GenTreeLclVarCommon* lcl) { + if ((lcl->GetLclNum() == defLclNum1) || (lcl->GetLclNum() == defLclNum2)) + { + return false; + } + + return true; + }); + + // If we stopped early we found a def. + return !result; +} + +//------------------------------------------------------------------------ +// IterConst: Get the constant with which the iterator is modified +// +// Returns: +// Constant value. 
+// +int NaturalLoopIterInfo::IterConst() +{ + GenTree* value = IterTree->AsLclVar()->Data(); + return (int)value->gtGetOp2()->AsIntCon()->IconValue(); +} + +//------------------------------------------------------------------------ +// IterOper: Get the type of the operation on the iterator +// +// Returns: +// Oper +// +genTreeOps NaturalLoopIterInfo::IterOper() +{ + return IterTree->AsLclVar()->Data()->OperGet(); +} + +//------------------------------------------------------------------------ +// IterOperType: Get the type of the operation on the iterator. +// +// Returns: +// Type, used for overflow instructions. +// +var_types NaturalLoopIterInfo::IterOperType() +{ + assert(genActualType(IterTree) == TYP_INT); + return IterTree->TypeGet(); +} + +//------------------------------------------------------------------------ +// IsReversed: Returns true if the iterator node is the second operand in the +// loop condition. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsReversed() +{ + return TestTree->gtGetOp2()->OperIs(GT_LCL_VAR) && ((TestTree->gtGetOp2()->gtFlags & GTF_VAR_ITERATOR) != 0); +} + +//------------------------------------------------------------------------ +// TestOper: The type of the comparison between the iterator and the limit +// (GT_LE, GT_GE, etc.) +// +// Returns: +// Oper. +// +genTreeOps NaturalLoopIterInfo::TestOper() +{ + genTreeOps op = TestTree->OperGet(); + return IsReversed() ? GenTree::SwapRelop(op) : op; +} + +//------------------------------------------------------------------------ +// IsIncreasingLoop: Returns true if the loop iterator increases from low to +// high value. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsIncreasingLoop() +{ + // Increasing loop is the one that has "+=" increment operation and "< or <=" limit check. 
+ bool isLessThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_LT, GT_LE);
+ return (isLessThanLimitCheck &&
+ (((IterOper() == GT_ADD) && (IterConst() > 0)) || ((IterOper() == GT_SUB) && (IterConst() < 0))));
+}
+
+//------------------------------------------------------------------------
+// IsDecreasingLoop: Returns true if the loop iterator decreases from high to
+// low value.
+//
+// Returns:
+// True if so.
+//
+bool NaturalLoopIterInfo::IsDecreasingLoop()
+{
+ // Decreasing loop is the one that has "-=" decrement operation and "> or >=" limit check. If the operation is
+ // "+=", make sure the constant is negative to give an effect of decrementing the iterator.
+ bool isGreaterThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_GT, GT_GE);
+ return (isGreaterThanLimitCheck &&
+ (((IterOper() == GT_ADD) && (IterConst() < 0)) || ((IterOper() == GT_SUB) && (IterConst() > 0))));
+}
+
+//------------------------------------------------------------------------
+// Iterator: Get the iterator node in the loop test
+//
+// Returns:
+// Iterator node.
+//
+GenTree* NaturalLoopIterInfo::Iterator()
+{
+ return IsReversed() ? TestTree->gtGetOp2() : TestTree->gtGetOp1();
+}
+
+//------------------------------------------------------------------------
+// Limit: Get the limit node in the loop test.
+//
+// Returns:
+// Limit node.
+//
+GenTree* NaturalLoopIterInfo::Limit()
+{
+ return IsReversed() ? TestTree->gtGetOp1() : TestTree->gtGetOp2();
+}
+
+//------------------------------------------------------------------------
+// ConstLimit: Get the constant value of the iterator limit, i.e. when the loop
+// condition is "i RELOP const".
+//
+// Returns:
+// Limit constant.
+//
+// Remarks:
+// Only valid if HasConstLimit is true.
+// +int NaturalLoopIterInfo::ConstLimit() +{ + assert(HasConstLimit); + GenTree* limit = Limit(); + assert(limit->OperIsConst()); + return (int)limit->AsIntCon()->gtIconVal; +} + +//------------------------------------------------------------------------ +// VarLimit: Get the local var num used in the loop condition, i.e. when the +// loop condition is "i RELOP lclVar" with a loop invariant local. +// +// Returns: +// Local var number. +// +// Remarks: +// Only valid if HasInvariantLocalLimit is true. +// +unsigned NaturalLoopIterInfo::VarLimit() +{ + assert(HasInvariantLocalLimit); + + GenTree* limit = Limit(); + assert(limit->OperGet() == GT_LCL_VAR); + return limit->AsLclVarCommon()->GetLclNum(); +} + +//------------------------------------------------------------------------ +// ArrLenLimit: Get the array length used in the loop condition, i.e. when the +// loop condition is "i RELOP arr.len". +// +// Parameters: +// comp - Compiler instance +// index - [out] Array index information +// +// Returns: +// True if the array length was extracted. +// +// Remarks: +// Only valid if HasArrayLengthLimit is true. +// +bool NaturalLoopIterInfo::ArrLenLimit(Compiler* comp, ArrIndex* index) +{ + assert(HasArrayLengthLimit); + + GenTree* limit = Limit(); + assert(limit->OperIs(GT_ARR_LENGTH)); + + // Check if we have a.length or a[i][j].length + if (limit->AsArrLen()->ArrRef()->OperIs(GT_LCL_VAR)) + { + index->arrLcl = limit->AsArrLen()->ArrRef()->AsLclVarCommon()->GetLclNum(); + index->rank = 0; + return true; + } + // We have a[i].length, extract a[i] pattern. 
+ else if (limit->AsArrLen()->ArrRef()->OperIs(GT_COMMA)) + { + return comp->optReconstructArrIndex(limit->AsArrLen()->ArrRef(), index); + } + return false; +} diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index b7c0facd5065c..04858ee63252b 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -11,6 +11,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" +#include "jitstd/algorithm.h" #ifdef DEBUG @@ -783,6 +784,33 @@ void LoopCloneContext::PrintConditions(unsigned loopNum) } #endif +//-------------------------------------------------------------------------------------------------- +// GetLoopIterInfo: Get the analyzed loop iteration for a loop. +// +// Arguments: +// loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex(). +// +// Returns: +// The info, or nullptr if the loop iteration structure could not be +// analyzed. +// +NaturalLoopIterInfo* LoopCloneContext::GetLoopIterInfo(unsigned loopNum) +{ + return iterInfo[loopNum]; +} + +//-------------------------------------------------------------------------------------------------- +// SetLoopIterInfo: Set the analyzed loop iteration for a loop. +// +// Arguments: +// loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex(). +// info - Info to store +// +void LoopCloneContext::SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info) +{ + iterInfo[loopNum] = info; +} + //-------------------------------------------------------------------------------------------------- // CondToStmtInBlock: Convert an array of conditions to IR. Evaluate them into a JTRUE stmt and add it to // a new block after `insertAfter`. @@ -1075,13 +1103,12 @@ LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children // Callers should assume AND operation is used i.e., if all conditions are // true, then take the fast path. 
// -bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { JITDUMP("------------------------------------------------------------\n"); - JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loopNum); + JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loop->GetIndex()); - LoopDsc* loop = &optLoopTable[loopNum]; - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos->Size() > 0); // We only need to check for iteration behavior if we have array checks. @@ -1104,8 +1131,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident objDeref = LC_Ident::CreateIndirOfLocal(ttInfo->lclNum, 0); LC_Ident methodTable = LC_Ident::CreateClassHandle(ttInfo->clsHnd); LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1126,8 +1153,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1142,27 +1169,28 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // No array conditions here, so we're done // JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; } + NaturalLoopIterInfo* 
iterInfo = context->GetLoopIterInfo(loop->GetIndex()); // Note we see cases where the test oper is NE (array.Len) which we could handle // with some extra care. // - if (!GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + if (!GenTree::StaticOperIs(iterInfo->TestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) { // We can't reason about how this loop iterates return false; } - const bool isIncreasingLoop = loop->lpIsIncreasingLoop(); - assert(isIncreasingLoop || loop->lpIsDecreasingLoop()); + const bool isIncreasingLoop = iterInfo->IsIncreasingLoop(); + assert(isIncreasingLoop || iterInfo->IsDecreasingLoop()); // We already know that this is either increasing or decreasing loop and the // stride is (> 0) or (< 0). Here, just take the abs() value and check if it // is beyond the limit. - int stride = abs(loop->lpIterConst()); + int stride = abs(iterInfo->IterConst()); if (stride >= 58) { @@ -1176,27 +1204,27 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident ident; // Init conditions - if (loop->lpFlags & LPFLG_CONST_INIT) + if (iterInfo->HasConstInit) { // Only allowing non-negative const init at this time. // This is because the variable initialized with this constant will be used as an array index, // and array indices must be non-negative. 
- if (loop->lpConstInit < 0) + if (iterInfo->ConstInitValue < 0) { - JITDUMP("> Init %d is invalid\n", loop->lpConstInit); + JITDUMP("> Init %d is invalid\n", iterInfo->ConstInitValue); return false; } if (!isIncreasingLoop) { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident::CreateConst(static_cast(loop->lpConstInit)); + ident = LC_Ident::CreateConst(static_cast(iterInfo->ConstInitValue)); } } else { // iterVar >= 0 - const unsigned initLcl = loop->lpIterVar(); + const unsigned initLcl = iterInfo->IterVar; if (!genActualTypeIsInt(lvaGetDesc(initLcl))) { JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); @@ -1214,13 +1242,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateVar(initLcl); geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + context->EnsureConditions(loop->GetIndex())->Push(geZero); } // Limit Conditions - if (loop->lpFlags & LPFLG_CONST_LIMIT) + if (iterInfo->HasConstLimit) { - int limit = loop->lpConstLimit(); + int limit = iterInfo->ConstLimit(); if (limit < 0) { JITDUMP("> limit %d is invalid\n", limit); @@ -1233,9 +1261,9 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateConst(static_cast(limit)); } } - else if (loop->lpFlags & LPFLG_VAR_LIMIT) + else if (iterInfo->HasInvariantLocalLimit) { - const unsigned limitLcl = loop->lpVarLimit(); + const unsigned limitLcl = iterInfo->VarLimit(); if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); @@ -1254,12 +1282,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(limitLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + 
context->EnsureConditions(loop->GetIndex())->Push(geZero); } - else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + else if (iterInfo->HasArrayLengthLimit) { ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!loop->lpArrLenLimit(this, index)) + if (!iterInfo->ArrLenLimit(this, index)) { JITDUMP("> ArrLen not matching\n"); return false; @@ -1268,7 +1296,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } else { @@ -1284,7 +1312,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // GT_GT loop test: (end > start) ==> (end <= arrLen) // GT_GE loop test: (end >= start) ==> (end < arrLen) genTreeOps opLimitCondition; - switch (loop->lpTestOper()) + switch (iterInfo->TestOper()) { case GT_LT: case GT_GT: @@ -1309,11 +1337,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // Ensure that this array must be dereference-able, before executing the actual condition. 
LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } break; case LcOptInfo::LcMdArray: @@ -1323,7 +1351,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext mdArrInfo->dim, LC_Array::None)); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // TODO: ensure array is dereference-able? } @@ -1339,7 +1367,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext } JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; @@ -1450,11 +1478,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // // and so on. // -bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { // Get the dereference-able arrays and objects. - JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loopNum); - JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loopNum); + JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex()); + JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex()); // We currently expect to have at least one of these. 
// @@ -1545,7 +1573,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] JitExpandArrayStack*>* levelCond = - context->EnsureBlockConditions(loopNum, condBlocks); + context->EnsureBlockConditions(loop->GetIndex(), condBlocks); for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) { arrayDerefNodes[i]->DeriveLevelConditions(levelCond); @@ -1554,7 +1582,8 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con if (objDeref->Size() > 0) { - JitExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, 1); + JitExpandArrayStack*>* levelCond = + context->EnsureBlockConditions(loop->GetIndex(), 1); for (unsigned i = 0; i < objDeref->Size(); ++i) { @@ -1566,7 +1595,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con } } - DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + DBEXEC(verbose, context->PrintBlockConditions(loop->GetIndex())); return true; } @@ -1615,9 +1644,10 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore // performs the optimizations assuming that the path in which the candidates // were collected is the fast path in which the optimizations will be performed. // -void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) +void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, + LoopCloneContext* context DEBUGARG(bool dynamicPath)) { - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos != nullptr); for (unsigned i = 0; i < optInfos->Size(); ++i) { @@ -1735,20 +1765,14 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // return blocks in the loop that will be cloned. 
(REVIEW: this 'predicate' function // doesn't seem like the right place to do this change.) // -bool Compiler::optIsLoopClonable(unsigned loopInd) +bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopInd]; - const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); - - if (requireIterable && !(loop.lpFlags & LPFLG_ITER)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". No LPFLG_ITER flag.\n", loopInd); - return false; - } + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); - if (loop.lpIsRemoved()) + if (requireIterable && (iterInfo == nullptr)) { - JITDUMP("Loop cloning: rejecting removed loop " FMT_LP ".\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Could not analyze iteration.\n", loop->GetIndex()); return false; } @@ -1761,34 +1785,39 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // TODO: this limitation could be removed if we do the work to insert new EH regions in the exception table, // for the cloned loop (and its embedded EH regions). // - // Also, count the number of return blocks within the loop for future use. - unsigned loopRetCount = 0; - for (BasicBlock* const blk : loop.LoopBlocks()) - { - if (blk->KindIs(BBJ_RETURN)) - { - loopRetCount++; - } + BasicBlockVisit result = loop->VisitLoopBlocks([=](BasicBlock* blk) { + assert(!blk->KindIs(BBJ_RETURN)); if (bbIsTryBeg(blk)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loopInd); - return false; + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loop->GetIndex()); + return BasicBlockVisit::Abort; } + + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + return false; } // Is the entry block a handler or filter start? 
If so, then if we cloned, we could create a jump // into the middle of a handler (to go to the cloned copy.) Reject. - if (bbIsHandlerBeg(loop.lpEntry)) + if (bbIsHandlerBeg(loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Entry block is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Header block is a handler start.\n", loop->GetIndex()); return false; } + // Loop canonicalization should have ensured that there is a unique preheader. + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // If the head and entry are in different EH regions, reject. - if (!BasicBlock::sameEHRegion(loop.lpHead, loop.lpEntry)) + if (!BasicBlock::sameEHRegion(preheader, loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Head and entry blocks are in different EH regions.\n", - loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Preheader and header blocks are in different EH regions.\n", + loop->GetIndex()); return false; } @@ -1798,29 +1827,28 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // that block; this is one of those cases. This could be fixed fairly easily; for example, // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the // loop. This is just a corner to cut to get this working faster. - BasicBlock* bbAfterLoop = loop.lpBottom->Next(); + // TODO-Quirk: Should rework this to avoid the lexically bottom most block here. + BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); + + BasicBlock* bbAfterLoop = bottom->Next(); if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Next block after bottom is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Next block after bottom is a handler start.\n", + loop->GetIndex()); return false; } - // Reject cloning if this is a mid-entry loop and the entry has non-loop predecessors other than its head. - // This loop may be part of a larger looping construct that we didn't recognize. - // - // We should really fix this in optCanonicalizeLoop. - // - if (!loop.lpIsTopEntry()) + // TODO-Quirk: Reject loops that with the old loop cloning would put us + // above max number of returns. Return blocks are not considered part of + // loops in the new loop finding since they are never part of the SCC. + // (Is that true? What about due to EH successors?) + LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; + unsigned loopRetCount = 0; + for (BasicBlock* const blk : oldLoop->LoopBlocks()) { - for (BasicBlock* const entryPred : loop.lpEntry->PredBlocks()) + if (blk->KindIs(BBJ_RETURN)) { - if ((entryPred != loop.lpHead) && !loop.lpContains(entryPred)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Is not top entry, and entry has multiple non-loop preds.\n", - loopInd); - return false; - } + loopRetCount++; } } @@ -1836,78 +1864,65 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has %d returns;" " if added to previously existing %d returns, it would exceed the limit of %d.\n", - loopInd, loopRetCount, fgReturnCount, epilogLimit); + loop->GetIndex(), loopRetCount, fgReturnCount, epilogLimit); return false; } - if (requireIterable) - { - const unsigned ivLclNum = loop.lpIterVar(); - if (lvaVarAddrExposed(ivLclNum)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", - loopInd, ivLclNum); - return false; - } - } + assert(!requireIterable || !lvaVarAddrExposed(iterInfo->IterVar)); - BasicBlock* top = loop.lpTop; - BasicBlock* bottom = loop.lpBottom; + // TODO-Quirk: These conditions are unnecessary. 
+ BasicBlock* oldLoopTop = oldLoop->lpTop; + BasicBlock* oldLoopBottom = oldLoop->lpBottom; - if (!bottom->KindIs(BBJ_COND)) + if (!oldLoopBottom->KindIs(BBJ_COND)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Couldn't find termination test.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Couldn't find termination test.\n", loop->GetIndex()); return false; } - if (!bottom->HasJumpTo(top)) + if (!oldLoopBottom->HasJumpTo(oldLoopTop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", + loop->GetIndex()); return false; } if (requireIterable) { - if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && - (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop limit is neither constant, variable or array length.\n", - loopInd); - return false; - } + assert(iterInfo->HasConstLimit || iterInfo->HasInvariantLocalLimit || iterInfo->HasArrayLengthLimit); // TODO-CQ: Handle other loops like: // - The ones whose limit operator is "==" or "!=" // - The incrementing operator is multiple and divide // - The ones that are inverted are not handled here for cases like "i *= 2" because // they are converted to "i + i". - if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) + if (!iterInfo->IsIncreasingLoop() && !iterInfo->IsDecreasingLoop()) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", - loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); + loop->GetIndex(), GenTree::OpName(iterInfo->TestOper()), GenTree::OpName(iterInfo->IterOper())); return false; } - if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) + if (!iterInfo->TestTree->OperIsCompare() || ((iterInfo->TestTree->gtFlags & GTF_RELOP_ZTT) == 0)) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop inversion NOT present, loop test [%06u] may not protect " "entry from head.\n", - loopInd, loop.lpTestTree->gtTreeID); + loop->GetIndex(), iterInfo->TestTree->gtTreeID); return false; } #ifdef DEBUG - const unsigned ivLclNum = loop.lpIterVar(); - GenTree* const op1 = loop.lpIterator(); + const unsigned ivLclNum = iterInfo->IterVar; + GenTree* const op1 = iterInfo->Iterator(); assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; + assert(loopRetCount == 0); return true; } @@ -1916,10 +1931,10 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // optInsertLoopChoiceConditions: Insert the loop conditions for a loop after the loop head. // // Arguments: -// context loop cloning context variable -// loopNum the loop index -// slowHead the slow path loop head, where the condition failures branch -// insertAfter insert the conditions after this block +// context - loop cloning context variable +// loop - the loop +// slowHead - the slow path loop head, where the condition failures branch +// insertAfter - insert the conditions after this block // // Return Value: // The last condition block added. @@ -1936,32 +1951,33 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // ... 
// slowHead -?> e2 (slowHead) branch or fall-through to e2 // -BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter) +BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter) { - JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loopNum); - assert(context->HasBlockConditions(loopNum)); + JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); + assert(context->HasBlockConditions(loop->GetIndex())); assert(slowHead != nullptr); assert(insertAfter->KindIs(BBJ_NONE)); - if (context->HasBlockConditions(loopNum)) + if (context->HasBlockConditions(loop->GetIndex())) { - JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); + JitExpandArrayStack*>* levelCond = + context->GetBlockConditions(loop->GetIndex()); for (unsigned i = 0; i < levelCond->Size(); ++i) { - JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); + JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loop->GetIndex(), i); DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); } } // Finally insert cloning conditions after all deref conditions have been inserted. 
- JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loopNum); - DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loop->GetIndex()); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); - insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), slowHead, insertAfter); + insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowHead, insertAfter); return insertAfter; } @@ -1970,21 +1986,25 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, // optCloneLoop: Perform the mechanical cloning of the specified loop // // Arguments: -// loopInd - loop index of loop to clone +// loop - The loop to clone // context - data structure where all loop cloning info is kept. // -void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) +void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - assert(loopInd < optLoopCount); - - LoopDsc& loop = optLoopTable[loopInd]; +#ifdef DEBUG + if (verbose) + { + printf("\nCloning loop " FMT_LP " with blocks:\n", loop->GetIndex()); - JITDUMP("\nCloning loop " FMT_LP ": [head: " FMT_BB ", top: " FMT_BB ", entry: " FMT_BB ", bottom: " FMT_BB - ", child: " FMT_LP "].\n", - loopInd, loop.lpHead->bbNum, loop.lpTop->bbNum, loop.lpEntry->bbNum, loop.lpBottom->bbNum, loop.lpChild); + loop->VisitLoopBlocksReversePostOrder([](BasicBlock* block) { + printf(" " FMT_BB "\n", block->bbNum); + return BasicBlockVisit::Continue; + }); + } +#endif // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). 
- unsigned depth = optLoopDepth(loopInd); + unsigned depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); weight_t ambientWeight = 1; for (unsigned j = 0; j < depth; j++) { @@ -1993,9 +2013,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) assert(ambientWeight > lastWeight); } - // If we're in a non-natural loop, the ambient weight might be higher than we computed above. - // Be safe by taking the max with the head block's weight. - ambientWeight = max(ambientWeight, loop.lpHead->bbWeight); + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // The ambient weight might be higher than we computed above. Be safe by + // taking the max with the head block's weight. + ambientWeight = max(ambientWeight, preheader->bbWeight); // We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights. // The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should @@ -2006,8 +2028,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // This is the containing loop, if any -- to label any blocks we create that are outside // the loop being cloned. - unsigned char ambientLoop = loop.lpParent; + unsigned char ambientLoop = m_newToOldLoop[loop->GetIndex()]->lpParent; + BasicBlock* h = preheader; // We're going to transform this loop: // // H --> E (if T == E, H falls through to T/E) @@ -2030,7 +2053,6 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // B2 ?-> T2 // X - BasicBlock* h = loop.lpHead; assert((h->bbFlags & BBF_LOOP_PREHEADER) != 0); // Make a new pre-header block 'h2' for the loop. 'h' will fall through to 'h2'. 
@@ -2045,17 +2067,17 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) if (!h->KindIs(BBJ_NONE)) { assert(h->KindIs(BBJ_ALWAYS)); - assert(h->HasJumpTo(loop.lpEntry)); - h2->SetJumpKindAndTarget(BBJ_ALWAYS, loop.lpEntry); + assert(h->HasJumpTo(loop->GetHeader())); + h2->SetJumpKindAndTarget(BBJ_ALWAYS, loop->GetHeader()); } - fgReplacePred(loop.lpEntry, h, h2); - JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop.lpEntry->bbNum, - h2->bbNum, loop.lpEntry->bbNum); + fgReplacePred(loop->GetHeader(), h, h2); + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop->GetHeader()->bbNum, + h2->bbNum, loop->GetHeader()->bbNum); // 'h' is no longer the loop head; 'h2' is! h->bbFlags &= ~BBF_LOOP_PREHEADER; - optUpdateLoopHead(loopInd, h, h2); + optUpdateLoopHead((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable), h, h2); // Make 'h' fall through to 'h2' (if it didn't already). // Don't add the h->h2 edge because we're going to insert the cloning conditions between 'h' and 'h2', and @@ -2064,7 +2086,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. - BasicBlock* b = loop.lpBottom; + BasicBlock* b = loop->GetLexicallyBottomMostBlock(); BasicBlock* newPred = b; if (!b->KindIs(BBJ_ALWAYS)) { @@ -2109,9 +2131,26 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
+ unsigned numBlocks = 0; + loop->VisitLoopBlocks([&numBlocks](BasicBlock* block) { + numBlocks++; + return BasicBlockVisit::Continue; + }); + + BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; + unsigned index = 0; + loop->VisitLoopBlocks([=, &index](BasicBlock* block) { + lexicalBlocks[index++] = block; + return BasicBlockVisit::Continue; + }); + + jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, + [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - for (BasicBlock* const blk : loop.LoopBlocks()) + for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; // Initialize newBlk as BBJ_NONE, and fix up jump kind/target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_NONE, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2155,19 +2194,45 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding // loop, if one exists -- the parent of the loop we're cloning. - newBlk->bbNatLoopNum = loop.lpParent; + newBlk->bbNatLoopNum = ambientLoop; newPred = newBlk; blockMap->Set(blk, newBlk); + + // If the block falls through to a block outside the loop then we may + // need to insert a new block to redirect. + if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) + { + if (blk->KindIs(BBJ_NONE)) + { + // Changed to BBJ_ALWAYS in below loop. + } + else if (blk->KindIs(BBJ_COND)) + { + // Need to insert a block. 
+ BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); + newRedirBlk->copyEHRegion(newPred); + newRedirBlk->bbNatLoopNum = ambientLoop; + // This block isn't part of the loop, so below loop won't add + // refs for it. + fgAddRefPred(blk->Next(), newRedirBlk); + newPred = newRedirBlk; + } + else + { + assert(!"Cannot handle fallthrough"); + } + } } // Perform the static optimizations on the fast path. - optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); + optPerformStaticOptimizations(loop, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. - for (BasicBlock* const blk : loop.LoopBlocks()) + for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2185,7 +2250,15 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) switch (newblk->GetJumpKind()) { case BBJ_NONE: - fgAddRefPred(newblk->Next(), newblk); + if ((i < numBlocks - 1) && !blk->NextIs(lexicalBlocks[i + 1])) + { + newblk->SetJumpKindAndTarget(BBJ_ALWAYS, blk->Next()); + fgAddRefPred(newblk->GetJumpDest(), newblk); + } + else + { + fgAddRefPred(newblk->Next(), newblk); + } break; case BBJ_ALWAYS: @@ -2213,8 +2286,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) #ifdef DEBUG // Display the preds for the new blocks, after all the new blocks have been redirected. 
JITDUMP("Preds after loop copy:\n"); - for (BasicBlock* const blk : loop.LoopBlocks()) - { + loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2224,7 +2296,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) JITDUMP(" " FMT_BB, predBlock->bbNum); } JITDUMP("\n"); - } + + return BasicBlockVisit::Continue; + }); #endif // DEBUG // Insert the loop choice conditions. We will create the following structure: @@ -2240,13 +2314,13 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // // We should always have block conditions. - assert(context->HasBlockConditions(loopInd)); + assert(context->HasBlockConditions(loop->GetIndex())); assert(h->KindIs(BBJ_NONE)); assert(h->NextIs(h2)); // If any condition is false, go to slowHead (which branches or falls through to e2). BasicBlock* e2 = nullptr; - bool foundIt = blockMap->Lookup(loop.lpEntry, &e2); + bool foundIt = blockMap->Lookup(loop->GetHeader(), &e2); assert(foundIt && e2 != nullptr); if (!slowHead->NextIs(e2)) @@ -2259,7 +2333,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) fgAddRefPred(e2, slowHead); JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowHead->bbNum, e2->bbNum); - BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, slowHead, h); + BasicBlock* condLast = optInsertLoopChoiceConditions(context, loop, slowHead, h); // Add the fall-through path pred (either to T/E for fall-through from conditions to fast path, // or H2 if branch to E of fast path). @@ -2271,7 +2345,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // initialize the loop counter immediately before entering the loop, but we've left a shared // initialization of the loop counter up above the test that determines which version of the // loop to take. 
- loop.lpFlags |= LPFLG_DONT_UNROLL; + m_newToOldLoop[loop->GetIndex()]->lpFlags |= LPFLG_DONT_UNROLL; } //------------------------------------------------------------------------- @@ -2284,16 +2358,19 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Return Value: // Returns true if the variable is loop invariant in loopNum. // -bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) +bool Compiler::optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum) { if (lvaVarAddrExposed(lclNum)) { return false; } - if (optIsVarAssgLoop(loopNum, lclNum)) + + // TODO: Cache this invariance information + if (loop->HasDef(lclNum)) { return false; } + return true; } @@ -2637,22 +2714,24 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #endif // Check that the array object local variable is invariant within the loop body. - if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) + if (!optIsStackLocalInvariant(info->loop, arrIndex.arrLcl)) { JITDUMP("V%02d is not loop invariant\n", arrIndex.arrLcl); return WALK_SKIP_SUBTREES; } + NaturalLoopIterInfo* iterInfo = info->context->GetLoopIterInfo(info->loop->GetIndex()); + // Walk the dimensions and see if iterVar of the loop is used as index. for (unsigned dim = 0; dim < arrIndex.rank; ++dim) { // Is index variable also used as the loop iter var? - if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) + if (arrIndex.indLcls[dim] == iterInfo->IterVar) { // Check the previous indices are all loop invariant. 
for (unsigned dim2 = 0; dim2 < dim; ++dim2) { - if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) + if (!optIsStackLocalInvariant(info->loop, arrIndex.indLcls[dim2])) { JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); return WALK_SKIP_SUBTREES; @@ -2661,19 +2740,18 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #ifdef DEBUG if (verbose) { - printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loopNum); + printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loop->GetIndex()); arrIndex.Print(); printf(" on dim %d\n", dim); } #endif // Update the loop context. - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); } else { - JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), - dim); + JITDUMP("Induction V%02d is not used as index on dim %d\n", iterInfo->IterVar, dim); } } return WALK_SKIP_SUBTREES; @@ -2735,7 +2813,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop JITDUMP("... right form for type test with local V%02u\n", lclNum); - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; @@ -2743,8 +2821,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop // Looks like we found an invariant type test. 
// - JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loopNum, dspTreeID(tree), - lclNum); + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loop->GetIndex(), + dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) { @@ -2754,7 +2832,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); assert(compCurBB->lastStmt() == info->stmt); - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd)); } } @@ -2818,13 +2896,13 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; } - JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loopNum, + JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loop->GetIndex(), dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) @@ -2839,7 +2917,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(), relopOp2 != iconHandle DEBUG_ARG( (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); - info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); + info->context->EnsureLoopOptInfo(info->loop->GetIndex())->Push(optInfo); } } } @@ -2884,16 +2962,15 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // (3) the test is frequently hit during the loop iteration // (4) the test is biased and highly likely to succeed // - const LoopDsc& 
loopDsc = optLoopTable[info->loopNum]; - BasicBlock* const loopEntry = loopDsc.lpEntry; - BasicBlock* const typeTestBlock = compCurBB; - double const loopFrequency = 0.50; - double const typeTestFrequency = 0.50; - double const typeTestBias = 0.05; + FlowGraphNaturalLoop* loop = info->loop; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; // Check for (1) // - if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + if (!loop->GetHeader()->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) { JITDUMP(" No; loop does not have profile data.\n"); return WALK_CONTINUE; @@ -2901,7 +2978,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (2) // - if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + if (loop->GetHeader()->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) { JITDUMP(" No; loop does not iterate often enough.\n"); return WALK_CONTINUE; @@ -2909,7 +2986,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (3) // - if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + if (typeTestBlock->bbWeight < (typeTestFrequency * loop->GetHeader()->bbWeight)) { JITDUMP(" No; guard does not execute often enough within the loop.\n"); return WALK_CONTINUE; @@ -2959,7 +3036,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // Also, check if the loop is suitable for the optimizations performed. // // Arguments: -// loopNum - the current loop index for which conditions are derived. +// loop - Loop being analyzed // context - data structure where all loop cloning candidates will be updated. 
// // Return Value: @@ -2972,16 +3049,15 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // optimization candidates and update the "context" parameter with all the // contextual information necessary to perform the optimization later. // -bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopNum]; - const bool canCloneForArrayBounds = - ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && ((loop.lpFlags & LPFLG_ITER) != 0); - const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); + const bool canCloneForArrayBounds = ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && (iterInfo != nullptr); + const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); if (!canCloneForArrayBounds && !canCloneForTypeTests) { - JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loopNum); + JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loop->GetIndex()); return false; } @@ -2992,12 +3068,11 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex shouldCloneForGdvTests &= JitConfig.JitCloneLoopsWithGdvTests() != 0; #endif - JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, + JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loop->GetIndex(), shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); - for (BasicBlock* const block : loop.LoopBlocks()) - { + LoopCloneVisitorInfo info(context, loop, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); + loop->VisitLoopBlocks([=, &info](BasicBlock* block) { compCurBB = block; for (Statement* const stmt : block->Statements()) { @@ -3007,7 +3082,9 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex fgWalkTreePre(stmt->GetRootNodePointer(), optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, computeStack); } - } + + return BasicBlockVisit::Continue; + }); return true; } @@ -3029,14 +3106,22 @@ bool Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) bool result = false; for (unsigned i = 0; i < optLoopCount; i++) { - JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", i); - if (optIsLoopClonable(i)) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", loop->GetIndex()); + NaturalLoopIterInfo iterInfo; + // TODO-Quirk: Remove + if ((optLoopTable[i].lpFlags & LPFLG_ITER) != 0) { - if (optIdentifyLoopOptInfo(i, context)) + if (loop->AnalyzeIteration(&iterInfo)) { - result = true; + context->SetLoopIterInfo(loop->GetIndex(), new (this, CMK_LoopClone) NaturalLoopIterInfo(iterInfo)); } } + + if (optIsLoopClonable(loop, context) && optIdentifyLoopOptInfo(loop, context)) + { + result = true; + } JITDUMP("------------------------------------------------------------\n"); } JITDUMP("\n"); @@ -3085,7 +3170,7 @@ PhaseStatus Compiler::optCloneLoops() return PhaseStatus::MODIFIED_NOTHING; } - LoopCloneContext context(optLoopCount, getAllocator(CMK_LoopClone)); + LoopCloneContext context((unsigned)m_loops->NumLoops(), getAllocator(CMK_LoopClone)); // Obtain array optimization candidates in the context. 
if (!optObtainLoopCloningOpts(&context)) @@ -3098,38 +3183,40 @@ PhaseStatus Compiler::optCloneLoops() unsigned optStaticallyOptimizedLoops = 0; // For each loop, derive cloning conditions for the optimization candidates. - for (unsigned i = 0; i < optLoopCount; ++i) + for (unsigned i = 0; i < optLoopCount; i++) { - JitExpandArrayStack* optInfos = context.GetLoopOptInfo(i); + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + + JitExpandArrayStack* optInfos = context.GetLoopOptInfo(loop->GetIndex()); if (optInfos == nullptr) { continue; } - if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) + if (!optDeriveLoopCloningConditions(loop, &context) || !optComputeDerefConditions(loop, &context)) { JITDUMP("> Conditions could not be obtained\n"); - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else { bool allTrue = false; bool anyFalse = false; - context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); + context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else if (allTrue) { // Perform static optimizations on the fast path since we always // have to take the cloned path. - optPerformStaticOptimizations(i, &context DEBUGARG(false)); + optPerformStaticOptimizations(loop, &context DEBUGARG(false)); ++optStaticallyOptimizedLoops; // No need to clone. - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } } } @@ -3164,12 +3251,13 @@ PhaseStatus Compiler::optCloneLoops() assert(optLoopsCloned == 0); // It should be initialized, but not yet changed. 
for (unsigned i = 0; i < optLoopCount; ++i) { - if (context.GetLoopOptInfo(i) != nullptr) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + if (context.GetLoopOptInfo(loop->GetIndex()) != nullptr) { optLoopsCloned++; - context.OptimizeConditions(i DEBUGARG(verbose)); - context.OptimizeBlockConditions(i DEBUGARG(verbose)); - optCloneLoop(i, &context); + context.OptimizeConditions(loop->GetIndex() DEBUGARG(verbose)); + context.OptimizeBlockConditions(loop->GetIndex() DEBUGARG(verbose)); + optCloneLoop(loop, &context); } } @@ -3178,6 +3266,9 @@ PhaseStatus Compiler::optCloneLoops() JITDUMP("Recompute reachability and dominators after loop cloning\n"); // TODO: recompute the loop table, to include the slow loop path in the table? fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); } #ifdef DEBUG diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 81cd13354b11a..9279bc783a9ec 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -807,6 +807,7 @@ struct LC_ArrayDeref #endif }; +struct NaturalLoopIterInfo; /** * * The "context" represents data that is used for making loop-cloning decisions. @@ -841,16 +842,28 @@ struct LoopCloneContext // The array of block levels of conditions for each loop. 
(loop x level x conditions) jitstd::vector*>*> blockConditions; + jitstd::vector iterInfo; + LoopCloneContext(unsigned loopCount, CompAllocator alloc) - : alloc(alloc), optInfo(alloc), conditions(alloc), arrayDerefs(alloc), objDerefs(alloc), blockConditions(alloc) + : alloc(alloc) + , optInfo(alloc) + , conditions(alloc) + , arrayDerefs(alloc) + , objDerefs(alloc) + , blockConditions(alloc) + , iterInfo(alloc) { optInfo.resize(loopCount, nullptr); conditions.resize(loopCount, nullptr); arrayDerefs.resize(loopCount, nullptr); objDerefs.resize(loopCount, nullptr); blockConditions.resize(loopCount, nullptr); + iterInfo.resize(loopCount, nullptr); } + NaturalLoopIterInfo* GetLoopIterInfo(unsigned loopNum); + void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info); + // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`. BasicBlock* CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6ddb815fd2953..7fbd2c993b8bf 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5613,6 +5613,54 @@ void Compiler::optFindLoops() PhaseStatus Compiler::optFindLoopsPhase() { optFindLoops(); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); + + m_newToOldLoop = m_loops->NumLoops() == 0 ? 
nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); + m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; + + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) + { + BasicBlock* head = loop->GetHeader(); + if (head->bbNatLoopNum == BasicBlock::NOT_IN_LOOP) + continue; + + LoopDsc* dsc = &optLoopTable[head->bbNatLoopNum]; + if (dsc->lpEntry != head) + continue; + + assert(m_oldToNewLoop[head->bbNatLoopNum] == nullptr); + assert(m_newToOldLoop[loop->GetIndex()] == nullptr); + m_oldToNewLoop[head->bbNatLoopNum] = loop; + m_newToOldLoop[loop->GetIndex()] = dsc; + + if ((dsc->lpFlags & LPFLG_ITER) == 0) + continue; + + NaturalLoopIterInfo iter; + bool analyzed = loop->AnalyzeIteration(&iter); + assert(analyzed); + + assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); + assert(iter.HasConstLimit == ((dsc->lpFlags & LPFLG_CONST_LIMIT) != 0)); + assert(iter.HasSimdLimit == ((dsc->lpFlags & LPFLG_SIMD_LIMIT) != 0)); + assert(iter.HasInvariantLocalLimit == ((dsc->lpFlags & LPFLG_VAR_LIMIT) != 0)); + assert(iter.HasArrayLengthLimit == ((dsc->lpFlags & LPFLG_ARRLEN_LIMIT) != 0)); + if (iter.HasConstInit) + { + assert(iter.ConstInitValue == dsc->lpConstInit); + assert(iter.InitBlock == dsc->lpInitBlock); + } + assert(iter.TestTree == dsc->lpTestTree); + } + + // New loop finding should be strictly more general. 
+ for (unsigned i = 0; i < optLoopCount; i++) + { + assert(m_oldToNewLoop[i] != nullptr); + } + return PhaseStatus::MODIFIED_EVERYTHING; } From 1f115e01a86cdd1d2ab49c80e643cb979b49372f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 15:26:54 +0100 Subject: [PATCH 02/11] Fix clang build --- src/coreclr/jit/flowgraph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 9214addb38bf2..484116aebf0bd 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4660,7 +4660,7 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func) { class VisitDefsVisitor : public GenTreeVisitor { - using GenTreeVisitor::m_compiler; + using GenTreeVisitor::m_compiler; TFunc& m_func; @@ -4670,7 +4670,7 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func) DoPreOrder = true, }; - VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) + VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor(comp), m_func(func) { } From 1893d7c0daef1945ea2a6d304a8a9266ac9dfda0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 15:41:53 +0100 Subject: [PATCH 03/11] Fix after merge --- src/coreclr/jit/loopcloning.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 352fd37b43c8c..40275645ee89f 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2072,7 +2072,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Make a new pre-header block 'h2' for the loop. 'h' will fall through to 'h2'. 
JITDUMP("Create new header block for loop\n"); - BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop.lpEntry); + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop->GetHeader()); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", h2->bbNum, h->bbNum); h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; h2->bbNatLoopNum = ambientLoop; @@ -2168,6 +2168,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); for (unsigned i = 0; i < numBlocks; i++) { + BasicBlock* blk = lexicalBlocks[i]; // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2220,11 +2221,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // need to insert a new block to redirect. if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) { - if (blk->KindIs(BBJ_NONE)) - { - // Changed to BBJ_ALWAYS in below loop. - } - else if (blk->KindIs(BBJ_COND)) + if (blk->KindIs(BBJ_COND)) { // Need to insert a block. 
BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); From 8be981e86b84a17e0b2830dde1d6cd0f3da3482c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 18:10:08 +0100 Subject: [PATCH 04/11] Clean up --- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/flowgraph.cpp | 15 +++++++++++++-- src/coreclr/jit/loopcloning.cpp | 18 +++++++----------- src/coreclr/jit/optimizer.cpp | 15 ++++++++------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9f0323c0ea7b1..057f5b5aeca2c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2100,6 +2100,8 @@ class FlowGraphNaturalLoop bool ContainsBlock(BasicBlock* block); + unsigned NumLoopBlocks(); + template BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 3206f6d7a6a13..a0473d7cd54a2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4289,8 +4289,7 @@ BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits() // Remarks: // Containment here means that the block is in the SCC of the loop; i.e. it // is in a cycle with the header block. Note that EH successors are taken -// into acount; for example, a BBJ_RETURN may still be a loop block provided -// that its handler can reach the loop header. +// into account. // bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) { @@ -4304,6 +4303,18 @@ bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) return BitVecOps::IsMember(&traits, m_blocks, index); } +//------------------------------------------------------------------------ +// NumLoopBlocks: Get the number of blocks in the SCC of the loop. +// +// Returns: +// Count of blocks. 
+// +unsigned FlowGraphNaturalLoop::NumLoopBlocks() +{ + BitVecTraits loopTraits = LoopBlockTraits(); + return BitVecOps::Count(&loopTraits, m_blocks); +} + //------------------------------------------------------------------------ // FlowGraphNaturalLoops::FlowGraphNaturalLoops: Initialize a new instance to // track a set of loops over the flow graph. diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 40275645ee89f..34f8ad0041e9b 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1853,8 +1853,9 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c // TODO-Quirk: Reject loops that with the old loop cloning would put us // above max number of returns. Return blocks are not considered part of - // loops in the new loop finding since they are never part of the SCC. - // (Is that true? What about due to EH successors?) + // loops in the new loop finding since they are never part of the SCC (and + // they aren't present inside try-regions). + // LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; unsigned loopRetCount = 0; for (BasicBlock* const blk : oldLoop->LoopBlocks()) @@ -1933,10 +1934,6 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c #endif } - // Otherwise, we're going to add those return blocks. - fgReturnCount += loopRetCount; - assert(loopRetCount == 0); - return true; } @@ -2149,19 +2146,18 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
- unsigned numBlocks = 0; - loop->VisitLoopBlocks([&numBlocks](BasicBlock* block) { - numBlocks++; - return BasicBlockVisit::Continue; - }); + unsigned numBlocks = loop->NumLoopBlocks(); BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; unsigned index = 0; loop->VisitLoopBlocks([=, &index](BasicBlock* block) { + assert(index < numBlocks); lexicalBlocks[index++] = block; return BasicBlockVisit::Continue; }); + assert(index == numBlocks); + jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 921a1f91a57c0..ac95c6b31f5d8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5646,12 +5646,18 @@ PhaseStatus Compiler::optFindLoopsPhase() assert(m_newToOldLoop[loop->GetIndex()] == nullptr); m_oldToNewLoop[head->bbNatLoopNum] = loop; m_newToOldLoop[loop->GetIndex()] = dsc; + } +#ifdef DEBUG + for (unsigned i = 0; i < optLoopCount; i++) + { + assert(m_oldToNewLoop[i] != nullptr); + LoopDsc* dsc = &optLoopTable[i]; if ((dsc->lpFlags & LPFLG_ITER) == 0) continue; NaturalLoopIterInfo iter; - bool analyzed = loop->AnalyzeIteration(&iter); + bool analyzed = m_oldToNewLoop[i]->AnalyzeIteration(&iter); assert(analyzed); assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); @@ -5666,12 +5672,7 @@ PhaseStatus Compiler::optFindLoopsPhase() } assert(iter.TestTree == dsc->lpTestTree); } - - // New loop finding should be strictly more general. 
- for (unsigned i = 0; i < optLoopCount; i++) - { - assert(m_oldToNewLoop[i] != nullptr); - } +#endif return PhaseStatus::MODIFIED_EVERYTHING; } From d9cd2134f72132660def66a92b917e4038cd9b19 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 18:51:59 +0100 Subject: [PATCH 05/11] Set redirection block weight correctly --- src/coreclr/jit/loopcloning.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 34f8ad0041e9b..e64a2349f6a64 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2223,6 +2223,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; + newRedirBlk->inheritWeight(blk->Next()); + newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); // This block isn't part of the loop, so below loop won't add // refs for it. 
fgAddRefPred(blk->Next(), newRedirBlk); From 587a22b3e2ba4f5828e9c8200c63bf882855d837 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 20:19:04 +0100 Subject: [PATCH 06/11] Further quirking --- src/coreclr/jit/compiler.h | 4 ++++ src/coreclr/jit/compiler.hpp | 41 +++++++++++++++++++++++++++++++++ src/coreclr/jit/flowgraph.cpp | 23 ++++++++++++++++++ src/coreclr/jit/loopcloning.cpp | 37 ++++++++++------------------- 4 files changed, 80 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 057f5b5aeca2c..4a55dfeabc616 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2111,6 +2111,10 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocks(TFunc func); + template + BasicBlockVisit VisitLoopBlocksLexical(TFunc func); + + BasicBlock* GetLexicallyTopMostBlock(); BasicBlock* GetLexicallyBottomMostBlock(); bool AnalyzeIteration(NaturalLoopIterInfo* info); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 90e490fc94fce..52227bb973de4 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5019,6 +5019,47 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) return VisitLoopBlocksReversePostOrder(func); } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksLexical: Visit the loop's blocks in +// lexical order. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. 
+// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) +{ + BasicBlock* top = m_header; + BasicBlock* bottom = m_header; + VisitLoopBlocks([&](BasicBlock* block) { + if (block->bbNum < top->bbNum) + top = block; + if (block->bbNum > bottom->bbNum) + bottom = block; + return BasicBlockVisit::Continue; + }); + + BasicBlock* block = top; + while (true) + { + if (ContainsBlock(block) && (func(block) == BasicBlockVisit::Abort)) + return BasicBlockVisit::Abort; + + if (block == bottom) + return BasicBlockVisit::Continue; + + block = block->Next(); + } +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index a0473d7cd54a2..b1ee159179bf5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5018,6 +5018,29 @@ bool FlowGraphNaturalLoop::MatchLimit(NaturalLoopIterInfo* info, GenTree* test) return true; } +//------------------------------------------------------------------------ +// GetLexicallyTopMostBlock: Get the lexically top-most block contained within +// the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyTopMostBlock() +{ + BasicBlock* top = m_header; + VisitLoopBlocks([&top](BasicBlock* loopBlock) { + if (loopBlock->bbNum < top->bbNum) + top = loopBlock; + return BasicBlockVisit::Continue; + }); + + return top; +} + //------------------------------------------------------------------------ // GetLexicallyBottomMostBlock: Get the lexically bottom-most block contained // within the loop. 
diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index e64a2349f6a64..33d69141b1c48 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2146,25 +2146,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. - unsigned numBlocks = loop->NumLoopBlocks(); - - BasicBlock** lexicalBlocks = new (this, CMK_LoopClone) BasicBlock*[numBlocks]; - unsigned index = 0; - loop->VisitLoopBlocks([=, &index](BasicBlock* block) { - assert(index < numBlocks); - lexicalBlocks[index++] = block; - return BasicBlockVisit::Continue; - }); - - assert(index == numBlocks); - - jitstd::sort(lexicalBlocks, lexicalBlocks + numBlocks, - [](BasicBlock* lhs, BasicBlock* rhs) { return lhs->bbNum < rhs->bbNum; }); - BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - for (unsigned i = 0; i < numBlocks; i++) - { - BasicBlock* blk = lexicalBlocks[i]; + loop->VisitLoopBlocksLexical([=, &newPred](BasicBlock* blk) { // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2215,7 +2198,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // If the block falls through to a block outside the loop then we may // need to insert a new block to redirect. 
- if ((i < numBlocks - 1) && blk->bbFallsThrough() && !blk->NextIs(lexicalBlocks[i + 1])) + if (blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) { if (blk->KindIs(BBJ_COND)) { @@ -2235,16 +2218,16 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex assert(!"Cannot handle fallthrough"); } } - } + + return BasicBlockVisit::Continue; + }); // Perform the static optimizations on the fast path. optPerformStaticOptimizations(loop, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. - for (unsigned i = 0; i < numBlocks; i++) - { - BasicBlock* blk = lexicalBlocks[i]; + loop->VisitLoopBlocks([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2281,7 +2264,9 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex default: break; } - } + + return BasicBlockVisit::Continue; + }); #ifdef DEBUG // Display the preds for the new blocks, after all the new blocks have been redirected. @@ -3070,7 +3055,9 @@ bool Compiler::optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneConte shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); LoopCloneVisitorInfo info(context, loop, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); - loop->VisitLoopBlocks([=, &info](BasicBlock* block) { + + // TODO-Quirk: Switch this to VisitLoopBlocks + loop->VisitLoopBlocksLexical([=, &info](BasicBlock* block) { compCurBB = block; for (Statement* const stmt : block->Statements()) { From baaeccbd1eace0e8ee5fdaaf4cdc34e014b2ce93 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 20:49:20 +0100 Subject: [PATCH 07/11] Fix a bug that crept in --- src/coreclr/jit/loopcloning.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 33d69141b1c48..2bb8cb5377277 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2146,8 +2146,10 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
- BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->VisitLoopBlocksLexical([=, &newPred](BasicBlock* blk) { + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); + unsigned numBlocksLeft = loop->NumLoopBlocks(); + + loop->VisitLoopBlocksLexical([=, &newPred, &numBlocksLeft](BasicBlock* blk) { // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2196,9 +2198,12 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex newPred = newBlk; blockMap->Set(blk, newBlk); + numBlocksLeft--; + assert(numBlocksLeft >= 0); + // If the block falls through to a block outside the loop then we may // need to insert a new block to redirect. - if (blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) + if ((numBlocksLeft > 0) && blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) { if (blk->KindIs(BBJ_COND)) { From 39bf71c3044235ef2e983341468537c13e6853ff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Nov 2023 21:25:23 +0100 Subject: [PATCH 08/11] Final quirks --- src/coreclr/jit/loopcloning.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 2bb8cb5377277..300f1284dd612 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2207,15 +2207,26 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex { if (blk->KindIs(BBJ_COND)) { + // TODO-Quirk: We see a lot of these cases and some of them cause diffs. 
+ BasicBlock* targetBlk = blk->Next(); + if (targetBlk->KindIs(BBJ_ALWAYS) && targetBlk->isEmpty()) + targetBlk = targetBlk->GetJumpDest(); + // Need to insert a block. - BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, blk->Next()); + BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, targetBlk); newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; - newRedirBlk->inheritWeight(blk->Next()); + newRedirBlk->bbWeight = blk->Next()->bbWeight; newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); + + // TODO-Quirk: This next block is not part of the loop and + // should not be scaled down, especially once we get rid of the + // other quirk above. + blk->Next()->scaleBBWeight(fastPathWeightScaleFactor); + // This block isn't part of the loop, so below loop won't add // refs for it. - fgAddRefPred(blk->Next(), newRedirBlk); + fgAddRefPred(targetBlk, newRedirBlk); newPred = newRedirBlk; } else From 1235a0fbaa743cd438cd1f55ca9dfeaac9b90000 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Nov 2023 11:11:01 +0100 Subject: [PATCH 09/11] Actual final quirking --- src/coreclr/jit/loopcloning.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 300f1284dd612..af949e0a8de01 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2217,6 +2217,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex newRedirBlk->copyEHRegion(newPred); newRedirBlk->bbNatLoopNum = ambientLoop; newRedirBlk->bbWeight = blk->Next()->bbWeight; + newRedirBlk->bbFlags |= blk->Next()->bbFlags & (BBF_RUN_RARELY | BBF_PROF_WEIGHT); newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); // TODO-Quirk: This next block is not part of the loop and From 95ff1157b1bb4842380539776147660bf7fc4233 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 5 Dec 2023 
10:53:15 +0100 Subject: [PATCH 10/11] Address feedback --- src/coreclr/jit/compiler.h | 81 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/flowgraph.cpp | 2 - 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c004b62c65fed..25afa57e4b701 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1961,6 +1961,10 @@ inline LoopFlags& operator&=(LoopFlags& a, LoopFlags b) class FlowGraphDfsTree { Compiler* m_comp; + + // Post-order that we saw reachable basic blocks in. This order can be + // particularly useful to iterate in reverse, as reverse post-order ensures + // that all predecessors are visited before successors whenever possible. BasicBlock** m_postOrder; unsigned m_postOrderCount; @@ -1996,17 +2000,43 @@ class FlowGraphDfsTree bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const; }; +// Represents the result of induction variable analysis. See +// FlowGraphNaturalLoop::AnalyzeIteration. struct NaturalLoopIterInfo { + // The local that is the induction variable. unsigned IterVar = BAD_VAR_NUM; + + // Constant value that the induction variable is initialized with, outside + // the loop. Only valid if HasConstInit is true. int ConstInitValue = 0; + + // Block outside the loop that initializes the induction variable. Only + // valid if HasConstInit is true. BasicBlock* InitBlock = nullptr; + + // Tree that has the loop test for the induction variable. GenTree* TestTree = nullptr; + + // Tree that mutates the induction variable. GenTree* IterTree = nullptr; + + // Whether or not we found an initialization of the induction variable. bool HasConstInit : 1; + + // Whether or not the loop test compares the induction variable with a + // constant value. bool HasConstLimit : 1; + + // Whether or not the loop test constant value is a SIMD vector element count.
bool HasSimdLimit : 1; + + // Whether or not the loop test compares the induction variable with an + // invariant local. bool HasInvariantLocalLimit : 1; + + // Whether or not the loop test compares the induction variable with the + // length of an invariant array. bool HasArrayLengthLimit : 1; NaturalLoopIterInfo() @@ -2032,20 +2062,58 @@ struct NaturalLoopIterInfo bool ArrLenLimit(Compiler* comp, ArrIndex* index); }; +// Represents a natural loop in the flow graph. Natural loops are characterized +// by the following properties: +// +// * All loop blocks are strongly connected, meaning that every block of the +// loop can reach every other block of the loop. +// +// * All loop blocks are dominated by the header block, i.e. the header block +// is guaranteed to be entered on every iteration. +// +// * From the above it follows that the loop can only be entered at the header +// block. FlowGraphNaturalLoop::EntryEdges() gives a vector of these edges. +// After loop canonicalization it is expected that this vector has exactly one +// edge, from the "preheader". +// +// * The loop can have multiple exits. The regular exit edges are recorded in +// FlowGraphNaturalLoop::ExitEdges(). The loop can also be exited by +// exceptional flow. +// class FlowGraphNaturalLoop { friend class FlowGraphNaturalLoops; + // The DFS tree that contains the loop blocks. const FlowGraphDfsTree* m_tree; + + // The header block; dominates all other blocks in the loop, and is the + // only block branched to from outside the loop. BasicBlock* m_header; + + // Parent loop. By loop properties, well-scopedness is always guaranteed. + // That is, the parent loop contains all blocks of this loop. FlowGraphNaturalLoop* m_parent = nullptr; + // Bit vector of blocks in the loop; each index is the RPO index of a block, // with the head block's RPO index subtracted. BitVec m_blocks; + + // Size of m_blocks. unsigned m_blocksSize = 0; + + // Edges from blocks inside the loop back to the header.
jitstd::vector<FlowEdge*> m_backEdges; + + // Edges from blocks outside the loop to the header. jitstd::vector<FlowEdge*> m_entryEdges; + + // Edges from inside the loop to outside the loop. Note that exceptional + // flow can also exit the loop and is not modelled. jitstd::vector<FlowEdge*> m_exitEdges; + + // Index of the loop in the range [0..FlowGraphNaturalLoops::NumLoops()). + // Can be used to store additional annotations for this loop on the side. unsigned m_index = 0; FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head); @@ -2122,10 +2190,22 @@ class FlowGraphNaturalLoop bool HasDef(unsigned lclNum); }; +// Represents a collection of the natural loops in the flow graph. See +// FlowGraphNaturalLoop for the characteristics of these loops. +// +// Loops are stored in a vector, with easily accessible indices (see +// FlowGraphNaturalLoop::GetIndex()). These indices can be used to store +// additional annotations for each loop on the side. +// class FlowGraphNaturalLoops { const FlowGraphDfsTree* m_dfs; + + // Collection of loops that were found. jitstd::vector<FlowGraphNaturalLoop*> m_loops; + + // Whether or not we saw any non-natural loop cycles, also known as + // irreducible loops. unsigned m_improperLoopHeaders = 0; FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); @@ -2204,6 +2284,7 @@ class FlowGraphNaturalLoops static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); }; +// Represents the dominator tree of the flow graph. class FlowGraphDominatorTree { template diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index c85386ced1152..f38ba2f7dd535 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4981,8 +4981,6 @@ BasicBlock* FlowGraphNaturalLoop::GetLexicallyBottomMostBlock() // Returns: // True if the local has any def.
// -// Remarks: -// bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) { Compiler* comp = m_tree->GetCompiler(); From 0d12eec221f595b9b617ee232e665a88a48feccf Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 5 Dec 2023 10:59:08 +0100 Subject: [PATCH 11/11] Address more feedback --- src/coreclr/jit/loopcloning.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index af949e0a8de01..67f154dfebd2d 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1096,8 +1096,8 @@ LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children // optDeriveLoopCloningConditions: Derive loop cloning conditions. // // Arguments: -// loopNum - the current loop index for which conditions are derived. -// context - data structure where all loop cloning info is kept. +// loop - the current loop for which conditions are derived. +// context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. @@ -1390,8 +1390,8 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. // // Arguments: -// loopNum - the current loop index for which conditions are derived. -// context - data structure where all loop cloning info is kept. +// loop - the current loop for which conditions are derived. +// context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. @@ -1639,11 +1639,11 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore // candidates gathered during the cloning phase. // // Arguments: -// loopNum - the current loop index for which the optimizations are performed. 
-// context - data structure where all loop cloning info is kept. -// dynamicPath - If true, the optimization is performed in the fast path among the -// cloned loops. If false, it means this is the only path (i.e., -// there is no slow path.) +// loop - the current loop for which the optimizations are performed. +// context - data structure where all loop cloning info is kept. +// dynamicPath - If true, the optimization is performed in the fast path among the +// cloned loops. If false, it means this is the only path (i.e., +// there is no slow path.) // // Operation: // Perform the optimizations on the fast path i.e., the path in which the @@ -1768,7 +1768,7 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, // optIsLoopClonable: Determine whether this loop can be cloned. // // Arguments: -// loopInd loop index which needs to be checked if it can be cloned. +// loop - loop index which needs to be checked if it can be cloned. // // Return Value: // Returns true if the loop can be cloned. If it returns false, @@ -2352,8 +2352,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // optIsStackLocalInvariant: Is stack local invariant in loop. // // Arguments: -// loopNum The loop in which the variable is tested for invariance. -// lclNum The local that is tested for invariance in the loop. +// loop - The loop in which the variable is tested for invariance. +// lclNum - The local that is tested for invariance in the loop. // // Return Value: // Returns true if the variable is loop invariant in loopNum. @@ -3036,8 +3036,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // Also, check if the loop is suitable for the optimizations performed. // // Arguments: -// loop - Loop being analyzed -// context - data structure where all loop cloning candidates will be updated. +// loop - Loop being analyzed +// context - data structure where all loop cloning candidates will be updated. 
// // Return Value: // If the loop is not suitable for the optimizations, return false - context