Skip to content

Commit

Permalink
JIT: Put all CSEs into SSA (dotnet#106637)
Browse files Browse the repository at this point in the history
This adds an SSA updater that can incrementally put locals that were added to
the IR into SSA form and starts using this infrastructure from CSE. Previously
only single-def CSEs were put into SSA, which can cause e.g. IV opts to miss out
on them.

The SSA updater requires all uses and definitions to be supplied. Based on the
definitions it is then possible to compute the candidate blocks for phi nodes in
the usual way as the iterated dominance frontier. Since we do not have liveness
for the local we do not insert the phi definitions eagerly; instead, we
recursively compute the reaching def for each use by walking its dominators.
Walking the dominators we either expect to find a real definition, or to hit a
block that is part of the iterated dominance frontier of the definitions, in
which case we know we have a live phi definition that we can insert.

Inserting phi definitions needs to recursively do the same thing for the phi
arguments. To make this faster we could memoize the reaching SSA numbers for
each block, though this is not currently done.

This also requires computing the DFS tree, dominators, and dominance frontiers
after RBO (which invalidates those). Since the DFS tree and dominator tree are
usually computed anyway by IV opts, this is not that costly. The cost is further
reduced by avoiding any of these computations in the single-def case (which CSE
already handled). This special case is also moved into the SSA updater.

Finally, since IV opts need liveness to work, the SSA inserter also computes
liveness for the inserted locals. This is done by marking all paths from each
use back to its reaching definition as having that local live-in.
  • Loading branch information
jakobbotsch authored Nov 7, 2024
1 parent 36bcc2c commit 9c1f53e
Show file tree
Hide file tree
Showing 13 changed files with 997 additions and 292 deletions.
54 changes: 54 additions & 0 deletions src/coreclr/jit/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,60 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk)
return res;
}

//------------------------------------------------------------------------
// IsInsertedSsaLiveIn: See if a local is marked as being live-in to a block in
// the side table with locals inserted into SSA.
//
// Arguments:
// block - The block
// lclNum - The local
//
// Returns:
// True if the local is marked as live-in to that block
//
bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum)
{
    // Only locals that are tracked in SSA may be queried here.
    assert(lvaGetDesc(lclNum)->lvInSsa);

    // When the side table does not exist no (block, local) pair has been
    // recorded, so the answer is trivially "not live-in".
    const bool haveTable = (m_insertedSsaLocalsLiveIn != nullptr);
    return haveTable && m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum));
}

//------------------------------------------------------------------------
// AddInsertedSsaLiveIn: Mark a local that was inserted into SSA as being
// live-in to a block.
//
// Arguments:
// block - The block
// lclNum - The local
//
// Returns:
// True if this was added anew; false if the local was already marked as such.
//
bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum)
{
    // Every use of an SSA-inserted local has an explicit reaching def, so such
    // a local can never be live into the method's first block.
    assert(block != fgFirstBB);

    // Create the side table lazily on the first insertion.
    if (m_insertedSsaLocalsLiveIn == nullptr)
    {
        m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA));
    }

    // A true result from Set is treated as "the pair was already present".
    const bool alreadyMarked =
        m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite);

    if (!alreadyMarked)
    {
        JITDUMP("Marked V%02u as live into " FMT_BB "\n", lclNum, block->bbNum);
    }

    return !alreadyMarked;
}

//------------------------------------------------------------------------
// IsLastHotBlock: see if this is the last block before the cold section
//
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4985,6 +4985,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables);
}

fgInvalidateDfsTree();

if (doVNBasedDeadStoreRemoval)
{
// Note: this invalidates SSA and value numbers on tree nodes.
Expand Down
60 changes: 57 additions & 3 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2387,6 +2387,29 @@ class FlowGraphDominatorTree
static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfsTree);
};

// Dominance frontiers for the blocks of a flow graph, built from a dominator
// tree. Also supports computing the iterated dominance frontier of a block.
class FlowGraphDominanceFrontiers
{
// Dominator tree these frontiers were built from.
FlowGraphDominatorTree* m_domTree;
// Maps a block to the vector of blocks in its dominance frontier. Blocks with
// an empty frontier have no entry in the map.
BlkToBlkVectorMap m_map;
// Bit vector traits taken from the DFS tree's post-order traits. Declared
// before m_visited because m_visited is initialized from these traits.
BitVecTraits m_poTraits;
// Scratch set keyed by bbPostorderNum; cleared and reused by each call to
// ComputeIteratedDominanceFrontier.
BitVec m_visited;

// Private: instances are created through Build.
FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree);

#ifdef DEBUG
// Prints the dominance frontier of every block in the DFS tree.
void Dump();
#endif

public:
// Returns the dominator tree these frontiers are associated with.
FlowGraphDominatorTree* GetDomTree()
{
return m_domTree;
}

// Builds the dominance frontiers for all blocks of the dominator tree's DFS tree.
static FlowGraphDominanceFrontiers* Build(FlowGraphDominatorTree* domTree);
// Appends the iterated dominance frontier of "block" to "result", which must
// be empty on entry.
void ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result);
};

// Represents a reverse mapping from block back to its (most nested) containing loop.
class BlockToNaturalLoopMap
{
Expand Down Expand Up @@ -2925,6 +2948,35 @@ class Compiler
return m_dominancePreds;
}

// Key type pairing a basic block with a local number. It also supplies the
// equality and hashing functions needed to use the pair as a hash table key.
struct BasicBlockLocalPair
{
    BasicBlock* Block;
    unsigned    LclNum;

    BasicBlockLocalPair(BasicBlock* block, unsigned lclNum)
        : Block(block)
        , LclNum(lclNum)
    {
    }

    // Two pairs are equal when both the block and the local number match.
    static bool Equals(const BasicBlockLocalPair& x, const BasicBlockLocalPair& y)
    {
        if (x.Block != y.Block)
        {
            return false;
        }

        return x.LclNum == y.LclNum;
    }

    // Combines the block's bbID with the local number into a single hash.
    static unsigned GetHashCode(const BasicBlockLocalPair& val)
    {
        const unsigned seed = val.Block->bbID;
        const unsigned mix  = val.LclNum + 0x9e3779b9 + (seed << 19) + (seed >> 13);
        return seed ^ mix;
    }
};

typedef JitHashTable<BasicBlockLocalPair, BasicBlockLocalPair, bool> BasicBlockLocalPairSet;

BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn = nullptr;
bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum);
bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum);

void* ehEmitCookie(BasicBlock* block);
UNATIVE_OFFSET ehCodeOffset(BasicBlock* block);

Expand Down Expand Up @@ -5147,6 +5199,7 @@ class Compiler
// Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree
// in order to avoid the need for SSA reconstruction and an "out of SSA" phase).
FlowGraphDominatorTree* m_domTree = nullptr;
FlowGraphDominanceFrontiers* m_domFrontiers = nullptr;
BlockReachabilitySets* m_reachabilitySets = nullptr;

// Do we require loops to be in canonical form? The canonical form ensures that:
Expand Down Expand Up @@ -5419,7 +5472,7 @@ class Compiler

void fgMergeBlockReturn(BasicBlock* block);

bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg));
bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), bool invalidateDFSTreeOnFGChange = true);
void fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt);

bool gtRemoveTreesAfterNoReturnCall(BasicBlock* block, Statement* stmt);
Expand Down Expand Up @@ -5717,7 +5770,7 @@ class Compiler

// The value numbers for this compilation.
ValueNumStore* vnStore = nullptr;
class ValueNumberState* vnState;
class ValueNumberState* vnState = nullptr;

public:
ValueNumStore* GetValueNumStore()
Expand Down Expand Up @@ -7682,6 +7735,7 @@ class Compiler
LoopLocalOccurrences* loopLocals);
bool optCanAndShouldChangeExitTest(GenTree* cond, bool dump);
bool optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals);
bool optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block);

bool optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals);
bool optWidenPrimaryIV(FlowGraphNaturalLoop* loop,
Expand Down Expand Up @@ -12184,7 +12238,7 @@ class DomTreeVisitor

public:
//------------------------------------------------------------------------
// WalkTree: Walk the dominator tree.
// WalkTree: Walk the dominator tree starting from the first BB.
//
// Parameter:
// domTree - Dominator tree.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4748,6 +4748,7 @@ void Compiler::fgDebugCheckFlowGraphAnnotations()

assert((m_loops == nullptr) || (m_loops->GetDfsTree() == m_dfsTree));
assert((m_domTree == nullptr) || (m_domTree->GetDfsTree() == m_dfsTree));
assert((m_domFrontiers == nullptr) || (m_domFrontiers->GetDomTree() == m_domTree));
assert((m_reachabilitySets == nullptr) || (m_reachabilitySets->GetDfsTree() == m_dfsTree));
}

Expand Down
184 changes: 184 additions & 0 deletions src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4370,6 +4370,7 @@ void Compiler::fgInvalidateDfsTree()
m_dfsTree = nullptr;
m_loops = nullptr;
m_domTree = nullptr;
m_domFrontiers = nullptr;
m_reachabilitySets = nullptr;
fgSsaValid = false;
}
Expand Down Expand Up @@ -6603,6 +6604,189 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df
return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfsTree, domTree, preorderNums, postorderNums);
}

//------------------------------------------------------------------------
// FlowGraphDominanceFrontiers: Construct an empty set of dominance frontiers
// associated with a dominator tree.
//
// Parameters:
// domTree - Dominator tree the frontiers belong to
//
// Remarks:
// The frontier map allocates from CMK_DominatorMemory using the compiler
// that owns the DFS tree. The visited set starts out empty and is sized by
// the DFS tree's post-order traits, so m_poTraits must be initialized before
// m_visited.
//
FlowGraphDominanceFrontiers::FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree)
: m_domTree(domTree)
, m_map(domTree->GetDfsTree()->GetCompiler()->getAllocator(CMK_DominatorMemory))
, m_poTraits(domTree->GetDfsTree()->PostOrderTraits())
, m_visited(BitVecOps::MakeEmpty(&m_poTraits))
{
}

//------------------------------------------------------------------------
// FlowGraphDominanceFrontiers::Build: Build the dominance frontiers for all
// blocks.
//
// Parameters:
// domTree - Dominator tree to build dominance frontiers for
//
// Returns:
// Data structure representing dominance frontiers.
//
// Remarks:
// Recall that the dominance frontier of a block B is the set of blocks B3
// such that there exists some B2 s.t. B3 is a successor of B2, and B
// dominates B2 but not B3. Note that this dominance need not be strict -- B2
// and B may be the same node.
//
// In other words, a block B' is in DF(B) if B dominates an immediate
// predecessor of B', but does not dominate B'. Intuitively, these blocks are
// the "first" blocks that are no longer dominated by B; these are the places
// we are interested in inserting phi definitions that may refer to defs in
// B.
//
// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy.
//
FlowGraphDominanceFrontiers* FlowGraphDominanceFrontiers::Build(FlowGraphDominatorTree* domTree)
{
    const FlowGraphDfsTree* dfsTree = domTree->GetDfsTree();
    Compiler*               comp    = dfsTree->GetCompiler();

    FlowGraphDominanceFrontiers* frontiers = new (comp, CMK_DominatorMemory) FlowGraphDominanceFrontiers(domTree);

    unsigned poIndex = 0;
    while (poIndex < dfsTree->GetPostOrderCount())
    {
        // In the terminology of the header comment "block" plays the role of
        // B3: we look for every B1 whose dominance frontier contains it. The
        // candidate B2's are the predecessors of "block", since "block" is by
        // definition an immediate successor of each of them.
        BasicBlock* block = dfsTree->GetPostOrder(poIndex++);
        FlowEdge*   preds = comp->BlockPredsWithEH(block);

        // A block with zero or one predecessor cannot be in any dominance
        // frontier: either there is no pred at all, or the only pred dominates
        // it. Handler entry blocks are the exception; they are always in the
        // dominance frontier of the blocks they enclose.
        bool isCandidate = comp->bbIsHandlerBeg(block);
        if (!isCandidate)
        {
            isCandidate = (preds != nullptr) && (preds->getNextPredEdge() != nullptr);
        }

        if (!isCandidate)
        {
            continue;
        }

        // Walking up from a pred stops at the immediate dominator of "block":
        // that block (and everything above it) does dominate "block", so the
        // dominance frontier definition no longer applies.
        BasicBlock* const blockIDom = block->bbIDom;

        for (FlowEdge* edge = preds; edge != nullptr; edge = edge->getNextPredEdge())
        {
            BasicBlock* pred = edge->getSourceBlock();

            // Preds outside of the DFS tree are ignored.
            if (!dfsTree->Contains(pred))
            {
                continue;
            }

            // This pred is a B2. The possible B1's are B2 itself (a block
            // dominates itself) followed by its ancestors in the dominator
            // tree, until we hit the root or the immediate dominator of
            // "block". Every pred is dominated by that immediate dominator, so
            // the walk is guaranteed to encounter it. Each block visited on
            // the way gets "block" added to its dominance frontier.
            BasicBlock* b1 = pred;
            while ((b1 != nullptr) && (b1 != blockIDom))
            {
                BlkVector& frontier = *frontiers->m_map.Emplace(b1, comp->getAllocator(CMK_DominatorMemory));

                // The same frontier can be reached through several preds; only
                // append "block" when it is not already the last entry.
                const bool alreadyLast = !frontier.empty() && (frontier.back() == block);
                if (!alreadyLast)
                {
                    frontier.push_back(block);
                }

                b1 = b1->bbIDom;
            }
        }
    }

    return frontiers;
}

//------------------------------------------------------------------------
// ComputeIteratedDominanceFrontier: Compute the iterated dominance frontier of
// a block. This is the transitive closure of taking dominance frontiers.
//
// Parameters:
// block - Block to compute iterated dominance frontier for.
// result - Vector to add blocks of IDF into.
//
// Remarks:
// When we create phi definitions we are creating new definitions that
// themselves induce the creation of more phi nodes. Thus, the transitive
// closure of DF(B) contains all blocks that may have phi definitions
// referring to defs in B, or referring to other phis referring to defs in B.
//
void FlowGraphDominanceFrontiers::ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result)
{
    assert(result->empty());

    // A block without a recorded dominance frontier has an empty IDF.
    BlkVector* const seedDF = m_map.LookupPointer(block);
    if (seedDF == nullptr)
    {
        return;
    }

    BlkVector& idf = *result;

    // Seed IDF(block) with DF(block), resetting the shared visited set first.
    idf.reserve(seedDF->size());
    BitVecOps::ClearD(&m_poTraits, m_visited);

    for (BasicBlock* seed : *seedDF)
    {
        BitVecOps::AddElemD(&m_poTraits, m_visited, seed->bbPostorderNum);
        idf.push_back(seed);
    }

    // Treat the result vector as a worklist: for each member, fold in the
    // members of its own dominance frontier that have not been seen yet. The
    // vector grows while it is processed, so it is walked by index rather
    // than by iterator (which growth could invalidate). Processing ends once
    // the cursor catches up with the end, i.e. no new blocks were added.
    size_t cursor = 0;
    while (cursor < idf.size())
    {
        BasicBlock* const member   = idf[cursor++];
        BlkVector* const  memberDF = m_map.LookupPointer(member);

        if (memberDF != nullptr)
        {
            for (BasicBlock* candidate : *memberDF)
            {
                const bool isNew = BitVecOps::TryAddElemD(&m_poTraits, m_visited, candidate->bbPostorderNum);
                if (isNew)
                {
                    idf.push_back(candidate);
                }
            }
        }
    }
}

#ifdef DEBUG
//------------------------------------------------------------------------
// FlowGraphDominanceFrontiers::Dump: Dump a textual representation of the
// dominance frontiers to jitstdout.
//
void FlowGraphDominanceFrontiers::Dump()
{
printf("DF:\n");
for (unsigned i = 0; i < m_domTree->GetDfsTree()->GetPostOrderCount(); ++i)
{
BasicBlock* b = m_domTree->GetDfsTree()->GetPostOrder(i);
printf("Block " FMT_BB " := {", b->bbNum);

BlkVector* bDF = m_map.LookupPointer(b);
if (bDF != nullptr)
{
int index = 0;
for (BasicBlock* f : *bDF)
{
printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum);
}
}
printf("}\n");
}
}
#endif

//------------------------------------------------------------------------
// BlockToNaturalLoopMap::GetLoop: Map a block back to its most nested
// containing loop.
Expand Down
Loading

0 comments on commit 9c1f53e

Please sign in to comment.