Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Support retbuf optimization for non 'lvIsTemp' locals #104467

Merged
merged 9 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5515,6 +5515,7 @@ class Compiler

void fgLocalVarLivenessInit();

template <bool lowered>
void fgPerNodeLocalVarLiveness(GenTree* node);
void fgPerBlockLocalVarLiveness();

Expand All @@ -5526,7 +5527,7 @@ class Compiler

void fgLiveVarAnalysis();

void fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call);
void fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call);

void fgComputeLifeTrackedLocalUse(VARSET_TP& life, LclVarDsc& varDsc, GenTreeLclVarCommon* node);
bool fgComputeLifeTrackedLocalDef(VARSET_TP& life,
Expand All @@ -5548,6 +5549,7 @@ class Compiler
bool* pStmtInfoDirty DEBUGARG(bool* treeModf));

void fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP keepAliveVars);
bool fgIsTrackedRetBufferAddress(LIR::Range& range, GenTree* node);

bool fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange);

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gschecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void Compiler::gsParamsToShadows()
shadowVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
#ifdef DEBUG
shadowVarDsc->SetDoNotEnregReason(varDsc->GetDoNotEnregReason());
shadowVarDsc->SetHiddenBufferStructArg(varDsc->IsHiddenBufferStructArg());
#endif

if (varTypeIsStruct(type))
Expand Down Expand Up @@ -503,6 +504,7 @@ void Compiler::gsParamsToShadows()
}
}

compCurBB = fgFirstBB;
// Now insert code to copy the params to their shadow copy.
for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
{
Expand All @@ -522,6 +524,7 @@ void Compiler::gsParamsToShadows()
compCurBB = fgFirstBB; // Needed by some morphing
(void)fgNewStmtAtBeg(fgFirstBB, fgMorphTree(store));
}
compCurBB = nullptr;

// If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original
// params before "jmp" to CalleeMethod.
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1250,12 +1250,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) &&
m_compiler->IsValidLclAddr(lclNum, val.Offset()))
{
// We will only attempt this optimization for locals that are:
// a) Not susceptible to liveness bugs (see "lvaSetHiddenBufferStructArg").
// b) Do not later turn into indirections.
//
bool isSuitableLocal =
varTypeIsStruct(varDsc) && varDsc->lvIsTemp && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
// We will only attempt this optimization for locals that do not
// later turn into indirections.
bool isSuitableLocal = varTypeIsStruct(varDsc) && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
#ifdef TARGET_X86
if (m_compiler->lvaIsArgAccessedViaVarArgsCookie(lclNum))
{
Expand Down
6 changes: 0 additions & 6 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3068,12 +3068,6 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum DEBUGARG(AddressExposedReaso
// *do not* mark the local address-exposed and treat the call much like a local store node throughout
// the compilation.
//
// TODO-ADDR-Bug: currently, we rely on these locals not being present in call argument lists,
// outside of the buffer address argument itself, as liveness - currently - treats the location node
// associated with the address itself as the definition point, and call arguments can be reordered
// rather arbitrarily. We should fix liveness to treat the call as the definition point instead and
// enable this optimization for "!lvIsTemp" locals.
//
void Compiler::lvaSetHiddenBufferStructArg(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
Expand Down
114 changes: 103 additions & 11 deletions src/coreclr/jit/liveness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,14 @@ void Compiler::fgLocalVarLivenessInit()
// Set fgCurMemoryUse and fgCurMemoryDef when memory is read or updated
// Call fgMarkUseDef for any Local variables encountered
//
// Template arguments:
// lowered - Whether or not this is liveness on lowered IR, where LCL_ADDRs
// on tracked locals may appear.
//
// Arguments:
// tree - The current node.
//
template <bool lowered>
void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
{
assert(tree != nullptr);
Expand All @@ -209,12 +214,25 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)

case GT_LCL_VAR:
case GT_LCL_FLD:
case GT_LCL_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
fgMarkUseDef(tree->AsLclVarCommon());
break;

case GT_LCL_ADDR:
if (lowered)
{
// If this is a definition of a retbuf then we process it as
// part of the GT_CALL node.
if (fgIsTrackedRetBufferAddress(LIR::AsRange(compCurBB), tree))
{
break;
}

fgMarkUseDef(tree->AsLclVarCommon());
}
break;

case GT_IND:
case GT_BLK:
// For Volatile indirection, first mutate GcHeap/ByrefExposed
Expand Down Expand Up @@ -303,6 +321,12 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
}
}
}

GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
if (definedLcl != nullptr)
{
fgMarkUseDef(definedLcl);
}
break;
}

Expand Down Expand Up @@ -367,7 +391,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
{
for (GenTree* node : LIR::AsRange(block))
{
fgPerNodeLocalVarLiveness(node);
fgPerNodeLocalVarLiveness<true>(node);
}
}
else if (fgNodeThreading == NodeThreading::AllTrees)
Expand All @@ -377,7 +401,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
compCurStmt = stmt;
for (GenTree* const node : stmt->TreeList())
{
fgPerNodeLocalVarLiveness(node);
fgPerNodeLocalVarLiveness<false>(node);
}
}
}
Expand Down Expand Up @@ -744,10 +768,11 @@ void Compiler::fgLiveVarAnalysis()
// due to a GT_CALL node.
//
// Arguments:
// life - The live set that is being computed.
// call - The call node in question.
// life - The live set that is being computed.
// keepAliveVars - Tracked locals that must be kept alive everywhere in the block
// call - The call node in question.
//
void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
void Compiler::fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call)
{
assert(call != nullptr);

Expand Down Expand Up @@ -808,6 +833,12 @@ void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
}
}
}

GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
if (definedLcl != nullptr)
{
fgComputeLifeLocal(life, keepAliveVars, definedLcl);
}
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -1140,11 +1171,11 @@ void Compiler::fgComputeLife(VARSET_TP& life,
AGAIN:
assert(tree->OperGet() != GT_QMARK);

if (tree->gtOper == GT_CALL)
if (tree->IsCall())
{
fgComputeLifeCall(life, tree->AsCall());
fgComputeLifeCall(life, keepAliveVars, tree->AsCall());
}
else if (tree->OperIsNonPhiLocal() || tree->OperIs(GT_LCL_ADDR))
else if (tree->OperIsNonPhiLocal())
{
bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, tree);
if (isDeadStore)
Expand Down Expand Up @@ -1191,6 +1222,15 @@ void Compiler::fgComputeLife(VARSET_TP& life,
}
}

//---------------------------------------------------------------------
// fgComputeLifeLIR - fill out liveness flags in the IR nodes of the block
// provided the live-out set.
//
// Arguments
// life - the set of live-out variables from the block
// block - the block
// keepAliveVars - variables that are globally live (usually due to being live into an EH successor)
//
void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP keepAliveVars)
{
noway_assert(VarSetOps::IsSubset(this, keepAliveVars, life));
Expand Down Expand Up @@ -1247,7 +1287,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
else
{
fgComputeLifeCall(life, call);
fgComputeLifeCall(life, keepAliveVars, call);
}
break;
}
Expand Down Expand Up @@ -1295,11 +1335,21 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
else
{
// For LCL_ADDRs that are defined by being passed as a
// retbuf we will handle them when we get to the call. We
// cannot consider them to be defined at the point of the
// LCL_ADDR since there may be uses between the LCL_ADDR
// and call.
if (fgIsTrackedRetBufferAddress(blockRange, node))
{
break;
}

isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node);
if (isDeadStore)
{
LIR::Use addrUse;
if (blockRange.TryGetUse(node, &addrUse) && (addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK)))
if (blockRange.TryGetUse(node, &addrUse) && addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK))
{
GenTreeIndir* const store = addrUse.User()->AsIndir();

Expand Down Expand Up @@ -1465,6 +1515,48 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
}
}

//---------------------------------------------------------------------
// fgIsTrackedRetBufferAddress - given a LCL_ADDR node, check if it is the
// return buffer definition of a call.
//
// Arguments
// range - the block range containing the LCL_ADDR
// node - the LCL_ADDR
//
bool Compiler::fgIsTrackedRetBufferAddress(LIR::Range& range, GenTree* node)
{
assert(node->OperIs(GT_LCL_ADDR));
if ((node->gtFlags & GTF_VAR_DEF) == 0)
{
return false;
}

LclVarDsc* dsc = lvaGetDesc(node->AsLclVarCommon());
if (!dsc->lvTracked)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This early out seems to make the method a bit more specific than the name implies... maybe add a Note: or something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed the function to fgIsTrackedRetBufferAddress

{
return false;
}

GenTree* curNode = node;
do
{
LIR::Use use;
if (!range.TryGetUse(curNode, &use))
{
return false;
}

curNode = use.User();

if (curNode->IsCall())
{
return gtCallGetDefinedRetBufLclAddr(curNode->AsCall()) == node;
}
} while (curNode->OperIs(GT_FIELD_LIST) || curNode->OperIsPutArg());

return false;
}

//---------------------------------------------------------------------
// fgTryRemoveNonLocal - try to remove a node if it is unused and has no direct
// side effects.
Expand Down
Loading