From c1de4ba68eccad2d450ab2fb5ba39fdf498a373e Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 7 Jan 2024 16:45:46 +0100 Subject: [PATCH] PGO: Profiled SequenceEqual (#96571) --- src/coreclr/jit/compiler.h | 11 ++++++++- src/coreclr/jit/fgprofile.cpp | 12 ++++++---- src/coreclr/jit/importercalls.cpp | 39 +++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 05bd2e53dfef8..7cb0dfb564f88 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9515,7 +9515,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { Memset, Memcpy, - Memmove + Memmove, + ProfiledMemmove, + ProfiledMemcmp }; //------------------------------------------------------------------------ @@ -9589,6 +9591,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX threshold = maxRegSize * 4; } + // For profiled memcmp/memmove we don't want to unroll too much as it's just a guess, + // and it works better for small sizes. + if ((type == UnrollKind::ProfiledMemcmp) || (type == UnrollKind::ProfiledMemmove)) + { + threshold = maxRegSize * 2; + } + return threshold; } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index f3fe032a4c363..a4d9c21722471 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -1950,9 +1950,13 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitorIsCall() && node->AsCall()->IsSpecialIntrinsic(m_compiler, NI_System_Buffer_Memmove)) + if (node->IsCall() && node->AsCall()->IsSpecialIntrinsic()) { - m_functor(m_compiler, node); + const NamedIntrinsic ni = m_compiler->lookupNamedIntrinsic(node->AsCall()->gtCallMethHnd); + if ((ni == NI_System_Buffer_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual)) + { + m_functor(m_compiler, node); + } } return Compiler::WALK_CONTINUE; } @@ -2276,8 +2280,8 @@ class ValueHistogramProbeInserter return; } - // Only Buffer.Memmove call is currently expected - assert(node->IsCall() && (node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_Buffer_Memmove))); + assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_Buffer_Memmove) || + node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_SequenceEqual)); const ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex]; if (countEntry.ILOffset != diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 4692bd53fc60c..cf698110ef129 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1440,6 +1440,32 @@ var_types Compiler::impImportCall(OPCODE opcode, { impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("non-inline candidate call")); } + + if (JitConfig.JitProfileValues() && call->IsCall() && + call->AsCall()->IsSpecialIntrinsic(this, NI_System_SpanHelpers_SequenceEqual)) + { + if (opts.IsOptimizedWithProfile()) + { + call = impDuplicateWithProfiledArg(call->AsCall(), rawILOffset); + if (call->OperIs(GT_QMARK)) + { + // QMARK has to be a root node + unsigned tmp = lvaGrabTemp(true DEBUGARG("Grabbing temp for Qmark")); + impStoreTemp(tmp, call, CHECK_SPILL_ALL); + call = gtNewLclvNode(tmp, call->TypeGet()); + } + } + else if (opts.IsInstrumented()) + { + // We might want to instrument it for optimized versions too, but we don't currently. + HandleHistogramProfileCandidateInfo* pInfo = + new (this, CMK_Inlining) HandleHistogramProfileCandidateInfo; + pInfo->ilOffset = rawILOffset; + pInfo->probeIndex = 0; + call->AsCall()->gtHandleHistogramProfileCandidateInfo = pInfo; + compCurBB->SetFlags(BBF_HAS_VALUE_PROFILE); + } + } } //------------------------------------------------------------------------- @@ -1536,7 +1562,15 @@ GenTree* Compiler::impDuplicateWithProfiledArg(GenTreeCall* call, IL_OFFSET ilOf argNum = 2; minValue = 1; // TODO: enable for 0 as well. - maxValue = (ssize_t)getUnrollThreshold(Memmove); + maxValue = (ssize_t)getUnrollThreshold(ProfiledMemmove); + } + else if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_SequenceEqual)) + { + // dst(0), src(1), len(2) + argNum = 2; + + minValue = 1; // TODO: enable for 0 as well. + maxValue = (ssize_t)getUnrollThreshold(ProfiledMemcmp); } else { @@ -2729,7 +2763,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; case NI_System_Buffer_Memmove: - // We're going to instrument this one + case NI_System_SpanHelpers_SequenceEqual: + // We're going to instrument these betterToExpand = opts.IsInstrumented(); break;