From 667c0073fb8adb7ada0d55e89dc198d77f4b2a12 Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Tue, 8 Oct 2024 15:04:11 +0200
Subject: [PATCH] JIT: Optimize Memmove unrolling for constant src (#108576)

---
 src/coreclr/jit/assertionprop.cpp | 97 ++++++++++++++++++++++++++++++-
 src/coreclr/jit/compiler.h        |  2 +
 src/coreclr/jit/valuenum.cpp      | 39 +++++++++++++
 3 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 8623e2f18ffea..64c3251a4f2d3 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -2590,6 +2590,95 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
     return NO_ASSERTION_INDEX;
 }
 
+//------------------------------------------------------------------------------
+// optVNBasedFoldExpr_Call_Memmove: Unrolls NI_System_SpanHelpers_Memmove/CORINFO_HELP_MEMCPY
+//    if possible. This function effectively duplicates LowerCallMemmove.
+//    However, unlike LowerCallMemmove, it is able to optimize src into constants with help of VN.
+//
+// Arguments:
+//    call - NI_System_SpanHelpers_Memmove/CORINFO_HELP_MEMCPY call to unroll
+//
+// Return Value:
+//    Returns a new tree or nullptr if nothing is changed.
+//
+GenTree* Compiler::optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call)
+{
+    JITDUMP("See if we can optimize NI_System_SpanHelpers_Memmove with help of VN...\n");
+    assert(call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove) ||
+           call->IsHelperCall(this, CORINFO_HELP_MEMCPY));
+
+    CallArg* dstArg = call->gtArgs.GetUserArgByIndex(0);
+    CallArg* srcArg = call->gtArgs.GetUserArgByIndex(1);
+    CallArg* lenArg = call->gtArgs.GetUserArgByIndex(2);
+    ValueNum lenVN  = vnStore->VNConservativeNormalValue(lenArg->GetNode()->gtVNPair);
+    if (!vnStore->IsVNConstant(lenVN))
+    {
+        JITDUMP("...length is not a constant - bail out.\n");
+        return nullptr;
+    }
+
+    size_t len = vnStore->CoercedConstantValue<size_t>(lenVN);
+    if (len == 0)
+    {
+        // Memmove(dst, src, 0) -> no-op.
+        // Memmove doesn't dereference src/dst pointers if length is 0.
+        JITDUMP("...length is 0 -> optimize to no-op.\n");
+        return gtWrapWithSideEffects(gtNewNothingNode(), call, GTF_ALL_EFFECT, true);
+    }
+
+    if (len > getUnrollThreshold(Memcpy))
+    {
+        JITDUMP("...length is too big to unroll - bail out.\n");
+        return nullptr;
+    }
+
+    // If GetImmutableDataFromAddress returns true, it means that the src is a read-only constant.
+    // Thus, dst and src do not overlap (if they do - it's UB).
+    uint8_t* buffer = new (this, CMK_AssertionProp) uint8_t[len];
+    if (!GetImmutableDataFromAddress(srcArg->GetNode(), (int)len, buffer))
+    {
+        JITDUMP("...src is not a constant - fallback to LowerCallMemmove.\n");
+        return nullptr;
+    }
+
+    // If dstArg is not simple, we replace the arg directly with a temp assignment and
+    // continue using that temp - it allows us to reliably extract all side effects.
+    GenTree* dst = fgMakeMultiUse(&dstArg->LateNodeRef());
+
+    // Now we're going to emit a chain of STOREIND via COMMA nodes.
+    // The very first tree is expected to be the side effects of the original call (including all args).
+    GenTree* result = nullptr;
+    gtExtractSideEffList(call, &result, GTF_ALL_EFFECT, true);
+
+    unsigned lenRemaining = (unsigned)len;
+    while (lenRemaining > 0)
+    {
+        const ssize_t offset = (ssize_t)len - (ssize_t)lenRemaining;
+
+        // Clone dst and add offset if necessary.
+        GenTree* currDst = gtCloneExpr(dst);
+        if (offset != 0)
+        {
+            currDst = gtNewOperNode(GT_ADD, dst->TypeGet(), currDst, gtNewIconNode(offset, TYP_I_IMPL));
+        }
+
+        // Create an unaligned STOREIND node using the largest possible word size.
+        var_types        type     = roundDownMaxType(lenRemaining);
+        GenTree*         srcCns   = gtNewGenericCon(type, buffer + offset);
+        GenTreeStoreInd* storeInd = gtNewStoreIndNode(type, currDst, srcCns, GTF_IND_UNALIGNED);
+        fgUpdateConstTreeValueNumber(srcCns);
+
+        // Merge with the previous result.
+        result = result == nullptr ? storeInd : gtNewOperNode(GT_COMMA, TYP_VOID, result, storeInd);
+
+        lenRemaining -= genTypeSize(type);
+    }
+
+    JITDUMP("...optimized into STOREIND(s)!:\n");
+    DISPTREE(result);
+    return result;
+}
+
 //------------------------------------------------------------------------------
 // optVNBasedFoldExpr_Call: Folds given call using VN to a simpler tree.
 //
@@ -2654,6 +2743,11 @@ GenTree* Compiler::optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, G
             break;
     }
 
+    if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove) || call->IsHelperCall(this, CORINFO_HELP_MEMCPY))
+    {
+        return optVNBasedFoldExpr_Call_Memmove(call);
+    }
+
     return nullptr;
 }
 
@@ -6372,7 +6466,8 @@ Compiler::fgWalkResult Compiler::optVNBasedFoldCurStmt(BasicBlock* block,
             break;
 
         case GT_CALL:
-            if (!tree->AsCall()->IsPure(this))
+            // The checks aren't for correctness, but to avoid unnecessary work.
+            if (!tree->AsCall()->IsPure(this) && !tree->AsCall()->IsSpecialIntrinsic())
             {
                 return WALK_CONTINUE;
             }
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 253372e8b4781..70c4c86b42d0a 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -5956,6 +5956,7 @@ class Compiler
         }
     }
 
+    bool GetImmutableDataFromAddress(GenTree* address, int size, uint8_t* pValue);
     bool GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj);
 
     // Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types
@@ -8040,6 +8041,7 @@ class Compiler
     GenTree* optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
     GenTree* optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
     GenTree* optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call);
+    GenTree* optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call);
     GenTree* optExtractSideEffListFromConst(GenTree* tree);
 
     AssertionIndex GetAssertionCount()
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index cfde9f95ec0c1..e48283d0ce68b 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -12024,6 +12024,45 @@ bool Compiler::fgGetStaticFieldSeqAndAddress(ValueNumStore* vnStore,
     return false;
 }
 
+//----------------------------------------------------------------------------------
+// GetImmutableDataFromAddress: Given a tree representing an address, try to obtain
+//    the actual content of the value stored at that address (of the given size).
+//    The value is expected to be immutable (invariant).
+//
+// Arguments:
+//    address - tree node representing the address
+//    size    - size of the value to read, in bytes
+//    pValue  - [out] buffer that receives the resulting value
+//
+// Return Value:
+//    true if the value was successfully obtained, false otherwise
+//
+bool Compiler::GetImmutableDataFromAddress(GenTree* address, int size, uint8_t* pValue)
+{
+    assert(vnStore != nullptr);
+
+    ssize_t   byteOffset = 0;
+    FieldSeq* fieldSeq   = nullptr;
+
+    // See if 'address' is a non-gc object handle.
+    CORINFO_OBJECT_HANDLE obj = NO_OBJECT_HANDLE;
+    if (GetObjectHandleAndOffset(address, &byteOffset, &obj) && ((size_t)byteOffset <= INT32_MAX))
+    {
+        assert(obj != NO_OBJECT_HANDLE);
+        return info.compCompHnd->isObjectImmutable(obj) &&
+               info.compCompHnd->getObjectContent(obj, pValue, size, (int)byteOffset);
+    }
+
+    // See if 'address' is some static read-only field (including RVA).
+    if (fgGetStaticFieldSeqAndAddress(vnStore, address, &byteOffset, &fieldSeq) && ((size_t)byteOffset <= INT32_MAX))
+    {
+        CORINFO_FIELD_HANDLE fld = fieldSeq->GetFieldHandle();
+        return (fld != nullptr) && info.compCompHnd->getStaticFieldContent(fld, pValue, size, (int)byteOffset);
+    }
+
+    return false;
+}
+
 //----------------------------------------------------------------------------------
 // GetObjectHandleAndOffset: Try to obtain a constant object handle with an offset from
 //    the given tree.