From 391393179a6e316909add3b8455eb6d7c7c38ddb Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Thu, 1 Feb 2024 14:19:11 -0800 Subject: [PATCH] [lld-macho] icf objc stubs (#79730) This supports icf for objc stubs. --- lld/MachO/Arch/ARM64.cpp | 14 ++--- lld/MachO/Arch/ARM64Common.h | 26 ++++----- lld/MachO/Arch/ARM64_32.cpp | 5 +- lld/MachO/Arch/X86_64.cpp | 9 +-- lld/MachO/SyntheticSections.cpp | 91 +++++++++++++++++++----------- lld/MachO/SyntheticSections.h | 4 +- lld/MachO/Target.h | 2 +- lld/MachO/Writer.cpp | 1 + lld/test/MachO/objc-selrefs.s | 3 - lld/test/MachO/x86-64-objc-stubs.s | 12 ++++ 10 files changed, 96 insertions(+), 71 deletions(-) diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index 2741df9c3070..e192676394c9 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -37,8 +37,7 @@ struct ARM64 : ARM64Common { uint64_t entryAddr) const override; void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, - uint64_t &stubOffset, uint64_t selrefsVA, - uint64_t selectorIndex, + uint64_t &stubOffset, uint64_t selrefVA, Symbol *objcMsgSend) const override; void populateThunk(InputSection *thunk, Symbol *funcSym) override; void applyOptimizationHints(uint8_t *, const ObjFile &) const override; @@ -124,8 +123,7 @@ static constexpr uint32_t objcStubsSmallCode[] = { }; void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, - uint64_t &stubOffset, uint64_t selrefsVA, - uint64_t selectorIndex, + uint64_t &stubOffset, uint64_t selrefVA, Symbol *objcMsgSend) const { uint64_t objcMsgSendAddr; uint64_t objcStubSize; @@ -136,8 +134,8 @@ void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, objcMsgSendAddr = in.got->addr; objcMsgSendIndex = objcMsgSend->gotIndex; ::writeObjCMsgSendFastStub(buf, objcStubsFastCode, sym, stubsAddr, - stubOffset, selrefsVA, selectorIndex, - objcMsgSendAddr, objcMsgSendIndex); + stubOffset, selrefVA, objcMsgSendAddr, + objcMsgSendIndex); } else { assert(config->objcStubsMode == ObjCStubsMode::small); objcStubSize = target->objcStubsSmallSize; @@ -149,8 +147,8 @@ void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, objcMsgSendIndex = objcMsgSend->stubsIndex; } ::writeObjCMsgSendSmallStub(buf, objcStubsSmallCode, sym, stubsAddr, - stubOffset, selrefsVA, selectorIndex, - objcMsgSendAddr, objcMsgSendIndex); + stubOffset, selrefVA, objcMsgSendAddr, + objcMsgSendIndex); } stubOffset += objcStubSize; } diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h index b038b6200f4d..70461b4e83d3 100644 --- a/lld/MachO/Arch/ARM64Common.h +++ b/lld/MachO/Arch/ARM64Common.h @@ -153,11 +153,11 @@ inline void writeStubHelperEntry(uint8_t *buf8, } template -inline void -writeObjCMsgSendFastStub(uint8_t *buf, const uint32_t objcStubsFastCode[8], - Symbol *sym, uint64_t stubsAddr, uint64_t stubOffset, - uint64_t selrefsVA, uint64_t selectorIndex, - uint64_t gotAddr, uint64_t msgSendIndex) { +inline void writeObjCMsgSendFastStub(uint8_t *buf, + const uint32_t objcStubsFastCode[8], + Symbol *sym, uint64_t stubsAddr, + uint64_t stubOffset, uint64_t selrefVA, + uint64_t gotAddr, uint64_t msgSendIndex) { SymbolDiagnostic d = {sym, sym->getName()}; auto *buf32 = reinterpret_cast(buf); @@ -165,11 +165,9 @@ writeObjCMsgSendFastStub(uint8_t *buf, const uint32_t objcStubsFastCode[8], return pageBits(stubsAddr + stubOffset + i * sizeof(uint32_t)); }; - uint64_t selectorOffset = selectorIndex * LP::wordSize; encodePage21(&buf32[0], d, objcStubsFastCode[0], - pageBits(selrefsVA + selectorOffset) - pcPageBits(0)); - encodePageOff12(&buf32[1], d, objcStubsFastCode[1], - selrefsVA + selectorOffset); + pageBits(selrefVA) - pcPageBits(0)); + encodePageOff12(&buf32[1], d, objcStubsFastCode[1], selrefVA); uint64_t gotOffset = msgSendIndex * LP::wordSize; encodePage21(&buf32[2], d, objcStubsFastCode[2], pageBits(gotAddr + gotOffset) - pcPageBits(2)); @@ -184,8 +182,8 @@ template inline void writeObjCMsgSendSmallStub(uint8_t *buf, const uint32_t objcStubsSmallCode[3], Symbol *sym, uint64_t stubsAddr, uint64_t stubOffset, - uint64_t selrefsVA, uint64_t selectorIndex, - uint64_t msgSendAddr, uint64_t msgSendIndex) { + uint64_t selrefVA, uint64_t msgSendAddr, + uint64_t msgSendIndex) { SymbolDiagnostic d = {sym, sym->getName()}; auto *buf32 = reinterpret_cast(buf); @@ -193,11 +191,9 @@ writeObjCMsgSendSmallStub(uint8_t *buf, const uint32_t objcStubsSmallCode[3], return pageBits(stubsAddr + stubOffset + i * sizeof(uint32_t)); }; - uint64_t selectorOffset = selectorIndex * LP::wordSize; encodePage21(&buf32[0], d, objcStubsSmallCode[0], - pageBits(selrefsVA + selectorOffset) - pcPageBits(0)); - encodePageOff12(&buf32[1], d, objcStubsSmallCode[1], - selrefsVA + selectorOffset); + pageBits(selrefVA) - pcPageBits(0)); + encodePageOff12(&buf32[1], d, objcStubsSmallCode[1], selrefVA); uint64_t msgSendStubVA = msgSendAddr + msgSendIndex * target->stubSize; uint64_t pcVA = stubsAddr + stubOffset + 2 * sizeof(uint32_t); encodeBranch26(&buf32[2], {nullptr, "objc_msgSend stub"}, diff --git a/lld/MachO/Arch/ARM64_32.cpp b/lld/MachO/Arch/ARM64_32.cpp index 16c7cbee9ba7..e79ca745ed4a 100644 --- a/lld/MachO/Arch/ARM64_32.cpp +++ b/lld/MachO/Arch/ARM64_32.cpp @@ -34,8 +34,7 @@ struct ARM64_32 : ARM64Common { void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, - uint64_t &stubOffset, uint64_t selrefsVA, - uint64_t selectorIndex, + uint64_t &stubOffset, uint64_t selrefVA, Symbol *objcMsgSend) const override; }; @@ -101,7 +100,7 @@ void ARM64_32::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym, void ARM64_32::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, uint64_t &stubOffset, - uint64_t selrefsVA, uint64_t selectorIndex, + uint64_t selrefVA, Symbol *objcMsgSend) const { fatal("TODO: implement this"); } diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index 9e8e1d01e493..0a950f28f02a 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -38,8 +38,7 @@ struct X86_64 : TargetInfo { uint64_t entryAddr) const override; void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, - uint64_t &stubOffset, uint64_t selrefsVA, - uint64_t selectorIndex, + uint64_t &stubOffset, uint64_t selrefVA, Symbol *objcMsgSend) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; @@ -182,8 +181,7 @@ static constexpr uint8_t objcStubsFastCode[] = { }; void X86_64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, - uint64_t &stubOffset, uint64_t selrefsVA, - uint64_t selectorIndex, + uint64_t &stubOffset, uint64_t selrefVA, Symbol *objcMsgSend) const { uint64_t objcMsgSendAddr = in.got->addr; uint64_t objcMsgSendIndex = objcMsgSend->gotIndex; @@ -191,8 +189,7 @@ void X86_64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, memcpy(buf, objcStubsFastCode, sizeof(objcStubsFastCode)); SymbolDiagnostic d = {sym, sym->getName()}; uint64_t stubAddr = stubsAddr + stubOffset; - writeRipRelative(d, buf, stubAddr, 7, - selrefsVA + selectorIndex * LP64::wordSize); + writeRipRelative(d, buf, stubAddr, 7, selrefVA); writeRipRelative(d, buf, stubAddr, 0xd, objcMsgSendAddr + objcMsgSendIndex * LP64::wordSize); stubOffset += target->objcStubsFastSize; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index d3c8cb02942b..544847d3d448 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -825,10 +825,60 @@ StringRef ObjCStubsSection::getMethname(Symbol *sym) { return methname; } +void ObjCStubsSection::initialize() { + // Do not fold selrefs without ICF. + if (config->icfLevel == ICFLevel::none) + return; + + // Search methnames already referenced in __objc_selrefs + // Map the name to the corresponding selref entry + // which we will reuse when creating objc stubs. + for (ConcatInputSection *isec : inputSections) { + if (isec->shouldOmitFromOutput()) + continue; + if (isec->getName() != section_names::objcSelrefs) + continue; + // We expect a single relocation per selref entry to __objc_methname that + // might be aggregated. + assert(isec->relocs.size() == 1); + auto Reloc = isec->relocs[0]; + if (const auto *sym = Reloc.referent.dyn_cast()) { + if (const auto *d = dyn_cast(sym)) { + auto *cisec = cast(d->isec); + auto methname = cisec->getStringRefAtOffset(d->value); + methnameToSelref[CachedHashStringRef(methname)] = isec; + } + } + } +} + void ObjCStubsSection::addEntry(Symbol *sym) { StringRef methname = getMethname(sym); - offsets.push_back( - in.objcMethnameSection->getStringOffset(methname).outSecOff); + // We create a selref entry for each unique methname. + if (!methnameToSelref.count(CachedHashStringRef(methname))) { + auto methnameOffset = + in.objcMethnameSection->getStringOffset(methname).outSecOff; + + size_t wordSize = target->wordSize; + uint8_t *selrefData = bAlloc().Allocate(wordSize); + write64le(selrefData, methnameOffset); + auto *objcSelref = makeSyntheticInputSection( + segment_names::data, section_names::objcSelrefs, + S_LITERAL_POINTERS | S_ATTR_NO_DEAD_STRIP, + ArrayRef{selrefData, wordSize}, + /*align=*/wordSize); + objcSelref->live = true; + objcSelref->relocs.push_back( + {/*type=*/target->unsignedRelocType, + /*pcrel=*/false, /*length=*/3, + /*offset=*/0, + /*addend=*/static_cast(methnameOffset), + /*referent=*/in.objcMethnameSection->isec}); + objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref); + inputSections.push_back(objcSelref); + objcSelref->isFinal = true; + methnameToSelref[CachedHashStringRef(methname)] = objcSelref; + } auto stubSize = config->objcStubsMode == ObjCStubsMode::fast ? target->objcStubsFastSize @@ -862,32 +912,6 @@ void ObjCStubsSection::setUp() { if (!isa(objcMsgSend)) in.stubs->addEntry(objcMsgSend); } - - size_t size = offsets.size() * target->wordSize; - uint8_t *selrefsData = bAlloc().Allocate(size); - for (size_t i = 0, n = offsets.size(); i < n; ++i) - write64le(&selrefsData[i * target->wordSize], offsets[i]); - - in.objcSelrefs = - makeSyntheticInputSection(segment_names::data, section_names::objcSelrefs, - S_LITERAL_POINTERS | S_ATTR_NO_DEAD_STRIP, - ArrayRef{selrefsData, size}, - /*align=*/target->wordSize); - in.objcSelrefs->live = true; - - for (size_t i = 0, n = offsets.size(); i < n; ++i) { - in.objcSelrefs->relocs.push_back( - {/*type=*/target->unsignedRelocType, - /*pcrel=*/false, /*length=*/3, - /*offset=*/static_cast(i * target->wordSize), - /*addend=*/offsets[i] * in.objcMethnameSection->align, - /*referent=*/in.objcMethnameSection->isec}); - } - - in.objcSelrefs->parent = - ConcatOutputSection::getOrCreateForInput(in.objcSelrefs); - inputSections.push_back(in.objcSelrefs); - in.objcSelrefs->isFinal = true; } uint64_t ObjCStubsSection::getSize() const { @@ -898,15 +922,16 @@ uint64_t ObjCStubsSection::getSize() const { } void ObjCStubsSection::writeTo(uint8_t *buf) const { - assert(in.objcSelrefs->live); - assert(in.objcSelrefs->isFinal); - uint64_t stubOffset = 0; for (size_t i = 0, n = symbols.size(); i < n; ++i) { Defined *sym = symbols[i]; + + auto methname = getMethname(sym); + auto j = methnameToSelref.find(CachedHashStringRef(methname)); + assert(j != methnameToSelref.end()); + auto selrefAddr = j->second->getVA(0); target->writeObjCMsgSendStub(buf + stubOffset, sym, in.objcStubs->addr, - stubOffset, in.objcSelrefs->getVA(), i, - objcMsgSend); + stubOffset, selrefAddr, objcMsgSend); } } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 5ae97954ab3f..8d54cacc8d75 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -324,6 +324,7 @@ class StubHelperSection final : public SyntheticSection { class ObjCStubsSection final : public SyntheticSection { public: ObjCStubsSection(); + void initialize(); void addEntry(Symbol *sym); uint64_t getSize() const override; bool isNeeded() const override { return !symbols.empty(); } @@ -337,7 +338,7 @@ class ObjCStubsSection final : public SyntheticSection { private: std::vector symbols; - std::vector offsets; + llvm::DenseMap methnameToSelref; Symbol *objcMsgSend = nullptr; }; @@ -794,7 +795,6 @@ struct InStruct { StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; ObjCStubsSection *objcStubs = nullptr; - ConcatInputSection *objcSelrefs = nullptr; UnwindInfoSection *unwindInfo = nullptr; ObjCImageInfoSection *objCImageInfo = nullptr; ConcatInputSection *imageLoaderCache = nullptr; diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index b07967d0abb7..cc47ae4386b4 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -71,7 +71,7 @@ class TargetInfo { virtual void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, uint64_t &stubOffset, - uint64_t selrefsVA, uint64_t selectorIndex, + uint64_t selrefVA, Symbol *objcMsgSend) const = 0; // Symbols may be referenced via either the GOT or the stubs section, diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 3634c626f069..65b598d1d7c4 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -720,6 +720,7 @@ static void addNonWeakDefinition(const Defined *defined) { void Writer::scanSymbols() { TimeTraceScope timeScope("Scan symbols"); + in.objcStubs->initialize(); for (Symbol *sym : symtab->getSymbols()) { if (auto *defined = dyn_cast(sym)) { if (!defined->isLive()) diff --git a/lld/test/MachO/objc-selrefs.s b/lld/test/MachO/objc-selrefs.s index a906978cc2cf..eebe7c6476cd 100644 --- a/lld/test/MachO/objc-selrefs.s +++ b/lld/test/MachO/objc-selrefs.s @@ -24,7 +24,6 @@ # SELREFS-NEXT: __TEXT:__objc_methname:length # SELREFS-EMPTY: -## We don't yet support dedup'ing implicitly-defined selrefs. # RUN: %lld -dylib -arch arm64 -lSystem --icf=all -o %t/explicit-and-implicit \ # RUN: %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o %t/implicit-selrefs.o \ # RUN: -no_fixup_chains @@ -44,8 +43,6 @@ # EXPLICIT-AND-IMPLICIT: Contents of (__DATA,__objc_selrefs) section # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:bar -# NOTE: Ideally this wouldn't exist, but while it does it needs to point to the deduplicated string -# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:length #--- explicit-selrefs-1.s diff --git a/lld/test/MachO/x86-64-objc-stubs.s b/lld/test/MachO/x86-64-objc-stubs.s index 2dd8d5593771..a8bf5de9fe03 100644 --- a/lld/test/MachO/x86-64-objc-stubs.s +++ b/lld/test/MachO/x86-64-objc-stubs.s @@ -10,6 +10,11 @@ # WARNING: warning: -objc_stubs_small is not yet implemented, defaulting to -objc_stubs_fast +# RUN: %lld -arch x86_64 -lSystem -o %t-icfsafe.out --icf=safe %t.o +# RUN: llvm-otool -vs __DATA __objc_selrefs %t-icfsafe.out | FileCheck %s --check-prefix=ICF +# RUN: %lld -arch x86_64 -lSystem -o %t-icfall.out --icf=all %t.o +# RUN: llvm-otool -vs __DATA __objc_selrefs %t-icfall.out | FileCheck %s --check-prefix=ICF + # CHECK: Sections: # CHECK: __got {{[0-9a-f]*}} [[#%x, GOTSTART:]] DATA # CHECK: __objc_selrefs {{[0-9a-f]*}} [[#%x, SELSTART:]] DATA @@ -21,6 +26,13 @@ # CHECK-NEXT: [[#%x, FOOSELREF:]] __TEXT:__objc_methname:foo # CHECK-NEXT: [[#%x, LENGTHSELREF:]] __TEXT:__objc_methname:length +# ICF: Contents of (__DATA,__objc_selrefs) section + +# ICF-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:foo +# ICF-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:bar +# ICF-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:length +# ICF-EMPTY: + # CHECK: Contents of (__TEXT,__objc_stubs) section # CHECK-NEXT: _objc_msgSend$foo: