diff --git a/Makefile b/Makefile index 3643a286..0a5aa959 100644 --- a/Makefile +++ b/Makefile @@ -142,7 +142,7 @@ AS_DEFINES := -DMIPSEB -D_LANGUAGE_ASSEMBLY -D_ULTRA64 C_DEFINES := -DLANGUAGE_C -D_LANGUAGE_C ENDIAN := -EB -OPTFLAGS := -O2 +OPTFLAGS := -O2 -g3 MIPS_VERSION := -mips2 ICONV_FLAGS := --from-code=UTF-8 --to-code=EUC-JP @@ -188,6 +188,7 @@ $(shell mkdir -p $(BUILD_DIR)/linker_scripts/$(VERSION) $(BUILD_DIR)/linker_scri # directory flags +build/src/boot/O2/%.o: OPTFLAGS := -O2 # per-file flags diff --git a/include/loadfragment.h b/include/loadfragment.h new file mode 100644 index 00000000..37a747ce --- /dev/null +++ b/include/loadfragment.h @@ -0,0 +1,38 @@ +#ifndef LOADFRAGMENT_H +#define LOADFRAGMENT_H + +#include "ultra64.h" + +extern s32 gOverlayLogSeverity; + +#define RELOC_SECTION(reloc) ((reloc) >> 30) /* bits [31:30] of a 32-bit entry: section id */ +#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF) /* bits [23:0]: offset */ +#define RELOC_TYPE_MASK(reloc) ((reloc) & 0x3F000000) /* bits [29:24]: relocation type, left in place (not shifted down) */ +#define RELOC_TYPE_SHIFT 24 /* bit position of the type field; shift an R_MIPS_* value by this to compare with RELOC_TYPE_MASK */ + +/* MIPS Relocation Types, matches the MIPS ELF spec */ +#define R_MIPS_32 2 +#define R_MIPS_26 4 +#define R_MIPS_HI16 5 +#define R_MIPS_LO16 6 + +typedef enum { + /* 0 */ RELOC_SECTION_NULL, + /* 1 */ RELOC_SECTION_TEXT, + /* 2 */ RELOC_SECTION_DATA, + /* 3 */ RELOC_SECTION_RODATA, + /* 4 */ RELOC_SECTION_MAX /* number of section ids, usable as an array length */ +} RelocSectionId; + +typedef struct OverlayRelocationSection { + /* 0x00 */ u32 textSize; + /* 0x04 */ u32 dataSize; + /* 0x08 */ u32 rodataSize; + /* 0x0C */ u32 bssSize; + /* 0x10 */ u32 nRelocations; + /* 0x14 */ u32 relocations[1]; // size is nRelocations +} OverlayRelocationSection; // size >= 0x18 + +s32 Overlay_Load(void *vromStart, void *vromEnd, void *ovlStart, void *ovlEnd, void *vramStart, void *vramEnd, void *allocatedRamAddr, OverlayRelocationSection *ovlRelocs); + +#endif diff --git a/include/z_std_dma.h b/include/z_std_dma.h index e1529565..7096e501 100644 --- a/include/z_std_dma.h +++ b/include/z_std_dma.h @@ -23,7 +23,7 @@ extern DmaEntry gDmaDataTable[]; // void func_80026828_jp(); // void 
func_800269E4_jp(); // void func_80026A64_jp(); -// void func_80026B44_jp(); +void DmaMgr_RequestSync(void* ram, void* vrom, size_t size); // void func_80026BC0_jp(); // void func_80026C00_jp(); // void func_80026C28_jp(); diff --git a/linker_scripts/jp/symbol_addrs_boot.txt b/linker_scripts/jp/symbol_addrs_boot.txt index 22536ef5..69bec0ce 100644 --- a/linker_scripts/jp/symbol_addrs_boot.txt +++ b/linker_scripts/jp/symbol_addrs_boot.txt @@ -27,7 +27,7 @@ func_80026814_jp = 0x80026814; // type:func func_80026828_jp = 0x80026828; // type:func func_800269E4_jp = 0x800269E4; // type:func func_80026A64_jp = 0x80026A64; // type:func -func_80026B44_jp = 0x80026B44; // type:func +DmaMgr_RequestSync = 0x80026B44; // type:func func_80026BC0_jp = 0x80026BC0; // type:func func_80026C00_jp = 0x80026C00; // type:func func_80026C28_jp = 0x80026C28; // type:func @@ -153,7 +153,7 @@ gfxprint_printf = 0x8002B770; // type:func MtxConv_F2L = 0x8002B7A0; // type:func MtxConv_L2F = 0x8002B8E8; // type:func -func_8002B910_jp = 0x8002B910; // type:func +Overlay_Load = 0x8002B910; // type:func DoRelocation = 0x8002B9C0; // type:func _dbg_hungup = 0x8002BC00; // type:func @@ -279,6 +279,8 @@ D_8003BC50_jp = 0x8003BC50; // D_8003BC90_jp = 0x8003BC90; // D_8003BC94_jp = 0x8003BC94; // +gOverlayLogSeverity = 0x8003BCA0; // type:s32 size:0x4 + gfxprint_moji_tlut = 0x8003BCB0; // type:u8 size:0x80 gfxprint_rainbow_tlut = 0x8003BD30; // type:u8 size:0x20 gfxprint_rainbow_txtr = 0x8003BD50; // type:u8 size:0x8 diff --git a/src/boot/O2/loadfragment2.c b/src/boot/O2/loadfragment2.c new file mode 100644 index 00000000..42a826d4 --- /dev/null +++ b/src/boot/O2/loadfragment2.c @@ -0,0 +1,182 @@ +/** + * @file loadfragment2.c + * + * Functions used to process and relocate dynamically loadable code segments (overlays). 
+ * + * @note: + * These are for specific fragment overlays with the .ovl file extension + */ +#include "global.h" +#include "loadfragment.h" +#include "z_std_dma.h" + +void DoRelocation(void* allocatedRamAddr, OverlayRelocationSection* ovlRelocs, void* vramStart); + +// Unknown original name. Loads one overlay: DMA requests for the image and its relocation section, relocation, bss clear, cache sync. Returns vramEnd - vramStart. +s32 Overlay_Load(void* vromStart, void* vromEnd, void* ovlStart, void* ovlEnd, void* vramStart, void* vramEnd, + void* allocatedRamAddr, OverlayRelocationSection* ovlRelocs) { + OverlayRelocationSection* ovl = ovlRelocs; + s32 vromSize = (uintptr_t)vromEnd - (uintptr_t)vromStart; + s32 ovlSize = (uintptr_t)ovlEnd - (uintptr_t)ovlStart; + s32 vramSize = (uintptr_t)vramEnd - (uintptr_t)vramStart; + void* end = (void*)((uintptr_t)allocatedRamAddr + vromSize); // first byte past the vromSize bytes requested below + + DmaMgr_RequestSync(allocatedRamAddr, vromStart, vromSize); // ram = allocatedRamAddr, vrom = vromStart + DmaMgr_RequestSync(ovl, ovlStart, ovlSize); // ram = caller-provided ovlRelocs buffer, vrom = ovlStart + DoRelocation(allocatedRamAddr, ovl, vramStart); // rewrite addresses compiled for vramStart to the load address + + if (ovl->bssSize != 0) { + bzero(end, ovl->bssSize); // zero bssSize bytes starting at `end` + } + + osWritebackDCache(allocatedRamAddr, vramSize); // both cache calls cover vramSize bytes from the load address + osInvalICache(allocatedRamAddr, vramSize); + return vramSize; +} + +// Extract MIPS register rs from an instruction word +#define MIPS_REG_RS(insn) (((insn) >> 0x15) & 0x1F) + +// Extract MIPS register rt from an instruction word +#define MIPS_REG_RT(insn) (((insn) >> 0x10) & 0x1F) + +// Extract MIPS jump target from an instruction word +#define MIPS_JUMP_TARGET(insn) (((insn)&0x03FFFFFF) << 2) + +/** + * Performs runtime relocation of overlay files, loadable code segments. + * + * Overlays are expected to be loadable anywhere in direct-mapped cached (KSEG0) memory, with some appropriate + * alignment requirements; memory addresses in such code must be updated once loaded in order to execute properly. + * When compiled, overlays are given 'fake' KSEG0 RAM addresses larger than the total possible available main memory + * (>= 0x80800000), such addresses are referred to as Virtual RAM (VRAM) to distinguish them. 
When loading the overlay + * the relocation table produced at compile time is consulted to determine where and how to update these VRAM addresses + * to correct RAM addresses based on the location the overlay was loaded at, enabling the code to execute at this + * address as if it were compiled to run at this address. + * + * Each relocation is represented by a packed 32-bit value, formatted in the following way: + * - [31:30] 2-bit section id, taking values from the `RelocSectionId` enum. + * - [29:24] 6-bit relocation type describing which relocation operation should be performed. Same as ELF32 MIPS. + * - [23: 0] 24-bit section-relative offset indicating where in the section to apply this relocation. + * + * @param allocatedRamAddr Memory address the binary was loaded at. + * @param ovlRelocs Overlay relocation section containing overlay section layout and runtime relocations. + * @param vramStart Virtual RAM address that the overlay was compiled at. + */ +void DoRelocation(void* allocatedRamAddr, OverlayRelocationSection* ovlRelocs, void* vramStart) { + uintptr_t sections[RELOC_SECTION_MAX]; // loaded base address of each section, indexed by RelocSectionId + u32* relocDataP; + u32 reloc; + u32 relocData; + u32 isLoNeg; + uintptr_t allocu32 = (uintptr_t)allocatedRamAddr; + u32 i; + u32* regValP; + //! MIPS ELF relocation does not generally require tracking register values, so at first glance it appears this + //! register tracking was an unnecessary complication. However there is a bug in the IDO compiler that can cause + //! relocations to be emitted in the wrong order under rare circumstances when the compiler attempts to reuse a + //! previous HI16 relocation for a different LO16 relocation as an optimization. This register tracking is likely + //! a workaround to prevent improper matching of unrelated HI16 and LO16 relocations that would otherwise arise + //! due to the incorrect ordering. 
+ u32* luiRefs[32]; // per register: address of the most recent HI16-relocated instruction that targets it + u32 luiVals[32]; // per register: that instruction word as read when its HI16 entry was processed + u32* luiInstRef; + UNUSED u32 dbg; + ptrdiff_t relocOffset = 0; + u32 relocatedValue = 0; + UNUSED uintptr_t unrelocatedAddress = 0; + uintptr_t relocatedAddress = 0; + UNUSED s32 pad; + + if (gOverlayLogSeverity >= 3) {} // empty body; presumably a stripped debug log - TODO confirm + + sections[RELOC_SECTION_NULL] = 0; + sections[RELOC_SECTION_TEXT] = allocu32; + sections[RELOC_SECTION_DATA] = ovlRelocs->textSize + allocu32; + sections[RELOC_SECTION_RODATA] = ovlRelocs->dataSize + sections[RELOC_SECTION_DATA]; // layout: text, then data, then rodata + + for (i = 0; i < ovlRelocs->nRelocations; i++) { + reloc = ovlRelocs->relocations[i]; + // This will always resolve to a 32-bit aligned address as each section + // containing code or pointers must be aligned to at least 4 bytes and the + // MIPS ABI defines the offset of both 16-bit and 32-bit relocations to be + // the start of the 32-bit word containing the target. + relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc)); + relocData = *relocDataP; + + switch (RELOC_TYPE_MASK(reloc)) { + case R_MIPS_32 << RELOC_TYPE_SHIFT: + // Handles 32-bit address relocation, used for things such as jump tables and pointers in data. + // Just relocate the full address. + + // Check address is valid for relocation (bits [27:24] must be zero, otherwise the word is left untouched) + if ((*relocDataP & 0x0F000000) == 0) { + relocOffset = *relocDataP - (uintptr_t)vramStart; + relocatedValue = relocOffset + allocu32; + unrelocatedAddress = relocData; + relocatedAddress = relocatedValue; + *relocDataP = relocatedAddress; + } + break; + + case R_MIPS_26 << RELOC_TYPE_SHIFT: + // Handles 26-bit address relocation, used for jumps and jals. + // Extract the address from the target field of the J-type MIPS instruction. + // Relocate the address and update the instruction. 
+ if (1) { + relocOffset = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)) - (uintptr_t)vramStart; + unrelocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)); + relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0x0FFFFFFF) >> 2); + relocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(relocatedValue)); + *relocDataP = relocatedValue; + } + break; + + case R_MIPS_HI16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 1. + // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction. + // This will be updated later in the `R_MIPS_LO16` section. + + luiRefs[MIPS_REG_RT(*relocDataP)] = relocDataP; + luiVals[MIPS_REG_RT(*relocDataP)] = *relocDataP; + break; + + case R_MIPS_LO16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 2. + // Grab the stored LUI (hi) from the `R_MIPS_HI16` section using the `rs` register of the instruction. + // The full address is calculated, relocated, and then used to update both the LUI and lo instructions. + // If the lo part is negative, add 1 to the LUI value. + // Note: The lo instruction is assumed to have a signed immediate. + + luiInstRef = luiRefs[MIPS_REG_RS(*relocDataP)]; + regValP = &luiVals[MIPS_REG_RS(*relocDataP)]; + + // Check address is valid for relocation + if ((((*regValP << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { + relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vramStart; + isLoNeg = ((relocOffset + allocu32) & 0x8000) ? 
1 : 0; // adjust for signed immediate + unrelocatedAddress = (*luiInstRef << 0x10) + (s16)relocData; + *luiInstRef = + (*luiInstRef & 0xFFFF0000) | ((((relocOffset + allocu32) >> 0x10) & 0xFFFF) + isLoNeg); + relocatedValue = (*relocDataP & 0xFFFF0000) | ((relocOffset + allocu32) & 0xFFFF); + + relocatedAddress = (*luiInstRef << 0x10) + (s16)relocatedValue; + *relocDataP = relocatedValue; + } + break; + } + + dbg = 16; // dbg is only written, never read: this second switch has no visible effect (presumably residue of stripped debug output) + switch (RELOC_TYPE_MASK(reloc)) { + case R_MIPS_32 << RELOC_TYPE_SHIFT: + dbg += 6; + FALLTHROUGH; + case R_MIPS_26 << RELOC_TYPE_SHIFT: + dbg += 10; + FALLTHROUGH; + case R_MIPS_LO16 << RELOC_TYPE_SHIFT: + if (gOverlayLogSeverity >= 3) {} + // Adding a break prevents matching + } + } +} diff --git a/src/boot/logseverity.c b/src/boot/logseverity.c new file mode 100644 index 00000000..940aa87c --- /dev/null +++ b/src/boot/logseverity.c @@ -0,0 +1,3 @@ +#include "global.h" + +s32 gOverlayLogSeverity = 2; diff --git a/yamls/jp/boot.yaml b/yamls/jp/boot.yaml index 863e3c28..228e8298 100644 --- a/yamls/jp/boot.yaml +++ b/yamls/jp/boot.yaml @@ -20,7 +20,7 @@ - [0x005BE0, asm, boot/stackcheck] - [0x005E30, asm, boot/gfxprint] - [0x006BA0, asm, boot/mtxuty-cvt] - - [0x006D10, asm, boot/006D10] + - [0x006D10, c, boot/O2/loadfragment2] - [0x007000, asm, boot/debug] - [0x007060, asm, boot/007060] - [0x0073A0, asm, boot/0073A0] @@ -202,6 +202,7 @@ - [0x016EF0, data, libultra/rsp/rspboot] - [0x016FC0, data, boot/016FC0] + - [0x0170A0, .data, boot/logseverity] - [0x0170B0, data, boot/gfxprint] - [0x017960, data, boot/017960] # boot/007060 starts around here, but no idea where it ends