diff --git a/file/file.cpp b/file/file.cpp
index 6fe018f..5802780 100644
--- a/file/file.cpp
+++ b/file/file.cpp
@@ -13,7 +13,7 @@
 static const char *FILE_TAG = "file";
 
 // 将 value 对齐到 alignment 的倍数
-int alignTo(int value, int alignment) {
-    int remainder = value % alignment;
+uint64_t alignTo(uint64_t value, uint64_t alignment) {
+    uint64_t remainder = value % alignment; // same width as the operands; an int would silently narrow the result
     if (remainder == 0) {
         return value; // 已对齐
diff --git a/file/file.h b/file/file.h
index bac2cd1..7d17f6c 100644
--- a/file/file.h
+++ b/file/file.h
@@ -8,7 +8,7 @@
 #include
 #include
 
-int alignTo(int value, int alignment);
+uint64_t alignTo(uint64_t value, uint64_t alignment);
 
 extern void openFile(const char *fileName);
 
diff --git a/generator/arm64/macos_syscall.cpp b/generator/arm64/macos_syscall.cpp
index 20e7849..a07da0f 100644
--- a/generator/arm64/macos_syscall.cpp
+++ b/generator/arm64/macos_syscall.cpp
@@ -3,7 +3,15 @@
 //
 
 #include "macos_syscall.h"
+#include "binary_arm64.h"
+#include "register_arm64.h"
 
 void initMachOProgramStart() {
-
+    emitLabel("_start");
+    binaryOp2(INST_MOV, 1, X29, 0, true);
+    binaryOp2(INST_MOV, 1, X30, 0, true);
+    binaryOpBranch(INST_BL, UNUSED, "main");
+    //keep the "main" ret value in X0; X16 is loaded with the exit syscall number before the svc
+    binaryOp2(INST_MOV, 1, X16, MACOS_SYS_EXIT, true);
+    binaryOpSvc(INST_SVC, 0x80);
 }
\ No newline at end of file
diff --git a/generator/arm64/macos_syscall.h b/generator/arm64/macos_syscall.h
index 4ced671..7c2d97a 100644
--- a/generator/arm64/macos_syscall.h
+++ b/generator/arm64/macos_syscall.h
@@ -5,6 +5,16 @@
 #ifndef PCC_MACOS_SYSCALL_H
 #define PCC_MACOS_SYSCALL_H
 
+enum MacOsArm64SysCallNumber {
+    MACOS_SYS_SYSCALL = 0,
+    MACOS_SYS_EXIT = 1,
+    MACOS_SYS_FORK = 2,
+    MACOS_SYS_READ = 3,
+    MACOS_SYS_WRITE = 4,
+    MACOS_SYS_OPEN = 5,
+    MACOS_SYS_CLOSE = 6
+};
+
 void initMachOProgramStart();
 
 #endif //PCC_MACOS_SYSCALL_H
diff --git a/generator/arm64/windows_syscall.cpp b/generator/arm64/windows_syscall.cpp
index 7ba5c0b..99ce859 100644
--- a/generator/arm64/windows_syscall.cpp
+++ b/generator/arm64/windows_syscall.cpp
@@ -8,7 +8,8 @@ void initWindowsArm64ProgramStart() {
     binaryOp2(INST_MOV, 1, X29, 0, true);
     binaryOp2(INST_MOV, 1, X30, 0, true);
     binaryOpBranch(INST_BL, UNUSED, "main");
-    //keep the "main" ret value in X0
+    //move the "main" ret value to X1, set X0 with "invalid handle"
+    //NtTerminateProcess(INVALID_HANDLE_VALUE, returnValue);
     binaryOp2(INST_MOV, 1, X1, X0, false);
     binaryOp2(INST_MOV, 1, X0, INVALID_HANDLE_VALUE, true);
     binaryOpSvc(INST_SVC, NtTerminateProcess);
diff --git a/generator/assembler.cpp b/generator/assembler.cpp
index 6a03a8a..4bfb7e5 100644
--- a/generator/assembler.cpp
+++ b/generator/assembler.cpp
@@ -12,6 +12,7 @@
 #include "file.h"
 #include "pe.h"
 #include "macho.h"
+#include "sys/syscall.h"
 
 static const char *ASSEMBLER_TAG = "assembler";
 
@@ -158,84 +159,111 @@ void generatePeArm64(Mir *mir,
 }
 
 void generateMachoArm64(Mir *mir,
-        int sharedLibrary,
-        const char *outputFileName) {
+                        int sharedLibrary,
+                        const char *outputFileName) {
     if (outputFileName == nullptr) {
         outputFileName = "output.macho";
     }
     openFile(outputFileName);
 
-    int currentOffset = 0;
+    const uint64_t ram_alignment = 16384;
+    const uint64_t file_alignment = 4;
+
     int loadCommandCount = 0;
-    int sectionCount = 1; // Initial section count (__TEXT segment)
+    uint64_t currentFileAddr = 0;
+    uint64_t currentVmAddr = 0;
 
     initMachOProgramStart();
     // Generate ARM64 target code
-    int textSectionSize = generateArm64Target(mir);
-    currentOffset += sizeof(struct mach_header_64);
+    generateArm64Target(mir);
+    relocateBinary(0);
+    InstBuffer *instBuffer = getEmittedInstBuffer();
 
     // Calculate offsets and counts
-    loadCommandCount += 2; // One LC_SEGMENT_64 and one LC_SYMTAB
-    currentOffset += loadCommandCount * sizeof(struct segment_command_64);
-    int textSectionOffset = currentOffset;
-    currentOffset += textSectionSize;
-    int stringTableOffset = currentOffset;
+    /**
+     * mach-o header
+     * [
+     * segment64: page0
+     * segment64: __TEXT, directly followed by its section64: __text
+     * lc_main (an entry_point_command, not a segment)
+     * ]
+     */
+    currentFileAddr += sizeof(mach_header_64);//header
+    currentFileAddr += sizeof(segment_command_64);//__PAGEZERO
+    currentFileAddr += sizeof(segment_command_64);//__TEXT
+    currentFileAddr += sizeof(section_64);//__text section
+    currentFileAddr += sizeof(entry_point_command);//LC_MAIN
 
-    // Set program entry point
-    uint64_t programEntry = 0x100000000 + textSectionOffset;
+    uint64_t codeFileAddr = alignTo(currentFileAddr, file_alignment);
 
-    // Relocate binary
-    relocateBinary(0);
-    InstBuffer *instBuffer = getEmittedInstBuffer();
+    uint64_t loadCommandSize = 0;
+    loadCommandSize += sizeof(segment_command_64);//__PAGEZERO
+    loadCommandSize += sizeof(segment_command_64) + sizeof(section_64);//__TEXT plus the __text section_64 that belongs to it
+    loadCommandSize += sizeof(entry_point_command);//LC_MAIN
+    //lc_segment64: page0
+    const uint64_t page0Size = 0x100000000;
+    uint64_t pageZeroSize = alignTo(page0Size, ram_alignment);
+    segment_command_64 *pageZeroLc = createSegmentCommand64("__PAGEZERO",
+                                                            currentVmAddr,
+                                                            pageZeroSize,
+                                                            0,
+                                                            0,
+                                                            VM_PROT_NONE,
+                                                            VM_PROT_NONE,
+                                                            0,
+                                                            0);
+    loadCommandCount++;
+    currentVmAddr += pageZeroSize;
+    currentVmAddr = alignTo(currentVmAddr, ram_alignment);
+
+    //lc_segment64: text (NOTE(review): its cmdsize must also cover the section_64 - confirm in createSegmentCommand64)
+    uint64_t textVmSize = alignTo(codeFileAddr + instBuffer->size, ram_alignment);
+    const char *TEXT_SEGMENT_NAME = "__TEXT";
+    segment_command_64 *textLc = createSegmentCommand64(TEXT_SEGMENT_NAME,
+                                                        currentVmAddr,
+                                                        textVmSize,
+                                                        0,
+                                                        textVmSize,
+                                                        VM_PROT_EXECUTE | VM_PROT_READ,
+                                                        VM_PROT_EXECUTE | VM_PROT_READ,
+                                                        1,
+                                                        0);
+    loadCommandCount++;
+    currentVmAddr += textVmSize;
+    currentVmAddr = alignTo(currentVmAddr, ram_alignment);
+
+    //lc_main
+    entry_point_command *mainLc = createEntryPointCommand(codeFileAddr, 0);
+    loadCommandCount++;
+
+    //section64:text (written right after the __TEXT segment command below)
+    uint64_t codeVmAddr = page0Size + codeFileAddr;
+    section_64 *textSection = createSection64("__text",
+                                              TEXT_SEGMENT_NAME,
+                                              codeVmAddr,
+                                              instBuffer->size,
+                                              codeFileAddr,
+                                              file_alignment,
+                                              0,
+                                              0,
+                                              S_REGULAR | S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS);
 
-    // Create Mach-O header
-    struct mach_header_64 *machHeader = createMachHeader64(
+    mach_header_64 *machHeader = createMachHeader64(
             CPU_TYPE_ARM64,
             CPU_SUBTYPE_ARM64_ALL,
             MH_EXECUTE,
             loadCommandCount,
-            loadCommandCount * sizeof(struct segment_command_64),
+            loadCommandSize,
             MH_NOUNDEFS | MH_PIE);
 
-    // Create __TEXT segment
-    struct segment_command_64 *textSegment = createSegmentCommand64(
-            "__TEXT",
-            0x100000000,
-            0x1000,
-            textSectionOffset,
-            textSectionSize,
-            VM_PROT_READ | VM_PROT_EXECUTE,
-            VM_PROT_READ | VM_PROT_EXECUTE,
-            1,
-            0);
-
-    // Create __text section
-    struct section_64 *textSection = createSection64(
-            "__text",
-            "__TEXT",
-            0x100000000 + textSectionOffset,
-            textSectionSize,
-            textSectionOffset,
-            4,
-            0,
-            0,
-            S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS);
-
-    // Create symbol table command
-    struct symtab_command *symtabCommand = createSymtabCommand(
-            stringTableOffset, // Offset of string table
-            0, // Number of symbols
-            stringTableOffset, // Offset of string table
-            0); // Size of string table
-
-    writeFileB(machHeader, sizeof(struct mach_header_64));
-    writeFileB(textSegment, sizeof(struct segment_command_64));
-    writeFileB(textSection, sizeof(struct section_64));
+    writeFileB(machHeader, sizeof(mach_header_64));
+    writeFileB(pageZeroLc, sizeof(segment_command_64));
+    writeFileB(textLc, sizeof(segment_command_64));
+    writeFileB(textSection, sizeof(section_64));
+    writeFileB(mainLc, sizeof(entry_point_command));
+    writeEmptyAlignment(file_alignment);
     writeFileB(instBuffer->result, instBuffer->size);
 
-    const char *stringTable = "\0";
-    writeFileB(stringTable, 1);
-
     logd(ASSEMBLER_TAG, "mach-o arm64 generation finish.");
 }
 
diff --git a/wrapper/macho.cpp b/wrapper/macho.cpp
index e052e72..1ad806f 100644
--- a/wrapper/macho.cpp
+++ b/wrapper/macho.cpp
@@ -3,19 +3,22 @@
 //
 
 #include "macho.h"
+#include "logger.h"
 
 #include
 #include
 #include
 
+static const char *MACHO_TAG = "macho";
+
 mach_header_64 *createMachHeader64(uint32_t cputype,
-        uint32_t cpusubtype,
-        uint32_t filetype,
-        uint32_t ncmds,
-        uint32_t sizeofcmds,
-        uint32_t flags) {
+                                   uint32_t cpusubtype,
+                                   uint32_t filetype,
+                                   uint32_t ncmds,
+                                   uint32_t sizeofcmds,
+                                   uint32_t flags) {
     // 分配内存
-    mach_header_64 *header = (mach_header_64 *)malloc(sizeof(mach_header_64));
+    mach_header_64 *header = (mach_header_64 *) malloc(sizeof(mach_header_64));
 
     // 检查分配是否成功
     if (header == NULL) {
@@ -37,16 +40,16 @@ mach_header_64 *createMachHeader64(uint32_t cputype,
 }
 
 segment_command_64 *createSegmentCommand64(const char *segname,
-        uint64_t vmaddr,
-        uint64_t vmsize,
-        uint64_t fileoff,
-        uint64_t filesize,
-        uint32_t maxprot,
-        uint32_t initprot,
-        uint32_t nsects,
-        uint32_t flags) {
+                                           uint64_t vmaddr,
+                                           uint64_t vmsize,
+                                           uint64_t fileoff,
+                                           uint64_t filesize,
+                                           uint32_t maxprot,
+                                           uint32_t initprot,
+                                           uint32_t nsects,
+                                           uint32_t flags) {
     // 分配内存
-    segment_command_64 *segment = (segment_command_64 *)malloc(sizeof(segment_command_64));
+    segment_command_64 *segment = (segment_command_64 *) malloc(sizeof(segment_command_64));
 
     // 检查分配是否成功
     if (segment == NULL) {
@@ -71,11 +74,11 @@ segment_command_64 *createSegmentCommand64(const char *segname,
 }
 
 symtab_command *createSymtabCommand(uint32_t symoff,
-        uint32_t nsyms,
-        uint32_t stroff,
-        uint32_t strsize) {
+                                    uint32_t nsyms,
+                                    uint32_t stroff,
+                                    uint32_t strsize) {
     // 分配内存
-    symtab_command *symtab = (symtab_command *)malloc(sizeof(symtab_command));
+    symtab_command *symtab = (symtab_command *) malloc(sizeof(symtab_command));
 
     // 检查分配是否成功
     if (symtab == NULL) {
@@ -95,16 +98,16 @@ symtab_command *createSymtabCommand(uint32_t symoff,
 }
 
 section_64 *createSection64(const char *sectname,
-        const char *segname,
-        uint64_t addr,
-        uint64_t size,
-        uint32_t offset,
-        uint32_t align,
-        uint32_t reloff,
-        uint32_t nreloc,
-        uint32_t flags) {
+                            const char *segname,
+                            uint64_t addr,
+                            uint64_t size,
+                            uint32_t offset,
+                            uint32_t align,
+                            uint32_t reloff,
+                            uint32_t nreloc,
+                            uint32_t flags) {
     // 分配内存
-    section_64 *section = (section_64 *)malloc(sizeof(section_64));
+    section_64 *section = (section_64 *) malloc(sizeof(section_64));
 
     // 检查分配是否成功
     if (section == NULL) {
@@ -129,3 +132,21 @@ section_64 *createSection64(const char *sectname,
     return section; // 返回已填充的区块指针
 }
 
+entry_point_command *createEntryPointCommand(uint64_t entryoff, uint64_t stacksize) {
+    // Heap-allocate the command; this function never frees it
+    entry_point_command *entryPointCmd = (entry_point_command *) malloc(sizeof(entry_point_command));
+
+    // Bail out if the allocation failed
+    if (entryPointCmd == nullptr) {
+        loge(MACHO_TAG, "Error: Memory allocation failed\n");
+        return nullptr;
+    }
+
+    // Fill in the entry_point_command fields
+    entryPointCmd->cmd = LC_MAIN; // load command type: LC_MAIN
+    entryPointCmd->cmdsize = sizeof(entry_point_command); // command size, 24 bytes
+    entryPointCmd->entryoff = entryoff; // entry point offset, measured from the start of the __TEXT segment
+    entryPointCmd->stacksize = stacksize; // initial stack size (0 means use the default)
+
+    return entryPointCmd; // the filled-in entry_point_command
+}
diff --git a/wrapper/macho.h b/wrapper/macho.h
index f637a7f..0ae4d2f 100644
--- a/wrapper/macho.h
+++ b/wrapper/macho.h
@@ -15,8 +15,9 @@
 #define VM_PROT_EXECUTE ((int) 0x04) /* execute permission */
 
 
-#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 // section contains only true machine instructions
-#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 // section contains some machine instructions
+#define S_REGULAR                0x0        /* regular section */
+#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 // section contains only true machine instructions
+#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 // section contains some machine instructions
 /*
  * The 64-bit mach header appears at the very beginning of object files for
  * 64-bit architectures.
@@ -57,46 +58,11 @@ typedef enum {
 typedef enum {
     // Generic subtype wildcard
     CPU_SUBTYPE_ANY = -1, /* Wildcard */
-    CPU_SUBTYPE_MULTIPLE = -1, /* Multiple subtypes */
-    CPU_SUBTYPE_LITTLE_ENDIAN = 0, /* Little endian */
-    CPU_SUBTYPE_BIG_ENDIAN = 1, /* Big endian */
-
     // x86 and x86_64
     CPU_SUBTYPE_I386_ALL = 3, /* All x86 models */
-    CPU_SUBTYPE_386 = 3, /* Intel 386 */
-    CPU_SUBTYPE_486 = 4, /* Intel 486 */
-    CPU_SUBTYPE_486SX = 4 + 128, /* Intel 486SX */
-    CPU_SUBTYPE_PENT = 5, /* Intel Pentium */
-    CPU_SUBTYPE_PENTPRO = 6, /* Intel Pentium Pro */
-    CPU_SUBTYPE_PENTII_M3 = 7, /* Intel Pentium II M3 */
-    CPU_SUBTYPE_PENTII_M5 = 8, /* Intel Pentium II M5 */
-    CPU_SUBTYPE_CELERON = 9, /* Intel Celeron */
-    CPU_SUBTYPE_CELERON_MOBILE = 10, /* Intel Celeron Mobile */
-    CPU_SUBTYPE_PENTIUM_3 = 11, /* Intel Pentium III */
-    CPU_SUBTYPE_PENTIUM_3_M = 12, /* Intel Pentium III Mobile */
-    CPU_SUBTYPE_PENTIUM_4 = 13, /* Intel Pentium 4 */
-    CPU_SUBTYPE_ITANIUM = 14, /* Intel Itanium */
-    CPU_SUBTYPE_ITANIUM_2 = 15, /* Intel Itanium 2 */
-    CPU_SUBTYPE_XEON = 16, /* Intel Xeon */
-    CPU_SUBTYPE_XEON_MP = 17, /* Intel Xeon MP */
-
-    // ARM
-    CPU_SUBTYPE_ARM_ALL = 0, /* All ARM architectures */
-    CPU_SUBTYPE_ARM_V4T = 5, /* ARMv4T */
-    CPU_SUBTYPE_ARM_V6 = 6, /* ARMv6 */
-    CPU_SUBTYPE_ARM_V5TEJ = 7, /* ARMv5TEJ */
-    CPU_SUBTYPE_ARM_XSCALE = 8, /* ARM XScale */
-    CPU_SUBTYPE_ARM_V7 = 9, /* ARMv7 */
-    CPU_SUBTYPE_ARM_V7F = 10, /* ARMv7 with floating point */
-    CPU_SUBTYPE_ARM_V7S = 11, /* ARMv7S (iPhone 5) */
-    CPU_SUBTYPE_ARM_V7K = 12, /* ARMv7K (watchOS) */
-    CPU_SUBTYPE_ARM_V8 = 13, /* ARMv8 */
-    CPU_SUBTYPE_ARM_V8_1 = 14, /* ARMv8.1 */
 
     // ARM64
     CPU_SUBTYPE_ARM64_ALL = 0, /* All ARM64 architectures */
-    CPU_SUBTYPE_ARM64_V8 = 1, /* ARM64 v8 */
-    CPU_SUBTYPE_ARM64E = 2, /* ARM64E (with pointer authentication) */
 } CpuSubType;
 
 typedef enum {
@@ -147,6 +113,14 @@ struct load_command {
     uint32_t cmd; /* type of load command */
     uint32_t cmdsize; /* total size of command in bytes */
 };
+
+struct entry_point_command {
+    uint32_t cmd; /* LC_MAIN only used in MH_EXECUTE filetypes */
+    uint32_t cmdsize; /* 24 */
+    uint64_t entryoff; /* file (__TEXT) offset of main() */
+    uint64_t stacksize;/* if not zero, initial stack size */
+};
+
 // 定义 LC_REQ_DYLD 常量
 #define LC_REQ_DYLD 0x80000000
 
@@ -280,4 +254,7 @@ mach_header_64 *createMachHeader64(uint32_t cputype,
                                    uint32_t sizeofcmds,
                                    uint32_t flags);
 
+entry_point_command *createEntryPointCommand(uint64_t entryoff,
+                                             uint64_t stacksize);
+
 #endif //PCC_MACHO_H