From 88b9d98c8fff5920246f801760ff90c7d96945ed Mon Sep 17 00:00:00 2001 From: dmaivel Date: Fri, 12 Jul 2024 15:13:17 -0400 Subject: [PATCH] Added env variable, performance improvements * added SGL_SHARED_MEMORY_DIRECT for direct access to shared memory * added scratch buffer for glMapBufferRange * remove unnecessary command submits * moved most important env variables in README upwards --- CMakeLists.txt | 2 +- README.md | 7 +++-- inc/client/pb.h | 9 +++--- inc/client/scratch.h | 8 +++++ src/client/glimpl.c | 33 +++++++------------- src/client/pb.c | 41 ++++++++++++++---------- src/client/scratch.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 46 deletions(-) create mode 100644 inc/client/scratch.h create mode 100644 src/client/scratch.c diff --git a/CMakeLists.txt b/CMakeLists.txt index beda963..a9de39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ IF(UNIX) file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/*.c" "src/network/*.c") file(GLOB GLOBBED_CLIENT_P_SOURCES CONFIGURE_DEPENDS "src/client/platform/*.c") ELSEIF(WIN32) - file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/winmain.c" "src/client/pb.c" "src/client/spinlock.c" "src/client/glimpl.c" "src/network/*.c") + file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/winmain.c" "src/client/pb.c" "src/client/spinlock.c" "src/client/glimpl.c" "src/client/scratch.c" "src/network/*.c") file(GLOB GLOBBED_CLIENT_P_SOURCES CONFIGURE_DEPENDS "src/client/platform/windrv.c") ENDIF(UNIX) diff --git a/README.md b/README.md index daa10db..c139a4f 100644 --- a/README.md +++ b/README.md @@ -70,12 +70,13 @@ Variables labeled with `host` get their values from the host/server when their o | **Option** | **Legal values** | **Default** | **Description** | |-|-|-|-| +| SGL_SHARED_MEMORY_DIRECT | Boolean | true | If you intend on only running a single accelerated application at a time, this variable ensures maximum performance by 
writing/reading directly to/from shared memory. Make sure to set to `false` if you intend on using multiclient support. Available for both Windows and Linux clients. | +| SGL_WINED3D_DONT_VFLIP | Boolean | false | If running a DirectX application via WineD3D, ensure this variable is set to `true` in order for the application to render the framebuffer in the proper orientation. Only available for Windows clients. | +| SGL_RUN_WITH_LOW_PRIORITY | Boolean | false | On single core setups, by setting the process priority to low / `IDLE_PRIORITY_CLASS`, applications will run smoother as the kernel driver is given more CPU time. Users should only set this to `true` if the VM has only a single VCPU. Only available for Windows clients. | | GL_VERSION_OVERRIDE | Digit.Digit | `host` | Override the OpenGL version on the client side. Available for both Windows and Linux clients. | | GLX_VERSION_OVERRIDE | Digit.Digit | 1.4 | Override the GLX version on the client side. Only available for Linux clients. | | GLSL_VERSION_OVERRIDE | Digit.Digit | | Override the GLSL version on the client side. Available for both Windows and Linux clients. | | SGL_NET_OVER_SHARED | Ip:Port | | If networking is enabled, this environment variable must exist on the guest. Available for both Windows and Linux clients. | -| SGL_RUN_WITH_LOW_PRIORITY | Boolean | false | On single core setups, by setting the process priority to low / `IDLE_PRIORITY_CLASS`, applications will run smoother as the kernel driver is given more CPU time. Only set to `true` if the VM has only a single VCPU. Only available for Windows clients. | -| SGL_WINED3D_DONT_VFLIP | Boolean | false | If running a DirectX application via WineD3D, ensure this variable is set to `true` in order for the application to render the framebuffer in the proper orientation. Only available for Windows clients. | ## Windows (in a VM) @@ -121,6 +122,8 @@ There are two possible drivers one may use: ``` 3. 
By default, this builds for Windows 10 x64 (`10_X64`). If you wish to compile for a different version or multiple versions, you must provide it through the command line like so: `kcertify.bat 10_X64,10_NI_X64`. A list of OS versions is provided on MSDN [here](https://learn.microsoft.com/en-us/windows-hardware/drivers/devtest/inf2cat). +If using multiclient support, please read about `SGL_SHARED_MEMORY_DIRECT` in the [environment variables](#environment-variables) section. + ### Library / ICD There are two ways to install the library on windows: diff --git a/inc/client/pb.h b/inc/client/pb.h index 683d1ce..a33eead 100644 --- a/inc/client/pb.h +++ b/inc/client/pb.h @@ -8,7 +8,7 @@ #endif #include -#include +#include struct pb_net_hooks { int(*_pb_read)(int s); @@ -23,9 +23,9 @@ struct pb_net_hooks { void pb_set_net(struct pb_net_hooks hooks, size_t internal_alloc_size); #ifndef _WIN32 -void pb_set(int pb); +void pb_set(int pb, bool direct_access); #else -void pb_set(void); +void pb_set(bool direct_access); void pb_unset(void); #endif @@ -36,9 +36,8 @@ void pb_pushf(float c); int pb_read(int s); int64_t pb_read64(int s); void pb_write(int s, int c); -void pb_copy(void *data, int s, size_t length); -void pb_memcpy(const void *src, size_t length); +void pb_memcpy(const void *src, size_t length); void pb_memcpy_unaligned(const void *src, size_t length); void pb_realign(); diff --git a/inc/client/scratch.h b/inc/client/scratch.h new file mode 100644 index 0000000..6d582a6 --- /dev/null +++ b/inc/client/scratch.h @@ -0,0 +1,8 @@ +#ifndef _SCRATCH_H_ +#define _SCRATCH_H_ + +#include + +void *scratch_buffer_get(size_t size); + +#endif \ No newline at end of file diff --git a/src/client/glimpl.c b/src/client/glimpl.c index 314efe2..3bdf2a7 100644 --- a/src/client/glimpl.c +++ b/src/client/glimpl.c @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -546,6 +547,11 @@ void *glimpl_fb_address() void glimpl_init() { char *network = getenv("SGL_NET_OVER_SHARED"); + 
char *direct_access = getenv("SGL_SHARED_MEMORY_DIRECT"); + + bool use_direct_access = direct_access == NULL; + if (direct_access != NULL) + use_direct_access = strcmp(direct_access, "true") == 0; if (network == NULL) { #ifndef _WIN32 @@ -557,9 +563,9 @@ void glimpl_init() exit(1); } - pb_set(fd); + pb_set(fd, use_direct_access); #else - pb_set(); + pb_set(use_direct_access); #endif pb_reset(); } @@ -638,6 +644,8 @@ void glimpl_init() int packed_dims = pb_read(SGL_OFFSET_REGISTER_RETVAL); icd_set_max_dimensions(UNPACK_A(packed_dims), UNPACK_B(packed_dims)); } + + glimpl_map_buffer.mem = scratch_buffer_get(0x1000); } static struct gl_vertex_attrib_pointer *glimpl_get_enabled_vap() @@ -905,8 +913,6 @@ static void glimpl_texture_subimage(int cmd, int n_dims, GLuint texture, GLint l const int offsets[3] = { xoffset, yoffset, zoffset }; const int dims[3] = { width, height, depth }; - glimpl_submit(); - glimpl_upload_texture(width, n_dims > 1 ? height : 1, n_dims > 2 ? depth : 1, format, pixels); pb_push(cmd); @@ -918,8 +924,6 @@ static void glimpl_texture_subimage(int cmd, int n_dims, GLuint texture, GLint l pb_push(dims[i]); pb_push(format); pb_push(type); - - glimpl_submit(); } static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint texture, GLint level, GLint xoffset, GLint yoffset, @@ -928,8 +932,6 @@ static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint textu const int offsets[3] = { xoffset, yoffset, zoffset }; const int dims[3] = { width, height, depth }; - glimpl_submit(); - glimpl_upload_buffer((void*)data, imageSize); pb_push(cmd); @@ -941,14 +943,10 @@ static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint textu pb_push(dims[i]); pb_push(format); pb_push(imageSize); - - glimpl_submit(); } static void glimpl_buffer_store_data(int cmd, GLuint buffer, GLsizeiptr size, const void *data, GLenum usage) { - glimpl_submit(); - if (data != NULL) glimpl_upload_buffer(data, size); @@ -957,22 +955,16 @@ static 
void glimpl_buffer_store_data(int cmd, GLuint buffer, GLsizeiptr size, co pb_push(size); pb_push(data != NULL); pb_push(usage); - - glimpl_submit(); } static void glimpl_buffer_subdata(int cmd, GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data) { - glimpl_submit(); - glimpl_upload_buffer(data, size); pb_push(cmd); pb_push(buffer); pb_push(offset); pb_push(size); - - glimpl_submit(); } static void glimpl_buffer_clear_data(int cmd, bool is_subdata, GLenum buffer, GLenum internalformat, GLintptr offset, @@ -995,13 +987,13 @@ static void glimpl_buffer_clear_data(int cmd, bool is_subdata, GLenum buffer, GL static void *glimpl_map_buffer_range(int cmd, GLenum buffer, GLintptr offset, GLsizeiptr length, GLbitfield access) { if (glimpl_map_buffer.in_use) { - fprintf(stderr, "glMapBufferRange: map buffer already in use, returning NULL\n"); + fprintf(stderr, "glimpl_map_buffer_range: map buffer already in use, returning NULL\n"); return NULL; } glimpl_map_buffer = (struct gl_map_buffer){ /* target = */ buffer, - /* mem = */ calloc(length, 1), + /* mem = */ scratch_buffer_get(length), // calloc(length, 1), // glimpl_map_buffer.mem, /* offset = */ offset, /* length = */ length, /* access = */ access, @@ -3775,7 +3767,6 @@ GLboolean glUnmapBuffer(GLenum target) pb_memcpy(glimpl_map_buffer.mem, glimpl_map_buffer.length + ((glimpl_map_buffer.length % 4 != 0) * sizeof(int))); if (glimpl_map_buffer.in_use) { - free(glimpl_map_buffer.mem); glimpl_map_buffer.in_use = false; } diff --git a/src/client/pb.c b/src/client/pb.c index f243b05..c0d5196 100644 --- a/src/client/pb.c +++ b/src/client/pb.c @@ -1,10 +1,9 @@ #include #include + #include #include -#include - #ifndef _WIN32 #define __USE_GNU #define _GNU_SOURCE @@ -52,10 +51,12 @@ static int *cur; static void *in_base; static int *in_cur; +static bool using_direct_access = false; + static struct pb_net_hooks net_hooks = { NULL }; #ifndef _WIN32 -void pb_set(int fd) +void pb_set(int fd, bool direct_access) { ptr = 
mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
@@ -65,11 +66,18 @@ void pb_set(int fd)
 
     base = ptr + 0x1000;
 
-    in_base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-    in_cur = in_base;
+    if (direct_access) {
+        using_direct_access = true;
+        in_base = base;
+        in_cur = base;
+    }
+    else {
+        in_base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        in_cur = in_base;
+    }
 }
 #else
-void pb_set(void)
+void pb_set(bool direct_access)
 {
     HDEVINFO device_info;
     PSP_DEVICE_INTERFACE_DETAIL_DATA inf_data;
@@ -115,8 +123,15 @@ void pb_set(void)
     ptr = map.pointer;
     base = (PVOID)((DWORD64)map.pointer + (DWORD64)0x1000);
 
-    in_base = VirtualAlloc(NULL, map.size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
-    in_cur = in_base;
+    if (direct_access) {
+        using_direct_access = true; /* keeps pb_copy_to_shared() from copying the region onto itself */
+        in_base = base;
+        in_cur = in_base;
+    }
+    else {
+        in_base = VirtualAlloc(NULL, map.size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+        in_cur = in_base;
+    }
 }
 
 void pb_unset(void)
@@ -143,19 +158,16 @@ void pb_reset()
 {
     cur = base;
     in_cur = in_base;
-    // pb_ctx.current_offset = pb_ctx.reset_offset;
 }
 
 void pb_push(int c)
 {
     *in_cur++ = c;
-    // pb_ctx.current_offset += sizeof(c);
 }
 
 void pb_pushf(float c)
 {
     *in_cur++ = *(int*)&c;
-    // pb_ctx.current_offset += sizeof(c);
 }
 
 int pb_read(int s)
@@ -177,11 +189,6 @@ void pb_write(int s, int c)
     *(int*)((size_t)ptr + s) = c;
 }
 
-void pb_copy(void *data, int s, size_t length)
-{
-    memcpy(data, (void*)((size_t)ptr + s), length);
-}
-
 /*
  * // equivalent to
  * int *pdata = (int*)data;
@@ -231,5 +238,7 @@ size_t pb_size()
 
 void pb_copy_to_shared()
 {
-    memcpy(base, in_base, (size_t)in_cur - (size_t)in_base);
+    /* in direct mode in_base aliases base, so there is nothing to copy */
+    if (!using_direct_access)
+        memcpy(base, in_base, (size_t)in_cur - (size_t)in_base);
 }
\ No newline at end of file
diff --git a/src/client/scratch.c b/src/client/scratch.c
new file mode 100644
index 0000000..dd3a525
--- /dev/null
+++ b/src/client/scratch.c
@@ -0,0 +1,113 @@
+#include <client/scratch.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#define __USE_GNU
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#endif
+
+/* start of the scratch region; NULL until the first successful allocation */
+static void *address = NULL;
+/* bytes mapped at address, always a multiple of 4 KiB; 0 while nothing is mapped */
+static size_t current_size = 0;
+
+/*
+ * round a value up to the next multiple of 4096
+ */
+static inline uintptr_t align_to_4kb(uintptr_t ptr)
+{
+    return (ptr + 4095) & ~(uintptr_t)4095;
+}
+
+#ifdef _WIN32
+/*
+ * grow a VirtualAlloc'd region from old_size to new_size bytes, preserving its contents
+ * returns the region's (possibly new) address, or NULL with the old region left intact
+ */
+static void *windows_mremap(void *old_address, size_t old_size, size_t new_size)
+{
+    /*
+     * check to see if we can extend the current allocation
+     *
+     * NOTE(review): VirtualAlloc is documented to fail for MEM_COMMIT without
+     * MEM_RESERVE at an address that is not already reserved, and this branch
+     * only runs for MEM_FREE memory - it looks like it can never succeed; confirm
+     */
+    MEMORY_BASIC_INFORMATION mbi;
+    if (VirtualQuery((char*)old_address + old_size, &mbi, sizeof(mbi)) == sizeof(mbi))
+        if (mbi.State == MEM_FREE && mbi.RegionSize >= (new_size - old_size))
+            if (VirtualAlloc((char*)old_address + old_size, new_size - old_size, MEM_COMMIT, PAGE_READWRITE))
+                return old_address;
+
+    /*
+     * otherwise, allocate a new region, copy, free
+     * this can be expensive, but will rarely be performed
+     */
+    void *new_address = VirtualAlloc(NULL, new_size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+    if (new_address) {
+        memcpy(new_address, old_address, old_size);
+        VirtualFree(old_address, 0, MEM_RELEASE);
+        return new_address;
+    }
+
+    return NULL;
+}
+#endif
+
+/*
+ * get a pointer to a reusable scratch region of at least `size` bytes
+ *
+ * the region is created on first use and only ever grows. growing may move it,
+ * so a pointer from an earlier call is stale once a later call needs more room.
+ * the region is handed out as-is: bytes left by a previous user are not cleared
+ *
+ * returns NULL if the region could not be created or grown; after a failed
+ * grow the previous, smaller region is still mapped and is reused by later calls
+ */
+void *scratch_buffer_get(size_t size)
+{
+    /* a zero-byte request still needs a non-zero length to map */
+    size_t aligned_size = align_to_4kb(size ? size : 1);
+    void *new_address;
+
+    /* rounding up wrapped around: the request cannot be satisfied */
+    if (aligned_size < size)
+        return NULL;
+
+    if (address != NULL && aligned_size <= current_size)
+        return address;
+
+    if (address == NULL) {
+#ifdef _WIN32
+        new_address = VirtualAlloc(NULL, aligned_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+#else
+        new_address = mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+#endif
+    }
+    else {
+#ifdef _WIN32
+        new_address = windows_mremap(address, current_size, aligned_size);
+#else
+        new_address = mremap(address, current_size, aligned_size, MREMAP_MAYMOVE);
+#endif
+    }
+
+#ifndef _WIN32
+    /* mmap and mremap report failure as MAP_FAILED, not NULL */
+    if (new_address == MAP_FAILED)
+        new_address = NULL;
+#endif
+    /* commit only on success so a failure never clobbers the live region or its size */
+    if (new_address == NULL)
+        return NULL;
+
+    address = new_address;
+    current_size = aligned_size;
+    return address;
+}
\ No newline at end of file