Skip to content

Commit

Permalink
Added env variable, performance improvements
Browse files Browse the repository at this point in the history
* added SGL_SHARED_MEMORY_DIRECT for direct access to shared memory
* added scratch buffer for glMapBufferRange
* remove unnecessary command submits
* moved most important env variables in README upwards
  • Loading branch information
dmaivel committed Jul 12, 2024
1 parent 61634e9 commit 88b9d98
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ IF(UNIX)
file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/*.c" "src/network/*.c")
file(GLOB GLOBBED_CLIENT_P_SOURCES CONFIGURE_DEPENDS "src/client/platform/*.c")
ELSEIF(WIN32)
file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/winmain.c" "src/client/pb.c" "src/client/spinlock.c" "src/client/glimpl.c" "src/network/*.c")
file(GLOB GLOBBED_CLIENT_SOURCES CONFIGURE_DEPENDS "src/client/winmain.c" "src/client/pb.c" "src/client/spinlock.c" "src/client/glimpl.c" "src/client/scratch.c" "src/network/*.c")
file(GLOB GLOBBED_CLIENT_P_SOURCES CONFIGURE_DEPENDS "src/client/platform/windrv.c")
ENDIF(UNIX)

Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,13 @@ Variables labeled with `host` get their values from the host/server when their o
| **Option** | **Legal values** | **Default** | **Description** |
|-|-|-|-|
| SGL_SHARED_MEMORY_DIRECT | Boolean | true | If you intend on only running a single accelerated application at a time, this variable ensures maximum performance by writing/reading directly to/from shared memory. Make sure to set to `false` if you intend on using multiclient support. Available for both Windows and Linux clients. |
| SGL_WINED3D_DONT_VFLIP | Boolean | false | If running a DirectX application via WineD3D, ensure this variable is set to `true` in order for the application to render the framebuffer in the proper orientation. Only available for Windows clients. |
| SGL_RUN_WITH_LOW_PRIORITY | Boolean | false | On single core setups, by setting the process priority to low / `IDLE_PRIORITY_CLASS`, applications will run smoother as the kernel driver is given more CPU time. Users should only set this to `true` if the VM has only a single VCPU. Only available for Windows clients. |
| GL_VERSION_OVERRIDE | Digit.Digit | `host` | Override the OpenGL version on the client side. Available for both Windows and Linux clients. |
| GLX_VERSION_OVERRIDE | Digit.Digit | 1.4 | Override the GLX version on the client side. Only available for Linux clients. |
| GLSL_VERSION_OVERRIDE | Digit.Digit | | Override the GLSL version on the client side. Available for both Windows and Linux clients. |
| SGL_NET_OVER_SHARED | Ip:Port | | If networking is enabled, this environment variable must exist on the guest. Available for both Windows and Linux clients. |
| SGL_RUN_WITH_LOW_PRIORITY | Boolean | false | On single core setups, by setting the process priority to low / `IDLE_PRIORITY_CLASS`, applications will run smoother as the kernel driver is given more CPU time. Only set to `true` if the VM has only a single VCPU. Only available for Windows clients. |
| SGL_WINED3D_DONT_VFLIP | Boolean | false | If running a DirectX application via WineD3D, ensure this variable is set to `true` in order for the application to render the framebuffer in the proper orientation. Only available for Windows clients. |
## Windows (in a VM)
Expand Down Expand Up @@ -121,6 +122,8 @@ There are two possible drivers one may use:
```
3. By default, this builds for Windows 10 x64 (`10_X64`). If you wish to compile for a different version or multiple versions, you must provide it through the command line like so: `kcertify.bat 10_X64,10_NI_X64`. A list of OS versions is provided on MSDN [here](https://learn.microsoft.com/en-us/windows-hardware/drivers/devtest/inf2cat).
If using multiclient support, please read about `SGL_SHARED_MEMORY_DIRECT` in the [environment variables](#environment-variables) section.
### Library / ICD
There are two ways to install the library on windows:
Expand Down
9 changes: 4 additions & 5 deletions inc/client/pb.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#endif

#include <stdint.h>
#include <inttypes.h>
#include <stdbool.h>

struct pb_net_hooks {
int(*_pb_read)(int s);
Expand All @@ -23,9 +23,9 @@ struct pb_net_hooks {
void pb_set_net(struct pb_net_hooks hooks, size_t internal_alloc_size);

#ifndef _WIN32
void pb_set(int pb);
void pb_set(int pb, bool direct_access);
#else
void pb_set(void);
void pb_set(bool direct_access);
void pb_unset(void);
#endif

Expand All @@ -36,9 +36,8 @@ void pb_pushf(float c);
int pb_read(int s);
int64_t pb_read64(int s);
void pb_write(int s, int c);
void pb_copy(void *data, int s, size_t length);
void pb_memcpy(const void *src, size_t length);

void pb_memcpy(const void *src, size_t length);
void pb_memcpy_unaligned(const void *src, size_t length);
void pb_realign();

Expand Down
8 changes: 8 additions & 0 deletions inc/client/scratch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef _SCRATCH_H_
#define _SCRATCH_H_

#include <stddef.h>

/*
 * Returns the client's single reusable scratch buffer, sized to hold at
 * least `size` bytes (the capacity is rounded up to a multiple of 4096).
 *
 * The buffer is created on the first call and enlarged when a later call
 * asks for more than the current capacity; it is never shrunk. Enlarging may
 * relocate the buffer (existing contents are carried over), so a pointer
 * obtained from an earlier call must not be used after a larger request.
 *
 * Every call hands out the same storage, which is tracked in file-scope
 * state in scratch.c with no locking visible there, so only one user may
 * hold the buffer at a time.
 *
 * NOTE(review): the failure value comes straight from the underlying
 * platform allocation call; confirm it in scratch.c before comparing the
 * result against NULL.
 */
void *scratch_buffer_get(size_t size);

#endif
33 changes: 12 additions & 21 deletions src/client/glimpl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <client/memory.h>
#include <client/spinlock.h>
#include <client/pb.h>
#include <client/scratch.h>

#include <client/platform/icd.h>

Expand Down Expand Up @@ -546,6 +547,11 @@ void *glimpl_fb_address()
void glimpl_init()
{
char *network = getenv("SGL_NET_OVER_SHARED");
char *direct_access = getenv("SGL_SHARED_MEMORY_DIRECT");

bool use_direct_access = direct_access == NULL;
if (direct_access != NULL)
use_direct_access = strcmp(direct_access, "true") == 0;

if (network == NULL) {
#ifndef _WIN32
Expand All @@ -557,9 +563,9 @@ void glimpl_init()
exit(1);
}

pb_set(fd);
pb_set(fd, use_direct_access);
#else
pb_set();
pb_set(use_direct_access);
#endif
pb_reset();
}
Expand Down Expand Up @@ -638,6 +644,8 @@ void glimpl_init()
int packed_dims = pb_read(SGL_OFFSET_REGISTER_RETVAL);
icd_set_max_dimensions(UNPACK_A(packed_dims), UNPACK_B(packed_dims));
}

glimpl_map_buffer.mem = scratch_buffer_get(0x1000);
}

static struct gl_vertex_attrib_pointer *glimpl_get_enabled_vap()
Expand Down Expand Up @@ -905,8 +913,6 @@ static void glimpl_texture_subimage(int cmd, int n_dims, GLuint texture, GLint l
const int offsets[3] = { xoffset, yoffset, zoffset };
const int dims[3] = { width, height, depth };

glimpl_submit();

glimpl_upload_texture(width, n_dims > 1 ? height : 1, n_dims > 2 ? depth : 1, format, pixels);

pb_push(cmd);
Expand All @@ -918,8 +924,6 @@ static void glimpl_texture_subimage(int cmd, int n_dims, GLuint texture, GLint l
pb_push(dims[i]);
pb_push(format);
pb_push(type);

glimpl_submit();
}

static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint texture, GLint level, GLint xoffset, GLint yoffset,
Expand All @@ -928,8 +932,6 @@ static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint textu
const int offsets[3] = { xoffset, yoffset, zoffset };
const int dims[3] = { width, height, depth };

glimpl_submit();

glimpl_upload_buffer((void*)data, imageSize);

pb_push(cmd);
Expand All @@ -941,14 +943,10 @@ static void glimpl_compressed_texture_subimage(int cmd, int n_dims, GLuint textu
pb_push(dims[i]);
pb_push(format);
pb_push(imageSize);

glimpl_submit();
}

static void glimpl_buffer_store_data(int cmd, GLuint buffer, GLsizeiptr size, const void *data, GLenum usage)
{
glimpl_submit();

if (data != NULL)
glimpl_upload_buffer(data, size);

Expand All @@ -957,22 +955,16 @@ static void glimpl_buffer_store_data(int cmd, GLuint buffer, GLsizeiptr size, co
pb_push(size);
pb_push(data != NULL);
pb_push(usage);

glimpl_submit();
}

/*
 * Helper that forwards a buffer sub-data update: hands `data`/`size` to
 * glimpl_upload_buffer(), then queues `cmd` followed by the buffer name,
 * offset and size on the push buffer.
 *
 * NOTE(review): this listing is a diff view with its +/- markers stripped.
 * The hunk header (-957,22 +955,16) and the commit message ("remove
 * unnecessary command submits") suggest both glimpl_submit() calls below are
 * deleted lines rather than live code; confirm against the real file.
 */
static void glimpl_buffer_subdata(int cmd, GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data)
{
glimpl_submit();

/* payload goes first, the command words describing it follow */
glimpl_upload_buffer(data, size);

pb_push(cmd);
pb_push(buffer);
pb_push(offset);
pb_push(size);

glimpl_submit();
}

static void glimpl_buffer_clear_data(int cmd, bool is_subdata, GLenum buffer, GLenum internalformat, GLintptr offset,
Expand All @@ -995,13 +987,13 @@ static void glimpl_buffer_clear_data(int cmd, bool is_subdata, GLenum buffer, GL
static void *glimpl_map_buffer_range(int cmd, GLenum buffer, GLintptr offset, GLsizeiptr length, GLbitfield access)
{
if (glimpl_map_buffer.in_use) {
fprintf(stderr, "glMapBufferRange: map buffer already in use, returning NULL\n");
fprintf(stderr, "glimpl_map_buffer_range: map buffer already in use, returning NULL\n");
return NULL;
}

glimpl_map_buffer = (struct gl_map_buffer){
/* target = */ buffer,
/* mem = */ calloc(length, 1),
/* mem = */ scratch_buffer_get(length), // calloc(length, 1), // glimpl_map_buffer.mem,
/* offset = */ offset,
/* length = */ length,
/* access = */ access,
Expand Down Expand Up @@ -3775,7 +3767,6 @@ GLboolean glUnmapBuffer(GLenum target)
pb_memcpy(glimpl_map_buffer.mem, glimpl_map_buffer.length + ((glimpl_map_buffer.length % 4 != 0) * sizeof(int)));

if (glimpl_map_buffer.in_use) {
free(glimpl_map_buffer.mem);
glimpl_map_buffer.in_use = false;
}

Expand Down
41 changes: 24 additions & 17 deletions src/client/pb.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#include <sharedgl.h>
#include <client/pb.h>

#include <stdio.h>
#include <string.h>

#include <inttypes.h>

#ifndef _WIN32
#define __USE_GNU
#define _GNU_SOURCE
Expand Down Expand Up @@ -52,10 +51,12 @@ static int *cur;
static void *in_base;
static int *in_cur;

static bool using_direct_access = false;

static struct pb_net_hooks net_hooks = { NULL };

#ifndef _WIN32
void pb_set(int fd)
void pb_set(int fd, bool direct_access)
{
ptr = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

Expand All @@ -65,11 +66,18 @@ void pb_set(int fd)

base = ptr + 0x1000;

in_base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
in_cur = in_base;
if (direct_access) {
using_direct_access = true;
in_base = base;
in_cur = base;
}
else {
in_base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
in_cur = in_base;
}
}
#else
void pb_set(void)
void pb_set(bool direct_access)
{
HDEVINFO device_info;
PSP_DEVICE_INTERFACE_DETAIL_DATA inf_data;
Expand Down Expand Up @@ -115,8 +123,14 @@ void pb_set(void)
ptr = map.pointer;
base = (PVOID)((DWORD64)map.pointer + (DWORD64)0x1000);

in_base = VirtualAlloc(NULL, map.size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
in_cur = in_base;
if (direct_access) {
in_base = base;
in_cur = in_base;
}
else {
in_base = VirtualAlloc(NULL, map.size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
in_cur = in_base;
}
}

void pb_unset(void)
Expand All @@ -143,19 +157,16 @@ void pb_reset()
{
cur = base;
in_cur = in_base;
// pb_ctx.current_offset = pb_ctx.reset_offset;
}

/*
 * Stores one int at the current write position of the push buffer and
 * moves the write position forward by one int.
 */
void pb_push(int c)
{
    in_cur[0] = c;
    in_cur += 1;
}

/*
 * Stores the raw bit pattern of a float as one int at the current write
 * position of the push buffer and advances the write position by one int.
 *
 * The bits are moved with memcpy instead of dereferencing a casted pointer:
 * reading a float object through an int lvalue violates the strict-aliasing
 * rule and is undefined behaviour. Like the cast it replaces, this relies on
 * float and int having the same size.
 */
void pb_pushf(float c)
{
    int bits;

    memcpy(&bits, &c, sizeof bits);
    *in_cur++ = bits;
}

int pb_read(int s)
Expand All @@ -177,11 +188,6 @@ void pb_write(int s, int c)
*(int*)((size_t)ptr + s) = c;
}

void pb_copy(void *data, int s, size_t length)
{
memcpy(data, (void*)((size_t)ptr + s), length);
}

/*
* // equivalent to
* int *pdata = (int*)data;
Expand Down Expand Up @@ -231,5 +237,6 @@ size_t pb_size()

/*
 * Publishes the staged commands: copies everything written between in_base
 * and in_cur into the shared-memory region starting at base.
 *
 * Nothing is copied when the staging area *is* the shared region. That case
 * is detected both through the using_direct_access flag and by comparing the
 * pointers, because memcpy on a region onto itself is undefined and pure
 * overhead.
 *
 * NOTE(review): the pointer comparison matters on Windows, where the visible
 * direct-access branch of pb_set() aliases in_base to base without setting
 * using_direct_access.
 */
void pb_copy_to_shared()
{
    if (using_direct_access || in_base == base)
        return;

    memcpy(base, in_base, (size_t)in_cur - (size_t)in_base);
}
74 changes: 74 additions & 0 deletions src/client/scratch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include <client/scratch.h>
#include <stdint.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#else
#define __USE_GNU
#define _GNU_SOURCE
#include <unistd.h>
#include <linux/mman.h>
#include <sys/mman.h>
#endif

static void *address = NULL;
static size_t current_size = 0;

/*
 * Rounds `ptr` up to the next multiple of 4096; values that are already a
 * multiple of 4096 (including 0) are returned unchanged.
 */
static inline uintptr_t align_to_4kb(uintptr_t ptr)
{
    const uintptr_t page_mask = (uintptr_t)4096 - 1;
    const uintptr_t bumped = ptr + page_mask;

    /* dropping the low 12 bits of the bumped value lands on the boundary */
    return bumped - (bumped & page_mask);
}

#ifdef _WIN32
/*
 * Windows stand-in for mremap(..., MREMAP_MAYMOVE): returns a committed,
 * read/write region of `new_size` bytes that starts with the contents of
 * the old region, and releases the old region.
 *
 * old_address  base address of the current region, as returned by
 *              VirtualAlloc (may be NULL, in which case nothing is copied
 *              or released)
 * old_size     number of valid bytes in the old region
 * new_size     requested size of the new region
 *
 * Returns the new base address, or NULL on failure. On failure the old
 * region is left untouched and remains owned by the caller.
 *
 * The region is always reallocated rather than extended in place:
 *   - committing pages at a fixed address without MEM_RESERVE fails unless
 *     that range is already reserved, so an "extend into the free pages
 *     behind us" attempt with MEM_COMMIT alone cannot succeed;
 *   - a separately reserved neighbour would not be freed by
 *     VirtualFree(old_address, 0, MEM_RELEASE), which only releases the
 *     region created by the original reservation.
 * This is comparatively expensive but is expected to happen rarely.
 */
static void *windows_mremap(void *old_address, size_t old_size, size_t new_size)
{
    void *new_address = VirtualAlloc(NULL, new_size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    if (new_address == NULL)
        return NULL;

    if (old_address != NULL) {
        /* never copy more than the new region can hold */
        size_t keep = old_size < new_size ? old_size : new_size;

        memcpy(new_address, old_address, keep);
        VirtualFree(old_address, 0, MEM_RELEASE);
    }

    return new_address;
}
#endif

/*
 * Returns the shared scratch buffer, making sure it can hold at least
 * `size` bytes.
 *
 * The capacity is `size` rounded up to a multiple of 4 KiB (a zero-byte
 * request is treated as one page so the call still yields usable memory).
 * The buffer is created on first use and grown on demand; it never shrinks.
 * Growing may move the buffer, so pointers returned by earlier calls must
 * not be used after a larger request.
 *
 * Returns NULL if the size cannot be rounded without overflow or if the
 * platform allocation fails. On failure the previously allocated buffer, if
 * any, is left intact and stays tracked, so later (smaller) requests keep
 * working and nothing is leaked.
 */
void *scratch_buffer_get(size_t size)
{
    size_t wanted = align_to_4kb(size);
    void *grown;

    /* rounding up wrapped around: the request cannot be satisfied */
    if (wanted < size)
        return NULL;
    if (wanted == 0)
        wanted = 4096;

    /* fast path: the existing buffer is already large enough */
    if (address != NULL && wanted <= current_size)
        return address;

#ifdef _WIN32
    if (address == NULL)
        grown = VirtualAlloc(NULL, wanted, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    else
        grown = windows_mremap(address, current_size, wanted);
#else
    if (address == NULL)
        grown = mmap(NULL, wanted, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    else
        grown = mremap(address, current_size, wanted, MREMAP_MAYMOVE);

    /* mmap/mremap signal failure with MAP_FAILED, not NULL */
    if (grown == MAP_FAILED)
        grown = NULL;
#endif

    /* only commit the new state once the allocation is known to be good */
    if (grown == NULL)
        return NULL;

    address = grown;
    current_size = wanted;
    return address;
}

0 comments on commit 88b9d98

Please sign in to comment.