From 9cb88de0148036a0d02337487df5a1e55aca60c0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 14:59:19 -0700 Subject: [PATCH 001/176] ubuntu-win64-cross: Use LLVM toolchain --- ubuntu-win64-cross/Dockerfile | 97 +++++++++++++++++++++-------------- ubuntu-win64-cross/glib.mk | 51 ++++++++++++++++++ ubuntu-win64-cross/sdl2.mk | 5 -- 3 files changed, 109 insertions(+), 44 deletions(-) create mode 100644 ubuntu-win64-cross/glib.mk diff --git a/ubuntu-win64-cross/Dockerfile b/ubuntu-win64-cross/Dockerfile index 5f71cde7b45..c7740efb143 100644 --- a/ubuntu-win64-cross/Dockerfile +++ b/ubuntu-win64-cross/Dockerfile @@ -4,13 +4,21 @@ FROM ubuntu:24.04 -ENV MXE_VERSION=8fa8c126f64a0ca6d263c641632645db4f941f00 +ENV MXE_PATH=/opt/mxe +ENV MXE_REPO=https://github.com/mxe/mxe.git +ENV MXE_VERSION=ab676e8ce5ba921daaa80a123ff2e415aac4524a + +ENV MXE_LLVM_MINGW_REPO=https://github.com/libvips/build-win64-mxe +ENV MXE_LLVM_MINGW_VERSION=21e02f87c282fcfe17c8376217b0a4f44f14d01b +ENV MXE_LLVM_MINGW_PATH=/opt/build-win64-mxe + +ARG PLUGIN_DIRS="${MXE_LLVM_MINGW_PATH} ${MXE_LLVM_MINGW_PATH}/build/plugins/llvm-mingw" +ARG TARGETS="x86_64-w64-mingw32.static" +ARG JOBS= RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive \ apt-get -qy install \ - software-properties-common \ - lsb-release \ autoconf \ automake \ autopoint \ @@ -26,67 +34,78 @@ RUN apt-get update \ intltool \ libc6-dev-i386 \ libgdk-pixbuf2.0-dev \ - libltdl-dev \ libgl-dev \ + libltdl-dev \ libssl-dev \ libtool-bin \ libxml-parser-perl \ + lsb-release \ lzip \ make \ + ninja-build \ openssl \ p7zip-full \ patch \ perl \ + python-is-python3 \ python3 \ python3-mako \ + python3-pip \ python3-pkg-resources \ - python-is-python3 \ + python3-yaml \ ruby \ sed \ + software-properties-common \ unzip \ wget \ - xz-utils \ - ninja-build \ - python3-pip \ - python3-yaml + xz-utils -RUN cd /opt \ - && git clone https://github.com/mxe/mxe.git \ - && cd mxe \ +RUN git clone ${MXE_LLVM_MINGW_REPO} 
${MXE_LLVM_MINGW_PATH} \ + && cd ${MXE_LLVM_MINGW_PATH} \ + && git checkout ${MXE_LLVM_MINGW_VERSION} \ + \ + && git clone ${MXE_REPO} ${MXE_PATH} \ + && cd ${MXE_PATH} \ && git checkout ${MXE_VERSION} \ - && make MXE_TARGETS=x86_64-w64-mingw32.static \ - MXE_PLUGIN_DIRS=plugins/gcc13 \ - cc \ - glib \ - libepoxy \ - pixman \ - libsamplerate \ - openssl \ - cmake \ - libslirp + && git apply ${MXE_LLVM_MINGW_PATH}/build/patches/mxe-fixes.patch -RUN rm /opt/mxe/src/sdl2*.patch -COPY sdl2.mk /opt/mxe/src/sdl2.mk -RUN V=1 MXE_VERBOSE=1 make -C /opt/mxe \ - MXE_TARGETS=x86_64-w64-mingw32.static \ - MXE_PLUGIN_DIRS=plugins/gcc13 \ - sdl2 +RUN make \ + MXE_TARGETS="${TARGETS}" \ + MXE_PLUGIN_DIRS="${PLUGIN_DIRS}" \ + JOBS=${JOBS} \ + -C ${MXE_PATH} \ + cc +RUN rm ${MXE_PATH}/src/sdl2*.patch COPY vulkan-headers.mk \ spirv-headers.mk \ spirv-tools.mk \ glslang.mk \ - /opt/mxe/src/ -RUN V=1 MXE_VERBOSE=1 make -C /opt/mxe \ - MXE_TARGETS=x86_64-w64-mingw32.static \ - MXE_PLUGIN_DIRS=plugins/gcc13 \ - vulkan-headers \ - spirv-headers \ - spirv-tools \ - glslang + glib.mk \ + sdl2.mk \ + ${MXE_PATH}/src/ + +RUN make \ + MXE_TARGETS="${TARGETS}" \ + MXE_PLUGIN_DIRS="${PLUGIN_DIRS}" \ + JOBS=${JOBS} \ + CFLAGS=-O2 \ + -C ${MXE_PATH} \ + glib \ + libepoxy \ + pixman \ + libsamplerate \ + openssl \ + cmake \ + libslirp \ + sdl2 \ + vulkan-headers \ + spirv-headers \ + spirv-tools \ + glslang -RUN find /opt/mxe/usr -executable -type f -exec chmod a+x {} \; +RUN find ${MXE_PATH}/usr -executable -type f -exec chmod a+x {} \; ENV CROSSPREFIX=x86_64-w64-mingw32.static- -ENV CROSSAR=${CROSSPREFIX}gcc-ar -ENV PATH="/opt/mxe/.ccache/bin:/opt/mxe/usr/x86_64-pc-linux-gnu/bin:/opt/mxe/usr/bin:${PATH}" +ENV CROSSAR=${CROSSPREFIX}ar +ENV PATH="${MXE_PATH}/.ccache/bin:${MXE_PATH}/usr/x86_64-pc-linux-gnu/bin:${MXE_PATH}/usr/bin:${PATH}" diff --git a/ubuntu-win64-cross/glib.mk b/ubuntu-win64-cross/glib.mk new file mode 100644 index 00000000000..54483643aa0 --- /dev/null +++ 
b/ubuntu-win64-cross/glib.mk @@ -0,0 +1,51 @@ +# This file is part of MXE. See LICENSE.md for licensing information. + +PKG := glib +$(PKG)_WEBSITE := https://gtk.org/ +$(PKG)_DESCR := GLib +$(PKG)_IGNORE := +$(PKG)_VERSION := 2.83.2 +$(PKG)_CHECKSUM := 8428d672c8485636d940f03ce8dcdc174f9b3892ac8b2eea76dd281af6a6e937 +$(PKG)_SUBDIR := glib-$($(PKG)_VERSION) +$(PKG)_FILE := glib-$($(PKG)_VERSION).tar.xz +$(PKG)_URL := https://download.gnome.org/sources/glib/$(call SHORT_PKG_VERSION,$(PKG))/$($(PKG)_FILE) +$(PKG)_DEPS := cc meson-wrapper dbus gettext libffi libiconv pcre2 zlib $(BUILD)~$(PKG) +$(PKG)_TARGETS := $(BUILD) $(MXE_TARGETS) +$(PKG)_DEPS_$(BUILD) := cc meson-wrapper gettext libffi libiconv zlib + +define $(PKG)_UPDATE + $(WGET) -q -O- 'https://gitlab.gnome.org/GNOME/glib/tags' | \ + $(SED) -n "s,.*]\+>v\?\([0-9]\+\.[0-9.]\+\)<.*,\1,p" | \ + $(SORT) -Vr | \ + head -1 +endef + +define $(PKG)_BUILD_$(BUILD) + # native build + $(if $(findstring darwin, $(BUILD)), \ + CPPFLAGS='-I$(PREFIX)/$(TARGET).gnu/include' \ + LDFLAGS='-L$(PREFIX)/$(TARGET).gnu/lib' \, + CPPFLAGS='-I$(PREFIX)/$(TARGET)/include' \ + LDFLAGS='-L$(PREFIX)/$(TARGET)/lib' \) + '$(MXE_MESON_NATIVE_WRAPPER)' \ + --buildtype=release \ + -Dtests=false \ + '$(BUILD_DIR)' '$(SOURCE_DIR)' + '$(MXE_NINJA)' -C '$(BUILD_DIR)' -j '$(JOBS)' + '$(MXE_NINJA)' -C '$(BUILD_DIR)' -j '$(JOBS)' install +endef + +define $(PKG)_BUILD + # other packages expect glib-tools in $(TARGET)/bin + rm -f '$(PREFIX)/$(TARGET)/bin/glib-*' + ln -sf '$(PREFIX)/$(BUILD)/bin/glib-genmarshal' '$(PREFIX)/$(TARGET)/bin/' + ln -sf '$(PREFIX)/$(BUILD)/bin/glib-compile-schemas' '$(PREFIX)/$(TARGET)/bin/' + ln -sf '$(PREFIX)/$(BUILD)/bin/glib-compile-resources' '$(PREFIX)/$(TARGET)/bin/' + + '$(MXE_MESON_WRAPPER)' \ + $(MXE_MESON_OPTS) \ + -Dtests=false \ + '$(BUILD_DIR)' '$(SOURCE_DIR)' + '$(MXE_NINJA)' -C '$(BUILD_DIR)' -j '$(JOBS)' + '$(MXE_NINJA)' -C '$(BUILD_DIR)' -j '$(JOBS)' install +endef diff --git a/ubuntu-win64-cross/sdl2.mk 
b/ubuntu-win64-cross/sdl2.mk index e86637308e7..9e049495574 100644 --- a/ubuntu-win64-cross/sdl2.mk +++ b/ubuntu-win64-cross/sdl2.mk @@ -19,9 +19,4 @@ define $(PKG)_BUILD -DVERBOSE=1 $(MAKE) -C '$(BUILD_DIR)' -j '$(JOBS)' $(MAKE) -C '$(BUILD_DIR)' -j 1 install - - '$(TARGET)-gcc' \ - -W -Wall -Werror -ansi -pedantic \ - '$(TEST_FILE)' -o '$(PREFIX)/$(TARGET)/bin/test-sdl2.exe' \ - `'$(TARGET)-pkg-config' sdl2 --cflags --libs` endef From 76905733f17bfadfde4f8264aeb87ba41bb43dd5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 16:32:28 -0700 Subject: [PATCH 002/176] ubuntu-win64-cross: Add updated libsamplerate --- ubuntu-win64-cross/Dockerfile | 1 + ubuntu-win64-cross/libsamplerate.mk | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 ubuntu-win64-cross/libsamplerate.mk diff --git a/ubuntu-win64-cross/Dockerfile b/ubuntu-win64-cross/Dockerfile index c7740efb143..84b4e2ad7a1 100644 --- a/ubuntu-win64-cross/Dockerfile +++ b/ubuntu-win64-cross/Dockerfile @@ -83,6 +83,7 @@ COPY vulkan-headers.mk \ glslang.mk \ glib.mk \ sdl2.mk \ + libsamplerate.mk \ ${MXE_PATH}/src/ RUN make \ diff --git a/ubuntu-win64-cross/libsamplerate.mk b/ubuntu-win64-cross/libsamplerate.mk new file mode 100644 index 00000000000..7a808e419ee --- /dev/null +++ b/ubuntu-win64-cross/libsamplerate.mk @@ -0,0 +1,20 @@ +# This file is part of MXE. See LICENSE.md for licensing information. 
+ +PKG := libsamplerate +$(PKG)_WEBSITE := http://libsndfile.github.io/libsamplerate/ +$(PKG)_DESCR := libsamplerate +$(PKG)_IGNORE := +$(PKG)_VERSION := 0.2.2 +$(PKG)_SUBDIR := libsamplerate-$($(PKG)_VERSION) +$(PKG)_FILE := libsamplerate-$($(PKG)_VERSION).tar.gz +$(PKG)_CHECKSUM := 16e881487f184250deb4fcb60432d7556ab12cb58caea71ef23960aec6c0405a +$(PKG)_URL := https://github.com/libsndfile/libsamplerate/archive/refs/tags/$($(PKG)_VERSION)/$($(PKG)_VERSION).tar.gz +$(PKG)_DEPS := cc + +define $(PKG)_BUILD + cd '$(BUILD_DIR)' && $(TARGET)-cmake '$(SOURCE_DIR)' \ + -DLIBSAMPLERATE_EXAMPLES=OFF \ + -DVERBOSE=1 + $(MAKE) -C '$(BUILD_DIR)' -j '$(JOBS)' + $(MAKE) -C '$(BUILD_DIR)' -j 1 install +endef From 52012f6f98f05ab4dc8955eb7f88a6040710d8d1 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 18:41:42 -0700 Subject: [PATCH 003/176] ubuntu-win64-cross: Add libressl --- ubuntu-win64-cross/Dockerfile | 3 ++- ubuntu-win64-cross/libressl.mk | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 ubuntu-win64-cross/libressl.mk diff --git a/ubuntu-win64-cross/Dockerfile b/ubuntu-win64-cross/Dockerfile index 84b4e2ad7a1..41d4ca58812 100644 --- a/ubuntu-win64-cross/Dockerfile +++ b/ubuntu-win64-cross/Dockerfile @@ -84,6 +84,7 @@ COPY vulkan-headers.mk \ glib.mk \ sdl2.mk \ libsamplerate.mk \ + libressl.mk \ ${MXE_PATH}/src/ RUN make \ @@ -96,7 +97,7 @@ RUN make \ libepoxy \ pixman \ libsamplerate \ - openssl \ + libressl \ cmake \ libslirp \ sdl2 \ diff --git a/ubuntu-win64-cross/libressl.mk b/ubuntu-win64-cross/libressl.mk new file mode 100644 index 00000000000..93d409b8367 --- /dev/null +++ b/ubuntu-win64-cross/libressl.mk @@ -0,0 +1,19 @@ +# This file is part of MXE. See LICENSE.md for licensing information. 
+ +PKG := libressl +$(PKG)_WEBSITE := https://www.libressl.org/ +$(PKG)_DESCR := libressl +$(PKG)_IGNORE := +$(PKG)_VERSION := 4.0.0 +$(PKG)_SUBDIR := libressl-$($(PKG)_VERSION) +$(PKG)_FILE := libressl-$($(PKG)_VERSION).tar.gz +$(PKG)_CHECKSUM := 4d841955f0acc3dfc71d0e3dd35f283af461222350e26843fea9731c0246a1e4 +$(PKG)_URL := https://github.com/libressl/portable/releases/download/v$($(PKG)_VERSION)/$($(PKG)_FILE) +$(PKG)_DEPS := cc + +define $(PKG)_BUILD + cd '$(BUILD_DIR)' && $(TARGET)-cmake '$(SOURCE_DIR)' \ + -DVERBOSE=1 + $(MAKE) -C '$(BUILD_DIR)' -j '$(JOBS)' + $(MAKE) -C '$(BUILD_DIR)' -j 1 install +endef From debf7a2e03d51090d0a12f8aa798cde8889993e2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 18:42:00 -0700 Subject: [PATCH 004/176] ubuntu-win64-cross: Enable aarch64 target --- ubuntu-win64-cross/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ubuntu-win64-cross/Dockerfile b/ubuntu-win64-cross/Dockerfile index 41d4ca58812..0d58f4b1c54 100644 --- a/ubuntu-win64-cross/Dockerfile +++ b/ubuntu-win64-cross/Dockerfile @@ -13,7 +13,7 @@ ENV MXE_LLVM_MINGW_VERSION=21e02f87c282fcfe17c8376217b0a4f44f14d01b ENV MXE_LLVM_MINGW_PATH=/opt/build-win64-mxe ARG PLUGIN_DIRS="${MXE_LLVM_MINGW_PATH} ${MXE_LLVM_MINGW_PATH}/build/plugins/llvm-mingw" -ARG TARGETS="x86_64-w64-mingw32.static" +ARG TARGETS="x86_64-w64-mingw32.static aarch64-w64-mingw32.static" ARG JOBS= RUN apt-get update \ From a1342ee174ab685c7c92ff7db267e5587a6418e2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 18:43:04 -0700 Subject: [PATCH 005/176] ubuntu-win64-cross: Set default number of jobs = 6 --- ubuntu-win64-cross/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ubuntu-win64-cross/Dockerfile b/ubuntu-win64-cross/Dockerfile index 0d58f4b1c54..fc438cf38f0 100644 --- a/ubuntu-win64-cross/Dockerfile +++ b/ubuntu-win64-cross/Dockerfile @@ -14,7 +14,7 @@ ENV MXE_LLVM_MINGW_PATH=/opt/build-win64-mxe ARG 
PLUGIN_DIRS="${MXE_LLVM_MINGW_PATH} ${MXE_LLVM_MINGW_PATH}/build/plugins/llvm-mingw" ARG TARGETS="x86_64-w64-mingw32.static aarch64-w64-mingw32.static" -ARG JOBS= +ARG JOBS=6 RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive \ From 530a7b65151e4d7e4010545987ee1e69abff53f0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:28:30 -0700 Subject: [PATCH 006/176] util/oslib-win32: Fix check of filename, wfilename --- block/file-win32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/file-win32.c b/block/file-win32.c index b69de110961..5f3249693a3 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -379,6 +379,10 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } filename = qemu_opt_get(opts, "filename"); + if (filename == NULL) { + ret = -EINVAL; + goto fail; + } use_aio = get_aio_option(opts, flags, &local_err); if (local_err) { @@ -401,7 +405,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } wfilename = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); - if (!filename) { + if (!wfilename) { + ret = -EINVAL; goto fail; } @@ -420,6 +425,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, OPEN_EXISTING, overlapped, NULL); #endif g_free(wfilename); + if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); From 212988421f4466b470aeb379d9a6173a5dc90140 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:29:02 -0700 Subject: [PATCH 007/176] mcpx: Remove set but not used count variable --- hw/xbox/mcpx/dsp/dsp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/xbox/mcpx/dsp/dsp.c b/hw/xbox/mcpx/dsp/dsp.c index 2a08cbf3206..d1e3b2a9b36 100644 --- a/hw/xbox/mcpx/dsp/dsp.c +++ b/hw/xbox/mcpx/dsp/dsp.c @@ -166,7 +166,6 @@ void dsp_run(DSPState* dsp, int cycles) if (dsp->save_cycles <= 0) return; - int count = 0; int dma_timer = 0; while (dsp->save_cycles > 0) @@ -174,7 +173,6 @@ void dsp_run(DSPState* dsp, int 
cycles) dsp56k_execute_instruction(&dsp->core); dsp->save_cycles -= dsp->core.instr_cycle; dsp->core.cycle_count++; - count++; if (dsp->dma.control & DMA_CONTROL_RUNNING) { dma_timer++; From 816c875ac4332e206037ce8ddbc390f5bf4c3a31 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:29:37 -0700 Subject: [PATCH 008/176] net/slirp: Move prototype def into slirp.h --- include/net/slirp.h | 4 ++++ ui/xemu-net.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/slirp.h b/include/net/slirp.h index bad3e1e2416..eaf036c86b6 100644 --- a/include/net/slirp.h +++ b/include/net/slirp.h @@ -32,6 +32,10 @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict); void hmp_info_usernet(Monitor *mon, const QDict *qdict); +#ifdef XBOX +void *slirp_get_state_from_netdev(const char *id); +#endif + #endif #endif /* QEMU_NET_SLIRP_H */ diff --git a/ui/xemu-net.c b/ui/xemu-net.c index d7dbd5de116..606109efccf 100644 --- a/ui/xemu-net.c +++ b/ui/xemu-net.c @@ -42,8 +42,6 @@ static const char *id = "xemu-netdev"; static const char *id_hubport = "xemu-netdev-hubport"; -void *slirp_get_state_from_netdev(const char *id); - void xemu_net_enable(void) { Error *local_err = NULL; From e0f4f42fda1f5bf4844189f5687bdebde05053d3 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:30:07 -0700 Subject: [PATCH 009/176] target/i386: Macro out some unused functions --- target/i386/cpu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2330a8d9021..9664e1bd969 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1497,12 +1497,16 @@ static inline bool accel_uses_host_cpuid(void) return kvm_enabled() || hvf_enabled(); } +#ifndef XBOX + static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) { return ((uint64_t)cpu->env.features[FEAT_XSAVE_XCR0_HI]) << 32 | cpu->env.features[FEAT_XSAVE_XCR0_LO]; } +#endif + /* Return name of 32-bit register, from a R_* constant */ 
static const char *get_register_name_32(unsigned int reg) { @@ -1512,12 +1516,16 @@ static const char *get_register_name_32(unsigned int reg) return x86_reg_info_32[reg].name; } +#ifndef XBOX + static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) { return ((uint64_t)cpu->env.features[FEAT_XSAVE_XSS_HI]) << 32 | cpu->env.features[FEAT_XSAVE_XSS_LO]; } +#endif + /* * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. From feee2f142d62d8d231ae211aa48ddc97172c906d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:30:42 -0700 Subject: [PATCH 010/176] ui: Fix variable-as-format-string cases --- ui/xui/update.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/xui/update.cc b/ui/xui/update.cc index 3c7580596b1..8f075215bd6 100644 --- a/ui/xui/update.cc +++ b/ui/xui/update.cc @@ -80,9 +80,9 @@ void AutoUpdateWindow::Draw() }; if (updater.get_status() == UPDATER_IDLE) { - ImGui::Text(available_msg[updater.get_update_availability()]); + ImGui::Text("%s", available_msg[updater.get_update_availability()]); } else { - ImGui::Text(status_msg[updater.get_status()]); + ImGui::Text("%s", status_msg[updater.get_status()]); } if (updater.is_updating()) { From 67053d1db3a96f704f96e77d200205459829bc0a Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 17:30:57 -0700 Subject: [PATCH 011/176] ui: Macro out some variables only used for debugging --- ui/xemu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ui/xemu.c b/ui/xemu.c index b06e19eb1c7..d0cec857f4e 100644 --- a/ui/xemu.c +++ b/ui/xemu.c @@ -1225,8 +1225,10 @@ void sdl2_gl_refresh(DisplayChangeListener *dcl) static int64_t last_update = 0; int64_t deadline = last_update + 16666666; +#ifdef DEBUG_XEMU_C int64_t sleep_acc = 0; int64_t spin_acc = 0; +#endif #ifndef _WIN32 const int64_t sleep_threshold = 2000000; @@ -1241,12 +1243,16 @@ void sdl2_gl_refresh(DisplayChangeListener *dcl) if 
(time_remaining > sleep_threshold) { // Try to sleep until the until reaching the sleep threshold. sleep_ns(time_remaining - sleep_threshold); +#ifdef DEBUG_XEMU_C sleep_acc += qemu_clock_get_ns(QEMU_CLOCK_REALTIME)-now; +#endif } else { // Simply spin to avoid extra delays incurred with swapping to // another process and back in the event of being within // threshold to desired event. +#ifdef DEBUG_XEMU_C spin_acc++; +#endif } } else { DPRINTF("zzZz %g %ld\n", (double)sleep_acc/1000000.0, spin_acc); From 454668e5b2e371fa51c49b1a72dcd06dfd69e660 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 14:57:09 -0700 Subject: [PATCH 012/176] qemu/compiler.h: Drop gcc_struct attribute in QEMU_PACKED This attribute was added in 0f7fdd3 to reverse the effects of -mms-bitfields, which is enabled by default when building for Windows. Let's just align with other libraries. Really all structures using QEMU_PACKED should be audited for bitfield packing dependency, but it mostly looks to be applied to structs without bitfields. 
--- include/qemu/compiler.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 756ec181b69..7375ea09a60 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -22,12 +22,7 @@ #define QEMU_EXTERN_C extern #endif -#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) -# define QEMU_PACKED __attribute__((gcc_struct, packed)) -#else -# define QEMU_PACKED __attribute__((packed)) -#endif - +#define QEMU_PACKED __attribute__((packed)) #define QEMU_ALIGNED(X) __attribute__((aligned(X))) #ifndef glue From cff4504c730631e35d74f2e8dd21d4a6681e3d12 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 14:57:30 -0700 Subject: [PATCH 013/176] meson.build: Don't try to link pthreads on Windows --- meson.build | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index a6fd6fa5a3b..4a856d30f1c 100644 --- a/meson.build +++ b/meson.build @@ -388,7 +388,13 @@ have_vhost_net = have_vhost_net_kernel or have_vhost_net_user or have_vhost_net_ # Target-specific libraries and flags libm = cc.find_library('m', required: false) -threads = dependency('threads') + +if targetos == 'windows' + threads = dependency('', required: false) +else + threads = dependency('threads') +endif + util = cc.find_library('util', required: false) winmm = [] socket = [] From da662b6ce4da5dc7f34d1860fd663817deacd600 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 14:57:52 -0700 Subject: [PATCH 014/176] qemu-io-cmds.c: Implement clock_gettime for WIN32 --- qemu-io-cmds.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 952dc940f1d..f66a5754602 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -23,6 +23,22 @@ #include "qemu/cutils.h" #include "qemu/memalign.h" +#ifdef _WIN32 +/* clock_gettime depends on pthreads. 
We only use it for measuring IO perf below + * so simply implement it here for now. */ +static int clock_gettime_monotonic(struct timespec *tp) +{ + LARGE_INTEGER freq, ticks; + if (!QueryPerformanceFrequency(&freq) || !QueryPerformanceCounter(&ticks)) { + return -1; + } + tp->tv_sec = ticks.QuadPart / freq.QuadPart; + tp->tv_nsec = (ticks.QuadPart % freq.QuadPart) * 1000000000 / freq.QuadPart; + return 0; +} +#define clock_gettime(c, ts) clock_gettime_monotonic(ts) +#endif + #define CMD_NOFILE_OK 0x01 bool qemuio_misalign; From 6ee1639c827e12ecc0d45ba9437a07f0f231af39 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 14:58:16 -0700 Subject: [PATCH 015/176] ui/xemu-net.c: Include qemu/sockets.h for inet_aton --- ui/xemu-net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/xemu-net.c b/ui/xemu-net.c index 606109efccf..79b87522350 100644 --- a/ui/xemu-net.c +++ b/ui/xemu-net.c @@ -23,6 +23,7 @@ #include "xemu-settings.h" #include "qemu/osdep.h" +#include "qemu/sockets.h" #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #include "qapi/error.h" From 2c722c86617c2e2bd908f53ace8cd844859cac0f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 15:25:13 -0700 Subject: [PATCH 016/176] configure: Don't use -no-pie flag with LLVM --- configure | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 761cd5e4313..11471698b68 100755 --- a/configure +++ b/configure @@ -1333,6 +1333,13 @@ EOF # Meson currently only handles pie as a boolean for now so if we have # explicitly disabled PIE we need to extend our cflags because it wont. 
+ +if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1; then + NO_PIE_LDFLAG="" +else + NO_PIE_LDFLAG="-no-pie" +fi + if test "$static" = "yes"; then if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; then CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS" @@ -1341,13 +1348,13 @@ if test "$static" = "yes"; then error_exit "-static-pie not available due to missing toolchain support" else pie="no" - QEMU_CFLAGS="-fno-pie -no-pie $QEMU_CFLAGS" + QEMU_CFLAGS="-fno-pie $NO_PIE_LDFLAG $QEMU_CFLAGS" fi elif test "$pie" = "no"; then - if compile_prog "-Werror -fno-pie" "-no-pie"; then + if compile_prog "-Werror -fno-pie" $NO_PIE_LDFLAG; then CONFIGURE_CFLAGS="-fno-pie $CONFIGURE_CFLAGS" - CONFIGURE_LDFLAGS="-no-pie $CONFIGURE_LDFLAGS" - QEMU_CFLAGS="-fno-pie -no-pie $QEMU_CFLAGS" + CONFIGURE_LDFLAGS="$NO_PIE_LDFLAG $CONFIGURE_LDFLAGS" + QEMU_CFLAGS="-fno-pie $NO_PIE_LDFLAG $QEMU_CFLAGS" fi elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS" From 8749cb70ca1d651f2ced23bb4087d62a0ac60a65 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 14:27:30 -0700 Subject: [PATCH 017/176] meson.build: Link with crypt32 on Windows for httplib --- meson.build | 1 + ui/thirdparty/meson.build | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 4a856d30f1c..8980f55a134 100644 --- a/meson.build +++ b/meson.build @@ -414,6 +414,7 @@ if targetos == 'windows' pathcch = cc.find_library('pathcch') socket = cc.find_library('ws2_32') winmm = cc.find_library('winmm') + crypt32 = cc.find_library('crypt32') win = import('windows') version_res = win.compile_resources('version.rc', diff --git a/ui/thirdparty/meson.build b/ui/thirdparty/meson.build index ad01b589322..90d348a34e1 100644 --- a/ui/thirdparty/meson.build +++ b/ui/thirdparty/meson.build @@ -61,7 +61,7 @@ libfpng = static_library('fpng', sources: 'fpng/fpng.cpp', cpp_args: libfpng_cpp fpng = 
declare_dependency(include_directories: 'fpng', link_with: libfpng) json = declare_dependency(include_directories: 'json') -httplib = declare_dependency(include_directories: 'httplib') +httplib = declare_dependency(include_directories: 'httplib', dependencies: targetos == 'windows' ? [crypt32] : []) libfatx = static_library('fatx', sources: 'fatx/fatx.c') fatx = declare_dependency(include_directories: 'fatx', link_with: libfatx) From 930b5398f7a1d23a0cfcd66015b722bda6754fcc Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 28 Dec 2024 18:50:12 -0700 Subject: [PATCH 018/176] ci: Enable Windows aarch64 builds --- .github/workflows/build.yml | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e226945185a..461c8df6b71 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,7 +54,7 @@ jobs: path: src.tar.gz Windows: - name: Build for Windows (${{ matrix.configuration }}) on Ubuntu + name: Build for Windows (${{ matrix.arch }}, ${{ matrix.configuration }}) on Ubuntu runs-on: ubuntu-latest needs: Init strategy: @@ -62,10 +62,20 @@ jobs: include: - configuration: Debug build_param: --debug - artifact_name: xemu-win-debug + artifact_name: xemu-win-x86_64-debug + arch: x86_64 - configuration: Release build_param: - artifact_name: xemu-win-release + artifact_name: xemu-win-x86_64-release + arch: x86_64 + - configuration: Debug + build_param: --debug + artifact_name: xemu-win-aarch64-debug + arch: aarch64 + - configuration: Release + build_param: + artifact_name: xemu-win-aarch64-release + arch: aarch64 env: DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-8152913 @@ -93,6 +103,8 @@ jobs: -v /tmp/xemu-ccache:/tmp/xemu-ccache \ -e CCACHE_DIR=/tmp/xemu-ccache \ -e CCACHE_MAXSIZE=512M \ + -e CROSSPREFIX=${{ matrix.arch }}-w64-mingw32.static- \ + -e CROSSAR=${{ matrix.arch }}-w64-mingw32.static-ar \ -u $(id -u):$(id -g) \ 
$DOCKER_IMAGE_NAME \ bash -c "ccache -z; ./build.sh -p win64-cross ${{ matrix.build_param }} && ccache -s" @@ -106,16 +118,20 @@ jobs: # DWARF and update + strip the executable. Re-package the original release # and create symbols package. WindowsPdb: - name: Generate PDB for Windows (${{ matrix.configuration }}) + name: Generate PDB for Windows (${{ matrix.arch }}, ${{ matrix.configuration }}) runs-on: windows-latest needs: Windows strategy: matrix: include: - configuration: Debug - artifact_name: xemu-win-debug + artifact_name: xemu-win-x86_64-debug + - configuration: Release + artifact_name: xemu-win-x86_64-release + - configuration: Debug + artifact_name: xemu-win-aarch64-debug - configuration: Release - artifact_name: xemu-win-release + artifact_name: xemu-win-aarch64-release steps: - name: Download artifacts uses: actions/download-artifact@v4 @@ -350,7 +366,7 @@ jobs: Release: if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/v')) runs-on: ubuntu-latest - needs: [Ubuntu, macOSUniversal, WindowsPdb] + needs: [Ubuntu, macOSUniversal, Windows, WindowsPdb] steps: - name: Download artifacts uses: actions/download-artifact@v4 @@ -402,7 +418,7 @@ jobs: PushToPPA: name: Push to PPA Snapshot Branch if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/v')) - needs: [Ubuntu, macOSUniversal, WindowsPdb] + needs: [Ubuntu, macOSUniversal, Windows, WindowsPdb] runs-on: ubuntu-latest steps: - name: Download source package From ec0c337cfa0d792049bf2ed367b43d91981f5128 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 19:44:21 -0700 Subject: [PATCH 019/176] ci: Bump Windows build container --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 461c8df6b71..9cf1bf83e74 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ 
-77,7 +77,7 @@ jobs: artifact_name: xemu-win-aarch64-release arch: aarch64 env: - DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-8152913 + DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-a1342ee steps: - name: Download source package From 794cc3311bed32c8995ef53311127f3a0729ad51 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 16:35:09 -0700 Subject: [PATCH 020/176] ci: Add Windows arch tag to Release job --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9cf1bf83e74..79f4c19bbf8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -395,10 +395,10 @@ jobs: draft: false files: | dist/src.tar.gz/src.tar.gz - dist/xemu-win-debug-pdb/xemu-win-debug.zip - dist/xemu-win-debug-pdb/xemu-win-debug-pdb.zip - dist/xemu-win-release-pdb/xemu-win-release.zip - dist/xemu-win-release-pdb/xemu-win-release-pdb.zip + dist/xemu-win-x86_64-debug-pdb/xemu-win-x86_64-debug.zip + dist/xemu-win-x86_64-debug-pdb/xemu-win-x86_64-debug-pdb.zip + dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release.zip + dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release-pdb.zip dist/xemu-macos-universal-release/xemu-macos-universal-release.zip dist/xemu-macos-universal-debug/xemu-macos-universal-debug.zip dist/xemu-ubuntu-release/xemu/xemu-v${{ env.XEMU_VERSION }}-x86_64.AppImage From db5ee2b66863bfaec2f8f2c4217f759dfb648020 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 15:49:37 -0700 Subject: [PATCH 021/176] ubuntu-win64-cross: Update sdl2 --- ubuntu-win64-cross/sdl2.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ubuntu-win64-cross/sdl2.mk b/ubuntu-win64-cross/sdl2.mk index 9e049495574..7c32905ea1d 100644 --- a/ubuntu-win64-cross/sdl2.mk +++ b/ubuntu-win64-cross/sdl2.mk @@ -4,10 +4,10 @@ PKG := sdl2 $(PKG)_WEBSITE := https://www.libsdl.org/ $(PKG)_DESCR 
:= SDL2 $(PKG)_IGNORE := -$(PKG)_VERSION := 2.30.3 +$(PKG)_VERSION := 2.30.10 $(PKG)_SUBDIR := SDL2-$($(PKG)_VERSION) $(PKG)_FILE := SDL2-$($(PKG)_VERSION).tar.gz -$(PKG)_CHECKSUM := 820440072f8f5b50188c1dae104f2ad25984de268785be40c41a099a510f0aec +$(PKG)_CHECKSUM := f59adf36a0fcf4c94198e7d3d776c1b3824211ab7aeebeb31fe19836661196aa $(PKG)_URL := https://github.com/libsdl-org/SDL/releases/download/release-$($(PKG)_VERSION)/$($(PKG)_FILE) $(PKG)_GH_CONF := libsdl-org/SDL/releases/tag,release-,, $(PKG)_DEPS := cc libiconv libsamplerate From 3ef73d9d831efd9469583ea4e1fc9183828f36f9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 15:49:45 -0700 Subject: [PATCH 022/176] ubuntu-win64-cross: Update spirv-headers --- ubuntu-win64-cross/spirv-headers.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ubuntu-win64-cross/spirv-headers.mk b/ubuntu-win64-cross/spirv-headers.mk index 966540762f4..c7f82bc75cf 100644 --- a/ubuntu-win64-cross/spirv-headers.mk +++ b/ubuntu-win64-cross/spirv-headers.mk @@ -2,10 +2,10 @@ PKG := spirv-headers $(PKG)_WEBSITE := https://github.com/KhronosGroup/SPIRV-Headers $(PKG)_DESCR := SPIRV-Headers $(PKG)_IGNORE := -$(PKG)_VERSION := vulkan-sdk-1.3.283.0 +$(PKG)_VERSION := vulkan-sdk-1.3.296.0 $(PKG)_SUBDIR := SPIRV-Headers-$($(PKG)_VERSION) $(PKG)_FILE := spirv-headers-$($(PKG)_VERSION).tar.gz -$(PKG)_CHECKSUM := a68a25996268841073c01514df7bab8f64e2db1945944b45087e5c40eed12cb9 +$(PKG)_CHECKSUM := 1423d58a1171611d5aba2bf6f8c69c72ef9c38a0aca12c3493e4fda64c9b2dc6 $(PKG)_URL := https://github.com/KhronosGroup/SPIRV-Headers/archive/refs/tags/$($(PKG)_VERSION).tar.gz $(PKG)_DEPS := cc From 2ab23d4e683908536940232d4d0869d2bec1396f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 15:49:53 -0700 Subject: [PATCH 023/176] ubuntu-win64-cross: Update spirv-tools --- ubuntu-win64-cross/spirv-tools.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ubuntu-win64-cross/spirv-tools.mk 
b/ubuntu-win64-cross/spirv-tools.mk index c19c251fcba..6d6d6a6148b 100644 --- a/ubuntu-win64-cross/spirv-tools.mk +++ b/ubuntu-win64-cross/spirv-tools.mk @@ -2,10 +2,10 @@ PKG := spirv-tools $(PKG)_WEBSITE := https://github.com/KhronosGroup/SPIRV-Tools $(PKG)_DESCR := SPIRV-Tools $(PKG)_IGNORE := -$(PKG)_VERSION := vulkan-sdk-1.3.283.0 +$(PKG)_VERSION := vulkan-sdk-1.3.296.0 $(PKG)_SUBDIR := SPIRV-Tools-$($(PKG)_VERSION) $(PKG)_FILE := spirv-tools-$($(PKG)_VERSION).tar.gz -$(PKG)_CHECKSUM := 5e2e5158bdd7442f9e01e13b5b33417b06cddff4965c9c19aab9763ab3603aae +$(PKG)_CHECKSUM := 75aafdf7e731b4b6bfb36a590ddfbb38ebc605d80487f38254da24fe0cb95837 $(PKG)_URL := https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/$($(PKG)_VERSION).tar.gz $(PKG)_DEPS := cc spirv-headers From eab07f5c49387730eea6453bed55414d241bcee2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 15:50:03 -0700 Subject: [PATCH 024/176] ubuntu-win64-cross: Update vulkan-headers --- ubuntu-win64-cross/vulkan-headers.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ubuntu-win64-cross/vulkan-headers.mk b/ubuntu-win64-cross/vulkan-headers.mk index 9cb88ac45c0..8e398ff7ce8 100644 --- a/ubuntu-win64-cross/vulkan-headers.mk +++ b/ubuntu-win64-cross/vulkan-headers.mk @@ -2,10 +2,10 @@ PKG := vulkan-headers $(PKG)_WEBSITE := https://github.com/KhronosGroup/Vulkan-Headers $(PKG)_DESCR := Vulkan-Headers $(PKG)_IGNORE := -$(PKG)_VERSION := vulkan-sdk-1.3.283.0 +$(PKG)_VERSION := vulkan-sdk-1.3.296.0 $(PKG)_SUBDIR := Vulkan-Headers-$($(PKG)_VERSION) $(PKG)_FILE := vulkan-headers-$($(PKG)_VERSION).tar.gz -$(PKG)_CHECKSUM := cf54a812911b4e3e4ff15716c222a8fb9a87c2771c0b86060cb0ca2570ea55a9 +$(PKG)_CHECKSUM := 1e872a0be3890784bbe68dcd89b7e017fed77ba95820841848718c98bda6dc33 $(PKG)_URL := https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/$($(PKG)_VERSION).tar.gz $(PKG)_DEPS := cc From b6d29d487675c885cdef0cf65695e4e100e43136 Mon Sep 17 00:00:00 2001 From: Matt 
Borgerson Date: Mon, 30 Dec 2024 15:49:30 -0700 Subject: [PATCH 025/176] ubuntu-win64-cross: Update glslang --- ubuntu-win64-cross/glslang.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ubuntu-win64-cross/glslang.mk b/ubuntu-win64-cross/glslang.mk index 3df28246220..99bd7e1b47a 100644 --- a/ubuntu-win64-cross/glslang.mk +++ b/ubuntu-win64-cross/glslang.mk @@ -2,10 +2,10 @@ PKG := glslang $(PKG)_WEBSITE := https://github.com/KhronosGroup/glslang $(PKG)_DESCR := glslang $(PKG)_IGNORE := -$(PKG)_VERSION := 14.3.0 +$(PKG)_VERSION := 15.0.0 $(PKG)_SUBDIR := glslang-$($(PKG)_VERSION) $(PKG)_FILE := glslang-$($(PKG)_VERSION).tar.gz -$(PKG)_CHECKSUM := be6339048e20280938d9cb399fcdd06e04f8654d43e170e8cce5a56c9a754284 +$(PKG)_CHECKSUM := c31c8c2e89af907507c0631273989526ee7d5cdf7df95ececd628fd7b811e064 $(PKG)_URL := https://github.com/KhronosGroup/glslang/archive/refs/tags/$($(PKG)_VERSION).tar.gz $(PKG)_DEPS := cc spirv-tools From 4ad2374e32692d2372b6c0b156393228fd93cd28 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 17:01:35 -0700 Subject: [PATCH 026/176] ui: Fix update org, add Windows arch tag to update --- ui/xui/update.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ui/xui/update.cc b/ui/xui/update.cc index 8f075215bd6..b29000fda2c 100644 --- a/ui/xui/update.cc +++ b/ui/xui/update.cc @@ -27,9 +27,15 @@ #if defined(_WIN32) const char *version_host = "raw.githubusercontent.com"; -const char *version_uri = "/mborgerson/xemu/ppa-snapshot/XEMU_VERSION"; +const char *version_uri = "/xemu-project/xemu/ppa-snapshot/XEMU_VERSION"; const char *download_host = "github.com"; -const char *download_uri = "/mborgerson/xemu/releases/latest/download/xemu-win-release.zip"; +#if defined(__x86_64__) +const char *download_uri = "/xemu-project/xemu/releases/latest/download/xemu-win-x86_64-release.zip"; +#elif defined(__aarch64__) +const char *download_uri = 
"/xemu-project/xemu/releases/latest/download/xemu-win-aarch64-release.zip"; +#else +#error Unknown update path +#endif #else FIXME #endif From 8184d5ee29a744c77d124f7e22e85dc33e0370a1 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 19:32:24 -0700 Subject: [PATCH 027/176] ci: Bump Windows build container --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 79f4c19bbf8..1d3bb790167 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -77,7 +77,7 @@ jobs: artifact_name: xemu-win-aarch64-release arch: aarch64 env: - DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-a1342ee + DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-b6d29d4 steps: - name: Download source package From 4261541d6ad8d3d054aea97154a6b484b6b4aaf8 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 21:26:54 -0700 Subject: [PATCH 028/176] ci: Add missing arch var to WindowsPdb job matrix --- .github/workflows/build.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1d3bb790167..63a8a938cc9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -126,12 +126,16 @@ jobs: include: - configuration: Debug artifact_name: xemu-win-x86_64-debug + arch: x86_64 - configuration: Release artifact_name: xemu-win-x86_64-release + arch: x86_64 - configuration: Debug artifact_name: xemu-win-aarch64-debug + arch: aarch64 - configuration: Release artifact_name: xemu-win-aarch64-release + arch: aarch64 steps: - name: Download artifacts uses: actions/download-artifact@v4 From 15338ec31c9ec68a22b3105b5ea21ff7d62c5e2f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 20:16:08 -0700 Subject: [PATCH 029/176] ci: Strip Windows release executables --- .github/workflows/build.yml | 3 +++ 1 file changed, 3 insertions(+) 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 63a8a938cc9..756e7c3bdf2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -145,9 +145,12 @@ jobs: - name: Generate PDB run: | Invoke-WebRequest -Uri "https://github.com/rainers/cv2pdb/releases/download/v0.52/cv2pdb-0.52.zip" -OutFile "cv2pdb.zip" + Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20241217/llvm-mingw-20241217-ucrt-x86_64.zip" -OutFile "llvm-mingw.zip" 7z x -ocv2pdb -y cv2pdb.zip + 7z x -y llvm-mingw.zip cd ${{ matrix.artifact_name }} ../cv2pdb/cv2pdb64.exe xemu.exe + ../llvm-mingw-20241217-ucrt-x86_64/bin/${{ matrix.arch }}-w64-mingw32-strip.exe xemu.exe mkdir ../dist 7z a -tzip ../dist/${{ matrix.artifact_name }}.zip * "-xr!*.pdb" 7z a -tzip ../dist/${{ matrix.artifact_name }}-pdb.zip "-ir!*.pdb" From e639e0cdb7a49e66467e1f8685f14f58e88f33c4 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 22:13:47 -0700 Subject: [PATCH 030/176] ci: Add package alias for old Windows release name --- .github/workflows/build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 756e7c3bdf2..d7aeab6340a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -393,6 +393,10 @@ jobs: pushd dist/xemu-ubuntu-debug tar xvf xemu-ubuntu-debug.tgz popd + # Architecture tags were recently added to the Windows release path. Provide an alias with the former name for a while. 
+ - name: Add transitionary package alias + run: | + cp dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release.zip dist/xemu-win-x86_64-release-pdb/xemu-win-release.zip - name: Publish release uses: softprops/action-gh-release@v1 with: @@ -405,6 +409,7 @@ jobs: dist/xemu-win-x86_64-debug-pdb/xemu-win-x86_64-debug.zip dist/xemu-win-x86_64-debug-pdb/xemu-win-x86_64-debug-pdb.zip dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release.zip + dist/xemu-win-x86_64-release-pdb/xemu-win-release.zip dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release-pdb.zip dist/xemu-macos-universal-release/xemu-macos-universal-release.zip dist/xemu-macos-universal-debug/xemu-macos-universal-debug.zip From a5385803db96d4cbaf293a223e5d097bf7ffc0d4 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 031/176] nv2a: Add Vulkan renderer --- .clang-format | 4 +- .gitmodules | 13 +- config_spec.yml | 6 + configure | 2 +- debian/control | 3 + hw/xbox/nv2a/debug.h | 98 +- hw/xbox/nv2a/gl/meson.build | 6 - hw/xbox/nv2a/meson.build | 12 +- hw/xbox/nv2a/nv2a.c | 28 +- hw/xbox/nv2a/nv2a.h | 2 +- hw/xbox/nv2a/nv2a_int.h | 374 +- hw/xbox/nv2a/nv2a_regs.h | 11 + hw/xbox/nv2a/pfifo.c | 62 +- hw/xbox/nv2a/pgraph.c | 7775 ----------------- hw/xbox/nv2a/pgraph/debug_renderdoc.c | 84 + hw/xbox/nv2a/pgraph/gl/blit.c | 174 + hw/xbox/nv2a/pgraph/gl/constants.h | 322 + hw/xbox/nv2a/{ => pgraph/gl}/debug.c | 63 +- hw/xbox/nv2a/pgraph/gl/debug.h | 60 + hw/xbox/nv2a/pgraph/gl/display.c | 407 + hw/xbox/nv2a/pgraph/gl/draw.c | 528 ++ hw/xbox/nv2a/pgraph/gl/meson.build | 12 + hw/xbox/nv2a/pgraph/gl/renderer.c | 201 + hw/xbox/nv2a/pgraph/gl/renderer.h | 283 + hw/xbox/nv2a/pgraph/gl/reports.c | 111 + hw/xbox/nv2a/pgraph/gl/shaders.c | 1102 +++ hw/xbox/nv2a/pgraph/gl/surface.c | 1400 +++ hw/xbox/nv2a/pgraph/gl/texture.c | 819 ++ hw/xbox/nv2a/pgraph/gl/vertex.c | 283 + hw/xbox/nv2a/pgraph/glsl/common.c | 58 + hw/xbox/nv2a/pgraph/glsl/common.h | 38 + 
hw/xbox/nv2a/pgraph/glsl/geom.c | 228 + hw/xbox/nv2a/pgraph/glsl/geom.h | 34 + hw/xbox/nv2a/pgraph/glsl/meson.build | 8 + hw/xbox/nv2a/{ => pgraph/glsl}/psh.c | 172 +- hw/xbox/nv2a/pgraph/glsl/psh.h | 41 + hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 497 ++ hw/xbox/nv2a/pgraph/glsl/vsh-ff.h | 31 + .../nv2a/{vsh.c => pgraph/glsl/vsh-prog.c} | 34 +- hw/xbox/nv2a/pgraph/glsl/vsh-prog.h | 35 + hw/xbox/nv2a/pgraph/glsl/vsh.c | 274 + hw/xbox/nv2a/pgraph/glsl/vsh.h | 33 + hw/xbox/nv2a/pgraph/meson.build | 19 + .../{pgraph_methods.h => pgraph/methods.h} | 0 hw/xbox/nv2a/pgraph/null/meson.build | 3 + hw/xbox/nv2a/pgraph/null/renderer.c | 146 + hw/xbox/nv2a/pgraph/pgraph.c | 2874 ++++++ hw/xbox/nv2a/pgraph/pgraph.h | 383 + hw/xbox/nv2a/pgraph/profile.c | 74 + hw/xbox/nv2a/{ => pgraph}/psh.h | 8 +- hw/xbox/nv2a/pgraph/rdi.c | 60 + hw/xbox/nv2a/{ => pgraph}/s3tc.c | 78 +- hw/xbox/nv2a/{ => pgraph}/s3tc.h | 27 +- hw/xbox/nv2a/pgraph/shaders.c | 295 + hw/xbox/nv2a/{ => pgraph}/shaders.h | 69 +- hw/xbox/nv2a/pgraph/surface.h | 35 + hw/xbox/nv2a/{ => pgraph}/swizzle.c | 0 hw/xbox/nv2a/{ => pgraph}/swizzle.h | 6 +- hw/xbox/nv2a/pgraph/texture.c | 405 + hw/xbox/nv2a/pgraph/texture.h | 67 + .../thirdparty/gloffscreen/common.c} | 0 .../thirdparty/gloffscreen}/gloffscreen.h | 0 .../thirdparty/gloffscreen/sdl.c} | 2 +- .../nv2a/{ => pgraph}/thirdparty/meson.build | 6 + .../nv2a/{ => pgraph}/thirdparty/nv2a_vsh_cpu | 0 hw/xbox/nv2a/pgraph/util.h | 86 + hw/xbox/nv2a/pgraph/vertex.c | 131 + hw/xbox/nv2a/pgraph/vk/blit.c | 177 + hw/xbox/nv2a/pgraph/vk/buffer.c | 206 + hw/xbox/nv2a/pgraph/vk/command.c | 119 + hw/xbox/nv2a/pgraph/vk/constants.h | 418 + hw/xbox/nv2a/pgraph/vk/debug.c | 59 + hw/xbox/nv2a/pgraph/vk/debug.h | 61 + hw/xbox/nv2a/pgraph/vk/display.c | 896 ++ hw/xbox/nv2a/pgraph/vk/draw.c | 1916 ++++ hw/xbox/nv2a/pgraph/vk/glsl.c | 380 + hw/xbox/nv2a/pgraph/vk/glsl.h | 205 + hw/xbox/nv2a/pgraph/vk/image.c | 209 + hw/xbox/nv2a/pgraph/vk/instance.c | 662 ++ hw/xbox/nv2a/pgraph/vk/meson.build | 
24 + hw/xbox/nv2a/pgraph/vk/renderer.c | 266 + hw/xbox/nv2a/pgraph/vk/renderer.h | 526 ++ hw/xbox/nv2a/pgraph/vk/reports.c | 134 + hw/xbox/nv2a/pgraph/vk/shaders.c | 797 ++ hw/xbox/nv2a/pgraph/vk/surface-compute.c | 473 + hw/xbox/nv2a/pgraph/vk/surface.c | 1485 ++++ hw/xbox/nv2a/pgraph/vk/texture.c | 1456 +++ hw/xbox/nv2a/pgraph/vk/vertex.c | 312 + hw/xbox/nv2a/{ => pgraph}/vsh.h | 9 +- hw/xbox/nv2a/shaders.c | 1599 ---- hw/xbox/nv2a/shaders_common.h | 125 - {hw/xbox/nv2a => include/qemu}/lru.h | 50 +- include/qemu/mstring.h | 82 + licenses/SPIRV-Reflect.license.txt | 201 + licenses/VulkanMemoryAllocator.license.txt | 19 + licenses/volk.license.txt | 19 + meson.build | 30 + scripts/archive-source.sh | 6 +- scripts/gen-license.py | 31 +- thirdparty/SPIRV-Reflect | 1 + thirdparty/VulkanMemoryAllocator | 1 + thirdparty/meson.build | 12 + thirdparty/renderdoc_app.h | 88 +- thirdparty/vma.cc | 2 + thirdparty/volk | 1 + ui/meson.build | 4 - ui/xemu.c | 4 +- ui/xui/main-menu.cc | 10 +- ui/xui/main.cc | 2 +- ui/xui/menubar.cc | 6 +- util/meson.build | 1 + util/mstring.c | 49 + xemu-version.c | 3 + xemu-version.h | 3 + 114 files changed, 23344 insertions(+), 10297 deletions(-) delete mode 100644 hw/xbox/nv2a/gl/meson.build delete mode 100644 hw/xbox/nv2a/pgraph.c create mode 100644 hw/xbox/nv2a/pgraph/debug_renderdoc.c create mode 100644 hw/xbox/nv2a/pgraph/gl/blit.c create mode 100644 hw/xbox/nv2a/pgraph/gl/constants.h rename hw/xbox/nv2a/{ => pgraph/gl}/debug.c (77%) create mode 100644 hw/xbox/nv2a/pgraph/gl/debug.h create mode 100644 hw/xbox/nv2a/pgraph/gl/display.c create mode 100644 hw/xbox/nv2a/pgraph/gl/draw.c create mode 100644 hw/xbox/nv2a/pgraph/gl/meson.build create mode 100644 hw/xbox/nv2a/pgraph/gl/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/gl/renderer.h create mode 100644 hw/xbox/nv2a/pgraph/gl/reports.c create mode 100644 hw/xbox/nv2a/pgraph/gl/shaders.c create mode 100644 hw/xbox/nv2a/pgraph/gl/surface.c create mode 100644 
hw/xbox/nv2a/pgraph/gl/texture.c create mode 100644 hw/xbox/nv2a/pgraph/gl/vertex.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/common.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/common.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/geom.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/geom.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/meson.build rename hw/xbox/nv2a/{ => pgraph/glsl}/psh.c (90%) create mode 100644 hw/xbox/nv2a/pgraph/glsl/psh.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-ff.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-ff.h rename hw/xbox/nv2a/{vsh.c => pgraph/glsl/vsh-prog.c} (97%) create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh-prog.h create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh.c create mode 100644 hw/xbox/nv2a/pgraph/glsl/vsh.h create mode 100644 hw/xbox/nv2a/pgraph/meson.build rename hw/xbox/nv2a/{pgraph_methods.h => pgraph/methods.h} (100%) create mode 100644 hw/xbox/nv2a/pgraph/null/meson.build create mode 100644 hw/xbox/nv2a/pgraph/null/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/pgraph.c create mode 100644 hw/xbox/nv2a/pgraph/pgraph.h create mode 100644 hw/xbox/nv2a/pgraph/profile.c rename hw/xbox/nv2a/{ => pgraph}/psh.h (96%) create mode 100644 hw/xbox/nv2a/pgraph/rdi.c rename hw/xbox/nv2a/{ => pgraph}/s3tc.c (71%) rename hw/xbox/nv2a/{ => pgraph}/s3tc.h (63%) create mode 100644 hw/xbox/nv2a/pgraph/shaders.c rename hw/xbox/nv2a/{ => pgraph}/shaders.h (56%) create mode 100644 hw/xbox/nv2a/pgraph/surface.h rename hw/xbox/nv2a/{ => pgraph}/swizzle.c (100%) rename hw/xbox/nv2a/{ => pgraph}/swizzle.h (94%) create mode 100644 hw/xbox/nv2a/pgraph/texture.c create mode 100644 hw/xbox/nv2a/pgraph/texture.h rename hw/xbox/nv2a/{gl/gloffscreen_common.c => pgraph/thirdparty/gloffscreen/common.c} (100%) rename hw/xbox/nv2a/{gl => pgraph/thirdparty/gloffscreen}/gloffscreen.h (100%) rename hw/xbox/nv2a/{gl/gloffscreen_sdl.c => pgraph/thirdparty/gloffscreen/sdl.c} (98%) rename hw/xbox/nv2a/{ => pgraph}/thirdparty/meson.build (62%) rename 
hw/xbox/nv2a/{ => pgraph}/thirdparty/nv2a_vsh_cpu (100%) create mode 100644 hw/xbox/nv2a/pgraph/util.h create mode 100644 hw/xbox/nv2a/pgraph/vertex.c create mode 100644 hw/xbox/nv2a/pgraph/vk/blit.c create mode 100644 hw/xbox/nv2a/pgraph/vk/buffer.c create mode 100644 hw/xbox/nv2a/pgraph/vk/command.c create mode 100644 hw/xbox/nv2a/pgraph/vk/constants.h create mode 100644 hw/xbox/nv2a/pgraph/vk/debug.c create mode 100644 hw/xbox/nv2a/pgraph/vk/debug.h create mode 100644 hw/xbox/nv2a/pgraph/vk/display.c create mode 100644 hw/xbox/nv2a/pgraph/vk/draw.c create mode 100644 hw/xbox/nv2a/pgraph/vk/glsl.c create mode 100644 hw/xbox/nv2a/pgraph/vk/glsl.h create mode 100644 hw/xbox/nv2a/pgraph/vk/image.c create mode 100644 hw/xbox/nv2a/pgraph/vk/instance.c create mode 100644 hw/xbox/nv2a/pgraph/vk/meson.build create mode 100644 hw/xbox/nv2a/pgraph/vk/renderer.c create mode 100644 hw/xbox/nv2a/pgraph/vk/renderer.h create mode 100644 hw/xbox/nv2a/pgraph/vk/reports.c create mode 100644 hw/xbox/nv2a/pgraph/vk/shaders.c create mode 100644 hw/xbox/nv2a/pgraph/vk/surface-compute.c create mode 100644 hw/xbox/nv2a/pgraph/vk/surface.c create mode 100644 hw/xbox/nv2a/pgraph/vk/texture.c create mode 100644 hw/xbox/nv2a/pgraph/vk/vertex.c rename hw/xbox/nv2a/{ => pgraph}/vsh.h (92%) delete mode 100644 hw/xbox/nv2a/shaders.c delete mode 100644 hw/xbox/nv2a/shaders_common.h rename {hw/xbox/nv2a => include/qemu}/lru.h (87%) create mode 100644 include/qemu/mstring.h create mode 100644 licenses/SPIRV-Reflect.license.txt create mode 100644 licenses/VulkanMemoryAllocator.license.txt create mode 100644 licenses/volk.license.txt create mode 160000 thirdparty/SPIRV-Reflect create mode 160000 thirdparty/VulkanMemoryAllocator create mode 100644 thirdparty/meson.build create mode 100644 thirdparty/vma.cc create mode 160000 thirdparty/volk create mode 100644 util/mstring.c diff --git a/.clang-format b/.clang-format index 8750a94dc87..3779a034030 100644 --- a/.clang-format +++ b/.clang-format @@ 
-71,8 +71,8 @@ IndentWidth: 4 AccessModifierOffset: -4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' +#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? +#MacroBlockEnd: '.*_END$' MaxEmptyLinesToKeep: 2 #PenaltyBreakBeforeFirstCallParameter: 19 #PenaltyBreakComment: 300 diff --git a/.gitmodules b/.gitmodules index 4118661130c..420d7d9cd2d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,9 +82,18 @@ [submodule "tomlplusplus"] path = tomlplusplus url = https://github.com/marzer/tomlplusplus -[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"] - path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu +[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"] + path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu url = https://github.com/abaire/nv2a_vsh_cpu.git [submodule "ui/thirdparty/httplib"] path = ui/thirdparty/httplib url = https://github.com/yhirose/cpp-httplib +[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] + path = thirdparty/VulkanMemoryAllocator + url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator +[submodule "thirdparty/volk"] + path = thirdparty/volk + url = https://github.com/zeux/volk +[submodule "thirdparty/SPIRV-Reflect"] + path = thirdparty/SPIRV-Reflect + url = https://github.com/KhronosGroup/SPIRV-Reflect diff --git a/config_spec.yml b/config_spec.yml index b858606e685..f2c3736a8f9 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -130,6 +130,12 @@ input: default: 18 # w display: + renderer: + type: enum + values: ["NULL", OPENGL, VULKAN] + default: OPENGL + vulkan: + validation_layers: bool quality: surface_scale: type: integer diff --git a/configure b/configure index 11471698b68..880f30c4bd2 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus 
genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" git="git" # Don't accept a target_list environment variable. diff --git a/debian/control b/debian/control index 91ed61433f4..30603057eab 100644 --- a/debian/control +++ b/debian/control @@ -16,6 +16,9 @@ Build-Depends: debhelper (>= 11), libssl-dev, libpcap-dev, libslirp-dev, + glslang-dev, + libvulkan-dev, + Standards-Version: 3.9.8 Homepage: https://xemu.app XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 0c2c3d5f769..8a7fcc14492 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -1,8 +1,9 @@ /* - * QEMU Geforce NV2A debug helpers + * QEMU Geforce NV2A profiling and debug helpers * - * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2023 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +19,8 @@ * License along with this library; if not, see . */ -#ifndef HW_NV2A_DEBUG_H -#define HW_NV2A_DEBUG_H +#ifndef HW_XBOX_NV2A_DEBUG_H +#define HW_XBOX_NV2A_DEBUG_H #include @@ -36,54 +37,6 @@ # define NV2A_DPRINTF(format, ...) do { } while (0) #endif -// #define DEBUG_NV2A_GL -#ifdef DEBUG_NV2A_GL - -#include -#include "gl/gloffscreen.h" -#include "config-host.h" - -void gl_debug_initialize(void); -void gl_debug_message(bool cc, const char *fmt, ...); -void gl_debug_group_begin(const char *fmt, ...); -void gl_debug_group_end(void); -void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); -void gl_debug_frame_terminator(void); - -# define NV2A_GL_DPRINTF(cc, format, ...) 
\ - gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_BEGIN(format, ...) \ - gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_END() \ - gl_debug_group_end() -# define NV2A_GL_DLABEL(target, name, format, ...) \ - gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) -#define NV2A_GL_DFRAME_TERMINATOR() \ - gl_debug_frame_terminator() - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void); -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); -#endif - -#ifdef __cplusplus -} -#endif - -#else -# define NV2A_GL_DPRINTF(cc, format, ...) do { \ - if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ - } while (0) -# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) -# define NV2A_GL_DGROUP_END() do { } while (0) -# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0) -# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) -#endif - /* Debug prints to identify when unimplemented or unconfirmed features * are being exercised. These cases likely result in graphical problems of * varying degree, but should otherwise not crash the system. 
Enable this @@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); #endif #define NV2A_PROF_COUNTERS_XMAC \ + _X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_SURFACE_CREATE) \ + _X(NV2A_PROF_FINISH_SURFACE_DOWN) \ + _X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \ + _X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_PRESENTING) \ + _X(NV2A_PROF_FINISH_FLIP_STALL) \ + _X(NV2A_PROF_FINISH_FLUSH) \ + _X(NV2A_PROF_CLEAR) \ + _X(NV2A_PROF_QUEUE_SUBMIT) \ + _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \ + _X(NV2A_PROF_PIPELINE_NOTDIRTY) \ + _X(NV2A_PROF_PIPELINE_GEN) \ + _X(NV2A_PROF_PIPELINE_BIND) \ + _X(NV2A_PROF_PIPELINE_MERGE) \ + _X(NV2A_PROF_PIPELINE_RENDERPASSES) \ _X(NV2A_PROF_BEGIN_ENDS) \ _X(NV2A_PROF_DRAW_ARRAYS) \ _X(NV2A_PROF_INLINE_BUFFERS) \ @@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); _X(NV2A_PROF_SHADER_GEN) \ _X(NV2A_PROF_SHADER_BIND) \ _X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \ + _X(NV2A_PROF_SHADER_UBO_DIRTY) \ + _X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \ _X(NV2A_PROF_ATTR_BIND) \ _X(NV2A_PROF_TEX_UPLOAD) \ - _X(NV2A_PROF_TEX_BIND) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \ + _X(NV2A_PROF_SURF_SWIZZLE) \ + _X(NV2A_PROF_SURF_CREATE) \ _X(NV2A_PROF_SURF_DOWNLOAD) \ _X(NV2A_PROF_SURF_UPLOAD) \ _X(NV2A_PROF_SURF_TO_TEX) \ _X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \ + _X(NV2A_PROF_QUEUE_SUBMIT_1) \ + _X(NV2A_PROF_QUEUE_SUBMIT_2) \ + _X(NV2A_PROF_QUEUE_SUBMIT_3) \ + _X(NV2A_PROF_QUEUE_SUBMIT_4) \ + _X(NV2A_PROF_QUEUE_SUBMIT_5) \ enum NV2A_PROF_COUNTERS_ENUM { #define _X(x) x, @@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats; const char *nv2a_profile_get_counter_name(unsigned int cnt); int nv2a_profile_get_counter_value(unsigned int cnt); +void nv2a_profile_increment(void); +void nv2a_profile_flip_stall(void); + +static inline void 
nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) +{ + g_nv2a_stats.frame_working.counters[cnt] += 1; +} + +#ifdef CONFIG_RENDERDOC +void nv2a_dbg_renderdoc_init(void); +void *nv2a_dbg_renderdoc_get_api(void); +bool nv2a_dbg_renderdoc_available(void); +void nv2a_dbg_renderdoc_capture_frames(int num_frames); +extern int renderdoc_capture_frames; +#endif #ifdef __cplusplus } diff --git a/hw/xbox/nv2a/gl/meson.build b/hw/xbox/nv2a/gl/meson.build deleted file mode 100644 index 973a9aa8c11..00000000000 --- a/hw/xbox/nv2a/gl/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -softmmu_ss.add([sdl, files( - 'gloffscreen_common.c', - 'gloffscreen_sdl.c', - )]) - -# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS) diff --git a/hw/xbox/nv2a/meson.build b/hw/xbox/nv2a/meson.build index d3b159a3bc8..29eff86e273 100644 --- a/hw/xbox/nv2a/meson.build +++ b/hw/xbox/nv2a/meson.build @@ -1,27 +1,17 @@ specific_ss.add(files( 'nv2a.c', - 'debug.c', 'pbus.c', 'pcrtc.c', 'pfb.c', 'pfifo.c', - 'pgraph.c', 'pmc.c', 'pramdac.c', 'prmcio.c', 'prmdio.c', 'prmvio.c', - 'psh.c', 'ptimer.c', 'pvideo.c', - 'shaders.c', 'stubs.c', 'user.c', - 'vsh.c', - 'swizzle.c', - 's3tc.c', )) -subdir('gl') - -subdir('thirdparty') -specific_ss.add(nv2a_vsh_cpu) +subdir('pgraph') diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index e068f76dc91..7b161131159 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s, *pline_compare = line_compare; } +const uint8_t *nv2a_get_dac_palette(void) +{ + return g_nv2a->puserdac.palette; +} + +int nv2a_get_screen_off(void) +{ + return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; +} + static void nv2a_vga_gfx_update(void *opaque) { VGACommonState *vga = opaque; @@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d) } memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs)); - memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs)); + memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_)); 
memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs)); d->pcrtc.start = 0; @@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) if (state == RUN_STATE_SAVE_VM) { nv2a_lock_fifo(d); qatomic_set(&d->pfifo.halt, true); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); + d->pgraph.renderer->ops.pre_savevm_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); + d->pgraph.renderer->ops.pre_savevm_wait(d); qemu_mutex_lock_iothread(); nv2a_lock_fifo(d); } else if (state == RUN_STATE_RESTORE_VM) { @@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) nv2a_unlock_fifo(d); } else if (state == RUN_STATE_SHUTDOWN) { nv2a_lock_fifo(d); - qatomic_set(&d->pgraph.shader_cache_writeback_pending, true); - qemu_event_reset(&d->pgraph.shader_cache_writeback_complete); + d->pgraph.renderer->ops.pre_shutdown_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - qemu_event_wait(&d->pgraph.shader_cache_writeback_complete); + d->pgraph.renderer->ops.pre_shutdown_wait(d); qemu_mutex_lock_iothread(); } } @@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = { VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState), VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250), - VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250), + VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000), VMSTATE_UINT32(pmc.pending_interrupts, NV2AState), VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState), 
VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState), diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h index 35b63749e46..a5c4468debe 100644 --- a/hw/xbox/nv2a/nv2a.h +++ b/hw/xbox/nv2a/nv2a.h @@ -22,7 +22,7 @@ #define HW_NV2A_H void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); -void nv2a_gl_context_init(void); +void nv2a_context_init(void); int nv2a_get_framebuffer_surface(void); void nv2a_set_surface_scale_factor(unsigned int scale); unsigned int nv2a_get_surface_scale_factor(void); diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index 31ab6d89ca6..9b0189ebc8a 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -44,25 +44,12 @@ #include "cpu.h" #include "trace.h" -#include "swizzle.h" -#include "lru.h" -#include "gl/gloffscreen.h" #include "nv2a.h" +#include "pgraph/pgraph.h" #include "debug.h" -#include "shaders.h" #include "nv2a_regs.h" -#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) - -#define SET_MASK(v, mask, val) \ - ({ \ - const unsigned int __val = (val); \ - const unsigned int __mask = (mask); \ - (v) &= ~(__mask); \ - (v) |= ((__val) << ctz32(__mask)) & (__mask); \ - }) - #define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a") enum FIFOEngine { @@ -78,347 +65,6 @@ typedef struct DMAObject { hwaddr limit; } DMAObject; -typedef struct VertexAttribute { - bool dma_select; - hwaddr offset; - - /* inline arrays are packed in order? 
- * Need to pass the offset to converted attributes */ - unsigned int inline_array_offset; - - float inline_value[4]; - - unsigned int format; - unsigned int size; /* size of the data type */ - unsigned int count; /* number of components */ - uint32_t stride; - - bool needs_conversion; - - float *inline_buffer; - bool inline_buffer_populated; - - GLint gl_count; - GLenum gl_type; - GLboolean gl_normalize; - - GLuint gl_inline_buffer; -} VertexAttribute; - -typedef struct SurfaceFormatInfo { - unsigned int bytes_per_pixel; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_attachment; -} SurfaceFormatInfo; - -typedef struct Surface { - bool draw_dirty; - bool buffer_dirty; - bool write_enabled_cache; - unsigned int pitch; - - hwaddr offset; -} Surface; - -typedef struct SurfaceShape { - unsigned int z_format; - unsigned int color_format; - unsigned int zeta_format; - unsigned int log_width, log_height; - unsigned int clip_x, clip_y; - unsigned int clip_width, clip_height; - unsigned int anti_aliasing; -} SurfaceShape; - -typedef struct SurfaceBinding { - QTAILQ_ENTRY(SurfaceBinding) entry; - MemAccessCallback *access_cb; - - hwaddr vram_addr; - - SurfaceFormatInfo fmt; - SurfaceShape shape; - uintptr_t dma_addr; - uintptr_t dma_len; - bool color; - bool swizzle; - - unsigned int width; - unsigned int height; - unsigned int pitch; - size_t size; - - GLuint gl_buffer; - - bool cleared; - int frame_time; - int draw_time; - bool draw_dirty; - bool download_pending; - bool upload_pending; -} SurfaceBinding; - -typedef struct TextureShape { - bool cubemap; - unsigned int dimensionality; - unsigned int color_format; - unsigned int levels; - unsigned int width, height, depth; - bool border; - - unsigned int min_mipmap_level, max_mipmap_level; - unsigned int pitch; -} TextureShape; - -typedef struct TextureBinding { - GLenum gl_target; - GLuint gl_texture; - unsigned int refcnt; - int draw_time; - uint64_t data_hash; - unsigned int scale; - 
unsigned int min_filter; - unsigned int mag_filter; - unsigned int addru; - unsigned int addrv; - unsigned int addrp; - uint32_t border_color; - bool border_color_set; -} TextureBinding; - -typedef struct TextureKey { - TextureShape state; - hwaddr texture_vram_offset; - hwaddr texture_length; - hwaddr palette_vram_offset; - hwaddr palette_length; -} TextureKey; - -typedef struct TextureLruNode { - LruNode node; - TextureKey key; - TextureBinding *binding; - bool possibly_dirty; -} TextureLruNode; - -typedef struct VertexKey { - size_t count; - GLuint gl_type; - GLboolean gl_normalize; - size_t stride; - hwaddr addr; -} VertexKey; - -typedef struct VertexLruNode { - LruNode node; - VertexKey key; - GLuint gl_buffer; - bool initialized; -} VertexLruNode; - -typedef struct KelvinState { - hwaddr object_instance; -} KelvinState; - -typedef struct ContextSurfaces2DState { - hwaddr object_instance; - hwaddr dma_image_source; - hwaddr dma_image_dest; - unsigned int color_format; - unsigned int source_pitch, dest_pitch; - hwaddr source_offset, dest_offset; -} ContextSurfaces2DState; - -typedef struct ImageBlitState { - hwaddr object_instance; - hwaddr context_surfaces; - unsigned int operation; - unsigned int in_x, in_y; - unsigned int out_x, out_y; - unsigned int width, height; -} ImageBlitState; - -typedef struct BetaState { - hwaddr object_instance; - uint32_t beta; -} BetaState; - -typedef struct QueryReport { - QSIMPLEQ_ENTRY(QueryReport) entry; - bool clear; - uint32_t parameter; - unsigned int query_count; - GLuint *queries; -} QueryReport; - -typedef struct PGRAPHState { - QemuMutex lock; - - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - int frame_time; - int draw_time; - - struct s2t_rndr { - GLuint fbo, vao, vbo, prog; - GLuint tex_loc, surface_size_loc; - } s2t_rndr; - - struct disp_rndr { - GLuint fbo, vao, vbo, prog; - GLuint display_size_loc; - GLuint line_offset_loc; - GLuint tex_loc; - GLuint pvideo_tex; - GLint pvideo_enable_loc; - GLint 
pvideo_tex_loc; - GLint pvideo_in_pos_loc; - GLint pvideo_pos_loc; - GLint pvideo_scale_loc; - GLint pvideo_color_key_enable_loc; - GLint pvideo_color_key_loc; - GLint palette_loc[256]; - } disp_rndr; - - /* subchannels state we're not sure the location of... */ - ContextSurfaces2DState context_surfaces_2d; - ImageBlitState image_blit; - KelvinState kelvin; - BetaState beta; - - hwaddr dma_color, dma_zeta; - Surface surface_color, surface_zeta; - unsigned int surface_type; - SurfaceShape surface_shape; - SurfaceShape last_surface_shape; - QTAILQ_HEAD(, SurfaceBinding) surfaces; - SurfaceBinding *color_binding, *zeta_binding; - struct { - int clip_x; - int clip_width; - int clip_y; - int clip_height; - int width; - int height; - } surface_binding_dim; // FIXME: Refactor - - hwaddr dma_a, dma_b; - Lru texture_cache; - TextureLruNode *texture_cache_entries; - bool texture_dirty[NV2A_MAX_TEXTURES]; - TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; - - Lru shader_cache; - ShaderLruNode *shader_cache_entries; - ShaderBinding *shader_binding; - QemuMutex shader_cache_lock; - QemuThread shader_disk_thread; - - bool texture_matrix_enable[NV2A_MAX_TEXTURES]; - - GLuint gl_framebuffer; - - GLuint gl_display_buffer; - GLint gl_display_buffer_internal_format; - GLsizei gl_display_buffer_width; - GLsizei gl_display_buffer_height; - GLenum gl_display_buffer_format; - GLenum gl_display_buffer_type; - - hwaddr dma_state; - hwaddr dma_notifies; - hwaddr dma_semaphore; - - hwaddr dma_report; - hwaddr report_offset; - bool zpass_pixel_count_enable; - unsigned int zpass_pixel_count_result; - unsigned int gl_zpass_pixel_count_query_count; - GLuint *gl_zpass_pixel_count_queries; - QSIMPLEQ_HEAD(, QueryReport) report_queue; - - hwaddr dma_vertex_a, dma_vertex_b; - - uint32_t primitive_mode; - - bool enable_vertex_program_write; - - uint32_t vertex_state_shader_v0[4]; - uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; - bool program_data_dirty; - - uint32_t 
vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; - - /* lighting constant arrays */ - uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; - bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; - uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; - bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; - uint32_t ltc1[NV2A_LTC1_COUNT][4]; - bool ltc1_dirty[NV2A_LTC1_COUNT]; - - float material_alpha; - - // should figure out where these are in lighting context - float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; - float light_infinite_direction[NV2A_MAX_LIGHTS][3]; - float light_local_position[NV2A_MAX_LIGHTS][3]; - float light_local_attenuation[NV2A_MAX_LIGHTS][3]; - - float point_params[8]; - - VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; - uint16_t compressed_attrs; - - Lru element_cache; - VertexLruNode *element_cache_entries; - - unsigned int inline_array_length; - uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; - GLuint gl_inline_array_buffer; - - unsigned int inline_elements_length; - uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_buffer_length; - - unsigned int draw_arrays_length; - unsigned int draw_arrays_min_start; - unsigned int draw_arrays_max_count; - /* FIXME: Unknown size, possibly endless, 1250 will do for now */ - /* Keep in sync with size used in nv2a.c */ - GLint gl_draw_arrays_start[1250]; - GLsizei gl_draw_arrays_count[1250]; - bool draw_arrays_prevent_connect; - - GLuint gl_memory_buffer; - GLuint gl_vertex_array; - - uint32_t regs[0x2000]; - - bool clearing; - bool waiting_for_nop; - bool waiting_for_flip; - bool waiting_for_context_switch; - bool downloads_pending; - bool download_dirty_surfaces_pending; - bool flush_pending; - bool gl_sync_pending; - bool shader_cache_writeback_pending; - QemuEvent downloads_complete; - QemuEvent dirty_surfaces_download_complete; - QemuEvent flush_complete; - QemuEvent gl_sync_complete; - QemuEvent shader_cache_writeback_complete; - - unsigned int 
surface_scale_factor; - uint8_t *scale_buf; -} PGRAPHState; - typedef struct NV2AState { /*< private >*/ PCIDevice parent_obj; @@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo { } NV2ABlockInfo; extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS]; -extern GloContext *g_nv2a_context_render; -extern GloContext *g_nv2a_context_display; - void nv2a_update_irq(NV2AState *d); static inline @@ -566,20 +209,5 @@ DEFINE_PROTO(user) DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address); void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len); -void pgraph_init(NV2AState *d); -void pgraph_destroy(PGRAPHState *pg); -void pgraph_context_switch(NV2AState *d, unsigned int channel_id); -int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, - uint32_t parameter, uint32_t *parameters, - size_t num_words_available, size_t max_lookahead_words, - bool inc); -void pgraph_gl_sync(NV2AState *d); -void pgraph_process_pending_reports(NV2AState *d); -void pgraph_process_pending_downloads(NV2AState *d); -void pgraph_download_dirty_surfaces(NV2AState *d); -void pgraph_flush(NV2AState *d); - -void *pfifo_thread(void *arg); -void pfifo_kick(NV2AState *d); #endif diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h index 108db8f716d..78a9091eb54 100644 --- a/hw/xbox/nv2a/nv2a_regs.h +++ b/hw/xbox/nv2a/nv2a_regs.h @@ -21,6 +21,17 @@ #ifndef HW_NV2A_REGS_H #define HW_NV2A_REGS_H + +#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) + +#define SET_MASK(v, mask, val) \ + ({ \ + const unsigned int __val = (val); \ + const unsigned int __mask = (mask); \ + (v) &= ~(__mask); \ + (v) |= ((__val) << ctz32(__mask)) & (__mask); \ + }) + #define NV_NUM_BLOCKS 21 #define NV_PMC 0 /* card master control */ #define NV_PBUS 1 /* bus control */ diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c index 77dd175098b..295cbbf27b2 100644 --- a/hw/xbox/nv2a/pfifo.c +++ b/hw/xbox/nv2a/pfifo.c @@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d) 
qemu_cond_broadcast(&d->pfifo.fifo_cond); } -static bool pgraph_can_fifo_access(NV2AState *d) { - return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS; +static bool can_fifo_access(NV2AState *d) { + return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) & + NV_PGRAPH_FIFO_ACCESS; } /* If NV097_FLIP_STALL was executed, check if the flip has completed. * This will usually happen in the VSYNC interrupt handler. */ -static bool pgraph_is_flip_stall_complete(NV2AState *d) +static bool is_flip_stall_complete(NV2AState *d) { PGRAPHState *pg = &d->pgraph; + uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE); + NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); + GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D)); - uint32_t s = pg->regs[NV_PGRAPH_SURFACE]; if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D) != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) { return true; @@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d) if (qatomic_read(&d->pgraph.waiting_for_flip)) { qemu_mutex_lock(&d->pgraph.lock); - if (!pgraph_is_flip_stall_complete(d)) { + if (!is_flip_stall_complete(d)) { should_stall = true; } else { d->pgraph.waiting_for_flip = false; @@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d) { return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) || qatomic_read(&d->pgraph.waiting_for_context_switch) || - !pgraph_can_fifo_access(d); + !can_fifo_access(d); } static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, @@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_lock(&d->pgraph.lock); // Switch contexts if necessary - if (pgraph_can_fifo_access(d)) { + if 
(can_fifo_access(d)) { pgraph_context_switch(d, entry.channel_id); if (!d->pgraph.waiting_for_context_switch) { num_proc = @@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); - if (pgraph_can_fifo_access(d)) { + if (can_fifo_access(d)) { num_proc = pgraph_method(d, subchannel, method, parameter, parameters, num_words_available, max_lookahead_words, inc); @@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, static bool pfifo_pusher_should_stall(NV2AState *d) { - return !pgraph_can_fifo_access(d) || + return !can_fifo_access(d) || qatomic_read(&d->pgraph.waiting_for_nop); } @@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d) } } -static void process_requests(NV2AState *d) -{ - if (qatomic_read(&d->pgraph.downloads_pending) || - qatomic_read(&d->pgraph.download_dirty_surfaces_pending) || - qatomic_read(&d->pgraph.gl_sync_pending) || - qatomic_read(&d->pgraph.flush_pending) || - qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - qemu_mutex_unlock(&d->pfifo.lock); - qemu_mutex_lock(&d->pgraph.lock); - if (qatomic_read(&d->pgraph.downloads_pending)) { - pgraph_process_pending_downloads(d); - } - if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) { - pgraph_download_dirty_surfaces(d); - } - if (qatomic_read(&d->pgraph.gl_sync_pending)) { - pgraph_gl_sync(d); - } - if (qatomic_read(&d->pgraph.flush_pending)) { - pgraph_flush(d); - } - if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - shader_write_cache_reload_list(&d->pgraph); - } - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - } -} - void *pfifo_thread(void *arg) { NV2AState *d = (NV2AState *)arg; - glo_set_current(g_nv2a_context_render); + + pgraph_init_thread(d); rcu_register_thread(); @@ -487,13 +461,13 @@ void *pfifo_thread(void *arg) while (true) { d->pfifo.fifo_kick = false; - process_requests(d); + 
d->pgraph.renderer->ops.process_pending(d); if (!d->pfifo.halt) { pfifo_run_pusher(d); } - pgraph_process_pending_reports(d); + d->pgraph.renderer->ops.process_pending_reports(d); if (!d->pfifo.fifo_kick) { qemu_cond_broadcast(&d->pfifo.fifo_idle_cond); diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c deleted file mode 100644 index 335c73cc0f6..00000000000 --- a/hw/xbox/nv2a/pgraph.c +++ /dev/null @@ -1,7775 +0,0 @@ -/* - * QEMU Geforce NV2A implementation - * - * Copyright (c) 2012 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "nv2a_int.h" - -#include - -#include "nv2a_vsh_emulator.h" -#include "s3tc.h" -#include "ui/xemu-settings.h" -#include "qemu/fast-hash.h" - -const float f16_max = 511.9375f; -const float f24_max = 1.0E30; - -static NV2AState *g_nv2a; -GloContext *g_nv2a_context_render; -GloContext *g_nv2a_context_display; - -NV2AStats g_nv2a_stats; - -static void nv2a_profile_increment(void) -{ - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - const int64_t fps_update_interval = 250000; - g_nv2a_stats.last_flip_time = now; - - static int64_t frame_count = 0; - frame_count++; - - static int64_t ts = 0; - int64_t delta = now - ts; - if (delta >= fps_update_interval) { - g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; - ts = now; - frame_count = 0; - } -} - -static void nv2a_profile_flip_stall(void) -{ - glFinish(); - - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; - - g_nv2a_stats.frame_working.mspf = render_time; - g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = - g_nv2a_stats.frame_working; - g_nv2a_stats.frame_ptr = - (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; - g_nv2a_stats.frame_count++; - memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); -} - -static void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) -{ - g_nv2a_stats.frame_working.counters[cnt] += 1; -} - -const char *nv2a_profile_get_counter_name(unsigned int cnt) -{ - const char *default_names[NV2A_PROF__COUNT] = { - #define _X(x) stringify(x), - NV2A_PROF_COUNTERS_XMAC - #undef _X - }; - - assert(cnt < NV2A_PROF__COUNT); - return default_names[cnt] + 10; /* 'NV2A_PROF_' */ -} - -int nv2a_profile_get_counter_value(unsigned int cnt) -{ - assert(cnt < NV2A_PROF__COUNT); - unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % - NV2A_PROF_NUM_FRAMES; - return g_nv2a_stats.frame_history[idx].counters[cnt]; -} - -static const GLenum 
pgraph_texture_min_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - GL_NEAREST_MIPMAP_NEAREST, - GL_LINEAR_MIPMAP_NEAREST, - GL_NEAREST_MIPMAP_LINEAR, - GL_LINEAR_MIPMAP_LINEAR, - GL_LINEAR, -}; - -static const GLenum pgraph_texture_mag_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - 0, - GL_LINEAR /* TODO: Convolution filter... */ -}; - -static const GLenum pgraph_texture_addr_map[] = { - 0, - GL_REPEAT, - GL_MIRRORED_REPEAT, - GL_CLAMP_TO_EDGE, - GL_CLAMP_TO_BORDER, - GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ -}; - -static const GLenum pgraph_blend_factor_map[] = { - GL_ZERO, - GL_ONE, - GL_SRC_COLOR, - GL_ONE_MINUS_SRC_COLOR, - GL_SRC_ALPHA, - GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA, - GL_DST_COLOR, - GL_ONE_MINUS_DST_COLOR, - GL_SRC_ALPHA_SATURATE, - 0, - GL_CONSTANT_COLOR, - GL_ONE_MINUS_CONSTANT_COLOR, - GL_CONSTANT_ALPHA, - GL_ONE_MINUS_CONSTANT_ALPHA, -}; - -static const GLenum pgraph_blend_equation_map[] = { - GL_FUNC_SUBTRACT, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, - GL_MIN, - GL_MAX, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, -}; - -/* FIXME -static const GLenum pgraph_blend_logicop_map[] = { - GL_CLEAR, - GL_AND, - GL_AND_REVERSE, - GL_COPY, - GL_AND_INVERTED, - GL_NOOP, - GL_XOR, - GL_OR, - GL_NOR, - GL_EQUIV, - GL_INVERT, - GL_OR_REVERSE, - GL_COPY_INVERTED, - GL_OR_INVERTED, - GL_NAND, - GL_SET, -}; -*/ - -static const GLenum pgraph_cull_face_map[] = { - 0, - GL_FRONT, - GL_BACK, - GL_FRONT_AND_BACK -}; - -static const GLenum pgraph_depth_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_op_map[] = { - 0, - GL_KEEP, - GL_ZERO, - GL_REPLACE, - GL_INCR, - GL_DECR, - GL_INVERT, - GL_INCR_WRAP, - GL_DECR_WRAP, -}; - -typedef struct ColorFormatInfo { - 
unsigned int bytes_per_pixel; - bool linear; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_swizzle_mask[4]; - bool depth; -} ColorFormatInfo; - -static const ColorFormatInfo kelvin_color_format_map[66] = { - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = - {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = - {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = - {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = - {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = - {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* paletted texture */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = - {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = - {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = - {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = - {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = - {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = - {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = - {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_GREEN, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* Additional information is passed to the pixel shader via the swizzle: - * RED: The 
depth value. - * GREEN: 0 for 16-bit, 1 for 24 bit - * BLUE: 0 for fixed, 1 for float - */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = - {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = - /* FIXME: Uses fixed-point format to match surface format hack below. */ - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, - {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = - {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} -}; - -static const SurfaceFormatInfo kelvin_surface_color_format_map[] = { - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] 
= - {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = - {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - - // FIXME: Map channel color - [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = - {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = - {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_float_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for - * now just emulate this with fixed-point Z24S8. Possible compat - * improvement with custom conversion. 
- */ - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - -static GLfloat supportedAliasedLineWidthRange[2] = { 0.0f, 0.0f }; -static GLfloat supportedSmoothLineWidthRange[2] = { 0.0f, 0.0f }; - -// static void pgraph_set_context_user(NV2AState *d, uint32_t val); -static void pgraph_gl_fence(void); -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src); -static void pgraph_init_render_to_texture(NV2AState *d); -static void pgraph_init_display_renderer(NV2AState *d); -static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter); -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); -static void pgraph_shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); -static void pgraph_bind_shaders(PGRAPHState *pg); -static bool pgraph_framebuffer_dirty(PGRAPHState *pg); -static bool pgraph_color_write_enabled(PGRAPHState *pg); -static bool pgraph_zeta_write_enabled(PGRAPHState *pg); -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); -static void pgraph_wait_for_surface_download(SurfaceBinding *e); -static void pgraph_surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, hwaddr len, bool write); -static SurfaceBinding *pgraph_surface_put(NV2AState *d, hwaddr addr, SurfaceBinding *e); -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr); -static SurfaceBinding 
*pgraph_surface_get_within(NV2AState *d, hwaddr addr); -static void pgraph_unbind_surface(NV2AState *d, bool color); -static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *e); -static void pgraph_surface_evict_old(NV2AState *d); -static void pgraph_download_surface_data_if_dirty(NV2AState *d, SurfaceBinding *surface); -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels); -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, bool strict); -static bool pgraph_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); -static void pgraph_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color); -static void pgraph_update_surface(NV2AState *d, bool upload, bool color_write, bool zeta_write); -static void pgraph_bind_textures(NV2AState *d); -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_apply_scaling_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_get_surface_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, bool quick); -static void pgraph_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); -static unsigned int pgraph_bind_inline_array(NV2AState *d); -static bool pgraph_is_texture_stage_active(PGRAPHState 
*pg, unsigned int stage); - -static float convert_f16_to_float(uint16_t f16); -static float convert_f24_to_float(uint32_t f24); -static uint8_t cliptobyte(int x); -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, const uint8_t *palette_data, unsigned int width, unsigned int height, unsigned int depth, unsigned int row_pitch, unsigned int slice_pitch); -static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static void texture_binding_destroy(gpointer data); -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key); -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node); -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key); - -static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - memcpy(&vnode->key, key, sizeof(struct VertexKey)); - vnode->initialized = false; -} - -static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - return memcmp(&vnode->key, key, sizeof(VertexKey)); -} - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static unsigned int kelvin_map_stencil_op(uint32_t parameter); -static unsigned int kelvin_map_polygon_mode(uint32_t parameter); -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel); -static void 
pgraph_reload_surface_scale_factor(NV2AState *d); - -static uint32_t pgraph_rdi_read(PGRAPHState *pg, - unsigned int select, unsigned int address) -{ - uint32_t r = 0; - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - r = pg->vsh_constants[address / 4][3 - address % 4]; - break; - default: - fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", - select, address); - assert(false); - break; - } - return r; -} - -static void pgraph_rdi_write(PGRAPHState *pg, - unsigned int select, unsigned int address, - uint32_t val) -{ - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert(false); /* Untested */ - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - pg->vsh_constants_dirty[address / 4] |= - (val != pg->vsh_constants[address / 4][3 - address % 4]); - pg->vsh_constants[address / 4][3 - address % 4] = val; - break; - default: - NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", - select, address, val); - break; - } -} - -uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&pg->lock); - - uint64_t r = 0; - switch (addr) { - case NV_PGRAPH_INTR: - r = pg->pending_interrupts; - break; - case NV_PGRAPH_INTR_EN: - r = pg->enabled_interrupts; - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - r = pgraph_rdi_read(pg, select, address); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - default: - r = pg->regs[addr]; - break; - } - - qemu_mutex_unlock(&pg->lock); - - nv2a_reg_log_read(NV_PGRAPH, addr, size, r); - return r; -} - -void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - nv2a_reg_log_write(NV_PGRAPH, addr, size, val); - - qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here - qemu_mutex_lock(&pg->lock); - - switch (addr) { - case NV_PGRAPH_INTR: - pg->pending_interrupts &= ~val; - - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { - pg->waiting_for_nop = false; - } - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { - pg->waiting_for_context_switch = false; - } - pfifo_kick(d); - break; - case NV_PGRAPH_INTR_EN: - pg->enabled_interrupts = val; - break; - case NV_PGRAPH_INCREMENT: - if (val & NV_PGRAPH_INCREMENT_READ_3D) { - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - nv2a_profile_increment(); - pfifo_kick(d); - } - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - pgraph_rdi_write(pg, select, address, val); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { - hwaddr context_address = - GET_MASK(pg->regs[NV_PGRAPH_CHANNEL_CTX_POINTER], - NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; - - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { -#ifdef DEBUG_NV2A - unsigned pgraph_channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); -#endif - NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", - pgraph_channel_id, context_address); - - assert(context_address < memory_region_size(&d->ramin)); - - uint8_t *context_ptr = d->ramin_ptr + context_address; - uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); - - NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); - - pg->regs[NV_PGRAPH_CTX_USER] = context_user; - // pgraph_set_context_user(d, context_user); - } - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { - /* do stuff ... 
*/ - } - - break; - } - default: - pg->regs[addr] = val; - break; - } - - // events - switch (addr) { - case NV_PGRAPH_FIFO: - pfifo_kick(d); - break; - } - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_unlock(&d->pfifo.lock); -} - -void pgraph_flush(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - bool update_surface = (pg->color_binding || pg->zeta_binding); - - /* Clear last surface shape to force recreation of buffers at next draw */ - pg->surface_color.draw_dirty = false; - pg->surface_zeta.draw_dirty = false; - memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); - pgraph_unbind_surface(d, true); - pgraph_unbind_surface(d, false); - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - pgraph_surface_invalidate(d, s); - } - - pgraph_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); - - /* Sync all RAM */ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); - - /* FIXME: Flush more? 
*/ - - pgraph_reload_surface_scale_factor(d); - - if (update_surface) { - pgraph_update_surface(d, true, true, true); - } - - qatomic_set(&d->pgraph.flush_pending, false); - qemu_event_set(&d->pgraph.flush_complete); -} - -#define METHOD_ADDR(gclass, name) \ - gclass ## _ ## name -#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) -#define METHOD_NAME_STR(gclass, name) \ - tostring(gclass ## _ ## name) -#define METHOD_FUNC_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## _handler -#define METHOD_HANDLER_ARG_DECL \ - NV2AState *d, PGRAPHState *pg, \ - unsigned int subchannel, unsigned int method, \ - uint32_t parameter, uint32_t *parameters, \ - size_t num_words_available, size_t *num_words_consumed, bool inc -#define METHOD_HANDLER_ARGS \ - d, pg, subchannel, method, parameter, parameters, \ - num_words_available, num_words_consumed, inc -#define DEF_METHOD_PROTO(gclass, name) \ - static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) - -#define DEF_METHOD(gclass, name) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_RANGE(gclass, name, range) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_PROTO(gclass, name); -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); -static const struct { - uint32_t base; - const char *name; - MethodFunc handler; -} pgraph_kelvin_methods[0x800] = { -#define DEF_METHOD(gclass, name) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_RANGE(gclass, name, range) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ - ... 
METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 -}; - -#define METHOD_RANGE_END_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## __END -#define DEF_METHOD(gclass, name) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4; -#define DEF_METHOD_RANGE(gclass, name, range) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*range; -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*stride; -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -static void 
pgraph_method_inc(MethodFunc handler, uint32_t end, - METHOD_HANDLER_ARG_DECL) -{ - if (!inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - size_t count = MIN(num_words_available, (end - method) / 4); - for (size_t i = 0; i < count; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - method += 4; - } - *num_words_consumed = count; -} - -static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) -{ - if (inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - - for (size_t i = 0; i < num_words_available; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - } - *num_words_consumed = num_words_available; -} - -#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) -#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) -#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) - -#define DEF_METHOD_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_RANGE_END_NAME(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -#define DEF_METHOD_NON_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -// TODO: Optimize. Ideally this should all be done via OpenGL. 
-static void pgraph_image_blit(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - pgraph_update_surface(d, false, true, true); - - assert(context_surfaces->object_instance == image_blit->context_surfaces); - - unsigned int bytes_per_pixel; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_Y8: - bytes_per_pixel = 1; - break; - case NV062_SET_COLOR_FORMAT_LE_R5G6B5: - bytes_per_pixel = 2; - break; - case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_Y32: - bytes_per_pixel = 4; - break; - default: - fprintf(stderr, "Unknown blit surface format: 0x%x\n", - context_surfaces->color_format); - assert(false); - break; - } - - hwaddr source_dma_len, dest_dma_len; - - uint8_t *source = (uint8_t *)nv_dma_map( - d, context_surfaces->dma_image_source, &source_dma_len); - assert(context_surfaces->source_offset < source_dma_len); - source += context_surfaces->source_offset; - - uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, - &dest_dma_len); - assert(context_surfaces->dest_offset < dest_dma_len); - dest += context_surfaces->dest_offset; - - hwaddr source_addr = source - d->vram_ptr; - hwaddr dest_addr = dest - d->vram_ptr; - - SurfaceBinding *surf_src = pgraph_surface_get(d, source_addr); - if (surf_src) { - pgraph_download_surface_data_if_dirty(d, surf_src); - } - - SurfaceBinding *surf_dest = pgraph_surface_get(d, dest_addr); - if (surf_dest) { - if (image_blit->height < surf_dest->height || - image_blit->width < surf_dest->width) { - pgraph_download_surface_data_if_dirty(d, surf_dest); - } else { - // The blit will completely replace the surface so any pending - // download should be discarded. 
- surf_dest->download_pending = false; - surf_dest->draw_dirty = false; - } - surf_dest->upload_pending = true; - pg->draw_time++; - } - - hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + - image_blit->in_x * bytes_per_pixel; - hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + - image_blit->out_x * bytes_per_pixel; - - hwaddr source_size = - (image_blit->height - 1) * context_surfaces->source_pitch + - image_blit->width * bytes_per_pixel; - hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + - image_blit->width * bytes_per_pixel; - - /* FIXME: What does hardware do in this case? */ - assert(source_addr + source_offset + source_size <= - memory_region_size(d->vram)); - assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); - - uint8_t *source_row = source + source_offset; - uint8_t *dest_row = dest + dest_offset; - - if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); - for (unsigned int y = 0; y < image_blit->height; y++) { - memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); - uint32_t max_beta_mult = 0x7f80; - uint32_t beta_mult = beta->beta >> 16; - uint32_t inv_beta_mult = max_beta_mult - beta_mult; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - for (unsigned int ch = 0; ch < 3; ch++) { - uint32_t a = source_row[x * 4 + ch] * beta_mult; - uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; - dest_row[x * 4 + ch] = (a + b) / max_beta_mult; - } - } - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else { - fprintf(stderr, "Unknown blit operation: 
0x%x\n", - image_blit->operation); - assert(false && "Unknown blit operation"); - } - - NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); - - bool needs_alpha_patching; - uint8_t alpha_override; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0xff; - break; - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0; - break; - default: - needs_alpha_patching = false; - alpha_override = 0; - } - - if (needs_alpha_patching) { - dest_row = dest + dest_offset; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - dest_row[x * 4 + 3] = alpha_override; - } - dest_row += context_surfaces->dest_pitch; - } - } - - dest_addr += dest_offset; - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_NV2A_TEX); -} - -int pgraph_method(NV2AState *d, unsigned int subchannel, - unsigned int method, uint32_t parameter, - uint32_t *parameters, size_t num_words_available, - size_t max_lookahead_words, bool inc) -{ - int num_processed = 1; - - assert(glGetError() == GL_NO_ERROR); - - PGRAPHState *pg = &d->pgraph; - - bool channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - assert(channel_valid); - - ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - assert(subchannel < 8); - - if (method == NV_SET_OBJECT) { - assert(parameter < memory_region_size(&d->ramin)); - uint8_t *obj_ptr = d->ramin_ptr + parameter; - - uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); - uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); - uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); - uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); - uint32_t ctx_5 = 
parameter; - - pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1; - pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2; - pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3; - pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4; - pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5; - } - - // is this right? - pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4]; - - uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1], - NV_PGRAPH_CTX_SWITCH1_GRCLASS); - - pgraph_method_log(subchannel, graphics_class, method, parameter); - - if (subchannel != 0) { - // catches context switching issues on xbox d3d - assert(graphics_class != 0x97); - } - - /* ugly switch for now */ - switch (graphics_class) { - case NV_BETA: { - switch (method) { - case NV012_SET_OBJECT: - beta->object_instance = parameter; - break; - case NV012_SET_BETA: - if (parameter & 0x80000000) { - beta->beta = 0; - } else { - // The parameter is a signed fixed-point number with a sign bit - // and 31 fractional bits. Note that negative values are clamped - // to 0, and only 8 fractional bits are actually implemented in - // hardware. 
- beta->beta = parameter & 0x7f800000; - } - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_PATTERN: { - switch (method) { - case NV044_SET_MONOCHROME_COLOR0: - pg->regs[NV_PGRAPH_PATT_COLOR0] = parameter; - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_SURFACES_2D: { - switch (method) { - case NV062_SET_OBJECT: - context_surfaces_2d->object_instance = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: - context_surfaces_2d->dma_image_source = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: - context_surfaces_2d->dma_image_dest = parameter; - break; - case NV062_SET_COLOR_FORMAT: - context_surfaces_2d->color_format = parameter; - break; - case NV062_SET_PITCH: - context_surfaces_2d->source_pitch = parameter & 0xFFFF; - context_surfaces_2d->dest_pitch = parameter >> 16; - break; - case NV062_SET_OFFSET_SOURCE: - context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; - break; - case NV062_SET_OFFSET_DESTIN: - context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; - break; - default: - goto unhandled; - } - break; - } - case NV_IMAGE_BLIT: { - switch (method) { - case NV09F_SET_OBJECT: - image_blit->object_instance = parameter; - break; - case NV09F_SET_CONTEXT_SURFACES: - image_blit->context_surfaces = parameter; - break; - case NV09F_SET_OPERATION: - image_blit->operation = parameter; - break; - case NV09F_CONTROL_POINT_IN: - image_blit->in_x = parameter & 0xFFFF; - image_blit->in_y = parameter >> 16; - break; - case NV09F_CONTROL_POINT_OUT: - image_blit->out_x = parameter & 0xFFFF; - image_blit->out_y = parameter >> 16; - break; - case NV09F_SIZE: - image_blit->width = parameter & 0xFFFF; - image_blit->height = parameter >> 16; - - if (image_blit->width && image_blit->height) { - pgraph_image_blit(d); - } - break; - default: - goto unhandled; - } - break; - } - case NV_KELVIN_PRIMITIVE: { - MethodFunc handler = - pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; - if 
(handler == NULL) { - goto unhandled; - } - size_t num_words_consumed = 1; - handler(d, pg, subchannel, method, parameter, parameters, - num_words_available, &num_words_consumed, inc); - - /* Squash repeated BEGIN,DRAW_ARRAYS,END */ - #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) - #define LAP(i, prm) (parameters[i*2+2] == (prm)) - #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) - - if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && - pg->inline_elements_length == 0 && - pg->draw_arrays_length < - (ARRAY_SIZE(pg->gl_draw_arrays_start) - 1) && - LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && - LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && - LAM(2, NV097_DRAW_ARRAYS)) { - num_words_consumed += 4; - pg->draw_arrays_prevent_connect = true; - } - - #undef LAM - #undef LAP - #undef LAMP - - num_processed = num_words_consumed; - break; - } - default: - goto unhandled; - } - - return num_processed; - -unhandled: - trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, - method, parameter); - return num_processed; -} - -DEF_METHOD(NV097, SET_OBJECT) -{ - pg->kelvin.object_instance = parameter; -} - -DEF_METHOD(NV097, NO_OPERATION) -{ - /* The bios uses nop as a software method call - - * it seems to expect a notify interrupt if the parameter isn't 0. - * According to a nouveau guy it should still be a nop regardless - * of the parameter. It's possible a debug register enables this, - * but nothing obvious sticks out. Weird. 
- */ - if (parameter == 0) { - return; - } - - unsigned channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); - - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_CHID, - channel_id); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_SUBCH, - subchannel); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_MTHD, - method); - pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter; - pg->regs[NV_PGRAPH_NSOURCE] = - NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ - pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; - pg->waiting_for_nop = true; - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_lock_iothread(); - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&pg->lock); -} - -DEF_METHOD(NV097, WAIT_FOR_IDLE) -{ - pgraph_update_surface(d, false, true, true); -} - -DEF_METHOD(NV097, SET_FLIP_READ) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_WRITE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_MODULO) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D, - parameter); -} - -DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) -{ - uint32_t old = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - - uint32_t new = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - trace_nv2a_pgraph_flip_increment_write(old, new); - NV2A_GL_DFRAME_TERMINATOR(); - pg->frame_time++; -} - -DEF_METHOD(NV097, FLIP_STALL) -{ - trace_nv2a_pgraph_flip_stall(); - pgraph_update_surface(d, false, true, true); - nv2a_profile_flip_stall(); - 
pg->waiting_for_flip = true; -} - -// TODO: these should be loading the dma objects from ramin here? - -DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) -{ - pg->dma_notifies = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_A) -{ - pg->dma_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_B) -{ - pg->dma_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) -{ - pg->dma_state = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) -{ - /* try to get any straggling draws in before the surface's changed :/ */ - pgraph_update_surface(d, false, true, true); - - pg->dma_color = parameter; - pg->surface_color.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) -{ - pg->dma_zeta = parameter; - pg->surface_zeta.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) -{ - pg->dma_vertex_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) -{ - pg->dma_vertex_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) -{ - pg->dma_semaphore = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) -{ - pgraph_process_pending_reports(d); - - pg->dma_report = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_x = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); - pg->surface_shape.clip_width = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_y = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); - pg->surface_shape.clip_height = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); -} - -DEF_METHOD(NV097, SET_SURFACE_FORMAT) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.color_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); - pg->surface_shape.zeta_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); - 
pg->surface_shape.anti_aliasing = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); - pg->surface_shape.log_width = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); - pg->surface_shape.log_height = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); - - int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); - if (surface_type != pg->surface_type) { - pg->surface_type = surface_type; - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } -} - -DEF_METHOD(NV097, SET_SURFACE_PITCH) -{ - pgraph_update_surface(d, false, true, true); - unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); - unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); - - pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); - pg->surface_color.pitch = color_pitch; - - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); - pg->surface_zeta.pitch = zeta_pitch; -} - -DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); - pg->surface_color.offset = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); - pg->surface_zeta.offset = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) -{ - int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; - pg->regs[NV_PGRAPH_TEXADDRESS0 + slot * 4] = parameter; -} - 
-DEF_METHOD(NV097, SET_CONTROL0) -{ - pgraph_update_surface(d, false, true, true); - - bool stencil_write_enable = - parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, - stencil_write_enable); - - uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); - - bool z_perspective = - parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, - z_perspective); -} - -DEF_METHOD(NV097, SET_COLOR_MATERIAL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION, - (parameter >> 0) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT, - (parameter >> 2) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE, - (parameter >> 4) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR, - (parameter >> 6) & 3); -} - -DEF_METHOD(NV097, SET_FOG_MODE) -{ - /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_MODE_V_LINEAR: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; - case NV097_SET_FOG_MODE_V_EXP: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; - case NV097_SET_FOG_MODE_V_EXP2: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; - case NV097_SET_FOG_MODE_V_EXP_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; - case NV097_SET_FOG_MODE_V_EXP2_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; - case NV097_SET_FOG_MODE_V_LINEAR_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOG_MODE, - mode); -} - -DEF_METHOD(NV097, SET_FOG_GEN_MODE) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: - mode = 
NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; - case NV097_SET_FOG_GEN_MODE_V_RADIAL: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; - case NV097_SET_FOG_GEN_MODE_V_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_FOG_X: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGGENMODE, mode); -} - -DEF_METHOD(NV097, SET_FOG_ENABLE) -{ - /* - FIXME: There is also: - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGENABLE, - parameter); - */ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOGENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_FOG_COLOR) -{ - /* PGRAPH channels are ARGB, parameter channels are ABGR */ - uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); - uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); - uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); - uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_RED, red); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_GREEN, green); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_BLUE, blue); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha); -} - -DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPX0 + slot * 4] = parameter; - } -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPY0 + slot * 4] = 
parameter; - } -} - -DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EN, parameter); -} - -DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DITHER_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LIGHTING_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_LIGHTING, - parameter); -} - -DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, - parameter); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_SKIN_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_SKIN, - parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) -{ - 
SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); -} - -DEF_METHOD(NV097, SET_ALPHA_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_ALPHA_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_SFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_DFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_COLOR) -{ - pg->regs[NV_PGRAPH_BLENDCOLOR] = parameter; -} - -DEF_METHOD(NV097, SET_BLEND_EQUATION) -{ - unsigned int equation; - switch (parameter) { - case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: - equation = 0; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: - equation = 1; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: - equation = 2; break; - case NV097_SET_BLEND_EQUATION_V_MIN: - equation = 3; break; - case NV097_SET_BLEND_EQUATION_V_MAX: - equation = 4; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: - equation = 5; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: - equation = 6; break; - default: - NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EQN, equation); -} - -DEF_METHOD(NV097, SET_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZFUNC, - parameter & 0xF); -} - -DEF_METHOD(NV097, SET_COLOR_MASK) -{ - pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); - - bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; - bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; - bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; - bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - 
NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); -} - -DEF_METHOD(NV097, SET_DEPTH_MASK) -{ - pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); - - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_SHADE_MODE) -{ - switch (parameter) { - case NV097_SET_SHADE_MODE_V_FLAT: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); - break; - case NV097_SET_SHADE_MODE_V_SMOOTH: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - break; - default: - /* Discard */ - break; - } -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) -{ - 
pg->regs[NV_PGRAPH_ZOFFSETFACTOR] = parameter; -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) -{ - pg->regs[NV_PGRAPH_ZOFFSETBIAS] = parameter; -} - -DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_CLIP_MIN) -{ - pg->regs[NV_PGRAPH_ZCLIPMIN] = parameter; -} - -DEF_METHOD(NV097, SET_CLIP_MAX) -{ - pg->regs[NV_PGRAPH_ZCLIPMAX] = parameter; -} - -DEF_METHOD(NV097, SET_CULL_FACE) -{ - unsigned int face; - switch (parameter) { - case NV097_SET_CULL_FACE_V_FRONT: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; - case NV097_SET_CULL_FACE_V_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; - case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL, - face); -} - -DEF_METHOD(NV097, SET_FRONT_FACE) -{ - bool ccw; - switch (parameter) { - case NV097_SET_FRONT_FACE_V_CW: - ccw = false; break; - case NV097_SET_FRONT_FACE_V_CCW: - ccw = true; break; - default: - NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACE, - ccw ? 
1 : 0); -} - -DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, - parameter); -} - -DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) -{ - int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; - // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; -} - -DEF_METHOD(NV097, SET_MATERIAL_ALPHA) -{ - pg->material_alpha = *(float*)¶meter; -} - -DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) -{ - SET_MASK(d->pgraph.regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHTS, - parameter); -} - -DEF_METHOD(NV097, SET_TEXGEN_S) -{ - int slot = (method - NV097_SET_TEXGEN_S) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S - : NV_PGRAPH_CSV1_A_T0_S; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 0)); -} - -DEF_METHOD(NV097, SET_TEXGEN_T) -{ - int slot = (method - NV097_SET_TEXGEN_T) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T - : NV_PGRAPH_CSV1_A_T0_T; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 1)); -} - -DEF_METHOD(NV097, SET_TEXGEN_R) -{ - int slot = (method - NV097_SET_TEXGEN_R) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R - : NV_PGRAPH_CSV1_A_T0_R; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 2)); -} - -DEF_METHOD(NV097, SET_TEXGEN_Q) -{ - int slot = (method - NV097_SET_TEXGEN_Q) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q - : NV_PGRAPH_CSV1_A_T0_Q; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 3)); -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; - pg->texture_matrix_enable[slot] = parameter; -} - -DEF_METHOD(NV097, SET_POINT_SIZE) -{ - SET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V, parameter); -} - -DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) -{ - int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; - // pg->projection_matrix[slot] = *(float*)¶meter; - unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) -{ - int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; - unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, 
SET_FOG_PARAMS) -{ - int slot = (method - NV097_SET_FOG_PARAMS) / 4; - if (slot < 2) { - pg->regs[NV_PGRAPH_FOGPARAM0 + slot*4] = parameter; - } else { - /* FIXME: No idea where slot = 2 is */ - } - - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; -} - -/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ -DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) -{ - int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_TEXGEN_REF, - parameter); -} - -DEF_METHOD_INC(NV097, SET_FOG_PLANE) -{ - int slot = (method - NV097_SET_FOG_PLANE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; -} - -DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; - // ?? - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) -{ - int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; -} - -DEF_METHOD_INC(NV097, SET_POINT_PARAMS) -{ - int slot = (method - NV097_SET_POINT_PARAMS) / 4; - pg->point_params[slot] = *(float *)¶meter; /* FIXME: Where? 
*/ -} - -DEF_METHOD_INC(NV097, SET_EYE_POSITION) -{ - int slot = (method - NV097_SET_EYE_POSITION) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) -{ - int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) -{ - int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; - - int program_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); - - assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - pg->program_data[program_load][slot%4] = parameter; - pg->program_data_dirty = true; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) -{ - int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; - int const_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); - - assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); - // VertexShaderConstant *constant = &pg->constants[const_load]; - 
pg->vsh_constants_dirty[const_load] |= - (parameter != pg->vsh_constants[const_load][slot%4]); - pg->vsh_constants[const_load][slot%4] = parameter; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX3F) -{ - int slot = (method - NV097_SET_VERTEX3F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - attribute->inline_value[3] = 1.0f; - if (slot == 2) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -/* Handles NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; - slot /= 16; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... - NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... - NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
- NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; - break; - default: - assert(false); - break; - } -} - -/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; - slot /= 32; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_LIGHT_AMBIENT_COLOR ... - NV097_SET_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; - break; - case NV097_SET_LIGHT_DIFFUSE_COLOR ... - NV097_SET_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; - break; - case NV097_SET_LIGHT_SPECULAR_COLOR ... - NV097_SET_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; - break; - case NV097_SET_LIGHT_LOCAL_RANGE: - pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; - pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; - break; - case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... - NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: - part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; - pg->light_infinite_half_vector[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
- NV097_SET_LIGHT_INFINITE_DIRECTION + 8: - part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; - pg->light_infinite_direction[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_SPOT_FALLOFF ... - NV097_SET_LIGHT_SPOT_FALLOFF + 8: - part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; - break; - case NV097_SET_LIGHT_SPOT_DIRECTION ... - NV097_SET_LIGHT_SPOT_DIRECTION + 12: - part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; - break; - case NV097_SET_LIGHT_LOCAL_POSITION ... - NV097_SET_LIGHT_LOCAL_POSITION + 8: - part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; - pg->light_local_position[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_LOCAL_ATTENUATION ... - NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: - part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; - pg->light_local_attenuation[slot][part] = *(float*)¶meter; - break; - default: - assert(false); - break; - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX4F) -{ - int slot = (method - NV097_SET_VERTEX4F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - if (slot == 3) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_NORMAL3S) -{ - int slot = (method - NV097_SET_NORMAL3S) / 4; - unsigned int part = slot % 2; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); - int16_t val = parameter & 0xFFFF; - attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); - val = parameter >> 16; - attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); -} - 
-#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - unsigned int part = slot % 2; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[part * 2 + 0] = \ - (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[part * 2 + 1] = \ - (float)(int16_t)(parameter >> 16); \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_4S - -#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2S - -#define SET_VERTEX_COLOR_3F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_COLOR_3F - -#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_NORMAL3F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); -} - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, - NV2A_VERTEX_ATTR_SPECULAR); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); -} - - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_F - -#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
- attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, - NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, - NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, - NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, - NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2F - -#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, - NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, - NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_ATTRIBUTE_4UB - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; - VertexAttribute *attr = &pg->vertex_attributes[slot]; - attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); - attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); - attr->stride = GET_MASK(parameter, - NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); - attr->gl_count = attr->count; - - NV2A_DPRINTF("vertex data array 
format=%d, count=%d, stride=%d\n", - attr->format, attr->count, attr->stride); - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - assert(attr->count == 4); - // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt - attr->gl_count = GL_BGRA; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_TRUE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - attr->gl_type = GL_FLOAT; - attr->gl_normalize = GL_FALSE; - attr->size = 4; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_FALSE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - attr->gl_type = GL_INT; - attr->size = 4; - assert(attr->count == 1); - attr->needs_conversion = true; - break; - default: - fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); - assert(false); - break; - } - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << slot); - } else { - pg->compressed_attrs &= ~(1 << slot); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; - - pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; - pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; -} - -DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP_ENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_LOGIC_OP) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP, - parameter & 0xF); -} - -static void pgraph_process_pending_report(NV2AState *d, QueryReport *r) -{ - PGRAPHState *pg = &d->pgraph; - - if (r->clear) { - pg->zpass_pixel_count_result = 0; - return; - } - - uint8_t type = GET_MASK(r->parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), - * not sure if CLEARs also count - */ - /* FIXME: What about clipping regions etc? */ - for (int i = 0; i < r->query_count; i++) { - GLuint gl_query_result = 0; - glGetQueryObjectuiv(r->queries[i], GL_QUERY_RESULT, &gl_query_result); - gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; - pg->zpass_pixel_count_result += gl_query_result; - } - - if (r->query_count) { - glDeleteQueries(r->query_count, r->queries); - g_free(r->queries); - } - - uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! 
*/ - uint32_t done = 0; - - hwaddr report_dma_len; - uint8_t *report_data = - (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); - - hwaddr offset = GET_MASK(r->parameter, NV097_GET_REPORT_OFFSET); - assert(offset < report_dma_len); - report_data += offset; - - stq_le_p((uint64_t *)&report_data[0], timestamp); - stl_le_p((uint32_t *)&report_data[8], pg->zpass_pixel_count_result); - stl_le_p((uint32_t *)&report_data[12], done); -} - -void pgraph_process_pending_reports(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - QueryReport *r, *next; - - QSIMPLEQ_FOREACH_SAFE(r, &pg->report_queue, entry, next) { - pgraph_process_pending_report(d, r); - QSIMPLEQ_REMOVE_HEAD(&pg->report_queue, entry); - g_free(r); - } -} - -DEF_METHOD(NV097, CLEAR_REPORT_VALUE) -{ - /* FIXME: Does this have a value in parameter? Also does this (also?) modify - * the report memory block? - */ - if (pg->gl_zpass_pixel_count_query_count) { - glDeleteQueries(pg->gl_zpass_pixel_count_query_count, - pg->gl_zpass_pixel_count_queries); - pg->gl_zpass_pixel_count_query_count = 0; - } - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = true; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); -} - -DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) -{ - pg->zpass_pixel_count_enable = parameter; -} - -DEF_METHOD(NV097, GET_REPORT) -{ - uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = false; - r->parameter = parameter; - r->query_count = pg->gl_zpass_pixel_count_query_count; - r->queries = pg->gl_zpass_pixel_count_queries; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); - - pg->gl_zpass_pixel_count_query_count = 0; - pg->gl_zpass_pixel_count_queries = NULL; -} - -DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) -{ - int slot = (method - NV097_SET_EYE_DIRECTION) / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter; - 
pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; -} - -static void pgraph_reset_draw_arrays(PGRAPHState *pg) -{ - pg->draw_arrays_length = 0; - pg->draw_arrays_min_start = -1; - pg->draw_arrays_max_count = 0; - pg->draw_arrays_prevent_connect = false; -} - -static void pgraph_reset_inline_buffers(PGRAPHState *pg) -{ - pg->inline_elements_length = 0; - pg->inline_array_length = 0; - pg->inline_buffer_length = 0; - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_flush_draw(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - if (!(pg->color_binding || pg->zeta_binding)) { - pgraph_reset_inline_buffers(pg); - return; - } - assert(pg->shader_binding); - - if (pg->draw_arrays_length) { - NV2A_GL_DPRINTF(false, "Draw Arrays"); - nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); - assert(pg->inline_elements_length == 0); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - pgraph_bind_vertex_attributes(d, pg->draw_arrays_min_start, - pg->draw_arrays_max_count - 1, - false, 0, - pg->draw_arrays_max_count - 1); - glMultiDrawArrays(pg->shader_binding->gl_primitive_mode, - pg->gl_draw_arrays_start, - pg->gl_draw_arrays_count, - pg->draw_arrays_length); - } else if (pg->inline_elements_length) { - NV2A_GL_DPRINTF(false, "Inline Elements"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - uint32_t min_element = (uint32_t)-1; - uint32_t max_element = 0; - for (int i=0; i < pg->inline_elements_length; i++) { - max_element = MAX(pg->inline_elements[i], max_element); - min_element = MIN(pg->inline_elements[i], min_element); - } - - pgraph_bind_vertex_attributes( - d, min_element, max_element, false, 0, - pg->inline_elements[pg->inline_elements_length - 1]); - - VertexKey k; - memset(&k, 0, sizeof(VertexKey)); - k.count = pg->inline_elements_length; - k.gl_type = GL_UNSIGNED_INT; - k.gl_normalize = GL_FALSE; - k.stride = sizeof(uint32_t); - uint64_t h = 
fast_hash((uint8_t*)pg->inline_elements, - pg->inline_elements_length * 4); - - LruNode *node = lru_lookup(&pg->element_cache, h, &k); - VertexLruNode *found = container_of(node, VertexLruNode, node); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer); - if (!found->initialized) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, - pg->inline_elements_length * 4, - pg->inline_elements, GL_STATIC_DRAW); - found->initialized = true; - } else { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY); - } - glDrawElements(pg->shader_binding->gl_primitive_mode, - pg->inline_elements_length, GL_UNSIGNED_INT, - (void *)0); - } else if (pg->inline_buffer_length) { - NV2A_GL_DPRINTF(false, "Inline Buffer"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); - assert(pg->inline_array_length == 0); - - if (pg->compressed_attrs) { - pg->compressed_attrs = 0; - pgraph_bind_shaders(pg); - } - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->inline_buffer_populated) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); - glBindBuffer(GL_ARRAY_BUFFER, attr->gl_inline_buffer); - glBufferData(GL_ARRAY_BUFFER, - pg->inline_buffer_length * sizeof(float) * 4, - attr->inline_buffer, GL_STREAM_DRAW); - glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(i); - attr->inline_buffer_populated = false; - memcpy(attr->inline_value, - attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, - sizeof(attr->inline_value)); - } else { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - } - } - - glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, pg->inline_buffer_length); - } else if (pg->inline_array_length) { - NV2A_GL_DPRINTF(false, "Inline Array"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); - - unsigned int index_count = pgraph_bind_inline_array(d); - 
glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, index_count); - } else { - NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END"); - NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); - } - - pgraph_reset_inline_buffers(pg); -} - -DEF_METHOD(NV097, SET_BEGIN_END) -{ - uint32_t control_0 = pg->regs[NV_PGRAPH_CONTROL_0]; - bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; - bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; - bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; - bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; - bool color_write = mask_alpha || mask_red || mask_green || mask_blue; - bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; - bool stencil_test = - pg->regs[NV_PGRAPH_CONTROL_1] & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; - bool is_nop_draw = !(color_write || depth_test || stencil_test); - - if (parameter == NV097_SET_BEGIN_END_OP_END) { - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("End without Begin!\n"); - } - nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); - - if (is_nop_draw) { - // FIXME: Check PGRAPH register 0x880. - // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit - // check that will raise an exception in the case that a draw should - // modify the color and/or zeta buffer but the target(s) are masked - // off. This check only seems to trigger during the fragment - // processing, it is legal to attempt a draw that is entirely - // clipped regardless of 0x880. See xemu#635 for context. 
- return; - } - - pgraph_flush_draw(d); - - /* End of visibility testing */ - if (pg->zpass_pixel_count_enable) { - nv2a_profile_inc_counter(NV2A_PROF_QUERY); - glEndQuery(GL_SAMPLES_PASSED); - } - - pg->draw_time++; - if (pg->color_binding && pgraph_color_write_enabled(pg)) { - pg->color_binding->draw_time = pg->draw_time; - } - if (pg->zeta_binding && pgraph_zeta_write_enabled(pg)) { - pg->zeta_binding->draw_time = pg->draw_time; - } - - pgraph_set_surface_dirty(pg, color_write, depth_test || stencil_test); - - NV2A_GL_DGROUP_END(); - pg->primitive_mode = PRIM_TYPE_INVALID; - } else { - NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", parameter); - if (pg->primitive_mode != PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Begin without End!\n"); - } - assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); - pg->primitive_mode = parameter; - - pgraph_update_surface(d, true, true, depth_test || stencil_test); - pgraph_reset_inline_buffers(pg); - - if (is_nop_draw) { - return; - } - - assert(pg->color_binding || pg->zeta_binding); - - pgraph_bind_textures(d); - pgraph_bind_shaders(pg); - - glColorMask(mask_red, mask_green, mask_blue, mask_alpha); - glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE)); - glStencilMask(GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE)); - - if (pg->regs[NV_PGRAPH_BLEND] & NV_PGRAPH_BLEND_EN) { - glEnable(GL_BLEND); - uint32_t sfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_SFACTOR); - uint32_t dfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_DFACTOR); - assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - glBlendFunc(pgraph_blend_factor_map[sfactor], - pgraph_blend_factor_map[dfactor]); - - uint32_t equation = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_EQN); - assert(equation < ARRAY_SIZE(pgraph_blend_equation_map)); - glBlendEquation(pgraph_blend_equation_map[equation]); - - uint32_t blend_color = 
pg->regs[NV_PGRAPH_BLENDCOLOR]; - glBlendColor( ((blend_color >> 16) & 0xFF) / 255.0f, /* red */ - ((blend_color >> 8) & 0xFF) / 255.0f, /* green */ - (blend_color & 0xFF) / 255.0f, /* blue */ - ((blend_color >> 24) & 0xFF) / 255.0f);/* alpha */ - } else { - glDisable(GL_BLEND); - } - - /* Face culling */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_CULLENABLE) { - uint32_t cull_face = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL); - assert(cull_face < ARRAY_SIZE(pgraph_cull_face_map)); - glCullFace(pgraph_cull_face_map[cull_face]); - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - - /* Clipping */ - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - - /* Front-face select */ - glFrontFace(pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_FRONTFACE - ? GL_CCW : GL_CW); - - /* Polygon offset */ - /* FIXME: GL implementation-specific, maybe do this in VS? */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - GLfloat zfactor = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETFACTOR]; - GLfloat zbias = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETBIAS]; - glPolygonOffset(zfactor, zbias); - } - - /* Depth testing */ - if (depth_test) { - glEnable(GL_DEPTH_TEST); - - uint32_t depth_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZFUNC); - 
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_map)); - glDepthFunc(pgraph_depth_func_map[depth_func]); - } else { - glDisable(GL_DEPTH_TEST); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { - glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - } - - if (stencil_test) { - glEnable(GL_STENCIL_TEST); - - uint32_t stencil_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC); - uint32_t stencil_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF); - uint32_t func_mask = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); - uint32_t op_fail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); - uint32_t op_zfail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); - uint32_t op_zpass = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); - - assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_map)); - assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_map)); - - glStencilFunc( - pgraph_stencil_func_map[stencil_func], - stencil_ref, - func_mask); - - glStencilOp( - pgraph_stencil_op_map[op_fail], - pgraph_stencil_op_map[op_zfail], - pgraph_stencil_op_map[op_zpass]); - - } else { - glDisable(GL_STENCIL_TEST); - } - - /* Dither */ - /* FIXME: GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & - NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glEnable(GL_PROGRAM_POINT_SIZE); - - bool anti_aliasing = 
GET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE); - - /* Edge Antialiasing */ - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { - glEnable(GL_LINE_SMOOTH); - glLineWidth(MIN(supportedSmoothLineWidthRange[1], pg->surface_scale_factor)); - } else { - glDisable(GL_LINE_SMOOTH); - glLineWidth(MIN(supportedAliasedLineWidthRange[1], pg->surface_scale_factor)); - } - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { - glEnable(GL_POLYGON_SMOOTH); - } else { - glDisable(GL_POLYGON_SMOOTH); - } - - unsigned int vp_width = pg->surface_binding_dim.width, - vp_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); - glViewport(0, 0, vp_width, vp_height); - - /* Surface clip */ - /* FIXME: Consider moving to PSH w/ window clip */ - unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x, - ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y; - unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, - ymax = ymin + pg->surface_shape.clip_height - 1; - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Visibility testing */ - if (pg->zpass_pixel_count_enable) { - pg->gl_zpass_pixel_count_query_count++; - pg->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( - pg->gl_zpass_pixel_count_queries, - sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count); - - GLuint gl_query; - glGenQueries(1, &gl_query); - pg->gl_zpass_pixel_count_queries[ - 
pg->gl_zpass_pixel_count_query_count - 1] = gl_query; - glBeginQuery(GL_SAMPLES_PASSED, gl_query); - } - } -} - -DEF_METHOD(NV097, SET_TEXTURE_OFFSET) -{ - int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64; - pg->regs[NV_PGRAPH_TEXOFFSET0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_FORMAT) -{ - int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; - bool cubemap = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); - unsigned int border_source = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); - unsigned int dimensionality = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); - unsigned int color_format = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); - unsigned int levels = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); - unsigned int log_width = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); - unsigned int log_height = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); - unsigned int log_depth = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXFMT0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; - pg->regs[NV_PGRAPH_TEXCTL0_0 + slot*4] = 
parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; - pg->regs[NV_PGRAPH_TEXCTL1_0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_FILTER) -{ - int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; - pg->regs[NV_PGRAPH_TEXFILTER0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) -{ - int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; - pg->regs[NV_PGRAPH_TEXIMAGERECT0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_PALETTE) -{ - int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; - unsigned int length = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); - unsigned int offset = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXPALETTE0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) -{ - int slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64; - pg->regs[NV_PGRAPH_BORDERCOLOR0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; - if (slot < 16) { - /* discard */ - return; - } - - slot -= 16; - const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, - NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; - pg->regs[swizzle[slot % 4] + slot / 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - pg->regs[NV_PGRAPH_BUMPSCALE1 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) -{ 
- int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - pg->regs[NV_PGRAPH_BUMPOFFSET1 + slot * 4] = parameter; -} - -static void pgraph_expand_draw_arrays(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - GLint start = pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei count = pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - - /* Render any previously squashed DRAW_ARRAYS calls. This case would be - * triggered if a set of BEGIN+DA+END triplets is followed by the - * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. */ - if (pg->draw_arrays_length > 1) { - pgraph_flush_draw(d); - } - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_check_within_begin_end_block(PGRAPHState *pg) -{ - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); - } -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; - pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter; -} - -DEF_METHOD(NV097, DRAW_ARRAYS) -{ - pgraph_check_within_begin_end_block(pg); - - unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); - unsigned int count = GET_MASK(parameter, 
NV097_DRAW_ARRAYS_COUNT) + 1; - - if (pg->inline_elements_length) { - /* FIXME: Determine HW behavior for overflow case. */ - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - assert(!pg->draw_arrays_prevent_connect); - - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - return; - } - - pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); - pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); - - assert(pg->draw_arrays_length < ARRAY_SIZE(pg->gl_draw_arrays_start)); - - /* Attempt to connect contiguous primitives */ - if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { - unsigned int last_start = - pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei* last_count = - &pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - if (start == (last_start + *last_count)) { - *last_count += count; - return; - } - } - - pg->gl_draw_arrays_start[pg->draw_arrays_length] = start; - pg->gl_draw_arrays_count[pg->draw_arrays_length] = count; - pg->draw_arrays_length++; - pg->draw_arrays_prevent_connect = false; -} - -DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_array[pg->inline_array_length++] = parameter; -} - -DEF_METHOD_INC(NV097, SET_EYE_VECTOR) -{ - int slot = (method - NV097_SET_EYE_VECTOR) / 4; - pg->regs[NV_PGRAPH_EYEVEC0 + slot * 4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/ - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4; - unsigned int part = slot % 4; - slot /= 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - if ((slot == 0) && (part == 3)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S) -{ - int slot = (method - NV097_SET_VERTEX_DATA2S) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB) -{ - int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (parameter & 0xFF) / 255.0; - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0; - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0; - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - - attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) -{ - pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter; -} - -DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) -{ - pgraph_update_surface(d, false, true, true); - - //qemu_mutex_unlock(&d->pgraph.lock); - //qemu_mutex_lock_iothread(); - - uint32_t semaphore_offset = pg->regs[NV_PGRAPH_SEMAPHOREOFFSET]; - - hwaddr semaphore_dma_len; - uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, - &semaphore_dma_len); - assert(semaphore_offset < semaphore_dma_len); - semaphore_data += semaphore_offset; - - stl_le_p((uint32_t*)semaphore_data, parameter); - - //qemu_mutex_lock(&d->pgraph.lock); - //qemu_mutex_unlock_iothread(); -} - -DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) -{ - switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); - break; - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); - break; - default: - /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ - assert(!"Invalid zclamp value"); - break; - } -} - -DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) -{ - SET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE, - GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); - // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) -} - -DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_ZSTENCILCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_COLORCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, CLEAR_SURFACE) -{ - pg->clearing = true; - - 
NV2A_DPRINTF("---------PRE CLEAR ------\n"); - GLbitfield gl_mask = 0; - - bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); - bool write_zeta = - (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); - - if (write_zeta) { - uint32_t clear_zstencil = - d->pgraph.regs[NV_PGRAPH_ZSTENCILCLEARVALUE]; - GLint gl_clear_stencil; - GLfloat gl_clear_depth; - - switch(pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { - uint16_t z = clear_zstencil & 0xFFFF; - /* FIXME: Remove bit for stencil clear? */ - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f16_to_float(z) / f16_max; - } else { - gl_clear_depth = z / (float)0xFFFF; - } - break; - } - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { - gl_clear_stencil = clear_zstencil & 0xFF; - uint32_t z = clear_zstencil >> 8; - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f24_to_float(z) / f24_max; - } else { - gl_clear_depth = z / (float)0xFFFFFF; - } - break; - } - default: - fprintf(stderr, "Unknown zeta surface format: 0x%x\n", pg->surface_shape.zeta_format); - assert(false); - break; - } - if (parameter & NV097_CLEAR_SURFACE_Z) { - gl_mask |= GL_DEPTH_BUFFER_BIT; - glDepthMask(GL_TRUE); - glClearDepth(gl_clear_depth); - } - if (parameter & NV097_CLEAR_SURFACE_STENCIL) { - gl_mask |= GL_STENCIL_BUFFER_BIT; - glStencilMask(0xff); - glClearStencil(gl_clear_stencil); - } - } - if (write_color) { - gl_mask |= GL_COLOR_BUFFER_BIT; - glColorMask((parameter & NV097_CLEAR_SURFACE_R) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_G) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_B) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_A) - ? 
GL_TRUE : GL_FALSE); - uint32_t clear_color = d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]; - - /* Handle RGB */ - GLfloat red, green, blue; - switch(pg->surface_shape.color_format) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: - red = ((clear_color >> 10) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x1F) / 31.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: - red = ((clear_color >> 11) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x3F) / 63.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - red = ((clear_color >> 16) & 0xFF) / 255.0f; - green = ((clear_color >> 8) & 0xFF) / 255.0f; - blue = (clear_color & 0xFF) / 255.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: - /* Xbox D3D doesn't support clearing those */ - default: - red = 1.0f; - green = 0.0f; - blue = 1.0f; - fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", - pg->surface_shape.color_format); - assert(false); - break; - } - - /* Handle alpha */ - GLfloat alpha; - switch(pg->surface_shape.color_format) { - /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we - * also have to clear non-alpha bits with alpha value? - * As GL doesn't own those pixels we'd have to do this on - * our own in xbox memory. 
- */ - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - alpha = ((clear_color >> 24) & 0x7F) / 127.0f; - assert(false); /* Untested */ - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - alpha = ((clear_color >> 24) & 0xFF) / 255.0f; - break; - default: - alpha = 1.0f; - break; - } - - glClearColor(red, green, blue, alpha); - } - - pgraph_update_surface(d, true, write_color, write_zeta); - - /* FIXME: Needs confirmation */ - unsigned int xmin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMIN); - unsigned int xmax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMAX); - unsigned int ymin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMIN); - unsigned int ymax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMAX); - - NV2A_DPRINTF( - "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n", - parameter, xmin, ymin, xmax, ymax, - d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]); - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - - NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, - xmin + scissor_width - 1, ymin + scissor_height - 1); - - bool full_clear = !xmin && !ymin && - scissor_width >= pg->surface_binding_dim.width && - scissor_height >= pg->surface_binding_dim.height; - - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - /* FIXME: Respect window clip?!?! */ - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Dither */ - /* FIXME: Maybe also disable it here? 
+ GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glClear(gl_mask); - - glDisable(GL_SCISSOR_TEST); - - pgraph_set_surface_dirty(pg, write_color, write_zeta); - - if (pg->color_binding) { - pg->color_binding->cleared = full_clear && write_color; - } - if (pg->zeta_binding) { - pg->zeta_binding->cleared = full_clear && write_zeta; - } - - pg->clearing = false; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTX] = parameter; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTY] = parameter; -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) -{ - int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; - pg->regs[NV_PGRAPH_SPECFOGFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) -{ - pg->regs[NV_PGRAPH_SHADERCLIPMODE] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_CONTROL) -{ - pg->regs[NV_PGRAPH_COMBINECTL] = parameter; -} - -DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) -{ - pg->regs[NV_PGRAPH_SHADOWZSLOPETHRESHOLD] = parameter; - assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ -} - -DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, - parameter); -} - -DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) -{ - pg->regs[NV_PGRAPH_SHADERPROG] = parameter; -} - -DEF_METHOD(NV097, SET_DOT_RGBMAPPING) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFF, - GET_MASK(parameter, 0xFFF)); -} - -DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFFF000, - GET_MASK(parameter, 0xFFFF000)); -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) -{ - int slot = (method - 
NV097_SET_TRANSFORM_DATA) / 4; - pg->vertex_state_shader_v0[slot] = parameter; -} - -DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) -{ - unsigned int program_start = parameter; - assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - Nv2aVshProgram program; - Nv2aVshParseResult result = nv2a_vsh_parse_program( - &program, - pg->program_data[program_start], - NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - program_start); - assert(result == NV2AVPR_SUCCESS); - - Nv2aVshCPUXVSSExecutionState state_linkage; - Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( - &state_linkage, (float*)pg->vsh_constants); - memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); - - nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); - - nv2a_vsh_program_destroy(&program); -} - -DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_RANGE_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) -{ - pg->enable_vertex_program_write = parameter; -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) -{ - assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); -} - - -void pgraph_context_switch(NV2AState *d, unsigned int channel_id) -{ - bool 
channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - bool valid = channel_valid && pgraph_channel_id == channel_id; - if (!valid) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); - - NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); - - /* TODO: hardware context switching */ - assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3] - & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); - - d->pgraph.waiting_for_context_switch = true; - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } -} - -static void pgraph_method_log(unsigned int subchannel, - unsigned int graphics_class, - unsigned int method, uint32_t parameter) -{ - const char *method_name = "?"; - static unsigned int last = 0; - static unsigned int count = 0; - - if (last == NV097_ARRAY_ELEMENT16 && method != last) { - method_name = "NV097_ARRAY_ELEMENT16"; - trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, - method_name, count); - NV2A_GL_DPRINTF(false, "pgraph method (%d) 0x%x %s * %d", subchannel, - last, method_name, count); - } - - if (method != NV097_ARRAY_ELEMENT16) { - uint32_t base = method; - switch (graphics_class) { - case NV_KELVIN_PRIMITIVE: { - int idx = METHOD_ADDR_TO_INDEX(method); - if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && - pgraph_kelvin_methods[idx].handler) { - method_name = pgraph_kelvin_methods[idx].name; - base = pgraph_kelvin_methods[idx].base; - } - break; - } - default: - break; - } - - uint32_t offset = method - base; - trace_nv2a_pgraph_method(subchannel, graphics_class, method, - method_name, offset, parameter); - NV2A_GL_DPRINTF(false, - "pgraph method (%d): 0x%" PRIx32 " -> 0x%04" PRIx32 - " %s[%" PRId32 "] 0x%" 
PRIx32, - subchannel, graphics_class, method, method_name, offset, - parameter); - } - - if (method == last) { - count++; - } else { - count = 0; - } - last = method; -} - -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, - unsigned int attr) -{ - VertexAttribute *attribute = &pg->vertex_attributes[attr]; - - if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { - return; - } - - /* Now upload the previous attribute value */ - attribute->inline_buffer_populated = true; - for (int i = 0; i < pg->inline_buffer_length; i++) { - memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, - sizeof(float) * 4); - } -} - -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attribute = &pg->vertex_attributes[i]; - if (attribute->inline_buffer_populated) { - memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], - attribute->inline_value, sizeof(float) * 4); - } - } - - pg->inline_buffer_length++; -} - -void nv2a_gl_context_init(void) -{ - g_nv2a_context_render = glo_context_create(); - g_nv2a_context_display = glo_context_create(); - - glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, supportedSmoothLineWidthRange); - glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, supportedAliasedLineWidthRange); -} - -void nv2a_set_surface_scale_factor(unsigned int scale) -{ - NV2AState *d = g_nv2a; - - g_config.display.quality.surface_scale = scale < 1 ? 
1 : scale; - - qemu_mutex_unlock_iothread(); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, true); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.flush_complete); - qatomic_set(&d->pgraph.flush_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.flush_complete); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, false); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock_iothread(); -} - -unsigned int nv2a_get_surface_scale_factor(void) -{ - return g_nv2a->pgraph.surface_scale_factor; -} - -static void pgraph_reload_surface_scale_factor(NV2AState *d) -{ - int factor = g_config.display.quality.surface_scale; - d->pgraph.surface_scale_factor = factor < 1 ? 
1 : factor; -} - -void pgraph_init(NV2AState *d) -{ - int i; - - g_nv2a = d; - PGRAPHState *pg = &d->pgraph; - - pgraph_reload_surface_scale_factor(d); - - pg->frame_time = 0; - pg->draw_time = 0; - pg->downloads_pending = false; - - qemu_mutex_init(&pg->lock); - qemu_mutex_init(&pg->shader_cache_lock); - qemu_event_init(&pg->gl_sync_complete, false); - qemu_event_init(&pg->downloads_complete, false); - qemu_event_init(&pg->dirty_surfaces_download_complete, false); - qemu_event_init(&pg->flush_complete, false); - qemu_event_init(&pg->shader_cache_writeback_complete, false); - - /* fire up opengl */ - glo_set_current(g_nv2a_context_render); - -#ifdef DEBUG_NV2A_GL - gl_debug_initialize(); -#endif - - /* DXT textures */ - assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); - /* Internal RGB565 texture format */ - assert(glo_check_extension("GL_ARB_ES2_compatibility")); - - GLint max_vertex_attributes; - glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); - assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); - - - glGenFramebuffers(1, &pg->gl_framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); - - pgraph_init_render_to_texture(d); - QTAILQ_INIT(&pg->surfaces); - - QSIMPLEQ_INIT(&pg->report_queue); - - //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); - - // Initialize texture cache - const size_t texture_cache_size = 512; - lru_init(&pg->texture_cache); - pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); - assert(pg->texture_cache_entries != NULL); - for (i = 0; i < texture_cache_size; i++) { - lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node); - } - - pg->texture_cache.init_node = texture_cache_entry_init; - pg->texture_cache.compare_nodes = texture_cache_entry_compare; - pg->texture_cache.post_node_evict = texture_cache_entry_post_evict; - - // Initialize element cache - const size_t element_cache_size = 50*1024; - lru_init(&pg->element_cache); - 
pg->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); - assert(pg->element_cache_entries != NULL); - GLuint element_cache_buffers[element_cache_size]; - glGenBuffers(element_cache_size, element_cache_buffers); - for (i = 0; i < element_cache_size; i++) { - pg->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; - lru_add_free(&pg->element_cache, &pg->element_cache_entries[i].node); - } - - pg->element_cache.init_node = vertex_cache_entry_init; - pg->element_cache.compare_nodes = vertex_cache_entry_compare; - - shader_cache_init(pg); - - pg->material_alpha = 0.0f; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - pg->primitive_mode = PRIM_TYPE_INVALID; - - for (i=0; ivertex_attributes[i]; - glGenBuffers(1, &attribute->gl_inline_buffer); - attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH - * sizeof(float) * 4); - attribute->inline_buffer_populated = false; - } - glGenBuffers(1, &pg->gl_inline_array_buffer); - - glGenBuffers(1, &pg->gl_memory_buffer); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer); - glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), - NULL, GL_DYNAMIC_DRAW); - - glGenVertexArrays(1, &pg->gl_vertex_array); - glBindVertexArray(pg->gl_vertex_array); - - assert(glGetError() == GL_NO_ERROR); - - glo_set_current(g_nv2a_context_display); - pgraph_init_display_renderer(d); - - glo_set_current(NULL); -} - -void pgraph_destroy(PGRAPHState *pg) -{ - qemu_mutex_destroy(&pg->lock); - qemu_mutex_destroy(&pg->shader_cache_lock); - - glo_set_current(g_nv2a_context_render); - - // TODO: clear out surfaces - - glDeleteFramebuffers(1, &pg->gl_framebuffer); - - // Clear out shader cache - shader_write_cache_reload_list(pg); - free(pg->shader_cache_entries); - - // Clear out texture cache - lru_flush(&pg->texture_cache); - free(pg->texture_cache_entries); - - glo_set_current(NULL); - glo_context_destroy(g_nv2a_context_render); - 
glo_context_destroy(g_nv2a_context_display); -} - -static void pgraph_shader_update_constants(PGRAPHState *pg, - ShaderBinding *binding, - bool binding_changed, - bool vertex_program, - bool fixed_function) -{ - int i, j; - - /* update combiner constants */ - for (i = 0; i < 9; i++) { - uint32_t constant[2]; - if (i == 8) { - /* final combiner */ - constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; - constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; - } else { - constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (j = 0; j < 2; j++) { - GLint loc = binding->psh_constant_loc[i][j]; - if (loc != -1) { - float value[4]; - value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f; - value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f; - value[2] = (float) (constant[j] & 0xFF) / 255.0f; - value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f; - - glUniform4fv(loc, 1, value); - } - } - } - if (binding->alpha_ref_loc != -1) { - float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; - glUniform1f(binding->alpha_ref_loc, alpha_ref); - } - - - /* For each texture stage */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - GLint loc; - - /* Bump luminance only during stages 1 - 3 */ - if (i > 0) { - loc = binding->bump_mat_loc[i]; - if (loc != -1) { - float m[4]; - m[0] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)]; - m[1] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)]; - m[2] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)]; - m[3] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)]; - glUniformMatrix2fv(loc, 1, GL_FALSE, m); - } - loc = binding->bump_scale_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]); - } - loc = binding->bump_offset_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]); - } - } - - loc = 
pg->shader_binding->tex_scale_loc[i]; - if (loc != -1) { - assert(pg->texture_binding[i] != NULL); - glUniform1f(loc, (float)pg->texture_binding[i]->scale); - } - } - - if (binding->fog_color_loc != -1) { - uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR]; - glUniform4f(binding->fog_color_loc, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); - } - if (binding->fog_param_loc[0] != -1) { - glUniform1f(binding->fog_param_loc[0], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]); - } - if (binding->fog_param_loc[1] != -1) { - glUniform1f(binding->fog_param_loc[1], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]); - } - - float zmax; - switch (pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: - zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; - break; - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: - zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; - break; - default: - assert(0); - } - - if (fixed_function) { - /* update lighting constants */ - struct { - uint32_t* v; - bool* dirty; - GLint* locs; - size_t len; - } lighting_arrays[] = { - {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, - {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, - {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, - }; - - for (i=0; ilight_infinite_half_vector_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); - } - loc = binding->light_infinite_direction_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_direction[i]); - } - - loc = binding->light_local_position_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_position[i]); - } - loc = binding->light_local_attenuation_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_attenuation[i]); - } - } - - /* estimate the viewport by assuming it matches the surface ... 
*/ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); - float m33 = zmax; - float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } - } - - /* update vertex program constants */ - for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; - - GLint loc = binding->vsh_constant_loc[i]; - if ((loc != -1) && - memcmp(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[1]))) { - glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); - memcpy(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[i])); - } - - pg->vsh_constants_dirty[i] = false; - } - - if (binding->surface_size_loc != -1) { - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - glUniform2f(binding->surface_size_loc, - pg->surface_binding_dim.width / aa_width, - pg->surface_binding_dim.height / aa_height); - } - - if (binding->clip_range_loc != -1) { - float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN] / zmax * 2.0 - 1.0; - float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX] / zmax * 2.0 - 1.0; - glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); - } - - /* Clipping regions */ - unsigned int max_gl_width = pg->surface_binding_dim.width; - unsigned int max_gl_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); - - for (i = 0; i < 8; i++) { - uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 
4]; - unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); - unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; - uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4]; - unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); - unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; - pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); - pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); - - pgraph_apply_scaling_factor(pg, &x_min, &y_min); - pgraph_apply_scaling_factor(pg, &x_max, &y_max); - - /* Translate for the GL viewport origin */ - int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); - int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); - - glUniform4i(pg->shader_binding->clip_region_loc[i], - x_min, y_min_xlat, x_max, y_max_xlat); - } - - if (binding->material_alpha_loc != -1) { - glUniform1f(binding->material_alpha_loc, pg->material_alpha); - } -} - -static bool pgraph_bind_shaders_test_dirty(PGRAPHState *pg) -{ - #define CR_1(reg) CR_x(reg, 1) - #define CR_4(reg) CR_x(reg, 4) - #define CR_8(reg) CR_x(reg, 8) - #define CF(src, name) CF_x(typeof(src), (&src), name, 1) - #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) - #define CNAME(name) reg_check__ ## name - #define CX_x__define(type, name, x) static type CNAME(name)[x]; - #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) - #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) - #define CR_x__check(reg, x) \ - for (int i = 0; i < x; i++) { if (pg->regs[reg+i*4] != CNAME(reg)[i]) goto dirty; } - #define CF_x__check(type, src, name, x) \ - for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } - #define CR_x__update(reg, x) \ - for (int i = 0; i < x; i++) { CNAME(reg)[i] = pg->regs[reg+i*4]; } - #define CF_x__update(type, src, name, x) \ - for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; } - - #define DIRTY_REGS \ - CR_1(NV_PGRAPH_COMBINECTL) \ - 
CR_1(NV_PGRAPH_SHADERCTL) \ - CR_1(NV_PGRAPH_SHADOWCTL) \ - CR_1(NV_PGRAPH_COMBINESPECFOG0) \ - CR_1(NV_PGRAPH_COMBINESPECFOG1) \ - CR_1(NV_PGRAPH_CONTROL_0) \ - CR_1(NV_PGRAPH_CONTROL_3) \ - CR_1(NV_PGRAPH_CSV0_C) \ - CR_1(NV_PGRAPH_CSV0_D) \ - CR_1(NV_PGRAPH_CSV1_A) \ - CR_1(NV_PGRAPH_CSV1_B) \ - CR_1(NV_PGRAPH_SETUPRASTER) \ - CR_1(NV_PGRAPH_SHADERPROG) \ - CR_8(NV_PGRAPH_COMBINECOLORI0) \ - CR_8(NV_PGRAPH_COMBINECOLORO0) \ - CR_8(NV_PGRAPH_COMBINEALPHAI0) \ - CR_8(NV_PGRAPH_COMBINEALPHAO0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR1) \ - CR_1(NV_PGRAPH_SHADERCLIPMODE) \ - CR_4(NV_PGRAPH_TEXCTL0_0) \ - CR_4(NV_PGRAPH_TEXFMT0) \ - CR_4(NV_PGRAPH_TEXFILTER0) \ - CR_8(NV_PGRAPH_WINDOWCLIPX0) \ - CR_8(NV_PGRAPH_WINDOWCLIPY0) \ - CF(pg->primitive_mode, primitive_mode) \ - CF(pg->surface_scale_factor, surface_scale_factor) \ - CF(pg->compressed_attrs, compressed_attrs) \ - CFA(pg->texture_matrix_enable, texture_matrix_enable) - - #define CR_x(reg, x) CR_x__define(reg, x) - #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - - #define CR_x(reg, x) CR_x__check(reg, x) - #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return false; - -dirty: - #define CR_x(reg, x) CR_x__update(reg, x) - #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return true; -} - -static void pgraph_bind_shaders(PGRAPHState *pg) -{ - int i, j; - - bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 2; - - bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 0; - - int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); - - NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, - vertex_program ? "yes" : "no", - fixed_function ? 
"yes" : "no"); - - bool binding_changed = false; - if (!pgraph_bind_shaders_test_dirty(pg) && !pg->program_data_dirty) { - nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); - goto update_constants; - } - - pg->program_data_dirty = false; - - ShaderBinding* old_binding = pg->shader_binding; - - ShaderState state; - memset(&state, 0, sizeof(ShaderState)); - - state.surface_scale_factor = pg->surface_scale_factor; - - state.compressed_attrs = pg->compressed_attrs; - - /* register combiner stuff */ - state.psh.window_clip_exclusive = pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; - state.psh.combiner_control = pg->regs[NV_PGRAPH_COMBINECTL]; - state.psh.shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG]; - state.psh.other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL]; - state.psh.final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0]; - state.psh.final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1]; - - state.psh.alpha_test = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; - state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC); - - state.psh.point_sprite = pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; - - state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( - pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); - - state.fixed_function = fixed_function; - - /* fixed function stuff */ - if (fixed_function) { - state.skinning = (enum VshSkinning)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_SKIN); - state.lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_LIGHTING); - state.normalization = pg->regs[NV_PGRAPH_CSV0_C] - & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; - - /* color material */ - state.emission_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION); - state.ambient_src = (enum 
MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT); - state.diffuse_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE); - state.specular_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR); - } - - /* vertex program stuff */ - state.vertex_program = vertex_program, - state.z_perspective = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; - - state.point_params_enable = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); - state.point_size = - GET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V) / 8.0f; - if (state.point_params_enable) { - for (int i = 0; i < 8; i++) { - state.point_params[i] = pg->point_params[i]; - } - } - - /* geometry shader stuff */ - state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; - state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); - state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE); - - state.smooth_shading = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; - state.psh.smooth_shading = state.smooth_shading; - - state.program_length = 0; - - if (vertex_program) { - // copy in vertex program tokens - for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) { - uint32_t *cur_token = (uint32_t*)&pg->program_data[i]; - memcpy(&state.program_data[state.program_length], - cur_token, - VSH_TOKEN_SIZE * sizeof(uint32_t)); - state.program_length++; - - if (vsh_get_field(cur_token, FLD_FINAL)) { - break; - } - } - } - - /* Texgen */ - for (i = 0; i < 4; i++) { - unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; - for (j = 0; j < 4; j++) { - unsigned int masks[] = { - (i % 2) ? 
NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q - }; - state.texgen[i][j] = (enum VshTexgen)GET_MASK(pg->regs[reg], masks[j]); - } - } - - /* Fog */ - state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3] - & NV_PGRAPH_CONTROL_3_FOGENABLE; - if (state.fog_enable) { - /*FIXME: Use CSV0_D? */ - state.fog_mode = (enum VshFogMode)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_FOG_MODE); - state.foggen = (enum VshFoggen)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_FOGGENMODE); - } else { - /* FIXME: Do we still pass the fogmode? */ - state.fog_mode = (enum VshFogMode)0; - state.foggen = (enum VshFoggen)0; - } - - /* Texture matrices */ - for (i = 0; i < 4; i++) { - state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; - } - - /* Lighting */ - if (state.lighting) { - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - state.light[i] = (enum VshLight)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); - } - } - - /* Copy content of enabled combiner stages */ - int num_stages = pg->regs[NV_PGRAPH_COMBINECTL] & 0xFF; - for (i = 0; i < num_stages; i++) { - state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; - state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; - state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; - state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; - //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - state.psh.compare_mode[i][j] = - (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1; - } - - uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - (ctl_0 & 
NV_PGRAPH_TEXCTL0_0_ENABLE); - if (!enabled) { - continue; - } - - state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; - - uint32_t tex_fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - state.psh.rect_tex[i] = f.linear; - - uint32_t border_source = GET_MASK(tex_fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - state.psh.border_logical_size[i][0] = 0.0f; - state.psh.border_logical_size[i][1] = 0.0f; - state.psh.border_logical_size[i][2] = 0.0f; - if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { - if (!f.linear && !cubemap) { - // The actual texture will be (at least) double the reported - // size and shifted by a 4 texel border but texture coordinates - // will still be relative to the reported size. - unsigned int reported_width = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int reported_height = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int reported_depth = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - state.psh.border_logical_size[i][0] = reported_width; - state.psh.border_logical_size[i][1] = reported_height; - state.psh.border_logical_size[i][2] = reported_depth; - - if (reported_width < 8) { - state.psh.border_inv_real_size[i][0] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][0] = - 1.0f / (reported_width * 2.0f); - } - if (reported_height < 8) { - state.psh.border_inv_real_size[i][1] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][1] = - 1.0f / (reported_height * 2.0f); - } - if (reported_depth < 8) { - state.psh.border_inv_real_size[i][2] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][2] = - 1.0f / (reported_depth * 2.0f); - } - } else { - NV2A_UNIMPLEMENTED("Border source texture with linear %d cubemap %d", - f.linear, cubemap); - } - } - - /* Keep 
track of whether texture data has been loaded as signed - * normalized integers or not. This dictates whether or not we will need - * to re-map in fragment shader for certain texture modes (e.g. - * bumpenvmap). - * - * FIXME: When signed texture data is loaded as unsigned and remapped in - * fragment shader, there may be interpolation artifacts. Fix this to - * support signed textures more appropriately. - */ - state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) - || (f.gl_internal_format == GL_RG8_SNORM); - - state.psh.shadow_map[i] = f.depth; - - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; - /* FIXME: We do not distinguish between min and mag when - * performing convolution. Just use it if specified for min (common AA - * case). - */ - if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { - int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); - assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || - k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); - kernel = (enum ConvolutionFilter)k; - } - - state.psh.conv_tex[i] = kernel; - } - - uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, shader_state_hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - if (snode->binding || shader_load_from_memory(snode)) { - pg->shader_binding = snode->binding; - } else { - pg->shader_binding = generate_shaders(&state); - nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); - - /* cache it */ - snode->binding = pg->shader_binding; - if (g_config.perf.cache_shaders) { - shader_cache_to_disk(snode); - } - } - - qemu_mutex_unlock(&pg->shader_cache_lock); - - binding_changed = (pg->shader_binding != old_binding); - if (binding_changed) { - 
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); - glUseProgram(pg->shader_binding->gl_program); - } - -update_constants: - pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed, - vertex_program, fixed_function); - - NV2A_GL_DGROUP_END(); -} - -static bool pgraph_framebuffer_dirty(PGRAPHState *pg) -{ - bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, - sizeof(SurfaceShape)) != 0; - if (!shape_changed || (!pg->surface_shape.color_format - && !pg->surface_shape.zeta_format)) { - return false; - } - return true; -} - -static bool pgraph_color_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); -} - -static bool pgraph_zeta_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ZWRITEENABLE - | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); -} - -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) -{ - NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", - color, zeta, - pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); - /* FIXME: Does this apply to CLEARs too? 
*/ - color = color && pgraph_color_write_enabled(pg); - zeta = zeta && pgraph_zeta_write_enabled(pg); - pg->surface_color.draw_dirty |= color; - pg->surface_zeta.draw_dirty |= zeta; - - if (pg->color_binding) { - pg->color_binding->draw_dirty |= color; - pg->color_binding->frame_time = pg->frame_time; - pg->color_binding->cleared = false; - - } - - if (pg->zeta_binding) { - pg->zeta_binding->draw_dirty |= zeta; - pg->zeta_binding->frame_time = pg->frame_time; - pg->zeta_binding->cleared = false; - - } -} - -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src) -{ - GLint status; - char err_buf[512]; - - // Compile vertex shader - GLuint vs = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vs, 1, &vs_src, NULL); - glCompileShader(vs); - glGetShaderiv(vs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Compile fragment shader - GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(fs, 1, &fs_src, NULL); - glCompileShader(fs); - glGetShaderiv(fs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Link vertex and fragment shaders - GLuint prog = glCreateProgram(); - glAttachShader(prog, vs); - glAttachShader(prog, fs); - glLinkProgram(prog); - glUseProgram(prog); - - // Flag shaders for deletion (will still be retained for lifetime of prog) - glDeleteShader(vs); - glDeleteShader(fs); - - return prog; -} - -static void pgraph_init_render_to_texture(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + 
float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform vec2 surface_size;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord;\n" - " texCoord.x = gl_FragCoord.x;\n" - " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" - " + (textureSize(tex,0).y - surface_size.y);\n" - " texCoord /= textureSize(tex,0).xy;\n" - " out_Color.rgba = texture(tex, texCoord);\n" - "}\n"; - - pg->s2t_rndr.prog = pgraph_compile_shader(vs, fs); - pg->s2t_rndr.tex_loc = glGetUniformLocation(pg->s2t_rndr.prog, "tex"); - pg->s2t_rndr.surface_size_loc = glGetUniformLocation(pg->s2t_rndr.prog, - "surface_size"); - - glGenVertexArrays(1, &pg->s2t_rndr.vao); - glBindVertexArray(pg->s2t_rndr.vao); - glGenBuffers(1, &pg->s2t_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->s2t_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->s2t_rndr.fbo); -} - -static bool pgraph_surface_to_texture_can_fastpath(SurfaceBinding *surface, - TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case 
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - - -static void pgraph_render_surface_to(NV2AState *d, SurfaceBinding *surface, - int texture_unit, GLuint gl_target, - GLuint gl_texture, unsigned int width, - unsigned int height) -{ - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.s2t_rndr.fbo); - - GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, - gl_texture, 0); - glDrawBuffers(1, draw_buffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - assert(glGetError() == GL_NO_ERROR); - - float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); - - glBindVertexArray(d->pgraph.s2t_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.s2t_rndr.vbo); - glUseProgram(d->pgraph.s2t_rndr.prog); - glProgramUniform1i(d->pgraph.s2t_rndr.prog, d->pgraph.s2t_rndr.tex_loc, - texture_unit); - glProgramUniform2f(d->pgraph.s2t_rndr.prog, - d->pgraph.s2t_rndr.surface_size_loc, width, height); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_DITHER); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - 
glDisable(GL_STENCIL_TEST); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 1.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, - 0); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.gl_framebuffer); - glBindVertexArray(d->pgraph.gl_vertex_array); - glBindTexture(gl_target, gl_texture); - glUseProgram( - d->pgraph.shader_binding ? d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_render_surface_to_texture_slow( - NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, - TextureShape *texture_shape, int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - - unsigned int width = surface->width, - height = surface->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - size_t bufsize = width * height * surface->fmt.bytes_per_pixel; - - uint8_t *buf = g_malloc(bufsize); - pgraph_download_surface_data_to_buffer(d, surface, false, true, false, buf); - - width = texture_shape->width; - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, buf); - g_free(buf); - glBindTexture(texture->gl_target, texture->gl_texture); -} - -/* Note: This function is intended to be called before PGRAPH configures GL - * state for rendering; it will configure GL state here but only restore a - * couple of items. 
- */ -static void pgraph_render_surface_to_texture(NV2AState *d, - SurfaceBinding *surface, - TextureBinding *texture, - TextureShape *texture_shape, - int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = - &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); - - if (!pgraph_surface_to_texture_can_fastpath(surface, texture_shape)) { - pgraph_render_surface_to_texture_slow(d, surface, texture, - texture_shape, texture_unit); - return; - } - - - unsigned int width = texture_shape->width, - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, NULL); - glBindTexture(texture->gl_target, 0); - pgraph_render_surface_to(d, surface, texture_unit, texture->gl_target, - texture->gl_texture, width, height); - glBindTexture(texture->gl_target, texture->gl_texture); - glUseProgram( - d->pgraph.shader_binding ? 
d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_gl_fence(void) -{ - GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, - (GLuint64)(5000000000)); - assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); - glDeleteSync(fence); -} - -static void pgraph_init_display_renderer(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - - glGenTextures(1, &pg->gl_display_buffer); - pg->gl_display_buffer_internal_format = 0; - pg->gl_display_buffer_width = 0; - pg->gl_display_buffer_height = 0; - pg->gl_display_buffer_format = 0; - pg->gl_display_buffer_type = 0; - - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - /* FIXME: improve interlace handling, pvideo */ - - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform bool pvideo_enable;\n" - "uniform sampler2D pvideo_tex;\n" - "uniform vec2 pvideo_in_pos;\n" - "uniform vec4 pvideo_pos;\n" - "uniform vec3 pvideo_scale;\n" - "uniform bool pvideo_color_key_enable;\n" - "uniform vec4 pvideo_color_key;\n" - "uniform vec2 display_size;\n" - "uniform float line_offset;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord = gl_FragCoord.xy/display_size;\n" - " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" - " out_Color.rgba = texture(tex, texCoord);\n" - " if (pvideo_enable) {\n" - " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" - " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" - " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" - " greaterThan(screenCoord, output_region.zw));\n" - " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" - " 
vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" - " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" - " in_st.y *= -1.0;\n" - " out_Color.rgba = texture(pvideo_tex, in_st);\n" - " }\n" - " }\n" - "}\n"; - - pg->disp_rndr.prog = pgraph_compile_shader(vs, fs); - pg->disp_rndr.tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "tex"); - pg->disp_rndr.pvideo_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_enable"); - pg->disp_rndr.pvideo_tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_tex"); - pg->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_in_pos"); - pg->disp_rndr.pvideo_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_pos"); - pg->disp_rndr.pvideo_scale_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_scale"); - pg->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key_enable"); - pg->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key"); - pg->disp_rndr.display_size_loc = glGetUniformLocation(pg->disp_rndr.prog, "display_size"); - pg->disp_rndr.line_offset_loc = glGetUniformLocation(pg->disp_rndr.prog, "line_offset"); - - glGenVertexArrays(1, &pg->disp_rndr.vao); - glBindVertexArray(pg->disp_rndr.vao); - glGenBuffers(1, &pg->disp_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->disp_rndr.fbo); - glGenTextures(1, &pg->disp_rndr.pvideo_tex); - assert(glGetError() == GL_NO_ERROR); -} - -static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int pitch) -{ - uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); - int x, y; - for (y = 0; y < height; y++) { - const uint8_t *line = &data[y * pitch]; - const uint32_t row_offset = y * width; - for (x = 0; x < width; x++) { - 
uint8_t *pixel = &converted_data[(row_offset + x) * 4]; - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - pixel[3] = 255; - } - } - return converted_data; -} - -static inline float pvideo_calculate_scale(unsigned int din_dout, - unsigned int output_size) -{ - float calculated_in = din_dout * (output_size - 1); - calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); - return (calculated_in + 1.0f) / output_size; -} - -static void pgraph_render_display_pvideo_overlay(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. - // Many games seem to pass this value when initializing or tearing down - // PVIDEO. On its own, this generally does not result in the overlay being - // hidden, however there are certain games (e.g., Ultimate Beach Soccer) - // that use an unknown mechanism to hide the overlay without explicitly - // stopping it. - // Since the value seems to be set to 0xFFFFFFFF only in cases where the - // content is not valid, it is probably good enough to treat it as an - // implicit stop. 
- bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) - && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; - glUniform1ui(d->pgraph.disp_rndr.pvideo_enable_loc, enabled); - if (!enabled) { - return; - } - - hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; - hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; - hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; - - int in_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); - int in_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); - - int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_S); - int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_T); - - int in_pitch = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); - int in_color = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); - - unsigned int out_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); - unsigned int out_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); - - float scale_x = 1.0f; - float scale_y = 1.0f; - unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; - unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; - if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_x = pvideo_calculate_scale(ds_dx, out_width); - } - if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_y = pvideo_calculate_scale(dt_dy, out_height); - } - - // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results - // in them being capped to the output size, content is not scaled. This is - // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF - // during initialization or teardown. 
- if (in_width > out_width) { - in_width = floorf((float)out_width * scale_x + 0.5f); - } - if (in_height > out_height) { - in_height = floorf((float)out_height * scale_y + 0.5f); - } - - /* TODO: support other color formats */ - assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); - - unsigned int out_x = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); - unsigned int out_y = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); - - unsigned int color_key_enabled = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); - glUniform1ui(d->pgraph.disp_rndr.pvideo_color_key_enable_loc, - color_key_enabled); - - // TODO: Verify that masking off the top byte is correct. - // SeaBlade sets a color key of 0x80000000 but the texture passed into the - // shader is cleared to 0 alpha. - unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; - glUniform4f(d->pgraph.disp_rndr.pvideo_color_key_loc, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); - - assert(offset + in_pitch * in_height <= limit); - hwaddr end = base + offset + in_pitch * in_height; - assert(end <= memory_region_size(d->vram)); - - pgraph_apply_scaling_factor(pg, &out_x, &out_y); - pgraph_apply_scaling_factor(pg, &out_width, &out_height); - - // Translate for the GL viewport origin. 
- out_y = MAX(pg->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); - - glActiveTexture(GL_TEXTURE0 + 1); - glBindTexture(GL_TEXTURE_2D, g_nv2a->pgraph.disp_rndr.pvideo_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( - d->vram_ptr + base + offset, in_width, in_height, in_pitch); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, - GL_UNSIGNED_BYTE, tex_rgba); - g_free(tex_rgba); - glUniform1i(d->pgraph.disp_rndr.pvideo_tex_loc, 1); - glUniform2f(d->pgraph.disp_rndr.pvideo_in_pos_loc, in_s, in_t); - glUniform4f(d->pgraph.disp_rndr.pvideo_pos_loc, - out_x, out_y, out_width, out_height); - glUniform3f(d->pgraph.disp_rndr.pvideo_scale_loc, - scale_x, scale_y, 1.0f / pg->surface_scale_factor); -} - -static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface) -{ - struct PGRAPHState *pg = &d->pgraph; - - unsigned int width, height; - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; - - /* Adjust viewport height for interlaced mode, used only in 1080i */ - if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { - height *= 2; - } - - pgraph_apply_scaling_factor(pg, &width, &height); - - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.disp_rndr.fbo); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, pg->gl_display_buffer); - bool recreate = ( - surface->fmt.gl_internal_format != pg->gl_display_buffer_internal_format - || width != pg->gl_display_buffer_width - || height != pg->gl_display_buffer_height - || surface->fmt.gl_format != pg->gl_display_buffer_format - || surface->fmt.gl_type != 
pg->gl_display_buffer_type - ); - - if (recreate) { - /* XXX: There's apparently a bug in some Intel OpenGL drivers for - * Windows that will leak this texture when its orphaned after use in - * another context, apparently regardless of which thread it's created - * or released on. - * - * Driver: 27.20.100.8729 9/11/2020 W10 x64 - * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 - */ - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - pg->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; - pg->gl_display_buffer_width = width; - pg->gl_display_buffer_height = height; - pg->gl_display_buffer_format = surface->fmt.gl_format; - pg->gl_display_buffer_type = surface->fmt.gl_type; - glTexImage2D(GL_TEXTURE_2D, 0, - pg->gl_display_buffer_internal_format, - pg->gl_display_buffer_width, - pg->gl_display_buffer_height, - 0, - pg->gl_display_buffer_format, - pg->gl_display_buffer_type, - NULL); - } - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, pg->gl_display_buffer, 0); - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - glDrawBuffers(1, DrawBuffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glBindVertexArray(pg->disp_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glUseProgram(pg->disp_rndr.prog); - glProgramUniform1i(pg->disp_rndr.prog, pg->disp_rndr.tex_loc, 0); - glUniform2f(d->pgraph.disp_rndr.display_size_loc, width, height); - glUniform1f(d->pgraph.disp_rndr.line_offset_loc, line_offset); - pgraph_render_display_pvideo_overlay(d); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glDisable(GL_STENCIL_TEST); - 
glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); -} - -void pgraph_gl_sync(NV2AState *d) -{ - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL) { - qemu_event_set(&d->pgraph.gl_sync_complete); - return; - } - - /* FIXME: Sanity check surface dimensions */ - - /* Wait for queued commands to complete */ - pgraph_upload_surface_data(d, surface, !tcg_enabled()); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Render framebuffer in display context */ - glo_set_current(g_nv2a_context_display); - pgraph_render_display(d, surface); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Switch back to original context */ - glo_set_current(g_nv2a_context_render); - - qatomic_set(&d->pgraph.gl_sync_pending, false); - qemu_event_set(&d->pgraph.gl_sync_complete); -} - -const uint8_t *nv2a_get_dac_palette(void) -{ - return g_nv2a->puserdac.palette; -} - -int nv2a_get_screen_off(void) -{ - return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; -} - -int nv2a_get_framebuffer_surface(void) -{ - NV2AState *d = g_nv2a; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&d->pfifo.lock); - // FIXME: Possible race condition with pgraph, consider lock - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL || !surface->color) { - qemu_mutex_unlock(&d->pfifo.lock); - return 0; - } - - assert(surface->color); - 
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); - assert(surface->fmt.gl_format == GL_RGBA - || surface->fmt.gl_format == GL_RGB - || surface->fmt.gl_format == GL_BGR - || surface->fmt.gl_format == GL_BGRA - ); - - surface->frame_time = pg->frame_time; - qemu_event_reset(&d->pgraph.gl_sync_complete); - qatomic_set(&pg->gl_sync_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.gl_sync_complete); - - return pg->gl_display_buffer; -} - -static bool pgraph_check_surface_to_texture_compatibility( - const SurfaceBinding *surface, - const TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - if ((!surface->swizzle && surface->pitch != shape->pitch) || - surface->width != shape->width || - surface->height != shape->height) { - return false; - } - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - if (shape->cubemap) { - // FIXME: Support rendering surface to cubemap face - return false; - } - - if (shape->levels > 1) { - // FIXME: Support rendering surface to mip levels - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - 
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: - break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - -static void pgraph_wait_for_surface_download(SurfaceBinding *e) -{ - NV2AState *d = g_nv2a; - - if (qatomic_read(&e->draw_dirty)) { - qemu_mutex_lock(&d->pfifo.lock); - qemu_event_reset(&d->pgraph.downloads_complete); - qatomic_set(&e->download_pending, true); - qatomic_set(&d->pgraph.downloads_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.downloads_complete); - } -} - -static void pgraph_surface_access_callback( - void *opaque, - MemoryRegion *mr, - hwaddr addr, - hwaddr len, - bool write) -{ - SurfaceBinding *e = opaque; - assert(addr >= e->vram_addr); - hwaddr offset = addr - e->vram_addr; - assert(offset < e->size); - - if (qatomic_read(&e->draw_dirty)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - pgraph_wait_for_surface_download(e); - } - - if (write && !qatomic_read(&e->upload_pending)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - qatomic_set(&e->upload_pending, true); - } -} - -static SurfaceBinding *pgraph_surface_put(NV2AState *d, - hwaddr addr, - SurfaceBinding *surface_in) -{ - assert(pgraph_surface_get(d, addr) == NULL); - - SurfaceBinding *surface, *next; - uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1; - QTAILQ_FOREACH_SAFE(surface, &d->pgraph.surfaces, entry, next) { - uintptr_t s_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr > e_end - || surface_in->vram_addr > s_end); - if (overlapping) { - trace_nv2a_pgraph_surface_evict_overlapping( - surface->vram_addr, surface->width, 
surface->height, - surface->pitch); - pgraph_download_surface_data_if_dirty(d, surface); - pgraph_surface_invalidate(d, surface); - } - } - - SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding)); - assert(surface_out != NULL); - *surface_out = *surface_in; - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_insert(qemu_get_cpu(0), - d->vram, surface_out->vram_addr, surface_out->size, - &surface_out->access_cb, &pgraph_surface_access_callback, - surface_out); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - QTAILQ_INSERT_TAIL(&d->pgraph.surfaces, surface_out, entry); - - return surface_out; -} - -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (surface->vram_addr == addr) { - return surface; - } - } - - return NULL; -} - -static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (addr >= surface->vram_addr && - addr < (surface->vram_addr + surface->size)) { - return surface; - } - } - - return NULL; -} - -static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *surface) -{ - trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); - - if (surface == d->pgraph.color_binding) { - assert(d->pgraph.surface_color.buffer_dirty); - pgraph_unbind_surface(d, true); - } - if (surface == d->pgraph.zeta_binding) { - assert(d->pgraph.surface_zeta.buffer_dirty); - pgraph_unbind_surface(d, false); - } - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - glDeleteTextures(1, &surface->gl_buffer); - - QTAILQ_REMOVE(&d->pgraph.surfaces, surface, entry); - 
g_free(surface); -} - -static void pgraph_surface_evict_old(NV2AState *d) -{ - const int surface_age_limit = 5; - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - int last_used = d->pgraph.frame_time - s->frame_time; - if (last_used >= surface_age_limit) { - trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); - pgraph_download_surface_data_if_dirty(d, s); - pgraph_surface_invalidate(d, s); - } - } -} - -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, - SurfaceBinding *s2, bool strict) -{ - bool format_compatible = - (s1->color == s2->color) && - (s1->fmt.gl_attachment == s2->fmt.gl_attachment) && - (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) && - (s1->pitch == s2->pitch) && - (s1->shape.clip_x <= s2->shape.clip_x) && - (s1->shape.clip_y <= s2->shape.clip_y); - if (!format_compatible) { - return false; - } - - if (!strict) { - return (s1->width >= s2->width) && (s1->height >= s2->height); - } else { - return (s1->width == s2->width) && (s1->height == s2->height); - } -} - -static void pgraph_download_surface_data_if_dirty(NV2AState *d, - SurfaceBinding *surface) -{ - if (surface->draw_dirty) { - pgraph_download_surface_data(d, surface, true); - } -} - -static void pgraph_bind_current_surface(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->color_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->color_binding->gl_buffer, 0); - } - - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->zeta_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->zeta_binding->gl_buffer, 0); - } - - if (pg->color_binding || pg->zeta_binding) { - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - } -} - -static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int 
x = 0; x < width; x++) { - *(uint32_t *)out = *(uint32_t *)in; - out += 4; - in += 4 * factor; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - *(uint16_t *)out = *(uint16_t *)in; - out += 2; - in += 2 * factor; - } - } else { - for (unsigned int x = 0; x < width; x++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - in += bytes_per_pixel * factor; - } - } -} - - -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels) -{ - PGRAPHState *pg = &d->pgraph; - swizzle &= surface->swizzle; - downscale &= (pg->surface_scale_factor != 1); - - trace_nv2a_pgraph_surface_download( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - /* Bind destination surface to framebuffer */ - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, surface->gl_buffer, 0); - - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - /* Read surface into memory */ - uint8_t *gl_read_buf = pixels; - - uint8_t *swizzle_buf = pixels; - if (swizzle) { - /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
- * FIXME: Consider swizzle in shader - */ - assert(pg->surface_scale_factor == 1 || downscale); - swizzle_buf = (uint8_t *)g_malloc(surface->size); - gl_read_buf = swizzle_buf; - } - - if (downscale) { - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * - surface->size); - gl_read_buf = pg->scale_buf; - } - - glo_readpixels( - surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, - pg->surface_scale_factor * surface->pitch, - pg->surface_scale_factor * surface->width, - pg->surface_scale_factor * surface->height, flip, gl_read_buf); - - /* FIXME: Replace this with a hw accelerated version */ - if (downscale) { - assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); - uint8_t *out = swizzle_buf, *in = pg->scale_buf; - for (unsigned int y = 0; y < surface->height; y++) { - surface_copy_shrink_row(out, in, surface->width, - surface->fmt.bytes_per_pixel, - pg->surface_scale_factor); - in += surface->pitch * pg->surface_scale_factor * - pg->surface_scale_factor; - out += surface->pitch; - } - } - - if (swizzle) { - swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, - surface->pitch, surface->fmt.bytes_per_pixel); - g_free(swizzle_buf); - } - - /* Re-bind original framebuffer target */ - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, 0, 0); - pgraph_bind_current_surface(d); -} - -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->download_pending || force)) { - return; - } - - /* FIXME: Respect write enable at last TOU? 
*/ - - nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); - - pgraph_download_surface_data_to_buffer( - d, surface, true, true, true, d->vram_ptr + surface->vram_addr); - - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_NV2A_TEX); - - surface->download_pending = false; - surface->draw_dirty = false; -} - -void pgraph_process_pending_downloads(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data(d, surface, false); - } - - qatomic_set(&d->pgraph.downloads_pending, false); - qemu_event_set(&d->pgraph.downloads_complete); -} - -void pgraph_download_dirty_surfaces(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data_if_dirty(d, surface); - } - - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, false); - qemu_event_set(&d->pgraph.dirty_surfaces_download_complete); -} - - -static void surface_copy_expand_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint32_t *)out = *(uint32_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint16_t *)out = *(uint16_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } -} - -static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, - 
unsigned int height, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - size_t out_pitch = width * bytes_per_pixel * factor; - - for (unsigned int y = 0; y < height; y++) { - surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); - uint8_t *row_in = out; - for (unsigned int i = 1; i < factor; i++) { - out += out_pitch; - memcpy(out, row_in, out_pitch); - } - in += width * bytes_per_pixel; - out += out_pitch; - } -} - -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->upload_pending || force)) { - return; - } - - nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); - - trace_nv2a_pgraph_surface_upload( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - PGRAPHState *pg = &d->pgraph; - - surface->upload_pending = false; - surface->draw_time = pg->draw_time; - - // FIXME: Don't query GL for texture binding - GLint last_texture_binding; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); - - // FIXME: Replace with FBO to not disturb current state - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - - uint8_t *data = d->vram_ptr; - uint8_t *buf = data + surface->vram_addr; - - if (surface->swizzle) { - buf = (uint8_t*)g_malloc(surface->size); - unswizzle_rect(data + surface->vram_addr, - surface->width, surface->height, - buf, - surface->pitch, - surface->fmt.bytes_per_pixel); - } - - /* FIXME: Replace this flip/scaling */ - - // This is VRAM so we can't do this inplace! 
- uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); - } - - uint8_t *gl_read_buf = flipped_buf; - unsigned int width = surface->width, height = surface->height; - - if (pg->surface_scale_factor > 1) { - pgraph_apply_scaling_factor(pg, &width, &height); - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; - surface_copy_expand(out, in, surface->width, surface->height, - surface->fmt.bytes_per_pixel, - d->pgraph.surface_scale_factor); - } - - int prev_unpack_alignment; - glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); - if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - } else { - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - } - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, - height, 0, surface->fmt.gl_format, surface->fmt.gl_type, - gl_read_buf); - glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); - if (surface->swizzle) { - g_free(buf); - } - - // Rebind previous framebuffer binding - glBindTexture(GL_TEXTURE_2D, last_texture_binding); - - pgraph_bind_current_surface(d); -} - -static void pgraph_compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) -{ - #define DO_CMP(fld) \ - if (s1->fld != s2->fld) \ - trace_nv2a_pgraph_surface_compare_mismatch( \ - #fld, (long int)s1->fld, (long int)s2->fld); - DO_CMP(shape.clip_x) - DO_CMP(shape.clip_width) - DO_CMP(shape.clip_y) - DO_CMP(shape.clip_height) - DO_CMP(gl_buffer) - DO_CMP(fmt.bytes_per_pixel) - 
DO_CMP(fmt.gl_attachment) - DO_CMP(fmt.gl_internal_format) - DO_CMP(fmt.gl_format) - DO_CMP(fmt.gl_type) - DO_CMP(color) - DO_CMP(swizzle) - DO_CMP(vram_addr) - DO_CMP(width) - DO_CMP(height) - DO_CMP(pitch) - DO_CMP(size) - DO_CMP(dma_addr) - DO_CMP(dma_len) - DO_CMP(frame_time) - DO_CMP(draw_time) - #undef DO_CMP -} - -static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, - bool color, - unsigned int width, - unsigned int height, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - Surface *surface; - hwaddr dma_address; - SurfaceFormatInfo fmt; - - if (color) { - surface = &pg->surface_color; - dma_address = pg->dma_color; - assert(pg->surface_shape.color_format != 0); - assert(pg->surface_shape.color_format < - ARRAY_SIZE(kelvin_surface_color_format_map)); - fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; - if (fmt.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", - pg->surface_shape.color_format); - abort(); - } - } else { - surface = &pg->surface_zeta; - dma_address = pg->dma_zeta; - assert(pg->surface_shape.zeta_format != 0); - assert(pg->surface_shape.zeta_format < - ARRAY_SIZE(kelvin_surface_zeta_float_format_map)); - const SurfaceFormatInfo *map = - pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_map : - kelvin_surface_zeta_fixed_format_map; - fmt = map[pg->surface_shape.zeta_format]; - } - - DMAObject dma = nv_dma_load(d, dma_address); - /* There's a bunch of bugs that could cause us to hit this function - * at the wrong time and get a invalid dma object. - * Check that it's sane. */ - assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); - // assert(dma.address + surface->offset != 0); - assert(surface->offset <= dma.limit); - assert(surface->offset + surface->pitch * height <= dma.limit + 1); - assert(surface->pitch % fmt.bytes_per_pixel == 0); - assert((dma.address & ~0x07FFFFFF) == 0); - - entry->shape = (color || !pg->color_binding) ? 
pg->surface_shape : - pg->color_binding->shape; - entry->gl_buffer = 0; - entry->fmt = fmt; - entry->color = color; - entry->swizzle = - (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - entry->vram_addr = dma.address + surface->offset; - entry->width = width; - entry->height = height; - entry->pitch = surface->pitch; - entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); - entry->upload_pending = true; - entry->download_pending = false; - entry->draw_dirty = false; - entry->dma_addr = dma.address; - entry->dma_len = dma.limit; - entry->frame_time = pg->frame_time; - entry->draw_time = pg->draw_time; - entry->cleared = false; -} - -static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - unsigned int width, height; - - if (color || !pg->color_binding) { - pgraph_get_surface_dimensions(pg, &width, &height); - pgraph_apply_anti_aliasing_factor(pg, &width, &height); - - /* Since we determine surface dimensions based on the clipping - * rectangle, make sure to include the surface offset as well. - */ - if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { - width += pg->surface_shape.clip_x; - height += pg->surface_shape.clip_y; - } - } else { - width = pg->color_binding->width; - height = pg->color_binding->height; - } - - pgraph_populate_surface_binding_entry_sized(d, color, width, height, entry); -} - -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - SurfaceBinding entry; - pgraph_populate_surface_binding_entry(d, color, &entry); - - Surface *surface = color ? 
&pg->surface_color : &pg->surface_zeta; - - bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( - d->vram, entry.vram_addr, entry.size, - DIRTY_MEMORY_NV2A); - - if (upload && (surface->buffer_dirty || mem_dirty)) { - pgraph_unbind_surface(d, color); - - SurfaceBinding *found = pgraph_surface_get(d, entry.vram_addr); - if (found != NULL) { - /* FIXME: Support same color/zeta surface target? In the mean time, - * if the surface we just found is currently bound, just unbind it. - */ - SurfaceBinding *other = (color ? pg->zeta_binding - : pg->color_binding); - if (found == other) { - NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); - pgraph_unbind_surface(d, !color); - } - } - - trace_nv2a_pgraph_surface_target( - color ? "COLOR" : "ZETA", entry.vram_addr, - entry.swizzle ? "sz" : "ln", - pg->surface_shape.anti_aliasing, - pg->surface_shape.clip_x, - pg->surface_shape.clip_width, pg->surface_shape.clip_y, - pg->surface_shape.clip_height); - - bool should_create = true; - - if (found != NULL) { - bool is_compatible = - pgraph_check_surface_compatibility(found, &entry, false); - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? "sz" : "ln", \ - found->shape.anti_aliasing, found->shape.clip_x, \ - found->shape.clip_width, found->shape.clip_y, \ - found->shape.clip_height, found->pitch - if (found->color) { - trace_nv2a_pgraph_surface_match_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS); - } -#undef TRACE_ARGS - - assert(!(entry.swizzle && pg->clearing)); - - if (found->swizzle != entry.swizzle) { - /* Clears should only be done on linear surfaces. Avoid - * synchronization by allowing (1) a surface marked swizzled to - * be cleared under the assumption the entire surface is - * destined to be cleared and (2) a fully cleared linear surface - * to be marked swizzled. Strictly match size to avoid - * pathological cases. 
- */ - is_compatible &= (pg->clearing || found->cleared) && - pgraph_check_surface_compatibility(found, &entry, true); - if (is_compatible) { - trace_nv2a_pgraph_surface_migrate_type( - entry.swizzle ? "swizzled" : "linear"); - } - } - - if (is_compatible && color && - !pgraph_check_surface_compatibility(found, &entry, true)) { - SurfaceBinding zeta_entry; - pgraph_populate_surface_binding_entry_sized( - d, !color, found->width, found->height, &zeta_entry); - hwaddr color_end = found->vram_addr + found->size; - hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; - is_compatible &= found->vram_addr >= zeta_end || - zeta_entry.vram_addr >= color_end; - } - - if (is_compatible && !color && pg->color_binding) { - is_compatible &= (found->width == pg->color_binding->width) && - (found->height == pg->color_binding->height); - } - - if (is_compatible) { - /* FIXME: Refactor */ - pg->surface_binding_dim.width = found->width; - pg->surface_binding_dim.clip_x = found->shape.clip_x; - pg->surface_binding_dim.clip_width = found->shape.clip_width; - pg->surface_binding_dim.height = found->height; - pg->surface_binding_dim.clip_y = found->shape.clip_y; - pg->surface_binding_dim.clip_height = found->shape.clip_height; - found->upload_pending |= mem_dirty; - pg->surface_zeta.buffer_dirty |= color; - should_create = false; - } else { - trace_nv2a_pgraph_surface_evict_reason( - "incompatible", found->vram_addr); - pgraph_compare_surfaces(found, &entry); - pgraph_download_surface_data_if_dirty(d, found); - pgraph_surface_invalidate(d, found); - } - } - - if (should_create) { - glGenTextures(1, &entry.gl_buffer); - glBindTexture(GL_TEXTURE_2D, entry.gl_buffer); - NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer, - "%s format: %0X, width: %d, height: %d " - "(addr %" HWADDR_PRIx ")", - color ? "color" : "zeta", - color ? 
pg->surface_shape.color_format - : pg->surface_shape.zeta_format, - entry.width, entry.height, surface->offset); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - unsigned int width = entry.width, height = entry.height; - pgraph_apply_scaling_factor(pg, &width, &height); - glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, - height, 0, entry.fmt.gl_format, entry.fmt.gl_type, - NULL); - found = pgraph_surface_put(d, entry.vram_addr, &entry); - - /* FIXME: Refactor */ - pg->surface_binding_dim.width = entry.width; - pg->surface_binding_dim.clip_x = entry.shape.clip_x; - pg->surface_binding_dim.clip_width = entry.shape.clip_width; - pg->surface_binding_dim.height = entry.height; - pg->surface_binding_dim.clip_y = entry.shape.clip_y; - pg->surface_binding_dim.clip_height = entry.shape.clip_height; - - if (color && pg->zeta_binding && (pg->zeta_binding->width != entry.width || pg->zeta_binding->height != entry.height)) { - pg->surface_zeta.buffer_dirty = true; - } - } - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? 
"sz" : "ln", found->shape.anti_aliasing, \ - found->shape.clip_x, found->shape.clip_width, \ - found->shape.clip_y, found->shape.clip_height, found->pitch - - if (color) { - if (should_create) { - trace_nv2a_pgraph_surface_create_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS); - } - - pg->color_binding = found; - } else { - if (should_create) { - trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS); - } - pg->zeta_binding = found; - } -#undef TRACE_ARGS - - glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment, - GL_TEXTURE_2D, found->gl_buffer, 0); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - - surface->buffer_dirty = false; - } - - if (!upload && surface->draw_dirty) { - if (!tcg_enabled()) { - /* FIXME: Cannot monitor for reads/writes; flush now */ - pgraph_download_surface_data(d, - color ? pg->color_binding : pg->zeta_binding, true); - } - - surface->write_enabled_cache = false; - surface->draw_dirty = false; - } -} - -static void pgraph_unbind_surface(NV2AState *d, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - if (color) { - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); - pg->color_binding = NULL; - } - } else { - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - pg->zeta_binding = NULL; - } - } -} - -static void pgraph_update_surface(NV2AState *d, bool upload, - bool color_write, bool zeta_write) -{ - PGRAPHState *pg = &d->pgraph; - - pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT); - - color_write = color_write && - (pg->clearing || pgraph_color_write_enabled(pg)); - zeta_write = zeta_write && (pg->clearing || 
pgraph_zeta_write_enabled(pg)); - - if (upload) { - bool fb_dirty = pgraph_framebuffer_dirty(pg); - if (fb_dirty) { - memcpy(&pg->last_surface_shape, &pg->surface_shape, - sizeof(SurfaceShape)); - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } - - if (pg->surface_color.buffer_dirty) { - pgraph_unbind_surface(d, true); - } - - if (color_write) { - pgraph_update_surface_part(d, true, true); - } - - if (pg->surface_zeta.buffer_dirty) { - pgraph_unbind_surface(d, false); - } - - if (zeta_write) { - pgraph_update_surface_part(d, true, false); - } - } else { - if ((color_write || pg->surface_color.write_enabled_cache) - && pg->surface_color.draw_dirty) { - pgraph_update_surface_part(d, false, true); - } - if ((zeta_write || pg->surface_zeta.write_enabled_cache) - && pg->surface_zeta.draw_dirty) { - pgraph_update_surface_part(d, false, false); - } - } - - if (upload) { - pg->draw_time++; - } - - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - - if (pg->color_binding) { - pg->color_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->color_binding, false); - pg->color_binding->draw_time = pg->draw_time; - pg->color_binding->swizzle = swizzle; - } - } - - if (pg->zeta_binding) { - pg->zeta_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->zeta_binding, false); - pg->zeta_binding->draw_time = pg->draw_time; - pg->zeta_binding->swizzle = swizzle; - } - } - - // Sanity check color and zeta dimensions match - if (pg->color_binding && pg->zeta_binding) { - assert((pg->color_binding->width == pg->zeta_binding->width) - && (pg->color_binding->height == pg->zeta_binding->height)); - } - - pgraph_surface_evict_old(d); -} - -struct pgraph_texture_possibly_dirty_struct { - hwaddr addr, end; -}; - -static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) -{ - struct pgraph_texture_possibly_dirty_struct *test 
= - (struct pgraph_texture_possibly_dirty_struct *)opaque; - - struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding == NULL || tnode->possibly_dirty) { - return; - } - - uintptr_t k_tex_addr = tnode->key.texture_vram_offset; - uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; - bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); - - if (tnode->key.palette_length > 0) { - uintptr_t k_pal_addr = tnode->key.palette_vram_offset; - uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; - overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); - } - - tnode->possibly_dirty |= overlapping; -} - - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, - hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; - addr &= TARGET_PAGE_MASK; - assert(end <= memory_region_size(d->vram)); - - struct pgraph_texture_possibly_dirty_struct test = { - .addr = addr, - .end = end, - }; - - lru_visit_active(&d->pgraph.texture_cache, - pgraph_mark_textures_possibly_dirty_visitor, - &test); -} - -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, - DIRTY_MEMORY_NV2A_TEX); -} - -static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) -{ - assert(stage < NV2A_MAX_TEXTURES); - uint32_t mode = (pg->regs[NV_PGRAPH_SHADERPROG] >> (stage * 5)) & 0x1F; - return !!mode; -} - -// Check if any of the pages spanned by the a texture are dirty. 
-static bool pgraph_check_texture_possibly_dirty(NV2AState *d, hwaddr texture_vram_offset, unsigned int length, hwaddr palette_vram_offset, unsigned int palette_length) -{ - bool possibly_dirty = false; - if (pgraph_check_texture_dirty(d, texture_vram_offset, length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, texture_vram_offset, length); - } - if (palette_length && pgraph_check_texture_dirty(d, palette_vram_offset, - palette_length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, palette_vram_offset, - palette_length); - } - return possibly_dirty; -} - -static void apply_texture_parameters(TextureBinding *binding, - const ColorFormatInfo *f, - unsigned int dimensionality, - unsigned int filter, - unsigned int address, - bool is_bordered, - uint32_t border_color) -{ - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); - unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); - unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); - unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); - - if (f->linear) { - /* somtimes games try to set mipmap min filters on linear textures. - * this could indicate a bug... 
*/ - switch (min_filter) { - case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; - break; - case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; - break; - } - } - - if (min_filter != binding->min_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, - pgraph_texture_min_filter_map[min_filter]); - binding->min_filter = min_filter; - } - if (mag_filter != binding->mag_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, - pgraph_texture_mag_filter_map[mag_filter]); - binding->mag_filter = mag_filter; - } - - /* Texture wrapping */ - assert(addru < ARRAY_SIZE(pgraph_texture_addr_map)); - if (addru != binding->addru) { - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, - pgraph_texture_addr_map[addru]); - binding->addru = addru; - } - bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - if (dimensionality > 1) { - if (addrv != binding->addrv) { - assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, - pgraph_texture_addr_map[addrv]); - binding->addrv = addrv; - } - needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - if (dimensionality > 2) { - if (addrp != binding->addrp) { - assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, - pgraph_texture_addr_map[addrp]); - binding->addrp = addrp; - } - needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - - if (!is_bordered && needs_border_color) { - if (!binding->border_color_set || binding->border_color != border_color) { - GLfloat gl_border_color[] = { - /* FIXME: Color channels might be wrong order */ - ((border_color >> 16) & 0xFF) / 255.0f, /* red */ - 
((border_color >> 8) & 0xFF) / 255.0f, /* green */ - (border_color & 0xFF) / 255.0f, /* blue */ - ((border_color >> 24) & 0xFF) / 255.0f /* alpha */ - }; - glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, - gl_border_color); - - binding->border_color_set = true; - binding->border_color = border_color; - } - } -} - -static void pgraph_bind_textures(NV2AState *d) -{ - int i; - PGRAPHState *pg = &d->pgraph; - - NV2A_GL_DGROUP_BEGIN("%s", __func__); - - for (i=0; iregs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); - /* FIXME: What happens if texture is disabled but stage is active? */ - - glActiveTexture(GL_TEXTURE0 + i); - if (!enabled) { - glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, 0); - glBindTexture(GL_TEXTURE_1D, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glBindTexture(GL_TEXTURE_3D, 0); - continue; - } - - uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4]; - uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4]; - uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4]; - - unsigned int min_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); - unsigned int max_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); - - unsigned int pitch = - GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); - - unsigned int dma_select = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); - bool cubemap = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - unsigned int dimensionality = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); - unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); - unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); - unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int log_height = GET_MASK(fmt, 
NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - unsigned int rect_width = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_WIDTH); - unsigned int rect_height = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_HEIGHT); -#ifdef DEBUG_NV2A - unsigned int lod_bias = - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); -#endif - unsigned int border_source = GET_MASK(fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4]; - - hwaddr offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4]; - - bool palette_dma_select = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); - unsigned int palette_length_index = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); - unsigned int palette_offset = - palette & NV_PGRAPH_TEXPALETTE0_OFFSET; - - unsigned int palette_length = 0; - switch (palette_length_index) { - case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; - default: assert(false); break; - } - - /* Check for unsupported features */ - if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); - - nv2a_profile_inc_counter(NV2A_PROF_TEX_BIND); - - hwaddr dma_len; - uint8_t *texture_data; - if (dma_select) { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); - } else { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); - } - assert(offset < dma_len); 
- texture_data += offset; - hwaddr texture_vram_offset = texture_data - d->vram_ptr; - - hwaddr palette_dma_len; - uint8_t *palette_data; - if (palette_dma_select) { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); - } else { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); - } - assert(palette_offset < palette_dma_len); - palette_data += palette_offset; - hwaddr palette_vram_offset = palette_data - d->vram_ptr; - - NV2A_DPRINTF(" texture %d is format 0x%x, " - "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," - " filter %x %x, levels %d-%d %d bias %d\n", - i, color_format, offset, - rect_width, rect_height, - 1 << log_width, 1 << log_height, 1 << log_depth, - pitch, - cubemap ? "; cubemap" : "", - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), - min_mipmap_level, max_mipmap_level, levels, - lod_bias); - - assert(color_format < ARRAY_SIZE(kelvin_color_format_map)); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - if (f.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", - color_format); - abort(); - } - - unsigned int width, height, depth; - if (f.linear) { - assert(dimensionality == 2); - width = rect_width; - height = rect_height; - depth = 1; - } else { - width = 1 << log_width; - height = 1 << log_height; - depth = 1 << log_depth; - pitch = 0; - - levels = MIN(levels, max_mipmap_level + 1); - - /* Discard mipmap levels that would be smaller than 1x1. - * FIXME: Is this actually needed? - * - * >> Level 0: 32 x 4 - * Level 1: 16 x 2 - * Level 2: 8 x 1 - * Level 3: 4 x 1 - * Level 4: 2 x 1 - * Level 5: 1 x 1 - */ - levels = MIN(levels, MAX(log_width, log_height) + 1); - assert(levels > 0); - - if (dimensionality == 3) { - /* FIXME: What about 3D mipmaps? */ - if (log_width < 2 || log_height < 2) { - /* Base level is smaller than 4x4... 
*/ - levels = 1; - } else { - levels = MIN(levels, MIN(log_width, log_height) - 1); - } - } - min_mipmap_level = MIN(levels-1, min_mipmap_level); - max_mipmap_level = MIN(levels-1, max_mipmap_level); - } - - size_t length = 0; - if (f.linear) { - assert(cubemap == false); - assert(dimensionality == 2); - length = height * pitch; - } else { - if (dimensionality >= 2) { - unsigned int w = width, h = height; - int level; - if (f.gl_format != 0) { - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - length += w * h * f.bytes_per_pixel; - w /= 2; - h /= 2; - } - } else { - /* Compressed textures are a bit different */ - unsigned int block_size = - f.gl_internal_format == - GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - unsigned int phys_w = (w + 3) & ~3, - phys_h = (h + 3) & ~3; - length += phys_w/4 * phys_h/4 * block_size; - w /= 2; - h /= 2; - } - } - if (cubemap) { - assert(dimensionality == 2); - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - length *= 6; - } - if (dimensionality >= 3) { - length *= depth; - } - } - } - - bool is_bordered = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; - - assert((texture_vram_offset + length) < memory_region_size(d->vram)); - assert((palette_vram_offset + palette_length) - < memory_region_size(d->vram)); - bool is_indexed = (color_format == - NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); - bool possibly_dirty = false; - bool possibly_dirty_checked = false; - - SurfaceBinding *surface = pgraph_surface_get(d, texture_vram_offset); - TextureBinding *tbind = pg->texture_binding[i]; - if (!pg->texture_dirty[i] && tbind) { - bool reusable = false; - if (surface && tbind->draw_time == surface->draw_time) { - reusable = true; - } else if (!surface) { - possibly_dirty = pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed 
? palette_length : 0); - possibly_dirty_checked = true; - reusable = !possibly_dirty; - } - - if (reusable) { - glBindTexture(pg->texture_binding[i]->gl_target, - pg->texture_binding[i]->gl_texture); - apply_texture_parameters(pg->texture_binding[i], - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - continue; - } - } - - TextureShape state; - memset(&state, 0, sizeof(TextureShape)); - state.cubemap = cubemap; - state.dimensionality = dimensionality; - state.color_format = color_format; - state.levels = levels; - state.width = width; - state.height = height; - state.depth = depth; - state.min_mipmap_level = min_mipmap_level; - state.max_mipmap_level = max_mipmap_level; - state.pitch = pitch; - state.border = is_bordered; - - /* - * Check active surfaces to see if this texture was a render target - */ - bool surf_to_tex = false; - if (surface != NULL) { - surf_to_tex = pgraph_check_surface_to_texture_compatibility( - surface, &state); - - if (surf_to_tex && surface->upload_pending) { - pgraph_upload_surface_data(d, surface, false); - } - } - - if (!surf_to_tex) { - // FIXME: Restructure to support rendering surfaces to cubemap faces - - // Writeback any surfaces which this texture may index - hwaddr tex_vram_end = texture_vram_offset + length - 1; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr >= tex_vram_end - || texture_vram_offset >= surf_vram_end); - if (overlapping) { - pgraph_download_surface_data_if_dirty(d, surface); - } - } - } - - TextureKey key; - memset(&key, 0, sizeof(TextureKey)); - key.state = state; - key.texture_vram_offset = texture_vram_offset; - key.texture_length = length; - if (is_indexed) { - key.palette_vram_offset = palette_vram_offset; - key.palette_length = palette_length; - } - - // Search for existing texture binding in cache - uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); - 
LruNode *found = lru_lookup(&pg->texture_cache, - tex_binding_hash, &key); - TextureLruNode *key_out = container_of(found, TextureLruNode, node); - possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; - - if (!surf_to_tex && !possibly_dirty_checked) { - possibly_dirty |= pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed ? palette_length : 0); - } - - // Calculate hash of texture data, if necessary - uint64_t tex_data_hash = 0; - if (!surf_to_tex && possibly_dirty) { - tex_data_hash = fast_hash(texture_data, length); - if (is_indexed) { - tex_data_hash ^= fast_hash(palette_data, palette_length); - } - } - - // Free existing binding, if texture data has changed - bool must_destroy = (key_out->binding != NULL) - && possibly_dirty - && (key_out->binding->data_hash != tex_data_hash); - if (must_destroy) { - texture_binding_destroy(key_out->binding); - key_out->binding = NULL; - } - - if (key_out->binding == NULL) { - // Must create the texture - key_out->binding = generate_texture(state, texture_data, palette_data); - key_out->binding->data_hash = tex_data_hash; - key_out->binding->scale = 1; - } else { - // Saved an upload! Reuse existing texture in graphics memory. 
- glBindTexture(key_out->binding->gl_target, - key_out->binding->gl_texture); - } - - key_out->possibly_dirty = false; - TextureBinding *binding = key_out->binding; - binding->refcnt++; - - if (surf_to_tex && binding->draw_time < surface->draw_time) { - - trace_nv2a_pgraph_surface_render_to_texture( - surface->vram_addr, surface->width, surface->height); - pgraph_render_surface_to_texture(d, surface, binding, &state, i); - binding->draw_time = surface->draw_time; - if (binding->gl_target == GL_TEXTURE_RECTANGLE) { - binding->scale = pg->surface_scale_factor; - } else { - binding->scale = 1; - } - } - - apply_texture_parameters(binding, - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - - if (pg->texture_binding[i]) { - if (pg->texture_binding[i]->gl_target != binding->gl_target) { - glBindTexture(pg->texture_binding[i]->gl_target, 0); - } - texture_binding_destroy(pg->texture_binding[i]); - } - pg->texture_binding[i] = binding; - pg->texture_dirty[i] = false; - } - NV2A_GL_DGROUP_END(); -} - -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - switch (pg->surface_shape.anti_aliasing) { - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: - if (width) { *width *= 2; } - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: - if (width) { *width *= 2; } - if (height) { *height *= 2; } - break; - default: - assert(false); - break; - } -} - -static void pgraph_apply_scaling_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - *width *= pg->surface_scale_factor; - *height *= pg->surface_scale_factor; -} - -static void pgraph_get_surface_dimensions(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - if (swizzle) { - *width = 1 << pg->surface_shape.log_width; - *height = 1 << 
pg->surface_shape.log_height; - } else { - *width = pg->surface_shape.clip_width; - *height = pg->surface_shape.clip_height; - } -} - -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, - bool quick) -{ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - - static hwaddr last_addr, last_end; - if (quick && (addr >= last_addr) && (end <= last_end)) { - return; - } - last_addr = addr; - last_end = end; - - size = end - addr; - if (memory_region_test_and_clear_dirty(d->vram, addr, size, - DIRTY_MEMORY_NV2A)) { - glBufferSubData(GL_ARRAY_BUFFER, addr, size, - d->vram_ptr + addr); - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); - } -} - -static void pgraph_update_inline_value(VertexAttribute *attr, - const uint8_t *data) -{ - assert(attr->count <= 4); - attr->inline_value[0] = 0.0f; - attr->inline_value[1] = 0.0f; - attr->inline_value[2] = 0.0f; - attr->inline_value[3] = 1.0f; - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - for (uint32_t i = 0; i < attr->count; ++i) { - attr->inline_value[i] = (float)data[i] / 255.0f; - } - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - memcpy(attr->inline_value, data, attr->size * attr->count); - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = (float)*val; - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - const int32_t val = *(const int32_t *)data; - int32_t x = val & 0x7FF; - if (x & 0x400) { - x |= 0xFFFFF800; - } - int32_t y = (val >> 11) & 0x7FF; - if (y & 0x400) { - y |= 0xFFFFF800; - } - int32_t z = (val >> 22) & 0x7FF; - if (z & 0x200) { - z |= 0xFFFFFC00; - } - - attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); - attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); - attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); - break; - } - default: - fprintf(stderr, "Unknown vertex attribute type: 0x%x for format 0x%x\n", - attr->gl_type, attr->format); - assert(!"Unsupported attribute type"); - break; - } -} - -static void pgraph_bind_vertex_attributes(NV2AState *d, - unsigned int min_element, - unsigned int max_element, - bool inline_data, - unsigned int inline_stride, - unsigned int provoking_element) -{ - PGRAPHState *pg = &d->pgraph; - bool updated_memory_buffer = false; - unsigned int num_elements = max_element - min_element + 1; - - if (inline_data) { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", - __func__, num_elements, inline_stride); - } else { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); - } - - pg->compressed_attrs = 0; - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - - if (!attr->count) { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); - hwaddr attrib_data_addr; - size_t stride; - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << i); - } - - hwaddr start = 0; - if (inline_data) { - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - attrib_data_addr = attr->inline_array_offset; - stride = inline_stride; - } else { - hwaddr dma_len; - uint8_t *attr_data = (uint8_t *)nv_dma_map( - d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, - &dma_len); - assert(attr->offset < dma_len); - attrib_data_addr = attr_data + attr->offset - d->vram_ptr; - stride = attr->stride; - start = attrib_data_addr + min_element * stride; - pgraph_update_memory_buffer(d, start, num_elements * stride, - updated_memory_buffer); - updated_memory_buffer = true; - } - - uint32_t provoking_element_index = provoking_element - min_element; - size_t element_size = attr->size * attr->count; - assert(element_size <= sizeof(attr->inline_value)); - const uint8_t *last_entry; - - if (inline_data) { - last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; - } else { - last_entry = d->vram_ptr + start; - } - if (!stride) { - // Stride of 0 indicates that only the first element should be - // used. - pgraph_update_inline_value(attr, last_entry); - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - if (attr->needs_conversion) { - glVertexAttribIPointer(i, attr->gl_count, attr->gl_type, stride, - (void *)attrib_data_addr); - } else { - glVertexAttribPointer(i, attr->gl_count, attr->gl_type, - attr->gl_normalize, stride, - (void *)attrib_data_addr); - } - - glEnableVertexAttribArray(i); - last_entry += stride * provoking_element_index; - pgraph_update_inline_value(attr, last_entry); - } - - NV2A_GL_DGROUP_END(); -} - -static unsigned int pgraph_bind_inline_array(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - unsigned int offset = 0; - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->count == 0) { - continue; - } - - /* FIXME: Double check */ - offset = ROUND_UP(offset, attr->size); - attr->inline_array_offset = offset; - NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", - i, attr->size, attr->count); - offset += attr->size * attr->count; - offset = ROUND_UP(offset, attr->size); - } - - unsigned int vertex_size = offset; - unsigned int index_count = 
pg->inline_array_length*4 / vertex_size; - - NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); - - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t), - NULL, GL_STREAM_DRAW); - glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array); - pgraph_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size, - index_count-1); - - return index_count; -} - -/* 16 bit to [0.0, F16_MAX = 511.9375] */ -static float convert_f16_to_float(uint16_t f16) { - if (f16 == 0x0000) { return 0.0; } - uint32_t i = (f16 << 11) + 0x3C000000; - return *(float*)&i; -} - -/* 24 bit to [0.0, F24_MAX] */ -static float convert_f24_to_float(uint32_t f24) { - assert(!(f24 >> 24)); - f24 &= 0xFFFFFF; - if (f24 == 0x000000) { return 0.0; } - uint32_t i = f24 << 7; - return *(float*)&i; -} - -static uint8_t cliptobyte(int x) -{ - return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); -} - -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, - uint8_t *r, uint8_t *g, uint8_t* b) { - int c, d, e; - c = (int)line[ix * 2] - 16; - if (ix % 2) { - d = (int)line[ix * 2 - 1] - 128; - e = (int)line[ix * 2 + 1] - 128; - } else { - d = (int)line[ix * 2 + 1] - 128; - e = (int)line[ix * 2 + 3] - 128; - } - *r = cliptobyte((298 * c + 409 * e + 128) >> 8); - *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); - *b = cliptobyte((298 * c + 516 * d + 128) >> 8); -} - -static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, - uint8_t *r, uint8_t *g, uint8_t* b) { - int c, d, e; - c = (int)line[ix * 2 + 1] - 16; - if (ix % 2) { - d = (int)line[ix * 2 - 2] - 128; - e = (int)line[ix * 2 + 0] - 128; - } else { - d = (int)line[ix * 2 + 0] - 128; - e = (int)line[ix * 2 + 2] - 128; - } - *r = cliptobyte((298 * c + 409 * e + 128) >> 8); - *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); - *b = cliptobyte((298 * c + 516 * d + 128) >> 8); -} - -static uint8_t* convert_texture_data(const TextureShape s, - const uint8_t *data, - const uint8_t *palette_data, - unsigned int width, - unsigned int height, - unsigned int depth, - unsigned int row_pitch, - unsigned int slice_pitch) -{ - if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { - uint8_t* converted_data = (uint8_t*)g_malloc(width * height * depth * 4); - int x, y, z; - const uint8_t* src = data; - uint32_t* dst = (uint32_t*)converted_data; - for (z = 0; z < depth; z++) { - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint8_t index = src[y * row_pitch + x]; - uint32_t color = *(uint32_t * )(palette_data + index * 4); - *dst++ = color; - } - } - src += slice_pitch; - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || - s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { - // TODO: Investigate whether a non-1 depth is possible. 
- // Generally the hardware asserts when attempting to use volumetric - // textures in linear formats. - assert(depth == 1); /* FIXME */ - // FIXME: only valid if control0 register allows for colorspace conversion - uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4); - int x, y; - uint8_t* pixel = converted_data; - for (y = 0; y < height; y++) { - const uint8_t* line = &data[y * row_pitch * depth]; - for (x = 0; x < width; x++, pixel += 4) { - if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } else { - convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } - pixel[3] = 255; - } - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { - assert(depth == 1); /* FIXME */ - uint8_t *converted_data = (uint8_t*)g_malloc(width * height * 3); - int x, y; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2); - int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3]; - /* Maps 5 bit G and B signed value range to 8 bit - * signed values. R is probably unsigned. 
- */ - rgb655 ^= (1 << 9) | (1 << 4); - pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; - pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; - pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; - } - } - return converted_data; - } else { - return NULL; - } -} - -static void upload_gl_texture(GLenum gl_target, - const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); - - unsigned int adjusted_width = s.width; - unsigned int adjusted_height = s.height; - unsigned int adjusted_pitch = s.pitch; - unsigned int adjusted_depth = s.depth; - if (!f.linear && s.border) { - adjusted_width = MAX(16, adjusted_width * 2); - adjusted_height = MAX(16, adjusted_height * 2); - adjusted_pitch = adjusted_width * (s.pitch / s.width); - adjusted_depth = MAX(16, s.depth * 2); - } - - switch(gl_target) { - case GL_TEXTURE_1D: - assert(false); - break; - case GL_TEXTURE_RECTANGLE: { - /* Can't handle strides unaligned to pixels */ - assert(s.pitch % f.bytes_per_pixel == 0); - - uint8_t *converted = convert_texture_data(s, texture_data, - palette_data, - adjusted_width, - adjusted_height, 1, - adjusted_pitch, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, - converted ? 0 : adjusted_pitch / f.bytes_per_pixel); - glTexImage2D(gl_target, 0, f.gl_internal_format, - adjusted_width, adjusted_height, 0, - f.gl_format, f.gl_type, - converted ? 
converted : texture_data); - - if (converted) { - g_free(converted); - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - break; - } - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - - unsigned int width = adjusted_width, height = adjusted_height; - - int level; - for (level = 0; level < s.levels; level++) { - width = MAX(width, 1); - height = MAX(height, 1); - - if (f.gl_format == 0) { /* compressed */ - // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size - unsigned int block_size = - f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - unsigned int physical_width = (width + 3) & ~3, - physical_height = (height + 3) & ~3; - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); - } - uint8_t *converted = decompress_2d_texture_data( - f.gl_internal_format, texture_data, physical_width, - physical_height); - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); - tex_width = s.width; - tex_height = s.height; - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - } - } - - glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, converted); - g_free(converted); - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (s.cubemap && adjusted_width != s.width) { - glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - } - texture_data += - physical_width / 4 * physical_height / 4 * block_size; - } else { - unsigned int pitch = width * f.bytes_per_pixel; - uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); - unswizzle_rect(texture_data, width, height, - unswizzled, pitch, f.bytes_per_pixel); - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, 1, - pitch, 0); - uint8_t *pixel_data = converted ? converted : unswizzled; - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - tex_width = s.width; - tex_height = s.height; - pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; - } - - glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, - tex_height, 0, f.gl_format, f.gl_type, - pixel_data); - if (s.cubemap && s.border) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - } - - break; - } - case GL_TEXTURE_3D: { - - unsigned int width = adjusted_width; - unsigned int height = adjusted_height; - unsigned int depth = adjusted_depth; - - assert(f.linear == false); - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { /* compressed */ - assert(width % 4 == 0 && height % 4 == 0 && - "Compressed 3D texture virtual size"); - width = MAX(width, 4); - height = MAX(height, 4); - depth = MAX(depth, 1); - - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t texture_size = width/4 * height/4 * depth * block_size; - - uint8_t *converted = decompress_3d_texture_data(f.gl_internal_format, texture_data, width, height, depth); - - glTexImage3D(gl_target, level, GL_RGBA8, - width, height, depth, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, - converted); - - g_free(converted); - - texture_data += texture_size; - } else { - width = MAX(width, 1); - height = MAX(height, 1); - depth = MAX(depth, 1); - - unsigned int row_pitch = width * f.bytes_per_pixel; - unsigned int slice_pitch = row_pitch * height; - uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); - unswizzle_box(texture_data, width, height, depth, unswizzled, - row_pitch, slice_pitch, f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, depth, - row_pitch, slice_pitch); - - 
glTexImage3D(gl_target, level, f.gl_internal_format, - width, height, depth, 0, - f.gl_format, f.gl_type, - converted ? converted : unswizzled); - - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * depth * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - depth /= 2; - } - break; - } - default: - assert(false); - break; - } -} - -static TextureBinding* generate_texture(const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - - /* Create a new opengl texture */ - GLuint gl_texture; - glGenTextures(1, &gl_texture); - - GLenum gl_target; - if (s.cubemap) { - assert(f.linear == false); - assert(s.dimensionality == 2); - gl_target = GL_TEXTURE_CUBE_MAP; - } else { - if (f.linear) { - /* linear textures use unnormalised texcoords. - * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but - * does not allow repeat and mirror wrap modes. - * (or mipmapping, but xbox d3d says 'Non swizzled and non - * compressed textures cannot be mip mapped.') - * Not sure if that'll be an issue. */ - - /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ - gl_target = GL_TEXTURE_RECTANGLE; - assert(s.dimensionality == 2); - } else { - switch(s.dimensionality) { - case 1: gl_target = GL_TEXTURE_1D; break; - case 2: gl_target = GL_TEXTURE_2D; break; - case 3: gl_target = GL_TEXTURE_3D; break; - default: - assert(false); - break; - } - } - } - - glBindTexture(gl_target, gl_texture); - - NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, - "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, " - "width: %d, height: %d, depth: %d", - texture_data - g_nv2a->vram_ptr, - s.color_format, f.linear ? "" : " (SZ)", - s.dimensionality, s.cubemap ? 
" (Cubemap)" : "", - s.width, s.height, s.depth); - - if (gl_target == GL_TEXTURE_CUBE_MAP) { - - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t length = 0; - unsigned int w = s.width; - unsigned int h = s.height; - if (!f.linear && s.border) { - w = MAX(16, w * 2); - h = MAX(16, h * 2); - } - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { - length += w/4 * h/4 * block_size; - } else { - length += w * h * f.bytes_per_pixel; - } - - w /= 2; - h /= 2; - } - - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, - s, texture_data + 0 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, - s, texture_data + 1 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, - s, texture_data + 2 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, - s, texture_data + 3 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, - s, texture_data + 4 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, - s, texture_data + 5 * length, palette_data); - } else { - upload_gl_texture(gl_target, s, texture_data, palette_data); - } - - /* Linear textures don't support mipmapping */ - if (!f.linear) { - glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, - s.min_mipmap_level); - glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, - s.levels - 1); - } - - if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 - || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { - glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, - (const GLint *)f.gl_swizzle_mask); - } - - TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); - ret->gl_target = gl_target; - ret->gl_texture 
= gl_texture; - ret->refcnt = 1; - ret->draw_time = 0; - ret->data_hash = 0; - ret->min_filter = 0xFFFFFFFF; - ret->mag_filter = 0xFFFFFFFF; - ret->addru = 0xFFFFFFFF; - ret->addrv = 0xFFFFFFFF; - ret->addrp = 0xFFFFFFFF; - ret->border_color_set = false; - return ret; -} - -static void texture_binding_destroy(gpointer data) -{ - TextureBinding *binding = (TextureBinding *)data; - assert(binding->refcnt > 0); - binding->refcnt--; - if (binding->refcnt == 0) { - glDeleteTextures(1, &binding->gl_texture); - g_free(binding); - } -} - -/* functions for texture LRU cache */ -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - memcpy(&tnode->key, key, sizeof(TextureKey)); - - tnode->binding = NULL; - tnode->possibly_dirty = false; -} - -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding) { - texture_binding_destroy(tnode->binding); - tnode->binding = NULL; - tnode->possibly_dirty = false; - } -} - -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - return memcmp(&tnode->key, key, sizeof(TextureKey)); -} - -static unsigned int kelvin_map_stencil_op(uint32_t parameter) -{ - unsigned int op; - switch (parameter) { - case NV097_SET_STENCIL_OP_V_KEEP: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; - case NV097_SET_STENCIL_OP_V_ZERO: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; - case NV097_SET_STENCIL_OP_V_REPLACE: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; - case NV097_SET_STENCIL_OP_V_INCRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; - case NV097_SET_STENCIL_OP_V_DECRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; - case NV097_SET_STENCIL_OP_V_INVERT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; - case 
NV097_SET_STENCIL_OP_V_INCR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; - case NV097_SET_STENCIL_OP_V_DECR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; - default: - assert(false); - break; - } - return op; -} - -static unsigned int kelvin_map_polygon_mode(uint32_t parameter) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FRONT_POLYGON_MODE_V_POINT: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; - case NV097_SET_FRONT_POLYGON_MODE_V_LINE: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; - case NV097_SET_FRONT_POLYGON_MODE_V_FILL: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; - default: - assert(false); - break; - } - return mode; -} - -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) -{ - assert(channel < 4); - unsigned int texgen; - switch (parameter) { - case NV097_SET_TEXGEN_S_DISABLE: - texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; - case NV097_SET_TEXGEN_S_EYE_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; - case NV097_SET_TEXGEN_S_OBJECT_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; - case NV097_SET_TEXGEN_S_SPHERE_MAP: - assert(channel < 2); - texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; - case NV097_SET_TEXGEN_S_REFLECTION_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; - case NV097_SET_TEXGEN_S_NORMAL_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; - default: - assert(false); - break; - } - return texgen; -} diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c new file mode 100644 index 00000000000..ded339e23f9 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c @@ -0,0 +1,84 @@ +/* + * Geforce NV2A PGRAPH Renderdoc Helpers + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as 
published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" + +#include +#include + +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" + +#include "hw/xbox/nv2a/debug.h" + +#ifdef _WIN32 +#include +#else +#include +#endif + +static RENDERDOC_API_1_6_0 *rdoc_api = NULL; + +int renderdoc_capture_frames = 0; + +void nv2a_dbg_renderdoc_init(void) +{ + if (rdoc_api) { + return; + } + +#ifdef _WIN32 + HMODULE renderdoc = GetModuleHandleA("renderdoc.dll"); + if (renderdoc) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI"); +#else + void *renderdoc = dlopen( +#ifdef __APPLE__ + "librenderdoc.dylib", +#else + "librenderdoc.so", +#endif + RTLD_LAZY); + if (renderdoc) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI"); +#endif // _WIN32 + int ret = + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api); + assert(ret == 1 && "Failed to retrieve RenderDoc API."); + } else { + fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror()); + } +} + +void *nv2a_dbg_renderdoc_get_api(void) +{ + return (void*)rdoc_api; +} + +bool nv2a_dbg_renderdoc_available(void) +{ + return rdoc_api != NULL; +} + +void nv2a_dbg_renderdoc_capture_frames(int num_frames) +{ + renderdoc_capture_frames += num_frames; +} diff --git a/hw/xbox/nv2a/pgraph/gl/blit.c b/hw/xbox/nv2a/pgraph/gl/blit.c new file mode 100644 index 00000000000..b4cce8a5ef1 --- /dev/null +++ 
b/hw/xbox/nv2a/pgraph/gl/blit.c @@ -0,0 +1,174 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +// TODO: Optimize. Ideally this should all be done via OpenGL. +void pgraph_gl_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_gl_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, 
context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr); + if (surf_src) { + pgraph_gl_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_gl_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be discarded. + surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? 
*/ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit operation: 0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; 
y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/gl/constants.h b/hw/xbox/nv2a/pgraph/gl/constants.h new file mode 100644 index 00000000000..d78b0054e38 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/constants.h @@ -0,0 +1,322 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "gloffscreen.h" + +static const GLenum pgraph_texture_min_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + GL_NEAREST_MIPMAP_NEAREST, + GL_LINEAR_MIPMAP_NEAREST, + GL_NEAREST_MIPMAP_LINEAR, + GL_LINEAR_MIPMAP_LINEAR, + GL_LINEAR, +}; + +static const GLenum pgraph_texture_mag_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + 0, + GL_LINEAR /* TODO: Convolution filter... 
*/ +}; + +static const GLenum pgraph_texture_addr_gl_map[] = { + 0, + GL_REPEAT, + GL_MIRRORED_REPEAT, + GL_CLAMP_TO_EDGE, + GL_CLAMP_TO_BORDER, + GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const GLenum pgraph_blend_factor_gl_map[] = { + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA_SATURATE, + 0, + GL_CONSTANT_COLOR, + GL_ONE_MINUS_CONSTANT_COLOR, + GL_CONSTANT_ALPHA, + GL_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const GLenum pgraph_blend_equation_gl_map[] = { + GL_FUNC_SUBTRACT, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, + GL_MIN, + GL_MAX, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const GLenum pgraph_cull_face_gl_map[] = { + 0, + GL_FRONT, + GL_BACK, + GL_FRONT_AND_BACK +}; + +static const GLenum pgraph_depth_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static const GLenum pgraph_stencil_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static const GLenum pgraph_stencil_op_gl_map[] = { + 0, + GL_KEEP, + GL_ZERO, + GL_REPLACE, + GL_INCR, + GL_DECR, + GL_INVERT, + GL_INCR_WRAP, + GL_DECR_WRAP, +}; + +typedef struct ColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + GLenum gl_swizzle_mask[4]; + bool depth; +} ColorFormatInfo; + +static const ColorFormatInfo kelvin_color_format_gl_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = + {1, false, GL_R8, GL_RED, 
GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = + {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = + {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = + {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = + {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = + {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = + {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* paletted texture */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = + {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = + {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = + {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = + {1, 
false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = + {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = + {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = + {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = + {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_GREEN, GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. 
+ * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = + {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = + /* FIXME: Uses fixed-point format to match surface format hack below. */ + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, + {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = + {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = + {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = + {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} +}; + +typedef struct SurfaceFormatInfo { + unsigned int bytes_per_pixel; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + 
GLenum gl_attachment; +} SurfaceFormatInfo; + +static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + + // FIXME: Map channel color + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for + * now just emulate this with fixed-point Z24S8. Possible compat + * improvement with custom conversion. 
+ */ + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +#endif diff --git a/hw/xbox/nv2a/debug.c b/hw/xbox/nv2a/pgraph/gl/debug.c similarity index 77% rename from hw/xbox/nv2a/debug.c rename to hw/xbox/nv2a/pgraph/gl/debug.c index def94cdba1a..8e7f49e47c7 100644 --- a/hw/xbox/nv2a/debug.c +++ b/hw/xbox/nv2a/pgraph/gl/debug.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A debug helpers + * Geforce NV2A PGRAPH OpenGL Renderer * * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes @@ -18,6 +18,7 @@ * License along with this library; if not, see . */ +#include "renderer.h" #include "debug.h" #ifdef DEBUG_NV2A_GL @@ -28,15 +29,8 @@ #include #ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" #include "thirdparty/renderdoc_app.h" -#ifdef _WIN32 -#include -#else -#include -#endif - -static RENDERDOC_API_1_1_2 *rdoc_api = NULL; -static int32_t renderdoc_capture_frames = 0; #endif #define CHECK_GL_ERROR() do { \ @@ -74,31 +68,7 @@ void gl_debug_initialize(void) } #ifdef CONFIG_RENDERDOC - const char *renderdoc_lib; - void* renderdoc; -#ifdef __APPLE__ - renderdoc_lib = "librenderdoc.dylib"; -#elif _WIN32 - renderdoc_lib = "renderdoc.dll"; -#else - renderdoc_lib = "librenderdoc.so"; -#endif - -#ifdef _WIN32 - renderdoc = GetModuleHandleA(renderdoc_lib); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress( - renderdoc, "RENDERDOC_GetAPI"); -#else - renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym( - renderdoc, 
"RENDERDOC_GetAPI"); -#endif - int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, - (void **)&rdoc_api); - assert(ret == 1 && "Failed to retrieve RenderDoc API."); - } + nv2a_dbg_renderdoc_init(); #endif } @@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void) CHECK_GL_ERROR(); #ifdef CONFIG_RENDERDOC - if (rdoc_api) { + if (nv2a_dbg_renderdoc_available()) { + + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + if (rdoc_api->IsTargetControlConnected()) { if (rdoc_api->IsFrameCapturing()) { rdoc_api->EndFrameCapture(NULL, NULL); @@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void) error); } } - if (renderdoc_capture_frames) { + if (renderdoc_capture_frames > 0) { rdoc_api->StartFrameCapture(NULL, NULL); GLenum error = glGetError(); if (error != GL_NO_ERROR) { @@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void) } } #endif - if (!has_GL_GREMEDY_frame_terminator) { - return; + if (has_GL_GREMEDY_frame_terminator) { + glFrameTerminatorGREMEDY(); + CHECK_GL_ERROR(); } - - glFrameTerminatorGREMEDY(); - CHECK_GL_ERROR(); -} - -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void) { - return rdoc_api != NULL; } -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) { - renderdoc_capture_frames = num_frames; -} -#endif - #endif // DEBUG_NV2A_GL diff --git a/hw/xbox/nv2a/pgraph/gl/debug.h b/hw/xbox/nv2a/pgraph/gl/debug.h new file mode 100644 index 00000000000..c242e1f3846 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/debug.h @@ -0,0 +1,60 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2012 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H + +// #define DEBUG_NV2A_GL +#ifdef DEBUG_NV2A_GL + +#include +#include "gloffscreen.h" +#include "config-host.h" + +void gl_debug_initialize(void); +void gl_debug_message(bool cc, const char *fmt, ...); +void gl_debug_group_begin(const char *fmt, ...); +void gl_debug_group_end(void); +void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); +void gl_debug_frame_terminator(void); + +# define NV2A_GL_DPRINTF(cc, format, ...) \ + gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_BEGIN(format, ...) \ + gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_END() \ + gl_debug_group_end() +# define NV2A_GL_DLABEL(target, name, format, ...) \ + gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) +#define NV2A_GL_DFRAME_TERMINATOR() \ + gl_debug_frame_terminator() + +#else + +# define NV2A_GL_DPRINTF(cc, format, ...) do { \ + if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ + } while (0) +# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) +# define NV2A_GL_DGROUP_END() do { } while (0) +# define NV2A_GL_DLABEL(target, name, format, ...) 
do { } while (0) +# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) +#endif + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c new file mode 100644 index 00000000000..804fec2c2d1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -0,0 +1,407 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "renderer.h" + +#include + +void pgraph_gl_init_display_renderer(NV2AState *d) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glGenTextures(1, &r->gl_display_buffer); + r->gl_display_buffer_internal_format = 0; + r->gl_display_buffer_width = 0; + r->gl_display_buffer_height = 0; + r->gl_display_buffer_format = 0; + r->gl_display_buffer_type = 0; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + /* FIXME: improve interlace handling, pvideo */ + + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform bool pvideo_enable;\n" + "uniform sampler2D pvideo_tex;\n" + "uniform vec2 pvideo_in_pos;\n" + "uniform vec4 pvideo_pos;\n" + "uniform vec3 pvideo_scale;\n" + "uniform bool pvideo_color_key_enable;\n" + "uniform vec4 pvideo_color_key;\n" + "uniform vec2 display_size;\n" + "uniform float line_offset;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord = gl_FragCoord.xy/display_size;\n" + " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" + " texCoord.y = 1 + rel*(texCoord.y - 1);" + " out_Color.rgba = texture(tex, texCoord);\n" + " if (pvideo_enable) {\n" + " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" + " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" + " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" + " greaterThan(screenCoord, output_region.zw));\n" + " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" + " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" + " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" + " in_st.y *= -1.0;\n" + " 
out_Color.rgba = texture(pvideo_tex, in_st);\n" + " }\n" + " }\n" + "}\n"; + + r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex"); + r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable"); + r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex"); + r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos"); + r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos"); + r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale"); + r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable"); + r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key"); + r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size"); + r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset"); + + glGenVertexArrays(1, &r->disp_rndr.vao); + glBindVertexArray(r->disp_rndr.vao); + glGenBuffers(1, &r->disp_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->disp_rndr.fbo); + glGenTextures(1, &r->disp_rndr.pvideo_tex); + assert(glGetError() == GL_NO_ERROR); +} + +static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, + unsigned int width, + unsigned int height, + unsigned int pitch) +{ + uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); + int x, y; + for (y = 0; y < height; y++) { + const uint8_t *line = &data[y * pitch]; + const uint32_t row_offset = y * width; + for (x = 0; x < width; x++) { + uint8_t *pixel = &converted_data[(row_offset + x) * 4]; + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return converted_data; +} + +static float 
pvideo_calculate_scale(unsigned int din_dout, + unsigned int output_size) +{ + float calculated_in = din_dout * (output_size - 1); + calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); + return (calculated_in + 1.0f) / output_size; +} + +static void render_display_pvideo_overlay(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. + // Many games seem to pass this value when initializing or tearing down + // PVIDEO. On its own, this generally does not result in the overlay being + // hidden, however there are certain games (e.g., Ultimate Beach Soccer) + // that use an unknown mechanism to hide the overlay without explicitly + // stopping it. + // Since the value seems to be set to 0xFFFFFFFF only in cases where the + // content is not valid, it is probably good enough to treat it as an + // implicit stop. + bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) + && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; + glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled); + if (!enabled) { + return; + } + + hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; + hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + + int in_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); + int in_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); + + int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + + int in_pitch = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); + int in_color = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); + + unsigned int out_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); + unsigned int out_height = + 
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); + + float scale_x = 1.0f; + float scale_y = 1.0f; + unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; + unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; + if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_x = pvideo_calculate_scale(ds_dx, out_width); + } + if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_y = pvideo_calculate_scale(dt_dy, out_height); + } + + // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results + // in them being capped to the output size, content is not scaled. This is + // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF + // during initialization or teardown. + if (in_width > out_width) { + in_width = floorf((float)out_width * scale_x + 0.5f); + } + if (in_height > out_height) { + in_height = floorf((float)out_height * scale_y + 0.5f); + } + + /* TODO: support other color formats */ + assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + unsigned int out_x = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); + unsigned int out_y = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); + + unsigned int color_key_enabled = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); + glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc, + color_key_enabled); + + // TODO: Verify that masking off the top byte is correct. + // SeaBlade sets a color key of 0x80000000 but the texture passed into the + // shader is cleared to 0 alpha. 
+ unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; + glUniform4f(r->disp_rndr.pvideo_color_key_loc, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); + + assert(offset + in_pitch * in_height <= limit); + hwaddr end = base + offset + in_pitch * in_height; + assert(end <= memory_region_size(d->vram)); + + pgraph_apply_scaling_factor(pg, &out_x, &out_y); + pgraph_apply_scaling_factor(pg, &out_width, &out_height); + + // Translate for the GL viewport origin. + out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); + + glActiveTexture(GL_TEXTURE0 + 1); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( + d->vram_ptr + base + offset, in_width, in_height, in_pitch); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, + GL_UNSIGNED_BYTE, tex_rgba); + g_free(tex_rgba); + glUniform1i(r->disp_rndr.pvideo_tex_loc, 1); + glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t); + glUniform4f(r->disp_rndr.pvideo_pos_loc, + out_x, out_y, out_width, out_height); + glUniform3f(r->disp_rndr.pvideo_scale_loc, + scale_x, scale_y, 1.0f / pg->surface_scale_factor); +} + +static void render_display(NV2AState *d, SurfaceBinding *surface) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int width, height; + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + int line_offset = surface->pitch / pline_offset; + + /* 
Adjust viewport height for interlaced mode, used only in 1080i */ + if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { + height *= 2; + } + + pgraph_apply_scaling_factor(pg, &width, &height); + + glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer); + bool recreate = ( + surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format + || width != r->gl_display_buffer_width + || height != r->gl_display_buffer_height + || surface->fmt.gl_format != r->gl_display_buffer_format + || surface->fmt.gl_type != r->gl_display_buffer_type + ); + + if (recreate) { + /* XXX: There's apparently a bug in some Intel OpenGL drivers for + * Windows that will leak this texture when its orphaned after use in + * another context, apparently regardless of which thread it's created + * or released on. + * + * Driver: 27.20.100.8729 9/11/2020 W10 x64 + * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 + */ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; + r->gl_display_buffer_width = width; + r->gl_display_buffer_height = height; + r->gl_display_buffer_format = surface->fmt.gl_format; + r->gl_display_buffer_type = surface->fmt.gl_type; + glTexImage2D(GL_TEXTURE_2D, 0, + r->gl_display_buffer_internal_format, + r->gl_display_buffer_width, + r->gl_display_buffer_height, + 0, + r->gl_display_buffer_format, + r->gl_display_buffer_type, + NULL); + } + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, r->gl_display_buffer, 0); + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, DrawBuffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == 
GL_FRAMEBUFFER_COMPLETE); + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glBindVertexArray(r->disp_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glUseProgram(r->disp_rndr.prog); + glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0); + glUniform2f(r->disp_rndr.display_size_loc, width, height); + glUniform1f(r->disp_rndr.line_offset_loc, line_offset); + render_display_pvideo_overlay(d); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); +} + +static void gl_fence(void) +{ + GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, + (GLuint64)(5000000000)); + assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); + glDeleteSync(fence); +} + +void pgraph_gl_sync(NV2AState *d) +{ + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL) { + qemu_event_set(&d->pgraph.sync_complete); + return; + } + + /* FIXME: Sanity check surface dimensions */ + + /* Wait for queued commands to complete */ + pgraph_gl_upload_surface_data(d, surface, !tcg_enabled()); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Render framebuffer in display context */ + glo_set_current(g_nv2a_context_display); + render_display(d, surface); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Switch back to original context */ + glo_set_current(g_nv2a_context_render); + + 
qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +int pgraph_gl_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + + assert(surface->color); + assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); + assert(surface->fmt.gl_format == GL_RGBA + || surface->fmt.gl_format == GL_RGB + || surface->fmt.gl_format == GL_BGR + || surface->fmt.gl_format == GL_BGRA + ); + + surface->frame_time = pg->frame_time; + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + + return r->gl_display_buffer; +} diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c new file mode 100644 index 00000000000..94e9beb50b0 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -0,0 +1,528 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/fast-hash.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "debug.h"
+#include "renderer.h"
+
+/* Execute NV097_CLEAR_SURFACE: clear the bound color/zeta targets with a scissored glClear. */
+void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    NV2A_DPRINTF("---------PRE CLEAR ------\n");
+    pg->clearing = true;
+
+    GLbitfield gl_mask = 0;
+
+    bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
+    bool write_zeta =
+        (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
+
+    if (write_zeta) {
+        GLint gl_clear_stencil;
+        GLfloat gl_clear_depth;
+        pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth,
+                                             &gl_clear_stencil);
+
+        if (parameter & NV097_CLEAR_SURFACE_Z) {
+            gl_mask |= GL_DEPTH_BUFFER_BIT;
+            glDepthMask(GL_TRUE);
+            glClearDepth(gl_clear_depth);
+        }
+        if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
+            gl_mask |= GL_STENCIL_BUFFER_BIT;
+            glStencilMask(0xff);
+            glClearStencil(gl_clear_stencil);
+        }
+    }
+    if (write_color) {
+        gl_mask |= GL_COLOR_BUFFER_BIT;
+        glColorMask((parameter & NV097_CLEAR_SURFACE_R)
+                        ? GL_TRUE : GL_FALSE,
+                    (parameter & NV097_CLEAR_SURFACE_G)
+                        ? GL_TRUE : GL_FALSE,
+                    (parameter & NV097_CLEAR_SURFACE_B)
+                        ? GL_TRUE : GL_FALSE,
+                    (parameter & NV097_CLEAR_SURFACE_A)
+                        ? GL_TRUE : GL_FALSE);
+
+        GLfloat rgba[4];
+        pgraph_get_clear_color(pg, rgba);
+        glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]);
+    }
+
+    pgraph_gl_surface_update(d, true, write_color, write_zeta);
+
+    /* FIXME: Needs confirmation */
+    unsigned int xmin =
+        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
+    unsigned int xmax =
+        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
+    unsigned int ymin =
+        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
+    unsigned int ymax =
+        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);
+
+    NV2A_DPRINTF(
+        "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
+        parameter, xmin, ymin, xmax, ymax,
+        d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]);
+
+    unsigned int scissor_width = xmax - xmin + 1,
+                 scissor_height = ymax - ymin + 1;
+    pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+    pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+    ymin = pg->surface_binding_dim.height - (ymin + scissor_height); /* flip to GL bottom-left origin */
+
+    NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
+                 xmin + scissor_width - 1, ymin + scissor_height - 1);
+
+    bool full_clear = !xmin && !ymin &&
+                      scissor_width >= pg->surface_binding_dim.width &&
+                      scissor_height >= pg->surface_binding_dim.height;
+
+    pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+    pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+    /* FIXME: Respect window clip?!?! */
+    glEnable(GL_SCISSOR_TEST);
+    glScissor(xmin, ymin, scissor_width, scissor_height);
+
+    /* Dither */
+    /* FIXME: Maybe also disable it here?
+       GL implementation dependent */
+    if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+        glEnable(GL_DITHER);
+    } else {
+        glDisable(GL_DITHER);
+    }
+
+    glClear(gl_mask);
+
+    glDisable(GL_SCISSOR_TEST);
+
+    pgraph_gl_set_surface_dirty(pg, write_color, write_zeta);
+
+    if (r->color_binding) {
+        r->color_binding->cleared = full_clear && write_color;
+    }
+    if (r->zeta_binding) {
+        r->zeta_binding->cleared = full_clear && write_zeta;
+    }
+
+    pg->clearing = false;
+}
+
+/* Begin of NV097_SET_BEGIN_END: latch PGRAPH blend/cull/depth/stencil/raster state into GL. */
+void pgraph_gl_draw_begin(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode);
+
+    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+    bool stencil_test =
+        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+    bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+    pgraph_gl_surface_update(d, true, true, depth_test || stencil_test);
+
+    if (is_nop_draw) {
+        return;
+    }
+
+    assert(r->color_binding || r->zeta_binding);
+
+    pgraph_gl_bind_textures(d);
+    pgraph_gl_bind_shaders(pg);
+
+    glColorMask(mask_red, mask_green, mask_blue, mask_alpha);
+    glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
+    glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+                           NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
+
+    if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
+        glEnable(GL_BLEND);
+        uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+                                    NV_PGRAPH_BLEND_SFACTOR);
+        uint32_t dfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+                                    NV_PGRAPH_BLEND_DFACTOR);
+        assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
+        assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
+        glBlendFunc(pgraph_blend_factor_gl_map[sfactor],
+                    pgraph_blend_factor_gl_map[dfactor]);
+
+        uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+                                     NV_PGRAPH_BLEND_EQN);
+        assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map));
+        glBlendEquation(pgraph_blend_equation_gl_map[equation]);
+
+        uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
+        float gl_blend_color[4];
+        pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color);
+        glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2],
+                     gl_blend_color[3]);
+    } else {
+        glDisable(GL_BLEND);
+    }
+
+    /* Face culling */
+    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
+            & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
+        uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
+                                      NV_PGRAPH_SETUPRASTER_CULLCTRL);
+        assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map));
+        glCullFace(pgraph_cull_face_gl_map[cull_face]);
+        glEnable(GL_CULL_FACE);
+    } else {
+        glDisable(GL_CULL_FACE);
+    }
+
+    /* Clipping */
+    glEnable(GL_CLIP_DISTANCE0);
+    glEnable(GL_CLIP_DISTANCE1);
+
+    /* Front-face select */
+    glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
+                    & NV_PGRAPH_SETUPRASTER_FRONTFACE
+                        ? GL_CCW : GL_CW);
+
+    /* Polygon offset */
+    /* FIXME: GL implementation-specific, maybe do this in VS?
+ */
+    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
+        glEnable(GL_POLYGON_OFFSET_FILL);
+    } else {
+        glDisable(GL_POLYGON_OFFSET_FILL);
+    }
+    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
+        glEnable(GL_POLYGON_OFFSET_LINE);
+    } else {
+        glDisable(GL_POLYGON_OFFSET_LINE);
+    }
+    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
+        glEnable(GL_POLYGON_OFFSET_POINT);
+    } else {
+        glDisable(GL_POLYGON_OFFSET_POINT);
+    }
+    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
+             NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
+             NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
+        uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
+        GLfloat zfactor = *(float*)&zfactor_u32; /* register bits are an IEEE float */
+        uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
+        GLfloat zbias = *(float*)&zbias_u32;
+        glPolygonOffset(zfactor, zbias);
+    }
+
+    /* Depth testing */
+    if (depth_test) {
+        glEnable(GL_DEPTH_TEST);
+
+        uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
+                                       NV_PGRAPH_CONTROL_0_ZFUNC);
+        assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map));
+        glDepthFunc(pgraph_depth_func_gl_map[depth_func]);
+    } else {
+        glDisable(GL_DEPTH_TEST);
+    }
+
+    if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
+                 NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
+            NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
+        glEnable(GL_DEPTH_CLAMP);
+    } else {
+        glDisable(GL_DEPTH_CLAMP);
+    }
+
+    if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
+                 NV_PGRAPH_CONTROL_3_SHADEMODE) ==
+            NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) {
+        glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
+    }
+
+    if (stencil_test) {
+        glEnable(GL_STENCIL_TEST);
+
+        uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+                                         NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
+        uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+                                        NV_PGRAPH_CONTROL_1_STENCIL_REF);
+        uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+                                      NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
+        uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+                                    NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
+        uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
+        uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
+
+        assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map));
+        assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+        assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+        assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+
+        glStencilFunc(
+            pgraph_stencil_func_gl_map[stencil_func],
+            stencil_ref,
+            func_mask);
+
+        glStencilOp(
+            pgraph_stencil_op_gl_map[op_fail],
+            pgraph_stencil_op_gl_map[op_zfail],
+            pgraph_stencil_op_gl_map[op_zpass]);
+
+    } else {
+        glDisable(GL_STENCIL_TEST);
+    }
+
+    /* Dither */
+    /* FIXME: GL implementation dependent */
+    if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
+            NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+        glEnable(GL_DITHER);
+    } else {
+        glDisable(GL_DITHER);
+    }
+
+    glEnable(GL_PROGRAM_POINT_SIZE);
+
+    bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE);
+
+    /* Edge Antialiasing */
+    if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
+        glEnable(GL_LINE_SMOOTH);
+    } else {
+        glDisable(GL_LINE_SMOOTH);
+    }
+    if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+            NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
+        glEnable(GL_POLYGON_SMOOTH);
+    } else {
+        glDisable(GL_POLYGON_SMOOTH);
+    }
+
+    unsigned int vp_width = pg->surface_binding_dim.width,
+                 vp_height = pg->surface_binding_dim.height;
+    pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
+    glViewport(0, 0, vp_width, vp_height);
+
+    /* Surface clip */
+    /* FIXME: Consider moving to
PSH w/ window clip */
+    unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x,
+                 ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y;
+    unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
+                 ymax = ymin + pg->surface_shape.clip_height - 1;
+
+    unsigned int scissor_width = xmax - xmin + 1,
+                 scissor_height = ymax - ymin + 1;
+    pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+    pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+    ymin = pg->surface_binding_dim.height - (ymin + scissor_height); /* flip to GL bottom-left origin */
+    pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+    pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+    glEnable(GL_SCISSOR_TEST);
+    glScissor(xmin, ymin, scissor_width, scissor_height);
+
+    /* Visibility testing */
+    if (pg->zpass_pixel_count_enable) {
+        r->gl_zpass_pixel_count_query_count++;
+        r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc(
+            r->gl_zpass_pixel_count_queries,
+            sizeof(GLuint) * r->gl_zpass_pixel_count_query_count);
+
+        GLuint gl_query;
+        glGenQueries(1, &gl_query);
+        r->gl_zpass_pixel_count_queries[
+            r->gl_zpass_pixel_count_query_count - 1] = gl_query;
+        glBeginQuery(GL_SAMPLES_PASSED, gl_query);
+    }
+}
+
+/* End of NV097_SET_BEGIN_END: submit batched geometry, close queries, mark surfaces dirty. */
+void pgraph_gl_draw_end(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+    bool stencil_test =
+        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+    bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+    if (is_nop_draw) {
+        // FIXME: Check PGRAPH register 0x880.
+        // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
+        // check that will raise an exception in the case that a draw should
+        // modify the color and/or zeta buffer but the target(s) are masked
+        // off. This check only seems to trigger during the fragment
+        // processing, it is legal to attempt a draw that is entirely
+        // clipped regardless of 0x880. See xemu#635 for context.
+        return;
+    }
+
+    pgraph_gl_flush_draw(d);
+
+    /* End of visibility testing */
+    if (pg->zpass_pixel_count_enable) {
+        nv2a_profile_inc_counter(NV2A_PROF_QUERY);
+        glEndQuery(GL_SAMPLES_PASSED);
+    }
+
+    pg->draw_time++;
+    if (r->color_binding && pgraph_color_write_enabled(pg)) {
+        r->color_binding->draw_time = pg->draw_time;
+    }
+    if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
+        r->zeta_binding->draw_time = pg->draw_time;
+    }
+
+    pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test);
+    NV2A_GL_DGROUP_END();
+}
+
+/* Issue the pending geometry through whichever path the guest used:
+ * draw arrays, inline elements, inline buffer, or inline array. */
+void pgraph_gl_flush_draw(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (!(r->color_binding || r->zeta_binding)) {
+        return;
+    }
+    assert(r->shader_binding);
+
+    if (pg->draw_arrays_length) {
+        NV2A_GL_DPRINTF(false, "Draw Arrays");
+        nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
+        assert(pg->inline_elements_length == 0);
+        assert(pg->inline_buffer_length == 0);
+        assert(pg->inline_array_length == 0);
+
+        pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start,
+                                         pg->draw_arrays_max_count - 1,
+                                         false, 0,
+                                         pg->draw_arrays_max_count - 1);
+        glMultiDrawArrays(r->shader_binding->gl_primitive_mode,
+                          pg->draw_arrays_start,
+                          pg->draw_arrays_count,
+                          pg->draw_arrays_length);
+    } else if (pg->inline_elements_length) {
+        NV2A_GL_DPRINTF(false, "Inline Elements");
+        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
+        assert(pg->inline_buffer_length == 0);
+        assert(pg->inline_array_length == 0);
+
+        uint32_t min_element = (uint32_t)-1;
+        uint32_t max_element = 0;
+        for (int i=0; i < pg->inline_elements_length; i++) {
+            max_element = MAX(pg->inline_elements[i], max_element);
+            min_element = MIN(pg->inline_elements[i], min_element);
+        }
+
+        pgraph_gl_bind_vertex_attributes(
+            d, min_element, max_element, false, 0,
+            pg->inline_elements[pg->inline_elements_length - 1]);
+
+        VertexKey k;
+        memset(&k, 0, sizeof(VertexKey));
+        k.count = pg->inline_elements_length;
+        k.gl_type = GL_UNSIGNED_INT;
+        k.gl_normalize = GL_FALSE;
+        k.stride = sizeof(uint32_t);
+        uint64_t h = fast_hash((uint8_t*)pg->inline_elements,
+                               pg->inline_elements_length * 4);
+
+        LruNode *node = lru_lookup(&r->element_cache, h, &k);
+        VertexLruNode *found = container_of(node, VertexLruNode, node);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer);
+        if (!found->initialized) {
+            nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4);
+            glBufferData(GL_ELEMENT_ARRAY_BUFFER,
+                         pg->inline_elements_length * 4,
+                         pg->inline_elements, GL_STATIC_DRAW);
+            found->initialized = true;
+        } else {
+            nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY);
+        }
+        glDrawElements(r->shader_binding->gl_primitive_mode,
+                       pg->inline_elements_length, GL_UNSIGNED_INT,
+                       (void *)0);
+    } else if (pg->inline_buffer_length) {
+        NV2A_GL_DPRINTF(false, "Inline Buffer");
+        nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
+        assert(pg->inline_array_length == 0);
+
+        if (pg->compressed_attrs) {
+            pg->compressed_attrs = 0;
+            pgraph_gl_bind_shaders(pg);
+        }
+
+        for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+            VertexAttribute *attr = &pg->vertex_attributes[i];
+            if (attr->inline_buffer_populated) {
+                nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
+                glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]);
+                glBufferData(GL_ARRAY_BUFFER,
+                             pg->inline_buffer_length * sizeof(float) * 4,
+                             attr->inline_buffer, GL_STREAM_DRAW);
+                glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
+                glEnableVertexAttribArray(i);
+                attr->inline_buffer_populated = false;
+                memcpy(attr->inline_value,
+                       attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
+                       sizeof(attr->inline_value));
+            } else {
+                glDisableVertexAttribArray(i);
+                glVertexAttrib4fv(i, attr->inline_value);
+            }
+        }
+
+        glDrawArrays(r->shader_binding->gl_primitive_mode,
+                     0, pg->inline_buffer_length);
+    } else if (pg->inline_array_length) {
+        NV2A_GL_DPRINTF(false, "Inline Array");
+        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
+
+        unsigned int index_count = pgraph_gl_bind_inline_array(d);
+        glDrawArrays(r->shader_binding->gl_primitive_mode,
+                     0, index_count);
+    } else {
+        NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
+        NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
+    }
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build
new file mode 100644
index 00000000000..ab25eacb7dd
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/meson.build
@@ -0,0 +1,12 @@
+specific_ss.add([sdl, gloffscreen, files(
+    'blit.c',
+    'debug.c',
+    'display.c',
+    'draw.c',
+    'renderer.c',
+    'reports.c',
+    'shaders.c',
+    'surface.c',
+    'texture.c',
+    'vertex.c',
+  )])
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c
new file mode 100644
index 00000000000..2114608683d
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.c
@@ -0,0 +1,201 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/pgraph.h"
+#include "debug.h"
+#include "renderer.h"
+
+GloContext *g_nv2a_context_render;
+GloContext *g_nv2a_context_display;
+
+/* Create the render and display GL contexts (registered as .early_context_init). */
+static void nv2a_gl_context_init(void)
+{
+    g_nv2a_context_render = glo_context_create();
+    g_nv2a_context_display = glo_context_create();
+}
+
+/* Bind the render context on the pgraph worker thread (registered as .init_thread). */
+static void pgraph_gl_init_thread(NV2AState *d)
+{
+    glo_set_current(g_nv2a_context_render);
+}
+
+/* Tear down caches and destroy both GL contexts (registered as .finalize). */
+static void pgraph_gl_deinit(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    glo_set_current(g_nv2a_context_render);
+
+    pgraph_gl_deinit_surfaces(pg);
+    pgraph_gl_deinit_shader_cache(pg);
+    pgraph_gl_deinit_texture_cache(pg);
+
+    glo_set_current(NULL);
+    glo_context_destroy(g_nv2a_context_render);
+    glo_context_destroy(g_nv2a_context_display);
+}
+
+/* Block until all queued GL work has completed (registered as .flip_stall). */
+static void pgraph_gl_flip_stall(NV2AState *d)
+{
+    NV2A_GL_DFRAME_TERMINATOR();
+    glFinish();
+}
+
+/* Write surface/texture/memory-buffer state back and signal completion. */
+static void pgraph_gl_flush(NV2AState *d)
+{
+    pgraph_gl_surface_flush(d);
+    pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
+    pgraph_gl_update_entire_memory_buffer(d);
+    /* FIXME: Flush more?
 */
+
+    qatomic_set(&d->pgraph.flush_pending, false);
+    qemu_event_set(&d->pgraph.flush_complete);
+}
+
+/* Service flags set by other threads; caller holds d->pfifo.lock,
+ * which is temporarily dropped while d->pgraph.lock is taken. */
+static void pgraph_gl_process_pending(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (qatomic_read(&r->downloads_pending) ||
+        qatomic_read(&r->download_dirty_surfaces_pending) ||
+        qatomic_read(&d->pgraph.sync_pending) ||
+        qatomic_read(&d->pgraph.flush_pending) ||
+        qatomic_read(&r->shader_cache_writeback_pending)) {
+        qemu_mutex_unlock(&d->pfifo.lock);
+        qemu_mutex_lock(&d->pgraph.lock);
+        if (qatomic_read(&r->downloads_pending)) {
+            pgraph_gl_process_pending_downloads(d);
+        }
+        if (qatomic_read(&r->download_dirty_surfaces_pending)) {
+            pgraph_gl_download_dirty_surfaces(d);
+        }
+        if (qatomic_read(&d->pgraph.sync_pending)) {
+            pgraph_gl_sync(d);
+        }
+        if (qatomic_read(&d->pgraph.flush_pending)) {
+            pgraph_gl_flush(d);
+        }
+        if (qatomic_read(&r->shader_cache_writeback_pending)) {
+            pgraph_gl_shader_write_cache_reload_list(&d->pgraph);
+        }
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock(&d->pfifo.lock);
+    }
+}
+
+/* Request dirty-surface download before savevm; completion is signalled by the worker. */
+static void pgraph_gl_pre_savevm_trigger(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    qatomic_set(&r->download_dirty_surfaces_pending, true);
+    qemu_event_reset(&r->dirty_surfaces_download_complete);
+}
+
+/* Wait for the download requested by pgraph_gl_pre_savevm_trigger(). */
+static void pgraph_gl_pre_savevm_wait(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    qemu_event_wait(&r->dirty_surfaces_download_complete);
+}
+
+/* Request shader-cache writeback before shutdown. */
+static void pgraph_gl_pre_shutdown_trigger(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    qatomic_set(&r->shader_cache_writeback_pending, true);
+    qemu_event_reset(&r->shader_cache_writeback_complete);
+}
+
+/* Wait for the writeback requested by pgraph_gl_pre_shutdown_trigger(). */
+static void pgraph_gl_pre_shutdown_wait(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    qemu_event_wait(&r->shader_cache_writeback_complete);
+}
+
+/* Renderer .init: allocate per-renderer state, verify required GL
+ * extensions, and build surface/report/texture/vertex/shader caches. */
+static void pgraph_gl_init(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState));
+
+    /* fire up opengl */
+    glo_set_current(g_nv2a_context_render);
+
+#ifdef DEBUG_NV2A_GL
+    gl_debug_initialize();
+#endif
+
+    /* DXT textures */
+    assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
+    /* Internal RGB565 texture format */
+    assert(glo_check_extension("GL_ARB_ES2_compatibility"));
+
+    pgraph_gl_init_surfaces(pg);
+    pgraph_gl_init_reports(d);
+    pgraph_gl_init_texture_cache(d);
+    pgraph_gl_init_vertex_cache(d);
+    pgraph_gl_init_shader_cache(pg);
+
+    glo_set_current(g_nv2a_context_display);
+    pgraph_gl_init_display_renderer(d);
+
+    glo_set_current(NULL);
+}
+
+/* Ops table binding this OpenGL backend into the generic PGRAPH renderer interface. */
+static PGRAPHRenderer pgraph_gl_renderer = {
+    .type = CONFIG_DISPLAY_RENDERER_OPENGL,
+    .name = "OpenGL",
+    .ops = {
+        .init = pgraph_gl_init,
+        .early_context_init = nv2a_gl_context_init,
+        .init_thread = pgraph_gl_init_thread,
+        .finalize = pgraph_gl_deinit,
+        .clear_report_value = pgraph_gl_clear_report_value,
+        .clear_surface = pgraph_gl_clear_surface,
+        .draw_begin = pgraph_gl_draw_begin,
+        .draw_end = pgraph_gl_draw_end,
+        .flip_stall = pgraph_gl_flip_stall,
+        .flush_draw = pgraph_gl_flush_draw,
+        .get_report = pgraph_gl_get_report,
+        .image_blit = pgraph_gl_image_blit,
+        .pre_savevm_trigger = pgraph_gl_pre_savevm_trigger,
+        .pre_savevm_wait = pgraph_gl_pre_savevm_wait,
+        .pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger,
+        .pre_shutdown_wait = pgraph_gl_pre_shutdown_wait,
+        .process_pending = pgraph_gl_process_pending,
+        .process_pending_reports = pgraph_gl_process_pending_reports,
+        .surface_update = pgraph_gl_surface_update,
+        .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor,
+        .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor,
+        .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface,
+    }
+};
+
+/* Self-register this renderer at program load time. */
+static void __attribute__((constructor)) register_renderer(void)
+{
+    pgraph_renderer_register(&pgraph_gl_renderer);
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h
new file mode 100644
index 00000000000..fff4ac7d536
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.h
@@ -0,0 +1,283 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" + +#include "hw/hw.h" + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include "gloffscreen.h" +#include "constants.h" + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + GLuint gl_buffer; + SurfaceFormatInfo fmt; +} SurfaceBinding; + +typedef struct TextureBinding { + unsigned int refcnt; + int draw_time; + uint64_t data_hash; + unsigned int scale; + unsigned int min_filter; + unsigned int mag_filter; + unsigned int addru; + unsigned int addrv; + unsigned int addrp; + uint32_t border_color; + bool border_color_set; + GLenum gl_target; + GLuint gl_texture; +} TextureBinding; + +typedef struct ShaderBinding { + GLuint gl_program; + GLenum gl_primitive_mode; + + GLint psh_constant_loc[9][2]; + GLint alpha_ref_loc; + + GLint bump_mat_loc[NV2A_MAX_TEXTURES]; + GLint bump_scale_loc[NV2A_MAX_TEXTURES]; + GLint bump_offset_loc[NV2A_MAX_TEXTURES]; + GLint tex_scale_loc[NV2A_MAX_TEXTURES]; + + GLint surface_size_loc; + GLint clip_range_loc; + + GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + GLint inv_viewport_loc; + GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; + GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; + GLint ltc1_loc[NV2A_LTC1_COUNT]; + + GLint fog_color_loc; + GLint fog_param_loc; + GLint 
light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + GLint light_local_position_loc[NV2A_MAX_LIGHTS]; + GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + GLint clip_region_loc[8]; + + GLint material_alpha_loc; +} ShaderBinding; + +typedef struct ShaderLruNode { + LruNode node; + bool cached; + void *program; + size_t program_size; + GLenum program_format; + ShaderState state; + ShaderBinding *binding; + QemuThread *save_thread; +} ShaderLruNode; + +typedef struct VertexKey { + size_t count; + size_t stride; + hwaddr addr; + + GLboolean gl_normalize; + GLuint gl_type; +} VertexKey; + +typedef struct VertexLruNode { + LruNode node; + VertexKey key; + bool initialized; + + GLuint gl_buffer; +} VertexLruNode; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; +} TextureKey; + +typedef struct TextureLruNode { + LruNode node; + TextureKey key; + TextureBinding *binding; + bool possibly_dirty; +} TextureLruNode; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; + GLuint *queries; +} QueryReport; + +typedef struct PGRAPHGLState { + GLuint gl_framebuffer; + GLuint gl_display_buffer; + GLint gl_display_buffer_internal_format; + GLsizei gl_display_buffer_width; + GLsizei gl_display_buffer_height; + GLenum gl_display_buffer_format; + GLenum gl_display_buffer_type; + + Lru element_cache; + VertexLruNode *element_cache_entries; + GLuint gl_inline_array_buffer; + GLuint gl_memory_buffer; + GLuint gl_vertex_array; + GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES]; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + TextureBinding 
*texture_binding[NV2A_MAX_TEXTURES]; + Lru texture_cache; + TextureLruNode *texture_cache_entries; + + Lru shader_cache; + ShaderLruNode *shader_cache_entries; + ShaderBinding *shader_binding; + QemuMutex shader_cache_lock; + QemuThread shader_disk_thread; + + unsigned int zpass_pixel_count_result; + unsigned int gl_zpass_pixel_count_query_count; + GLuint *gl_zpass_pixel_count_queries; + QSIMPLEQ_HEAD(, QueryReport) report_queue; + + bool shader_cache_writeback_pending; + QemuEvent shader_cache_writeback_complete; + + struct s2t_rndr { + GLuint fbo, vao, vbo, prog; + GLuint tex_loc, surface_size_loc; + } s2t_rndr; + + struct disp_rndr { + GLuint fbo, vao, vbo, prog; + GLuint display_size_loc; + GLuint line_offset_loc; + GLuint tex_loc; + GLuint pvideo_tex; + GLint pvideo_enable_loc; + GLint pvideo_tex_loc; + GLint pvideo_in_pos_loc; + GLint pvideo_pos_loc; + GLint pvideo_scale_loc; + GLint pvideo_color_key_enable_loc; + GLint pvideo_color_key_loc; + GLint palette_loc[256]; + } disp_rndr; +} PGRAPHGLState; + +extern GloContext *g_nv2a_context_render; +extern GloContext *g_nv2a_context_display; + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d); +void pgraph_gl_bind_shaders(PGRAPHState *pg); +void pgraph_gl_bind_textures(NV2AState *d); +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); +bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); +GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src); +void pgraph_gl_deinit_shader_cache(PGRAPHState *pg); +void pgraph_gl_deinit_surfaces(PGRAPHState *pg); +void pgraph_gl_deinit_texture_cache(PGRAPHState *pg); +void pgraph_gl_download_dirty_surfaces(NV2AState *d); +void pgraph_gl_clear_report_value(NV2AState *d); +void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_gl_draw_begin(NV2AState 
*d); +void pgraph_gl_draw_end(NV2AState *d); +void pgraph_gl_flush_draw(NV2AState *d); +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter); +void pgraph_gl_image_blit(NV2AState *d); +void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); +void pgraph_gl_process_pending_reports(NV2AState *d); +void pgraph_gl_surface_flush(NV2AState *d); +void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write); +void pgraph_gl_sync(NV2AState *d); +void pgraph_gl_update_entire_memory_buffer(NV2AState *d); +void pgraph_gl_init_display_renderer(NV2AState *d); +void pgraph_gl_init_reports(NV2AState *d); +void pgraph_gl_init_shader_cache(PGRAPHState *pg); +void pgraph_gl_init_surfaces(PGRAPHState *pg); +void pgraph_gl_init_texture_cache(NV2AState *d); +void pgraph_gl_init_vertex_cache(NV2AState *d); +void pgraph_gl_process_pending_downloads(NV2AState *d); +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg); +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr); +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr); +void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e); +void pgraph_gl_unbind_surface(NV2AState *d, bool color); +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode); +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode); +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); +int 
pgraph_gl_get_framebuffer_surface(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/reports.c b/hw/xbox/nv2a/pgraph/gl/reports.c new file mode 100644 index 00000000000..0673c37e0c5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/reports.c @@ -0,0 +1,111 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include "renderer.h" + +static void process_pending_report(NV2AState *d, QueryReport *report) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (report->clear) { + r->zpass_pixel_count_result = 0; + return; + } + + uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), + * not sure if CLEARs also count + */ + /* FIXME: What about clipping regions etc? 
*/ + for (int i = 0; i < report->query_count; i++) { + GLuint gl_query_result = 0; + glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result); + gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; + r->zpass_pixel_count_result += gl_query_result; + } + + if (report->query_count) { + glDeleteQueries(report->query_count, report->queries); + g_free(report->queries); + } + + pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result); +} + +void pgraph_gl_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + QueryReport *report, *next; + + QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) { + process_pending_report(d, report); + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } +} + +void pgraph_gl_clear_report_value(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + /* FIXME: Does this have a value in parameter? Also does this (also?) modify + * the report memory block? 
+ */ + if (r->gl_zpass_pixel_count_query_count) { + glDeleteQueries(r->gl_zpass_pixel_count_query_count, + r->gl_zpass_pixel_count_queries); + r->gl_zpass_pixel_count_query_count = 0; + } + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = true; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); +} + +void pgraph_gl_init_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QSIMPLEQ_INIT(&r->report_queue); +} + +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = false; + report->parameter = parameter; + report->query_count = r->gl_zpass_pixel_count_query_count; + report->queries = r->gl_zpass_pixel_count_queries; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->gl_zpass_pixel_count_query_count = 0; + r->gl_zpass_pixel_count_queries = NULL; +} diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c new file mode 100644 index 00000000000..0bb4eaa5981 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -0,0 +1,1102 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include + +#include "xemu-version.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "debug.h" +#include "renderer.h" + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); + +static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) +{ + if (polygon_mode == POLY_MODE_POINT) { + return GL_POINTS; + } + + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return GL_POINTS; + case PRIM_TYPE_LINES: return GL_LINES; + case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; + case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; + case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; + case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return GL_LINE_LOOP; + } else if (polygon_mode == POLY_MODE_FILL) { + return GL_TRIANGLE_FAN; + } + + assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + +static GLuint create_gl_shader(GLenum gl_shader_type, + const char *code, + const char *name) +{ + GLint compiled = 0; + + NV2A_GL_DGROUP_BEGIN("Creating new %s", name); + + NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); + + GLuint shader = glCreateShader(gl_shader_type); + glShaderSource(shader, 1, &code, 0); + 
glCompileShader(shader); + + /* Check it compiled */ + compiled = 0; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + GLchar* log; + GLint log_length; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + log = g_malloc(log_length * sizeof(GLchar)); + glGetShaderInfoLog(shader, log_length, NULL, log); + fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); + g_free(log); + + NV2A_GL_DGROUP_END(); + abort(); + } + + NV2A_GL_DGROUP_END(); + + return shader; +} + +static void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) +{ + int i, j; + char tmp[64]; + + /* set texture samplers */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + char samplerName[16]; + snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); + GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName); + if (texSampLoc >= 0) { + glUniform1i(texSampLoc, i); + } + } + + /* validate the program */ + glValidateProgram(binding->gl_program); + GLint valid = 0; + glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); + fprintf(stderr, "nv2a: shader validation failed: %s\n", log); + abort(); + } + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); + } + } + binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); + for (i = 1; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = 
glGetUniformLocation(binding->gl_program, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + /* lookup vertex shader uniforms */ + for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { + snprintf(tmp, sizeof(tmp), "c[%d]", i); + binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); + binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); + binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); + + binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); + for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); + binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); + binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTC1_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); + binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + 
glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < 8; i++) { + snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); + binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + if (state->fixed_function) { + binding->material_alpha_loc = + glGetUniformLocation(binding->gl_program, "material_alpha"); + } else { + binding->material_alpha_loc = -1; + } +} + +static ShaderBinding *generate_shaders(const ShaderState *state) +{ + char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + GLuint program = glCreateProgram(); + + /* Create an optional geometry shader and find primitive type */ + GLenum gl_primitive_mode = + get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode); + MString* geometry_shader_code = + pgraph_gen_geom_glsl(state->polygon_front_mode, + state->polygon_back_mode, + state->primitive_mode, + state->smooth_shading, + false); + if (geometry_shader_code) { + const char* geometry_shader_code_str = + mstring_get_str(geometry_shader_code); + GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, + geometry_shader_code_str, + "geometry shader"); + glAttachShader(program, geometry_shader); + mstring_unref(geometry_shader_code); + } + + /* create the vertex shader */ + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, + mstring_get_str(vertex_shader_code), + "vertex shader"); + glAttachShader(program, vertex_shader); + mstring_unref(vertex_shader_code); + + /* generate a fragment shader from register combiners */ + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + const char *fragment_shader_code_str = + mstring_get_str(fragment_shader_code); + GLuint fragment_shader = 
create_gl_shader(GL_FRAGMENT_SHADER, + fragment_shader_code_str, + "fragment shader"); + glAttachShader(program, fragment_shader); + mstring_unref(fragment_shader_code); + + /* link the program */ + glLinkProgram(program); + GLint linked = 0; + glGetProgramiv(program, GL_LINK_STATUS, &linked); + if(!linked) { + GLchar log[2048]; + glGetProgramInfoLog(program, 2048, NULL, log); + fprintf(stderr, "nv2a: shader linking failed: %s\n", log); + abort(); + } + + glUseProgram(program); + + ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); + ret->gl_program = program; + ret->gl_primitive_mode = gl_primitive_mode; + + update_shader_constant_locations(ret, state); + + if (previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + return ret; +} + +static const char *shader_gl_vendor = NULL; + +static void shader_create_cache_folder(void) +{ + char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); + qemu_mkdir(shader_path); + g_free(shader_path); +} + +static char *shader_get_lru_cache_path(void) +{ + return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); +} + +static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) +{ + FILE *lru_list_file = (FILE*) opaque; + size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); + if (written != 1) { + fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", + (unsigned long long) node->hash); + } +} + +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + if (!g_config.perf.cache_shaders) { + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); + return; + } + + char *shader_lru_path = shader_get_lru_cache_path(); + qemu_thread_join(&r->shader_disk_thread); + + FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); + g_free(shader_lru_path); + if 
(!lru_list) { + fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); + return; + } + + lru_visit_active(&r->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); + fclose(lru_list); + + lru_flush(&r->shader_cache); + + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); +} + +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode) +{ + assert(glGetError() == GL_NO_ERROR); + + if (!snode->program) { + return false; + } + + GLuint gl_program = glCreateProgram(); + glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); + GLint gl_error = glGetError(); + if (gl_error != GL_NO_ERROR) { + NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); + glDeleteProgram(gl_program); + return false; + } + + glValidateProgram(gl_program); + GLint valid = 0; + glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(gl_program, 1024, NULL, log); + NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); + glDeleteProgram(gl_program); + return false; + } + + glUseProgram(gl_program); + + ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); + binding->gl_program = gl_program; + binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, + snode->state.primitive_mode); + snode->binding = binding; + + g_free(snode->program); + snode->program = NULL; + + update_shader_constant_locations(binding, &snode->state); + + return true; +} + +static char *shader_get_bin_directory(uint64_t hash) +{ + const char *cfg_dir = xemu_settings_get_base_path(); + uint64_t bin_mask = 0xffffUL << 48; + char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", + cfg_dir, (hash & bin_mask) >> 48); + return shader_bin_dir; +} + +static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) +{ + uint64_t bin_mask = 0xffffUL << 48; + return 
g_strdup_printf("%s/%012lx", shader_bin_dir, + hash & (~bin_mask)); +} + +static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + char *shader_bin_dir = shader_get_bin_directory(hash); + char *shader_path = shader_get_binary_path(shader_bin_dir, hash); + char *cached_xemu_version = NULL; + char *cached_gl_vendor = NULL; + void *program_buffer = NULL; + + uint64_t cached_xemu_version_len; + uint64_t gl_vendor_len; + GLenum program_binary_format; + ShaderState state; + size_t shader_size; + + g_free(shader_bin_dir); + + qemu_mutex_lock(&r->shader_cache_lock); + if (lru_contains_hash(&r->shader_cache, hash)) { + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + qemu_mutex_unlock(&r->shader_cache_lock); + + FILE *shader_file = qemu_fopen(shader_path, "rb"); + if (!shader_file) { + goto error; + } + + size_t nread; + #define READ_OR_ERR(data, data_len) \ + do { \ + nread = fread(data, data_len, 1, shader_file); \ + if (nread != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); + + cached_xemu_version = g_malloc(cached_xemu_version_len +1); + READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); + if (strcmp(cached_xemu_version, xemu_version) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + + cached_gl_vendor = g_malloc(gl_vendor_len); + READ_OR_ERR(cached_gl_vendor, gl_vendor_len); + if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); + READ_OR_ERR(&state, sizeof(state)); + READ_OR_ERR(&shader_size, sizeof(shader_size)); + + program_buffer = g_malloc(shader_size); + READ_OR_ERR(program_buffer, shader_size); + + #undef READ_OR_ERR + + fclose(shader_file); + g_free(shader_path); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); 
+ + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + /* If we happened to regenerate this shader already, then we may as well use the new one */ + if (snode->binding) { + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + + snode->program_format = program_binary_format; + snode->program_size = shader_size; + snode->program = program_buffer; + snode->cached = true; + qemu_mutex_unlock(&r->shader_cache_lock); + return; + +error: + /* Delete the shader so it won't be loaded again */ + qemu_unlink(shader_path); + g_free(shader_path); + g_free(program_buffer); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); +} + +static void *shader_reload_lru_from_disk(void *arg) +{ + if (!g_config.perf.cache_shaders) { + return NULL; + } + + PGRAPHState *pg = (PGRAPHState*) arg; + char *shader_lru_path = shader_get_lru_cache_path(); + + FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); + g_free(shader_lru_path); + if (!lru_shaders_list) { + return NULL; + } + + uint64_t hash; + while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { + shader_load_from_disk(pg, hash); + } + + return NULL; +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + memcpy(&snode->state, state, sizeof(ShaderState)); + snode->cached = false; + snode->binding = NULL; + snode->program = NULL; + snode->save_thread = NULL; +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + if (snode->save_thread) { + qemu_thread_join(snode->save_thread); + g_free(snode->save_thread); + } + + if (snode->binding) { + glDeleteProgram(snode->binding->gl_program); + g_free(snode->binding); + } + + if (snode->program) { + g_free(snode->program); + } + + snode->cached = false; + 
snode->save_thread = NULL; + snode->binding = NULL; + snode->program = NULL; + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +void pgraph_gl_init_shader_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_init(&r->shader_cache_lock); + qemu_event_init(&r->shader_cache_writeback_complete, false); + + if (!shader_gl_vendor) { + shader_gl_vendor = (const char *) glGetString(GL_VENDOR); + } + + shader_create_cache_folder(); + + /* FIXME: Make this configurable */ + const size_t shader_cache_size = 50*1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; + + qemu_thread_create(&r->shader_disk_thread, "pgraph.renderer_state->shader_cache", + shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); +} + +void pgraph_gl_deinit_shader_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + // Clear out shader cache + pgraph_gl_shader_write_cache_reload_list(pg); + free(r->shader_cache_entries); + qemu_mutex_destroy(&r->shader_cache_lock); +} + +static void *shader_write_to_disk(void *arg) +{ + ShaderLruNode *snode = (ShaderLruNode*) arg; + + char *shader_bin = shader_get_bin_directory(snode->node.hash); + char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); + + static uint64_t gl_vendor_len; + if (gl_vendor_len == 0) { + gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); + } + + 
static uint64_t xemu_version_len = 0; + if (xemu_version_len == 0) { + xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); + } + + qemu_mkdir(shader_bin); + g_free(shader_bin); + + FILE *shader_file = qemu_fopen(shader_path, "wb"); + if (!shader_file) { + goto error; + } + + size_t written; + #define WRITE_OR_ERR(data, data_size) \ + do { \ + written = fwrite(data, data_size, 1, shader_file); \ + if (written != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); + WRITE_OR_ERR(xemu_version, xemu_version_len); + + WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); + + WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); + WRITE_OR_ERR(&snode->state, sizeof(snode->state)); + + WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); + WRITE_OR_ERR(snode->program, snode->program_size); + + #undef WRITE_OR_ERR + + fclose(shader_file); + + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + + return NULL; + +error: + fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); + qemu_unlink(shader_path); + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + return NULL; +} + +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode) +{ + if (!snode->binding || snode->cached) { + return; + } + + GLint program_size; + glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); + + if (snode->program) { + g_free(snode->program); + snode->program = NULL; + } + + /* program_size might be zero on some systems, if no binary formats are supported */ + if (program_size == 0) { + return; + } + + snode->program = g_malloc(program_size); + GLsizei program_size_copied; + glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, + &snode->program_format, snode->program); + assert(glGetError() == GL_NO_ERROR); + + 
snode->program_size = program_size_copied; + snode->cached = true; + + char name[24]; + snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); + snode->save_thread = g_malloc0(sizeof(QemuThread)); + qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); +} + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, + bool binding_changed, + + // FIXME: Remove these... We already know it from binding.state + bool vertex_program, + bool fixed_function) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + int i, j; + + /* update combiner constants */ + for (i = 0; i < 9; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1); + } else { + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + pgraph_argb_pack32_to_rgba_float(constant[j], value); + glUniform4fv(loc, 1, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; + glUniform1f(binding->alpha_ref_loc, alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + GLint loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + uint32_t m_u32[4]; + m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)); + m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)); + m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)); + m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)); + float m[4]; + m[0] = *(float*)&m_u32[0]; + m[1] = *(float*)&m_u32[1]; + m[2] = 
*(float*)&m_u32[2]; + m[3] = *(float*)&m_u32[3]; + glUniformMatrix2fv(loc, 1, GL_FALSE, m); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + loc = binding->bump_offset_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + } + + loc = r->shader_binding->tex_scale_loc[i]; + if (loc != -1) { + assert(r->texture_binding[i] != NULL); + glUniform1f(loc, (float)r->texture_binding[i]->scale); + } + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR); + glUniform4f(binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1); + glUniform2f(binding->fog_param_loc, *(float *)&v[0], *(float *)&v[1]); + } + + float zmax; + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; + break; + default: + assert(0); + } + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t* v; + bool* dirty; + GLint* locs; + size_t len; + } lighting_arrays[] = { + {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, + {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, + {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, + }; + + for (i=0; ilight_infinite_half_vector_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... 
*/ + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + + float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); + float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); + float m33 = zmax; + float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; + float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; + + float invViewport[16] = { + 1.0/m11, 0, 0, 0, + 0, 1.0/m22, 0, 0, + 0, 0, 1.0/m33, 0, + -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + glUniformMatrix4fv(binding->inv_viewport_loc, + 1, GL_FALSE, &invViewport[0]); + } + } + + /* update vertex program constants */ + for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; + + GLint loc = binding->vsh_constant_loc[i]; + if ((loc != -1) && + memcmp(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[1]))) { + glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); + memcpy(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[i])); + } + + pg->vsh_constants_dirty[i] = false; + } + + if (binding->surface_size_loc != -1) { + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + glUniform2f(binding->surface_size_loc, + pg->surface_binding_dim.width / aa_width, + pg->surface_binding_dim.height / aa_height); + } + + if (binding->clip_range_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); + float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0; + float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0; + glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); + } + + /* Clipping regions */ + unsigned int max_gl_width = pg->surface_binding_dim.width; + unsigned int max_gl_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); + + for (i = 0; i < 
8; i++) { + uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4); + unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); + unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; + uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4); + unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); + unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; + pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); + pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); + + pgraph_apply_scaling_factor(pg, &x_min, &y_min); + pgraph_apply_scaling_factor(pg, &x_max, &y_max); + + /* Translate for the GL viewport origin */ + int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); + int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); + + glUniform4i(r->shader_binding->clip_region_loc[i], + x_min, y_min_xlat, x_max, y_max_xlat); + } + + if (binding->material_alpha_loc != -1) { + glUniform1f(binding->material_alpha_loc, pg->material_alpha); + } +} + +static bool test_shaders_dirty(PGRAPHState *pg) +{ + #define CR_1(reg) CR_x(reg, 1) + #define CR_4(reg) CR_x(reg, 4) + #define CR_8(reg) CR_x(reg, 8) + #define CF(src, name) CF_x(typeof(src), (&src), name, 1) + #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) + #define CNAME(name) reg_check__ ## name + #define CX_x__define(type, name, x) static type CNAME(name)[x]; + #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) + #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) + #define CR_x__check(reg, x) \ + for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; } + #define CF_x__check(type, src, name, x) \ + for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } + #define CR_x__update(reg, x) \ + for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); } + #define CF_x__update(type, src, name, x) \ + for (int i = 0; i < x; i++) { CNAME(name)[i] = 
src[i]; } + + #define DIRTY_REGS \ + CR_1(NV_PGRAPH_COMBINECTL) \ + CR_1(NV_PGRAPH_SHADERCTL) \ + CR_1(NV_PGRAPH_SHADOWCTL) \ + CR_1(NV_PGRAPH_COMBINESPECFOG0) \ + CR_1(NV_PGRAPH_COMBINESPECFOG1) \ + CR_1(NV_PGRAPH_CONTROL_0) \ + CR_1(NV_PGRAPH_CONTROL_3) \ + CR_1(NV_PGRAPH_CSV0_C) \ + CR_1(NV_PGRAPH_CSV0_D) \ + CR_1(NV_PGRAPH_CSV1_A) \ + CR_1(NV_PGRAPH_CSV1_B) \ + CR_1(NV_PGRAPH_SETUPRASTER) \ + CR_1(NV_PGRAPH_SHADERPROG) \ + CR_8(NV_PGRAPH_COMBINECOLORI0) \ + CR_8(NV_PGRAPH_COMBINECOLORO0) \ + CR_8(NV_PGRAPH_COMBINEALPHAI0) \ + CR_8(NV_PGRAPH_COMBINEALPHAO0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR1) \ + CR_1(NV_PGRAPH_SHADERCLIPMODE) \ + CR_4(NV_PGRAPH_TEXCTL0_0) \ + CR_4(NV_PGRAPH_TEXFMT0) \ + CR_4(NV_PGRAPH_TEXFILTER0) \ + CR_8(NV_PGRAPH_WINDOWCLIPX0) \ + CR_8(NV_PGRAPH_WINDOWCLIPY0) \ + CF(pg->primitive_mode, primitive_mode) \ + CF(pg->surface_scale_factor, surface_scale_factor) \ + CF(pg->compressed_attrs, compressed_attrs) \ + CFA(pg->texture_matrix_enable, texture_matrix_enable) + + #define CR_x(reg, x) CR_x__define(reg, x) + #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + + #define CR_x(reg, x) CR_x__check(reg, x) + #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return false; + +dirty: + #define CR_x(reg, x) CR_x__update(reg, x) + #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return true; +} + +void pgraph_gl_bind_shaders(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, + vertex_program ? "yes" : "no", + fixed_function ? 
"yes" : "no"); + + bool binding_changed = false; + if (!test_shaders_dirty(pg) && !pg->program_data_dirty) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); + goto update_constants; + } + + pg->program_data_dirty = false; + + ShaderBinding* old_binding = r->shader_binding; + + ShaderState state = pgraph_get_shader_state(pg); + + uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + if (snode->binding || pgraph_gl_shader_load_from_memory(snode)) { + r->shader_binding = snode->binding; + } else { + r->shader_binding = generate_shaders(&state); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + /* cache it */ + snode->binding = r->shader_binding; + if (g_config.perf.cache_shaders) { + pgraph_gl_shader_cache_to_disk(snode); + } + } + + qemu_mutex_unlock(&r->shader_cache_lock); + + binding_changed = (r->shader_binding != old_binding); + if (binding_changed) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); + glUseProgram(r->shader_binding->gl_program); + } + +update_constants: + shader_update_constants(pg, r->shader_binding, binding_changed, + state.vertex_program, state.fixed_function); + + NV2A_GL_DGROUP_END(); +} + +GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src) +{ + GLint status; + char err_buf[512]; + + // Compile vertex shader + GLuint vs = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vs, 1, &vs_src, NULL); + glCompileShader(vs); + glGetShaderiv(vs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Compile fragment shader + GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fs, 1, &fs_src, NULL); + 
glCompileShader(fs); + glGetShaderiv(fs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Link vertex and fragment shaders + GLuint prog = glCreateProgram(); + glAttachShader(prog, vs); + glAttachShader(prog, fs); + glLinkProgram(prog); + glUseProgram(prog); + + // Flag shaders for deletion (will still be retained for lifetime of prog) + glDeleteShader(vs); + glDeleteShader(fs); + + return prog; +} diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c new file mode 100644 index 00000000000..332ca7199ee --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -0,0 +1,1400 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "debug.h" +#include "renderer.h" + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force); +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels); +static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); + +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; + + qemu_mutex_unlock_iothread(); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, true); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&r->dirty_surfaces_download_complete); + qatomic_set(&r->download_dirty_surfaces_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&r->dirty_surfaces_download_complete); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.flush_complete); + qatomic_set(&d->pgraph.flush_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.flush_complete); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, false); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock_iothread(); +} + +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d) +{ + return d->pgraph.surface_scale_factor; +} + +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg) +{ + int factor = g_config.display.quality.surface_scale; + pg->surface_scale_factor = factor < 1 ? 
1 : factor; +} + +// FIXME: Move to common +static bool framebuffer_dirty(PGRAPHState *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; + if (!shape_changed || (!pg->surface_shape.color_format + && !pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", + color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + /* FIXME: Does this apply to CLEARs too? */ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; + + if (r->color_binding) { + r->color_binding->draw_dirty |= color; + r->color_binding->frame_time = pg->frame_time; + r->color_binding->cleared = false; + + } + + if (r->zeta_binding) { + r->zeta_binding->draw_dirty |= zeta; + r->zeta_binding->frame_time = pg->frame_time; + r->zeta_binding->cleared = false; + + } +} + +static void init_render_to_texture(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform vec2 surface_size;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord;\n" + " texCoord.x = gl_FragCoord.x;\n" + " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" + " + (textureSize(tex,0).y - surface_size.y);\n" + " texCoord /= textureSize(tex,0).xy;\n" + " out_Color.rgba = texture(tex, texCoord);\n" + "}\n"; + + r->s2t_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->s2t_rndr.tex_loc = 
glGetUniformLocation(r->s2t_rndr.prog, "tex"); + r->s2t_rndr.surface_size_loc = glGetUniformLocation(r->s2t_rndr.prog, + "surface_size"); + + glGenVertexArrays(1, &r->s2t_rndr.vao); + glBindVertexArray(r->s2t_rndr.vao); + glGenBuffers(1, &r->s2t_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->s2t_rndr.fbo); +} + +static bool surface_to_texture_can_fastpath(SurfaceBinding *surface, + TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void render_surface_to(NV2AState *d, 
SurfaceBinding *surface, + int texture_unit, GLuint gl_target, + GLuint gl_texture, unsigned int width, + unsigned int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindFramebuffer(GL_FRAMEBUFFER, r->s2t_rndr.fbo); + + GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, + gl_texture, 0); + glDrawBuffers(1, draw_buffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + assert(glGetError() == GL_NO_ERROR); + + float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); + glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); + + glBindVertexArray(r->s2t_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glUseProgram(r->s2t_rndr.prog); + glProgramUniform1i(r->s2t_rndr.prog, r->s2t_rndr.tex_loc, + texture_unit); + glProgramUniform2f(r->s2t_rndr.prog, + r->s2t_rndr.surface_size_loc, width, height); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_DITHER); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 1.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, + 0); + glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer); + glBindVertexArray(r->gl_vertex_array); + glBindTexture(gl_target, gl_texture); + glUseProgram( + r->shader_binding ? 
r->shader_binding->gl_program : 0); +} + +static void render_surface_to_texture_slow(NV2AState *d, + SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + + const ColorFormatInfo *f = &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + + unsigned int width = surface->width, + height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + size_t bufsize = width * height * surface->fmt.bytes_per_pixel; + + uint8_t *buf = g_malloc(bufsize); + surface_download_to_buffer(d, surface, false, true, false, buf); + + width = texture_shape->width; + height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, buf); + g_free(buf); + glBindTexture(texture->gl_target, texture->gl_texture); +} + +/* Note: This function is intended to be called before PGRAPH configures GL + * state for rendering; it will configure GL state here but only restore a + * couple of items. 
+ */ +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const ColorFormatInfo *f = + &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + if (!surface_to_texture_can_fastpath(surface, texture_shape)) { + render_surface_to_texture_slow(d, surface, texture, + texture_shape, texture_unit); + return; + } + + unsigned int width = texture_shape->width, height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, NULL); + glBindTexture(texture->gl_target, 0); + render_surface_to(d, surface, texture_unit, texture->gl_target, + texture->gl_texture, width, height); + glBindTexture(texture->gl_target, texture->gl_texture); + glUseProgram( + r->shader_binding ? 
r->shader_binding->gl_program : 0); +} + +bool pgraph_gl_check_surface_to_texture_compatibility( + const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void 
wait_for_surface_download(SurfaceBinding *e)
{
    /* Block the caller until this surface's pending GPU->RAM download has been
     * serviced (pgraph_gl_process_pending_downloads signals downloads_complete).
     * Note: the `static void` storage class/return type is on the preceding
     * source line, outside this span.
     */
    NV2AState *d = g_nv2a;
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    if (qatomic_read(&e->draw_dirty)) {
        /* Queue the download request and kick PFIFO so the renderer thread
         * picks it up, then wait for completion. */
        qemu_mutex_lock(&d->pfifo.lock);
        qemu_event_reset(&r->downloads_complete);
        qatomic_set(&e->download_pending, true);
        qatomic_set(&r->downloads_pending, true);
        pfifo_kick(d);
        qemu_mutex_unlock(&d->pfifo.lock);
        qemu_event_wait(&r->downloads_complete);
    }
}

/* Memory-access callback registered over the surface's VRAM range (see
 * surface_put / mem_access_callback_insert): on CPU access to a surface with
 * undownloaded draws, force a download so the CPU sees current data; on CPU
 * write, flag the surface for re-upload before the GPU next samples it.
 */
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
                                    hwaddr len, bool write)
{
    SurfaceBinding *e = opaque;
    assert(addr >= e->vram_addr);
    hwaddr offset = addr - e->vram_addr;
    assert(offset < e->size);

    if (qatomic_read(&e->draw_dirty)) {
        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
        wait_for_surface_download(e);
    }

    if (write && !qatomic_read(&e->upload_pending)) {
        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
        qatomic_set(&e->upload_pending, true);
    }
}

/* Register a new surface binding at `addr`, evicting any existing binding
 * whose VRAM range overlaps the new one (after downloading its dirty
 * contents). Returns the heap-allocated copy of *surface_in that now lives
 * in the renderer's surface list.
 */
static SurfaceBinding *surface_put(NV2AState *d, hwaddr addr,
                                   SurfaceBinding *surface_in)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    /* Caller must not already have a binding at this exact address. */
    assert(pgraph_gl_surface_get(d, addr) == NULL);

    SurfaceBinding *surface, *next;
    uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1;
    QTAILQ_FOREACH_SAFE(surface, &r->surfaces, entry, next) {
        uintptr_t s_end = surface->vram_addr + surface->size - 1;
        /* Inclusive range-overlap test on [vram_addr, end]. */
        bool overlapping = !(surface->vram_addr > e_end
                             || surface_in->vram_addr > s_end);
        if (overlapping) {
            trace_nv2a_pgraph_surface_evict_overlapping(
                surface->vram_addr, surface->width, surface->height,
                surface->pitch);
            pgraph_gl_surface_download_if_dirty(d, surface);
            pgraph_gl_surface_invalidate(d, surface);
        }
    }

    SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding));
    assert(surface_out != NULL);
    *surface_out = *surface_in;

    if (tcg_enabled()) {
        /* Drop pgraph.lock before taking the iothread lock to register the
         * CPU access callback. NOTE(review): this implies the caller holds
         * pgraph.lock and depends on lock ordering (iothread before pgraph)
         * -- confirm against callers. (Statement continues on next line.) */
        qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock_iothread(); + mem_access_callback_insert(qemu_get_cpu(0), + d->vram, surface_out->vram_addr, surface_out->size, + &surface_out->access_cb, &surface_access_callback, + surface_out); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + QTAILQ_INSERT_TAIL(&r->surfaces, surface_out, entry); + + return surface_out; +} + +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (surface->vram_addr == addr) { + return surface; + } + } + + return NULL; +} + +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (addr >= surface->vram_addr && + addr < (surface->vram_addr + surface->size)) { + return surface; + } + } + + return NULL; +} + +void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); + + if (surface == r->color_binding) { + assert(d->pgraph.surface_color.buffer_dirty); + pgraph_gl_unbind_surface(d, true); + } + if (surface == r->zeta_binding) { + assert(d->pgraph.surface_zeta.buffer_dirty); + pgraph_gl_unbind_surface(d, false); + } + + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + glDeleteTextures(1, &surface->gl_buffer); + + QTAILQ_REMOVE(&r->surfaces, surface, entry); + g_free(surface); +} + +static void surface_evict_old(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const int 
surface_age_limit = 5; /* frames; declaration starts on previous source line */

    /* Evict surfaces not drawn to for surface_age_limit frames, downloading
     * any dirty contents back to VRAM first. */
    SurfaceBinding *s, *next;
    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
        int last_used = d->pgraph.frame_time - s->frame_time;
        if (last_used >= surface_age_limit) {
            trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr);
            pgraph_gl_surface_download_if_dirty(d, s);
            pgraph_gl_surface_invalidate(d, s);
        }
    }
}

/* Decide whether surface s1 can stand in for s2. Format, attachment point,
 * and pitch must match exactly; with strict=false, s1 merely needs to be at
 * least as large as s2, otherwise dimensions must match exactly.
 * NOTE(review): the clip_x/clip_y <= comparisons presumably require s2's clip
 * origin to lie within s1 -- confirm against callers.
 */
static bool check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2,
                                        bool strict)
{
    bool format_compatible =
        (s1->color == s2->color) &&
        (s1->fmt.gl_attachment == s2->fmt.gl_attachment) &&
        (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) &&
        (s1->pitch == s2->pitch) &&
        (s1->shape.clip_x <= s2->shape.clip_x) &&
        (s1->shape.clip_y <= s2->shape.clip_y);
    if (!format_compatible) {
        return false;
    }

    if (!strict) {
        return (s1->width >= s2->width) && (s1->height >= s2->height);
    } else {
        return (s1->width == s2->width) && (s1->height == s2->height);
    }
}

/* Force-download a surface's contents to VRAM if it has undownloaded draws. */
void pgraph_gl_surface_download_if_dirty(NV2AState *d,
                                         SurfaceBinding *surface)
{
    if (surface->draw_dirty) {
        surface_download(d, surface, true);
    }
}

/* Re-attach the currently bound color/zeta surfaces to the framebuffer.
 * Used to restore FBO state after temporary re-binds (upload/download paths).
 */
static void bind_current_surface(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    if (r->color_binding) {
        glFramebufferTexture2D(GL_FRAMEBUFFER, r->color_binding->fmt.gl_attachment,
                               GL_TEXTURE_2D, r->color_binding->gl_buffer, 0);
    }

    if (r->zeta_binding) {
        glFramebufferTexture2D(GL_FRAMEBUFFER, r->zeta_binding->fmt.gl_attachment,
                               GL_TEXTURE_2D, r->zeta_binding->gl_buffer, 0);
    }

    if (r->color_binding || r->zeta_binding) {
        assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
               GL_FRAMEBUFFER_COMPLETE);
    }
}

/* Downscale one row by keeping every `factor`-th pixel (point sampling).
 * Fast paths do whole-pixel 32/16-bit copies. NOTE(review): the uint32_t/
 * uint16_t loads assume rows are suitably aligned for the pixel size --
 * confirm for all surface formats.
 */
static void surface_copy_shrink_row(uint8_t *out, uint8_t *in,
                                    unsigned int width,
                                    unsigned int bytes_per_pixel,
                                    unsigned int factor)
{
    if (bytes_per_pixel == 4) {
        for (unsigned int x = 0; x < width; x++) {
            *(uint32_t *)out = *(uint32_t *)in;
            out += 4;
            in += 4 *
factor; + } + } else if (bytes_per_pixel == 2) { + for (unsigned int x = 0; x < width; x++) { + *(uint16_t *)out = *(uint16_t *)in; + out += 2; + in += 2 * factor; + } + } else { + for (unsigned int x = 0; x < width; x++) { + memcpy(out, in, bytes_per_pixel); + out += bytes_per_pixel; + in += bytes_per_pixel * factor; + } + } +} + +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels) +{ + PGRAPHState *pg = &d->pgraph; + + swizzle &= surface->swizzle; + downscale &= (pg->surface_scale_factor != 1); + + trace_nv2a_pgraph_surface_download( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + /* Bind destination surface to framebuffer */ + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, surface->gl_buffer, 0); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + /* Read surface into memory */ + uint8_t *gl_read_buf = pixels; + + uint8_t *swizzle_buf = pixels; + if (swizzle) { + /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
+ * FIXME: Consider swizzle in shader + */ + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + } + + if (downscale) { + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * + surface->size); + gl_read_buf = pg->scale_buf; + } + + glo_readpixels( + surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, + pg->surface_scale_factor * surface->pitch, + pg->surface_scale_factor * surface->width, + pg->surface_scale_factor * surface->height, flip, gl_read_buf); + + /* FIXME: Replace this with a hw accelerated version */ + if (downscale) { + assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); + uint8_t *out = swizzle_buf, *in = pg->scale_buf; + for (unsigned int y = 0; y < surface->height; y++) { + surface_copy_shrink_row(out, in, surface->width, + surface->fmt.bytes_per_pixel, + pg->surface_scale_factor); + in += surface->pitch * pg->surface_scale_factor * + pg->surface_scale_factor; + out += surface->pitch; + } + } + + if (swizzle) { + swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + surface->pitch, surface->fmt.bytes_per_pixel); + g_free(swizzle_buf); + } + + /* Re-bind original framebuffer target */ + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, 0, 0); + bind_current_surface(d); +} + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force) +{ + if (!(surface->download_pending || force)) { + return; + } + + /* FIXME: Respect write enable at last TOU? 
*/

    nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);

    /* Read back (swizzled if needed, flipped, downscaled) into guest VRAM. */
    surface_download_to_buffer(d, surface, true, true, true,
                               d->vram_ptr + surface->vram_addr);

    /* Tell the display and texture caches that this VRAM range changed. */
    memory_region_set_client_dirty(d->vram, surface->vram_addr,
                                   surface->pitch * surface->height,
                                   DIRTY_MEMORY_VGA);
    memory_region_set_client_dirty(d->vram, surface->vram_addr,
                                   surface->pitch * surface->height,
                                   DIRTY_MEMORY_NV2A_TEX);

    surface->download_pending = false;
    surface->draw_dirty = false;
}

/* Service all queued surface downloads (runs on the renderer thread), then
 * wake any thread blocked in wait_for_surface_download(). */
void pgraph_gl_process_pending_downloads(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    SurfaceBinding *surface;
    QTAILQ_FOREACH(surface, &r->surfaces, entry) {
        surface_download(d, surface, false);
    }

    qatomic_set(&r->downloads_pending, false);
    qemu_event_set(&r->downloads_complete);
}

/* Flush every dirty surface to VRAM (used e.g. when changing the surface
 * scale factor), then signal completion to the requesting thread. */
void pgraph_gl_download_dirty_surfaces(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHGLState *r = pg->gl_renderer_state;

    SurfaceBinding *surface;
    QTAILQ_FOREACH(surface, &r->surfaces, entry) {
        pgraph_gl_surface_download_if_dirty(d, surface);
    }

    qatomic_set(&r->download_dirty_surfaces_pending, false);
    qemu_event_set(&r->dirty_surfaces_download_complete);
}

/* Upscale one row by replicating each pixel `factor` times (point sampling).
 * Counterpart of surface_copy_shrink_row; same alignment caveat applies to
 * the 32/16-bit fast paths. Closing braces continue on the next source line.
 */
static void surface_copy_expand_row(uint8_t *out, uint8_t *in,
                                    unsigned int width,
                                    unsigned int bytes_per_pixel,
                                    unsigned int factor)
{
    if (bytes_per_pixel == 4) {
        for (unsigned int x = 0; x < width; x++) {
            for (unsigned int i = 0; i < factor; i++) {
                *(uint32_t *)out = *(uint32_t *)in;
                out += bytes_per_pixel;
            }
            in += bytes_per_pixel;
        }
    } else if (bytes_per_pixel == 2) {
        for (unsigned int x = 0; x < width; x++) {
            for (unsigned int i = 0; i < factor; i++) {
                *(uint16_t *)out = *(uint16_t *)in;
                out += bytes_per_pixel;
            }
            in += bytes_per_pixel;
        }
    } else {
        for (unsigned int x = 0; x < width; x++) {
            for (unsigned int i = 0; i < factor; i++) {
                memcpy(out, in, bytes_per_pixel);
                out += bytes_per_pixel;
            }
            in += bytes_per_pixel;
} +} + +static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, + unsigned int height, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + size_t out_pitch = width * bytes_per_pixel * factor; + + for (unsigned int y = 0; y < height; y++) { + surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); + uint8_t *row_in = out; + for (unsigned int i = 1; i < factor; i++) { + out += out_pitch; + memcpy(out, row_in, out_pitch); + } + in += width * bytes_per_pixel; + out += out_pitch; + } +} + +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + PGRAPHState *pg = &d->pgraph; + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + // FIXME: Don't query GL for texture binding + GLint last_texture_binding; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); + + // FIXME: Replace with FBO to not disturb current state + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + if (surface->swizzle) { + buf = (uint8_t*)g_malloc(surface->size); + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + } + + /* FIXME: Replace this flip/scaling */ + + // This is VRAM so we can't do this inplace! 
+ uint8_t *flipped_buf = (uint8_t *)g_malloc( + surface->height * surface->width * surface->fmt.bytes_per_pixel); + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) + * surface->fmt.bytes_per_pixel], + &buf[surface->pitch * irow], + surface->width * surface->fmt.bytes_per_pixel); + } + + uint8_t *gl_read_buf = flipped_buf; + unsigned int width = surface->width, height = surface->height; + + if (pg->surface_scale_factor > 1) { + pgraph_apply_scaling_factor(pg, &width, &height); + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); + gl_read_buf = pg->scale_buf; + uint8_t *out = gl_read_buf, *in = flipped_buf; + surface_copy_expand(out, in, surface->width, surface->height, + surface->fmt.bytes_per_pixel, + d->pgraph.surface_scale_factor); + } + + int prev_unpack_alignment; + glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); + if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + } else { + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, + height, 0, surface->fmt.gl_format, surface->fmt.gl_type, + gl_read_buf); + glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); + g_free(flipped_buf); + if (surface->swizzle) { + g_free(buf); + } + + // Rebind previous framebuffer binding + glBindTexture(GL_TEXTURE_2D, last_texture_binding); + + bind_current_surface(d); +} + +static void compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) +{ + #define DO_CMP(fld) \ + if (s1->fld != s2->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)s1->fld, (long int)s2->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(gl_buffer) + DO_CMP(fmt.bytes_per_pixel) + 
DO_CMP(fmt.gl_attachment) + DO_CMP(fmt.gl_internal_format) + DO_CMP(fmt.gl_format) + DO_CMP(fmt.gl_type) + DO_CMP(color) + DO_CMP(swizzle) + DO_CMP(vram_addr) + DO_CMP(width) + DO_CMP(height) + DO_CMP(pitch) + DO_CMP(size) + DO_CMP(dma_addr) + DO_CMP(dma_len) + DO_CMP(frame_time) + DO_CMP(draw_time) + #undef DO_CMP +} + +static void populate_surface_binding_entry_sized(NV2AState *d, bool color, + unsigned int width, + unsigned int height, + SurfaceBinding *entry) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + Surface *surface; + hwaddr dma_address; + SurfaceFormatInfo fmt; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format < + ARRAY_SIZE(kelvin_surface_color_format_gl_map)); + fmt = kelvin_surface_color_format_gl_map[pg->surface_shape.color_format]; + if (fmt.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + assert(pg->surface_shape.zeta_format != 0); + assert(pg->surface_shape.zeta_format < + ARRAY_SIZE(kelvin_surface_zeta_float_format_gl_map)); + const SurfaceFormatInfo *map = + pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_gl_map : + kelvin_surface_zeta_fixed_format_gl_map; + fmt = map[pg->surface_shape.zeta_format]; + } + + DMAObject dma = nv_dma_load(d, dma_address); + /* There's a bunch of bugs that could cause us to hit this function + * at the wrong time and get a invalid dma object. + * Check that it's sane. 
*/ + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + // assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + assert(surface->pitch % fmt.bytes_per_pixel == 0); + assert((dma.address & ~0x07FFFFFF) == 0); + + entry->shape = (color || !r->color_binding) ? pg->surface_shape : + r->color_binding->shape; + entry->gl_buffer = 0; + entry->fmt = fmt; + entry->color = color; + entry->swizzle = + (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + entry->vram_addr = dma.address + surface->offset; + entry->width = width; + entry->height = height; + entry->pitch = surface->pitch; + entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); + entry->upload_pending = true; + entry->download_pending = false; + entry->draw_dirty = false; + entry->dma_addr = dma.address; + entry->dma_len = dma.limit; + entry->frame_time = pg->frame_time; + entry->draw_time = pg->draw_time; + entry->cleared = false; +} + +static void populate_surface_binding_entry(NV2AState *d, bool color, + SurfaceBinding *entry) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int width, height; + + if (color || !r->color_binding) { + surface_get_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + /* Since we determine surface dimensions based on the clipping + * rectangle, make sure to include the surface offset as well. 
+ */ + if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } + } else { + width = r->color_binding->width; + height = r->color_binding->height; + } + + populate_surface_binding_entry_sized(d, color, width, height, entry); +} + +static void update_surface_part(NV2AState *d, bool upload, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding entry; + populate_surface_binding_entry(d, color, &entry); + + Surface *surface = color ? &pg->surface_color : &pg->surface_zeta; + + bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( + d->vram, entry.vram_addr, entry.size, + DIRTY_MEMORY_NV2A); + + if (upload && (surface->buffer_dirty || mem_dirty)) { + pgraph_gl_unbind_surface(d, color); + + SurfaceBinding *found = pgraph_gl_surface_get(d, entry.vram_addr); + if (found != NULL) { + /* FIXME: Support same color/zeta surface target? In the mean time, + * if the surface we just found is currently bound, just unbind it. + */ + SurfaceBinding *other = (color ? r->zeta_binding + : r->color_binding); + if (found == other) { + NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); + pgraph_gl_unbind_surface(d, !color); + } + } + + trace_nv2a_pgraph_surface_target( + color ? "COLOR" : "ZETA", entry.vram_addr, + entry.swizzle ? "sz" : "ln", + pg->surface_shape.anti_aliasing, + pg->surface_shape.clip_x, + pg->surface_shape.clip_width, pg->surface_shape.clip_y, + pg->surface_shape.clip_height); + + bool should_create = true; + + if (found != NULL) { + bool is_compatible = + check_surface_compatibility(found, &entry, false); + +#define TRACE_ARGS found->vram_addr, found->width, found->height, \ + found->swizzle ? 
"sz" : "ln", \ + found->shape.anti_aliasing, found->shape.clip_x, \ + found->shape.clip_width, found->shape.clip_y, \ + found->shape.clip_height, found->pitch + if (found->color) { + trace_nv2a_pgraph_surface_match_color(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS); + } +#undef TRACE_ARGS + + assert(!(entry.swizzle && pg->clearing)); + + if (found->swizzle != entry.swizzle) { + /* Clears should only be done on linear surfaces. Avoid + * synchronization by allowing (1) a surface marked swizzled to + * be cleared under the assumption the entire surface is + * destined to be cleared and (2) a fully cleared linear surface + * to be marked swizzled. Strictly match size to avoid + * pathological cases. + */ + is_compatible &= (pg->clearing || found->cleared) && + check_surface_compatibility(found, &entry, true); + if (is_compatible) { + trace_nv2a_pgraph_surface_migrate_type( + entry.swizzle ? "swizzled" : "linear"); + } + } + + if (is_compatible && color && + !check_surface_compatibility(found, &entry, true)) { + SurfaceBinding zeta_entry; + populate_surface_binding_entry_sized( + d, !color, found->width, found->height, &zeta_entry); + hwaddr color_end = found->vram_addr + found->size; + hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; + is_compatible &= found->vram_addr >= zeta_end || + zeta_entry.vram_addr >= color_end; + } + + if (is_compatible && !color && r->color_binding) { + is_compatible &= (found->width == r->color_binding->width) && + (found->height == r->color_binding->height); + } + + if (is_compatible) { + /* FIXME: Refactor */ + pg->surface_binding_dim.width = found->width; + pg->surface_binding_dim.clip_x = found->shape.clip_x; + pg->surface_binding_dim.clip_width = found->shape.clip_width; + pg->surface_binding_dim.height = found->height; + pg->surface_binding_dim.clip_y = found->shape.clip_y; + pg->surface_binding_dim.clip_height = found->shape.clip_height; + found->upload_pending |= mem_dirty; + 
pg->surface_zeta.buffer_dirty |= color; + should_create = false; + } else { + trace_nv2a_pgraph_surface_evict_reason( + "incompatible", found->vram_addr); + compare_surfaces(found, &entry); + pgraph_gl_surface_download_if_dirty(d, found); + pgraph_gl_surface_invalidate(d, found); + } + } + + if (should_create) { + glGenTextures(1, &entry.gl_buffer); + glBindTexture(GL_TEXTURE_2D, entry.gl_buffer); + NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer, + "%s format: %0X, width: %d, height: %d " + "(addr %" HWADDR_PRIx ")", + color ? "color" : "zeta", + color ? pg->surface_shape.color_format + : pg->surface_shape.zeta_format, + entry.width, entry.height, surface->offset); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + unsigned int width = entry.width, height = entry.height; + pgraph_apply_scaling_factor(pg, &width, &height); + glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, + height, 0, entry.fmt.gl_format, entry.fmt.gl_type, + NULL); + found = surface_put(d, entry.vram_addr, &entry); + + /* FIXME: Refactor */ + pg->surface_binding_dim.width = entry.width; + pg->surface_binding_dim.clip_x = entry.shape.clip_x; + pg->surface_binding_dim.clip_width = entry.shape.clip_width; + pg->surface_binding_dim.height = entry.height; + pg->surface_binding_dim.clip_y = entry.shape.clip_y; + pg->surface_binding_dim.clip_height = entry.shape.clip_height; + + if (color && r->zeta_binding && (r->zeta_binding->width != entry.width || r->zeta_binding->height != entry.height)) { + pg->surface_zeta.buffer_dirty = true; + } + } + +#define TRACE_ARGS found->vram_addr, found->width, found->height, \ + found->swizzle ? 
"sz" : "ln", found->shape.anti_aliasing, \ + found->shape.clip_x, found->shape.clip_width, \ + found->shape.clip_y, found->shape.clip_height, found->pitch + + if (color) { + if (should_create) { + trace_nv2a_pgraph_surface_create_color(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS); + } + + r->color_binding = found; + } else { + if (should_create) { + trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS); + } + r->zeta_binding = found; + } +#undef TRACE_ARGS + + glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment, + GL_TEXTURE_2D, found->gl_buffer, 0); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == + GL_FRAMEBUFFER_COMPLETE); + + surface->buffer_dirty = false; + } + + if (!upload && surface->draw_dirty) { + if (!tcg_enabled()) { + /* FIXME: Cannot monitor for reads/writes; flush now */ + surface_download(d, + color ? r->color_binding : + r->zeta_binding, + true); + } + + surface->write_enabled_cache = false; + surface->draw_dirty = false; + } +} + +void pgraph_gl_unbind_surface(NV2AState *d, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (color) { + if (r->color_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); + r->color_binding = NULL; + } + } else { + if (r->zeta_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + r->zeta_binding = NULL; + } + } +} + +void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + pg->surface_shape.z_format = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + color_write = color_write && + (pg->clearing || pgraph_color_write_enabled(pg)); 
+ zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); + + if (upload) { + bool fb_dirty = framebuffer_dirty(pg); + if (fb_dirty) { + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } + + if (pg->surface_color.buffer_dirty) { + pgraph_gl_unbind_surface(d, true); + } + + if (color_write) { + update_surface_part(d, true, true); + } + + if (pg->surface_zeta.buffer_dirty) { + pgraph_gl_unbind_surface(d, false); + } + + if (zeta_write) { + update_surface_part(d, true, false); + } + } else { + if ((color_write || pg->surface_color.write_enabled_cache) + && pg->surface_color.draw_dirty) { + update_surface_part(d, false, true); + } + if ((zeta_write || pg->surface_zeta.write_enabled_cache) + && pg->surface_zeta.draw_dirty) { + update_surface_part(d, false, false); + } + } + + if (upload) { + pg->draw_time++; + } + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + if (r->color_binding) { + r->color_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_gl_upload_surface_data(d, r->color_binding, false); + r->color_binding->draw_time = pg->draw_time; + r->color_binding->swizzle = swizzle; + } + } + + if (r->zeta_binding) { + r->zeta_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_gl_upload_surface_data(d, r->zeta_binding, false); + r->zeta_binding->draw_time = pg->draw_time; + r->zeta_binding->swizzle = swizzle; + } + } + + // Sanity check color and zeta dimensions match + if (r->color_binding && r->zeta_binding) { + assert((r->color_binding->width == r->zeta_binding->width) + && (r->color_binding->height == r->zeta_binding->height)); + } + + surface_evict_old(d); +} + +// FIXME: Move to common +static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, + unsigned int *height) +{ + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + if (swizzle) { + 
*width = 1 << pg->surface_shape.log_width; + *height = 1 << pg->surface_shape.log_height; + } else { + *width = pg->surface_shape.clip_width; + *height = pg->surface_shape.clip_height; + } +} + +void pgraph_gl_init_surfaces(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + pgraph_gl_reload_surface_scale_factor(pg); + glGenFramebuffers(1, &r->gl_framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer); + QTAILQ_INIT(&r->surfaces); + r->downloads_pending = false; + qemu_event_init(&r->downloads_complete, false); + qemu_event_init(&r->dirty_surfaces_download_complete, false); + + init_render_to_texture(pg); +} + +void pgraph_gl_deinit_surfaces(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + glDeleteFramebuffers(1, &r->gl_framebuffer); + // TODO: clear out surfaces +} + +void pgraph_gl_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool update_surface = (r->color_binding || r->zeta_binding); + + /* Clear last surface shape to force recreation of buffers at next draw */ + pg->surface_color.draw_dirty = false; + pg->surface_zeta.draw_dirty = false; + memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); + pgraph_gl_unbind_surface(d, true); + pgraph_gl_unbind_surface(d, false); + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + pgraph_gl_surface_invalidate(d, s); + } + + pgraph_gl_reload_surface_scale_factor(pg); + + if (update_surface) { + pgraph_gl_surface_update(d, true, true, true); + } +} diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c new file mode 100644 index 00000000000..bf072f44d65 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -0,0 +1,819 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it 
and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/fast-hash.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "hw/xbox/nv2a/pgraph/s3tc.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "debug.h" +#include "renderer.h" + +static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); +static void texture_binding_destroy(gpointer data); + +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = + (struct pgraph_texture_possibly_dirty_struct *)opaque; + + struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding == NULL || tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + +void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, + hwaddr addr, hwaddr 
size) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; + addr &= TARGET_PAGE_MASK; + assert(end <= memory_region_size(d->vram)); + + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; + + lru_visit_active(&r->texture_cache, + mark_textures_possibly_dirty_visitor, + &test); +} + +static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, + DIRTY_MEMORY_NV2A_TEX); +} + +// Check if any of the pages spanned by the a texture are dirty. +static bool check_texture_possibly_dirty(NV2AState *d, + hwaddr texture_vram_offset, + unsigned int length, + hwaddr palette_vram_offset, + unsigned int palette_length) +{ + bool possibly_dirty = false; + if (check_texture_dirty(d, texture_vram_offset, length)) { + possibly_dirty = true; + pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length); + } + if (palette_length && check_texture_dirty(d, palette_vram_offset, + palette_length)) { + possibly_dirty = true; + pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset, + palette_length); + } + return possibly_dirty; +} + +static void apply_texture_parameters(TextureBinding *binding, + const BasicColorFormatInfo *f, + unsigned int dimensionality, + unsigned int filter, + unsigned int address, + bool is_bordered, + uint32_t border_color) +{ + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); + unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); + unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); + + if (f->linear) { + /* somtimes games try to set 
mipmap min filters on linear textures. + * this could indicate a bug... */ + switch (min_filter) { + case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; + break; + case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; + break; + } + } + + if (min_filter != binding->min_filter) { + glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, + pgraph_texture_min_filter_gl_map[min_filter]); + binding->min_filter = min_filter; + } + if (mag_filter != binding->mag_filter) { + glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, + pgraph_texture_mag_filter_gl_map[mag_filter]); + binding->mag_filter = mag_filter; + } + + /* Texture wrapping */ + assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + if (addru != binding->addru) { + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, + pgraph_texture_addr_gl_map[addru]); + binding->addru = addru; + } + bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + if (dimensionality > 1) { + if (addrv != binding->addrv) { + assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, + pgraph_texture_addr_gl_map[addrv]); + binding->addrv = addrv; + } + needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + } + if (dimensionality > 2) { + if (addrp != binding->addrp) { + assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, + pgraph_texture_addr_gl_map[addrp]); + binding->addrp = addrp; + } + needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + } + + if (!is_bordered && needs_border_color) { + if (!binding->border_color_set || binding->border_color != border_color) { + /* FIXME: Color channels might be wrong order */ + 
GLfloat gl_border_color[4]; + pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color); + glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, + gl_border_color); + + binding->border_color_set = true; + binding->border_color = border_color; + } + } +} + +void pgraph_gl_bind_textures(NV2AState *d) +{ + int i; + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("%s", __func__); + + for (i=0; ivram)); + assert((palette_vram_offset + palette_length) + < memory_region_size(d->vram)); + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + + SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset); + TextureBinding *tbind = r->texture_binding[i]; + if (!pg->texture_dirty[i] && tbind) { + bool reusable = false; + if (surface && tbind->draw_time == surface->draw_time) { + reusable = true; + } else if (!surface) { + possibly_dirty = check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + palette_vram_offset, + is_indexed ? 
palette_length : 0); + possibly_dirty_checked = true; + reusable = !possibly_dirty; + } + + if (reusable) { + glBindTexture(r->texture_binding[i]->gl_target, + r->texture_binding[i]->gl_texture); + apply_texture_parameters(r->texture_binding[i], + &kelvin_color_format_info_map[state.color_format], + state.dimensionality, + filter, + address, + state.border, + border_color); + continue; + } + } + + /* + * Check active surfaces to see if this texture was a render target + */ + bool surf_to_tex = false; + if (surface != NULL) { + surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility( + surface, &state); + + if (surf_to_tex && surface->upload_pending) { + pgraph_gl_upload_surface_data(d, surface, false); + } + } + + if (!surf_to_tex) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this texture may index + hwaddr tex_vram_end = texture_vram_offset + length - 1; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= tex_vram_end + || texture_vram_offset >= surf_vram_end); + if (overlapping) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + } + } + + TextureKey key; + memset(&key, 0, sizeof(TextureKey)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = length; + if (is_indexed) { + key.palette_vram_offset = palette_vram_offset; + key.palette_length = palette_length; + } + + // Search for existing texture binding in cache + uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); + LruNode *found = lru_lookup(&r->texture_cache, + tex_binding_hash, &key); + TextureLruNode *key_out = container_of(found, TextureLruNode, node); + possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; + + if (!surf_to_tex && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + 
palette_vram_offset, + is_indexed ? palette_length : 0); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + palette_vram_offset; + + uint64_t tex_data_hash = 0; + if (!surf_to_tex && possibly_dirty) { + tex_data_hash = fast_hash(texture_data, length); + if (is_indexed) { + tex_data_hash ^= fast_hash(palette_data, palette_length); + } + } + + // Free existing binding, if texture data has changed + bool must_destroy = (key_out->binding != NULL) + && possibly_dirty + && (key_out->binding->data_hash != tex_data_hash); + if (must_destroy) { + texture_binding_destroy(key_out->binding); + key_out->binding = NULL; + } + + if (key_out->binding == NULL) { + // Must create the texture + key_out->binding = generate_texture(state, texture_data, palette_data); + key_out->binding->data_hash = tex_data_hash; + key_out->binding->scale = 1; + } else { + // Saved an upload! Reuse existing texture in graphics memory. 
+ glBindTexture(key_out->binding->gl_target, + key_out->binding->gl_texture); + } + + key_out->possibly_dirty = false; + TextureBinding *binding = key_out->binding; + binding->refcnt++; + + if (surf_to_tex && binding->draw_time < surface->draw_time) { + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i); + binding->draw_time = surface->draw_time; + if (binding->gl_target == GL_TEXTURE_RECTANGLE) { + binding->scale = pg->surface_scale_factor; + } else { + binding->scale = 1; + } + } + + apply_texture_parameters(binding, + &kelvin_color_format_info_map[state.color_format], + state.dimensionality, + filter, + address, + state.border, + border_color); + + if (r->texture_binding[i]) { + if (r->texture_binding[i]->gl_target != binding->gl_target) { + glBindTexture(r->texture_binding[i]->gl_target, 0); + } + texture_binding_destroy(r->texture_binding[i]); + } + r->texture_binding[i] = binding; + pg->texture_dirty[i] = false; + } + NV2A_GL_DGROUP_END(); +} + +static enum S3TC_DECOMPRESS_FORMAT +gl_internal_format_to_s3tc_enum(GLint gl_internal_format) +{ + switch (gl_internal_format) { + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT1; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT3; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT5; + default: + assert(!"Invalid format"); + } +} + +static void upload_gl_texture(GLenum gl_target, + const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + + unsigned int adjusted_width = s.width; + unsigned int adjusted_height = s.height; + unsigned int adjusted_pitch = s.pitch; + unsigned int adjusted_depth = s.depth; + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 
2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width * (s.pitch / s.width); + adjusted_depth = MAX(16, s.depth * 2); + } + + switch(gl_target) { + case GL_TEXTURE_1D: + assert(false); + break; + case GL_TEXTURE_RECTANGLE: { + /* Can't handle strides unaligned to pixels */ + assert(s.pitch % f.bytes_per_pixel == 0); + + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data, palette_data, adjusted_width, adjusted_height, 1, + adjusted_pitch, 0, NULL); + glPixelStorei(GL_UNPACK_ROW_LENGTH, + converted ? 0 : adjusted_pitch / f.bytes_per_pixel); + glTexImage2D(gl_target, 0, f.gl_internal_format, + adjusted_width, adjusted_height, 0, + f.gl_format, f.gl_type, + converted ? converted : texture_data); + + if (converted) { + g_free(converted); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + break; + } + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { + + unsigned int width = adjusted_width, height = adjusted_height; + + int level; + for (level = 0; level < s.levels; level++) { + width = MAX(width, 1); + height = MAX(height, 1); + + if (f.gl_format == 0) { /* compressed */ + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size + unsigned int block_size = + f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? 
+ 8 : 16; + unsigned int physical_width = (width + 3) & ~3, + physical_height = (height + 3) & ~3; + if (physical_width != width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); + } + uint8_t *converted = s3tc_decompress_2d( + gl_internal_format_to_s3tc_enum(f.gl_internal_format), + texture_data, physical_width, physical_height); + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + } + } + + glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted); + g_free(converted); + if (physical_width != width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + if (s.cubemap && adjusted_width != s.width) { + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + } + texture_data += + physical_width / 4 * physical_height / 4 * block_size; + } else { + unsigned int pitch = width * f.bytes_per_pixel; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data, width, height, + unswizzled, pitch, f.bytes_per_pixel); + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data, width, height, 1, pitch, 0, + NULL); + uint8_t *pixel_data = converted ? converted : unswizzled; + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. 
+ // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + } + + glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, + tex_height, 0, f.gl_format, f.gl_type, + pixel_data); + if (s.cubemap && s.border) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + + break; + } + case GL_TEXTURE_3D: { + + unsigned int width = adjusted_width; + unsigned int height = adjusted_height; + unsigned int depth = adjusted_depth; + + assert(f.linear == false); + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { /* compressed */ + assert(width % 4 == 0 && height % 4 == 0 && + "Compressed 3D texture virtual size"); + width = MAX(width, 4); + height = MAX(height, 4); + depth = MAX(depth, 1); + + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + size_t texture_size = width/4 * height/4 * depth * block_size; + + uint8_t *converted = s3tc_decompress_3d( + gl_internal_format_to_s3tc_enum(f.gl_internal_format), + texture_data, width, height, depth); + + glTexImage3D(gl_target, level, GL_RGBA8, + width, height, depth, 0, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + converted); + + g_free(converted); + + texture_data += texture_size; + } else { + width = MAX(width, 1); + height = MAX(height, 1); + depth = MAX(depth, 1); + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); + unswizzle_box(texture_data, width, height, depth, unswizzled, + row_pitch, slice_pitch, f.bytes_per_pixel); + + 
uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data, width, height, depth, + row_pitch, slice_pitch, NULL); + + glTexImage3D(gl_target, level, f.gl_internal_format, + width, height, depth, 0, + f.gl_format, f.gl_type, + converted ? converted : unswizzled); + + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * depth * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + depth /= 2; + } + break; + } + default: + assert(false); + break; + } +} + +static TextureBinding* generate_texture(const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + + /* Create a new opengl texture */ + GLuint gl_texture; + glGenTextures(1, &gl_texture); + + GLenum gl_target; + if (s.cubemap) { + assert(f.linear == false); + assert(s.dimensionality == 2); + gl_target = GL_TEXTURE_CUBE_MAP; + } else { + if (f.linear) { + /* linear textures use unnormalised texcoords. + * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but + * does not allow repeat and mirror wrap modes. + * (or mipmapping, but xbox d3d says 'Non swizzled and non + * compressed textures cannot be mip mapped.') + * Not sure if that'll be an issue. */ + + /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ + gl_target = GL_TEXTURE_RECTANGLE; + assert(s.dimensionality == 2); + } else { + switch(s.dimensionality) { + case 1: gl_target = GL_TEXTURE_1D; break; + case 2: gl_target = GL_TEXTURE_2D; break; + case 3: gl_target = GL_TEXTURE_3D; break; + default: + assert(false); + break; + } + } + } + + glBindTexture(gl_target, gl_texture); + + NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, + "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, " + "width: %d, height: %d, depth: %d", + texture_data - g_nv2a->vram_ptr, + s.color_format, f.linear ? "" : " (SZ)", + s.dimensionality, s.cubemap ? 
" (Cubemap)" : "", + s.width, s.height, s.depth); + + if (gl_target == GL_TEXTURE_CUBE_MAP) { + + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + size_t length = 0; + unsigned int w = s.width; + unsigned int h = s.height; + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { + length += w/4 * h/4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, + s, texture_data + 0 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + s, texture_data + 1 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + s, texture_data + 2 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + s, texture_data + 3 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + s, texture_data + 4 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + s, texture_data + 5 * length, palette_data); + } else { + upload_gl_texture(gl_target, s, texture_data, palette_data); + } + + /* Linear textures don't support mipmapping */ + if (!f.linear) { + glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, + s.min_mipmap_level); + glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, + s.levels - 1); + } + + if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 + || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { + glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, + (const GLint *)f.gl_swizzle_mask); + } + + TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); + ret->gl_target = gl_target; + 
ret->gl_texture = gl_texture; + ret->refcnt = 1; + ret->draw_time = 0; + ret->data_hash = 0; + ret->min_filter = 0xFFFFFFFF; + ret->mag_filter = 0xFFFFFFFF; + ret->addru = 0xFFFFFFFF; + ret->addrv = 0xFFFFFFFF; + ret->addrp = 0xFFFFFFFF; + ret->border_color_set = false; + return ret; +} + +static void texture_binding_destroy(gpointer data) +{ + TextureBinding *binding = (TextureBinding *)data; + assert(binding->refcnt > 0); + binding->refcnt--; + if (binding->refcnt == 0) { + glDeleteTextures(1, &binding->gl_texture); + g_free(binding); + } +} + +/* functions for texture LRU cache */ +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + memcpy(&tnode->key, key, sizeof(TextureKey)); + + tnode->binding = NULL; + tnode->possibly_dirty = false; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding) { + texture_binding_destroy(tnode->binding); + tnode->binding = NULL; + tnode->possibly_dirty = false; + } +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + return memcmp(&tnode->key, key, sizeof(TextureKey)); +} + +void pgraph_gl_init_texture_cache(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const size_t texture_cache_size = 512; + lru_init(&r->texture_cache); + r->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +void 
pgraph_gl_deinit_texture_cache(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + // Clear out texture cache + lru_flush(&r->texture_cache); + free(r->texture_cache_entries); +} diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c new file mode 100644 index 00000000000..21f42b647c5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/vertex.c @@ -0,0 +1,283 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include +#include "debug.h" +#include "renderer.h" + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, + bool quick) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + + static hwaddr last_addr, last_end; + if (quick && (addr >= last_addr) && (end <= last_end)) { + return; + } + last_addr = addr; + last_end = end; + + size = end - addr; + if (memory_region_test_and_clear_dirty(d->vram, addr, size, + DIRTY_MEMORY_NV2A)) { + glBufferSubData(GL_ARRAY_BUFFER, addr, size, + d->vram_ptr + addr); + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + } +} + +void pgraph_gl_update_entire_memory_buffer(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); +} + +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool updated_memory_buffer = false; + unsigned int num_elements = max_element - min_element + 1; + + if (inline_data) { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + + if (!attr->count) { + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + NV2A_DPRINTF("vertex data 
array format=%d, count=%d, stride=%d\n", + attr->format, attr->count, attr->stride); + + GLint gl_count = attr->count; + GLenum gl_type; + GLboolean gl_normalize; + bool needs_conversion = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt + gl_count = GL_BGRA; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + gl_type = GL_SHORT; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + gl_type = GL_FLOAT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + gl_type = GL_SHORT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + gl_type = GL_INT; + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride, + updated_memory_buffer); + updated_memory_buffer = true; + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. + pgraph_update_inline_value(attr, last_entry); + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + if (needs_conversion) { + glVertexAttribIPointer(i, gl_count, gl_type, stride, + (void *)attrib_data_addr); + } else { + glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride, + (void *)attrib_data_addr); + } + + glEnableVertexAttribArray(i); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + } + + NV2A_GL_DGROUP_END(); +} + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int offset = 0; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->count == 0) { + continue; + } + + /* FIXME: Double check */ + offset = ROUND_UP(offset, attr->size); + attr->inline_array_offset = offset; + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", + i, attr->size, attr->count); + offset += attr->size * attr->count; + offset = ROUND_UP(offset, attr->size); + } + + unsigned int vertex_size = offset; + unsigned int index_count = 
pg->inline_array_length*4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t), + NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array); + pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size, + index_count-1); + + return index_count; +} + +static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + memcpy(&vnode->key, key, sizeof(struct VertexKey)); + vnode->initialized = false; +} + +static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + return memcmp(&vnode->key, key, sizeof(VertexKey)); +} + +void pgraph_gl_init_vertex_cache(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const size_t element_cache_size = 50*1024; + lru_init(&r->element_cache); + r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); + assert(r->element_cache_entries != NULL); + GLuint element_cache_buffers[element_cache_size]; + glGenBuffers(element_cache_size, element_cache_buffers); + for (int i = 0; i < element_cache_size; i++) { + r->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; + lru_add_free(&r->element_cache, &r->element_cache_entries[i].node); + } + + r->element_cache.init_node = vertex_cache_entry_init; + r->element_cache.compare_nodes = vertex_cache_entry_compare; + + GLint max_vertex_attributes; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); + assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); + + glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer); + glGenBuffers(1, 
&r->gl_inline_array_buffer); + + glGenBuffers(1, &r->gl_memory_buffer); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), + NULL, GL_DYNAMIC_DRAW); + + glGenVertexArrays(1, &r->gl_vertex_array); + glBindVertexArray(r->gl_vertex_array); + + assert(glGetError() == GL_NO_ERROR); +} diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c new file mode 100644 index 00000000000..7059880373d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -0,0 +1,58 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + + +#include "common.h" + + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array) +{ + const char *flat_s = "flat"; + const char *noperspective_s = "noperspective"; + const char *qualifier_s = smooth ? noperspective_s : flat_s; + const char *qualifiers[11] = { + noperspective_s, flat_s, qualifier_s, qualifier_s, + qualifier_s, qualifier_s, noperspective_s, noperspective_s, + noperspective_s, noperspective_s, noperspective_s + }; + + const char *in_out_s = in ? 
"in" : "out"; + + const char *float_s = "float"; + const char *vec4_s = "vec4"; + const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s, + float_s, vec4_s, vec4_s, vec4_s, vec4_s }; + + const char *prefix_s = prefix ? "v_" : ""; + const char *names[11] = { + "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1", + "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3", + }; + const char *suffix_s = array ? "[]" : ""; + + for (int i = 0; i < 11; i++) { + if (location) { + mstring_append_fmt(out, "layout(location = %d) ", i); + } + mstring_append_fmt(out, "%s %s %s %s%s%s;\n", + qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s); + } + + return out; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h new file mode 100644 index 00000000000..6820a1dcb19 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.h @@ -0,0 +1,38 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_NV2A_SHADERS_COMMON_H +#define HW_NV2A_SHADERS_COMMON_H + +#include "qemu/mstring.h" +#include + +#define GLSL_C(idx) "c[" stringify(idx) "]" +#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" + +#define GLSL_C_MAT4(idx) \ + "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ + GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" + +#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n" + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c new file mode 100644 index 00000000000..0e738f02806 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -0,0 +1,228 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "geom.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan) +{ + /* FIXME: Missing support for 2-sided-poly mode */ + assert(polygon_front_mode == polygon_back_mode); + enum ShaderPolygonMode polygon_mode = polygon_front_mode; + + /* POINT mode shouldn't require any special work */ + if (polygon_mode == POLY_MODE_POINT) { + return NULL; + } + + /* Handle LINE and FILL mode */ + const char *layout_in = NULL; + const char *layout_out = NULL; + const char *body = NULL; + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return NULL; + case PRIM_TYPE_LINES: return NULL; + case PRIM_TYPE_LINE_LOOP: return NULL; + case PRIM_TYPE_LINE_STRIP: return NULL; + case PRIM_TYPE_TRIANGLES: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(0, 0);\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + /* Imagine a quad made of a tristrip, the comments tell you which + * vertex we are using */ + body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" /* bottom right */ + " }\n" + " emit_vertex(1, 0);\n" /* top right */ + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(0, 0);\n" /* bottom right */ + " } else {\n" + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(1, 0);\n" /* top left */ + " emit_vertex(0, 0);\n" /* top right */ 
+ " }\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_FAN: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" + " }\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_QUADS: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(1, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_QUAD_STRIP: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 3);\n" + " }\n" + " emit_vertex(1, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return NULL; + } + if (polygon_mode == 
POLY_MODE_FILL) { + if (smooth_shading) { + return NULL; + } + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " emit_vertex(0, 2);\n" + " emit_vertex(1, 2);\n" + " emit_vertex(2, 2);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + + default: + assert(false); + return NULL; + } + + /* generate a geometry shader to support deprecated primitive types */ + assert(layout_in); + assert(layout_out); + assert(body); + MString *s = mstring_new(); + mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400); + mstring_append(s, layout_in); + mstring_append(s, layout_out); + mstring_append(s, "\n"); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false); + + if (smooth_shading) { + mstring_append(s, + "void emit_vertex(int index, int _unused) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[index];\n" + " vtxD0 = v_vtxD0[index];\n" + " vtxD1 = v_vtxD1[index];\n" + " vtxB0 = v_vtxB0[index];\n" + " vtxB1 = v_vtxB1[index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } else { + mstring_append(s, + "void emit_vertex(int index, int provoking_index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" + " vtxD0 = 
v_vtxD0[provoking_index];\n" + " vtxD1 = v_vtxD1[provoking_index];\n" + " vtxB0 = v_vtxB0[provoking_index];\n" + " vtxB1 = v_vtxB1[provoking_index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } + + mstring_append(s, "\n" + "void main() {\n"); + mstring_append(s, body); + mstring_append(s, "}\n"); + + return s; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h new file mode 100644 index 00000000000..9ca605be71b --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -0,0 +1,34 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/meson.build b/hw/xbox/nv2a/pgraph/glsl/meson.build new file mode 100644 index 00000000000..82df3f7edee --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/meson.build @@ -0,0 +1,8 @@ +specific_ss.add([files( + 'common.c', + 'geom.c', + 'psh.c', + 'vsh.c', + 'vsh-ff.c', + 'vsh-prog.c', + )]) diff --git a/hw/xbox/nv2a/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c similarity index 90% rename from hw/xbox/nv2a/psh.c rename to hw/xbox/nv2a/pgraph/glsl/psh.c index ca9bffe79d4..58ad5cf7acf 100644 --- a/hw/xbox/nv2a/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -3,7 +3,7 @@ * * Copyright (c) 2013 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson + * Copyright (c) 2020-2024 Matt Borgerson * * Based on: * Cxbx, PixelShader.cpp @@ -34,9 +34,9 @@ #include #include -#include "qapi/qmp/qstring.h" - -#include "shaders_common.h" +#include "common.h" +#include "hw/xbox/nv2a/debug.h" +#include "hw/xbox/nv2a/pgraph/psh.h" #include "psh.h" /* @@ -575,7 +575,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s return NULL; case PS_TEXTUREMODES_PROJECT2D: - return state->rect_tex[i] ? sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: case PS_TEXTUREMODES_BUMPENVMAP_LUM: @@ -584,12 +584,15 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } - return state->rect_tex[i] ? 
sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_PROJECT3D: case PS_TEXTUREMODES_DOT_STR_3D: + if (state->tex_x8y24[i] && state->vulkan) { + return "usampler2D"; + } if (state->shadow_map[i]) { - return state->rect_tex[i] ? sampler2DRect : sampler2D; + return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; } return sampler3D; @@ -634,12 +637,28 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa return; } - mstring_append_fmt(vars, - "pT%d.xy *= texScale%d;\n" - "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", - i, i, i, i, i); - + mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i); const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func]; + if (ps->state.rect_tex[i] && ps->state.vulkan) { + if (ps->state.tex_x8y24[i]) { + mstring_append_fmt( + vars, + "uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i); + mstring_append_fmt( + vars, + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);", + i, i); + } else { + mstring_append_fmt( + vars, + "vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, + i, i, i); + } + } else { + mstring_append_fmt( + vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i, + i); + } // Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float. 
if (compare_z) { @@ -685,18 +704,69 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]); } +static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex) +{ + // FIXME: Convolution for 2D textures + // FIXME: Quincunx + assert(ps->state.rect_tex[tex]); + + if (ps->state.vulkan) { + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n" + " t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex); + } else { + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n" + " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex); + + } +} + static MString* psh_convert(struct PixelShader *ps) { int i; + const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove + MString *preflight = mstring_new(); - mstring_append(preflight, ps->state.smooth_shading ? 
- STRUCT_VERTEX_DATA_IN_SMOOTH : - STRUCT_VERTEX_DATA_IN_FLAT); - mstring_append(preflight, "\n"); - mstring_append(preflight, "out vec4 fragColor;\n"); - mstring_append(preflight, "\n"); - mstring_append(preflight, "uniform vec4 fogColor;\n"); + pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan, + ps->state.smooth_shading, true, false, false); + + if (ps->state.vulkan) { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n" + "layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING); + } else { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n"); + } + + mstring_append_fmt(preflight, "%sfloat alphaRef;\n" + "%svec4 fogColor;\n" + "%sivec4 clipRegion[8];\n", + u, u, u); + for (int i = 0; i < 4; i++) { + mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n" + "%sfloat bumpScale%d;\n" + "%sfloat bumpOffset%d;\n" + "%sfloat texScale%d;\n", + u, i, u, i, u, i, u, i); + } + for (int i = 0; i < 9; i++) { + for (int j = 0; j < 2; j++) { + mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i); + } + } + + if (ps->state.vulkan) { + mstring_append(preflight, "};\n"); + } const char *dotmap_funcs[] = { "dotmap_zero_to_one", @@ -766,22 +836,12 @@ static MString* psh_convert(struct PixelShader *ps) " vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n" " vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n" " vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n" - "vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n" - " vec4 sum = vec4(0.0);\n" - " for (int i = 0; i < 9; i++) {\n" - " sum += gaussian3x3[i]*textureProj(sampler,\n" - " texCoord + vec3(convolution3x3[i], 0.0));\n" - " }\n" - " return sum;\n" - "}\n" ); /* Window Clipping */ MString *clip = mstring_new(); - mstring_append(preflight, "uniform ivec4 clipRegion[8];\n"); - mstring_append_fmt(clip, "/* Window-clip (%s) */\n", - ps->state.window_clip_exclusive ? 
- "Exclusive" : "Inclusive"); + mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n", + ps->state.window_clip_exclusive ? "Exc" : "Inc"); if (!ps->state.window_clip_exclusive) { mstring_append(clip, "bool clipContained = false;\n"); } @@ -856,23 +916,27 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, false, vars); } else { - const char *lookup = "textureProj"; - if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) - || (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) { - /* FIXME: Quincunx looks better than Linear and costs less than - * Gaussian, but Gaussian should be plenty fast so use it for - * now. - */ - if (ps->state.rect_tex[i]) { - lookup = "gaussianFilter2DRectProj"; + apply_border_adjustment(ps, vars, i, "pT%d"); + mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); + if (ps->state.rect_tex[i]) { + if ((ps->state.conv_tex[i] == + CONVOLUTION_FILTER_GAUSSIAN) || + (ps->state.conv_tex[i] == + CONVOLUTION_FILTER_QUINCUNX)) { + apply_convolution_filter(ps, vars, i); } else { - NV2A_UNIMPLEMENTED("Convolution for 2D textures"); + if (ps->state.vulkan) { + mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", + i, i, i, i); + } else { + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", + i, i, i); + } } + } else { + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", + i, i, i); } - apply_border_adjustment(ps, vars, i, "pT%d"); - mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); - mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n", - i, lookup, i, i); } break; } @@ -880,6 +944,7 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, true, vars); } else { + assert(!ps->state.rect_tex[i]); apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "vec4 t%d = 
textureProj(texSamp%d, pT%d.xyzw);\n", i, i, i); @@ -906,7 +971,6 @@ static MString* psh_convert(struct PixelShader *ps) } case PS_TEXTUREMODES_BUMPENVMAP: assert(i >= 1); - mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -925,9 +989,6 @@ static MString* psh_convert(struct PixelShader *ps) break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); - mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i); - mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i); - mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -1060,8 +1121,10 @@ static MString* psh_convert(struct PixelShader *ps) break; } - mstring_append_fmt(preflight, "uniform float texScale%d;\n", i); if (sampler_type != NULL) { + if (ps->state.vulkan) { + mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i); + } mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i); /* As this means a texture fetch does happen, do alphakill */ @@ -1091,7 +1154,6 @@ static MString* psh_convert(struct PixelShader *ps) } if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) { - mstring_append_fmt(preflight, "uniform float alphaRef;\n"); if (ps->state.alpha_func == ALPHA_FUNC_NEVER) { mstring_append(ps->code, "discard;\n"); } else { @@ -1112,10 +1174,6 @@ static MString* psh_convert(struct PixelShader *ps) } } - for (i = 0; i < ps->num_const_refs; i++) { - mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]); - } - for (i = 0; i < ps->num_var_refs; i++) { mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]); if (strcmp(ps->var_refs[i], "r0") == 0) { @@ -1128,7 +1186,7 @@ static MString* psh_convert(struct 
PixelShader *ps) } MString *final = mstring_new(); - mstring_append(final, "#version 330\n\n"); + mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400); mstring_append(final, mstring_get_str(preflight)); mstring_append(final, "void main() {\n"); mstring_append(final, mstring_get_str(clip)); @@ -1175,7 +1233,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out) out->cd_alphablue = flags & 0x40; } -MString *psh_translate(const PshState state) +MString *pgraph_gen_psh_glsl(const PshState state) { int i; struct PixelShader ps; diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h new file mode 100644 index 00000000000..1ae0b0db7ed --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/psh.h @@ -0,0 +1,41 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2013 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * Based on: + * Cxbx, PixelShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Xeon, XBD3DPixelShader.cpp + * Copyright (c) 2003 _SF_ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define PSH_UBO_BINDING 1 +#define PSH_TEX_BINDING 2 + +MString *pgraph_gen_psh_glsl(const PshState state); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c new file mode 100644 index 00000000000..59749003cda --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -0,0 +1,497 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh-ff.h" + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle); + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms) +{ + int i, j; + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + /* generate vertex shader mimicking fixed function */ + mstring_append(header, +"#define position v0\n" +"#define weight v1\n" +"#define normal v2.xyz\n" +"#define diffuse v3\n" +"#define specular v4\n" +"#define fogCoord v5.x\n" +"#define pointSize v6\n" +"#define backDiffuse v7\n" +"#define backSpecular v8\n" +"#define texture0 v9\n" +"#define texture1 v10\n" +"#define texture2 v11\n" +"#define texture3 v12\n" +"#define reserved1 v13\n" +"#define reserved2 v14\n" +"#define reserved3 v15\n" +"\n"); + mstring_append_fmt(uniforms, +"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" +"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" +"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u +); + mstring_append(header, +"\n" +GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) +GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) +"\n" +GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) +GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) +GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) +GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) +GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) +GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) +GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) +GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) +GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) +GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) +GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) +GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) +GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) +"\n" +GLSL_DEFINE(modelViewMat0, 
GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) +GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) +GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) +GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) +"\n" +GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) +GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) +GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) +GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) +"\n" +GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) +"\n" +"#define lightAmbientColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" +"#define lightDiffuseColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" +"#define lightSpecularColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" +"\n" +"#define lightSpotFalloff(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" +"#define lightSpotDirection(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" +"\n" +"#define lightLocalRange(i) " + "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" +"\n" +GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") +GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") +"\n" +); + mstring_append_fmt(uniforms, +"%smat4 invViewport;\n", u); + + /* Skinning */ + unsigned int count; + bool mix; + switch (state->skinning) { + case SKINNING_OFF: + mix = false; count = 0; break; + case SKINNING_1WEIGHTS: + mix = true; count = 2; break; + case SKINNING_2WEIGHTS2MATRICES: + mix = false; count = 2; break; + case SKINNING_2WEIGHTS: + mix = true; count = 3; break; + case SKINNING_3WEIGHTS3MATRICES: + mix = false; count = 3; break; + case SKINNING_3WEIGHTS: + mix = true; count = 4; break; + case SKINNING_4WEIGHTS4MATRICES: + mix = false; count = 4; break; + default: + assert(false); + break; + } + 
mstring_append_fmt(body, "/* Skinning mode %d */\n", + state->skinning); + + append_skinning_code(body, mix, count, "vec4", + "tPosition", "position", + "modelViewMat", "xyzw"); + append_skinning_code(body, mix, count, "vec3", + "tNormal", "vec4(normal, 0.0)", + "invModelViewMat", "xyz"); + + /* Normalization */ + if (state->normalization) { + mstring_append(body, "tNormal = normalize(tNormal);\n"); + } + + /* Texgen */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + mstring_append_fmt(body, "/* Texgen for stage %d */\n", + i); + /* Set each component individually */ + /* FIXME: could be nicer if some channels share the same texgen */ + for (j = 0; j < 4; j++) { + /* TODO: TexGen View Model missing! */ + char c = "xyzw"[j]; + char cSuffix = "STRQ"[j]; + switch (state->texgen[i][j]) { + case TEXGEN_DISABLE: + mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", + i, c, i, c); + break; + case TEXGEN_EYE_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", + i, c, cSuffix, i); + break; + case TEXGEN_OBJECT_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", + i, c, cSuffix, i); + break; + case TEXGEN_SPHERE_MAP: + assert(j < 2); /* Channels S,T only! */ + mstring_append(body, "{\n"); + /* FIXME: u, r and m only have to be calculated once */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + //FIXME: tNormal before or after normalization? Always normalize? + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + + /* FIXME: This would consume 1 division fewer and *might* be + * faster than length: + * // [z=1/(2*x) => z=1/x*0.5] + * vec3 ro = r + vec3(0.0, 0.0, 1.0); + * float m = inversesqrt(dot(ro,ro))*0.5; + */ + + mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_REFLECTION_MAP: + assert(j < 3); /* Channels S,T,R only! 
*/ + mstring_append(body, "{\n"); + /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_NORMAL_MAP: + assert(j < 3); /* Channels S,T,R only! */ + mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", + i, c, c); + break; + default: + assert(false); + break; + } + } + } + + /* Apply texture matrices */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (state->texture_matrix_enable[i]) { + mstring_append_fmt(body, + "oT%d = oT%d * texMat%d;\n", + i, i, i); + } + } + + /* Lighting */ + if (state->lighting) { + + //FIXME: Do 2 passes if we want 2 sided-lighting? + + static char alpha_source_diffuse[] = "diffuse.a"; + static char alpha_source_specular[] = "specular.a"; + static char alpha_source_material[] = "material_alpha"; + const char *alpha_source = alpha_source_diffuse; + if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u); + alpha_source = alpha_source_material; + } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { + alpha_source = alpha_source_specular; + } + + if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); + } + + mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); + if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); + } else if (state->emission_src == 
MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); + } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append(body, "oD0.rgb += specular.rgb;\n"); + } + + mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + if (state->light[i] == LIGHT_OFF) { + continue; + } + + /* FIXME: It seems that we only have to handle the surface colors if + * they are not part of the material [= vertex colors]. + * If they are material the cpu will premultiply light + * colors + */ + + mstring_append_fmt(body, "/* Light %d */ {\n", i); + + if (state->light[i] == LIGHT_LOCAL + || state->light[i] == LIGHT_SPOT) { + + mstring_append_fmt(uniforms, + "%svec3 lightLocalPosition%d;\n" + "%svec3 lightLocalAttenuation%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" + " float d = length(VP);\n" +//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? 
+ " VP = normalize(VP);\n" + " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" + " + lightLocalAttenuation%d.y * d\n" + " + lightLocalAttenuation%d.z * d * d);\n" + " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ + " float nDotVP = max(0.0, dot(tNormal, VP));\n" + " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", + i, i, i, i); + + } + + switch(state->light[i]) { + case LIGHT_INFINITE: + + /* lightLocalRange will be 1e+30 here */ + + mstring_append_fmt(uniforms, + "%svec3 lightInfiniteHalfVector%d;\n" + "%svec3 lightInfiniteDirection%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " float attenuation = 1.0;\n" + " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" + " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", + i, i); + + /* FIXME: Do specular */ + + /* FIXME: tBackDiffuse */ + + break; + case LIGHT_LOCAL: + /* Everything done already */ + break; + case LIGHT_SPOT: + /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ + mstring_append_fmt(body, + " vec4 spotDir = lightSpotDirection(%d);\n" + " float invScale = 1/length(spotDir.xyz);\n" + " float cosHalfPhi = -invScale*spotDir.w;\n" + " float cosHalfTheta = invScale + cosHalfPhi;\n" + " float spotDirDotVP = dot(spotDir.xyz, VP);\n" + " float rho = invScale*spotDirDotVP;\n" + " if (rho > cosHalfTheta) {\n" + " } else if (rho <= cosHalfPhi) {\n" + " attenuation = 0.0;\n" + " } else {\n" + " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ + " }\n", + i); + break; + default: + assert(false); + break; + } + + mstring_append_fmt(body, + " float pf;\n" + " if (nDotVP == 0.0) {\n" + " pf = 0.0;\n" + " } else {\n" + " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" + " }\n" + " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" + " vec3 lightDiffuse = 
lightDiffuseColor(%d) * attenuation * nDotVP;\n" + " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", + i, i, i); + + mstring_append(body, + " oD0.xyz += lightAmbient;\n"); + + switch (state->diffuse_src) { + case MATERIAL_COLOR_SRC_MATERIAL: + mstring_append(body, + " oD0.xyz += lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_DIFFUSE: + mstring_append(body, + " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_SPECULAR: + mstring_append(body, + " oD0.xyz += specular.xyz * lightDiffuse;\n"); + break; + } + + mstring_append(body, + " oD1.xyz += specular.xyz * lightSpecular;\n"); + + mstring_append(body, "}\n"); + } + } else { + mstring_append(body, " oD0 = diffuse;\n"); + mstring_append(body, " oD1 = specular;\n"); + } + mstring_append(body, " oB0 = backDiffuse;\n"); + mstring_append(body, " oB1 = backSpecular;\n"); + + /* Fog */ + if (state->fog_enable) { + + /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ + switch(state->foggen) { + case FOGGEN_SPEC_ALPHA: + /* FIXME: Do we have to clamp here? 
*/ + mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); + break; + case FOGGEN_RADIAL: + mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); + break; + case FOGGEN_PLANAR: + case FOGGEN_ABS_PLANAR: + mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); + if (state->foggen == FOGGEN_ABS_PLANAR) { + mstring_append(body, " fogDistance = abs(fogDistance);\n"); + } + break; + case FOGGEN_FOG_X: + mstring_append(body, " float fogDistance = fogCoord;\n"); + break; + default: + assert(false); + break; + } + + } + + /* If skinning is off the composite matrix already includes the MV matrix */ + if (state->skinning == SKINNING_OFF) { + mstring_append(body, " tPosition = position;\n"); + } + + mstring_append(body, + " oPos = invViewport * (tPosition * compositeMat);\n" + ); + + if (state->vulkan) { + mstring_append(body, " oPos.y *= -1;\n"); + } else { + mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n"); + } + + /* FIXME: Testing */ + if (state->point_params_enable) { + mstring_append_fmt( + body, + " float d_e = length(position * modelViewMat0);\n" + " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", + state->point_params[0], state->point_params[1], state->point_params[2], + state->point_params[6]); + mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", + state->point_params[3], state->point_params[7], + state->surface_scale_factor); + } else { + mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, + state->surface_scale_factor); + } + + mstring_append(body, + " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" + " vtx_inv_w = 1.0;\n" + " } else {\n" + " vtx_inv_w = 1.0 / oPos.w;\n" + " }\n" + " vtx_inv_w_flat = vtx_inv_w;\n"); +} + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle) +{ + if (count == 0) { + 
mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", + type, output, input, matrix, swizzle); + } else { + mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); + if (mix) { + /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ + mstring_append(str, "{\n" + " float weight_i;\n" + " float weight_n = 1.0;\n"); + int i; + for (i = 0; i < count; i++) { + if (i < (count - 1)) { + char c = "xyzw"[i]; + mstring_append_fmt(str, " weight_i = weight.%c;\n" + " weight_n -= weight_i;\n", + c); + } else { + mstring_append(str, " weight_i = weight_n;\n"); + } + mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", + output, input, matrix, i, swizzle); + } + mstring_append(str, "}\n"); + } else { + /* Individual weights */ + int i; + for (i = 0; i < count; i++) { + char c = "xyzw"[i]; + mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", + output, input, matrix, i, swizzle, c); + } + } + } +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h new file mode 100644 index 00000000000..949bf542520 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h @@ -0,0 +1,31 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms); + +#endif diff --git a/hw/xbox/nv2a/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c similarity index 97% rename from hw/xbox/nv2a/vsh.c rename to hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 0e4cf314bc2..7bebed71e85 100644 --- a/hw/xbox/nv2a/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A vertex shader translation + * Geforce NV2A PGRAPH GLSL Shader Generator * * Copyright (c) 2014 Jannik Vogel * Copyright (c) 2012 espes @@ -32,8 +32,9 @@ #include #include -#include "shaders_common.h" -#include "vsh.h" +#include "hw/xbox/nv2a/pgraph/vsh.h" +#include "common.h" +#include "vsh-prog.h" #define VSH_D3DSCM_CORRECTION 96 @@ -794,10 +795,11 @@ static const char* vsh_header = " return t;\n" "}\n"; -void vsh_translate(uint16_t version, +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body) { @@ -843,14 +845,30 @@ void vsh_translate(uint16_t version, * TODO: the pixel-center co-ordinate differences should handled */ " oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n" - " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n" - ); + ); + + if (vulkan) { + mstring_append(body, + " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n"); + } else { + mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) " + "/ surfaceSize.y;\n"); + } + if (z_perspective) { mstring_append(body, " oPos.z = oPos.w;\n"); } + + mstring_append(body, + " if (clipRange.y != clipRange.x) {\n"); + if (vulkan) { + mstring_append(body, " oPos.z /= clipRange.y;\n"); + } else { + mstring_append(body, + " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y " + "- 
clipRange.x)) - 1;\n"); + } mstring_append(body, - " if (clipRange.y != clipRange.x) {\n" - " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n" " }\n" /* Correct for the perspective divide */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h new file mode 100644 index 00000000000..84d8141c5e5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h @@ -0,0 +1,35 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2014 Jannik Vogel + * Copyright (c) 2012 espes + * + * Based on: + * Cxbx, VertexShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Dxbx, uPushBuffer.pas + * Copyright (c) 2007 Shadow_tj, PatrickvL + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H + +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, + unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c new file mode 100644 index 00000000000..4fcc09cac56 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -0,0 +1,274 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh.h" +#include "vsh-ff.h" +#include "vsh-prog.h" +#include + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) +{ + int i; + MString *output = mstring_new(); + mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400); + + MString *header = mstring_from_str(""); + + MString *uniforms = mstring_from_str(""); + + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + mstring_append_fmt(uniforms, + "%svec4 clipRange;\n" + "%svec2 surfaceSize;\n" + "%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n" + "%svec2 fogParam;\n", + u, u, u, u + ); + + mstring_append(header, + GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) + GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) + GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) + GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) + GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) + + "\n" + "vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" + "\n" + "vec4 decompress_11_11_10(int cmp) {\n" + " float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" + " float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" + " float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" + " return vec4(x, y, z, 1);\n" + "}\n"); + + pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading, + false, prefix_outputs, false); + + if (prefix_outputs) { + mstring_append(header, + "#define vtx_inv_w v_vtx_inv_w\n" + "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" + "#define vtxD0 v_vtxD0\n" + "#define vtxD1 v_vtxD1\n" + "#define vtxB0 v_vtxB0\n" + "#define vtxB1 v_vtxB1\n" + "#define vtxFog v_vtxFog\n" + "#define vtxT0 v_vtxT0\n" + "#define vtxT1 v_vtxT1\n" + "#define vtxT2 v_vtxT2\n" + "#define vtxT3 v_vtxT3\n" + ); + } + mstring_append(header, "\n"); + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + + bool is_uniform = state->uniform_attrs & (1 << i); + bool is_compressed = state->compressed_attrs & (1 << i); + + 
assert(!(is_uniform && is_compressed)); + + if (is_uniform) { + mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i); + } else { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt(header, + "layout(location = %d) in int v%d_cmp;\n", i, i); + } else if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n", + i, i); + } else { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", + i, i); + } + } + } + mstring_append(header, "\n"); + + MString *body = mstring_from_str("void main() {\n"); + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt( + body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); + } + + if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i); + } + + } + + if (state->fixed_function) { + pgraph_gen_vsh_ff_glsl(state, header, body, uniforms); + } else if (state->vertex_program) { + pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS, + (uint32_t *)state->program_data, + state->program_length, state->z_perspective, + state->vulkan, header, body); + } else { + assert(false); + } + + + /* Fog */ + + if (state->fog_enable) { + + if (state->vertex_program) { + /* FIXME: Does foggen do something here? Let's do some tracking.. + * + * "RollerCoaster Tycoon" has + * state->vertex_program = true; state->foggen == FOGGEN_PLANAR + * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z + */ + mstring_append(body, " float fogDistance = oFog.x;\n"); + } + + /* FIXME: Do this per pixel? 
*/ + + switch (state->fog_mode) { + case FOG_MODE_LINEAR: + case FOG_MODE_LINEAR_ABS: + + /* f = (end - d) / (end - start) + * fogParam.y = -1 / (end - start) + * fogParam.x = 1 - end * fogParam.y; + */ + + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n"); + mstring_append(body, " fogFactor -= 1.0;\n"); + break; + case FOG_MODE_EXP: + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + /* fallthru */ + case FOG_MODE_EXP_ABS: + + /* f = 1 / (e^(d * density)) + * fogParam.y = -density / (2 * ln(256)) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + case FOG_MODE_EXP2: + case FOG_MODE_EXP2_ABS: + + /* f = 1 / (e^((d * density)^2)) + * fogParam.y = -density / (2 * sqrt(ln(256))) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + default: + assert(false); + break; + } + /* Calculate absolute for the modes which need it */ + switch (state->fog_mode) { + case FOG_MODE_LINEAR_ABS: + case FOG_MODE_EXP_ABS: + case FOG_MODE_EXP2_ABS: + mstring_append(body, " fogFactor = abs(fogFactor);\n"); + break; + default: + break; + } + + mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); + } else { + /* FIXME: Is the fog still calculated / passed somehow?! + */ + mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); + } + + /* Set outputs */ + const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; + mstring_append_fmt(body, "\n" + " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" + " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" + " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" + " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" + " vtxFog = oFog.x * vtx_inv_w;\n" + " vtxT0 = oT0 * vtx_inv_w;\n" + " vtxT1 = oT1 * vtx_inv_w;\n" + " vtxT2 = oT2 * vtx_inv_w;\n" + " vtxT3 = oT3 * vtx_inv_w;\n" + " gl_Position = oPos;\n" + " gl_PointSize = oPts.x;\n" + // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near + // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far + "\n" + "}\n", + shade_model_mult, + shade_model_mult, + shade_model_mult, + shade_model_mult); + + + /* Return combined header + source */ + if (state->vulkan) { + mstring_append_fmt( + output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n", + VSH_UBO_BINDING, mstring_get_str(uniforms)); + // FIXME: Only needed for vk, for gl we use glVertexAttrib + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + "vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n" + "};\n\n"); + } else { + mstring_append( + output, mstring_get_str(uniforms)); + } + + mstring_append(output, mstring_get_str(header)); + mstring_unref(header); + + mstring_append(output, mstring_get_str(body)); + mstring_unref(body); + return output; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.h b/hw/xbox/nv2a/pgraph/glsl/vsh.h new file mode 100644 index 00000000000..584e1997e38 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.h @@ -0,0 +1,33 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define VSH_UBO_BINDING 0 + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs); + +#endif diff --git a/hw/xbox/nv2a/pgraph/meson.build b/hw/xbox/nv2a/pgraph/meson.build new file mode 100644 index 00000000000..5b8bc181c3d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/meson.build @@ -0,0 +1,19 @@ +specific_ss.add(files( + 'pgraph.c', + 'profile.c', + 'rdi.c', + 's3tc.c', + 'shaders.c', + 'swizzle.c', + 'texture.c', + 'vertex.c', + )) +if have_renderdoc + specific_ss.add(files('debug_renderdoc.c')) +endif +subdir('thirdparty') +subdir('null') +subdir('gl') +subdir('glsl') +subdir('vk') +specific_ss.add(nv2a_vsh_cpu) diff --git a/hw/xbox/nv2a/pgraph_methods.h b/hw/xbox/nv2a/pgraph/methods.h similarity index 100% rename from hw/xbox/nv2a/pgraph_methods.h rename to hw/xbox/nv2a/pgraph/methods.h diff --git a/hw/xbox/nv2a/pgraph/null/meson.build b/hw/xbox/nv2a/pgraph/null/meson.build new file mode 100644 index 00000000000..e2731a13d92 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/meson.build @@ -0,0 +1,3 @@ +specific_ss.add([sdl, files( + 'renderer.c', + )]) diff --git a/hw/xbox/nv2a/pgraph/null/renderer.c b/hw/xbox/nv2a/pgraph/null/renderer.c new file mode 100644 index 00000000000..9a9c2512ccf --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/renderer.c @@ -0,0 +1,146 @@ +/* + * Geforce NV2A PGRAPH Null Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you 
can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "hw/hw.h" +#include "hw/xbox/nv2a/nv2a_int.h" + +static void pgraph_null_sync(NV2AState *d) +{ + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +static void pgraph_null_flush(NV2AState *d) +{ + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_null_process_pending(NV2AState *d) +{ + if ( + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) + ) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_null_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_null_flush(d); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_null_clear_report_value(NV2AState *d) +{ +} + +static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter) +{ +} + +static void pgraph_null_draw_begin(NV2AState *d) +{ +} + +static void pgraph_null_draw_end(NV2AState *d) +{ +} + +static void pgraph_null_flip_stall(NV2AState *d) +{ +} + +static void pgraph_null_flush_draw(NV2AState *d) +{ +} + +static void pgraph_null_get_report(NV2AState *d, uint32_t parameter) +{ + pgraph_write_zpass_pixel_cnt_report(d, parameter, 0); +} + +static void 
pgraph_null_image_blit(NV2AState *d) +{ +} + +static void pgraph_null_pre_savevm_trigger(NV2AState *d) +{ +} + +static void pgraph_null_pre_savevm_wait(NV2AState *d) +{ +} + +static void pgraph_null_pre_shutdown_trigger(NV2AState *d) +{ +} + +static void pgraph_null_pre_shutdown_wait(NV2AState *d) +{ +} + +static void pgraph_null_process_pending_reports(NV2AState *d) +{ +} + +static void pgraph_null_surface_update(NV2AState *d, bool upload, + bool color_write, bool zeta_write) +{ +} + +static void pgraph_null_init(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->null_renderer_state = NULL; +} + +static PGRAPHRenderer pgraph_null_renderer = { + .type = CONFIG_DISPLAY_RENDERER_NULL, + .name = "Null", + .ops = { + .init = pgraph_null_init, + .clear_report_value = pgraph_null_clear_report_value, + .clear_surface = pgraph_null_clear_surface, + .draw_begin = pgraph_null_draw_begin, + .draw_end = pgraph_null_draw_end, + .flip_stall = pgraph_null_flip_stall, + .flush_draw = pgraph_null_flush_draw, + .get_report = pgraph_null_get_report, + .image_blit = pgraph_null_image_blit, + .pre_savevm_trigger = pgraph_null_pre_savevm_trigger, + .pre_savevm_wait = pgraph_null_pre_savevm_wait, + .pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_null_pre_shutdown_wait, + .process_pending = pgraph_null_process_pending, + .process_pending_reports = pgraph_null_process_pending_reports, + .surface_update = pgraph_null_surface_update, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_null_renderer); +} diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c new file mode 100644 index 00000000000..0062efa15f4 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -0,0 +1,2874 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you 
can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "../nv2a_int.h" +#include "ui/xemu-settings.h" +#include "util.h" +#include "swizzle.h" +#include "nv2a_vsh_emulator.h" + +#define PG_GET_MASK(reg, mask) GET_MASK(pgraph_reg_r(pg, reg), mask) +#define PG_SET_MASK(reg, mask, value) \ + do { \ + uint32_t rv = pgraph_reg_r(pg, reg); \ + SET_MASK(rv, mask, value); \ + pgraph_reg_w(pg, reg, rv); \ + } while (0) + + +NV2AState *g_nv2a; + +uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + qemu_mutex_lock(&pg->lock); + + uint64_t r = 0; + switch (addr) { + case NV_PGRAPH_INTR: + r = pg->pending_interrupts; + break; + case NV_PGRAPH_INTR_EN: + r = pg->enabled_interrupts; + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + r = pgraph_rdi_read(pg, select, address); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + default: + r = pgraph_reg_r(pg, addr); + break; + } + + qemu_mutex_unlock(&pg->lock); + + nv2a_reg_log_read(NV_PGRAPH, addr, size, r); + return r; +} + +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + nv2a_reg_log_write(NV_PGRAPH, addr, size, val); + + qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here + qemu_mutex_lock(&pg->lock); + + switch (addr) { + case NV_PGRAPH_INTR: + pg->pending_interrupts &= ~val; + + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { + pg->waiting_for_nop = false; + } + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { + pg->waiting_for_context_switch = false; + } + pfifo_kick(d); + break; + case NV_PGRAPH_INTR_EN: + pg->enabled_interrupts = val; + break; + case NV_PGRAPH_INCREMENT: + if (val & NV_PGRAPH_INCREMENT_READ_3D) { + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + nv2a_profile_increment(); + pfifo_kick(d); + } + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + pgraph_rdi_write(pg, select, address, val); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { + hwaddr context_address = + PG_GET_MASK(NV_PGRAPH_CHANNEL_CTX_POINTER, + NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; + + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { +#ifdef DEBUG_NV2A + unsigned pgraph_channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); +#endif + NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", + pgraph_channel_id, context_address); + + assert(context_address < memory_region_size(&d->ramin)); + + uint8_t *context_ptr = d->ramin_ptr + context_address; + uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); + + NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); + + pgraph_reg_w(pg, NV_PGRAPH_CTX_USER, context_user); + // pgraph_set_context_user(d, context_user); + } + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { + /* do stuff ... 
*/ + } + + break; + } + default: + pgraph_reg_w(pg, addr, val); + break; + } + + // events + switch (addr) { + case NV_PGRAPH_FIFO: + pfifo_kick(d); + break; + } + + qemu_mutex_unlock(&pg->lock); + qemu_mutex_unlock(&d->pfifo.lock); +} + +void pgraph_context_switch(NV2AState *d, unsigned int channel_id) +{ + PGRAPHState *pg = &d->pgraph; + + bool channel_valid = + pgraph_reg_r(pg, NV_PGRAPH_CTX_CONTROL) & NV_PGRAPH_CTX_CONTROL_CHID; + unsigned pgraph_channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); + + bool valid = channel_valid && pgraph_channel_id == channel_id; + if (!valid) { + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, + NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); + + NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); + + /* TODO: hardware context switching */ + assert(!PG_GET_MASK(NV_PGRAPH_DEBUG_3, + NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); + + pg->waiting_for_context_switch = true; + qemu_mutex_unlock(&pg->lock); + qemu_mutex_lock_iothread(); + pg->pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; + nv2a_update_irq(d); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&pg->lock); + } +} + +static const PGRAPHRenderer *renderers[CONFIG_DISPLAY_RENDERER__COUNT]; + +void pgraph_renderer_register(const PGRAPHRenderer *renderer) +{ + assert(renderer->type < CONFIG_DISPLAY_RENDERER__COUNT); + renderers[renderer->type] = renderer; +} + +void pgraph_init(NV2AState *d) +{ + g_nv2a = d; + + PGRAPHState *pg = &d->pgraph; + qemu_mutex_init(&pg->lock); + qemu_event_init(&pg->sync_complete, false); + qemu_event_init(&pg->flush_complete, false); + + pg->frame_time = 0; + pg->draw_time = 0; + + pg->material_alpha = 0.0f; + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); + pg->primitive_mode = PRIM_TYPE_INVALID; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = &pg->vertex_attributes[i]; + attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH 
+ * sizeof(float) * 4); + attribute->inline_buffer_populated = false; + } + + pgraph_clear_dirty_reg_map(pg); + + pg->renderer = renderers[g_config.display.renderer]; + pg->renderer->ops.init(d); +} + +void pgraph_clear_dirty_reg_map(PGRAPHState *pg) +{ + memset(pg->regs_dirty, 0, sizeof(pg->regs_dirty)); +} + +void pgraph_init_thread(NV2AState *d) +{ + if (d->pgraph.renderer->ops.init_thread) { + d->pgraph.renderer->ops.init_thread(d); + } +} + +static CONFIG_DISPLAY_RENDERER get_default_renderer(void) +{ +#ifdef CONFIG_OPENGL + if (renderers[CONFIG_DISPLAY_RENDERER_OPENGL]) { + return CONFIG_DISPLAY_RENDERER_OPENGL; + } +#endif +#ifdef CONFIG_VULKAN + if (renderers[CONFIG_DISPLAY_RENDERER_VULKAN]) { + return CONFIG_DISPLAY_RENDERER_VULKAN; + } +#endif + fprintf(stderr, "Warning: No available renderer\n"); + return CONFIG_DISPLAY_RENDERER_NULL; +} + +void nv2a_context_init(void) +{ + if (!renderers[g_config.display.renderer]) { + g_config.display.renderer = get_default_renderer(); + fprintf(stderr, + "Warning: Configured renderer unavailable. 
Switching to %s.\n", + renderers[g_config.display.renderer]->name); + } + + if (renderers[g_config.display.renderer]->ops.early_context_init) { + renderers[g_config.display.renderer]->ops.early_context_init(); + } +} + +void pgraph_destroy(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + + if (pg->renderer->ops.finalize) { + pg->renderer->ops.finalize(d); + } + + qemu_mutex_destroy(&pg->lock); +} + +int nv2a_get_framebuffer_surface(void) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.get_framebuffer_surface) { + return d->pgraph.renderer->ops.get_framebuffer_surface(d); + } + + return 0; +} + +void nv2a_set_surface_scale_factor(unsigned int scale) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.set_surface_scale_factor) { + d->pgraph.renderer->ops.set_surface_scale_factor(d, scale); + } +} + +unsigned int nv2a_get_surface_scale_factor(void) +{ + NV2AState *d = g_nv2a; + + if (d->pgraph.renderer->ops.get_surface_scale_factor) { + return d->pgraph.renderer->ops.get_surface_scale_factor(d); + } + + return 1; +} + +#define METHOD_ADDR(gclass, name) \ + gclass ## _ ## name +#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) +#define METHOD_NAME_STR(gclass, name) \ + tostring(gclass ## _ ## name) +#define METHOD_FUNC_NAME(gclass, name) \ + pgraph_ ## gclass ## _ ## name ## _handler +#define METHOD_HANDLER_ARG_DECL \ + NV2AState *d, PGRAPHState *pg, \ + unsigned int subchannel, unsigned int method, \ + uint32_t parameter, uint32_t *parameters, \ + size_t num_words_available, size_t *num_words_consumed, bool inc +#define METHOD_HANDLER_ARGS \ + d, pg, subchannel, method, parameter, parameters, \ + num_words_available, num_words_consumed, inc +#define DEF_METHOD_PROTO(gclass, name) \ + static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) + +#define DEF_METHOD(gclass, name) \ + DEF_METHOD_PROTO(gclass, name); +#define DEF_METHOD_RANGE(gclass, name, range) \ + DEF_METHOD_PROTO(gclass, name); +#define 
DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_PROTO(gclass, name); +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); +static const struct { + uint32_t base; + const char *name; + MethodFunc handler; +} pgraph_kelvin_methods[0x800] = { +#define DEF_METHOD(gclass, name) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_RANGE(gclass, name, range) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ + ... METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 +}; + 
+#define METHOD_RANGE_END_NAME(gclass, name) \ + pgraph_ ## gclass ## _ ## name ## __END +#define DEF_METHOD(gclass, name) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4; +#define DEF_METHOD_RANGE(gclass, name, range) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*range; +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*stride; +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +static void pgraph_method_log(unsigned int subchannel, + unsigned int graphics_class, + unsigned int method, uint32_t parameter) +{ + const char *method_name = "?"; + static unsigned int last = 0; + static unsigned int count = 0; + + if (last == NV097_ARRAY_ELEMENT16 && method != last) { + method_name = "NV097_ARRAY_ELEMENT16"; + trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, + method_name, count); + } + + if (method != NV097_ARRAY_ELEMENT16) { + uint32_t base = method; + switch (graphics_class) { + case NV_KELVIN_PRIMITIVE: { + int idx = METHOD_ADDR_TO_INDEX(method); + if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && + pgraph_kelvin_methods[idx].handler) { + method_name = pgraph_kelvin_methods[idx].name; + base = pgraph_kelvin_methods[idx].base; + } + break; + } + default: + break; + } + + uint32_t offset = method - base; + trace_nv2a_pgraph_method(subchannel, graphics_class, method, + method_name, offset, parameter); + } + + if (method == last) { + count++; + } else { + count = 0; + } + last = method; +} + +static void pgraph_method_inc(MethodFunc handler, uint32_t end, + METHOD_HANDLER_ARG_DECL) +{ + if (!inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + size_t count = MIN(num_words_available, (end - method) / 4); + for (size_t 
i = 0; i < count; i++) { + parameter = ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + method += 4; + } + *num_words_consumed = count; +} + +static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) +{ + if (inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + + for (size_t i = 0; i < num_words_available; i++) { + parameter = ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + } + *num_words_consumed = num_words_available; +} + +#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) +#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) +#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) + +#define DEF_METHOD_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_RANGE_END_NAME(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +#define DEF_METHOD_NON_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +int pgraph_method(NV2AState *d, unsigned int subchannel, + unsigned int method, uint32_t parameter, + uint32_t *parameters, size_t num_words_available, + size_t max_lookahead_words, bool inc) +{ + int num_processed = 1; + + PGRAPHState *pg = &d->pgraph; + + bool channel_valid = + PG_GET_MASK(NV_PGRAPH_CTX_CONTROL, NV_PGRAPH_CTX_CONTROL_CHID); + assert(channel_valid); + + ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + assert(subchannel < 8); + + if (method == NV_SET_OBJECT) { + assert(parameter < 
memory_region_size(&d->ramin)); + uint8_t *obj_ptr = d->ramin_ptr + parameter; + + uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); + uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); + uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); + uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); + uint32_t ctx_5 = parameter; + + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4, ctx_1); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4, ctx_2); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4, ctx_3); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4, ctx_4); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4, ctx_5); + } + + // is this right? + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH1, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH2, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH3, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH4, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH5, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4)); + + uint32_t graphics_class = PG_GET_MASK(NV_PGRAPH_CTX_SWITCH1, + NV_PGRAPH_CTX_SWITCH1_GRCLASS); + + pgraph_method_log(subchannel, graphics_class, method, parameter); + + if (subchannel != 0) { + // catches context switching issues on xbox d3d + assert(graphics_class != 0x97); + } + + /* ugly switch for now */ + switch (graphics_class) { + case NV_BETA: { + switch (method) { + case NV012_SET_OBJECT: + beta->object_instance = parameter; + break; + case NV012_SET_BETA: + if (parameter & 0x80000000) { + beta->beta = 0; + } else { + // The parameter is a signed fixed-point number with a sign bit + // and 31 fractional bits. Note that negative values are clamped + // to 0, and only 8 fractional bits are actually implemented in + // hardware. 
+ beta->beta = parameter & 0x7f800000; + } + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_PATTERN: { + switch (method) { + case NV044_SET_MONOCHROME_COLOR0: + pgraph_reg_w(pg, NV_PGRAPH_PATT_COLOR0, parameter); + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_SURFACES_2D: { + switch (method) { + case NV062_SET_OBJECT: + context_surfaces_2d->object_instance = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: + context_surfaces_2d->dma_image_source = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: + context_surfaces_2d->dma_image_dest = parameter; + break; + case NV062_SET_COLOR_FORMAT: + context_surfaces_2d->color_format = parameter; + break; + case NV062_SET_PITCH: + context_surfaces_2d->source_pitch = parameter & 0xFFFF; + context_surfaces_2d->dest_pitch = parameter >> 16; + break; + case NV062_SET_OFFSET_SOURCE: + context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; + break; + case NV062_SET_OFFSET_DESTIN: + context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; + break; + default: + goto unhandled; + } + break; + } + case NV_IMAGE_BLIT: { + switch (method) { + case NV09F_SET_OBJECT: + image_blit->object_instance = parameter; + break; + case NV09F_SET_CONTEXT_SURFACES: + image_blit->context_surfaces = parameter; + break; + case NV09F_SET_OPERATION: + image_blit->operation = parameter; + break; + case NV09F_CONTROL_POINT_IN: + image_blit->in_x = parameter & 0xFFFF; + image_blit->in_y = parameter >> 16; + break; + case NV09F_CONTROL_POINT_OUT: + image_blit->out_x = parameter & 0xFFFF; + image_blit->out_y = parameter >> 16; + break; + case NV09F_SIZE: + image_blit->width = parameter & 0xFFFF; + image_blit->height = parameter >> 16; + + if (image_blit->width && image_blit->height) { + d->pgraph.renderer->ops.image_blit(d); + } + break; + default: + goto unhandled; + } + break; + } + case NV_KELVIN_PRIMITIVE: { + MethodFunc handler = + 
pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; + if (handler == NULL) { + goto unhandled; + } + size_t num_words_consumed = 1; + handler(d, pg, subchannel, method, parameter, parameters, + num_words_available, &num_words_consumed, inc); + + /* Squash repeated BEGIN,DRAW_ARRAYS,END */ + #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) + #define LAP(i, prm) (parameters[i*2+2] == (prm)) + #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) + + if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && + pg->inline_elements_length == 0 && + pg->draw_arrays_length < + (ARRAY_SIZE(pg->draw_arrays_start) - 1) && + LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && + LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && + LAM(2, NV097_DRAW_ARRAYS)) { + num_words_consumed += 4; + pg->draw_arrays_prevent_connect = true; + } + + #undef LAM + #undef LAP + #undef LAMP + + num_processed = num_words_consumed; + break; + } + default: + goto unhandled; + } + + return num_processed; + +unhandled: + trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, + method, parameter); + return num_processed; +} + +DEF_METHOD(NV097, SET_OBJECT) +{ + pg->kelvin.object_instance = parameter; +} + +DEF_METHOD(NV097, NO_OPERATION) +{ + /* The bios uses nop as a software method call - + * it seems to expect a notify interrupt if the parameter isn't 0. + * According to a nouveau guy it should still be a nop regardless + * of the parameter. It's possible a debug register enables this, + * but nothing obvious sticks out. Weird. 
+ */ + if (parameter == 0) { + return; + } + + unsigned channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); + + assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); + + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_CHID, + channel_id); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_SUBCH, + subchannel); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_MTHD, + method); + pgraph_reg_w(pg, NV_PGRAPH_TRAPPED_DATA_LOW, parameter); + pgraph_reg_w(pg, NV_PGRAPH_NSOURCE, + NV_PGRAPH_NSOURCE_NOTIFICATION); /* TODO: check this */ + pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; + pg->waiting_for_nop = true; + + qemu_mutex_unlock(&pg->lock); + qemu_mutex_lock_iothread(); + nv2a_update_irq(d); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&pg->lock); +} + +DEF_METHOD(NV097, WAIT_FOR_IDLE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); +} + +DEF_METHOD(NV097, SET_FLIP_READ) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_READ_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_WRITE) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_MODULO) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_MODULO_3D, + parameter); +} + +DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) +{ + uint32_t old = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + + uint32_t new = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + trace_nv2a_pgraph_flip_increment_write(old, new); + pg->frame_time++; +} + +DEF_METHOD(NV097, FLIP_STALL) +{ + trace_nv2a_pgraph_flip_stall(); + d->pgraph.renderer->ops.surface_update(d, false, true, true); + d->pgraph.renderer->ops.flip_stall(d); + nv2a_profile_flip_stall(); + pg->waiting_for_flip = 
true; +} + +// TODO: these should be loading the dma objects from ramin here? + +DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) +{ + pg->dma_notifies = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_A) +{ + pg->dma_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_B) +{ + pg->dma_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) +{ + pg->dma_state = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) +{ + /* try to get any straggling draws in before the surface's changed :/ */ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->dma_color = parameter; + pg->surface_color.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) +{ + pg->dma_zeta = parameter; + pg->surface_zeta.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) +{ + pg->dma_vertex_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) +{ + pg->dma_vertex_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) +{ + pg->dma_semaphore = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) +{ + d->pgraph.renderer->ops.process_pending_reports(d); + + pg->dma_report = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_x = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); + pg->surface_shape.clip_width = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_y = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); + pg->surface_shape.clip_height = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); +} + +DEF_METHOD(NV097, SET_SURFACE_FORMAT) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.color_format = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); + pg->surface_shape.zeta_format 
= + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); + pg->surface_shape.anti_aliasing = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); + pg->surface_shape.log_width = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); + pg->surface_shape.log_height = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); + + int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); + if (surface_type != pg->surface_type) { + pg->surface_type = surface_type; + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } +} + +DEF_METHOD(NV097, SET_SURFACE_PITCH) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); + unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); + + pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); + pg->surface_color.pitch = color_pitch; + + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); + pg->surface_zeta.pitch = zeta_pitch; +} + +DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); + pg->surface_color.offset = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); + pg->surface_zeta.offset = parameter; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAI0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG0, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG1, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) +{ 
+ int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXADDRESS0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_CONTROL0) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + bool stencil_write_enable = + parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, + stencil_write_enable); + + uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); + + bool z_perspective = + parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, + z_perspective); +} + +DEF_METHOD(NV097, SET_COLOR_MATERIAL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_EMISSION, + (parameter >> 0) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_AMBIENT, + (parameter >> 2) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_DIFFUSE, + (parameter >> 4) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_SPECULAR, + (parameter >> 6) & 3); +} + +DEF_METHOD(NV097, SET_FOG_MODE) +{ + /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ + unsigned int mode; + switch (parameter) { + case NV097_SET_FOG_MODE_V_LINEAR: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; + case NV097_SET_FOG_MODE_V_EXP: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; + case NV097_SET_FOG_MODE_V_EXP2: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; + case NV097_SET_FOG_MODE_V_EXP_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; + case NV097_SET_FOG_MODE_V_EXP2_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; + case NV097_SET_FOG_MODE_V_LINEAR_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOG_MODE, + mode); +} + +DEF_METHOD(NV097, SET_FOG_GEN_MODE) +{ + unsigned int mode; + switch 
(parameter) { + case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; + case NV097_SET_FOG_GEN_MODE_V_RADIAL: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; + case NV097_SET_FOG_GEN_MODE_V_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_FOG_X: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGGENMODE, mode); +} + +DEF_METHOD(NV097, SET_FOG_ENABLE) +{ + /* + FIXME: There is also: + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGENABLE, + parameter); + */ + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOGENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_FOG_COLOR) +{ + /* PGRAPH channels are ARGB, parameter channels are ABGR */ + uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); + uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); + uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); + uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_RED, red); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_GREEN, green); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_BLUE, blue); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_ALPHA, alpha); +} + +DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, NV_PGRAPH_WINDOWCLIPX0 + slot * 4, parameter); + } +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, 
NV_PGRAPH_WINDOWCLIPY0 + slot * 4, parameter); + } +} + +DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EN, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_CULLENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DITHER_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LIGHTING_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_LIGHTING, + parameter); +} + +DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, + parameter); + PG_SET_MASK(NV_PGRAPH_CONTROL_3, + NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_SKIN_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_SKIN, + parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, 
parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); +} + +DEF_METHOD(NV097, SET_ALPHA_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_ALPHA_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_SFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_DFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_COLOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_BLENDCOLOR, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_EQUATION) +{ + unsigned int equation; + switch (parameter) { + case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: + equation = 0; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: + equation = 1; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: + equation = 2; break; + case NV097_SET_BLEND_EQUATION_V_MIN: + equation = 3; break; + case NV097_SET_BLEND_EQUATION_V_MAX: + equation = 4; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: + equation = 5; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: + equation = 6; break; + default: + NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EQN, equation); +} + +DEF_METHOD(NV097, SET_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZFUNC, + parameter & 0xF); +} + +DEF_METHOD(NV097, SET_COLOR_MASK) +{ + pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); + + bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; + bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; + bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; + bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + 
NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); +} + +DEF_METHOD(NV097, SET_DEPTH_MASK) +{ + pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); + + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); +} + +static unsigned int kelvin_map_stencil_op(uint32_t parameter) +{ + unsigned int op; + switch (parameter) { + case NV097_SET_STENCIL_OP_V_KEEP: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; + case NV097_SET_STENCIL_OP_V_ZERO: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; + case NV097_SET_STENCIL_OP_V_REPLACE: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; + case NV097_SET_STENCIL_OP_V_INCRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; + case NV097_SET_STENCIL_OP_V_DECRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; + case NV097_SET_STENCIL_OP_V_INVERT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; + case NV097_SET_STENCIL_OP_V_INCR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; + case NV097_SET_STENCIL_OP_V_DECR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; + default: + assert(false); + break; + } + return op; +} + +DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) +{ + 
PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_SHADE_MODE) +{ + switch (parameter) { + case NV097_SET_SHADE_MODE_V_FLAT: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); + break; + case NV097_SET_SHADE_MODE_V_SMOOTH: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); + break; + default: + /* Discard */ + break; + } +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter); +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETBIAS, parameter); +} + +static unsigned int kelvin_map_polygon_mode(uint32_t parameter) +{ + unsigned int mode; + switch (parameter) { + case NV097_SET_FRONT_POLYGON_MODE_V_POINT: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; + case NV097_SET_FRONT_POLYGON_MODE_V_LINE: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; + case NV097_SET_FRONT_POLYGON_MODE_V_FILL: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; + default: + assert(false); + break; + } + return mode; +} + +DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_BACKFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_CLIP_MIN) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMIN, parameter); +} + +DEF_METHOD(NV097, SET_CLIP_MAX) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMAX, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE) +{ + unsigned int face; + switch (parameter) { + case 
NV097_SET_CULL_FACE_V_FRONT: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; + case NV097_SET_CULL_FACE_V_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; + case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_CULLCTRL, face); +} + +DEF_METHOD(NV097, SET_FRONT_FACE) +{ + bool ccw; + switch (parameter) { + case NV097_SET_FRONT_FACE_V_CW: + ccw = false; break; + case NV097_SET_FRONT_FACE_V_CCW: + ccw = true; break; + default: + NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_FRONTFACE, + ccw ? 1 : 0); +} + +DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, + parameter); +} + +DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) +{ + int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; + // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; +} + +DEF_METHOD(NV097, SET_MATERIAL_ALPHA) +{ + pg->material_alpha = *(float*)&parameter; +} + +DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_LIGHTS, parameter); +} + +static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) +{ + assert(channel < 4); + unsigned int texgen; + switch (parameter) { + case NV097_SET_TEXGEN_S_DISABLE: + texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; + case NV097_SET_TEXGEN_S_EYE_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; + case NV097_SET_TEXGEN_S_OBJECT_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; + case NV097_SET_TEXGEN_S_SPHERE_MAP: + assert(channel < 2); + texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; + case NV097_SET_TEXGEN_S_REFLECTION_MAP: + assert(channel < 
3); + texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; + case NV097_SET_TEXGEN_S_NORMAL_MAP: + assert(channel < 3); + texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; + default: + assert(false); + break; + } + return texgen; +} + +DEF_METHOD(NV097, SET_TEXGEN_S) +{ + int slot = (method - NV097_SET_TEXGEN_S) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S + : NV_PGRAPH_CSV1_A_T0_S; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 0)); +} + +DEF_METHOD(NV097, SET_TEXGEN_T) +{ + int slot = (method - NV097_SET_TEXGEN_T) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T + : NV_PGRAPH_CSV1_A_T0_T; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 1)); +} + +DEF_METHOD(NV097, SET_TEXGEN_R) +{ + int slot = (method - NV097_SET_TEXGEN_R) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R + : NV_PGRAPH_CSV1_A_T0_R; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 2)); +} + +DEF_METHOD(NV097, SET_TEXGEN_Q) +{ + int slot = (method - NV097_SET_TEXGEN_Q) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q + : NV_PGRAPH_CSV1_A_T0_Q; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 3)); +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; + pg->texture_matrix_enable[slot] = parameter; +} + +DEF_METHOD(NV097, SET_POINT_SIZE) +{ + PG_SET_MASK(NV_PGRAPH_POINTSIZE, NV097_SET_POINT_SIZE_V, parameter); +} + +DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) +{ + int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; + // pg->projection_matrix[slot] = *(float*)&parameter; + unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) +{ + int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; + unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_FOG_PARAMS) +{ 
+ int slot = (method - NV097_SET_FOG_PARAMS) / 4; + if (slot < 2) { + pgraph_reg_w(pg, NV_PGRAPH_FOGPARAM0 + slot*4, parameter); + } else { + /* FIXME: No idea where slot = 2 is */ + } + + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; +} + +/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ +DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) +{ + int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_TEXGEN_REF, + parameter); +} + +DEF_METHOD_INC(NV097, SET_FOG_PLANE) +{ + int slot = (method - NV097_SET_FOG_PLANE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; +} + +DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; + // ?? + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) +{ + int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; +} + +DEF_METHOD_INC(NV097, SET_POINT_PARAMS) +{ + int slot = (method - NV097_SET_POINT_PARAMS) / 4; + pg->point_params[slot] = *(float *)&parameter; /* FIXME: Where? 
*/ +} + +DEF_METHOD_INC(NV097, SET_EYE_POSITION) +{ + int slot = (method - NV097_SET_EYE_POSITION) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR1 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAO0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORI0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) +{ + int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) +{ + int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; + + int program_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); + + assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_data[program_load][slot%4] = parameter; + pg->program_data_dirty = true; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) +{ + int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; + int const_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); + + assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); + // VertexShaderConstant *constant = &pg->constants[const_load]; + 
pg->vsh_constants_dirty[const_load] |= + (parameter != pg->vsh_constants[const_load][slot%4]); + pg->vsh_constants[const_load][slot%4] = parameter; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX3F) +{ + int slot = (method - NV097_SET_VERTEX3F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)¶meter; + attribute->inline_value[3] = 1.0f; + if (slot == 2) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +/* Handles NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; + slot /= 16; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... + NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... + NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
+ NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; + break; + default: + assert(false); + break; + } +} + +/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; + slot /= 32; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_LIGHT_AMBIENT_COLOR ... + NV097_SET_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; + break; + case NV097_SET_LIGHT_DIFFUSE_COLOR ... + NV097_SET_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; + break; + case NV097_SET_LIGHT_SPECULAR_COLOR ... + NV097_SET_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; + break; + case NV097_SET_LIGHT_LOCAL_RANGE: + pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; + pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; + break; + case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... + NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: + part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; + pg->light_infinite_half_vector[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
+ NV097_SET_LIGHT_INFINITE_DIRECTION + 8: + part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; + pg->light_infinite_direction[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_SPOT_FALLOFF ... + NV097_SET_LIGHT_SPOT_FALLOFF + 8: + part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; + break; + case NV097_SET_LIGHT_SPOT_DIRECTION ... + NV097_SET_LIGHT_SPOT_DIRECTION + 12: + part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; + break; + case NV097_SET_LIGHT_LOCAL_POSITION ... + NV097_SET_LIGHT_LOCAL_POSITION + 8: + part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; + pg->light_local_position[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_LOCAL_ATTENUATION ... + NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: + part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; + pg->light_local_attenuation[slot][part] = *(float*)¶meter; + break; + default: + assert(false); + break; + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX4F) +{ + int slot = (method - NV097_SET_VERTEX4F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)¶meter; + if (slot == 3) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_NORMAL3S) +{ + int slot = (method - NV097_SET_NORMAL3S) / 4; + unsigned int part = slot % 2; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); + int16_t val = parameter & 0xFFFF; + attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); + val = parameter >> 16; + attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); +} + 
+#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + unsigned int part = slot % 2; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[part * 2 + 0] = \ + (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[part * 2 + 1] = \ + (float)(int16_t)(parameter >> 16); \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_4S + +#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2S + +#define SET_VERTEX_COLOR_3F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_COLOR_3F + +#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_NORMAL3F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); +} + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, + NV2A_VERTEX_ATTR_SPECULAR); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); +} + + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_F + +#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
+ attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, + NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, + NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, + NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, + NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2F + +#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ + attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ + attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ + attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, + NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, + NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_ATTRIBUTE_4UB + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; + VertexAttribute *attr = &pg->vertex_attributes[slot]; + attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); + attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); + attr->stride = GET_MASK(parameter, + NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); + + NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n", + 
attr->format, attr->count, attr->stride); + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + attr->size = 1; + assert(attr->count == 4); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + attr->size = 1; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + attr->size = 4; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + attr->size = 4; + assert(attr->count == 1); + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + if (attr->format == NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP) { + pg->compressed_attrs |= (1 << slot); + } else { + pg->compressed_attrs &= ~(1 << slot); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; + + pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; + pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; +} + +DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP_ENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_LOGIC_OP) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP, + parameter & 0xF); +} + +DEF_METHOD(NV097, CLEAR_REPORT_VALUE) +{ + d->pgraph.renderer->ops.clear_report_value(d); +} + +DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) +{ + pg->zpass_pixel_count_enable = parameter; +} + +DEF_METHOD(NV097, GET_REPORT) +{ + uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + d->pgraph.renderer->ops.get_report(d, parameter); +} + +DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) +{ + int slot = (method - NV097_SET_EYE_DIRECTION) / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] 
= parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; +} + +DEF_METHOD(NV097, SET_BEGIN_END) +{ + if (parameter == NV097_SET_BEGIN_END_OP_END) { + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("End without Begin!\n"); + } + nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); + d->pgraph.renderer->ops.draw_end(d); + pgraph_reset_inline_buffers(pg); + pg->primitive_mode = PRIM_TYPE_INVALID; + } else { + if (pg->primitive_mode != PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Begin without End!\n"); + } + assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); + pg->primitive_mode = parameter; + pgraph_reset_inline_buffers(pg); + d->pgraph.renderer->ops.draw_begin(d); + } +} + +DEF_METHOD(NV097, SET_TEXTURE_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXOFFSET0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FORMAT) +{ + int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; + bool cubemap = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); + unsigned int border_source = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); + unsigned int dimensionality = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); + unsigned int color_format = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); + unsigned int levels = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); + unsigned int log_width = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); + unsigned int log_height = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); + unsigned int log_depth = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); + + unsigned int reg = NV_PGRAPH_TEXFMT0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); + PG_SET_MASK(reg, 
NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL0_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL1_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FILTER) +{ + int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXFILTER0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) +{ + int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXIMAGERECT0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_PALETTE) +{ + int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; + unsigned int length = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); + unsigned int offset = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); + + unsigned int reg = NV_PGRAPH_TEXPALETTE0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) +{ + int slot = (method - 
NV097_SET_TEXTURE_BORDER_COLOR) / 64; + pgraph_reg_w(pg, NV_PGRAPH_BORDERCOLOR0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; + if (slot < 16) { + /* discard */ + return; + } + + slot -= 16; + const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, + NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; + pgraph_reg_w(pg, swizzle[slot % 4] + slot / 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPSCALE1 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPOFFSET1 + slot * 4, parameter); +} + +static void pgraph_expand_draw_arrays(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + uint32_t start = pg->draw_arrays_start[pg->draw_arrays_length - 1]; + uint32_t count = pg->draw_arrays_count[pg->draw_arrays_length - 1]; + + /* Render any previously squashed DRAW_ARRAYS calls. This case would be + * triggered if a set of BEGIN+DA+END triplets is followed by the + * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. 
*/ + if (pg->draw_arrays_length > 1) { + d->pgraph.renderer->ops.flush_draw(d); + pgraph_reset_inline_buffers(pg); + } + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + + pgraph_reset_draw_arrays(pg); +} + +void pgraph_check_within_begin_end_block(PGRAPHState *pg) +{ + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); + } +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; + pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter; +} + +DEF_METHOD(NV097, DRAW_ARRAYS) +{ + pgraph_check_within_begin_end_block(pg); + + int32_t start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); + int32_t count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1; + + if (pg->inline_elements_length) { + /* FIXME: Determine HW behavior for overflow case. 
*/ + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + assert(!pg->draw_arrays_prevent_connect); + + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + return; + } + + pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); + pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); + + assert(pg->draw_arrays_length < ARRAY_SIZE(pg->draw_arrays_start)); + + /* Attempt to connect contiguous primitives */ + if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { + unsigned int last_start = + pg->draw_arrays_start[pg->draw_arrays_length - 1]; + int32_t *last_count = + &pg->draw_arrays_count[pg->draw_arrays_length - 1]; + if (start == (last_start + *last_count)) { + *last_count += count; + return; + } + } + + pg->draw_arrays_start[pg->draw_arrays_length] = start; + pg->draw_arrays_count[pg->draw_arrays_length] = count; + pg->draw_arrays_length++; + pg->draw_arrays_prevent_connect = false; +} + +DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_array[pg->inline_array_length++] = parameter; +} + +DEF_METHOD_INC(NV097, SET_EYE_VECTOR) +{ + int slot = (method - NV097_SET_EYE_VECTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_EYEVEC0 + slot * 4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; + unsigned int part = slot % 2; + slot /= 2; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[part] = *(float*)¶meter; + /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/ + attribute->inline_value[2] = 0.0; + attribute->inline_value[3] = 1.0; + if ((slot == 0) && (part == 1)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4; + unsigned int part = slot % 4; + slot /= 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[part] = *(float*)¶meter; + if ((slot == 0) && (part == 3)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S) +{ + int slot = (method - NV097_SET_VERTEX_DATA2S) / 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); + attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); + attribute->inline_value[2] = 0.0; + attribute->inline_value[3] = 1.0; + if (slot == 0) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB) +{ + int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[0] = (parameter & 0xFF) / 255.0; + attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0; + attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0; + attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0; + if (slot == 0) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4; + unsigned int part = slot % 2; + slot /= 2; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + + attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF); + attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); + if ((slot == 0) && (part == 1)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) +{ + pgraph_reg_w(pg, NV_PGRAPH_SEMAPHOREOFFSET, parameter); +} + +DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + //qemu_mutex_unlock(&d->pgraph.lock); + //qemu_mutex_lock_iothread(); + + uint32_t semaphore_offset = pgraph_reg_r(pg, NV_PGRAPH_SEMAPHOREOFFSET); + + hwaddr semaphore_dma_len; + uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, + &semaphore_dma_len); + assert(semaphore_offset < semaphore_dma_len); + semaphore_data += semaphore_offset; + + stl_le_p((uint32_t*)semaphore_data, parameter); + + //qemu_mutex_lock(&d->pgraph.lock); + //qemu_mutex_unlock_iothread(); +} + +DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) +{ + switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); + break; + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); + break; + default: + /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ + assert(!"Invalid zclamp value"); + break; + } +} + +DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) +{ + PG_SET_MASK(NV_PGRAPH_ANTIALIASING, NV_PGRAPH_ANTIALIASING_ENABLE, + GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); + // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) +} + +DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZSTENCILCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_COLORCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, CLEAR_SURFACE) +{ + 
d->pgraph.renderer->ops.clear_surface(d, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTX, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTY, parameter); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) +{ + int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_SPECFOGFACTOR0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERCLIPMODE, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORO0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_CONTROL) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINECTL, parameter); +} + +DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADOWZSLOPETHRESHOLD, parameter); + assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ +} + +DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_SHADOWCTL, NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, + parameter); +} + +DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERPROG, parameter); +} + +DEF_METHOD(NV097, SET_DOT_RGBMAPPING) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFF, + GET_MASK(parameter, 0xFFF)); +} + +DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFFF000, + GET_MASK(parameter, 0xFFFF000)); +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) +{ + int slot = (method - NV097_SET_TRANSFORM_DATA) / 4; + pg->vertex_state_shader_v0[slot] = parameter; +} + +DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) +{ + unsigned int program_start = parameter; + assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + Nv2aVshProgram program; + Nv2aVshParseResult result = nv2a_vsh_parse_program( + &program, + pg->program_data[program_start], + NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - 
program_start); + assert(result == NV2AVPR_SUCCESS); + + Nv2aVshCPUXVSSExecutionState state_linkage; + Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( + &state_linkage, (float*)pg->vsh_constants); + memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); + + nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); + + nv2a_vsh_program_destroy(&program); +} + +DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_RANGE_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) +{ + pg->enable_vertex_program_write = parameter; +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CSV0_C, + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) +{ + assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]) +{ + uint32_t clear_color = pgraph_reg_r(pg, NV_PGRAPH_COLORCLEARVALUE); + + float *r = &rgba[0], *g = &rgba[1], *b = &rgba[2], *a = &rgba[3]; + + /* Handle RGB */ + switch(pg->surface_shape.color_format) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: + *r = ((clear_color >> 10) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x1F) / 31.0f; + *b = (clear_color & 0x1F) / 31.0f; + 
break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: + *r = ((clear_color >> 11) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x3F) / 63.0f; + *b = (clear_color & 0x1F) / 31.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *r = ((clear_color >> 16) & 0xFF) / 255.0f; + *g = ((clear_color >> 8) & 0xFF) / 255.0f; + *b = (clear_color & 0xFF) / 255.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: + /* Xbox D3D doesn't support clearing those */ + default: + *r = 1.0f; + *g = 0.0f; + *b = 1.0f; + fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", + pg->surface_shape.color_format); + assert(false); + break; + } + + /* Handle alpha */ + switch(pg->surface_shape.color_format) { + /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we + * also have to clear non-alpha bits with alpha value? + * As GL doesn't own those pixels we'd have to do this on + * our own in xbox memory. + */ + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + *a = ((clear_color >> 24) & 0x7F) / 127.0f; + assert(false); /* Untested */ + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *a = ((clear_color >> 24) & 0xFF) / 255.0f; + break; + default: + *a = 1.0f; + break; + } +} + +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, + int *stencil) +{ + uint32_t clear_zstencil = + pgraph_reg_r(pg, NV_PGRAPH_ZSTENCILCLEARVALUE); + *stencil = 0; + *depth = 1.0; + + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { + uint16_t z = clear_zstencil & 0xFFFF; + /* FIXME: Remove bit for stencil clear? 
*/ + if (pg->surface_shape.z_format) { + *depth = convert_f16_to_float(z) / f16_max; + } else { + *depth = z / (float)0xFFFF; + } + break; + } + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { + *stencil = clear_zstencil & 0xFF; + uint32_t z = clear_zstencil >> 8; + if (pg->surface_shape.z_format) { + *depth = convert_f24_to_float(z) / f24_max; + } else { + *depth = z / (float)0xFFFFFF; + } + break; + } + default: + fprintf(stderr, "Unknown zeta surface format: 0x%x\n", + pg->surface_shape.zeta_format); + assert(false); + break; + } +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, + uint32_t result) +{ + PGRAPHState *pg = &d->pgraph; + + uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */ + uint32_t done = 0; // FIXME: Check + + hwaddr report_dma_len; + uint8_t *report_data = + (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); + + hwaddr offset = GET_MASK(parameter, NV097_GET_REPORT_OFFSET); + assert(offset < report_dma_len); + report_data += offset; + + stq_le_p((uint64_t *)&report_data[0], timestamp); + stl_le_p((uint32_t *)&report_data[8], result); + stl_le_p((uint32_t *)&report_data[12], done); + + NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx, result, offset); +} diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h new file mode 100644 index 00000000000..799e879c062 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -0,0 +1,383 @@ +/* + * QEMU Geforce NV2A PGRAPH internal definitions + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_H +#define HW_XBOX_NV2A_PGRAPH_H + +#include "xemu-config.h" +#include "qemu/osdep.h" +#include "qemu/bitmap.h" +#include "qemu/units.h" +#include "qemu/thread.h" +#include "cpu.h" + +#include "shaders.h" +#include "surface.h" +#include "util.h" + +typedef struct NV2AState NV2AState; +typedef struct PGRAPHNullState PGRAPHNullState; +typedef struct PGRAPHGLState PGRAPHGLState; +typedef struct PGRAPHVkState PGRAPHVkState; + +typedef struct VertexAttribute { + bool dma_select; + hwaddr offset; + + /* inline arrays are packed in order? + * Need to pass the offset to converted attributes */ + unsigned int inline_array_offset; + + float inline_value[4]; + + unsigned int format; + unsigned int size; /* size of the data type */ + unsigned int count; /* number of components */ + uint32_t stride; + + bool needs_conversion; + + float *inline_buffer; + bool inline_buffer_populated; +} VertexAttribute; + +typedef struct Surface { + bool draw_dirty; + bool buffer_dirty; + bool write_enabled_cache; + unsigned int pitch; + + hwaddr offset; +} Surface; + +typedef struct KelvinState { + hwaddr object_instance; +} KelvinState; + +typedef struct ContextSurfaces2DState { + hwaddr object_instance; + hwaddr dma_image_source; + hwaddr dma_image_dest; + unsigned int color_format; + unsigned int source_pitch, dest_pitch; + hwaddr source_offset, dest_offset; +} ContextSurfaces2DState; + +typedef struct ImageBlitState { + hwaddr object_instance; + hwaddr context_surfaces; + unsigned int operation; + unsigned int in_x, in_y; + unsigned int out_x, out_y; + unsigned int width, 
height; +} ImageBlitState; + +typedef struct BetaState { + hwaddr object_instance; + uint32_t beta; +} BetaState; + +typedef struct PGRAPHRenderer { + CONFIG_DISPLAY_RENDERER type; + const char *name; + struct { + void (*early_context_init)(void); + void (*init)(NV2AState *d); + void (*init_thread)(NV2AState *d); + void (*finalize)(NV2AState *d); + void (*clear_report_value)(NV2AState *d); + void (*clear_surface)(NV2AState *d, uint32_t parameter); + void (*draw_begin)(NV2AState *d); + void (*draw_end)(NV2AState *d); + void (*flip_stall)(NV2AState *d); + void (*flush_draw)(NV2AState *d); + void (*get_report)(NV2AState *d, uint32_t parameter); + void (*image_blit)(NV2AState *d); + void (*pre_savevm_trigger)(NV2AState *d); + void (*pre_savevm_wait)(NV2AState *d); + void (*pre_shutdown_trigger)(NV2AState *d); + void (*pre_shutdown_wait)(NV2AState *d); + void (*process_pending)(NV2AState *d); + void (*process_pending_reports)(NV2AState *d); + void (*surface_flush)(NV2AState *d); + void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write); + void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); + unsigned int (*get_surface_scale_factor)(NV2AState *d); + int (*get_framebuffer_surface)(NV2AState *d); + } ops; +} PGRAPHRenderer; + +typedef struct PGRAPHState { + QemuMutex lock; + + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + + int frame_time; + int draw_time; + + /* subchannels state we're not sure the location of... 
*/ + ContextSurfaces2DState context_surfaces_2d; + ImageBlitState image_blit; + KelvinState kelvin; + BetaState beta; + + hwaddr dma_color, dma_zeta; + Surface surface_color, surface_zeta; + unsigned int surface_type; + SurfaceShape surface_shape; + SurfaceShape last_surface_shape; + + struct { + int clip_x; + int clip_width; + int clip_y; + int clip_height; + int width; + int height; + } surface_binding_dim; // FIXME: Refactor + + hwaddr dma_a, dma_b; + bool texture_dirty[NV2A_MAX_TEXTURES]; + + bool texture_matrix_enable[NV2A_MAX_TEXTURES]; + + hwaddr dma_state; + hwaddr dma_notifies; + hwaddr dma_semaphore; + + hwaddr dma_report; + hwaddr report_offset; + bool zpass_pixel_count_enable; + + hwaddr dma_vertex_a, dma_vertex_b; + + uint32_t primitive_mode; + + bool enable_vertex_program_write; // FIXME: Not used anywhere??? + + uint32_t vertex_state_shader_v0[4]; + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; + bool program_data_dirty; + + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; + + /* lighting constant arrays */ + uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; + bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; + uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; + bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; + uint32_t ltc1[NV2A_LTC1_COUNT][4]; + bool ltc1_dirty[NV2A_LTC1_COUNT]; + + float material_alpha; + + // should figure out where these are in lighting context + float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; + float light_infinite_direction[NV2A_MAX_LIGHTS][3]; + float light_local_position[NV2A_MAX_LIGHTS][3]; + float light_local_attenuation[NV2A_MAX_LIGHTS][3]; + + float point_params[8]; + + VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; + uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; + + unsigned int inline_array_length; + uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_elements_length; + uint32_t 
inline_elements[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_buffer_length; + + unsigned int draw_arrays_length; + unsigned int draw_arrays_min_start; + unsigned int draw_arrays_max_count; + /* FIXME: Unknown size, possibly endless, 1250 will do for now */ + /* Keep in sync with size used in nv2a.c */ + int32_t draw_arrays_start[1250]; + int32_t draw_arrays_count[1250]; + bool draw_arrays_prevent_connect; + + uint32_t regs_[0x2000]; + DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t)); + + bool clearing; + bool waiting_for_nop; + bool waiting_for_flip; + bool waiting_for_context_switch; + + bool flush_pending; + QemuEvent flush_complete; + + bool sync_pending; + QemuEvent sync_complete; + + unsigned int surface_scale_factor; + uint8_t *scale_buf; + + const PGRAPHRenderer *renderer; + union { + PGRAPHNullState *null_renderer_state; + PGRAPHGLState *gl_renderer_state; + PGRAPHVkState *vk_renderer_state; + }; +} PGRAPHState; + +void pgraph_init(NV2AState *d); +void pgraph_init_thread(NV2AState *d); +void pgraph_destroy(PGRAPHState *pg); +void pgraph_context_switch(NV2AState *d, unsigned int channel_id); +int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, + uint32_t parameter, uint32_t *parameters, + size_t num_words_available, size_t max_lookahead_words, + bool inc); +void pgraph_check_within_begin_end_block(PGRAPHState *pg); + +void *pfifo_thread(void *arg); +void pfifo_kick(NV2AState *d); + +void pgraph_renderer_register(const PGRAPHRenderer *renderer); + +// FIXME: Move from here + +extern NV2AState *g_nv2a; + +// FIXME: Add new function pgraph_is_texture_sampler_active() + +static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r) +{ + assert(r % 4 == 0); + return pg->regs_[r]; +} + +static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v) +{ + assert(r % 4 == 0); + if (pg->regs_[r] != v) { + bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1); + } + pg->regs_[r] = v; +} + +void 
pgraph_clear_dirty_reg_map(PGRAPHState *pg); + +static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg) +{ + return test_bit(reg / sizeof(uint32_t), pg->regs_dirty); +} + +static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) +{ + assert(stage < NV2A_MAX_TEXTURES); + uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F; + return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5; +} + +static inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx) +{ + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4); + return // pgraph_is_texture_stage_active(pg, texture_idx) && + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); +} + +static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format) +{ + return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8; +} + +static inline bool pgraph_color_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); +} + +static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ZWRITEENABLE + | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); +} + +static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + switch (pg->surface_shape.anti_aliasing) { + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: + if (width) { *width *= 2; } + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: + if (width) { *width *= 
2; } + if (height) { *height *= 2; } + break; + default: + assert(false); + break; + } +} + +static inline void pgraph_apply_scaling_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + *width *= pg->surface_scale_factor; + *height *= pg->surface_scale_factor; +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]); +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil); + +/* Vertex */ +void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); +void pgraph_reset_inline_buffers(PGRAPHState *pg); +void pgraph_reset_draw_arrays(PGRAPHState *pg); +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data); + +/* RDI */ +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address); +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val); + +static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba) +{ + rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */ + rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */ + rgba[2] = (argb & 0xFF) / 255.0f; /* blue */ + rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */ +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result); + +#endif diff --git a/hw/xbox/nv2a/pgraph/profile.c b/hw/xbox/nv2a/pgraph/profile.c new file mode 100644 index 00000000000..69a1b5bfbd1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/profile.c @@ -0,0 +1,74 @@ +/* + * QEMU Geforce NV2A profiling helpers + * + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "../nv2a_int.h" + +NV2AStats g_nv2a_stats; + +void nv2a_profile_increment(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + const int64_t fps_update_interval = 250000; + g_nv2a_stats.last_flip_time = now; + + static int64_t frame_count = 0; + frame_count++; + + static int64_t ts = 0; + int64_t delta = now - ts; + if (delta >= fps_update_interval) { + g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; + ts = now; + frame_count = 0; + } +} + +void nv2a_profile_flip_stall(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; + + g_nv2a_stats.frame_working.mspf = render_time; + g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = + g_nv2a_stats.frame_working; + g_nv2a_stats.frame_ptr = + (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; + g_nv2a_stats.frame_count++; + memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); +} + +const char *nv2a_profile_get_counter_name(unsigned int cnt) +{ + const char *default_names[NV2A_PROF__COUNT] = { + #define _X(x) stringify(x), + NV2A_PROF_COUNTERS_XMAC + #undef _X + }; + + assert(cnt < NV2A_PROF__COUNT); + return default_names[cnt] + 10; /* 'NV2A_PROF_' */ +} + +int nv2a_profile_get_counter_value(unsigned int cnt) +{ + assert(cnt < NV2A_PROF__COUNT); + unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % + NV2A_PROF_NUM_FRAMES; + return g_nv2a_stats.frame_history[idx].counters[cnt]; +} diff --git a/hw/xbox/nv2a/psh.h b/hw/xbox/nv2a/pgraph/psh.h similarity index 96% rename from 
hw/xbox/nv2a/psh.h rename to hw/xbox/nv2a/pgraph/psh.h index 65ef4e43a20..6232a2834a5 100644 --- a/hw/xbox/nv2a/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -20,7 +20,8 @@ #ifndef HW_NV2A_PSH_H #define HW_NV2A_PSH_H -#include "shaders_common.h" +#include +#include enum PshAlphaFunc { ALPHA_FUNC_NEVER, @@ -51,6 +52,8 @@ enum ConvolutionFilter { }; typedef struct PshState { + bool vulkan; + /* fragment shader - register combiner stuff */ uint32_t combiner_control; uint32_t shader_stage_program; @@ -67,6 +70,7 @@ typedef struct PshState { bool compare_mode[4][4]; bool alphakill[4]; enum ConvolutionFilter conv_tex[4]; + bool tex_x8y24[4]; float border_logical_size[4][3]; float border_inv_real_size[4][3]; @@ -82,6 +86,4 @@ typedef struct PshState { bool smooth_shading; } PshState; -MString *psh_translate(const PshState state); - #endif diff --git a/hw/xbox/nv2a/pgraph/rdi.c b/hw/xbox/nv2a/pgraph/rdi.c new file mode 100644 index 00000000000..297c7a67c0c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/rdi.c @@ -0,0 +1,60 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "../nv2a_int.h" + +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address) +{ + uint32_t r = 0; + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + r = pg->vsh_constants[address / 4][3 - address % 4]; + break; + default: + fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", + select, address); + assert(false); + break; + } + return r; +} + +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val) +{ + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert(false); /* Untested */ + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + pg->vsh_constants_dirty[address / 4] |= + (val != pg->vsh_constants[address / 4][3 - address % 4]); + pg->vsh_constants[address / 4][3 - address % 4] = val; + break; + default: + NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", + select, address, val); + break; + } +} diff --git a/hw/xbox/nv2a/s3tc.c b/hw/xbox/nv2a/pgraph/s3tc.c similarity index 71% rename from hw/xbox/nv2a/s3tc.c rename to hw/xbox/nv2a/pgraph/s3tc.c index 454cc43aee4..affd058e667 100644 --- a/hw/xbox/nv2a/s3tc.c +++ b/hw/xbox/nv2a/pgraph/s3tc.c @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -25,13 +25,9 @@ #include "qemu/osdep.h" #include "s3tc.h" -static inline void decode_bc1_colors(uint16_t c0, - uint16_t c1, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool transparent) +static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4], + uint8_t g[4], uint8_t b[4], uint8_t a[16], + bool transparent) { r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8, g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC, @@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0, } } -static inline void 
write_block_to_texture(uint8_t *converted_data, - uint32_t indices, - int i, int j, int width, - int z_pos_factor, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool separate_alpha) +static void write_block_to_texture(uint8_t *converted_data, uint32_t indices, + int i, int j, int width, int z_pos_factor, + uint8_t r[4], uint8_t g[4], uint8_t b[4], + uint8_t a[16], bool separate_alpha) { int x0 = i * 4, y0 = j * 4; @@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data, int xy_index = y_index + x - x0; uint8_t index = (indices >> 2 * xy_index) & 0x03; uint8_t alpha_index = separate_alpha ? xy_index : index; - uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index]; - *(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color; + uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4; + *p++ = r[index]; + *p++ = g[index]; + *p++ = b[index]; + *p++ = a[alpha_index]; } } } -static inline void decompress_dxt1_block(const uint8_t block_data[8], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt1_block(const uint8_t block_data[8], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[0], c1 = ((uint16_t*)block_data)[1]; @@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8], r, g, b, a, false); } -static inline void decompress_dxt3_block(const uint8_t block_data[16], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt3_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16], r, g, b, a, true); } -static inline void decompress_dxt5_block(const uint8_t block_data[16], - 
uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt5_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16], r, g, b, a, true); } -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth) +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth) { assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format, int sub_block_index = block_index * block_depth + slice; int z_pos_factor = (k * block_depth + slice) * width * height; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); } else { @@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format, return converted_data; } -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height) +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height) 
{ assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, for (int j = 0; j < num_blocks_y; j++) { for (int i = 0; i < num_blocks_x; i++) { int block_index = j * num_blocks_x + i; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * block_index, converted_data, i, j, width, 0); } else { diff --git a/hw/xbox/nv2a/s3tc.h b/hw/xbox/nv2a/pgraph/s3tc.h similarity index 63% rename from hw/xbox/nv2a/s3tc.h rename to hw/xbox/nv2a/pgraph/s3tc.h index 87dad0d3c4f..6a10074e748 100644 --- a/hw/xbox/nv2a/s3tc.h +++ b/hw/xbox/nv2a/pgraph/s3tc.h @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -22,18 +22,23 @@ * THE SOFTWARE. 
*/ -#ifndef S3TC_H -#define S3TC_H +#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H +#define HW_XBOX_NV2A_PGRAPH_S3TC_H -#include "gl/gloffscreen.h" +#include -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth); +enum S3TC_DECOMPRESS_FORMAT { + S3TC_DECOMPRESS_FORMAT_DXT1, + S3TC_DECOMPRESS_FORMAT_DXT3, + S3TC_DECOMPRESS_FORMAT_DXT5, +}; -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height); +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth); + +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height); #endif diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c new file mode 100644 index 00000000000..82737b44f48 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -0,0 +1,295 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/debug.h" +#include "texture.h" +#include "pgraph.h" +#include "shaders.h" + +ShaderState pgraph_get_shader_state(PGRAPHState *pg) +{ + bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 2; + + bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 0; + + int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); + + pg->program_data_dirty = false; + + ShaderState state; + + // We will hash it, so make sure any padding is zerod + memset(&state, 0, sizeof(ShaderState)); + + state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN; + state.surface_scale_factor = pg->surface_scale_factor; + + state.compressed_attrs = pg->compressed_attrs; + state.uniform_attrs = pg->uniform_attrs; + state.swizzle_attrs = pg->swizzle_attrs; + + /* register combiner stuff */ + state.psh.vulkan = state.vulkan; + state.psh.window_clip_exclusive = + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; + state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL); + state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG); + state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL); + state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0); + state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1); + + state.psh.alpha_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; + state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC); + + state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; + + state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); + + state.fixed_function = fixed_function; + + /* 
fixed function stuff */ + if (fixed_function) { + state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_SKIN); + state.lighting = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING); + state.normalization = + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; + + /* color material */ + state.emission_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION); + state.ambient_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT); + state.diffuse_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE); + state.specular_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR); + } + + /* vertex program stuff */ + state.vertex_program = vertex_program, + state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); + state.point_size = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f; + if (state.point_params_enable) { + for (int i = 0; i < 8; i++) { + state.point_params[i] = pg->point_params[i]; + } + } + + /* geometry shader stuff */ + state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; + state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); + state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE); + + state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + state.psh.smooth_shading = state.smooth_shading; + + 
state.program_length = 0; + + if (vertex_program) { + // copy in vertex program tokens + for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; + i++) { + uint32_t *cur_token = (uint32_t *)&pg->program_data[i]; + memcpy(&state.program_data[state.program_length], cur_token, + VSH_TOKEN_SIZE * sizeof(uint32_t)); + state.program_length++; + + if (vsh_get_field(cur_token, FLD_FINAL)) { + break; + } + } + } + + /* Texgen */ + for (int i = 0; i < 4; i++) { + unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; + for (int j = 0; j < 4; j++) { + unsigned int masks[] = { + (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q + }; + state.texgen[i][j] = + (enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]); + } + } + + /* Fog */ + state.fog_enable = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE; + if (state.fog_enable) { + /*FIXME: Use CSV0_D? */ + state.fog_mode = (enum VshFogMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE); + state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_FOGGENMODE); + } else { + /* FIXME: Do we still pass the fogmode? 
*/ + state.fog_mode = (enum VshFogMode)0; + state.foggen = (enum VshFoggen)0; + } + + /* Texture matrices */ + for (int i = 0; i < 4; i++) { + state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; + } + + /* Lighting */ + if (state.lighting) { + for (int i = 0; i < NV2A_MAX_LIGHTS; i++) { + state.light[i] = (enum VshLight)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); + } + } + + /* Copy content of enabled combiner stages */ + int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF; + for (int i = 0; i < num_stages; i++) { + state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4); + state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4); + state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4); + state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4); + // constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + // constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + state.psh.compare_mode[i][j] = + (pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1; + } + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4); + bool enabled = pgraph_is_texture_stage_active(pg, i) && + (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE); + if (!enabled) { + continue; + } + + state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; + + uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4); + unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + state.psh.rect_tex[i] = f.linear; + state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT; + + uint32_t border_source = + GET_MASK(tex_fmt, 
NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + state.psh.border_logical_size[i][0] = 0.0f; + state.psh.border_logical_size[i][1] = 0.0f; + state.psh.border_logical_size[i][2] = 0.0f; + if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + if (!f.linear && !cubemap) { + // The actual texture will be (at least) double the reported + // size and shifted by a 4 texel border but texture coordinates + // will still be relative to the reported size. + unsigned int reported_width = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int reported_height = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int reported_depth = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + state.psh.border_logical_size[i][0] = reported_width; + state.psh.border_logical_size[i][1] = reported_height; + state.psh.border_logical_size[i][2] = reported_depth; + + if (reported_width < 8) { + state.psh.border_inv_real_size[i][0] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][0] = + 1.0f / (reported_width * 2.0f); + } + if (reported_height < 8) { + state.psh.border_inv_real_size[i][1] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][1] = + 1.0f / (reported_height * 2.0f); + } + if (reported_depth < 8) { + state.psh.border_inv_real_size[i][2] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][2] = + 1.0f / (reported_depth * 2.0f); + } + } else { + NV2A_UNIMPLEMENTED( + "Border source texture with linear %d cubemap %d", f.linear, + cubemap); + } + } + + /* Keep track of whether texture data has been loaded as signed + * normalized integers or not. This dictates whether or not we will need + * to re-map in fragment shader for certain texture modes (e.g. + * bumpenvmap). + * + * FIXME: When signed texture data is loaded as unsigned and remapped in + * fragment shader, there may be interpolation artifacts. Fix this to + * support signed textures more appropriately. 
+ */ +#if 0 // FIXME + state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) + || (f.gl_internal_format == GL_RG8_SNORM); +#endif + state.psh.shadow_map[i] = f.depth; + + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4); + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; + /* FIXME: We do not distinguish between min and mag when + * performing convolution. Just use it if specified for min (common AA + * case). + */ + if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { + int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); + assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || + k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); + kernel = (enum ConvolutionFilter)k; + } + + state.psh.conv_tex[i] = kernel; + } + + return state; +} diff --git a/hw/xbox/nv2a/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h similarity index 56% rename from hw/xbox/nv2a/shaders.h rename to hw/xbox/nv2a/pgraph/shaders.h index 0362da10993..842658f8087 100644 --- a/hw/xbox/nv2a/shaders.h +++ b/hw/xbox/nv2a/pgraph/shaders.h @@ -18,17 +18,14 @@ * License along with this library; if not, see . 
*/ -#ifndef HW_NV2A_SHADERS_H -#define HW_NV2A_SHADERS_H +#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H +#define HW_XBOX_NV2A_PGRAPH_SHADERS_H -#include "qemu/thread.h" -#include "qapi/qmp/qstring.h" -#include "gl/gloffscreen.h" +#include +#include "hw/xbox/nv2a/nv2a_regs.h" -#include "nv2a_regs.h" #include "vsh.h" #include "psh.h" -#include "lru.h" enum ShaderPrimitiveMode { PRIM_TYPE_INVALID, @@ -57,10 +54,13 @@ enum MaterialColorSource { }; typedef struct ShaderState { + bool vulkan; unsigned int surface_scale_factor; PshState psh; uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; bool texture_matrix_enable[4]; enum VshTexgen texgen[4][4]; @@ -101,61 +101,8 @@ typedef struct ShaderState { bool smooth_shading; } ShaderState; -typedef struct ShaderBinding { - GLuint gl_program; - GLenum gl_primitive_mode; - - GLint psh_constant_loc[9][2]; - GLint alpha_ref_loc; - - GLint bump_mat_loc[NV2A_MAX_TEXTURES]; - GLint bump_scale_loc[NV2A_MAX_TEXTURES]; - GLint bump_offset_loc[NV2A_MAX_TEXTURES]; - GLint tex_scale_loc[NV2A_MAX_TEXTURES]; - - GLint surface_size_loc; - GLint clip_range_loc; - - GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; - uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - - GLint inv_viewport_loc; - GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; - GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; - GLint ltc1_loc[NV2A_LTC1_COUNT]; - - GLint fog_color_loc; - GLint fog_param_loc[2]; - GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; - GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; - GLint light_local_position_loc[NV2A_MAX_LIGHTS]; - GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; - - GLint clip_region_loc[8]; - - GLint material_alpha_loc; -} ShaderBinding; - -typedef struct ShaderLruNode { - LruNode node; - bool cached; - void *program; - size_t program_size; - GLenum program_format; - ShaderState state; - ShaderBinding *binding; - QemuThread *save_thread; -} ShaderLruNode; - typedef struct PGRAPHState PGRAPHState; -GLenum 
get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode); -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state); -ShaderBinding *generate_shaders(const ShaderState *state); - -void shader_cache_init(PGRAPHState *pg); -void shader_write_cache_reload_list(PGRAPHState *pg); -bool shader_load_from_memory(ShaderLruNode *snode); -void shader_cache_to_disk(ShaderLruNode *snode); +ShaderState pgraph_get_shader_state(PGRAPHState *pg); #endif diff --git a/hw/xbox/nv2a/pgraph/surface.h b/hw/xbox/nv2a/pgraph/surface.h new file mode 100644 index 00000000000..d51bc04ea4f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/surface.h @@ -0,0 +1,35 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H +#define HW_XBOX_NV2A_PGRAPH_SURFACE_H + +typedef struct SurfaceShape { + unsigned int z_format; + unsigned int color_format; + unsigned int zeta_format; + unsigned int log_width, log_height; + unsigned int clip_x, clip_y; + unsigned int clip_width, clip_height; + unsigned int anti_aliasing; +} SurfaceShape; + +#endif diff --git a/hw/xbox/nv2a/swizzle.c b/hw/xbox/nv2a/pgraph/swizzle.c similarity index 100% rename from hw/xbox/nv2a/swizzle.c rename to hw/xbox/nv2a/pgraph/swizzle.c diff --git a/hw/xbox/nv2a/swizzle.h b/hw/xbox/nv2a/pgraph/swizzle.h similarity index 94% rename from hw/xbox/nv2a/swizzle.h rename to hw/xbox/nv2a/pgraph/swizzle.h index 21889b39cf2..78ff0740a44 100644 --- a/hw/xbox/nv2a/swizzle.h +++ b/hw/xbox/nv2a/pgraph/swizzle.h @@ -18,8 +18,10 @@ * License along with this library; if not, see . */ -#ifndef HW_XBOX_SWIZZLE_H -#define HW_XBOX_SWIZZLE_H +#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H +#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H + +#include void swizzle_box( const uint8_t *src_buf, diff --git a/hw/xbox/nv2a/pgraph/texture.c b/hw/xbox/nv2a/pgraph/texture.c new file mode 100644 index 00000000000..e5350ea8d48 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.c @@ -0,0 +1,405 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "texture.h" +#include "util.h" + +const BasicColorFormatInfo kelvin_color_format_info_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true, + true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true }, +}; + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + unsigned int dma_select = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); + + hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4); + + hwaddr dma_len; + uint8_t *texture_data; + if (dma_select) { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); + } else { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); + } + assert(offset < dma_len); + texture_data += offset; + + return texture_data - d->vram_ptr; +} + +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4); + 
bool palette_dma_select = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); + unsigned int palette_length_index = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); + unsigned int palette_offset = + palette & NV_PGRAPH_TEXPALETTE0_OFFSET; + + unsigned int palette_length = 0; + switch (palette_length_index) { + case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; + default: assert(false); break; + } + if (length) { + *length = palette_length; + } + + hwaddr palette_dma_len; + uint8_t *palette_data; + if (palette_dma_select) { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); + } else { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); + } + assert(palette_offset < palette_dma_len); + palette_data += palette_offset; + + return palette_data - d->vram_ptr; +} + +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format]; + size_t length = 0; + + if (f.linear) { + assert(shape->cubemap == false); + assert(shape->dimensionality == 2); + length = shape->height * shape->pitch; + } else { + if (shape->dimensionality >= 2) { + unsigned int w = shape->width, h = shape->height; + int level; + if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) { + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + length += w * h * f.bytes_per_pixel; + w /= 2; + h /= 2; + } + } else { + /* Compressed textures are a bit different */ + unsigned int block_size = + shape->color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : 16; + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + unsigned int phys_w = (w + 3) & ~3, + phys_h = (h + 3) & ~3; + length += phys_w/4 * phys_h/4 * block_size; + w /= 2; + h /= 2; + } + } + if (shape->cubemap) { + assert(shape->dimensionality == 2); + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + length *= 6; + } + if (shape->dimensionality >= 3) { + length *= shape->depth; + } + } + } + + return length; +} + +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx) +{ + int i = texture_idx; + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4); + uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4); + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + +#if DEBUG_NV2A + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4); + uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4); +#endif + + unsigned int min_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); + unsigned int max_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); + + unsigned int pitch = + GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); + + bool cubemap = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + unsigned int dimensionality = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + + int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F; + if (tex_mode == 0x02) { + assert(pgraph_is_texture_enabled(pg, texture_idx)); + // assert(state.dimensionality == 3); + + // OVERRIDE + // dimensionality = 3; + } + + unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); + unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); + unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + unsigned int 
rect_width = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_WIDTH); + unsigned int rect_height = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_HEIGHT); +#ifdef DEBUG_NV2A + unsigned int lod_bias = + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); +#endif + unsigned int border_source = GET_MASK(fmt, + NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + + NV2A_DPRINTF(" texture %d is format 0x%x, " + "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," + " filter %x %x, levels %d-%d %d bias %d\n", + i, color_format, address, + rect_width, rect_height, + 1 << log_width, 1 << log_height, 1 << log_depth, + pitch, + cubemap ? "; cubemap" : "", + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), + min_mipmap_level, max_mipmap_level, levels, + lod_bias); + + assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map)); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + if (f.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", + color_format); + abort(); + } + + unsigned int width, height, depth; + if (f.linear) { + assert(dimensionality == 2); + width = rect_width; + height = rect_height; + depth = 1; + } else { + width = 1 << log_width; + height = 1 << log_height; + depth = 1 << log_depth; + pitch = 0; + + levels = MIN(levels, max_mipmap_level + 1); + + /* Discard mipmap levels that would be smaller than 1x1. + * FIXME: Is this actually needed? + * + * >> Level 0: 32 x 4 + * Level 1: 16 x 2 + * Level 2: 8 x 1 + * Level 3: 4 x 1 + * Level 4: 2 x 1 + * Level 5: 1 x 1 + */ + levels = MIN(levels, MAX(log_width, log_height) + 1); + assert(levels > 0); + + if (dimensionality == 3) { + /* FIXME: What about 3D mipmaps? */ + if (log_width < 2 || log_height < 2) { + /* Base level is smaller than 4x4... 
*/ + levels = 1; + } else { + levels = MIN(levels, MIN(log_width, log_height) - 1); + } + } + min_mipmap_level = MIN(levels-1, min_mipmap_level); + max_mipmap_level = MIN(levels-1, max_mipmap_level); + } + + TextureShape shape; + + // We will hash it, so make sure any padding is zero + memset(&shape, 0, sizeof(shape)); + + shape.cubemap = cubemap; + shape.dimensionality = dimensionality; + shape.color_format = color_format; + shape.levels = levels; + shape.width = width; + shape.height = height; + shape.depth = depth; + shape.min_mipmap_level = min_mipmap_level; + shape.max_mipmap_level = max_mipmap_level; + shape.pitch = pitch; + shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; + return shape; +} + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size) +{ + size_t size = 0; + uint8_t *converted_data; + + if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { + size = width * height * depth * 4; + converted_data = g_malloc(size); + const uint8_t *src = data; + uint32_t *dst = (uint32_t *)converted_data; + for (int z = 0; z < depth; z++) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint8_t index = src[y * row_pitch + x]; + uint32_t color = *(uint32_t *)(palette_data + index * 4); + *dst++ = color; + } + } + src += slice_pitch; + } + } else if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || + s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { + // TODO: Investigate whether a non-1 depth is possible. + // Generally the hardware asserts when attempting to use volumetric + // textures in linear formats. 
+ assert(depth == 1); /* FIXME */ + // FIXME: only valid if control0 register allows for colorspace + // conversion + size = width * height * 4; + converted_data = g_malloc(size); + uint8_t *pixel = converted_data; + for (int y = 0; y < height; y++) { + const uint8_t *line = &data[y * row_pitch * depth]; + for (int x = 0; x < width; x++, pixel += 4) { + if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } else { + convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } + pixel[3] = 255; + } + } + } else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { + assert(depth == 1); /* FIXME */ + size = width * height * 3; + converted_data = g_malloc(size); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2); + int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3]; + /* Maps 5 bit G and B signed value range to 8 bit + * signed values. R is probably unsigned. 
+ */ + rgb655 ^= (1 << 9) | (1 << 4); + pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; + pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; + pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; + } + } + } else { + return NULL; + } + + if (converted_size) { + *converted_size = size; + } + return converted_data; +} diff --git a/hw/xbox/nv2a/pgraph/texture.h b/hw/xbox/nv2a/pgraph/texture.h new file mode 100644 index 00000000000..4c9818ca3cc --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.h @@ -0,0 +1,67 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H +#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H + +#include "qemu/osdep.h" +#include "cpu.h" + +#include +#include + +#include "hw/xbox/nv2a/nv2a_regs.h" + +typedef struct PGRAPHState PGRAPHState; + +typedef struct TextureShape { + bool cubemap; + unsigned int dimensionality; + unsigned int color_format; + unsigned int levels; + unsigned int width, height, depth; + bool border; + + unsigned int min_mipmap_level, max_mipmap_level; + unsigned int pitch; +} TextureShape; + +typedef struct BasicColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + bool depth; +} BasicColorFormatInfo; + +extern const BasicColorFormatInfo kelvin_color_format_info_map[66]; + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size); + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx); +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length); +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx); +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape); + +#endif diff --git a/hw/xbox/nv2a/gl/gloffscreen_common.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen_common.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c diff --git a/hw/xbox/nv2a/gl/gloffscreen.h b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen.h rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h diff --git a/hw/xbox/nv2a/gl/gloffscreen_sdl.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c similarity index 98% rename from hw/xbox/nv2a/gl/gloffscreen_sdl.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c index 
2221067ddd4..277694cc509 100644 --- a/hw/xbox/nv2a/gl/gloffscreen_sdl.c +++ b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c @@ -1,7 +1,7 @@ /* * Offscreen OpenGL abstraction layer -- SDL based * - * Copyright (c) 2018-2021 Matt Borgerson + * Copyright (c) 2018-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/hw/xbox/nv2a/thirdparty/meson.build b/hw/xbox/nv2a/pgraph/thirdparty/meson.build similarity index 62% rename from hw/xbox/nv2a/thirdparty/meson.build rename to hw/xbox/nv2a/pgraph/thirdparty/meson.build index ec4068a77c6..d0139f17639 100644 --- a/hw/xbox/nv2a/thirdparty/meson.build +++ b/hw/xbox/nv2a/pgraph/thirdparty/meson.build @@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu', include_directories: ['.', 'nv2a_vsh_cpu/src']) nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu, include_directories: ['nv2a_vsh_cpu/src']) + +libgloffscreen = static_library('libgloffscreen', + sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'), + dependencies: sdl) +gloffscreen = declare_dependency(link_with: libgloffscreen, + include_directories: ['gloffscreen']) diff --git a/hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu b/hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu similarity index 100% rename from hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu rename to hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu diff --git a/hw/xbox/nv2a/pgraph/util.h b/hw/xbox/nv2a/pgraph/util.h new file mode 100644 index 00000000000..c8a28d3c0d8 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/util.h @@ -0,0 +1,86 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; 
either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H +#define HW_XBOX_NV2A_PGRAPH_UTIL_H + +static const float f16_max = 511.9375f; +static const float f24_max = 1.0E30; + +/* 16 bit to [0.0, F16_MAX = 511.9375] */ +static inline +float convert_f16_to_float(uint16_t f16) { + if (f16 == 0x0000) { return 0.0; } + uint32_t i = (f16 << 11) + 0x3C000000; + return *(float*)&i; +} + +/* 24 bit to [0.0, F24_MAX] */ +static inline +float convert_f24_to_float(uint32_t f24) { + assert(!(f24 >> 24)); + f24 &= 0xFFFFFF; + if (f24 == 0x000000) { return 0.0; } + uint32_t i = f24 << 7; + return *(float*)&i; +} + +static inline +uint8_t cliptobyte(int x) +{ + return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); +} + +static inline +void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 1] - 128; + e = (int)line[ix * 2 + 1] - 128; + } else { + d = (int)line[ix * 2 + 1] - 128; + e = (int)line[ix * 2 + 3] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +static inline +void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2 + 1] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 2] - 128; + e = (int)line[ix * 2 + 0] - 128; + } else { + d = (int)line[ix * 2 + 0] - 128; + e = (int)line[ix * 2 + 2] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c new file mode 100644 index 00000000000..47f7cb56880 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vertex.c @@ -0,0 +1,131 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" + +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data) +{ + assert(attr->count <= 4); + attr->inline_value[0] = 0.0f; + attr->inline_value[1] = 0.0f; + attr->inline_value[2] = 0.0f; + attr->inline_value[3] = 1.0f; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + for (uint32_t i = 0; i < attr->count; ++i) { + attr->inline_value[i] = (float)data[i] / 255.0f; + } + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + memcpy(attr->inline_value, data, attr->size * attr->count); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = (float)*val; + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { + /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ + const int32_t val = *(const int32_t *)data; + int32_t x = val & 0x7FF; + if (x & 0x400) { + x |= 0xFFFFF800; + } + int32_t y = (val >> 11) & 0x7FF; + if (y & 0x400) { + y |= 0xFFFFF800; + } + int32_t z = (val >> 22) & 0x7FF; + if (z & 0x200) { + z |= 0xFFFFFC00; + } + + attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); + attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); + attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); + break; + } + default: + fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n", + attr->format); + assert(!"Unsupported attribute type"); + break; + } +} + +void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr) +{ + VertexAttribute *attribute = &pg->vertex_attributes[attr]; + + if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { + return; + } + + /* Now upload the previous attribute value */ + attribute->inline_buffer_populated = true; + for (int i = 0; i < pg->inline_buffer_length; i++) { + memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, + sizeof(float) * 4); + } +} + +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = &pg->vertex_attributes[i]; + if (attribute->inline_buffer_populated) { + memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], + attribute->inline_value, sizeof(float) * 4); + } + } + + pg->inline_buffer_length++; +} + +void pgraph_reset_inline_buffers(PGRAPHState *pg) +{ + pg->inline_elements_length = 0; + pg->inline_array_length = 0; + pg->inline_buffer_length = 0; + pgraph_reset_draw_arrays(pg); +} + +void pgraph_reset_draw_arrays(PGRAPHState *pg) +{ + pg->draw_arrays_length = 0; + pg->draw_arrays_min_start = -1; + pg->draw_arrays_max_count = 0; + pg->draw_arrays_prevent_connect = false; +} diff --git 
a/hw/xbox/nv2a/pgraph/vk/blit.c b/hw/xbox/nv2a/pgraph/vk/blit.c new file mode 100644 index 00000000000..e4529a3c586 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/blit.c @@ -0,0 +1,177 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +void pgraph_vk_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_vk_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr); + if (surf_src) { + pgraph_vk_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_vk_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be 
discarded. + surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? */ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit 
operation: 0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/vk/buffer.c b/hw/xbox/nv2a/pgraph/vk/buffer.c new file mode 100644 index 00000000000..440f8ae56e9 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/buffer.c @@ -0,0 +1,206 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" +#include + +static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkBufferCreateInfo buffer_create_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = buffer->buffer_size, + .usage = buffer->usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info, + &buffer->alloc_info, &buffer->buffer, + &buffer->allocation, NULL)); +} + +static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation); + buffer->buffer = VK_NULL_HANDLE; + buffer->allocation = VK_NULL_HANDLE; +} + +void pgraph_vk_init_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Profile buffer sizes + + VmaAllocationCreateInfo host_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + }; + VmaAllocationCreateInfo device_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .buffer_size = 4096 * 4096 * 4, + }; + + r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = (1024 * 10) * (1024 * 10) * 8, + }; + + r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .buffer_size = sizeof(pg->inline_elements) * 100, + }; + + r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size, + }; + + // FIXME: Don't assume that we can render with host mapped buffer + r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = memory_region_size(d->vram), + }; + + r->bitmap_size = memory_region_size(d->vram) / 4096; + r->uploaded_bitmap = bitmap_new(r->bitmap_size); + bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + + r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH * + 4 * sizeof(float) * 10, + }; + + r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size, + }; + + r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .buffer_size = 8 * 1024 * 1024, + }; + + r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size, + }; + + for (int i = 0; i < BUFFER_COUNT; 
i++) { + create_buffer(pg, &r->storage_buffers[i]); + } + + // FIXME: Add fallback path for device using host mapped memory + + int buffers_to_map[] = { BUFFER_VERTEX_RAM, + BUFFER_INDEX_STAGING, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM_STAGING }; + + for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) { + VK_CHECK(vmaMapMemory( + r->allocator, r->storage_buffers[buffers_to_map[i]].allocation, + (void **)&r->storage_buffers[buffers_to_map[i]].mapped)); + } +} + +void pgraph_vk_finalize_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < BUFFER_COUNT; i++) { + if (r->storage_buffers[i].mapped) { + vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation); + } + destroy_buffer(pg, &r->storage_buffers[i]); + } + + g_free(r->uploaded_bitmap); + r->uploaded_bitmap = NULL; +} + +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b = &r->storage_buffers[index]; + return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size; +} + +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, size_t count, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDeviceSize total_size = 0; + for (int i = 0; i < count; i++) { + total_size += sizes[i]; + } + assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment)); + + StorageBuffer *b = &r->storage_buffers[index]; + VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment); + + assert(b->mapped); + + for (int i = 0; i < count; i++) { + b->buffer_offset = ROUND_UP(b->buffer_offset, alignment); + memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]); + b->buffer_offset += sizes[i]; + } + + return starting_offset; +} diff --git a/hw/xbox/nv2a/pgraph/vk/command.c b/hw/xbox/nv2a/pgraph/vk/command.c new file mode 
100644 index 00000000000..0e9fc9a2ee1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/command.c @@ -0,0 +1,119 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static void create_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + VkCommandPoolCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = indices.queue_family, + }; + VK_CHECK( + vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool)); +} + +static void destroy_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyCommandPool(r->device, r->command_pool, NULL); +} + +static void create_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkCommandBufferAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = r->command_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = ARRAY_SIZE(r->command_buffers), + }; + VK_CHECK( + vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers)); + + r->command_buffer = r->command_buffers[0]; 
+ r->aux_command_buffer = r->command_buffers[1]; +} + +static void destroy_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeCommandBuffers(r->device, r->command_pool, + ARRAY_SIZE(r->command_buffers), r->command_buffers); + + r->command_buffer = VK_NULL_HANDLE; + r->aux_command_buffer = VK_NULL_HANDLE; +} + +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_aux_command_buffer); + r->in_aux_command_buffer = true; + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info)); + + return r->aux_command_buffer; +} + +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_aux_command_buffer); + + VK_CHECK(vkEndCommandBuffer(cmd)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }; + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + r->in_aux_command_buffer = false; +} + +void pgraph_vk_init_command_buffers(PGRAPHState *pg) +{ + create_command_pool(pg); + create_command_buffers(pg); +} + +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg) +{ + destroy_command_buffers(pg); + destroy_command_pool(pg); +} \ No newline at end of file diff --git a/hw/xbox/nv2a/pgraph/vk/constants.h b/hw/xbox/nv2a/pgraph/vk/constants.h new file mode 100644 index 00000000000..9ae8ba6dd4c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/constants.h @@ -0,0 +1,418 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * 
modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include + +static const VkFilter pgraph_texture_min_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_LINEAR, +}; + +static const VkFilter pgraph_texture_mag_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + 0, + VK_FILTER_LINEAR /* TODO: Convolution filter... 
*/ +}; + +static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = { + 0, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const VkBlendFactor pgraph_blend_factor_vk_map[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + 0, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const VkBlendOp pgraph_blend_equation_vk_map[] = { + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const VkCullModeFlags pgraph_cull_face_vk_map[] = { + 0, + VK_CULL_MODE_FRONT_BIT, + VK_CULL_MODE_BACK_BIT, + VK_CULL_MODE_FRONT_AND_BACK, +}; + +static const VkCompareOp pgraph_depth_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkCompareOp pgraph_stencil_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + 
VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkStencilOp pgraph_stencil_op_vk_map[] = { + 0, + VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_ZERO, + VK_STENCIL_OP_REPLACE, + VK_STENCIL_OP_INCREMENT_AND_CLAMP, + VK_STENCIL_OP_DECREMENT_AND_CLAMP, + VK_STENCIL_OP_INVERT, + VK_STENCIL_OP_INCREMENT_AND_WRAP, + VK_STENCIL_OP_DECREMENT_AND_WRAP, +}; + +static const VkPolygonMode pgraph_polygon_mode_vk_map[] = { + [POLY_MODE_FILL] = VK_POLYGON_MODE_FILL, + [POLY_MODE_POINT] = VK_POLYGON_MODE_POINT, + [POLY_MODE_LINE] = VK_POLYGON_MODE_LINE, +}; + +typedef struct VkColorFormatInfo { + VkFormat vk_format; + VkComponentMapping component_map; +} VkColorFormatInfo; + +static const VkColorFormatInfo kelvin_color_format_vk_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { + 
VK_FORMAT_B8G8R8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, 
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { + VK_FORMAT_R8G8B8_SNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. 
+ * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { + VK_FORMAT_R16_SFLOAT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { + VK_FORMAT_R16_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, +}; + +typedef struct BasicSurfaceFormatInfo { + unsigned int bytes_per_pixel; +} BasicSurfaceFormatInfo; + +typedef struct SurfaceFormatInfo { + unsigned int host_bytes_per_pixel; + VkFormat vk_format; + VkImageUsageFlags usage; + VkImageAspectFlags aspect; +} SurfaceFormatInfo; + +static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 }, +}; + +static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + { + // FIXME: Force alpha to zero + 2, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + { + 2, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + { + // FIXME: Force alpha to zero + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + { + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + 
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + { + // FIXME: Map channel color + 1, + VK_FORMAT_R8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + { + // FIXME: Map channel color + 2, + VK_FORMAT_R8G8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, +}; + +static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 }, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 }, +}; + +// FIXME: Actually support stored float format + +static const SurfaceFormatInfo zeta_d16 = { + 2, + VK_FORMAT_D16_UNORM, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT, +}; + +static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = { + 8, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = { + 4, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c new file mode 100644 index 00000000000..a8cb08c4a2f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -0,0 +1,59 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" +#include "debug.h" + +#ifndef _WIN32 +#include +#endif + +#ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" +#endif + +int nv2a_vk_dgroup_indent = 0; + +void pgraph_vk_debug_init(void) +{ +#ifdef CONFIG_RENDERDOC + nv2a_dbg_renderdoc_init(); +#endif +} + +void pgraph_vk_debug_frame_terminator(void) +{ +#ifdef CONFIG_RENDERDOC + if (nv2a_dbg_renderdoc_available()) { + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + + PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state; + if (rdoc_api->IsTargetControlConnected()) { + if (rdoc_api->IsFrameCapturing()) { + rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + } + if (renderdoc_capture_frames > 0) { + rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + --renderdoc_capture_frames; + } + } + } +#endif +} diff --git a/hw/xbox/nv2a/pgraph/vk/debug.h b/hw/xbox/nv2a/pgraph/vk/debug.h new file mode 100644 index 00000000000..62cd63e592e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.h @@ -0,0 +1,61 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H + +#define DEBUG_VK 0 + +extern int nv2a_vk_dgroup_indent; + +#define NV2A_VK_XDPRINTF(x, fmt, ...) \ + do { \ + if (x) { \ + for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \ + fprintf(stderr, " "); \ + fprintf(stderr, fmt "\n", ##__VA_ARGS__); \ + } \ + } while (0) + +#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__) + +#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \ + do { \ + NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \ + nv2a_vk_dgroup_indent++; \ + } while (0) + +#define NV2A_VK_DGROUP_END(...) \ + do { \ + nv2a_vk_dgroup_indent--; \ + assert(nv2a_vk_dgroup_indent >= 0); \ + } while (0) + +#define VK_CHECK(x) \ + do { \ + VkResult vk_result = (x); \ + if (vk_result != VK_SUCCESS) { \ + fprintf(stderr, "vk_result = %d\n", vk_result); \ + } \ + assert(vk_result == VK_SUCCESS && "vk check failed"); \ + } while (0) + +void pgraph_vk_debug_frame_terminator(void); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c new file mode 100644 index 00000000000..595f119ca24 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -0,0 +1,896 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" + +static const char *display_frag_glsl = + "#version 450\n" + "layout(binding = 0) uniform sampler2D tex;\n" + "layout(binding = 1) uniform sampler2D pvideo_tex;\n" + "layout(push_constant, std430) uniform PushConstants {\n" + " bool pvideo_enable;\n" + " vec2 pvideo_in_pos;\n" + " vec4 pvideo_pos;\n" + " vec3 pvideo_scale;\n" + " bool pvideo_color_key_enable;\n" + " vec2 display_size;\n" + " float line_offset;\n" + " vec4 pvideo_color_key;\n" + "};\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord = gl_FragCoord.xy/display_size;\n" + " texCoord.y = 1 - texCoord.y;\n" // GL compat + " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" + " texCoord.y = 1 + rel*(texCoord.y - 1);" + " out_Color.rgba = texture(tex, texCoord);\n" + // " if (pvideo_enable) {\n" + // " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" + // " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" + // " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" + // " greaterThan(screenCoord, output_region.zw));\n" + // " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" + // " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" + // " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" + // " in_st.y *= -1.0;\n" + // " out_Color.rgba = texture(pvideo_tex, in_st);\n" + // " }\n" + // " }\n" + "}\n"; + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorPoolSize pool_sizes = { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 2, + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = 1, + .pPoolSizes = &pool_sizes, + .maxSets = 1, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, 
&pool_info, NULL, + &r->display.descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->display.descriptor_pool, NULL); + r->display.descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayoutBinding bindings[2]; + + for (int i = 0; i < ARRAY_SIZE(bindings); i++) { + bindings[i] = (VkDescriptorSetLayoutBinding){ + .binding = i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->display.descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->display.descriptor_set_layout, + NULL); + r->display.descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layout = r->display.descriptor_set_layout; + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->display.descriptor_pool, + .descriptorSetCount = 1, + .pSetLayouts = &layout, + }; + VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info, + &r->display.descriptor_set)); +} + +static void create_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkAttachmentDescription attachment; + + VkAttachmentReference color_reference; + attachment = (VkAttachmentDescription){ + .format = VK_FORMAT_R8G8B8A8_UNORM, + .samples = 
VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + color_reference = (VkAttachmentReference){ + 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + + VkSubpassDependency dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + }; + + dependency.srcStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &color_reference, + }; + + VkRenderPassCreateInfo renderpass_create_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &dependency, + }; + VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL, + &r->display.render_pass)); +} + +static void destroy_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + vkDestroyRenderPass(r->device, r->display.render_pass, NULL); + r->display.render_pass = VK_NULL_HANDLE; +} + +static void create_display_pipeline(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->display.display_frag = + pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl); + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->quad_vert_module->module, + .pName = "main", + }, + 
(VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->display.display_frag->module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = VK_FALSE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + }; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .blendEnable = VK_FALSE, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + 
.attachmentCount = 1, + .pAttachments = &color_blend_attachment, + }; + + VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR }; + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = dynamic_states, + }; + + VkPushConstantRange push_constant_range = { + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .offset = 0, + .size = r->display.display_frag->push_constants.total_size, + }; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &r->display.descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constant_range, + }; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &r->display.pipeline_layout)); + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pDepthStencilState = r->zeta_binding ? 
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = r->display.pipeline_layout, + .renderPass = r->display.render_pass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, + &r->display.pipeline)); +} + +static void destroy_display_pipeline(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyPipeline(r->device, r->display.pipeline, NULL); + r->display.pipeline = VK_NULL_HANDLE; +} + +static void create_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkFramebufferCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = r->display.render_pass, + .attachmentCount = 1, + .pAttachments = &r->display.image_view, + .width = r->display.width, + .height = r->display.height, + .layers = 1, + }; + VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL, + &r->display.framebuffer)); +} + +static void destroy_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL); + r->display.framebuffer = NULL; +} + +static void destroy_current_display_image(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->image == VK_NULL_HANDLE) { + return; + } + + destroy_frame_buffer(pg); + +#if HAVE_EXTERNAL_MEMORY + glDeleteTextures(1, &d->gl_texture_id); + d->gl_texture_id = 0; + + glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj); + d->gl_memory_obj = 0; + +#ifdef WIN32 + CloseHandle(d->handle); + d->handle = 0; +#endif +#endif + + vkDestroyImageView(r->device, d->image_view, NULL); + d->image_view = VK_NULL_HANDLE; + + vkDestroyImage(r->device, d->image, NULL); + d->image = VK_NULL_HANDLE; + + vkFreeMemory(r->device, d->memory, NULL); + d->memory = VK_NULL_HANDLE; + + d->draw_time = 0; +} + +// 
FIXME: We may need to use two images. One for actually rendering display, +// and another for GL in the correct tiling mode + +static void create_display_image_from_surface(PGRAPHState *pg, + SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (r->display.image != VK_NULL_HANDLE) { + destroy_current_display_image(pg); + } + + const GLint gl_internal_format = GL_RGBA8; + bool use_optimal_tiling = true; + +#if HAVE_EXTERNAL_MEMORY + GLint num_tiling_types; + glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format, + GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types); + // XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be + // supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not + // be supported so we must use optimal. Default to optimal unless + // linear is explicitly specified... + GLint tiling_types[num_tiling_types]; + glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format, + GL_TILING_TYPES_EXT, num_tiling_types, tiling_types); + for (int i = 0; i < num_tiling_types; i++) { + if (tiling_types[i] == GL_LINEAR_TILING_EXT) { + use_optimal_tiling = false; + break; + } + } +#endif + + // Create image + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .tiling = use_optimal_tiling ? 
VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + pgraph_apply_scaling_factor(pg, &image_create_info.extent.width, + &image_create_info.extent.height); + + VkExternalMemoryImageCreateInfo external_memory_image_create_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + image_create_info.pNext = &external_memory_image_create_info; + + VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image)); + + // Allocate and bind image memory + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements); + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = memory_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + + VkExportMemoryAllocateInfo export_memory_alloc_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, + .handleTypes = +#ifdef WIN32 + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR +#else + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT +#endif + , + }; + alloc_info.pNext = &export_memory_alloc_info; + + VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory)); + + vkBindImageMemory(r->device, d->image, d->memory, 0); + + // Create Image View + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = d->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_create_info.format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + VK_CHECK(vkCreateImageView(r->device, 
&image_view_create_info, NULL, + &d->image_view)); + +#if HAVE_EXTERNAL_MEMORY + +#ifdef WIN32 + + VkMemoryGetWin32HandleInfoKHR handle_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .memory = d->memory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + }; + VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle)); + + glCreateMemoryObjectsEXT(1, &d->gl_memory_obj); + glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle); + assert(glGetError() == GL_NO_ERROR); + +#else + + VkMemoryGetFdInfoKHR fd_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = d->memory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd)); + + glCreateMemoryObjectsEXT(1, &d->gl_memory_obj); + glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd); + assert(glIsMemoryObjectEXT(d->gl_memory_obj)); + assert(glGetError() == GL_NO_ERROR); + +#endif // WIN32 + + glGenTextures(1, &d->gl_texture_id); + glBindTexture(GL_TEXTURE_2D, d->gl_texture_id); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, + use_optimal_tiling ? 
GL_OPTIMAL_TILING_EXT : + GL_LINEAR_TILING_EXT); + glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format, + image_create_info.extent.width, + image_create_info.extent.height, d->gl_memory_obj, 0); + assert(glGetError() == GL_NO_ERROR); + +#endif // HAVE_EXTERNAL_MEMORY + + d->width = image_create_info.extent.width; + d->height = image_create_info.extent.height; + + create_frame_buffer(pg); +} + +static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorImageInfo image_infos[2]; + VkWriteDescriptorSet descriptor_writes[2]; + + // Display surface + image_infos[0] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = surface->image_view, + .sampler = r->display.sampler, + }; + descriptor_writes[0] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->display.descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[0], + }; + + // FIXME: PVIDEO Overlay + image_infos[1] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->dummy_texture.image_view, + .sampler = r->dummy_texture.sampler, + }; + descriptor_writes[1] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->display.descriptor_set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[1], + }; + + vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes), + descriptor_writes, 0, NULL); +} + +static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width, height; + uint32_t pline_offset, pstart_addr, 
pline_compare; + d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + int line_offset = surface->pitch / pline_offset; + + /* Adjust viewport height for interlaced mode, used only in 1080i */ + if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { + height *= 2; + } + + pgraph_apply_scaling_factor(pg, &width, &height); + + ShaderUniformLayout *l = &r->display.display_frag->push_constants; + int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache + int line_offset_loc = uniform_index(l, "line_offset"); + uniform2f(l, display_size_loc, width, height); + uniform1f(l, line_offset_loc, line_offset); + +#if 0 // FIXME: PVIDEO overlay + // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. + // Many games seem to pass this value when initializing or tearing down + // PVIDEO. On its own, this generally does not result in the overlay being + // hidden, however there are certain games (e.g., Ultimate Beach Soccer) + // that use an unknown mechanism to hide the overlay without explicitly + // stopping it. + // Since the value seems to be set to 0xFFFFFFFF only in cases where the + // content is not valid, it is probably good enough to treat it as an + // implicit stop. 
+ bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) + && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; + glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled); + if (!enabled) { + return; + } + + hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; + hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + + int in_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); + int in_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); + + int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + + int in_pitch = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); + int in_color = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); + + unsigned int out_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); + unsigned int out_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); + + float scale_x = 1.0f; + float scale_y = 1.0f; + unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; + unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; + if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_x = pvideo_calculate_scale(ds_dx, out_width); + } + if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_y = pvideo_calculate_scale(dt_dy, out_height); + } + + // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results + // in them being capped to the output size, content is not scaled. This is + // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF + // during initialization or teardown. 
+ if (in_width > out_width) { + in_width = floorf((float)out_width * scale_x + 0.5f); + } + if (in_height > out_height) { + in_height = floorf((float)out_height * scale_y + 0.5f); + } + + /* TODO: support other color formats */ + assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + unsigned int out_x = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); + unsigned int out_y = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); + + unsigned int color_key_enabled = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); + glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc, + color_key_enabled); + + // TODO: Verify that masking off the top byte is correct. + // SeaBlade sets a color key of 0x80000000 but the texture passed into the + // shader is cleared to 0 alpha. + unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; + glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); + + assert(offset + in_pitch * in_height <= limit); + hwaddr end = base + offset + in_pitch * in_height; + assert(end <= memory_region_size(d->vram)); + + pgraph_apply_scaling_factor(pg, &out_x, &out_y); + pgraph_apply_scaling_factor(pg, &out_width, &out_height); + + // Translate for the GL viewport origin. 
+ out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); + + glActiveTexture(GL_TEXTURE0 + 1); + glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( + d->vram_ptr + base + offset, in_width, in_height, in_pitch); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, + GL_UNSIGNED_BYTE, tex_rgba); + g_free(tex_rgba); + glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1); + glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t); + glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc, + out_x, out_y, out_width, out_height); + glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc, + scale_x, scale_y, 1.0f / pg->surface_scale_factor); +#endif +} + +static void render_display(PGRAPHState *pg, SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *disp = &r->display; + + if (disp->draw_time >= surface->draw_time) { + return; + } + + if (r->in_command_buffer && + surface->draw_time >= r->command_buffer_start_time) { + pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING); + } + + update_uniforms(pg, surface); + update_descriptor_set(pg, surface); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout(pg, cmd, surface->image, + surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_transition_image_layout( + pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + VkRenderPassBeginInfo render_pass_begin_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + 
.renderPass = disp->render_pass, + .framebuffer = disp->framebuffer, + .renderArea.extent.width = disp->width, + .renderArea.extent.height = disp->height, + }; + vkCmdBeginRenderPass(cmd, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + disp->pipeline); + + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + disp->pipeline_layout, 0, 1, &disp->descriptor_set, + 0, NULL); + + VkViewport viewport = { + .width = disp->width, + .height = disp->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }; + vkCmdSetViewport(cmd, 0, 1, &viewport); + + VkRect2D scissor = { + .extent.width = disp->width, + .extent.height = disp->height, + }; + vkCmdSetScissor(cmd, 0, 1, &scissor); + + vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + 0, disp->display_frag->push_constants.total_size, + disp->display_frag->push_constants.allocation); + + vkCmdDraw(cmd, 3, 1, 0, 0); + + vkCmdEndRenderPass(cmd); + +#if 0 + VkImageCopy region = { + .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .dstSubresource.layerCount = 1, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + }; + pgraph_apply_scaling_factor(pg, ®ion.extent.width, + ®ion.extent.height); + + vkCmdCopyImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); +#endif + + pgraph_vk_transition_image_layout(pg, cmd, surface->image, + surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, disp->image, + VK_FORMAT_R8G8B8_UNORM, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + pgraph_vk_end_single_time_commands(pg, cmd); + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5); + + 
disp->draw_time = surface->draw_time; +} + +static void create_surface_sampler(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .anisotropyEnable = VK_FALSE, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &r->display.sampler)); +} + +static void destroy_surface_sampler(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroySampler(r->device, r->display.sampler, NULL); + r->display.sampler = VK_NULL_HANDLE; +} + +void pgraph_vk_init_display(PGRAPHState *pg) +{ + create_descriptor_pool(pg); + create_descriptor_set_layout(pg); + create_descriptor_sets(pg); + create_render_pass(pg); + create_display_pipeline(pg); + create_surface_sampler(pg); +} + +void pgraph_vk_finalize_display(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->display.image != VK_NULL_HANDLE) { + destroy_current_display_image(pg); + } + + destroy_surface_sampler(pg); + destroy_display_pipeline(pg); + destroy_render_pass(pg); + destroy_descriptor_set_layout(pg); + destroy_descriptor_pool(pg); +} + +void pgraph_vk_render_display(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = + pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || 
!surface->color) { + return; + } + + unsigned int width = surface->width, height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + PGRAPHVkDisplayState *disp = &r->display; + if (!disp->image || disp->width != width || disp->height != height) { + create_display_image_from_surface(pg, surface); + } + + render_display(pg, surface); +} diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c new file mode 100644 index 00000000000..c4f2cd85e05 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -0,0 +1,1916 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qemu/fast-hash.h" +#include "renderer.h" + +void pgraph_vk_draw_begin(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + NV2A_VK_DPRINTF("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + pgraph_vk_surface_update(d, true, true, depth_test || stencil_test); + + if (is_nop_draw) { + NV2A_VK_DPRINTF("nop!"); + NV2A_VK_DGROUP_END(); + return; + } +} + +static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + int polygon_mode = r->shader_binding->state.polygon_front_mode; + int primitive_mode = r->shader_binding->state.primitive_mode; + + if (polygon_mode == POLY_MODE_POINT) { + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + + // FIXME: Replace with LUT + switch (primitive_mode) { + case PRIM_TYPE_POINTS: + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + case PRIM_TYPE_LINES: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + case PRIM_TYPE_LINE_LOOP: + // FIXME: line strips, except that the first and last vertices are also used as a line + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case PRIM_TYPE_LINE_STRIP: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + case PRIM_TYPE_TRIANGLE_STRIP: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + case 
PRIM_TYPE_TRIANGLE_FAN: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME + } else if (polygon_mode == POLY_MODE_FILL) { + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + } + assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + +static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + PipelineBinding *snode = container_of(node, PipelineBinding, node); + snode->layout = VK_NULL_HANDLE; + snode->pipeline = VK_NULL_HANDLE; + snode->draw_time = 0; +} + +static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + + assert((!r->in_command_buffer || + snode->draw_time < r->command_buffer_start_time) && + "Pipeline evicted while in use!"); + + vkDestroyPipeline(r->device, snode->pipeline, NULL); + snode->pipeline = VK_NULL_HANDLE; + + vkDestroyPipelineLayout(r->device, snode->layout, NULL); + snode->layout = VK_NULL_HANDLE; + + fprintf(stderr, "released pipeline\n"); +} + +static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + PipelineBinding *snode = container_of(node, PipelineBinding, node); + return memcmp(&snode->key, key, sizeof(PipelineKey)); +} + +static void init_pipeline_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipelineCacheCreateInfo cache_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + .flags = 0, + .initialDataSize = 0, + .pInitialData = NULL, + .pNext = NULL, + }; + VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL, + 
&r->vk_pipeline_cache)); + + const size_t pipeline_cache_size = 2048; + lru_init(&r->pipeline_cache); + r->pipeline_cache_entries = + g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding)); + assert(r->pipeline_cache_entries != NULL); + for (int i = 0; i < pipeline_cache_size; i++) { + lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node); + } + + r->pipeline_cache.init_node = pipeline_cache_entry_init; + r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare; + r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict; +} + +static void finalize_pipeline_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + lru_flush(&r->pipeline_cache); + g_free(r->pipeline_cache_entries); + r->pipeline_cache_entries = NULL; + + vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL); +} + +static char const *const quad_glsl = + "#version 450\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + +static char const *const solid_frag_glsl = + "#version 450\n" + "layout(location = 0) out vec4 fragColor;\n" + "void main()\n" + "{\n" + " fragColor = vec4(1.0);" + "}\n"; + +static void init_clear_shaders(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl); + r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl); +} + +void pgraph_vk_init_pipelines(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + init_pipeline_cache(pg); + init_clear_shaders(pg); + + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO + }; + VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL, + &r->command_buffer_semaphore)); + + VkFenceCreateInfo fence_info = { + .sType = 
VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + }; + VK_CHECK( + vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence)); +} + +void pgraph_vk_finalize_pipelines(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + finalize_pipeline_cache(pg); + + vkDestroyFence(r->device, r->command_buffer_fence, NULL); + vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL); +} + +static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + state->color_format = r->color_binding ? + r->color_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; + state->zeta_format = r->zeta_binding ? r->zeta_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; +} + +static VkRenderPass create_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + NV2A_VK_DPRINTF("Creating render pass"); + + PGRAPHVkState *r = pg->vk_renderer_state; + + VkAttachmentDescription attachments[2]; + int num_attachments = 0; + + bool color = state->color_format != VK_FORMAT_UNDEFINED; + bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED; + + VkAttachmentReference color_reference; + if (color) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->color_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + color_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + num_attachments++; + } + + VkAttachmentReference depth_reference; + if (zeta) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->zeta_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + depth_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + num_attachments++; + } + + VkSubpassDependency dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + }; + + if (color) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (zeta) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = color ? 1 : 0, + .pColorAttachments = color ? &color_reference : NULL, + .pDepthStencilAttachment = zeta ? &depth_reference : NULL, + }; + + VkRenderPassCreateInfo renderpass_create_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = num_attachments, + .pAttachments = attachments, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &dependency, + }; + VkRenderPass render_pass; + VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL, + &render_pass)); + return render_pass; +} + +static VkRenderPass add_new_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->render_passes_index == r->render_passes_capacity) { + int n_blocks = r->render_passes_capacity; + r->render_passes_capacity = n_blocks ? 
(n_blocks * 2) : 256; + r->render_passes = + g_realloc_n(r->render_passes, r->render_passes_capacity, + sizeof(*r->render_passes)); + } + + RenderPass *rp = &r->render_passes[r->render_passes_index++]; + memcpy(&rp->state, state, sizeof(*state)); + rp->render_pass = create_render_pass(pg, state); + + return rp->render_pass; +} + +static VkRenderPass get_render_pass(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < r->render_passes_index; i++) { + if (!memcmp(&r->render_passes[i].state, state, sizeof(*state))) { + return r->render_passes[i].render_pass; + } + } + + return add_new_render_pass(pg, state); +} + +static void create_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DPRINTF("Creating framebuffer"); + + assert(r->color_binding || r->zeta_binding); + + if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + + VkImageView attachments[2]; + int attachment_count = 0; + + if (r->color_binding) { + attachments[attachment_count++] = r->color_binding->image_view; + } + if (r->zeta_binding) { + attachments[attachment_count++] = r->zeta_binding->image_view; + } + + SurfaceBinding *binding = r->color_binding ? 
: r->zeta_binding; + + VkFramebufferCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = r->render_pass, + .attachmentCount = attachment_count, + .pAttachments = attachments, + .width = binding->width, + .height = binding->height, + .layers = 1, + }; + pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height); + VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL, + &r->framebuffers[r->framebuffer_index++])); +} + +static void destroy_framebuffers(PGRAPHState *pg) +{ + NV2A_VK_DPRINTF("Destroying framebuffer"); + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < r->framebuffer_index; i++) { + vkDestroyFramebuffer(r->device, r->framebuffers[i], NULL); + r->framebuffers[i] = VK_NULL_HANDLE; + } + r->framebuffer_index = 0; +} + +static void create_clear_pipeline(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DGROUP_BEGIN("Creating clear pipeline"); + + PipelineKey key; + memset(&key, 0, sizeof(key)); + key.clear = true; + init_render_pass_state(pg, &key.render_pass_state); + + key.regs[0] = r->clear_parameter; + + uint64_t hash = fast_hash((void *)&key, sizeof(key)); + LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + + if (snode->pipeline != VK_NULL_HANDLE) { + NV2A_VK_DPRINTF("Cache hit"); + r->pipeline_binding_changed = r->pipeline_binding != snode; + r->pipeline_binding = snode; + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN); + memcpy(&snode->key, &key, sizeof(key)); + + bool clear_any_color_channels = + r->clear_parameter & NV097_CLEAR_SURFACE_COLOR; + bool clear_all_color_channels = + (r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) == + (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B | + NV097_CLEAR_SURFACE_A); + bool partial_color_clear = + clear_any_color_channels && 
!clear_all_color_channels; + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->quad_vert_module->module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->solid_frag_module->module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = VK_TRUE, + .depthWriteEnable = + (r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? 
VK_TRUE : VK_FALSE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + }; + + if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) { + depth_stencil.stencilTestEnable = VK_TRUE; + depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE; + depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS; + depth_stencil.front.compareMask = 0xff; + depth_stencil.front.writeMask = 0xff; + depth_stencil.front.reference = 0xff; + depth_stencil.back = depth_stencil.front; + } + + VkColorComponentFlags write_mask = 0; + if (r->clear_parameter & NV097_CLEAR_SURFACE_R) + write_mask |= VK_COLOR_COMPONENT_R_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_G) + write_mask |= VK_COLOR_COMPONENT_G_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_B) + write_mask |= VK_COLOR_COMPONENT_B_BIT; + if (r->clear_parameter & NV097_CLEAR_SURFACE_A) + write_mask |= VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = write_mask, + .blendEnable = VK_TRUE, + .colorBlendOp = VK_BLEND_OP_ADD, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .alphaBlendOp = VK_BLEND_OP_ADD, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = r->color_binding ? 1 : 0, + .pAttachments = r->color_binding ? 
&color_blend_attachment : NULL, + }; + + VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS }; + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = partial_color_clear ? 3 : 2, + .pDynamicStates = dynamic_states, + }; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + }; + + VkPipelineLayout layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &layout)); + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pDepthStencilState = r->zeta_binding ? 
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = layout, + .renderPass = get_render_pass(pg, &key.render_pass_state), + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, &pipeline)); + + snode->pipeline = pipeline; + snode->layout = layout; + snode->render_pass = pipeline_info.renderPass; + snode->draw_time = pg->draw_time; + + r->pipeline_binding = snode; + r->pipeline_binding_changed = true; + + NV2A_VK_DGROUP_END(); +} + +static bool check_render_pass_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->pipeline_binding); + + RenderPassState state; + init_render_pass_state(pg, &state); + + return memcmp(&state, &r->pipeline_binding->key.render_pass_state, + sizeof(state)) != 0; +} + +// Quickly check for any state changes that would require more analysis +static bool check_pipeline_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->pipeline_binding); + + if (r->shader_bindings_changed || r->texture_bindings_changed || + check_render_pass_dirty(pg)) { + return true; + } + + const unsigned int regs[] = { + NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR, + NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3, + NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR, + }; + + for (int i = 0; i < ARRAY_SIZE(regs); i++) { + if (pgraph_is_reg_dirty(pg, regs[i])) { + return true; + } + } + + // FIXME: Use dirty bits instead + if (memcmp(r->vertex_attribute_descriptions, + r->pipeline_binding->key.attribute_descriptions, + r->num_active_vertex_attribute_descriptions * + sizeof(r->vertex_attribute_descriptions[0])) || + memcmp(r->vertex_binding_descriptions, + r->pipeline_binding->key.binding_descriptions, + r->num_active_vertex_binding_descriptions * + 
sizeof(r->vertex_binding_descriptions[0]))) { + return true; + } + + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY); + + return false; +} + +static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + memset(key, 0, sizeof(*key)); + init_render_pass_state(pg, &key->render_pass_state); + memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState)); + memcpy(key->binding_descriptions, r->vertex_binding_descriptions, + sizeof(key->binding_descriptions[0]) * + r->num_active_vertex_binding_descriptions); + memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions, + sizeof(key->attribute_descriptions[0]) * + r->num_active_vertex_attribute_descriptions); + + // FIXME: Register masking + // FIXME: Use more dynamic state updates + const int regs[] = { + NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR, + NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3, + NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR, + }; + assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs)); + for (int i = 0; i < ARRAY_SIZE(regs); i++) { + key->regs[i] = pgraph_reg_r(pg, regs[i]); + } +} + +static void create_pipeline(PGRAPHState *pg) +{ + NV2A_VK_DGROUP_BEGIN("Creating pipeline"); + + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + pgraph_vk_bind_textures(d); + pgraph_vk_bind_shaders(pg); + + // FIXME: If nothing was dirty, don't even try creating the key or hashing. + // Just use the same pipeline. 
+ if (r->pipeline_binding && !check_pipeline_dirty(pg)) { + return; + } + + PipelineKey key; + init_pipeline_key(pg, &key); + uint64_t hash = fast_hash((void *)&key, sizeof(key)); + + static uint64_t last_hash; + if (hash == last_hash) { + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE); + } + last_hash = hash; + + LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + if (snode->pipeline != VK_NULL_HANDLE) { + NV2A_VK_DPRINTF("Cache hit"); + r->pipeline_binding_changed = r->pipeline_binding != snode; + r->pipeline_binding = snode; + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN); + + memcpy(&snode->key, &key, sizeof(key)); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE); + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + + int num_active_shader_stages = 0; + VkPipelineShaderStageCreateInfo shader_stages[3]; + + if (r->shader_binding->geometry) { + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = r->shader_binding->geometry->module, + .pName = "main", + }; + } + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->shader_binding->vertex->module, + .pName = "main", + }; + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->shader_binding->fragment->module, + .pName = "main", + }; 
+ + VkPipelineVertexInputStateCreateInfo vertex_input = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = + r->num_active_vertex_binding_descriptions, + .pVertexBindingDescriptions = r->vertex_binding_descriptions, + .vertexAttributeDescriptionCount = + r->num_active_vertex_attribute_descriptions, + .pVertexAttributeDescriptions = r->vertex_attribute_descriptions, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = get_primitive_topology(pg), + .primitiveRestartEnable = VK_FALSE, + }; + + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + + + void *rasterizer_next_struct = NULL; + + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; + + if (r->provoking_vertex_extension_enabled) { + VkProvokingVertexModeEXT provoking_mode = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ? + VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT : + VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + + provoking_state = + (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ + .sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .provokingVertexMode = provoking_mode, + }; + rasterizer_next_struct = &provoking_state; + } else { + // FIXME: Handle in shader? + } + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state + .polygon_front_mode], + .lineWidth = 1.0f, + .frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_FRONTFACE) ? 
+ VK_FRONT_FACE_COUNTER_CLOCKWISE : + VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .pNext = rasterizer_next_struct, + }; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_CULLENABLE) { + uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_CULLCTRL); + assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map)); + rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face]; + } else { + rasterizer.cullMode = VK_CULL_MODE_NONE; + } + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineDepthStencilStateCreateInfo depth_stencil = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE, + }; + + if (depth_test) { + depth_stencil.depthTestEnable = VK_TRUE; + uint32_t depth_func = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ZFUNC); + assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map)); + depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func]; + } + + if (stencil_test) { + depth_stencil.stencilTestEnable = VK_TRUE; + uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_FUNC); + uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_REF); + uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); + uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE); + uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); + uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); + uint32_t op_zpass = 
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); + + assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map)); + assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map)); + + depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail]; + depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass]; + depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail]; + depth_stencil.front.compareOp = + pgraph_stencil_func_vk_map[stencil_func]; + depth_stencil.front.compareMask = mask_read; + depth_stencil.front.writeMask = mask_write; + depth_stencil.front.reference = stencil_ref; + depth_stencil.back = depth_stencil.front; + } + + VkColorComponentFlags write_mask = 0; + if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_R_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_G_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_B_BIT; + if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE) + write_mask |= VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = write_mask, + }; + + float blend_constant[4] = { 0, 0, 0, 0 }; + + if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) { + color_blend_attachment.blendEnable = VK_TRUE; + + uint32_t sfactor = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_SFACTOR); + uint32_t dfactor = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_DFACTOR); + assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map)); + assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map)); + color_blend_attachment.srcColorBlendFactor = + pgraph_blend_factor_vk_map[sfactor]; + color_blend_attachment.dstColorBlendFactor = + pgraph_blend_factor_vk_map[dfactor]; + 
color_blend_attachment.srcAlphaBlendFactor = + pgraph_blend_factor_vk_map[sfactor]; + color_blend_attachment.dstAlphaBlendFactor = + pgraph_blend_factor_vk_map[dfactor]; + + uint32_t equation = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN); + assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map)); + + color_blend_attachment.colorBlendOp = + pgraph_blend_equation_vk_map[equation]; + color_blend_attachment.alphaBlendOp = + pgraph_blend_equation_vk_map[equation]; + + uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR); + pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant); + } + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = r->color_binding ? 1 : 0, + .pAttachments = r->color_binding ? &color_blend_attachment : NULL, + .blendConstants[0] = blend_constant[0], + .blendConstants[1] = blend_constant[1], + .blendConstants[2] = blend_constant[2], + .blendConstants[3] = blend_constant[3], + }; + + VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR }; + + VkPipelineDynamicStateCreateInfo dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = ARRAY_SIZE(dynamic_states), + .pDynamicStates = dynamic_states, + }; + + // /* Clipping */ + // glEnable(GL_CLIP_DISTANCE0); + // glEnable(GL_CLIP_DISTANCE1); + + // /* Polygon offset */ + // /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) + // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + float zfactor = *(float *)&zfactor_u32; + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + float zbias = *(float *)&zbias_u32; + rasterizer.depthBiasEnable = VK_TRUE; + rasterizer.depthBiasSlopeFactor = zfactor; + rasterizer.depthBiasConstantFactor = zbias; + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { + rasterizer.depthClampEnable = VK_TRUE; + } + + // FIXME: Dither + // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + // NV_PGRAPH_CONTROL_0_DITHERENABLE)) + // FIXME: point size + // FIXME: Edge Antialiasing + // bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), + // NV_PGRAPH_ANTIALIASING_ENABLE); + // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { + // FIXME: VK_EXT_line_rasterization + // } + + // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + // NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { + // FIXME: No direct analog. Just do it with MSAA. 
+ // } + + VkPushConstantRange push_constant_range = { + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + // FIXME: Minimize push constants + .size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float), + }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &r->descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constant_range, + }; + VkPipelineLayout layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &layout)); + + VkGraphicsPipelineCreateInfo pipeline_create_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = num_active_shader_stages, + .pStages = shader_stages, + .pVertexInputState = &vertex_input, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pDepthStencilState = r->zeta_binding ? 
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = layout, + .renderPass = get_render_pass(pg, &key.render_pass_state), + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_create_info, NULL, &pipeline)); + + snode->pipeline = pipeline; + snode->layout = layout; + snode->render_pass = pipeline_create_info.renderPass; + snode->draw_time = pg->draw_time; + + r->pipeline_binding = snode; + r->pipeline_binding_changed = true; + + NV2A_VK_DGROUP_END(); +} + +static void push_vertex_attrib_values(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Do partial updates + + float attrib_values[NV2A_VERTEXSHADER_ATTRIBUTES * 4]; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + attrib_values[i * 4 + 0] = pg->vertex_attributes[i].inline_value[0]; + attrib_values[i * 4 + 1] = pg->vertex_attributes[i].inline_value[1]; + attrib_values[i * 4 + 2] = pg->vertex_attributes[i].inline_value[2]; + attrib_values[i * 4 + 3] = pg->vertex_attributes[i].inline_value[3]; + } + + vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(attrib_values), + &attrib_values); +} + +static void bind_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(r->descriptor_set_index >= 1); + + vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipeline_binding->layout, 0, 1, + &r->descriptor_sets[r->descriptor_set_index - 1], 0, + NULL); +} + +static void begin_query(PGRAPHVkState *r) +{ + assert(r->in_command_buffer); + assert(!r->in_render_pass); + assert(!r->query_in_flight); + + // FIXME: We should handle this. Make the query buffer bigger, but at least + // flush current queries. 
+ assert(r->num_queries_in_flight < r->max_queries_in_flight); + + nv2a_profile_inc_counter(NV2A_PROF_QUERY); + vkCmdResetQueryPool(r->command_buffer, r->query_pool, + r->num_queries_in_flight, 1); + vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight, + VK_QUERY_CONTROL_PRECISE_BIT); + + r->query_in_flight = true; + r->new_query_needed = false; + r->num_queries_in_flight++; +} + +static void end_query(PGRAPHVkState *r) +{ + assert(r->in_command_buffer); + assert(!r->in_render_pass); + assert(r->query_in_flight); + + vkCmdEndQuery(r->command_buffer, r->query_pool, + r->num_queries_in_flight - 1); + r->query_in_flight = false; +} + +static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd, + int index_src, int index_dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b_src = &r->storage_buffers[index_src]; + StorageBuffer *b_dst = &r->storage_buffers[index_dst]; + + if (!b_src->buffer_offset) { + return; + } + + VkBufferCopy copy_region = { .size = b_src->buffer_offset }; + vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, ©_region); + + b_src->buffer_offset = 0; +} + +static void begin_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + assert(!r->in_render_pass); + + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES); + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + + assert(r->framebuffer_index > 0); + + VkRenderPassBeginInfo render_pass_begin_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = r->render_pass, + .framebuffer = r->framebuffers[r->framebuffer_index - 1], + .renderArea.extent.width = vp_width, + .renderArea.extent.height = vp_height, + .clearValueCount = 0, + .pClearValues = NULL, + }; + vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + 
r->in_render_pass = true; + +} + +static void end_render_pass(PGRAPHVkState *r) +{ + if (r->in_render_pass) { + vkCmdEndRenderPass(r->command_buffer); + r->in_render_pass = false; + } +} + +const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = { + [VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY, + [VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE, + [VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN, + [VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE, + [VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY, + [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING, + [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL, + [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH, +}; + +void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_draw); + + if (r->in_command_buffer) { + + nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]); + + if (r->in_render_pass) { + end_render_pass(r); + } + if (r->query_in_flight) { + end_query(r); + } + VK_CHECK(vkEndCommandBuffer(r->command_buffer)); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup + sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX); + sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING, + BUFFER_VERTEX_INLINE); + sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM); + bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer)); + r->in_aux_command_buffer = false; + + VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkSubmitInfo submit_infos[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->aux_command_buffer, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &r->command_buffer_semaphore, + }, + { + + 
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->command_buffer, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &r->command_buffer_semaphore, + .pWaitDstStageMask = &wait_stage, + } + }; + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT); + vkResetFences(r->device, 1, &r->command_buffer_fence); + VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos, + r->command_buffer_fence)); + r->submit_count += 1; + + // Periodically check memory budget + const int max_num_submits_before_budget_update = 5; + if (finish_reason == VK_FINISH_REASON_FLIP_STALL || + (r->submit_count - r->allocator_last_submit_index) > + max_num_submits_before_budget_update) { + + // VMA queries budget via vmaSetCurrentFrameIndex + vmaSetCurrentFrameIndex(r->allocator, r->submit_count); + r->allocator_last_submit_index = r->submit_count; + + pgraph_vk_check_memory_budget(pg); + } + + VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence, + VK_TRUE, UINT64_MAX)); + + r->descriptor_set_index = 0; + r->in_command_buffer = false; + destroy_framebuffers(pg); + } + + NV2AState *d = container_of(pg, NV2AState, pgraph); + pgraph_vk_process_pending_reports_internal(d); +} + +void pgraph_vk_begin_command_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(!r->in_command_buffer); + + VkCommandBufferBeginInfo command_buffer_begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + VK_CHECK(vkBeginCommandBuffer(r->command_buffer, + &command_buffer_begin_info)); + r->command_buffer_start_time = pg->draw_time; + r->in_command_buffer = true; +} + +// FIXME: Refactor below + +void pgraph_vk_ensure_command_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!r->in_command_buffer) { + pgraph_vk_begin_command_buffer(pg); + } +} + +void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = 
pg->vk_renderer_state; + + end_render_pass(r); + if (r->query_in_flight) { + end_query(r); + } +} + +VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + pgraph_vk_ensure_command_buffer(pg); + pgraph_vk_ensure_not_in_render_pass(pg); + return r->command_buffer; +} + +void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(cmd == r->command_buffer); +} + +// FIXME: Add more metrics for determining command buffer 'fullness' and +// conservatively flush. Unfortunately there doesn't appear to be a good +// way to determine what the actual maximum capacity of a command buffer +// is, but we are obviously not supposed to endlessly append to one command +// buffer. For other reasons though (like descriptor set amount, surface +// changes, etc) we do flush often. + +static void begin_pre_draw(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->color_binding || r->zeta_binding); + assert(!r->color_binding || r->color_binding->initialized); + assert(!r->zeta_binding || r->zeta_binding->initialized); + + if (pg->clearing) { + create_clear_pipeline(pg); + } else { + create_pipeline(pg); + } + + bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass; + + if (r->framebuffer_dirty || render_pass_dirty) { + pgraph_vk_ensure_not_in_render_pass(pg); + } + if (render_pass_dirty) { + r->render_pass = r->pipeline_binding->render_pass; + } + if (r->framebuffer_dirty) { + create_frame_buffer(pg); + r->framebuffer_dirty = false; + } + if (!pg->clearing) { + pgraph_vk_update_descriptor_sets(pg); + } + if (r->framebuffer_index == 0) { + create_frame_buffer(pg); + } + + pgraph_vk_ensure_command_buffer(pg); +} + +static void begin_draw(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + + // Visibility testing + if (pg->zpass_pixel_count_enable) { + if 
(r->new_query_needed && r->query_in_flight) { + end_render_pass(r); + end_query(r); + } + if (!r->query_in_flight) { + end_render_pass(r); + begin_query(r); + } + } else if (r->query_in_flight) { + end_render_pass(r); + end_query(r); + } + + bool must_bind_pipeline = r->pipeline_binding_changed; + + if (!r->in_render_pass) { + begin_render_pass(pg); + must_bind_pipeline = true; + } + + if (must_bind_pipeline) { + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipeline_binding->pipeline); + r->pipeline_binding->draw_time = pg->draw_time; + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + + VkViewport viewport = { + .width = vp_width, + .height = vp_height, + .minDepth = 0.0, + .maxDepth = 1.0, + }; + vkCmdSetViewport(r->command_buffer, 0, 1, &viewport); + + /* Surface clip */ + /* FIXME: Consider moving to PSH w/ window clip */ + unsigned int xmin = pg->surface_shape.clip_x - + pg->surface_binding_dim.clip_x, + ymin = pg->surface_shape.clip_y - + pg->surface_binding_dim.clip_y; + + unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, + ymax = ymin + pg->surface_shape.clip_height - 1; + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + VkRect2D scissor = { + .offset.x = xmin, + .offset.y = ymin, + .extent.width = scissor_width, + .extent.height = scissor_height, + }; + vkCmdSetScissor(r->command_buffer, 0, 1, &scissor); + } + + if (!pg->clearing) { + bind_descriptor_sets(pg); + push_vertex_attrib_values(pg); + } + + r->in_draw = true; +} + +static void end_draw(PGRAPHState *pg) +{ + 
PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + assert(r->in_render_pass); + + r->in_draw = false; + + // FIXME: We could clear less + pgraph_clear_dirty_reg_map(pg); +} + +void pgraph_vk_draw_end(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + if (is_nop_draw) { + // FIXME: Check PGRAPH register 0x880. + // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit + // check that will raise an exception in the case that a draw should + // modify the color and/or zeta buffer but the target(s) are masked + // off. This check only seems to trigger during the fragment + // processing, it is legal to attempt a draw that is entirely + // clipped regardless of 0x880. See xemu#635 for context. 
+ NV2A_VK_DPRINTF("nop draw!\n"); + return; + } + + pgraph_vk_flush_draw(d); + + pg->draw_time++; + if (r->color_binding && pgraph_color_write_enabled(pg)) { + r->color_binding->draw_time = pg->draw_time; + } + if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) { + r->zeta_binding->draw_time = pg->draw_time; + } + + pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test); +} + +static int compare_memory_sync_requirement_by_addr(const void *p1, + const void *p2) +{ + const MemorySyncRequirement *l = p1, *r = p2; + if (l->addr < r->addr) + return -1; + if (l->addr > r->addr) + return 1; + return 0; +} + +static void sync_vertex_ram_buffer(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->num_vertex_ram_buffer_syncs == 0) { + return; + } + + // Align sync requirements to page boundaries + NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer"); + + for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) { + NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx + ", %" HWADDR_PRIx " bytes", + r->vertex_ram_buffer_syncs[i].addr, + r->vertex_ram_buffer_syncs[i].size); + + hwaddr start_addr = + r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK; + hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr + + r->vertex_ram_buffer_syncs[i].size; + end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE); + + NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes" + " -> %08" HWADDR_PRIx " %zd bytes", i, + r->vertex_ram_buffer_syncs[i].addr, + r->vertex_ram_buffer_syncs[i].size, start_addr, + end_addr - start_addr); + + r->vertex_ram_buffer_syncs[i].addr = start_addr; + r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr; + } + + // Sort the requirements in increasing order of addresses + qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs, + sizeof(MemorySyncRequirement), + compare_memory_sync_requirement_by_addr); + + // Merge overlapping/adjacent requests to minimize 
number of tests + MemorySyncRequirement merged[16]; + int num_syncs = 1; + + merged[0] = r->vertex_ram_buffer_syncs[0]; + + for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) { + MemorySyncRequirement *p = &merged[num_syncs - 1]; + MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i]; + + if (t->addr <= (p->addr + p->size)) { + // Merge with previous + hwaddr p_end_addr = p->addr + p->size; + hwaddr t_end_addr = t->addr + t->size; + hwaddr new_end_addr = MAX(p_end_addr, t_end_addr); + p->size = new_end_addr - p->addr; + } else { + merged[num_syncs++] = *t; + } + } + + if (num_syncs < r->num_vertex_ram_buffer_syncs) { + NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs); + } + + for (int i = 0; i < num_syncs; i++) { + hwaddr addr = merged[i].addr; + VkDeviceSize size = merged[i].size; + + NV2A_VK_DPRINTF("- %d: %08"HWADDR_PRIx" %zd bytes", i, addr, size); + + if (memory_region_test_and_clear_dirty(d->vram, addr, size, + DIRTY_MEMORY_NV2A)) { + NV2A_VK_DPRINTF("Memory dirty. Synchronizing..."); + pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr, + size); + } + } + + r->num_vertex_ram_buffer_syncs = 0; + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + nv2a_profile_inc_counter(NV2A_PROF_CLEAR); + + bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); + bool write_zeta = + (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); + + // FIXME: If doing a full surface clear, mark the surface for full clear + // and we can just do the clear as part of the surface load. 
+ pgraph_vk_surface_update(d, true, write_color, write_zeta); + + if (!(r->color_binding || r->zeta_binding)) { + /* Nothing bound to clear */ + return; + } + + pg->clearing = true; + r->clear_parameter = parameter; + + unsigned int xmin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN); + unsigned int xmax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX); + unsigned int ymin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN); + unsigned int ymax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX); + + NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax, + ymax, write_color ? " color" : "", + write_zeta ? " zeta" : ""); + + begin_pre_draw(pg); + begin_draw(pg); + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + VkClearRect clear_rect = { + .rect = { + .offset = { .x = xmin, .y = ymin }, + .extent = { .width = scissor_width, .height = scissor_height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + int num_attachments = 0; + VkClearAttachment attachments[2]; + + if (write_color && r->color_binding) { + const bool clear_all_color_channels = + (parameter & NV097_CLEAR_SURFACE_COLOR) == + (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | + NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A); + + if (clear_all_color_channels) { + attachments[num_attachments] = (VkClearAttachment){ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = 0, + }; + pgraph_get_clear_color( + pg, attachments[num_attachments].clearValue.color.float32); + num_attachments++; + } else { + float blend_constants[4]; + pgraph_get_clear_color(pg, blend_constants); + 
vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect); + vkCmdSetBlendConstants(r->command_buffer, blend_constants); + vkCmdDraw(r->command_buffer, 3, 1, 0, 0); + } + } + + if (write_zeta && r->zeta_binding) { + int stencil_value = 0; + float depth_value = 1.0; + pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value); + + VkImageAspectFlags aspect = 0; + if (parameter & NV097_CLEAR_SURFACE_Z) + aspect |= VK_IMAGE_ASPECT_DEPTH_BIT; + if (parameter & NV097_CLEAR_SURFACE_STENCIL) + aspect |= VK_IMAGE_ASPECT_STENCIL_BIT; + + attachments[num_attachments++] = (VkClearAttachment){ + .aspectMask = aspect, + .clearValue.depthStencil.depth = depth_value, + .clearValue.depthStencil.stencil = stencil_value, + }; + } + + if (num_attachments) { + vkCmdClearAttachments(r->command_buffer, num_attachments, attachments, + 1, &clear_rect); + } + end_draw(pg); + + pg->clearing = false; + + pgraph_vk_set_surface_dirty(pg, write_color, write_zeta); + + NV2A_VK_DGROUP_END(); +} + +#if 0 +static void pgraph_vk_debug_attrs(NV2AState *d) +{ + for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i]; vertex_idx++) { + NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i], vertex_idx); + for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES; attr_idx++) { + VertexAttribute *attr = &pg->vertex_attributes[attr_idx]; + if (attr->count) { + char *p = (char *)d->vram_ptr + r->attribute_offsets[attr_idx] + (pg->draw_arrays_start[i] + vertex_idx) * attr->stride; + NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx, (ptrdiff_t)(p - (char*)d->vram_ptr)); + for (int count_idx = 0; count_idx < attr->count; count_idx++) { + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float*)p); + p += sizeof(float); + break; + default: + assert(0); + break; + } + } + NV2A_VK_DGROUP_END(); + } + } + NV2A_VK_DGROUP_END(); + } +} +#endif + +static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx, 
+ VkDeviceSize offset) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(buffer_idx == BUFFER_VERTEX_RAM || + buffer_idx == BUFFER_VERTEX_INLINE); + + VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES]; + VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + + for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) { + int attr_idx = r->vertex_attribute_descriptions[i].location; + buffers[i] = r->storage_buffers[buffer_idx].buffer; + offsets[i] = offset + r->vertex_attribute_offsets[attr_idx]; + } + + vkCmdBindVertexBuffers(r->command_buffer, 0, + r->num_active_vertex_binding_descriptions, buffers, + offsets); +} + +void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + + PGRAPHVkState *r = pg->vk_renderer_state; + + /* FIXME: Does this apply to CLEARs too? */ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; + + if (r->color_binding) { + r->color_binding->draw_dirty |= color; + r->color_binding->frame_time = pg->frame_time; + r->color_binding->cleared = false; + } + + if (r->zeta_binding) { + r->zeta_binding->draw_dirty |= zeta; + r->zeta_binding->frame_time = pg->frame_time; + r->zeta_binding->cleared = false; + } +} + +static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size) +{ + if (!pgraph_vk_buffer_has_space_for(pg, index, size, 1)) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + return true; + } + + return false; +} + +void pgraph_vk_flush_draw(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!(r->color_binding || r->zeta_binding)) { + NV2A_VK_DPRINTF("No binding present!!!\n"); + return; + } + + r->num_vertex_ram_buffer_syncs = 0; + + if (pg->draw_arrays_length) { + 
NV2A_VK_DGROUP_BEGIN("Draw Arrays"); + nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); + + assert(pg->inline_elements_length == 0); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start, + pg->draw_arrays_max_count - 1, false, + 0, pg->draw_arrays_max_count - 1); + sync_vertex_ram_buffer(pg); + + begin_pre_draw(pg); + begin_draw(pg); + bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0); + for (int i = 0; i < pg->draw_arrays_length; i++) { + uint32_t start = pg->draw_arrays_start[i], + count = pg->draw_arrays_count[i]; + NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count); + vkCmdDraw(r->command_buffer, count, 1, start, 0); + } + end_draw(pg); + + NV2A_VK_DGROUP_END(); + } else if (pg->inline_elements_length) { + NV2A_VK_DGROUP_BEGIN("Inline Elements"); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); + + size_t index_data_size = + pg->inline_elements_length * sizeof(pg->inline_elements[0]); + + ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size); + + uint32_t min_element = (uint32_t)-1; + uint32_t max_element = 0; + for (int i = 0; i < pg->inline_elements_length; i++) { + max_element = MAX(pg->inline_elements[i], max_element); + min_element = MIN(pg->inline_elements[i], min_element); + } + pgraph_vk_bind_vertex_attributes( + d, min_element, max_element, false, 0, + pg->inline_elements[pg->inline_elements_length - 1]); + sync_vertex_ram_buffer(pg); + + begin_pre_draw(pg); + VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer( + pg, pg->inline_elements, index_data_size); + begin_draw(pg); + bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0); + vkCmdBindIndexBuffer(r->command_buffer, + r->storage_buffers[BUFFER_INDEX].buffer, + buffer_offset, VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0, + 0); + end_draw(pg); + + 
NV2A_VK_DGROUP_END(); + } else if (pg->inline_buffer_length) { + NV2A_VK_DGROUP_BEGIN("Inline Buffer"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); + assert(pg->inline_array_length == 0); + + size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4; + void *data[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t offset = 0; + + pgraph_vk_bind_vertex_attributes_inline(d); + for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) { + int attr_index = r->vertex_attribute_descriptions[i].location; + + VertexAttribute *attr = &pg->vertex_attributes[attr_index]; + r->vertex_attribute_offsets[attr_index] = offset; + + data[i] = attr->inline_buffer; + sizes[i] = vertex_data_size; + + attr->inline_buffer_populated = false; + offset += vertex_data_size; + } + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset); + + begin_pre_draw(pg); + VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( + pg, data, sizes, r->num_active_vertex_attribute_descriptions); + begin_draw(pg); + bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset); + vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0); + end_draw(pg); + + NV2A_VK_DGROUP_END(); + } else if (pg->inline_array_length) { + NV2A_VK_DGROUP_BEGIN("Inline Array"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); + + VkDeviceSize inline_array_data_size = pg->inline_array_length * 4; + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, + inline_array_data_size); + + unsigned int offset = 0; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->count == 0) { + continue; + } + + /* FIXME: Double check */ + offset = ROUND_UP(offset, attr->size); + attr->inline_array_offset = offset; + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i, + attr->size, attr->count); + offset += attr->size * attr->count; + offset = ROUND_UP(offset, attr->size); + 
} + + unsigned int vertex_size = offset; + unsigned int index_count = pg->inline_array_length * 4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true, + vertex_size, index_count - 1); + + begin_pre_draw(pg); + void *inline_array_data = pg->inline_array; + VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( + pg, &inline_array_data, &inline_array_data_size, 1); + begin_draw(pg); + bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset); + vkCmdDraw(r->command_buffer, index_count, 1, 0, 0); + end_draw(pg); + NV2A_VK_DGROUP_END(); + } else { + NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END"); + NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c new file mode 100644 index 00000000000..fb3aed34f59 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/glsl.c @@ -0,0 +1,380 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" + +#include +#include +#include + +static const glslang_resource_t + resource_limits = { .max_lights = 32, + .max_clip_planes = 6, + .max_texture_units = 32, + .max_texture_coords = 32, + .max_vertex_attribs = 64, + .max_vertex_uniform_components = 4096, + .max_varying_floats = 64, + .max_vertex_texture_image_units = 32, + .max_combined_texture_image_units = 80, + .max_texture_image_units = 32, + .max_fragment_uniform_components = 4096, + .max_draw_buffers = 32, + .max_vertex_uniform_vectors = 128, + .max_varying_vectors = 8, + .max_fragment_uniform_vectors = 16, + .max_vertex_output_vectors = 16, + .max_fragment_input_vectors = 15, + .min_program_texel_offset = -8, + .max_program_texel_offset = 7, + .max_clip_distances = 8, + .max_compute_work_group_count_x = 65535, + .max_compute_work_group_count_y = 65535, + .max_compute_work_group_count_z = 65535, + .max_compute_work_group_size_x = 1024, + .max_compute_work_group_size_y = 1024, + .max_compute_work_group_size_z = 64, + .max_compute_uniform_components = 1024, + .max_compute_texture_image_units = 16, + .max_compute_image_uniforms = 8, + .max_compute_atomic_counters = 8, + .max_compute_atomic_counter_buffers = 1, + .max_varying_components = 60, + .max_vertex_output_components = 64, + .max_geometry_input_components = 64, + .max_geometry_output_components = 128, + .max_fragment_input_components = 128, + .max_image_units = 8, + .max_combined_image_units_and_fragment_outputs = 8, + .max_combined_shader_output_resources = 8, + .max_image_samples = 0, + .max_vertex_image_uniforms = 0, + .max_tess_control_image_uniforms = 0, + .max_tess_evaluation_image_uniforms = 0, + .max_geometry_image_uniforms = 0, + .max_fragment_image_uniforms = 8, + .max_combined_image_uniforms = 8, + .max_geometry_texture_image_units = 16, + .max_geometry_output_vertices = 256, + .max_geometry_total_output_components = 1024, + .max_geometry_uniform_components = 1024, + .max_geometry_varying_components = 64, + 
.max_tess_control_input_components = 128, + .max_tess_control_output_components = 128, + .max_tess_control_texture_image_units = 16, + .max_tess_control_uniform_components = 1024, + .max_tess_control_total_output_components = 4096, + .max_tess_evaluation_input_components = 128, + .max_tess_evaluation_output_components = 128, + .max_tess_evaluation_texture_image_units = 16, + .max_tess_evaluation_uniform_components = 1024, + .max_tess_patch_components = 120, + .max_patch_vertices = 32, + .max_tess_gen_level = 64, + .max_viewports = 16, + .max_vertex_atomic_counters = 0, + .max_tess_control_atomic_counters = 0, + .max_tess_evaluation_atomic_counters = 0, + .max_geometry_atomic_counters = 0, + .max_fragment_atomic_counters = 8, + .max_combined_atomic_counters = 8, + .max_atomic_counter_bindings = 1, + .max_vertex_atomic_counter_buffers = 0, + .max_tess_control_atomic_counter_buffers = 0, + .max_tess_evaluation_atomic_counter_buffers = 0, + .max_geometry_atomic_counter_buffers = 0, + .max_fragment_atomic_counter_buffers = 1, + .max_combined_atomic_counter_buffers = 1, + .max_atomic_counter_buffer_size = 16384, + .max_transform_feedback_buffers = 4, + .max_transform_feedback_interleaved_components = 64, + .max_cull_distances = 8, + .max_combined_clip_and_cull_distances = 8, + .max_samples = 4, + .max_mesh_output_vertices_nv = 256, + .max_mesh_output_primitives_nv = 512, + .max_mesh_work_group_size_x_nv = 32, + .max_mesh_work_group_size_y_nv = 1, + .max_mesh_work_group_size_z_nv = 1, + .max_task_work_group_size_x_nv = 32, + .max_task_work_group_size_y_nv = 1, + .max_task_work_group_size_z_nv = 1, + .max_mesh_view_count_nv = 4, + .maxDualSourceDrawBuffersEXT = 1, + .limits = { + .non_inductive_for_loops = 1, + .while_loops = 1, + .do_while_loops = 1, + .general_uniform_indexing = 1, + .general_attribute_matrix_vector_indexing = 1, + .general_varying_indexing = 1, + .general_sampler_indexing = 1, + .general_variable_indexing = 1, + .general_constant_matrix_vector_indexing 
= 1, + } }; + +void pgraph_vk_init_glsl_compiler(void) +{ + glslang_initialize_process(); +} + +void pgraph_vk_finalize_glsl_compiler(void) +{ + glslang_finalize_process(); +} + +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source) +{ + const glslang_input_t input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = stage, + .client = GLSLANG_CLIENT_VULKAN, + .client_version = GLSLANG_TARGET_VULKAN_1_3, + .target_language = GLSLANG_TARGET_SPV, + .target_language_version = GLSLANG_TARGET_SPV_1_5, + .code = glsl_source, + .default_version = 460, + .default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &resource_limits, + }; + + glslang_shader_t *shader = glslang_shader_create(&input); + + if (!glslang_shader_preprocess(shader, &input)) { + fprintf(stderr, + "GLSL preprocessing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), input.code); + assert(!"glslang preprocess failed"); + glslang_shader_delete(shader); + return NULL; + } + + if (!glslang_shader_parse(shader, &input)) { + fprintf(stderr, + "GLSL parsing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), + glslang_shader_get_preprocessed_code(shader)); + assert(!"glslang parse failed"); + glslang_shader_delete(shader); + return NULL; + } + + glslang_program_t *program = glslang_program_create(); + glslang_program_add_shader(program, shader); + + if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + fprintf(stderr, + "GLSL linking failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n", + glslang_program_get_info_log(program), + glslang_program_get_info_debug_log(program)); + assert(!"glslang link failed"); + glslang_program_delete(program); + 
glslang_shader_delete(shader); + return NULL; + } + + glslang_spv_options_t spv_options = { + .validate = true, + +#if defined(CONFIG_RENDERDOC) + .disable_optimizer = true, + .generate_debug_info = true, + .emit_nonsemantic_shader_debug_info = true, + .emit_nonsemantic_shader_debug_source = true, +#endif + }; + glslang_program_SPIRV_generate_with_options(program, stage, &spv_options); + + const char *spirv_messages = glslang_program_SPIRV_get_messages(program); + if (spirv_messages) { + printf("%s\b", spirv_messages); + } + + size_t num_program_bytes = + glslang_program_SPIRV_get_size(program) * sizeof(uint32_t); + + guint8 *data = g_malloc(num_program_bytes); + glslang_program_SPIRV_get(program, (unsigned int *)data); + + glslang_program_delete(program); + glslang_shader_delete(shader); + + return g_byte_array_new_take(data, num_program_bytes); +} + +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv) +{ + VkShaderModuleCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = spv->len, + .pCode = (uint32_t *)spv->data, + }; + VkShaderModule module; + VK_CHECK( + vkCreateShaderModule(r->device, &create_info, NULL, &module)); + return module; +} + +static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout) +{ + assert(!layout->uniforms); + + layout->num_uniforms = block->member_count; + layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform)); + layout->total_size = block->size; + layout->allocation = g_malloc0(block->size); + + for (uint32_t k = 0; k < block->member_count; ++k) { + const SpvReflectBlockVariable *member = &block->members[k]; + + assert(member->array.dims_count < 2); + + layout->uniforms[k] = (ShaderUniform){ + .name = strdup(member->name), + .offset = member->offset, + .dim_v = MAX(1, member->numeric.vector.component_count), + .dim_a = MAX(member->array.dims_count ? 
member->array.dims[0] : 1, member->numeric.matrix.column_count), + .stride = MAX(member->array.stride, member->numeric.matrix.stride), + }; + + // fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n", + // layout->uniforms[k].name, + // layout->uniforms[k].offset, + // layout->uniforms[k].dim_v, + // layout->uniforms[k].dim_a, + // layout->uniforms[k].stride + // ); + } + // fprintf(stderr, "--\n"); +} + +static void init_layout_from_spv(ShaderModuleInfo *info) +{ + SpvReflectResult result = spvReflectCreateShaderModule( + info->spirv->len, info->spirv->data, &info->reflect_module); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to create SPIR-V shader module"); + + uint32_t descriptor_set_count = 0; + result = spvReflectEnumerateDescriptorSets(&info->reflect_module, + &descriptor_set_count, NULL); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->descriptor_sets = + g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *)); + result = spvReflectEnumerateDescriptorSets( + &info->reflect_module, &descriptor_set_count, info->descriptor_sets); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->uniforms.num_uniforms = 0; + info->uniforms.uniforms = NULL; + + for (uint32_t i = 0; i < descriptor_set_count; ++i) { + const SpvReflectDescriptorSet *descriptor_set = + info->descriptor_sets[i]; + for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) { + const SpvReflectDescriptorBinding *binding = + descriptor_set->bindings[j]; + if (binding->descriptor_type != + SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + continue; + } + + const SpvReflectBlockVariable *block = &binding->block; + block_to_uniforms(block, &info->uniforms); + } + } + + info->push_constants.num_uniforms = 0; + info->push_constants.uniforms = NULL; + assert(info->reflect_module.push_constant_block_count < 2); + if (info->reflect_module.push_constant_block_count) { + 
block_to_uniforms(&info->reflect_module.push_constant_blocks[0], + &info->push_constants); + } +} + +static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage) +{ + switch (stage) { + case VK_SHADER_STAGE_GEOMETRY_BIT: + return GLSLANG_STAGE_GEOMETRY; + case VK_SHADER_STAGE_VERTEX_BIT: + return GLSLANG_STAGE_VERTEX; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return GLSLANG_STAGE_FRAGMENT; + case VK_SHADER_STAGE_COMPUTE_BIT: + return GLSLANG_STAGE_COMPUTE; + default: + assert(0); + } +} + +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl) +{ + ShaderModuleInfo *info = g_malloc0(sizeof(*info)); + info->glsl = strdup(glsl); + info->spirv = pgraph_vk_compile_glsl_to_spv( + vk_shader_stage_to_glslang_stage(stage), glsl); + info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv); + init_layout_from_spv(info); + return info; +} + +static void finalize_uniform_layout(ShaderUniformLayout *layout) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + free((void*)layout->uniforms[i].name); + } + if (layout->uniforms) { + g_free(layout->uniforms); + } +} + +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info) +{ + if (info->glsl) { + free(info->glsl); + } + finalize_uniform_layout(&info->uniforms); + finalize_uniform_layout(&info->push_constants); + free(info->descriptor_sets); + spvReflectDestroyShaderModule(&info->reflect_module); + vkDestroyShaderModule(r->device, info->module, NULL); + g_byte_array_unref(info->spirv); + g_free(info); +} diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.h b/hw/xbox/nv2a/pgraph/vk/glsl.h new file mode 100644 index 00000000000..3f6ccd9b3a6 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/glsl.h @@ -0,0 +1,205 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H +#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H + +#include "qemu/osdep.h" +#include +#include +#include + +typedef struct ShaderUniform { + const char *name; + size_t dim_v; + size_t dim_a; + size_t align; + size_t stride; + size_t offset; +} ShaderUniform; + +typedef struct ShaderUniformLayout { + ShaderUniform *uniforms; + size_t num_uniforms; + size_t total_size; + void *allocation; +} ShaderUniformLayout; + +static inline void uniform_std140(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size_t align = size; + size_t stride = 0; + + size *= u->dim_v; + align *= u->dim_v == 3 ? 4 : u->dim_v; + + // If an array, each element is padded to vec4. 
+ if (u->dim_a > 1) { + align = 4 * sizeof(float); + stride = align; + size = u->dim_a * align; + } else { + align = size; + stride = 0; + } + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = stride; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline void uniform_std430(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size *= u->dim_v; + size_t align = size; + size *= u->dim_a; + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline int uniform_index(ShaderUniformLayout *layout, const char *name) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + if (!strcmp(layout->uniforms[i].name, name)) { + return i + 1; + } + } + + return -1; +} + +static inline +void *uniform_ptr(ShaderUniformLayout *layout, int idx) +{ + assert(idx > 0 && "invalid uniform index"); + + return (char *)layout->allocation + layout->uniforms[idx - 1].offset; +} + +static inline +void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count) +{ + assert(idx > 0 && "invalid uniform index"); + + ShaderUniform *u = &layout->uniforms[idx - 1]; + const size_t element_size = value_size * u->dim_v; + + size_t bytes_remaining = value_size * count; + char *p_out = uniform_ptr(layout, idx); + char *p_max = p_out + layout->total_size; + char *p_in = (char *)values; + + int index = 0; + while (bytes_remaining) { + assert(p_out < p_max); + assert(index < u->dim_a); + memcpy(p_out, p_in, element_size); + bytes_remaining -= element_size; + p_out += u->stride; + p_in += element_size; + index += 1; + } +} + +static inline +void 
uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values) +{ + uniform_copy(layout, idx, values, sizeof(float), count); +} + +static inline +void uniform1f(ShaderUniformLayout *layout, int idx, float value) +{ + uniform1fv(layout, idx, 1, &value); +} + +static inline +void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1) +{ + float values[] = { v0, v1 }; + uniform1fv(layout, idx, 2, values); +} + +static inline +void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3) +{ + float values[] = { v0, v1, v2, v3 }; + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4 * 4, values); +} + +static inline +void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values) +{ + uniform_copy(layout, idx, values, sizeof(int32_t), count); +} + +static inline +void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value) +{ + uniform1iv(layout, idx, 1, &value); +} + +static inline +void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3) +{ + int values[] = { v0, v1, v2, v3 }; + uniform1iv(layout, idx, 4, values); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c new file mode 100644 index 00000000000..1161d81f547 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/image.c @@ -0,0 +1,209 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static bool check_format_has_depth_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT || + format == VK_FORMAT_D16_UNORM; +} + +static bool check_format_has_stencil_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout) +{ + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .oldLayout = oldLayout, + .newLayout = newLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + if (check_format_has_depth_component(format)) { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + + if (check_format_has_stencil_component(format)) { + barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } else { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } + + VkPipelineStageFlags sourceStage; + VkPipelineStageFlags destinationStage; + + // Undefined -> Dst + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 
VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Undefined -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Undefined -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + // Dst -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Dst -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Dst -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + + // Dst -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Color -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Depth -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Depth -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Src -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Src -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = 
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + + // Src -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + } else { + assert(!"unsupported layout transition!"); + } + + vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0, + NULL, 0, NULL, 1, &barrier); +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c new file mode 100644 index 00000000000..4023fd5858c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -0,0 +1,662 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "ui/xemu-settings.h" +#include "renderer.h" +#include "xemu-version.h" + +#include +#include +#include + +#include + +typedef GArray VkExtensionPropertiesArray; +typedef GArray StringArray; + +static bool enable_validation = false; + +static char const *const validation_layers[] = { + "VK_LAYER_KHRONOS_validation", +}; + +static char const *const required_instance_extensions[] = { + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, +}; + +static char const *const required_device_extensions[] = { + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, +#ifdef WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, +#else + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, +#endif +}; + +static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) +{ + NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage); + fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage); + + if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && + (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) { + exit(1); + } + return VK_FALSE; +} + +static bool check_validation_layer_support(void) +{ + uint32_t num_available_layers; + vkEnumerateInstanceLayerProperties(&num_available_layers, NULL); + + g_autofree VkLayerProperties *available_layers = + g_malloc_n(num_available_layers, sizeof(VkLayerProperties)); + vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers); + + for (int i = 0; i < ARRAY_SIZE(validation_layers); i++) { + bool found = false; + for 
(int j = 0; j < num_available_layers; j++) { + if (!strcmp(validation_layers[i], available_layers[j].layerName)) { + found = true; + break; + } + } + if (!found) { + fprintf(stderr, "desired validation layer not found: %s\n", + validation_layers[i]); + return false; + } + } + + return true; +} + +static SDL_Window *create_window(void) +{ + SDL_Window *window = SDL_CreateWindow( + "SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN); + + if (window == NULL) { + fprintf(stderr, "%s: Failed to create window\n", __func__); + SDL_Quit(); + exit(1); + } + + return window; +} + +static VkExtensionPropertiesArray * +get_available_instance_extensions(PGRAPHState *pg) +{ + uint32_t num_extensions = 0; + + VK_CHECK( + vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateInstanceExtensionProperties( + NULL, &num_extensions, (VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static bool +is_extension_available(VkExtensionPropertiesArray *available_extensions, + const char *extension_name) +{ + for (int i = 0; i < available_extensions->len; i++) { + VkExtensionProperties *e = + &g_array_index(available_extensions, VkExtensionProperties, i); + if (!strcmp(e->extensionName, extension_name)) { + return true; + } + } + + return false; +} + +static StringArray *get_required_instance_extension_names(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Add instance extensions SDL lists as required + unsigned int sdl_count = 0; + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, NULL); + + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, sizeof(char *), + sdl_count + ARRAY_SIZE(required_instance_extensions)); + + if (sdl_count) { + 
g_array_set_size(extensions, sdl_count); + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, + (const char **)extensions->data); + } + + // Add additional required extensions + g_array_append_vals(extensions, required_instance_extensions, + ARRAY_SIZE(required_instance_extensions)); + + return extensions; +} + +static bool +add_extension_if_available(VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names, + const char *desired_extension_name) +{ + if (is_extension_available(available_extensions, desired_extension_name)) { + g_array_append_val(enabled_extension_names, desired_extension_name); + return true; + } + + fprintf(stderr, "Warning: extension not available: %s\n", + desired_extension_name); + return false; +} + +static void +add_optional_instance_extension_names(PGRAPHState *pg, + VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->debug_utils_extension_enabled = + g_config.display.vulkan.validation_layers && + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +} + +static void create_instance(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->window = create_window(); + + VK_CHECK(volkInitialize()); + + VkApplicationInfo app_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "xemu", + .applicationVersion = VK_MAKE_VERSION( + xemu_version_major, xemu_version_minor, xemu_version_patch), + .pEngineName = "No Engine", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = VK_API_VERSION_1_3, + }; + + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_instance_extensions(pg); + + g_autofree StringArray *enabled_extension_names = + get_required_instance_extension_names(pg); + + bool all_required_extensions_available = true; + for (int i = 0; i < enabled_extension_names->len; i++) 
{ + const char *required_extension = + g_array_index(enabled_extension_names, const char *, i); + if (!is_extension_available(available_extensions, required_extension)) { + fprintf(stderr, + "Error: Required instance extension not available: %s\n", + required_extension); + all_required_extensions_available = false; + } + } + assert(all_required_extensions_available); + + add_optional_instance_extension_names(pg, available_extensions, + enabled_extension_names); + + fprintf(stderr, "Enabled instance extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + } + + VkInstanceCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app_info, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + }; + + VkDebugUtilsMessengerCreateInfoEXT dbg_create_info; + if (r->debug_utils_extension_enabled) { + dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = debugCallback, + }; + } + + enable_validation = g_config.display.vulkan.validation_layers; + + if (enable_validation) { + if (check_validation_layer_support()) { + fprintf(stderr, "Warning: Validation layers enabled. 
Expect performance impact.\n"); + create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + create_info.ppEnabledLayerNames = validation_layers; + if (r->debug_utils_extension_enabled) { + create_info.pNext = + (VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info; + } + } else { + fprintf(stderr, "Warning: validation layers not available\n"); + enable_validation = false; + } + } + + VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance)); + + volkLoadInstance(r->instance); +} + +static bool is_queue_family_indicies_complete(QueueFamilyIndices indices) +{ + return indices.queue_family >= 0; +} + +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = { + .queue_family = -1, + }; + + uint32_t num_queue_families = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL); + + g_autofree VkQueueFamilyProperties *queue_families = + g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties)); + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, + queue_families); + + for (int i = 0; i < num_queue_families; i++) { + VkQueueFamilyProperties queueFamily = queue_families[i]; + // FIXME: Support independent graphics, compute queues + int required_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + if ((queueFamily.queueFlags & required_flags) == required_flags) { + indices.queue_family = i; + } + if (is_queue_family_indicies_complete(indices)) { + break; + } + } + + return indices; +} + +static VkExtensionPropertiesArray * +get_available_device_extensions(VkPhysicalDevice device) +{ + uint32_t num_extensions = 0; + + VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions, + NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + device, NULL, &num_extensions, + 
(VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static StringArray *get_required_device_extension_names(void) +{ + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, sizeof(char *), + ARRAY_SIZE(required_device_extensions)); + + g_array_append_vals(extensions, required_device_extensions, + ARRAY_SIZE(required_device_extensions)); + + return extensions; +} + +static void add_optional_device_extension_names( + PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->custom_border_color_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + + r->provoking_vertex_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + + r->memory_budget_extension_enabled = add_extension_if_available( + available_extensions, enabled_extension_names, + VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); +} + +static bool check_device_support_required_extensions(VkPhysicalDevice device) +{ + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_device_extensions(device); + + for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) { + if (!is_extension_available(available_extensions, + required_device_extensions[i])) { + fprintf(stderr, "required device extension not found: %s\n", + required_device_extensions[i]); + return false; + } + } + + return true; +} + +static bool is_device_compatible(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = pgraph_vk_find_queue_families(device); + + return is_queue_family_indicies_complete(indices) && + check_device_support_required_extensions(device); + // FIXME: Check formats + // FIXME: Check vram +} + +static void select_physical_device(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t 
num_physical_devices = 0; + + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL); + if (num_physical_devices == 0) { + assert(!"failed to find GPUs with Vulkan support"); + } + + g_autofree VkPhysicalDevice *devices = + g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice)); + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices); + + fprintf(stderr, "Available physical devices:\n"); + for (int i = 0; i < num_physical_devices; i++) { + vkGetPhysicalDeviceProperties(devices[i], &r->device_props); + fprintf(stderr, "- %s\n", r->device_props.deviceName); + } + + // FIXME: Store preferred device + + r->physical_device = VK_NULL_HANDLE; + for (int i = 0; i < num_physical_devices; i++) { + if (is_device_compatible(devices[i])) { + r->physical_device = devices[i]; + break; + } + } + if (r->physical_device == VK_NULL_HANDLE) { + assert(!"failed to find a suitable GPU"); + } + + vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props); + fprintf(stderr, + "Selected physical device: %s\n" + "- Vendor: %x, Device: %x\n" + "- Driver Version: %d.%d.%d\n", + r->device_props.deviceName, + r->device_props.vendorID, + r->device_props.deviceID, + VK_VERSION_MAJOR(r->device_props.driverVersion), + VK_VERSION_MINOR(r->device_props.driverVersion), + VK_VERSION_PATCH(r->device_props.driverVersion)); + + size_t vsh_attr_values_size = + NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); + assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size); +} + +static void create_logical_device(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + g_autofree VkExtensionPropertiesArray *available_extensions = + get_available_device_extensions(r->physical_device); + + g_autofree StringArray *enabled_extension_names = + get_required_device_extension_names(); + + add_optional_device_extension_names(pg, available_extensions, + 
enabled_extension_names); + + fprintf(stderr, "Enabled device extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + } + + float queuePriority = 1.0f; + + VkDeviceQueueCreateInfo queue_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = indices.queue_family, + .queueCount = 1, + .pQueuePriorities = &queuePriority, + }; + + // Ensure device supports required features + VkPhysicalDeviceFeatures available_features, enabled_features; + vkGetPhysicalDeviceFeatures(r->physical_device, &available_features); + memset(&enabled_features, 0, sizeof(enabled_features)); + + struct { + const char *name; + VkBool32 available, *enabled; + } required_features[] = { + #define F(n) { #n, available_features.n, &enabled_features.n } + F(shaderClipDistance), + F(geometryShader), + F(shaderTessellationAndGeometryPointSize), + F(depthClamp), + F(occlusionQueryPrecise), + #undef F + }; + + bool all_features_available = true; + for (int i = 0; i < ARRAY_SIZE(required_features); i++) { + if (required_features[i].available != VK_TRUE) { + fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name); + all_features_available = false; + } + *required_features[i].enabled = VK_TRUE; + } + assert(all_features_available); + + void *next_struct = NULL; + + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; + if (r->provoking_vertex_extension_enabled) { + provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .provokingVertexLast = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &provoking_vertex_features; + } + + VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; + if (r->custom_border_color_extension_enabled) { + custom_border_features = 
(VkPhysicalDeviceCustomBorderColorFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .customBorderColors = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &custom_border_features; + } + + VkDeviceCreateInfo device_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &queue_create_info, + .pEnabledFeatures = &enabled_features, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + .pNext = next_struct, + }; + + if (enable_validation) { + device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + device_create_info.ppEnabledLayerNames = validation_layers; + } + + VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL, + &r->device)); + + vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue); +} + +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + VkMemoryPropertyFlags properties) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties prop; + vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop); + for (uint32_t i = 0; i < prop.memoryTypeCount; i++) { + if ((prop.memoryTypes[i].propertyFlags & properties) == properties && + type_bits & (1 << i)) { + return i; + } + } + return 0xFFFFFFFF; // Unable to find memoryType +} + +static void init_allocator(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VmaVulkanFunctions vulkanFunctions = { + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + .vkGetInstanceProcAddr = vkGetInstanceProcAddr, + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. 
+ .vkGetDeviceProcAddr = vkGetDeviceProcAddr, + .vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties, + .vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties, + .vkAllocateMemory = vkAllocateMemory, + .vkFreeMemory = vkFreeMemory, + .vkMapMemory = vkMapMemory, + .vkUnmapMemory = vkUnmapMemory, + .vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges, + .vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges, + .vkBindBufferMemory = vkBindBufferMemory, + .vkBindImageMemory = vkBindImageMemory, + .vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements, + .vkGetImageMemoryRequirements = vkGetImageMemoryRequirements, + .vkCreateBuffer = vkCreateBuffer, + .vkDestroyBuffer = vkDestroyBuffer, + .vkCreateImage = vkCreateImage, + .vkDestroyImage = vkDestroyImage, + .vkCmdCopyBuffer = vkCmdCopyBuffer, + #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + .vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2, + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2, + #endif + #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. + .vkBindBufferMemory2KHR = vkBindBufferMemory2, + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. 
+ .vkBindImageMemory2KHR = vkBindImageMemory2, + #endif + #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. + .vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR, + #endif + #if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements, + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements, + #endif + }; + + VmaAllocatorCreateInfo create_info = { + .flags = (r->memory_budget_extension_enabled ? 
+ VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT : + 0), + .vulkanApiVersion = VK_API_VERSION_1_3, + .instance = r->instance, + .physicalDevice = r->physical_device, + .device = r->device, + .pVulkanFunctions = &vulkanFunctions, + }; + + VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator)); +} + +static void finalize_allocator(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vmaDestroyAllocator(r->allocator); +} + +void pgraph_vk_init_instance(PGRAPHState *pg) +{ + create_instance(pg); + select_physical_device(pg); + create_logical_device(pg); + init_allocator(pg); +} + +void pgraph_vk_finalize_instance(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + finalize_allocator(pg); + vkDestroyDevice(r->device, NULL); + r->device = VK_NULL_HANDLE; + + vkDestroyInstance(r->instance, NULL); + r->instance = VK_NULL_HANDLE; +} diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build new file mode 100644 index 00000000000..24c2474cb97 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -0,0 +1,24 @@ +if vulkan.found() + +specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen, + files( + 'blit.c', + 'buffer.c', + 'command.c', + 'debug.c', + 'display.c', + 'draw.c', + 'glsl.c', + 'image.c', + 'instance.c', + 'renderer.c', + 'reports.c', + 'shaders.c', + 'surface-compute.c', + 'surface.c', + 'texture.c', + 'vertex.c', + ) + ]) + +endif diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c new file mode 100644 index 00000000000..f947aa39e5f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -0,0 +1,266 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +#include "gloffscreen.h" + +#if HAVE_EXTERNAL_MEMORY +static GloContext *g_gl_context; + +static void gl_context_init(void) +{ + g_gl_context = glo_context_create(); +} +#endif + +static void pgraph_vk_init_thread(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + +#if HAVE_EXTERNAL_MEMORY + glo_set_current(g_gl_context); +#endif + + pgraph_vk_init_instance(pg); + pgraph_vk_init_command_buffers(pg); + pgraph_vk_init_buffers(d); + pgraph_vk_init_surfaces(pg); + pgraph_vk_init_shaders(pg); + pgraph_vk_init_pipelines(pg); + pgraph_vk_init_textures(pg); + pgraph_vk_init_reports(pg); + pgraph_vk_init_compute(pg); + pgraph_vk_init_display(pg); +} + +static void pgraph_vk_finalize(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finalize_display(pg); + pgraph_vk_finalize_compute(pg); + pgraph_vk_finalize_reports(pg); + pgraph_vk_finalize_textures(pg); + pgraph_vk_finalize_pipelines(pg); + pgraph_vk_finalize_shaders(pg); + pgraph_vk_finalize_surfaces(pg); + pgraph_vk_finalize_buffers(d); + pgraph_vk_finalize_command_buffers(pg); + pgraph_vk_finalize_instance(pg); +} + +static void pgraph_vk_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH); + pgraph_vk_surface_flush(d); + pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); + pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, + memory_region_size(d->vram)); + for (int i = 0; i < 4; i++) { + pg->texture_dirty[i] = true; + } + + /* FIXME: Flush more? 
*/ + + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_vk_sync(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pgraph_vk_render_display(pg); + + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +static void pgraph_vk_process_pending(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + if (qatomic_read(&r->downloads_pending) || + qatomic_read(&r->download_dirty_surfaces_pending) || + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) + ) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&r->downloads_pending)) { + pgraph_vk_process_pending_downloads(d); + } + if (qatomic_read(&r->download_dirty_surfaces_pending)) { + pgraph_vk_download_dirty_surfaces(d); + } + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_vk_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_vk_flush(d); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_vk_flip_stall(NV2AState *d) +{ + pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL); + pgraph_vk_debug_frame_terminator(); +} + +static void pgraph_vk_pre_savevm_trigger(NV2AState *d) +{ + qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); + qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_savevm_wait(NV2AState *d) +{ + qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_shutdown_trigger(NV2AState *d) +{ + // qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true); + // qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static void pgraph_vk_pre_shutdown_wait(NV2AState *d) +{ + // 
qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static int pgraph_vk_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + + assert(surface->color); + + surface->frame_time = pg->frame_time; + +#if HAVE_EXTERNAL_MEMORY + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + return r->display.gl_texture_id; +#else + qemu_mutex_unlock(&d->pfifo.lock); + pgraph_vk_wait_for_surface_download(surface); + return 0; +#endif +} + +static void pgraph_vk_init(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState)); + + pgraph_vk_debug_init(); +} + +static PGRAPHRenderer pgraph_vk_renderer = { + .type = CONFIG_DISPLAY_RENDERER_VULKAN, + .name = "Vulkan", + .ops = { + .init = pgraph_vk_init, +#if HAVE_EXTERNAL_MEMORY + .early_context_init = gl_context_init, +#endif + .init_thread = pgraph_vk_init_thread, + .finalize = pgraph_vk_finalize, + .clear_report_value = pgraph_vk_clear_report_value, + .clear_surface = pgraph_vk_clear_surface, + .draw_begin = pgraph_vk_draw_begin, + .draw_end = pgraph_vk_draw_end, + .flip_stall = pgraph_vk_flip_stall, + .flush_draw = pgraph_vk_flush_draw, + .get_report = pgraph_vk_get_report, + .image_blit = pgraph_vk_image_blit, + .pre_savevm_trigger = pgraph_vk_pre_savevm_trigger, + .pre_savevm_wait = pgraph_vk_pre_savevm_wait, + 
.pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_vk_pre_shutdown_wait, + .process_pending = pgraph_vk_process_pending, + .process_pending_reports = pgraph_vk_process_pending_reports, + .surface_update = pgraph_vk_surface_update, + .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, + .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, + .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_vk_renderer); +} + +void pgraph_vk_check_memory_budget(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties const *props; + vmaGetMemoryProperties(r->allocator, &props); + + g_autofree VmaBudget *budgets = g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget)); + vmaGetHeapBudgets(r->allocator, budgets); + + const float budget_threshold = 0.8; + bool near_budget = false; + + for (int i = 0; i < props->memoryHeapCount; i++) { + VmaBudget *b = &budgets[i]; + float use_to_budget_ratio = + (double)b->statistics.allocationBytes / (double)b->budget; + NV2A_VK_DPRINTF("Heap %d: used %lu/%lu MiB (%.2f%%)", i, + b->statistics.allocationBytes / (1024 * 1024), + b->budget / (1024 * 1024), use_to_budget_ratio * 100); + near_budget |= use_to_budget_ratio > budget_threshold; + } + + // If any heaps are near budget, free up some resources + if (near_budget) { + pgraph_vk_trim_texture_cache(pg); + } + +#if 0 + char *s; + vmaBuildStatsString(r->allocator, &s, VK_TRUE); + puts(s); + vmaFreeStatsString(r->allocator, s); +#endif +} diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h new file mode 100644 index 00000000000..a509de8d71a --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -0,0 +1,526 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H + +#define VK_NO_PROTOTYPES 1 + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" +#include "hw/hw.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include +#include +#include +#include + +#define VMA_STATIC_VULKAN_FUNCTIONS 1 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0 +#include + +#include "debug.h" +#include "constants.h" +#include "glsl.h" + +#define HAVE_EXTERNAL_MEMORY 1 + +typedef struct QueueFamilyIndices { + int queue_family; +} QueueFamilyIndices; + +typedef struct MemorySyncRequirement { + hwaddr addr, size; +} MemorySyncRequirement; + +typedef struct RenderPassState { + VkFormat color_format; + VkFormat zeta_format; +} RenderPassState; + +typedef struct RenderPass { + RenderPassState state; + VkRenderPass render_pass; +} RenderPass; + +typedef struct PipelineKey { + bool clear; + RenderPassState render_pass_state; + ShaderState shader_state; + uint32_t regs[10]; + VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; +} PipelineKey; + +typedef struct 
PipelineBinding { + LruNode node; + PipelineKey key; + VkPipelineLayout layout; + VkPipeline pipeline; + VkRenderPass render_pass; + unsigned int draw_time; +} PipelineBinding; + +enum Buffer { + BUFFER_STAGING_DST, + BUFFER_STAGING_SRC, + BUFFER_COMPUTE_DST, + BUFFER_COMPUTE_SRC, + BUFFER_INDEX, + BUFFER_INDEX_STAGING, + BUFFER_VERTEX_RAM, + BUFFER_VERTEX_INLINE, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM, + BUFFER_UNIFORM_STAGING, + BUFFER_COUNT +}; + +typedef struct StorageBuffer { + VkBuffer buffer; + VkBufferUsageFlags usage; + VmaAllocationCreateInfo alloc_info; + VmaAllocation allocation; + VkMemoryPropertyFlags properties; + size_t buffer_offset; + size_t buffer_size; + uint8_t *mapped; +} StorageBuffer; + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + VkImage image; + VkImageView image_view; + VmaAllocation allocation; + + // Used for scaling + VkImage image_scratch; + VkImageLayout image_scratch_current_layout; + VmaAllocation allocation_scratch; + + bool initialized; +} SurfaceBinding; + +typedef struct ShaderModuleInfo { + char *glsl; + GByteArray *spirv; + VkShaderModule module; + SpvReflectShaderModule reflect_module; + SpvReflectDescriptorSet **descriptor_sets; + ShaderUniformLayout uniforms; + ShaderUniformLayout push_constants; +} ShaderModuleInfo; + +typedef struct ShaderBinding { + LruNode node; + ShaderState state; + ShaderModuleInfo *geometry; + ShaderModuleInfo *vertex; + ShaderModuleInfo *fragment; + + int psh_constant_loc[9][2]; + int alpha_ref_loc; + + int bump_mat_loc[NV2A_MAX_TEXTURES]; + int 
bump_scale_loc[NV2A_MAX_TEXTURES]; + int bump_offset_loc[NV2A_MAX_TEXTURES]; + int tex_scale_loc[NV2A_MAX_TEXTURES]; + + int surface_size_loc; + int clip_range_loc; + + int vsh_constant_loc; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + int inv_viewport_loc; + int ltctxa_loc; + int ltctxb_loc; + int ltc1_loc; + + int fog_color_loc; + int fog_param_loc; + int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + int light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + int light_local_position_loc[NV2A_MAX_LIGHTS]; + int light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + int clip_region_loc; + + int material_alpha_loc; +} ShaderBinding; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; + float scale; +} TextureKey; + +typedef struct TextureBinding { + LruNode node; + TextureKey key; + VkImage image; + VkImageLayout current_layout; + VkImageView image_view; + VmaAllocation allocation; + VkSampler sampler; + bool possibly_dirty; + uint64_t hash; + unsigned int draw_time; + uint32_t submit_time; +} TextureBinding; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; +} QueryReport; + +typedef struct PGRAPHVkDisplayState { + ShaderModuleInfo *display_frag; + + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_set; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + VkRenderPass render_pass; + VkFramebuffer framebuffer; + + VkImage image; + VkImageView image_view; + VkDeviceMemory memory; + VkSampler sampler; + + int width, height; + int draw_time; + + // OpenGL Interop +#ifdef WIN32 + HANDLE handle; +#else + int fd; +#endif + GLuint gl_memory_obj; + GLuint gl_texture_id; +} PGRAPHVkDisplayState; + +typedef struct PGRAPHVkComputeState { + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout 
descriptor_set_layout; + VkDescriptorSet descriptor_sets[1]; + VkPipelineLayout pipeline_layout; + VkPipeline pipeline_pack_d24s8; + VkPipeline pipeline_unpack_d24s8; + VkPipeline pipeline_pack_f32s8; + VkPipeline pipeline_unpack_f32s8; +} PGRAPHVkComputeState; + +typedef struct PGRAPHVkState { + void *window; + VkInstance instance; + + bool debug_utils_extension_enabled; + bool custom_border_color_extension_enabled; + bool provoking_vertex_extension_enabled; + bool memory_budget_extension_enabled; + + VkPhysicalDevice physical_device; + VkPhysicalDeviceProperties device_props; + VkDevice device; + VmaAllocator allocator; + uint32_t allocator_last_submit_index; + + VkQueue queue; + VkCommandPool command_pool; + VkCommandBuffer command_buffers[2]; + + VkCommandBuffer command_buffer; + VkSemaphore command_buffer_semaphore; + VkFence command_buffer_fence; + unsigned int command_buffer_start_time; + bool in_command_buffer; + uint32_t submit_count; + + VkCommandBuffer aux_command_buffer; + bool in_aux_command_buffer; + + VkFramebuffer framebuffers[50]; + int framebuffer_index; + bool framebuffer_dirty; + + VkRenderPass render_pass; + RenderPass *render_passes; + int render_passes_index; + int render_passes_capacity; + bool in_render_pass; + bool in_draw; + + Lru pipeline_cache; + VkPipelineCache vk_pipeline_cache; + PipelineBinding *pipeline_cache_entries; + PipelineBinding *pipeline_binding; + bool pipeline_binding_changed; + + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; + + StorageBuffer storage_buffers[BUFFER_COUNT]; + + MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t num_vertex_ram_buffer_syncs; + unsigned long *uploaded_bitmap; + size_t bitmap_size; + + VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES]; 
+ int num_active_vertex_attribute_descriptions; + + VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int num_active_vertex_binding_descriptions; + hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + Lru texture_cache; + TextureBinding *texture_cache_entries; + TextureBinding *texture_bindings[NV2A_MAX_TEXTURES]; + TextureBinding dummy_texture; + bool texture_bindings_changed; + + Lru shader_cache; + ShaderBinding *shader_cache_entries; + ShaderBinding *shader_binding; + ShaderModuleInfo *quad_vert_module, *solid_frag_module; + bool shader_bindings_changed; + + // FIXME: Merge these into a structure + uint64_t uniform_buffer_hashes[2]; + size_t uniform_buffer_offsets[2]; + bool uniforms_changed; + + VkQueryPool query_pool; + int max_queries_in_flight; // FIXME: Move out to constant + int num_queries_in_flight; + bool new_query_needed; + bool query_in_flight; + uint32_t zpass_pixel_count_result; + QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate + + SurfaceFormatInfo kelvin_surface_zeta_vk_map[3]; + + uint32_t clear_parameter; + + PGRAPHVkDisplayState display; + PGRAPHVkComputeState compute; +} PGRAPHVkState; + +// renderer.c +void pgraph_vk_check_memory_budget(PGRAPHState *pg); + +// debug.c +void pgraph_vk_debug_init(void); + +// instance.c +void pgraph_vk_init_instance(PGRAPHState *pg); +void pgraph_vk_finalize_instance(PGRAPHState *pg); +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device); +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + VkMemoryPropertyFlags properties); + +// glsl.c +void pgraph_vk_init_glsl_compiler(void); +void 
pgraph_vk_finalize_glsl_compiler(void); +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source); +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, + GByteArray *spv); +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl); +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info); + +// buffer.c +void pgraph_vk_init_buffers(NV2AState *d); +void pgraph_vk_finalize_buffers(NV2AState *d); +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment); +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, size_t count, + VkDeviceAddress alignment); + +// command.c +void pgraph_vk_init_command_buffers(PGRAPHState *pg); +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg); +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg); +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// image.c +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout); + +// vertex.c +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element); +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d); +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count); + +// surface.c +void pgraph_vk_init_surfaces(PGRAPHState *pg); +void pgraph_vk_finalize_surfaces(PGRAPHState *pg); +void 
pgraph_vk_surface_flush(NV2AState *d); +void pgraph_vk_process_pending_downloads(NV2AState *d); +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr); +void pgraph_vk_wait_for_surface_download(SurfaceBinding *e); +void pgraph_vk_download_dirty_surfaces(NV2AState *d); +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force); +void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write); +SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr); +void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d); +void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); + +// surface-compute.c +void pgraph_vk_init_compute(PGRAPHState *pg); +void pgraph_vk_finalize_compute(PGRAPHState *pg); +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale); +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst); + +// display.c +void pgraph_vk_init_display(PGRAPHState *pg); +void pgraph_vk_finalize_display(PGRAPHState *pg); +void pgraph_vk_render_display(PGRAPHState *pg); + +// texture.c +void pgraph_vk_init_textures(PGRAPHState *pg); +void pgraph_vk_finalize_textures(PGRAPHState *pg); +void pgraph_vk_bind_textures(NV2AState *d); +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, + hwaddr size); +void pgraph_vk_trim_texture_cache(PGRAPHState *pg); + +// shaders.c +void pgraph_vk_init_shaders(PGRAPHState *pg); +void pgraph_vk_finalize_shaders(PGRAPHState *pg); +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg); +void pgraph_vk_bind_shaders(PGRAPHState *pg); +void 
pgraph_vk_update_shader_uniforms(PGRAPHState *pg); + +// reports.c +void pgraph_vk_init_reports(PGRAPHState *pg); +void pgraph_vk_finalize_reports(PGRAPHState *pg); +void pgraph_vk_clear_report_value(NV2AState *d); +void pgraph_vk_get_report(NV2AState *d, uint32_t parameter); +void pgraph_vk_process_pending_reports(NV2AState *d); +void pgraph_vk_process_pending_reports_internal(NV2AState *d); + +typedef enum FinishReason { + VK_FINISH_REASON_VERTEX_BUFFER_DIRTY, + VK_FINISH_REASON_SURFACE_CREATE, + VK_FINISH_REASON_SURFACE_DOWN, + VK_FINISH_REASON_NEED_BUFFER_SPACE, + VK_FINISH_REASON_FRAMEBUFFER_DIRTY, + VK_FINISH_REASON_PRESENTING, + VK_FINISH_REASON_FLIP_STALL, + VK_FINISH_REASON_FLUSH, +} FinishReason; + +// draw.c +void pgraph_vk_init_pipelines(PGRAPHState *pg); +void pgraph_vk_finalize_pipelines(PGRAPHState *pg); +void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_vk_draw_begin(NV2AState *d); +void pgraph_vk_draw_end(NV2AState *d); +void pgraph_vk_finish(PGRAPHState *pg, FinishReason why); +void pgraph_vk_flush_draw(NV2AState *d); +void pgraph_vk_begin_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg); + +VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg); +void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// blit.c +void pgraph_vk_image_blit(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c new file mode 100644 index 00000000000..2e6bdf96f32 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -0,0 +1,134 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) 
any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +void pgraph_vk_init_reports(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QSIMPLEQ_INIT(&r->report_queue); + r->num_queries_in_flight = 0; + r->max_queries_in_flight = 1024; + r->new_query_needed = true; + r->query_in_flight = false; + r->zpass_pixel_count_result = 0; + + VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_OCCLUSION, + .queryCount = r->max_queries_in_flight, + }; + VK_CHECK( + vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool)); +} + +void pgraph_vk_finalize_reports(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyQueryPool(r->device, r->query_pool, NULL); +} + +void pgraph_vk_clear_report_value(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + q->clear = true; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry); +} + +void pgraph_vk_get_report(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + q->clear = false; + q->parameter = parameter; + q->query_count = r->num_queries_in_flight; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry); + + r->new_query_needed = true; +} + +void 
pgraph_vk_process_pending_reports_internal(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DGROUP_BEGIN("Processing queries"); + + assert(!r->in_command_buffer); + + // Fetch all query results + g_autofree uint64_t *query_results = NULL; + + if (r->num_queries_in_flight > 0) { + size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t); + query_results = g_malloc_n(r->num_queries_in_flight, + sizeof(uint64_t)); // FIXME: Pre-allocate + VkResult result; + do { + result = vkGetQueryPoolResults( + r->device, r->query_pool, 0, r->num_queries_in_flight, + size_of_results, query_results, sizeof(uint64_t), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + } while (result == VK_NOT_READY); + } + + // Write out queries + QueryReport *q, *next; + int num_results_counted = 0; + + int result_divisor = pg->surface_scale_factor * pg->surface_scale_factor; + + QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) { + if (q->clear) { + NV2A_VK_DPRINTF("Cleared"); + r->zpass_pixel_count_result = 0; + } else { + assert(q->query_count >= num_results_counted); + assert(q->query_count <= r->num_queries_in_flight); + + while (num_results_counted < q->query_count) { + r->zpass_pixel_count_result += + query_results[num_results_counted++]; + } + + pgraph_write_zpass_pixel_cnt_report( + d, q->parameter, + r->zpass_pixel_count_result / result_divisor); + } + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(q); + } + + r->num_queries_in_flight = 0; + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_process_pending_reports(NV2AState *d) +{ +} diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c new file mode 100644 index 00000000000..7d5000d7517 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -0,0 +1,797 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 
Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include "renderer.h" +#include + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + size_t num_sets = ARRAY_SIZE(r->descriptor_sets); + + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 2 * num_sets, + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = NV2A_MAX_TEXTURES * num_sets, + } + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = ARRAY_SIZE(pool_sizes), + .pPoolSizes = pool_sizes, + .maxSets = ARRAY_SIZE(r->descriptor_sets), + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL); + 
r->descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES]; + + bindings[0] = (VkDescriptorSetLayoutBinding){ + .binding = VSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + }; + bindings[1] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + bindings[2 + i] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_TEX_BINDING + i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL); + r->descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + layouts[i] = r->descriptor_set_layout; + } + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->descriptor_pool, + .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets), + .pSetLayouts = layouts, + }; + VK_CHECK( + vkAllocateDescriptorSets(r->device, &alloc_info, 
r->descriptor_sets)); +} + +static void destroy_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeDescriptorSets(r->device, r->descriptor_pool, + ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets); + for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) { + r->descriptor_sets[i] = VK_NULL_HANDLE; + } +} + +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + bool need_uniform_write = + r->uniforms_changed || + !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset; + + if (!(r->shader_bindings_changed || r->texture_bindings_changed || + (r->descriptor_set_index == 0) || need_uniform_write)) { + return; // Nothing changed + } + + ShaderBinding *binding = r->shader_binding; + ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms, + &binding->fragment->uniforms }; + VkDeviceSize ubo_buffer_total_size = 0; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_total_size += layouts[i]->total_size; + } + bool need_ubo_staging_buffer_reset = + r->uniforms_changed && + !pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING, + ubo_buffer_total_size, + r->device_props.limits.minUniformBufferOffsetAlignment); + + bool need_descriptor_write_reset = + (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets)); + + if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + need_uniform_write = true; + } + + VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES]; + + assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets)); + + if (need_uniform_write) { + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + void *data = layouts[i]->allocation; + VkDeviceSize size = layouts[i]->total_size; + r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer( + pg, BUFFER_UNIFORM_STAGING, &data, &size, 1, + r->device_props.limits.minUniformBufferOffsetAlignment); + } + + 
r->uniforms_changed = false; + } + + VkDescriptorBufferInfo ubo_buffer_infos[2]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_infos[i] = (VkDescriptorBufferInfo){ + .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer, + .offset = r->uniform_buffer_offsets[i], + .range = layouts[i]->total_size, + }; + descriptor_writes[i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &ubo_buffer_infos[i], + }; + } + + VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES]; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + image_infos[i] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->texture_bindings[i]->image_view, + .sampler = r->texture_bindings[i]->sampler, + }; + descriptor_writes[2 + i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = PSH_TEX_BINDING + i, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[i], + }; + } + + vkUpdateDescriptorSets(r->device, 6, descriptor_writes, 0, NULL); + + r->descriptor_set_index++; +} + +static void update_shader_constant_locations(ShaderBinding *binding) +{ + int i, j; + char tmp[64]; + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = + uniform_index(&binding->fragment->uniforms, tmp); + } + } + binding->alpha_ref_loc = + uniform_index(&binding->fragment->uniforms, "alphaRef"); + binding->fog_color_loc = + uniform_index(&binding->fragment->uniforms, "fogColor"); + for (i = 1; i < 
NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + /* lookup vertex shader uniforms */ + binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c"); + binding->surface_size_loc = + uniform_index(&binding->vertex->uniforms, "surfaceSize"); + binding->clip_range_loc = + uniform_index(&binding->vertex->uniforms, "clipRange"); + binding->fog_param_loc = + uniform_index(&binding->vertex->uniforms, "fogParam"); + + binding->inv_viewport_loc = + uniform_index(&binding->vertex->uniforms, "invViewport"); + binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa"); + binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb"); + binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + } + + binding->clip_region_loc = 
+ uniform_index(&binding->fragment->uniforms, "clipRegion"); + + binding->material_alpha_loc = + uniform_index(&binding->vertex->uniforms, "material_alpha"); +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + memcpy(&snode->state, state, sizeof(ShaderState)); +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + ShaderModuleInfo *modules[] = { + snode->geometry, + snode->vertex, + snode->fragment, + }; + for (int i = 0; i < ARRAY_SIZE(modules); i++) { + if (modules[i]) { + pgraph_vk_destroy_shader_module(r, modules[i]); + } + } + + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +static void shader_cache_init(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + const size_t shader_cache_size = 1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; +} + +static void shader_cache_finalize(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + lru_flush(&r->shader_cache); + g_free(r->shader_cache_entries); + r->shader_cache_entries = NULL; +} + +static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + uint64_t 
hash = fast_hash((void *)state, sizeof(*state)); + LruNode *node = lru_lookup(&r->shader_cache, hash, state); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode); + + if (!snode->fragment) { + NV2A_VK_DPRINTF("cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + + MString *geometry_shader_code = pgraph_gen_geom_glsl( + state->polygon_front_mode, state->polygon_back_mode, + state->primitive_mode, state->smooth_shading, true); + if (geometry_shader_code) { + NV2A_VK_DPRINTF("geometry shader: \n%s", + mstring_get_str(geometry_shader_code)); + snode->geometry = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, + mstring_get_str(geometry_shader_code)); + mstring_unref(geometry_shader_code); + } else { + memset(&snode->geometry, 0, sizeof(snode->geometry)); + } + + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + NV2A_VK_DPRINTF("vertex shader: \n%s", + mstring_get_str(vertex_shader_code)); + snode->vertex = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, + mstring_get_str(vertex_shader_code)); + mstring_unref(vertex_shader_code); + + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + NV2A_VK_DPRINTF("fragment shader: \n%s", + mstring_get_str(fragment_shader_code)); + snode->fragment = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, + mstring_get_str(fragment_shader_code)); + mstring_unref(fragment_shader_code); + + if (previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + update_shader_constant_locations(snode); 
+ } + + return snode; +} + +// FIXME: Move to common +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, + bool binding_changed, bool vertex_program, + bool fixed_function) +{ + int i, j; + + /* update combiner constants */ + for (i = 0; i < 9; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1); + } else { + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + pgraph_argb_pack32_to_rgba_float(constant[j], value); + uniform1fv(&binding->fragment->uniforms, loc, 4, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ALPHAREF) / + 255.0; + uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc, + alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + int loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + uint32_t m_u32[4]; + m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)); + m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)); + m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)); + m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)); + float m[4]; + m[0] = *(float*)&m_u32[0]; + m[1] = *(float*)&m_u32[1]; + m[2] = *(float*)&m_u32[2]; + m[3] = *(float*)&m_u32[3]; + uniformMatrix2fv(&binding->fragment->uniforms, loc, m); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4); + uniform1f(&binding->fragment->uniforms, loc, + *(float *)&v); + } + loc = 
binding->bump_offset_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4); + uniform1f(&binding->fragment->uniforms, loc, + *(float *)&v); + } + } + + loc = binding->tex_scale_loc[i]; + if (loc != -1) { + assert(pg->vk_renderer_state->texture_bindings[i] != NULL); + float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale; + BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format]; + if (!f_basic.linear) { + scale = 1.0; + } + uniform1f(&binding->fragment->uniforms, loc, scale); + } + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR); + uniform4f(&binding->fragment->uniforms, binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1); + uniform2f(&binding->vertex->uniforms, + binding->fog_param_loc, *(float *)&v[0], + *(float *)&v[1]); + } + + float zmax; + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; + break; + default: + assert(0); + } + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t *v; + int locs; + size_t len; + } lighting_arrays[] = { + { &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT }, + { &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT }, + { &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT }, + }; + + for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) { + uniform1iv( + &binding->vertex->uniforms, lighting_arrays[i].locs, + lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v); + } + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + int loc = binding->light_infinite_half_vector_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... 
*/ + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + + float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width); + float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height); + float m33 = zmax; + float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; + float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; + + float invViewport[16] = { + 1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0, + 0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22, + 0, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + uniformMatrix4fv(&binding->vertex->uniforms, + binding->inv_viewport_loc, &invViewport[0]); + } + } + + /* update vertex program constants */ + uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc, + NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants); + + if (binding->surface_size_loc != -1) { + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + uniform2f(&binding->vertex->uniforms, binding->surface_size_loc, + pg->surface_binding_dim.width / aa_width, + pg->surface_binding_dim.height / aa_height); + } + + if (binding->clip_range_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); + float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0; + float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0; + uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, + zmax, zclip_min, zclip_max); + } + + /* Clipping regions */ + unsigned int max_gl_width = pg->surface_binding_dim.width; + unsigned int max_gl_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); + + uint32_t clip_regions[8][4]; + + for (i = 0; i < 8; i++) { + uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4); + unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); + unsigned int x_max = GET_MASK(x, 
/* NOTE(review): this chunk is a newline-mangled git patch; line breaks below
 * were restored from the inline '+' diff markers. The fragment at the top is
 * the tail of a uniform-update function whose start lies outside this view. */
                                 NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);

        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
        pgraph_apply_scaling_factor(pg, &x_max, &y_max);

        clip_regions[i][0] = x_min;
        clip_regions[i][1] = y_min;
        clip_regions[i][2] = x_max;
        clip_regions[i][3] = y_max;
    }
    /* 8 window-clip rectangles, 4 ints each, uploaded as one flat array. */
    uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
               8 * 4, (void *)clip_regions);

    if (binding->material_alpha_loc != -1) {
        uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
                  pg->material_alpha);
    }
}

// Quickly check PGRAPH state to see if any registers have changed that
// necessitate a full shader state inspection.
static bool check_shaders_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* No binding yet: shaders must be generated. */
    if (!r->shader_binding) {
        return true;
    }
    if (pg->program_data_dirty) {
        return true;
    }

    /* Per-stage combiner registers, checked only for the active stages. */
    int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    for (int i = 0; i < num_stages; i++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
            return true;
        }
    }
    /* Registers that feed shader generation regardless of stage count. */
    unsigned int regs[] = {
        NV_PGRAPH_COMBINECTL,
        NV_PGRAPH_COMBINESPECFOG0,
        NV_PGRAPH_COMBINESPECFOG1,
        NV_PGRAPH_CSV0_C,
        NV_PGRAPH_CSV0_D,
        NV_PGRAPH_CSV1_A,
        NV_PGRAPH_CSV1_B,
        NV_PGRAPH_POINTSIZE,
        NV_PGRAPH_SHADERCLIPMODE,
        NV_PGRAPH_SHADERCTL,
        NV_PGRAPH_SHADERPROG,
        NV_PGRAPH_SHADOWCTL,
    };
    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        if (pgraph_is_reg_dirty(pg, regs[i])) {
            return true;
        }
    }

    /* Non-register state captured in the cached ShaderState. */
    ShaderState *state = &r->shader_binding->state;
    if (pg->uniform_attrs != state->uniform_attrs ||
        pg->swizzle_attrs != state->swizzle_attrs ||
        pg->compressed_attrs != state->compressed_attrs ||
        pg->primitive_mode != state->primitive_mode ||
        pg->surface_scale_factor != state->surface_scale_factor) {
        return true;
    }

    // Textures
    for (int i = 0; i < 4; i++) {
        if (pg->texture_matrix_enable[i] != pg->vk_renderer_state->shader_binding->state.texture_matrix_enable[i] ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
            return true;
        }
    }

    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);

    return false;
}

/* Regenerate (or reuse) the shader binding for current PGRAPH state, then
 * refresh its uniforms. Sets r->shader_bindings_changed when a new binding
 * was generated. */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHVkState *r = pg->vk_renderer_state;

    r->shader_bindings_changed = false;

    if (check_shaders_dirty(pg)) {
        ShaderState new_state;
        /* NOTE(review): the memset zeroes padding for the memcmp below, but
         * the struct assignment that follows may copy indeterminate padding
         * back in — consider having pgraph_get_shader_state fill the struct
         * in place instead. TODO confirm. */
        memset(&new_state, 0, sizeof(ShaderState));
        new_state = pgraph_get_shader_state(pg);
        if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
            r->shader_binding = gen_shaders(pg, &new_state);
            r->shader_bindings_changed = true;
        }
    }

    // FIXME: Use dirty bits
    pgraph_vk_update_shader_uniforms(pg);

    NV2A_VK_DGROUP_END();
}

/* Push current constants into the bound shaders' uniform buffers and flag
 * r->uniforms_changed when either buffer's content hash changed. */
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    NV2A_VK_DGROUP_BEGIN("%s", __func__);
    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);

    assert(r->shader_binding);
    ShaderBinding *binding = r->shader_binding;
    ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
                                       &binding->fragment->uniforms };
    shader_update_constants(pg, r->shader_binding, true,
                            r->shader_binding->state.vertex_program,
                            r->shader_binding->state.fixed_function);

    /* Cheap change detection: hash each uniform allocation. */
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        uint64_t hash = fast_hash(layouts[i]->allocation,
                                  layouts[i]->total_size);
        r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
        r->uniform_buffer_hashes[i] = hash;
    }

    nv2a_profile_inc_counter(r->uniforms_changed ?
                                 NV2A_PROF_SHADER_UBO_DIRTY :
                                 NV2A_PROF_SHADER_UBO_NOTDIRTY);

    NV2A_VK_DGROUP_END();
}

void pgraph_vk_init_shaders(PGRAPHState *pg)
{
    pgraph_vk_init_glsl_compiler();
    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);
    shader_cache_init(pg);
}

void pgraph_vk_finalize_shaders(PGRAPHState *pg)
{
    /* Teardown mirrors pgraph_vk_init_shaders in reverse order. */
    shader_cache_finalize(pg);
    destroy_descriptor_sets(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);
    pgraph_vk_finalize_glsl_compiler();
}
diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c
new file mode 100644
index 00000000000..045f8231b8f
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c
@@ -0,0 +1,473 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see .
 */

#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "renderer.h"
#include  /* NOTE(review): header name lost in extraction (angle-bracket
             * content stripped) — restore from upstream. */

// TODO: Swizzle/Unswizzle
// TODO: Float depth format (low priority, but would be better for accuracy)

// FIXME: Below pipeline creation assumes identical 3 buffer setup. For
// swizzle shader we will need more flexibility.
/* GLSL compute shaders that convert between the host depth/stencil formats
 * (D24_UNORM_S8_UINT, D32_SFLOAT_S8_UINT) and the guest Z24S8 layout
 * (depth in bits 31..8, stencil in bits 7..0). The pack shaders optionally
 * downscale (width_in >= width_out); the unpack shaders optionally upscale.
 * Bindings: 0 = depth plane, 1 = packed stencil bytes, 2 = packed Z24S8. */
const char *pack_d24_unorm_s8_uint_to_z24s8_glsl =
    "#version 450\n"
    "layout(local_size_x = 256) in;\n"
    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
    "layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n"
    "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
    "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
    "uint get_input_idx(uint idx_out) {\n"
    /* NOTE(review): missing "\n" at the end of the next fragment is harmless
     * (the two GLSL statements concatenate onto one valid line). */
    "  uint scale = width_in / width_out;"
    "  uint y = (idx_out / width_out) * scale;\n"
    "  uint x = (idx_out % width_out) * scale;\n"
    "  return y * width_in + x;\n"
    "}\n"
    "void main() {\n"
    "  uint idx_out = gl_GlobalInvocationID.x;\n"
    "  uint idx_in = get_input_idx(idx_out);\n"
    "  uint depth_value = depth_in[idx_in];\n"
    /* Stencil bytes are packed 4-per-uint; select the byte for this pixel. */
    "  uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
    "  depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
    "}\n";

const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl =
    "#version 450\n"
    "layout(local_size_x = 256) in;\n"
    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
    "layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n"
    "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
    "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
    "uint get_input_idx(uint idx_out) {\n"
    "  uint scale = width_out / width_in;"
    "  uint y = (idx_out / width_out) / scale;\n"
    "  uint x = (idx_out % width_out) / scale;\n"
    "  return y * width_in + x;\n"
    "}\n"
    "void main() {\n"
    "  uint idx_out = gl_GlobalInvocationID.x;\n"
    "  uint idx_in = get_input_idx(idx_out);\n"
    "  depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n"
    /* One invocation per 4 pixels assembles a full packed stencil word. */
    "  if (idx_out % 4 == 0) {\n"
    "    uint stencil_value = 0;\n"
    "    for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
    "      uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
    "      stencil_value |= v << (i * 8);\n"
    "    }\n"
    "    stencil_out[idx_out / 4] = stencil_value;\n"
    "  }\n"
    "}\n";

const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl =
    "#version 450\n"
    "layout(local_size_x = 256) in;\n"
    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
    "layout(binding = 0) buffer DepthIn { float depth_in[]; };\n"
    "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
    "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
    "uint get_input_idx(uint idx_out) {\n"
    "  uint y = idx_out / width_out;\n"
    "  uint x = idx_out % width_out;\n"
    /* Algebraically equivalent to the d24 variant's scaled x/y indexing. */
    "  return (y * width_in + x) * (width_in / width_out);\n"
    "}\n"
    "void main() {\n"
    "  uint idx_out = gl_GlobalInvocationID.x;\n"
    "  uint idx_in = get_input_idx(idx_out);\n"
    /* Convert normalized float depth to 24-bit fixed point. */
    "  uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n"
    "  uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
    "  depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
    "}\n";

const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl =
    "#version 450\n"
    "layout(local_size_x = 256) in;\n"
    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
    "layout(binding = 0) buffer DepthOut { float depth_out[]; };\n"
    "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
    "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
    "uint get_input_idx(uint idx_out) {\n"
    "  uint scale = width_out / width_in;"
    "  uint y = (idx_out / width_out) / scale;\n"
    "  uint x = (idx_out % width_out) / scale;\n"
    "  return y * width_in + x;\n"
    "}\n"
    "void main() {\n"
    "  uint idx_out = gl_GlobalInvocationID.x;\n"
    "  uint idx_in = get_input_idx(idx_out);\n"
    "  depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n"
    "  if (idx_out % 4 == 0) {\n"
    "    uint stencil_value = 0;\n"
    "    for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
    "      uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
    "      stencil_value |= v << (i * 8);\n"
    "    }\n"
    "    stencil_out[idx_out / 4] = stencil_value;\n"
    "  }\n"
    "}\n";

/* Pool sized for the compute descriptor sets (3 storage buffers per set). */
static void create_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDescriptorPoolSize pool_sizes[] = {
        {
            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = 3,
        },
    };

    VkDescriptorPoolCreateInfo pool_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .poolSizeCount = ARRAY_SIZE(pool_sizes),
        .pPoolSizes = pool_sizes,
        .maxSets = ARRAY_SIZE(r->compute.descriptor_sets),
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
    };
    VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
                                    &r->compute.descriptor_pool));
}

static void destroy_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyDescriptorPool(r->device, r->compute.descriptor_pool, NULL);
    r->compute.descriptor_pool = VK_NULL_HANDLE;
}

/* Layout: bindings 0..2 are storage buffers visible to the compute stage,
 * matching the three `buffer` blocks declared in the GLSL above. */
static void create_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    const int num_buffers = 3;

    VkDescriptorSetLayoutBinding bindings[num_buffers];
    for (int i = 0; i < num_buffers; i++) {
        bindings[i] = (VkDescriptorSetLayoutBinding){
            .binding = i,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
        };
    }
    VkDescriptorSetLayoutCreateInfo layout_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = ARRAY_SIZE(bindings),
        .pBindings = bindings,
    };
    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
                                         &r->compute.descriptor_set_layout));
}

static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyDescriptorSetLayout(r->device, r->compute.descriptor_set_layout,
                                 NULL);
    r->compute.descriptor_set_layout = VK_NULL_HANDLE;
}

static void create_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* NOTE(review): the array is sized from r->descriptor_sets (the graphics
     * sets) while descriptorSetCount below uses
     * ARRAY_SIZE(r->compute.descriptor_sets). If the two arrays differ in
     * length, pSetLayouts is under/over-filled — confirm, and prefer
     * ARRAY_SIZE(r->compute.descriptor_sets) here. */
    VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        layouts[i] = r->compute.descriptor_set_layout;
    }
    VkDescriptorSetAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = r->compute.descriptor_pool,
        .descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets),
        .pSetLayouts = layouts,
    };
    VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
                                      r->compute.descriptor_sets));
}

static void destroy_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkFreeDescriptorSets(r->device, r->compute.descriptor_pool,
                         ARRAY_SIZE(r->compute.descriptor_sets),
                         r->compute.descriptor_sets);
    for (int i = 0; i < ARRAY_SIZE(r->compute.descriptor_sets); i++) {
        r->compute.descriptor_sets[i] = VK_NULL_HANDLE;
    }
}

/* Single pipeline layout shared by all four pack/unpack pipelines:
 * one descriptor set plus two 32-bit push constants (width_in, width_out). */
static void create_compute_pipeline_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkPushConstantRange push_constant_range = {
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
        .size = 2 * sizeof(uint32_t),
    };
    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &r->compute.descriptor_set_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &push_constant_range,
    };
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &r->compute.pipeline_layout));
}

/* Compile `glsl` to a compute pipeline; the shader module is only needed
 * during pipeline creation and is destroyed before returning. */
static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl(
        r, VK_SHADER_STAGE_COMPUTE_BIT, glsl);

    VkComputePipelineCreateInfo pipeline_info = {
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = r->compute.pipeline_layout,
        .stage =
            (VkPipelineShaderStageCreateInfo){
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
                .pName = "main",
                .module = module->module,
            },
    };
    VkPipeline pipeline;
    VK_CHECK(vkCreateComputePipelines(r->device, r->vk_pipeline_cache, 1,
                                      &pipeline_info, NULL,
                                      &pipeline));

    pgraph_vk_destroy_shader_module(r, module);

    return pipeline;
}

/* Point bindings 0..2 of descriptor set 0 at the given buffer ranges. */
static void update_descriptor_sets(PGRAPHState *pg,
                                   VkDescriptorBufferInfo *buffers, int count)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(count == 3);
    VkWriteDescriptorSet descriptor_writes[3];
    const int descriptor_set_index = 0;

    for (int i = 0; i < count; i++) {
        descriptor_writes[i] = (VkWriteDescriptorSet){
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = r->compute.descriptor_sets[descriptor_set_index],
            .dstBinding = i,
            .dstArrayElement = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = 1,
            .pBufferInfo = &buffers[i],
        };
    }
    vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL);
}

//
// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8
// formatted buffer with depth in bits 31-8 and stencil in bits 7-0.
+// +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + pgraph_apply_scaling_factor(pg, &input_width, &input_height); + + unsigned int output_width = surface->width, output_height = surface->height; + if (!downscale) { + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + } + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = input_width * input_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel; + + size_t output_bytes_per_pixel = 4; + size_t output_size = output_width * output_height * output_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = src, + .offset = 0, + .range = depth_size, + }, + { + .buffer = src, + .offset = depth_size, + .range = stencil_size, + }, + { + .buffer = dst, + .offset = 0, + .range = output_size, + }, + }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_pack_d24s8); + } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_pack_f32s8); + } else { + assert(!"Unsupported pack format"); + } + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[0], 0, NULL); + + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + + size_t workgroup_size_in_units = 256; + size_t output_size_in_units 
= output_width * output_height; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + // FIXME: Check max group count + + vkCmdDispatch(cmd, group_count, 1, 1); +} + +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + + unsigned int output_width = surface->width, output_height = surface->height; + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = output_width * output_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel; + + size_t input_bytes_per_pixel = 4; + size_t input_size = input_width * input_height * input_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = dst, + .offset = 0, + .range = depth_size, + }, + { + .buffer = dst, + .offset = depth_size, + .range = stencil_size, + }, + { + .buffer = src, + .offset = 0, + .range = input_size, + }, + }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_unpack_d24s8); + } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_unpack_f32s8); + } else { + assert(!"Unsupported pack format"); + } + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[0], 0, NULL); + + assert(output_width >= input_width); + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + 
vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + + size_t workgroup_size_in_units = 256; + size_t output_size_in_units = output_width * output_height; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + // FIXME: Check max group count + + vkCmdDispatch(cmd, group_count, 1, 1); +} + +void pgraph_vk_init_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + create_descriptor_pool(pg); + create_descriptor_set_layout(pg); + create_descriptor_sets(pg); + create_compute_pipeline_layout(pg); + + r->compute.pipeline_pack_d24s8 = + create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl); + r->compute.pipeline_unpack_d24s8 = + create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl); + r->compute.pipeline_pack_f32s8 = + create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl); + r->compute.pipeline_unpack_f32s8 = + create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl); +} + +void pgraph_vk_finalize_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipeline *pipelines[] = { + &r->compute.pipeline_pack_d24s8, + &r->compute.pipeline_unpack_d24s8, + &r->compute.pipeline_pack_f32s8, + &r->compute.pipeline_unpack_f32s8, + }; + + for (int i = 0; i < ARRAY_SIZE(pipelines); i++) { + vkDestroyPipeline(r->device, *pipelines[i], NULL); + pipelines[i] = VK_NULL_HANDLE; + } + + vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL); + r->compute.pipeline_layout = VK_NULL_HANDLE; + + destroy_descriptor_sets(pg); + destroy_descriptor_set_layout(pg); + destroy_descriptor_pool(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c new file mode 100644 index 00000000000..9df98666bff --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -0,0 +1,1485 @@ +/* + * Geforce NV2A PGRAPH Vulkan 
 Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * Based on GL implementation:
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see .
 */

#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/swizzle.h"
#include "qemu/compiler.h"
#include "ui/xemu-settings.h"
#include "renderer.h"

const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic
const int max_surface_frame_time_delta = 5;

/* Change the render scale factor at runtime. Called with the iothread lock
 * held: it drops that lock, halts PFIFO, downloads all dirty surfaces,
 * requests a full PGRAPH flush, then resumes PFIFO and retakes the lock. */
void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale)
{
    /* Clamp to a minimum of 1x. */
    g_config.display.quality.surface_scale = scale < 1 ? 1 : scale;

    qemu_mutex_unlock_iothread();

    qemu_mutex_lock(&d->pfifo.lock);
    qatomic_set(&d->pfifo.halt, true);
    qemu_mutex_unlock(&d->pfifo.lock);

    // FIXME: It's just flush
    qemu_mutex_lock(&d->pgraph.lock);
    qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
    qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
    qemu_mutex_unlock(&d->pgraph.lock);
    qemu_mutex_lock(&d->pfifo.lock);
    pfifo_kick(d);
    qemu_mutex_unlock(&d->pfifo.lock);
    qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);

    qemu_mutex_lock(&d->pgraph.lock);
    qemu_event_reset(&d->pgraph.flush_complete);
    qatomic_set(&d->pgraph.flush_pending, true);
    qemu_mutex_unlock(&d->pgraph.lock);
    qemu_mutex_lock(&d->pfifo.lock);
    pfifo_kick(d);
    qemu_mutex_unlock(&d->pfifo.lock);
    qemu_event_wait(&d->pgraph.flush_complete);

    qemu_mutex_lock(&d->pfifo.lock);
    qatomic_set(&d->pfifo.halt, false);
    pfifo_kick(d);
    qemu_mutex_unlock(&d->pfifo.lock);

    qemu_mutex_lock_iothread();
}

unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d)
{
    return d->pgraph.surface_scale_factor; // FIXME: Move internal to renderer
}

void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg)
{
    int factor = g_config.display.quality.surface_scale;
    pg->surface_scale_factor = MAX(factor, 1);
}

// FIXME: Move to common
/* Effective surface size: power-of-two log dimensions for swizzled
 * surfaces, clip rectangle dimensions for linear ones. */
static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width,
                                   unsigned int *height)
{
    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
    if (swizzle) {
        *width = 1 << pg->surface_shape.log_width;
        *height = 1 << pg->surface_shape.log_height;
    } else {
        *width = pg->surface_shape.clip_width;
        *height = pg->surface_shape.clip_height;
    }
}

// FIXME: Move to common
/* True only when the shape changed AND at least one format is set. */
static bool framebuffer_dirty(PGRAPHState const *pg)
{
    bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape,
                                sizeof(SurfaceShape)) != 0;
    if (!shape_changed || (!pg->surface_shape.color_format
            && !pg->surface_shape.zeta_format)) {
        return false;
    }
    return true;
}

/* Row-by-row copy between images of differing strides; copies the smaller
 * of the two strides per row. */
static void memcpy_image(void *dst, void const *src, int dst_stride,
                         int src_stride, int height)
{
    if (dst_stride == src_stride) {
        memcpy(dst, src, dst_stride * height);
        return;
    }

    uint8_t *dst_ptr = (uint8_t *)dst;
    /* NOTE(review): cast should be (uint8_t const *) to avoid dropping
     * const from `src`. */
    uint8_t const *src_ptr = (uint8_t *)src;

    size_t copy_stride = MIN(src_stride, dst_stride);

    for (int i = 0; i < height; i++) {
        memcpy(dst_ptr, src_ptr, copy_stride);
        dst_ptr += dst_stride;
        src_ptr += src_stride;
    }
}

/* Read back a surface from the GPU into `pixels` (guest layout), optionally
 * downscaling and converting depth/stencil via the compute pipelines. */
static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface,
                                       uint8_t *pixels)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);

    /* If the surface was drawn in the in-flight command buffer, flush it
     * before reading back. */
    if (r->in_command_buffer &&
        surface->draw_time >= r->command_buffer_start_time) {
        pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN);
    }

    bool downscale = (pg->surface_scale_factor != 1);

    trace_nv2a_pgraph_surface_download(
        surface->color ? "COLOR" : "ZETA",
        surface->swizzle ? "sz" : "lin", surface->vram_addr,
        surface->width, surface->height, surface->pitch,
        surface->fmt.bytes_per_pixel);

    // Read surface into memory
    uint8_t *gl_read_buf = pixels;

    uint8_t *swizzle_buf = pixels;
    if (surface->swizzle) {
        // FIXME: Swizzle in shader
        assert(pg->surface_scale_factor == 1 || downscale);
        swizzle_buf = (uint8_t *)g_malloc(surface->size);
        gl_read_buf = swizzle_buf;
    }

    unsigned int scaled_width = surface->width,
                 scaled_height = surface->height;
    pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);

    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);

    pgraph_vk_transition_image_layout(
        pg, cmd, surface->image, surface->host_fmt.vk_format,
        surface->color ?
            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

    int num_copy_regions = 1;
    VkBufferImageCopy copy_regions[2];
    copy_regions[0] = (VkBufferImageCopy){
        .imageSubresource.aspectMask = surface->color ?
                                           VK_IMAGE_ASPECT_COLOR_BIT :
                                           VK_IMAGE_ASPECT_DEPTH_BIT,
        .imageSubresource.layerCount = 1,
    };

    /* Combined depth/stencil formats are repacked to Z24S8 on the GPU. */
    bool use_compute_to_convert_depth_stencil_format =
        surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
        surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;

    VkImage surface_image_loc;
    if (downscale && !use_compute_to_convert_depth_stencil_format) {
        /* Blit the scaled surface down into the scratch image first. */
        copy_regions[0].imageExtent =
            (VkExtent3D){ surface->width, surface->height, 1 };

        if (surface->image_scratch_current_layout !=
            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
            pgraph_vk_transition_image_layout(
                pg, cmd, surface->image_scratch, surface->host_fmt.vk_format,
                surface->image_scratch_current_layout,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
            surface->image_scratch_current_layout =
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        }

        VkImageBlit blit_region = {
            .srcSubresource.aspectMask = surface->host_fmt.aspect,
            .srcSubresource.mipLevel = 0,
            .srcSubresource.baseArrayLayer = 0,
            .srcSubresource.layerCount = 1,
            .srcOffsets[0] = (VkOffset3D){0, 0, 0},
            .srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1},

            .dstSubresource.aspectMask = surface->host_fmt.aspect,
            .dstSubresource.mipLevel = 0,
            .dstSubresource.baseArrayLayer = 0,
            .dstSubresource.layerCount = 1,
            .dstOffsets[0] = (VkOffset3D){0, 0, 0},
            .dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1},
        };

        vkCmdBlitImage(cmd, surface->image,
                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                       surface->image_scratch,
                       surface->image_scratch_current_layout, 1, &blit_region,
                       surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);

        pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
                                          surface->host_fmt.vk_format,
                                          surface->image_scratch_current_layout,
                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
        surface->image_scratch_current_layout =
            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        surface_image_loc = surface->image_scratch;
    } else {
        copy_regions[0].imageExtent =
            (VkExtent3D){ scaled_width, scaled_height, 1 };
        surface_image_loc = surface->image;
    }

    /* Stencil plane is copied separately, after the depth plane. */
    if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
        copy_regions[num_copy_regions++] = (VkBufferImageCopy){
            .bufferOffset = scaled_width * scaled_height * 4,
            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
            .imageSubresource.layerCount = 1,
            .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1},
        };
    }

    int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ?
                              BUFFER_COMPUTE_DST :
                              BUFFER_STAGING_DST;
    VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer;

    vkCmdCopyImageToBuffer(cmd, surface_image_loc,
                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
                           num_copy_regions, copy_regions);

    // FIXME: Verify output of depth stencil conversion
    // FIXME: Track current layout and only transition when required

    if (use_compute_to_convert_depth_stencil_format) {
        size_t bytes_per_pixel = 4;
        size_t packed_size =
            downscale ? (surface->width * surface->height * bytes_per_pixel) :
                        (scaled_width * scaled_height * bytes_per_pixel);

        /* Make the transfer write visible to the compute shader. */
        VkBufferMemoryBarrier pre_pack_barrier = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = copy_buffer,
            .size = VK_WHOLE_SIZE
        };
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
                             1, &pre_pack_barrier, 0, NULL);

        VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer;
        pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer,
                                     downscale);

        /* Make the compute write visible to the final transfer. */
        VkBufferMemoryBarrier post_pack_barrier = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = pack_buffer,
            .size = packed_size
        };
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                             &post_pack_barrier, 0, NULL);

        copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer;
        VkBufferCopy buffer_copy_region = {
            .size = packed_size,
        };
        vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region);
    }

    size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel *
                                   surface->width * surface->height;
    assert((downloaded_image_size) <=
           r->storage_buffers[BUFFER_STAGING_DST].buffer_size);

    /* Return the surface image to its attachment layout. */
    pgraph_vk_transition_image_layout(
        pg, cmd, surface->image, surface->host_fmt.vk_format,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);

    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1);
    pgraph_vk_end_single_time_commands(pg, cmd);

    void *mapped_memory_ptr;
    VK_CHECK(vmaMapMemory(r->allocator,
                          r->storage_buffers[BUFFER_STAGING_DST].allocation,
                          &mapped_memory_ptr));

    // FIXME: Swizzle in shader
    // FIXME: Eliminate this extra copy if we need to swizzle
    // FIXME: Use native buffer copy options for pitch adjust

    bool no_conversion_necessary =
        surface->color || use_compute_to_convert_depth_stencil_format ||
        surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM;

    assert(no_conversion_necessary);

    memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch,
                 surface->width * surface->fmt.bytes_per_pixel,
                 surface->height);

    vmaUnmapMemory(r->allocator,
                   r->storage_buffers[BUFFER_STAGING_DST].allocation);

    /* Linear data was read into a temporary; swizzle it into `pixels`. */
    if (surface->swizzle) {
        swizzle_rect(swizzle_buf, surface->width, surface->height, pixels,
                     surface->pitch, surface->fmt.bytes_per_pixel);
        nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE);
        g_free(swizzle_buf);
    }
}

/* Download the surface to guest VRAM if pending (or forced) and mark the
 * affected VRAM dirty for VGA and texture upload tracking. */
static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force)
{
    if (!(surface->download_pending || force)) {
        return;
    }

    // FIXME: Respect write enable at last TOU?

    download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr);

    memory_region_set_client_dirty(d->vram, surface->vram_addr,
                                   surface->pitch * surface->height,
                                   DIRTY_MEMORY_VGA);
    memory_region_set_client_dirty(d->vram, surface->vram_addr,
                                   surface->pitch * surface->height,
                                   DIRTY_MEMORY_NV2A_TEX);

    surface->download_pending = false;
    surface->draw_dirty = false;
}

/* Called from another thread: request a download of a drawn surface and
 * block until the renderer thread completes it. */
void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface)
{
    NV2AState *d = g_nv2a;

    if (qatomic_read(&surface->draw_dirty)) {
        qemu_mutex_lock(&d->pfifo.lock);
        qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete);
        qatomic_set(&surface->download_pending, true);
        qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true);
        pfifo_kick(d);
        qemu_mutex_unlock(&d->pfifo.lock);
        qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete);
    }
}

void pgraph_vk_process_pending_downloads(NV2AState *d)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
    SurfaceBinding *surface;

    QTAILQ_FOREACH(surface, &r->surfaces, entry) {
        download_surface(d, surface, false);
    }

    qatomic_set(&r->downloads_pending, false);
    qemu_event_set(&r->downloads_complete);
}

void pgraph_vk_download_dirty_surfaces(NV2AState *d)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    SurfaceBinding *surface;
    QTAILQ_FOREACH(surface, &r->surfaces, entry) {
        pgraph_vk_surface_download_if_dirty(d, surface);
    }

    qatomic_set(&r->download_dirty_surfaces_pending, false);
    qemu_event_set(&r->dirty_surfaces_download_complete);
}

/* CPU access hook for surface-backed VRAM: sync GPU->CPU on reads of dirty
 * surfaces and flag CPU writes for re-upload. */
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
                                    hwaddr len, bool write)
{
    SurfaceBinding *e = opaque;
    assert(addr >= e->vram_addr);
    hwaddr offset = addr - e->vram_addr;
    assert(offset < e->size);

    if (qatomic_read(&e->draw_dirty)) {
        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
        pgraph_vk_wait_for_surface_download(e);
    }

    if (write &&
        !qatomic_read(&e->upload_pending)) {
        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
        qatomic_set(&e->upload_pending, true);
    }
}

/* Register the CPU access hook for this surface's VRAM range (TCG only).
 * Lock order requires dropping pgraph.lock before taking the iothread lock. */
static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface)
{
    if (tcg_enabled()) {
        qemu_mutex_unlock(&d->pgraph.lock);
        qemu_mutex_lock_iothread();
        mem_access_callback_insert(qemu_get_cpu(0),
                                   d->vram, surface->vram_addr, surface->size,
                                   &surface->access_cb, &surface_access_callback,
                                   surface);
        qemu_mutex_unlock_iothread();
        qemu_mutex_lock(&d->pgraph.lock);
    }
}

static void unregister_cpu_access_callback(NV2AState *d,
                                           SurfaceBinding const *surface)
{
    if (tcg_enabled()) {
        qemu_mutex_unlock(&d->pgraph.lock);
        qemu_mutex_lock_iothread();
        mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
        qemu_mutex_unlock_iothread();
        qemu_mutex_lock(&d->pgraph.lock);
    }
}

/* Make `surface` the active color or zeta attachment. */
static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface)
{
    if (surface->color) {
        r->color_binding = surface;
    } else {
        r->zeta_binding = surface;
    }

    r->framebuffer_dirty = true;
}

static void unbind_surface(NV2AState *d, bool color)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (color) {
        if (r->color_binding) {
            r->color_binding = NULL;
            r->framebuffer_dirty = true;
        }
    } else {
        if (r->zeta_binding) {
            r->zeta_binding = NULL;
            r->framebuffer_dirty = true;
        }
    }
}

/* Remove `surface` from the active list and move it to the invalid list
 * for later reuse/destruction. */
static void invalidate_surface(NV2AState *d, SurfaceBinding *surface)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    trace_nv2a_pgraph_surface_invalidated(surface->vram_addr);

    // FIXME: We may be reading from the surface in the current command buffer!
    // Add a detection to handle it. For now, finish to be safe.
    pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN);

    assert((!r->in_command_buffer ||
            surface->draw_time < r->command_buffer_start_time) &&
           "Surface evicted while in use!");

    if (surface == r->color_binding) {
        assert(d->pgraph.surface_color.buffer_dirty);
        unbind_surface(d, true);
    }
    if (surface == r->zeta_binding) {
        assert(d->pgraph.surface_zeta.buffer_dirty);
        unbind_surface(d, false);
    }

    unregister_cpu_access_callback(d, surface);

    QTAILQ_REMOVE(&r->surfaces, surface, entry);
    QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry);
}

/* Download-if-dirty and invalidate every bound surface whose VRAM range
 * overlaps the new surface's range. */
static void invalidate_overlapping_surfaces(NV2AState *d,
                                            SurfaceBinding const *surface)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    uintptr_t e_end = surface->vram_addr + surface->size - 1;

    SurfaceBinding *s, *next;
    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
        uintptr_t s_end = s->vram_addr + s->size - 1;
        bool overlapping =
            !(s->vram_addr > e_end || surface->vram_addr > s_end);
        if (overlapping) {
            trace_nv2a_pgraph_surface_evict_overlapping(
                s->vram_addr, s->width, s->height,
                s->pitch);
            pgraph_vk_surface_download_if_dirty(d, s);
            invalidate_surface(d, s);
        }
    }
}

/* Insert a new surface binding, evicting any overlapping ones first. */
static void surface_put(NV2AState *d, SurfaceBinding *surface)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL);

    invalidate_overlapping_surfaces(d, surface);
    register_cpu_access_callback(d, surface);

    QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry);
}

/* Exact-match lookup by surface base address. */
SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    SurfaceBinding *surface;
    QTAILQ_FOREACH (surface, &r->surfaces, entry) {
        if (surface->vram_addr == addr) {
            return surface;
        }
    }

    return NULL;
}

/* Lookup by any address falling within a surface's VRAM range. */
SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    SurfaceBinding *surface;
    QTAILQ_FOREACH (surface, &r->surfaces, entry) {
        if (addr >= surface->vram_addr &&
            addr < (surface->vram_addr + surface->size)) {
            return surface;
        }
    }

    return NULL;
}

/* Attach debug names to the surface's image(s) and allocations for tools
 * like RenderDoc; no-op unless the debug utils extension is enabled. */
static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    g_autofree gchar *label = g_strdup_printf(
        "Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d",
        surface->vram_addr, surface->color ? "Color" : "Zeta",
        surface->color ? surface->shape.color_format :
                         surface->shape.zeta_format,
        surface->width, surface->height, pg->surface_shape.anti_aliasing);

    VkDebugUtilsObjectNameInfoEXT name_info = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
        .objectType = VK_OBJECT_TYPE_IMAGE,
        .objectHandle = (uint64_t)surface->image,
        .pObjectName = label,
    };

    if (r->debug_utils_extension_enabled) {
        vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
    }
    vmaSetAllocationName(r->allocator, surface->allocation, label);

    if (surface->image_scratch) {
        g_autofree gchar *label_scratch =
            g_strdup_printf("%s (scratch)", label);
        name_info.objectHandle = (uint64_t)surface->image_scratch;
        name_info.pObjectName = label_scratch;
        if (r->debug_utils_extension_enabled) {
            vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
        }
        vmaSetAllocationName(r->allocator, surface->allocation_scratch,
                             label_scratch);
    }
}

/* NOTE(review): definition continues past this chunk; the fragment below is
 * reproduced to the cut point. */
static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    unsigned int width = surface->width, height = surface->height;
    pgraph_apply_scaling_factor(pg, &width, &height);

    NV2A_VK_DPRINTF(
        "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx,
        width, height, surface->vram_addr);

    VkImageCreateInfo image_create_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType = VK_IMAGE_TYPE_2D,
        .extent.width = width,
        .extent.height = height,
        .extent.depth = 1,
        .mipLevels = 1,
        .arrayLayers
= 1, + .format = surface->host_fmt.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &surface->image, + &surface->allocation, NULL)); + + if (pg->surface_scale_factor > 1) { + VkImageCreateInfo scratch_image_create_info = image_create_info; + scratch_image_create_info.extent.width = surface->width; + scratch_image_create_info.extent.height = surface->height; + VK_CHECK( + vmaCreateImage(r->allocator, &scratch_image_create_info, + &alloc_create_info, &surface->image_scratch, + &surface->allocation_scratch, NULL)); + surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + } else { + surface->image_scratch = VK_NULL_HANDLE; + surface->allocation_scratch = VK_NULL_HANDLE; + } + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = surface->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = surface->host_fmt.vk_format, + .subresourceRange.aspectMask = surface->host_fmt.aspect, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &surface->image_view)); + + // FIXME: Go right into main command buffer + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_UNDEFINED, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3); + pgraph_vk_end_single_time_commands(pg, cmd); + nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE); +} + +static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src) +{ + dst->image = src->image; + dst->image_view = src->image_view; + dst->allocation = src->allocation; + dst->image_scratch = src->image_scratch; + dst->image_scratch_current_layout = src->image_scratch_current_layout; + dst->allocation_scratch = src->allocation_scratch; + + src->image = VK_NULL_HANDLE; + src->image_view = VK_NULL_HANDLE; + src->allocation = VK_NULL_HANDLE; + src->image_scratch = VK_NULL_HANDLE; + src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + src->allocation_scratch = VK_NULL_HANDLE; +} + +static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface) +{ + vkDestroyImageView(r->device, surface->image_view, NULL); + vmaDestroyImage(r->allocator, surface->image, surface->allocation); + if (surface->image_scratch) { + vmaDestroyImage(r->allocator, surface->image_scratch, + surface->allocation_scratch); + } +} + +static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, + SurfaceBinding *target) +{ + return surface->host_fmt.vk_format == target->host_fmt.vk_format && + surface->width == target->width && + surface->height == target->height && + surface->pitch == target->pitch && + surface->host_fmt.usage == target->host_fmt.usage; +} + +static SurfaceBinding * +get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target) +{ + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + if (check_invalid_surface_is_compatibile(surface, target)) { + QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + return surface; + } + } + + return NULL; +} + +static void prune_invalid_surfaces(PGRAPHVkState *r, int keep) +{ + int 
num_surfaces = 0; + + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + num_surfaces += 1; + if (num_surfaces > keep) { + QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + destroy_surface_image(r, surface); + g_free(surface); + } + } +} + +static void expire_old_surfaces(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + int last_used = d->pgraph.frame_time - s->frame_time; + if (last_used >= max_surface_frame_time_delta) { + trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + } +} + +static bool check_surface_compatibility(SurfaceBinding const *s1, + SurfaceBinding const *s2, bool strict) +{ + bool format_compatible = + (s1->color == s2->color) && + (s1->host_fmt.vk_format == s2->host_fmt.vk_format) && + (s1->pitch == s2->pitch) && + (s1->shape.clip_x <= s2->shape.clip_x) && + (s1->shape.clip_y <= s2->shape.clip_y); + if (!format_compatible) { + return false; + } + + if (!strict) { + return (s1->width >= s2->width) && (s1->height >= s2->height); + } else { + return (s1->width == s2->width) && (s1->height == s2->height); + } +} + +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface) +{ + if (surface->draw_dirty) { + download_surface(d, surface, true); + } +} + +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP + + trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? 
"sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + g_autofree uint8_t *swizzle_buf = NULL; + uint8_t *gl_read_buf = NULL; + + if (surface->swizzle) { + swizzle_buf = (uint8_t*)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + swizzle_buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); + } else { + gl_read_buf = buf; + } + + // FIXME: Eliminate extra copies + + VkBufferImageCopy regions[2]; + int num_regions = 1; + regions[0] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? + VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + } + + size_t uploaded_image_size = surface->height * surface->width * + surface->fmt.bytes_per_pixel; + + StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; + assert(uploaded_image_size <= copy_buffer->buffer_size); + + void *mapped_memory_ptr; + VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation, + &mapped_memory_ptr)); + + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool no_conversion_necessary = + surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM || + 
use_compute_to_convert_depth_stencil_format; + assert(no_conversion_necessary); + + memcpy_image(mapped_memory_ptr, gl_read_buf, + surface->width * surface->fmt.bytes_per_pixel, surface->pitch, + surface->height); + + vmaUnmapMemory(r->allocator, copy_buffer->allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + unsigned int scaled_width = surface->width, scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + if (use_compute_to_convert_depth_stencil_format) { + size_t packed_size = uploaded_image_size; + VkBufferCopy buffer_copy_region = { + .size = packed_size, + }; + vkCmdCopyBuffer(cmd, copy_buffer->buffer, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1, + &buffer_copy_region); + + size_t num_pixels = scaled_width * scaled_height; + size_t unpacked_depth_image_size = num_pixels * 4; + size_t unpacked_stencil_image_size = num_pixels; + size_t unpacked_size = + unpacked_depth_image_size + unpacked_stencil_image_size; + + VkBufferMemoryBarrier pre_unpack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = packed_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_unpack_barrier, 0, NULL); + + StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC]; + pgraph_vk_unpack_depth_stencil( + pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + unpack_buffer->buffer); + + VkBufferMemoryBarrier post_unpack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = unpack_buffer->buffer, + .size = unpacked_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_unpack_barrier, 0, NULL); + + // Already scaled during compute. Adjust copy regions. + regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; + regions[1].imageExtent = regions[0].imageExtent; + regions[1].bufferOffset = unpacked_depth_image_size; + + copy_buffer = unpack_buffer; + } + + bool upscale = !use_compute_to_convert_depth_stencil_format && + pg->surface_scale_factor > 1; + + if (upscale && surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, + upscale ? 
surface->image_scratch : surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, + regions); + + if (upscale) { + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + VkImageBlit blitRegion = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.mipLevel = 0, + .srcSubresource.baseArrayLayer = 0, + .srcSubresource.layerCount = 1, + .srcOffsets[0] = (VkOffset3D){0, 0, 0}, + .srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, + + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.mipLevel = 0, + .dstSubresource.baseArrayLayer = 0, + .dstSubresource.layerCount = 1, + .dstOffsets[0] = (VkOffset3D){0, 0, 0}, + .dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, + }; + + vkCmdBlitImage(cmd, surface->image_scratch, + surface->image_scratch_current_layout, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion, + surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2); + pgraph_vk_end_single_time_commands(pg, cmd); + + surface->initialized = true; +} + +static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b) +{ + #define DO_CMP(fld) \ + if (a->fld != b->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)a->fld, (long int)b->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(fmt.bytes_per_pixel) + DO_CMP(host_fmt.vk_format) + DO_CMP(color) + DO_CMP(swizzle) + DO_CMP(vram_addr) + DO_CMP(width) + DO_CMP(height) + DO_CMP(pitch) + DO_CMP(size) + DO_CMP(dma_addr) + DO_CMP(dma_len) + DO_CMP(frame_time) + DO_CMP(draw_time) + #undef DO_CMP +} + +static void populate_surface_binding_target_sized(NV2AState *d, bool color, + unsigned int width, + unsigned int height, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + Surface *surface; + hwaddr dma_address; + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format < + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; + host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format]; + if (host_fmt.host_bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + assert(pg->surface_shape.zeta_format != 0); + assert(pg->surface_shape.zeta_format < + ARRAY_SIZE(r->kelvin_surface_zeta_vk_map)); + fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format]; + host_fmt = 
r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format]; + // FIXME: Support float 16,24b float format surface + } + + DMAObject dma = nv_dma_load(d, dma_address); + // There's a bunch of bugs that could cause us to hit this function + // at the wrong time and get a invalid dma object. + // Check that it's sane. + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + // assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + assert(surface->pitch % fmt.bytes_per_pixel == 0); + assert((dma.address & ~0x07FFFFFF) == 0); + + target->shape = (color || !r->color_binding) ? pg->surface_shape : + r->color_binding->shape; + target->fmt = fmt; + target->host_fmt = host_fmt; + target->color = color; + target->swizzle = + (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + target->vram_addr = dma.address + surface->offset; + target->width = width; + target->height = height; + target->pitch = surface->pitch; + target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); + target->upload_pending = true; + target->download_pending = false; + target->draw_dirty = false; + target->dma_addr = dma.address; + target->dma_len = dma.limit; + target->frame_time = pg->frame_time; + target->draw_time = pg->draw_time; + target->cleared = false; + + target->initialized = false; +} + +static void populate_surface_binding_target(NV2AState *d, bool color, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width, height; + + if (color || !r->color_binding) { + get_surface_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + // Since we determine surface dimensions based on the clipping + // rectangle, make sure to include the surface offset as well. 
+ if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } + } else { + width = r->color_binding->width; + height = r->color_binding->height; + } + + populate_surface_binding_target_sized(d, color, width, height, target); +} + +static void update_surface_part(NV2AState *d, bool upload, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + SurfaceBinding target; + populate_surface_binding_target(d, color, &target); + + Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta; + + bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( + d->vram, target.vram_addr, + target.size, DIRTY_MEMORY_NV2A); + + if (upload && (pg_surface->buffer_dirty || mem_dirty)) { + // FIXME: We don't need to be so aggressive flushing the command list + // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); + pgraph_vk_ensure_not_in_render_pass(pg); + + unbind_surface(d, color); + + SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr); + if (surface != NULL) { + // FIXME: Support same color/zeta surface target? In the mean time, + // if the surface we just found is currently bound, just unbind it. + SurfaceBinding *other = (color ? r->zeta_binding + : r->color_binding); + if (surface == other) { + NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); + unbind_surface(d, !color); + } + } + + trace_nv2a_pgraph_surface_target( + color ? "COLOR" : "ZETA", target.vram_addr, + target.swizzle ? 
"sz" : "ln", + pg->surface_shape.anti_aliasing, + pg->surface_shape.clip_x, + pg->surface_shape.clip_width, pg->surface_shape.clip_y, + pg->surface_shape.clip_height); + + bool should_create = true; + + if (surface != NULL) { + bool is_compatible = + check_surface_compatibility(surface, &target, false); + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, + uint32_t clip_y, uint32_t clip_height, + uint32_t pitch) = + surface->color ? trace_nv2a_pgraph_surface_match_color : + trace_nv2a_pgraph_surface_match_zeta; + + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, + surface->pitch); + + assert(!(target.swizzle && pg->clearing)); + +#if 0 + if (surface->swizzle != target.swizzle) { + // Clears should only be done on linear surfaces. Avoid + // synchronization by allowing (1) a surface marked swizzled to + // be cleared under the assumption the entire surface is + // destined to be cleared and (2) a fully cleared linear surface + // to be marked swizzled. Strictly match size to avoid + // pathological cases. + is_compatible &= (pg->clearing || surface->cleared) && + check_surface_compatibility(surface, &target, true); + if (is_compatible) { + trace_nv2a_pgraph_surface_migrate_type( + target.swizzle ? 
"swizzled" : "linear"); + } + } +#endif + + if (is_compatible && color && + !check_surface_compatibility(surface, &target, true)) { + SurfaceBinding zeta_entry; + populate_surface_binding_target_sized( + d, !color, surface->width, surface->height, &zeta_entry); + hwaddr color_end = surface->vram_addr + surface->size; + hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; + is_compatible &= surface->vram_addr >= zeta_end || + zeta_entry.vram_addr >= color_end; + } + + if (is_compatible && !color && r->color_binding) { + is_compatible &= (surface->width == r->color_binding->width) && + (surface->height == r->color_binding->height); + } + + if (is_compatible) { + // FIXME: Refactor + pg->surface_binding_dim.width = surface->width; + pg->surface_binding_dim.clip_x = surface->shape.clip_x; + pg->surface_binding_dim.clip_width = surface->shape.clip_width; + pg->surface_binding_dim.height = surface->height; + pg->surface_binding_dim.clip_y = surface->shape.clip_y; + pg->surface_binding_dim.clip_height = surface->shape.clip_height; + surface->upload_pending |= mem_dirty; + pg->surface_zeta.buffer_dirty |= color; + should_create = false; + } else { + trace_nv2a_pgraph_surface_evict_reason( + "incompatible", surface->vram_addr); + compare_surfaces(surface, &target); + pgraph_vk_surface_download_if_dirty(d, surface); + invalidate_surface(d, surface); + } + } + + if (should_create) { + surface = get_any_compatible_invalid_surface(r, &target); + if (surface) { + migrate_surface_image(&target, surface); + } else { + surface = g_malloc(sizeof(SurfaceBinding)); + create_surface_image(pg, &target); + } + + *surface = target; + set_surface_label(pg, surface); + surface_put(d, surface); + + // FIXME: Refactor + pg->surface_binding_dim.width = target.width; + pg->surface_binding_dim.clip_x = target.shape.clip_x; + pg->surface_binding_dim.clip_width = target.shape.clip_width; + pg->surface_binding_dim.height = target.height; + pg->surface_binding_dim.clip_y = target.shape.clip_y; 
+ pg->surface_binding_dim.clip_height = target.shape.clip_height; + + if (color && r->zeta_binding && + (r->zeta_binding->width != target.width || + r->zeta_binding->height != target.height)) { + pg->surface_zeta.buffer_dirty = true; + } + } + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, + uint32_t clip_height, uint32_t pitch) = + color ? (should_create ? trace_nv2a_pgraph_surface_create_color : + trace_nv2a_pgraph_surface_hit_color) : + (should_create ? trace_nv2a_pgraph_surface_create_zeta : + trace_nv2a_pgraph_surface_hit_zeta); + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, surface->pitch); + + bind_surface(r, surface); + pg_surface->buffer_dirty = false; + } + + if (!upload && pg_surface->draw_dirty) { + if (!tcg_enabled()) { + // FIXME: Cannot monitor for reads/writes; flush now + download_surface(d, color ? r->color_binding : r->zeta_binding, + true); + } + + pg_surface->write_enabled_cache = false; + pg_surface->draw_dirty = false; + } +} + +// FIXME: Move to common? 
+void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + pg->surface_shape.z_format = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + color_write = color_write && + (pg->clearing || pgraph_color_write_enabled(pg)); + zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); + + if (upload) { + bool fb_dirty = framebuffer_dirty(pg); + if (fb_dirty) { + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } + + if (pg->surface_color.buffer_dirty) { + unbind_surface(d, true); + } + + if (color_write) { + update_surface_part(d, true, true); + } + + if (pg->surface_zeta.buffer_dirty) { + unbind_surface(d, false); + } + + if (zeta_write) { + update_surface_part(d, true, false); + } + } else { + if ((color_write || pg->surface_color.write_enabled_cache) + && pg->surface_color.draw_dirty) { + update_surface_part(d, false, true); + } + if ((zeta_write || pg->surface_zeta.write_enabled_cache) + && pg->surface_zeta.draw_dirty) { + update_surface_part(d, false, false); + } + } + + if (upload) { + pg->draw_time++; + } + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + if (r->color_binding) { + r->color_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->color_binding, false); + r->color_binding->draw_time = pg->draw_time; + r->color_binding->swizzle = swizzle; + } + } + + if (r->zeta_binding) { + r->zeta_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->zeta_binding, false); + r->zeta_binding->draw_time = pg->draw_time; + r->zeta_binding->swizzle = swizzle; + } + } + + // Sanity check color and zeta dimensions match + if (r->color_binding && r->zeta_binding) { + 
assert(r->color_binding->width == r->zeta_binding->width); + assert(r->color_binding->height == r->zeta_binding->height); + } + + expire_old_surfaces(d); + prune_invalid_surfaces(r, num_invalid_surfaces_to_keep); +} + +static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format, + VkImageUsageFlags usage) +{ + VkPhysicalDeviceImageFormatInfo2 pdif2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + }; + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; + VkResult result = vkGetPhysicalDeviceImageFormatProperties2( + r->physical_device, &pdif2, &props); + return result == VK_SUCCESS; +} + +static bool check_surface_internal_formats_supported( + PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count) +{ + bool all_supported = true; + for (int i = 0; i < count; i++) { + const SurfaceFormatInfo *f = &fmts[i]; + if (f->host_bytes_per_pixel) { + all_supported &= + check_format_and_usage_supported(r, f->vk_format, f->usage); + } + } + return all_supported; +} + +void pgraph_vk_init_surfaces(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Make sure all surface format types are supported. We don't expect issue + // with these, and therefore have no fallback mechanism. + bool color_formats_supported = check_surface_internal_formats_supported( + r, kelvin_surface_color_format_vk_map, + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + assert(color_formats_supported); + + // Check if the device supports preferred VK_FORMAT_D24_UNORM_S8_UINT + // format, fall back to D32_SFLOAT_S8_UINT otherwise. 
+ r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16; + if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint, + 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d24_unorm_s8_uint; + } else if (check_surface_internal_formats_supported( + r, &zeta_d32_sfloat_s8_uint, 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d32_sfloat_s8_uint; + } else { + assert(!"No suitable depth-stencil format supported"); + } + + QTAILQ_INIT(&r->surfaces); + QTAILQ_INIT(&r->invalid_surfaces); + + r->downloads_pending = false; + qemu_event_init(&r->downloads_complete, false); + qemu_event_init(&r->dirty_surfaces_download_complete, false); + + r->color_binding = NULL; + r->zeta_binding = NULL; + r->framebuffer_dirty = true; + + pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal +} + +void pgraph_vk_finalize_surfaces(PGRAPHState *pg) +{ + pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph)); +} + +void pgraph_vk_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Clear last surface shape to force recreation of buffers at next draw + pg->surface_color.draw_dirty = false; + pg->surface_zeta.draw_dirty = false; + memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); + unbind_surface(d, true); + unbind_surface(d, false); + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + invalidate_surface(d, s); + } + prune_invalid_surfaces(r, 0); + + pgraph_vk_reload_surface_scale_factor(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c new file mode 100644 index 00000000000..10a4ccd2e44 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -0,0 +1,1456 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright 
(c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/s3tc.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "qemu/fast-hash.h" +#include "qemu/lru.h" +#include "renderer.h" + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode); + +static const VkImageType dimensionality_to_vk_image_type[] = { + 0, + VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_2D, + VK_IMAGE_TYPE_3D, +}; +static const VkImageViewType dimensionality_to_vk_image_view_type[] = { + 0, + VK_IMAGE_VIEW_TYPE_1D, + VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_VIEW_TYPE_3D, +}; + +static VkSamplerAddressMode lookup_texture_address_mode(int idx) +{ + assert(0 < idx && idx < ARRAY_SIZE(pgraph_texture_addr_vk_map)); + return pgraph_texture_addr_vk_map[idx]; +} + +// FIXME: Move to common +// FIXME: We can shrink the size of this structure +// FIXME: Use simple allocator +typedef struct TextureLevel { + unsigned int width, height, depth; + hwaddr vram_addr; + void *decoded_data; + size_t decoded_size; +} TextureLevel; + +typedef struct TextureLayer { + TextureLevel levels[16]; +} TextureLayer; + +typedef struct TextureLayout { + TextureLayer layers[6]; +} TextureLayout; + +// FIXME: Move to common +static enum S3TC_DECOMPRESS_FORMAT kelvin_format_to_s3tc_format(int 
color_format) +{ + switch (color_format) { + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5: + return S3TC_DECOMPRESS_FORMAT_DXT1; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT3; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT5; + default: + assert(false); + } +} + +// FIXME: Move to common +static void memcpy_image(void *dst, void *src, int min_stride, int dst_stride, int src_stride, int height) +{ + uint8_t *dst_ptr = (uint8_t *)dst; + uint8_t *src_ptr = (uint8_t *)src; + + for (int i = 0; i < height; i++) { + memcpy(dst_ptr, src_ptr, min_stride); + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +// FIXME: Move to common +static size_t get_cubemap_layer_size(PGRAPHState *pg, TextureShape s) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + bool is_compressed = + pgraph_is_texture_format_compressed(pg, s.color_format); + unsigned int block_size; + + unsigned int w = s.width, h = s.height; + size_t length = 0; + + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + if (is_compressed) { + block_size = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : + 16; + } + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + length += w / 4 * h / 4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + return ROUND_UP(length, NV2A_CUBEMAP_FACE_ALIGNMENT); +} + +// FIXME: Move to common +// FIXME: More refactoring +// FIXME: Possible parallelization of decoding +// FIXME: Bounds checking +static TextureLayout *get_texture_layout(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + TextureShape s = pgraph_get_texture_shape(pg, texture_idx); + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + + NV2A_VK_DGROUP_BEGIN("Texture %d: cubemap=%d, dimensionality=%d, color_format=0x%x, levels=%d, width=%d, height=%d, depth=%d border=%d, min_mipmap_level=%d, max_mipmap_level=%d, pitch=%d", + texture_idx, + s.cubemap, + s.dimensionality, + s.color_format, + s.levels, + s.width, + s.height, + s.depth, + s.border, + s.min_mipmap_level, + s.max_mipmap_level, + s.pitch + ); + + // Sanity checks on below assumptions + if (f.linear) { + assert(s.dimensionality == 2); + } + if (s.cubemap) { + assert(s.dimensionality == 2); + assert(!f.linear); + } + assert(s.dimensionality > 1); + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + void *texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset; + + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + void *palette_data_ptr = (char *)d->vram_ptr + texture_palette_vram_offset; + + unsigned int adjusted_width = s.width, adjusted_height = s.height, + adjusted_pitch = s.pitch, adjusted_depth = s.depth; + + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width * (s.pitch / s.width); + adjusted_depth = 
MAX(16, s.depth * 2); + } + + TextureLayout *layout = g_malloc0(sizeof(TextureLayout)); + + if (f.linear) { + assert(s.pitch % f.bytes_per_pixel == 0 && "Can't handle strides unaligned to pixels"); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data_ptr, palette_data_ptr, adjusted_width, + adjusted_height, 1, adjusted_pitch, 0, &converted_size); + + if (!converted) { + int dst_stride = adjusted_width * f.bytes_per_pixel; + assert(adjusted_width <= s.width); + converted_size = dst_stride * adjusted_height; + converted = g_malloc(converted_size); + memcpy_image(converted, texture_data_ptr, adjusted_width * f.bytes_per_pixel, dst_stride, + adjusted_pitch, adjusted_height); + } + + assert(s.levels == 1); + layout->layers[0].levels[0] = (TextureLevel){ + .width = adjusted_width, + .height = adjusted_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + NV2A_VK_DGROUP_END(); + return layout; + } + + bool is_compressed = pgraph_is_texture_format_compressed(pg, s.color_format); + size_t block_size = 0; + if (is_compressed) { + bool is_dxt1 = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5; + block_size = is_dxt1 ? 8 : 16; + } + + if (s.dimensionality == 2) { + hwaddr layer_size = 0; + if (s.cubemap) { + layer_size = get_cubemap_layer_size(pg, s); + } + + const int num_layers = s.cubemap ? 
6 : 1; + for (int layer = 0; layer < num_layers; layer++) { + unsigned int width = adjusted_width, height = adjusted_height; + texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset + + layer * layer_size; + + for (int level = 0; level < s.levels; level++) { + NV2A_VK_DPRINTF("Layer %d Level %d @ %x", layer, level, (int)((char*)texture_data_ptr - (char*)d->vram_ptr)); + + width = MAX(width, 1); + height = MAX(height, 1); + if (is_compressed) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size + unsigned int tex_width = width, tex_height = height; + unsigned int physical_width = (width + 3) & ~3, + physical_height = (height + 3) & ~3; + // if (physical_width != width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); + // } + + size_t converted_size = width * height * 4; + uint8_t *converted = s3tc_decompress_2d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, physical_width, physical_height); + assert(converted); + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ + // glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + // glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + // if (physical_width == width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + // } + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += + physical_width / 4 * physical_height / 4 * block_size; + } else { + unsigned int pitch = width * f.bytes_per_pixel; + unsigned int tex_width = width, tex_height = height; + + size_t converted_size = height * pitch; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data_ptr, width, height, + unswizzled, pitch, f.bytes_per_pixel); + + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, 1, + pitch, 0, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + converted = unswizzled; + } + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + // pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + } + } else if (s.dimensionality == 3) { + assert(!f.linear); + unsigned int width = adjusted_width, height = adjusted_height, + depth = adjusted_depth; + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + assert(width % 4 == 0 && height % 4 == 0 && + "Compressed 3D texture virtual size"); + + width = MAX(width, 4); + height = MAX(height, 4); + depth = MAX(depth, 1); + + size_t converted_size = width * height * depth * 4; + uint8_t *converted = s3tc_decompress_3d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, width, height, depth); + assert(converted); + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width / 4 * height / 4 * depth * block_size; + } else { + width = MAX(width, 1); + height = MAX(height, 1); + depth = MAX(depth, 1); + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + + size_t unswizzled_size = slice_pitch * depth; + uint8_t *unswizzled = g_malloc(unswizzled_size); + unswizzle_box(texture_data_ptr, width, height, depth, + unswizzled, row_pitch, slice_pitch, + f.bytes_per_pixel); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, depth, + row_pitch, slice_pitch, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + 
converted = unswizzled; + converted_size = unswizzled_size; + } + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * depth * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + depth /= 2; + } + } + + NV2A_VK_DGROUP_END(); + return layout; +} + +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = opaque; + + TextureBinding *tnode = container_of(node, TextureBinding, node); + if (tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, + hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; + addr &= TARGET_PAGE_MASK; + assert(end <= memory_region_size(d->vram)); + + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; + + lru_visit_active(&d->pgraph.vk_renderer_state->texture_cache, + mark_textures_possibly_dirty_visitor, + &test); +} + +static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, + DIRTY_MEMORY_NV2A_TEX); +} + +// Check if any of the 
pages spanned by the a texture are dirty. +static bool check_texture_possibly_dirty(NV2AState *d, + hwaddr texture_vram_offset, + unsigned int length, + hwaddr palette_vram_offset, + unsigned int palette_length) +{ + bool possibly_dirty = false; + if (check_texture_dirty(d, texture_vram_offset, length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, texture_vram_offset, length); + } + if (palette_length && check_texture_dirty(d, palette_vram_offset, + palette_length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, palette_vram_offset, + palette_length); + } + return possibly_dirty; +} + +// FIXME: Make sure we update sampler when data matches. Should we add filtering +// options to the textureshape? +static void upload_texture_image(PGRAPHState *pg, int texture_idx, + TextureBinding *binding) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &binding->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + + g_autofree TextureLayout *layout = get_texture_layout(pg, texture_idx); + const int num_layers = state->cubemap ? 
6 : 1; + + // Calculate decoded texture data size + size_t texture_data_size = 0; + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + size_t size = layer->levels[level_idx].decoded_size; + assert(size); + texture_data_size += size; + } + } + + assert(texture_data_size <= + r->storage_buffers[BUFFER_STAGING_SRC].buffer_size); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + + int num_regions = num_layers * state->levels; + g_autofree VkBufferImageCopy *regions = + g_malloc0_n(num_regions, sizeof(VkBufferImageCopy)); + + VkBufferImageCopy *region = regions; + VkDeviceSize buffer_offset = 0; + + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + NV2A_VK_DPRINTF("Layer %d", layer_idx); + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + TextureLevel *level = &layer->levels[level_idx]; + NV2A_VK_DPRINTF(" - Level %d, w=%d h=%d d=%d @ %08" HWADDR_PRIx, + level_idx, level->width, level->height, + level->depth, buffer_offset); + memcpy(mapped_memory_ptr + buffer_offset, level->decoded_data, + level->decoded_size); + *region = (VkBufferImageCopy){ + .bufferOffset = buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = level_idx, + .imageSubresource.baseArrayLayer = layer_idx, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = + (VkExtent3D){ level->width, level->height, level->depth }, + }; + buffer_offset += level->decoded_size; + region++; + } + } + assert(buffer_offset <= texture_data_size); + vmaUnmapMemory(r->allocator, + 
r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + // FIXME: Use nondraw. Need to fill and copy tex buffer at once + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + binding->image, binding->current_layout, + num_regions, regions); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_4); + pgraph_vk_end_single_time_commands(pg, cmd); + + // Release decoded texture data + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + g_free(layer->levels[level_idx].decoded_data); + } + } +} + +static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + TextureBinding *texture) +{ + assert(!surface->color); + + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + size_t copied_image_size = + scaled_width * scaled_height * surface->host_fmt.host_bytes_per_pixel; + size_t stencil_buffer_offset = 0; + size_t stencil_buffer_size = 0; + + int num_regions = 0; + VkBufferImageCopy regions[2]; + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = surface->color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_buffer_offset = scaled_width * scaled_height * 4; + stencil_buffer_size = scaled_width * scaled_height; + copied_image_size += stencil_buffer_size; + + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = stencil_buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + } + + bool use_compute_to_convert_depth_stencil = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + assert(use_compute_to_convert_depth_stencil && "Unimplemented"); + + StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST]; + assert(dst_storage_buffer->buffer_size >= copied_image_size); + + vkCmdCopyImageToBuffer( + cmd, surface->image, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dst_storage_buffer->buffer, + num_regions, regions); + + if (use_compute_to_convert_depth_stencil) { + size_t packed_image_size = scaled_width * scaled_height * 4; + + VkBufferMemoryBarrier pre_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_barrier, 0, NULL); + + pgraph_vk_pack_depth_stencil( + pg, surface, cmd, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false); + + VkBufferMemoryBarrier post_pack_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = packed_image_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + regions[0] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ 
scaled_width, scaled_height, 1 }, + }; + + vkCmdCopyBufferToImage( + cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions); + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +// FIXME: Should be able to skip the copy and sample the original surface image +static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + TextureBinding *texture) +{ + if (!surface->color) { + copy_zeta_surface_to_texture(pg, surface, texture); + return; + } + + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + VkImageCopy region = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.layerCount = 1, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + }; + pgraph_apply_scaling_factor(pg, ®ion.extent.width, + ®ion.extent.height); + vkCmdCopyImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture->image, + texture->current_layout, 1, ®ion); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +static bool check_surface_to_texture_compatiblity(const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + if (surface->shape.zeta_format == NV097_SET_SURFACE_FORMAT_ZETA_Z24S8) { + return true; + } + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case 
NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void create_dummy_texture(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = 16, + .extent.height = 16, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VkImage texture_image; + VmaAllocation texture_allocation; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &texture_image, + &texture_allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = texture_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = (VkComponentMapping){ VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R }, + }; + VkImageView texture_image_view; + 
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &texture_image_view)); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .anisotropyEnable = VK_FALSE, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + + VkSampler texture_sampler; + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &texture_sampler)); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + size_t texture_data_size = + image_create_info.extent.width * image_create_info.extent.height; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + memset(mapped_memory_ptr, 0xff, texture_data_size); + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + pgraph_vk_transition_image_layout( + pg, cmd, texture_image, VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ image_create_info.extent.width, + image_create_info.extent.height, 1 }, + }; + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, ®ion); 
+ + pgraph_vk_transition_image_layout(pg, cmd, texture_image, + VK_FORMAT_R8_UNORM, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_end_single_time_commands(pg, cmd); + + r->dummy_texture = (TextureBinding){ + .key.scale = 1.0, + .image = texture_image, + .current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .allocation = texture_allocation, + .image_view = texture_image_view, + .sampler = texture_sampler, + }; +} + +static void destroy_dummy_texture(PGRAPHVkState *r) +{ + texture_cache_release_node_resources(r, &r->dummy_texture); +} + +static void set_texture_label(PGRAPHState *pg, TextureBinding *texture) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + g_autofree gchar *label = g_strdup_printf( + "Texture %" HWADDR_PRIx "h fmt:%02xh %dx%dx%d lvls:%d", + texture->key.texture_vram_offset, texture->key.state.color_format, + texture->key.state.width, texture->key.state.height, + texture->key.state.depth, texture->key.state.levels); + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = (uint64_t)texture->image, + .pObjectName = label, + }; + + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, texture->allocation, label); +} + +static void create_texture(PGRAPHState *pg, int texture_idx) +{ + NV2A_VK_DGROUP_BEGIN("Creating texture %d", texture_idx); + + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape state = pgraph_get_texture_shape(pg, texture_idx); // FIXME: Check for pad issues + BasicColorFormatInfo f_basic = kelvin_color_format_info_map[state.color_format]; + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + 
pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + + size_t texture_length = pgraph_get_texture_length(pg, &state); + + TextureKey key; + memset(&key, 0, sizeof(key)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = texture_length; + key.palette_vram_offset = texture_palette_vram_offset; + key.palette_length = texture_palette_data_size; + key.scale = 1; + + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + bool surface_to_texture = false; + + // Check active surfaces to see if this texture was a render target + SurfaceBinding *surface = pgraph_vk_surface_get(d, texture_vram_offset); + if (surface && state.levels == 1) { + surface_to_texture = + check_surface_to_texture_compatiblity(surface, &state); + + if (surface_to_texture && surface->upload_pending) { + pgraph_vk_upload_surface_data(d, surface, false); + } + } + + if (!surface_to_texture) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this texture may index + hwaddr tex_vram_end = texture_vram_offset + texture_length - 1; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= tex_vram_end + || texture_vram_offset >= surf_vram_end); + if (overlapping) { + pgraph_vk_surface_download_if_dirty(d, surface); + } + } + } + + if (surface_to_texture && pg->surface_scale_factor > 1) { + key.scale = pg->surface_scale_factor; + } + + uint64_t key_hash = fast_hash((void*)&key, sizeof(key)); + LruNode *node = lru_lookup(&r->texture_cache, key_hash, &key); + TextureBinding *snode = container_of(node, TextureBinding, node); + bool binding_found = snode->image != VK_NULL_HANDLE; + + if (binding_found) { + NV2A_VK_DPRINTF("Cache hit"); + 
r->texture_bindings[texture_idx] = snode; + possibly_dirty |= snode->possibly_dirty; + } else { + possibly_dirty = true; + } + + if (!surface_to_texture && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, texture_vram_offset, texture_length, texture_palette_vram_offset, + texture_palette_data_size); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + texture_palette_vram_offset; + + uint64_t content_hash = 0; + if (!surface_to_texture && possibly_dirty) { + content_hash = fast_hash(texture_data, texture_length); + if (is_indexed) { + content_hash ^= fast_hash(palette_data, texture_palette_data_size); + } + } + + if (binding_found) { + if (surface_to_texture) { + // FIXME: Add draw time tracking + if (surface->draw_time != snode->draw_time) { + copy_surface_to_texture(pg, surface, snode); + } + } else { + if (possibly_dirty && content_hash != snode->hash) { + upload_texture_image(pg, texture_idx, snode); + snode->hash = content_hash; + } + } + + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + + memcpy(&snode->key, &key, sizeof(key)); + snode->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + snode->possibly_dirty = false; + snode->hash = content_hash; + + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state.color_format]; + assert(vkf.vk_format != 0); + assert(0 < state.dimensionality); + assert(state.dimensionality < ARRAY_SIZE(dimensionality_to_vk_image_type)); + assert(state.dimensionality < + ARRAY_SIZE(dimensionality_to_vk_image_view_type)); + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = dimensionality_to_vk_image_type[state.dimensionality], + .extent.width = state.width, // FIXME: Use adjusted size? + .extent.height = state.height, + .extent.depth = state.depth, + .mipLevels = f_basic.linear ? 
1 : state.levels, + .arrayLayers = state.cubemap ? 6 : 1, + .format = vkf.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = (state.cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0), + }; + + if (surface_to_texture) { + pgraph_apply_scaling_factor(pg, &image_create_info.extent.width, + &image_create_info.extent.height); + } + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &snode->image, + &snode->allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = snode->image, + .viewType = state.cubemap ? + VK_IMAGE_VIEW_TYPE_CUBE : + dimensionality_to_vk_image_view_type[state.dimensionality], + .format = vkf.vk_format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = vkf.component_map, + }; + + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &snode->image_view)); + + + void *sampler_next_struct = NULL; + + VkSamplerCustomBorderColorCreateInfoEXT custom_border_color_create_info; + VkBorderColor vk_border_color; + uint32_t border_color_pack32 = + pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); + + if (r->custom_border_color_extension_enabled) { + float border_color_rgba[4]; + pgraph_argb_pack32_to_rgba_float(border_color_pack32, border_color_rgba); + + custom_border_color_create_info = + (VkSamplerCustomBorderColorCreateInfoEXT){ + .sType = + 
VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .customBorderColor.float32 = { border_color_rgba[0], + border_color_rgba[1], + border_color_rgba[2], + border_color_rgba[3] }, + .format = image_view_create_info.format, + .pNext = sampler_next_struct + }; + + vk_border_color = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + sampler_next_struct = &custom_border_color_create_info; + } else { + // FIXME: Handle custom color in shader + if (border_color_pack32 == 0x00000000) { + vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + } else if (border_color_pack32 == 0xff000000) { + vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + } else { + vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_WHITE; + } + } + + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); + if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); + + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + assert(mag_filter < ARRAY_SIZE(pgraph_texture_mag_filter_vk_map)); + + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + assert(min_filter < ARRAY_SIZE(pgraph_texture_min_filter_vk_map)); + + bool mipmap_nearest = + f_basic.linear || image_create_info.mipLevels == 1 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD; + + uint32_t address = + pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_LINEAR, // FIXME + .minFilter = VK_FILTER_LINEAR, // FIXME + .addressModeU = lookup_texture_address_mode( + 
GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)), + .addressModeV = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV)), + .addressModeW = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP)), + .anisotropyEnable = VK_FALSE, + // .anisotropyEnable = VK_TRUE, + // .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .borderColor = vk_border_color, + .unnormalizedCoordinates = f_basic.linear ? VK_TRUE : VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = mipmap_nearest ? VK_SAMPLER_MIPMAP_MODE_NEAREST : + VK_SAMPLER_MIPMAP_MODE_LINEAR, + .minLod = 0.0, + .maxLod = f_basic.linear ? 0.0 : image_create_info.mipLevels, + .mipLodBias = 0.0, + .pNext = sampler_next_struct, + }; + + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &snode->sampler)); + + set_texture_label(pg, snode); + + r->texture_bindings[texture_idx] = snode; + + if (surface_to_texture) { + copy_surface_to_texture(pg, surface, snode); + } else { + upload_texture_image(pg, texture_idx, snode); + snode->draw_time = 0; + } + + NV2A_VK_DGROUP_END(); +} + +static bool check_textures_dirty(PGRAPHState *pg) +{ + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (pg->texture_dirty[i]) { + return true; + } + } + return false; +} + +static void update_timestamps(PGRAPHVkState *r) +{ + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i]) { + r->texture_bindings[i]->submit_time = r->submit_count; + } + } +} + +void pgraph_vk_bind_textures(NV2AState *d) +{ + NV2A_VK_DGROUP_BEGIN("%s", __func__); + + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Check for modifications on bind fastpath (CPU hook) + // FIXME: Mark textures that are sourced from surfaces so we can track them + + r->texture_bindings_changed = false; + + if (!check_textures_dirty(pg)) { + NV2A_VK_DPRINTF("Not dirty"); + NV2A_VK_DGROUP_END(); + update_timestamps(r); 
+ return; + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (!pgraph_is_texture_enabled(pg, i)) { + r->texture_bindings[i] = &r->dummy_texture; + continue; + } + if (!pg->texture_dirty[i]) { // FIXME: Fails to check memory + continue; + } + + create_texture(pg, i); + + pg->texture_dirty[i] = false; // FIXME: Move to renderer? + } + + r->texture_bindings_changed = true; + update_timestamps(r); + NV2A_VK_DGROUP_END(); +} + +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; + snode->image_view = VK_NULL_HANDLE; + snode->sampler = VK_NULL_HANDLE; +} + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode) +{ + vkDestroySampler(r->device, snode->sampler, NULL); + snode->sampler = VK_NULL_HANDLE; + + vkDestroyImageView(r->device, snode->image_view, NULL); + snode->image_view = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, snode->image, snode->allocation); + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; +} + +static bool texture_cache_entry_pre_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + + // FIXME: Simplify. 
We don't really need to check bindings + + + // Currently bound + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i] == snode) { + return false; + } + } + + // Used in command buffer + if (r->in_command_buffer && snode->submit_time == r->submit_count) { + return false; + } + + return true; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + texture_cache_release_node_resources(r, snode); +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + return memcmp(&snode->key, key, sizeof(TextureKey)); +} + +static void texture_cache_init(PGRAPHVkState *r) +{ + const size_t texture_cache_size = 1024; + lru_init(&r->texture_cache); + r->texture_cache_entries = g_malloc_n(texture_cache_size, sizeof(TextureBinding)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.pre_node_evict = texture_cache_entry_pre_evict; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +static void texture_cache_finalize(PGRAPHVkState *r) +{ + lru_flush(&r->texture_cache); + g_free(r->texture_cache_entries); + r->texture_cache_entries = NULL; +} + +void pgraph_vk_trim_texture_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Allow specifying some amount to trim by + + int num_to_evict = r->texture_cache.num_used / 4; + int num_evicted = 0; + + while (num_to_evict-- && lru_try_evict_one(&r->texture_cache)) { + num_evicted += 1; + } + + NV2A_VK_DPRINTF("Evicted %d textures, %d remain", 
num_evicted, r->texture_cache.num_used); +} + +void pgraph_vk_init_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + texture_cache_init(r); + create_dummy_texture(pg); +} + +void pgraph_vk_finalize_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + destroy_dummy_texture(r); + texture_cache_finalize(r); + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + r->texture_bindings[i] = NULL; + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c new file mode 100644 index 00000000000..6625520c65e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -0,0 +1,312 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" + +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1, + 1); +} + +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); + return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data, + sizes, count, 1); +} + +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, + void *data, VkDeviceSize size) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + size_t offset_bit = offset / 4096; + size_t nbits = size / 4096; + if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) { + // Vertex data changed while building the draw list. Finish drawing + // before updating RAM buffer. + pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY); + } + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size); + + bitmap_set(r->uploaded_bitmap, offset_bit, nbits); +} + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->num_vertex_ram_buffer_syncs < + ARRAY_SIZE(r->vertex_ram_buffer_syncs)); + r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] = + (MemorySyncRequirement){ .addr = addr, .size = size }; +} + +static const VkFormat float_to_count[] = { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT, +}; + +static const VkFormat ub_to_count[] = { + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM, +}; + +static const VkFormat s1_to_count[] = { + VK_FORMAT_R16_SNORM, + VK_FORMAT_R16G16_SNORM, + 
VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM, +}; + +static const VkFormat s32k_to_count[] = { + VK_FORMAT_R16_SSCALED, + VK_FORMAT_R16G16_SSCALED, + VK_FORMAT_R16G16B16_SSCALED, + VK_FORMAT_R16G16B16A16_SSCALED, +}; + +static char const * const vertex_data_array_format_to_str[] = { + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP", +}; + +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int num_elements = max_element - min_element + 1; + + if (inline_data) { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i, + vertex_data_array_format_to_str[attr->format], + attr->count, attr->stride); + r->vertex_attribute_to_description_location[i] = -1; + if (!attr->count) { + pg->uniform_attrs |= 1 << i; + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + VkFormat vk_format; + bool needs_conversion = 
false; + bool d3d_swizzle = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + assert(attr->count == 4); + d3d_swizzle = true; + /* fallthru */ + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + assert(attr->count <= ARRAY_SIZE(ub_to_count)); + vk_format = ub_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + assert(attr->count <= ARRAY_SIZE(s1_to_count)); + vk_format = s1_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + assert(attr->count <= ARRAY_SIZE(float_to_count)); + vk_format = float_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + assert(attr->count <= ARRAY_SIZE(s32k_to_count)); + vk_format = s32k_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + vk_format = + VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ?? + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + if (d3d_swizzle) { + pg->swizzle_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride); + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = + (uint8_t *)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. + pg->uniform_attrs |= 1 << i; + pgraph_update_inline_value(attr, last_entry); + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .stride = stride, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + + r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .location = i, + .format = vk_format, + }; + + r->vertex_attribute_offsets[i] = attrib_data_addr; + + NV2A_VK_DGROUP_END(); + } + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = 
pg->vk_renderer_state; + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->inline_buffer_populated) { + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .stride = 4 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .location = i, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + }; + memcpy(attr->inline_value, + attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, + sizeof(attr->inline_value)); + } else { + r->vertex_attribute_to_description_location[i] = -1; + pg->uniform_attrs |= 1 << i; + } + } +} \ No newline at end of file diff --git a/hw/xbox/nv2a/vsh.h b/hw/xbox/nv2a/pgraph/vsh.h similarity index 92% rename from hw/xbox/nv2a/vsh.h rename to hw/xbox/nv2a/pgraph/vsh.h index 18ef4bb5f20..405b6c9aa68 100644 --- a/hw/xbox/nv2a/vsh.h +++ b/hw/xbox/nv2a/pgraph/vsh.h @@ -21,7 +21,7 @@ #define HW_NV2A_VSH_H #include -#include "shaders_common.h" +#include "qemu/mstring.h" enum VshLight { LIGHT_OFF, @@ -130,11 +130,4 @@ typedef enum { uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name); -void vsh_translate(uint16_t version, - const uint32_t *tokens, - unsigned int length, - bool z_perspective, - MString *header, MString *body); - - #endif diff --git a/hw/xbox/nv2a/shaders.c b/hw/xbox/nv2a/shaders.c deleted file mode 100644 index cafe326e93e..00000000000 --- 
a/hw/xbox/nv2a/shaders.c +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * QEMU Geforce NV2A shader generator - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#include "qemu/osdep.h" -#include - -#include "shaders_common.h" -#include "shaders.h" -#include "nv2a_int.h" -#include "ui/xemu-settings.h" -#include "xemu-version.h" - -void mstring_append_fmt(MString *qstring, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - mstring_append_va(qstring, fmt, ap); - va_end(ap); -} - -MString *mstring_from_fmt(const char *fmt, ...) 
-{ - MString *ret = mstring_new(); - va_list ap; - va_start(ap, fmt); - mstring_append_va(ret, fmt, ap); - va_end(ap); - - return ret; -} - -void mstring_append_va(MString *qstring, const char *fmt, va_list va) -{ - char scratch[256]; - - va_list ap; - va_copy(ap, va); - const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); - va_end(ap); - - if (len == 0) { - return; - } else if (len < sizeof(scratch)) { - mstring_append(qstring, scratch); - return; - } - - /* overflowed out scratch buffer, alloc and try again */ - char *buf = g_malloc(len + 1); - va_copy(ap, va); - vsnprintf(buf, len + 1, fmt, ap); - va_end(ap); - - mstring_append(qstring, buf); - g_free(buf); -} - -GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) -{ - if (polygon_mode == POLY_MODE_POINT) { - return GL_POINTS; - } - - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return GL_POINTS; - case PRIM_TYPE_LINES: return GL_LINES; - case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; - case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; - case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; - case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; - case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; - case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; - case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return GL_LINE_LOOP; - } else if (polygon_mode == POLY_MODE_FILL) { - return GL_TRIANGLE_FAN; - } - - assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); - return 0; - default: - assert(!"Invalid primitive_mode"); - return 0; - } -} - -static MString* generate_geometry_shader( - enum ShaderPolygonMode polygon_front_mode, - enum ShaderPolygonMode polygon_back_mode, - enum ShaderPrimitiveMode primitive_mode, - GLenum *gl_primitive_mode, - bool smooth_shading) -{ - /* FIXME: Missing support for 2-sided-poly mode */ - assert(polygon_front_mode == polygon_back_mode); - enum 
ShaderPolygonMode polygon_mode = polygon_front_mode; - - *gl_primitive_mode = get_gl_primitive_mode(polygon_mode, primitive_mode); - - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return NULL; - } - - /* Handle LINE and FILL mode */ - const char *layout_in = NULL; - const char *layout_out = NULL; - const char *body = NULL; - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return NULL; - case PRIM_TYPE_LINES: return NULL; - case PRIM_TYPE_LINE_LOOP: return NULL; - case PRIM_TYPE_LINE_STRIP: return NULL; - case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(0, 0);\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - /* Imagine a quad made of a tristrip, the comments tell you which - * vertex we are using */ - body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" /* bottom right */ - " }\n" - " emit_vertex(1, 0);\n" /* top right */ - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(0, 0);\n" /* bottom right */ - " } else {\n" - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(1, 0);\n" /* top left */ - " emit_vertex(0, 0);\n" /* top right */ - " }\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " if (gl_PrimitiveIDIn == 0) {\n" - " 
emit_vertex(0, 0);\n" - " }\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_QUADS: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(1, 3);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_QUAD_STRIP: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3);\n" - " }\n" - " emit_vertex(1, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return NULL; - } - if (polygon_mode == POLY_MODE_FILL) { - if (smooth_shading) { - return NULL; - } - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " emit_vertex(0, 2);\n" - " emit_vertex(1, 2);\n" - " emit_vertex(2, 2);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; 
- } - break; - - default: - assert(false); - return NULL; - } - - /* generate a geometry shader to support deprecated primitive types */ - assert(layout_in); - assert(layout_out); - assert(body); - MString* s = mstring_from_str("#version 330\n" - "\n"); - mstring_append(s, layout_in); - mstring_append(s, layout_out); - mstring_append(s, "\n"); - if (smooth_shading) { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH - "\n" - STRUCT_VERTEX_DATA_OUT_SMOOTH - "\n" - "void emit_vertex(int index, int _unused) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[index];\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } else { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT - "\n" - STRUCT_VERTEX_DATA_OUT_FLAT - "\n" - "void emit_vertex(int index, int provoking_index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } - - mstring_append(s, "\n" 
- "void main() {\n"); - mstring_append(s, body); - mstring_append(s, "}\n"); - - return s; -} - -static void append_skinning_code(MString* str, bool mix, - unsigned int count, const char* type, - const char* output, const char* input, - const char* matrix, const char* swizzle) -{ - if (count == 0) { - mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", - type, output, input, matrix, swizzle); - } else { - mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); - if (mix) { - /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ - mstring_append(str, "{\n" - " float weight_i;\n" - " float weight_n = 1.0;\n"); - int i; - for (i = 0; i < count; i++) { - if (i < (count - 1)) { - char c = "xyzw"[i]; - mstring_append_fmt(str, " weight_i = weight.%c;\n" - " weight_n -= weight_i;\n", - c); - } else { - mstring_append(str, " weight_i = weight_n;\n"); - } - mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", - output, input, matrix, i, swizzle); - } - mstring_append(str, "}\n"); - } else { - /* Individual weights */ - int i; - for (i = 0; i < count; i++) { - char c = "xyzw"[i]; - mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", - output, input, matrix, i, swizzle, c); - } - } - } -} - -#define GLSL_C(idx) "c[" stringify(idx) "]" -#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" - -#define GLSL_C_MAT4(idx) \ - "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ - GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" - -#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n" - -static void generate_fixed_function(const ShaderState *state, - MString *header, MString *body) -{ - int i, j; - - /* generate vertex shader mimicking fixed function */ - mstring_append(header, -"#define position v0\n" -"#define weight v1\n" -"#define normal v2.xyz\n" -"#define diffuse v3\n" -"#define specular v4\n" -"#define fogCoord v5.x\n" -"#define pointSize v6\n" -"#define backDiffuse v7\n" -"#define backSpecular v8\n" -"#define texture0 v9\n" -"#define texture1 v10\n" 
-"#define texture2 v11\n" -"#define texture3 v12\n" -"#define reserved1 v13\n" -"#define reserved2 v14\n" -"#define reserved3 v15\n" -"\n" -"uniform vec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" -"uniform vec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" -"uniform vec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n" -"\n" -GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) -GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) -"\n" -GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) -GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) -GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) -GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) -GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) -GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) -GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) -GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) -GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) -GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) -GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) -GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) -GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) -"\n" -GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) -GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) -GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) -GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) -"\n" -GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) -GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) -GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) 
-GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) -"\n" -GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) -"\n" -"#define lightAmbientColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" -"#define lightDiffuseColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" -"#define lightSpecularColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" -"\n" -"#define lightSpotFalloff(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" -"#define lightSpotDirection(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" -"\n" -"#define lightLocalRange(i) " - "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" -"\n" -GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") -GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") -"\n" -"uniform mat4 invViewport;\n" -"\n"); - - /* Skinning */ - unsigned int count; - bool mix; - switch (state->skinning) { - case SKINNING_OFF: - mix = false; count = 0; break; - case SKINNING_1WEIGHTS: - mix = true; count = 2; break; - case SKINNING_2WEIGHTS2MATRICES: - mix = false; count = 2; break; - case SKINNING_2WEIGHTS: - mix = true; count = 3; break; - case SKINNING_3WEIGHTS3MATRICES: - mix = false; count = 3; break; - case SKINNING_3WEIGHTS: - mix = true; count = 4; break; - case SKINNING_4WEIGHTS4MATRICES: - mix = false; count = 4; break; - default: - assert(false); - break; - } - mstring_append_fmt(body, "/* Skinning mode %d */\n", - state->skinning); - - append_skinning_code(body, mix, count, "vec4", - "tPosition", "position", - "modelViewMat", "xyzw"); - append_skinning_code(body, mix, count, "vec3", - "tNormal", "vec4(normal, 0.0)", - "invModelViewMat", "xyz"); - - /* Normalization */ - if (state->normalization) { - mstring_append(body, "tNormal = normalize(tNormal);\n"); - } - - /* Texgen */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - 
mstring_append_fmt(body, "/* Texgen for stage %d */\n", - i); - /* Set each component individually */ - /* FIXME: could be nicer if some channels share the same texgen */ - for (j = 0; j < 4; j++) { - /* TODO: TexGen View Model missing! */ - char c = "xyzw"[j]; - char cSuffix = "STRQ"[j]; - switch (state->texgen[i][j]) { - case TEXGEN_DISABLE: - mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", - i, c, i, c); - break; - case TEXGEN_EYE_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", - i, c, cSuffix, i); - break; - case TEXGEN_OBJECT_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", - i, c, cSuffix, i); - break; - case TEXGEN_SPHERE_MAP: - assert(j < 2); /* Channels S,T only! */ - mstring_append(body, "{\n"); - /* FIXME: u, r and m only have to be calculated once */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - //FIXME: tNormal before or after normalization? Always normalize? - mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - - /* FIXME: This would consume 1 division fewer and *might* be - * faster than length: - * // [z=1/(2*x) => z=1/x*0.5] - * vec3 ro = r + vec3(0.0, 0.0, 1.0); - * float m = inversesqrt(dot(ro,ro))*0.5; - */ - - mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_REFLECTION_MAP: - assert(j < 3); /* Channels S,T,R only! */ - mstring_append(body, "{\n"); - /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_NORMAL_MAP: - assert(j < 3); /* Channels S,T,R only! 
*/ - mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", - i, c, c); - break; - default: - assert(false); - break; - } - } - } - - /* Apply texture matrices */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - if (state->texture_matrix_enable[i]) { - mstring_append_fmt(body, - "oT%d = oT%d * texMat%d;\n", - i, i, i); - } - } - - /* Lighting */ - if (state->lighting) { - - //FIXME: Do 2 passes if we want 2 sided-lighting? - - static char alpha_source_diffuse[] = "diffuse.a"; - static char alpha_source_specular[] = "specular.a"; - static char alpha_source_material[] = "material_alpha"; - const char *alpha_source = alpha_source_diffuse; - if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(header, "uniform float material_alpha;\n"); - alpha_source = alpha_source_material; - } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { - alpha_source = alpha_source_specular; - } - - if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); - } - - mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); - if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append(body, "oD0.rgb += specular.rgb;\n"); - } - - mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); - - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - if (state->light[i] == LIGHT_OFF) { - continue; - } - - /* FIXME: It seems that we only have to handle the 
surface colors if - * they are not part of the material [= vertex colors]. - * If they are material the cpu will premultiply light - * colors - */ - - mstring_append_fmt(body, "/* Light %d */ {\n", i); - - if (state->light[i] == LIGHT_LOCAL - || state->light[i] == LIGHT_SPOT) { - - mstring_append_fmt(header, - "uniform vec3 lightLocalPosition%d;\n" - "uniform vec3 lightLocalAttenuation%d;\n", - i, i); - mstring_append_fmt(body, - " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" - " float d = length(VP);\n" -//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? - " VP = normalize(VP);\n" - " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" - " + lightLocalAttenuation%d.y * d\n" - " + lightLocalAttenuation%d.z * d * d);\n" - " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ - " float nDotVP = max(0.0, dot(tNormal, VP));\n" - " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", - i, i, i, i); - - } - - switch(state->light[i]) { - case LIGHT_INFINITE: - - /* lightLocalRange will be 1e+30 here */ - - mstring_append_fmt(header, - "uniform vec3 lightInfiniteHalfVector%d;\n" - "uniform vec3 lightInfiniteDirection%d;\n", - i, i); - mstring_append_fmt(body, - " float attenuation = 1.0;\n" - " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" - " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", - i, i); - - /* FIXME: Do specular */ - - /* FIXME: tBackDiffuse */ - - break; - case LIGHT_LOCAL: - /* Everything done already */ - break; - case LIGHT_SPOT: - /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ - mstring_append_fmt(body, - " vec4 spotDir = lightSpotDirection(%d);\n" - " float invScale = 1/length(spotDir.xyz);\n" - " float cosHalfPhi = -invScale*spotDir.w;\n" - " float cosHalfTheta = 
invScale + cosHalfPhi;\n" - " float spotDirDotVP = dot(spotDir.xyz, VP);\n" - " float rho = invScale*spotDirDotVP;\n" - " if (rho > cosHalfTheta) {\n" - " } else if (rho <= cosHalfPhi) {\n" - " attenuation = 0.0;\n" - " } else {\n" - " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ - " }\n", - i); - break; - default: - assert(false); - break; - } - - mstring_append_fmt(body, - " float pf;\n" - " if (nDotVP == 0.0) {\n" - " pf = 0.0;\n" - " } else {\n" - " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" - " }\n" - " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" - " vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n" - " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", - i, i, i); - - mstring_append(body, - " oD0.xyz += lightAmbient;\n"); - - switch (state->diffuse_src) { - case MATERIAL_COLOR_SRC_MATERIAL: - mstring_append(body, - " oD0.xyz += lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_DIFFUSE: - mstring_append(body, - " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_SPECULAR: - mstring_append(body, - " oD0.xyz += specular.xyz * lightDiffuse;\n"); - break; - } - - mstring_append(body, - " oD1.xyz += specular.xyz * lightSpecular;\n"); - - mstring_append(body, "}\n"); - } - } else { - mstring_append(body, " oD0 = diffuse;\n"); - mstring_append(body, " oD1 = specular;\n"); - } - mstring_append(body, " oB0 = backDiffuse;\n"); - mstring_append(body, " oB1 = backSpecular;\n"); - - /* Fog */ - if (state->fog_enable) { - - /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ - switch(state->foggen) { - case FOGGEN_SPEC_ALPHA: - /* FIXME: Do we have to clamp here? 
*/ - mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); - break; - case FOGGEN_RADIAL: - mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); - break; - case FOGGEN_PLANAR: - case FOGGEN_ABS_PLANAR: - mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); - if (state->foggen == FOGGEN_ABS_PLANAR) { - mstring_append(body, " fogDistance = abs(fogDistance);\n"); - } - break; - case FOGGEN_FOG_X: - mstring_append(body, " float fogDistance = fogCoord;\n"); - break; - default: - assert(false); - break; - } - - } - - /* If skinning is off the composite matrix already includes the MV matrix */ - if (state->skinning == SKINNING_OFF) { - mstring_append(body, " tPosition = position;\n"); - } - - mstring_append(body, - " oPos = invViewport * (tPosition * compositeMat);\n" - " oPos.z = oPos.z * 2.0 - oPos.w;\n"); - - /* FIXME: Testing */ - if (state->point_params_enable) { - mstring_append_fmt( - body, - " float d_e = length(position * modelViewMat0);\n" - " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", - state->point_params[0], state->point_params[1], state->point_params[2], - state->point_params[6]); - mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", - state->point_params[3], state->point_params[7], - state->surface_scale_factor); - } else { - mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, - state->surface_scale_factor); - } - - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n"); -} - -static MString *generate_vertex_shader(const ShaderState *state, - bool prefix_outputs) -{ - int i; - MString *header = mstring_from_str( -"#version 400\n" -"\n" -"uniform vec4 clipRange;\n" -"uniform vec2 surfaceSize;\n" -"\n" -/* All constants in 1 array declaration */ -"uniform vec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) 
"];\n" -"\n" -"uniform vec4 fogColor;\n" -"uniform float fogParam[2];\n" -"\n" - -GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) -GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) -GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) -GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) -GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) - -"\n" -"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" -"\n" -"vec4 decompress_11_11_10(int cmp) {\n" -" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" -" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" -" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" -" return vec4(x, y, z, 1);\n" -"}\n"); - if (prefix_outputs) { - mstring_append(header, state->smooth_shading ? - STRUCT_V_VERTEX_DATA_OUT_SMOOTH : - STRUCT_V_VERTEX_DATA_OUT_FLAT); - mstring_append(header, - "#define vtx_inv_w v_vtx_inv_w\n" - "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" - "#define vtxD0 v_vtxD0\n" - "#define vtxD1 v_vtxD1\n" - "#define vtxB0 v_vtxB0\n" - "#define vtxB1 v_vtxB1\n" - "#define vtxFog v_vtxFog\n" - "#define vtxT0 v_vtxT0\n" - "#define vtxT1 v_vtxT1\n" - "#define vtxT2 v_vtxT2\n" - "#define vtxT3 v_vtxT3\n" - ); - } else { - mstring_append(header, state->smooth_shading ? 
- STRUCT_VERTEX_DATA_OUT_SMOOTH : - STRUCT_VERTEX_DATA_OUT_FLAT); - } - mstring_append(header, "\n"); - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt(header, - "layout(location = %d) in int v%d_cmp;\n", i, i); - } else { - mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", - i, i); - } - } - mstring_append(header, "\n"); - - MString *body = mstring_from_str("void main() {\n"); - - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt( - body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); - } - } - - if (state->fixed_function) { - generate_fixed_function(state, header, body); - } else if (state->vertex_program) { - vsh_translate(VSH_VERSION_XVS, - (uint32_t*)state->program_data, - state->program_length, - state->z_perspective, - header, body); - } else { - assert(false); - } - - - /* Fog */ - - if (state->fog_enable) { - - if (state->vertex_program) { - /* FIXME: Does foggen do something here? Let's do some tracking.. - * - * "RollerCoaster Tycoon" has - * state->vertex_program = true; state->foggen == FOGGEN_PLANAR - * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z - */ - mstring_append(body, " float fogDistance = oFog.x;\n"); - } - - /* FIXME: Do this per pixel? 
*/ - - switch (state->fog_mode) { - case FOG_MODE_LINEAR: - case FOG_MODE_LINEAR_ABS: - - /* f = (end - d) / (end - start) - * fogParam[1] = -1 / (end - start) - * fogParam[0] = 1 - end * fogParam[1]; - */ - - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - mstring_append(body, " float fogFactor = fogParam[0] + fogDistance * fogParam[1];\n"); - mstring_append(body, " fogFactor -= 1.0;\n"); - break; - case FOG_MODE_EXP: - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - /* fallthru */ - case FOG_MODE_EXP_ABS: - - /* f = 1 / (e^(d * density)) - * fogParam[1] = -density / (2 * ln(256)) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(fogDistance * fogParam[1] * 16.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - case FOG_MODE_EXP2: - case FOG_MODE_EXP2_ABS: - - /* f = 1 / (e^((d * density)^2)) - * fogParam[1] = -density / (2 * sqrt(ln(256))) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(-fogDistance * fogDistance * fogParam[1] * fogParam[1] * 32.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - default: - assert(false); - break; - } - /* Calculate absolute for the modes which need it */ - switch (state->fog_mode) { - case FOG_MODE_LINEAR_ABS: - case FOG_MODE_EXP_ABS: - case FOG_MODE_EXP2_ABS: - mstring_append(body, " fogFactor = abs(fogFactor);\n"); - break; - default: - break; - } - - mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); - } else { - /* FIXME: Is the fog still calculated / passed somehow?! - */ - mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); - } - - /* Set outputs */ - const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; - mstring_append_fmt(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" - " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" - " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" - " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" - " vtxFog = oFog.x * vtx_inv_w;\n" - " vtxT0 = oT0 * vtx_inv_w;\n" - " vtxT1 = oT1 * vtx_inv_w;\n" - " vtxT2 = oT2 * vtx_inv_w;\n" - " vtxT3 = oT3 * vtx_inv_w;\n" - " gl_Position = oPos;\n" - " gl_PointSize = oPts.x;\n" - " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near - " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far - "\n" - "}\n", - shade_model_mult, - shade_model_mult, - shade_model_mult, - shade_model_mult); - - - /* Return combined header + source */ - mstring_append(header, mstring_get_str(body)); - mstring_unref(body); - return header; - -} - -static GLuint create_gl_shader(GLenum gl_shader_type, - const char *code, - const char *name) -{ - GLint compiled = 0; - - NV2A_GL_DGROUP_BEGIN("Creating new %s", name); - - NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); - - GLuint shader = glCreateShader(gl_shader_type); - glShaderSource(shader, 1, &code, 0); - glCompileShader(shader); - - /* Check it compiled */ - compiled = 0; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - GLchar* log; - GLint log_length; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); - log = g_malloc(log_length * sizeof(GLchar)); - glGetShaderInfoLog(shader, log_length, NULL, log); - fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); - g_free(log); - - NV2A_GL_DGROUP_END(); - abort(); - } - - NV2A_GL_DGROUP_END(); - - return shader; -} - -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) -{ - int i, j; - char tmp[64]; - - /* set texture samplers */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - char samplerName[16]; - snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); - GLint texSampLoc = 
glGetUniformLocation(binding->gl_program, samplerName); - if (texSampLoc >= 0) { - glUniform1i(texSampLoc, i); - } - } - - /* validate the program */ - glValidateProgram(binding->gl_program); - GLint valid = 0; - glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); - fprintf(stderr, "nv2a: shader validation failed: %s\n", log); - abort(); - } - - /* lookup fragment shader uniforms */ - for (i = 0; i < 9; i++) { - for (j = 0; j < 2; j++) { - snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); - binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); - } - } - binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); - for (i = 1; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "bumpMat%d", i); - binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpScale%d", i); - binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); - binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "texScale%d", i); - binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - /* lookup vertex shader uniforms */ - for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { - snprintf(tmp, sizeof(tmp), "c[%d]", i); - binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); - binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); - binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); - binding->fog_param_loc[0] = glGetUniformLocation(binding->gl_program, "fogParam[0]"); - binding->fog_param_loc[1] = 
glGetUniformLocation(binding->gl_program, "fogParam[1]"); - - binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); - for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); - binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); - binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTC1_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); - binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); - binding->light_infinite_half_vector_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); - binding->light_infinite_direction_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - - snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); - binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); - binding->light_local_attenuation_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < 8; i++) { - snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); - binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - if (state->fixed_function) { - binding->material_alpha_loc = - glGetUniformLocation(binding->gl_program, "material_alpha"); - } else { - binding->material_alpha_loc = -1; - } -} - -ShaderBinding *generate_shaders(const ShaderState *state) -{ - char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); - if (previous_numeric_locale) { - previous_numeric_locale = g_strdup(previous_numeric_locale); - } - - /* Ensure numeric values are printed with '.' 
radix, no grouping */ - setlocale(LC_NUMERIC, "C"); - GLuint program = glCreateProgram(); - - /* Create an optional geometry shader and find primitive type */ - GLenum gl_primitive_mode; - MString* geometry_shader_code = - generate_geometry_shader(state->polygon_front_mode, - state->polygon_back_mode, - state->primitive_mode, - &gl_primitive_mode, - state->smooth_shading); - if (geometry_shader_code) { - const char* geometry_shader_code_str = - mstring_get_str(geometry_shader_code); - GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, - geometry_shader_code_str, - "geometry shader"); - glAttachShader(program, geometry_shader); - mstring_unref(geometry_shader_code); - } - - /* create the vertex shader */ - MString *vertex_shader_code = - generate_vertex_shader(state, geometry_shader_code != NULL); - GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, - mstring_get_str(vertex_shader_code), - "vertex shader"); - glAttachShader(program, vertex_shader); - mstring_unref(vertex_shader_code); - - /* generate a fragment shader from register combiners */ - MString *fragment_shader_code = psh_translate(state->psh); - const char *fragment_shader_code_str = - mstring_get_str(fragment_shader_code); - GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER, - fragment_shader_code_str, - "fragment shader"); - glAttachShader(program, fragment_shader); - mstring_unref(fragment_shader_code); - - /* link the program */ - glLinkProgram(program); - GLint linked = 0; - glGetProgramiv(program, GL_LINK_STATUS, &linked); - if(!linked) { - GLchar log[2048]; - glGetProgramInfoLog(program, 2048, NULL, log); - fprintf(stderr, "nv2a: shader linking failed: %s\n", log); - abort(); - } - - glUseProgram(program); - - ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); - ret->gl_program = program; - ret->gl_primitive_mode = gl_primitive_mode; - - update_shader_constant_locations(ret, state); - - if (previous_numeric_locale) { - setlocale(LC_NUMERIC, previous_numeric_locale); 
- g_free(previous_numeric_locale); - } - - return ret; -} - -static const char *shader_gl_vendor = NULL; - -static void shader_create_cache_folder(void) -{ - char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); - qemu_mkdir(shader_path); - g_free(shader_path); -} - -static char *shader_get_lru_cache_path(void) -{ - return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); -} - -static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) -{ - FILE *lru_list_file = (FILE*) opaque; - size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); - if (written != 1) { - fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", - (unsigned long long) node->hash); - } -} - -void shader_write_cache_reload_list(PGRAPHState *pg) -{ - if (!g_config.perf.cache_shaders) { - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); - return; - } - - char *shader_lru_path = shader_get_lru_cache_path(); - qemu_thread_join(&pg->shader_disk_thread); - - FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); - g_free(shader_lru_path); - if (!lru_list) { - fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); - return; - } - - lru_visit_active(&pg->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); - fclose(lru_list); - - lru_flush(&pg->shader_cache); - - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); -} - -bool shader_load_from_memory(ShaderLruNode *snode) -{ - assert(glGetError() == GL_NO_ERROR); - - if (!snode->program) { - return false; - } - - GLuint gl_program = glCreateProgram(); - glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); - GLint gl_error = glGetError(); - if (gl_error != GL_NO_ERROR) { - NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); - 
glDeleteProgram(gl_program); - return false; - } - - glValidateProgram(gl_program); - GLint valid = 0; - glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(gl_program, 1024, NULL, log); - NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); - glDeleteProgram(gl_program); - return false; - } - - glUseProgram(gl_program); - - ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); - binding->gl_program = gl_program; - binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, - snode->state.primitive_mode); - snode->binding = binding; - - g_free(snode->program); - snode->program = NULL; - - update_shader_constant_locations(binding, &snode->state); - - return true; -} - -static char *shader_get_bin_directory(uint64_t hash) -{ - const char *cfg_dir = xemu_settings_get_base_path(); - uint64_t bin_mask = 0xffffUL << 48; - char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", - cfg_dir, (hash & bin_mask) >> 48); - return shader_bin_dir; -} - -static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) -{ - uint64_t bin_mask = 0xffffUL << 48; - return g_strdup_printf("%s/%012lx", shader_bin_dir, - hash & (~bin_mask)); -} - -static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) -{ - char *shader_bin_dir = shader_get_bin_directory(hash); - char *shader_path = shader_get_binary_path(shader_bin_dir, hash); - char *cached_xemu_version = NULL; - char *cached_gl_vendor = NULL; - void *program_buffer = NULL; - - uint64_t cached_xemu_version_len; - uint64_t gl_vendor_len; - GLenum program_binary_format; - ShaderState state; - size_t shader_size; - - g_free(shader_bin_dir); - - qemu_mutex_lock(&pg->shader_cache_lock); - if (lru_contains_hash(&pg->shader_cache, hash)) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - qemu_mutex_unlock(&pg->shader_cache_lock); - - FILE *shader_file = qemu_fopen(shader_path, "rb"); - if (!shader_file) 
{ - goto error; - } - - size_t nread; - #define READ_OR_ERR(data, data_len) \ - do { \ - nread = fread(data, data_len, 1, shader_file); \ - if (nread != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); - - cached_xemu_version = g_malloc(cached_xemu_version_len +1); - READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); - if (strcmp(cached_xemu_version, xemu_version) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - - cached_gl_vendor = g_malloc(gl_vendor_len); - READ_OR_ERR(cached_gl_vendor, gl_vendor_len); - if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); - READ_OR_ERR(&state, sizeof(state)); - READ_OR_ERR(&shader_size, sizeof(shader_size)); - - program_buffer = g_malloc(shader_size); - READ_OR_ERR(program_buffer, shader_size); - - #undef READ_OR_ERR - - fclose(shader_file); - g_free(shader_path); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); - - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - /* If we happened to regenerate this shader already, then we may as well use the new one */ - if (snode->binding) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - - snode->program_format = program_binary_format; - snode->program_size = shader_size; - snode->program = program_buffer; - snode->cached = true; - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - -error: - /* Delete the shader so it won't be loaded again */ - qemu_unlink(shader_path); - g_free(shader_path); - g_free(program_buffer); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); -} - -static void *shader_reload_lru_from_disk(void *arg) -{ - if (!g_config.perf.cache_shaders) 
{ - return NULL; - } - - PGRAPHState *pg = (PGRAPHState*) arg; - char *shader_lru_path = shader_get_lru_cache_path(); - - FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); - g_free(shader_lru_path); - if (!lru_shaders_list) { - return NULL; - } - - uint64_t hash; - while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { - shader_load_from_disk(pg, hash); - } - - return NULL; -} - -static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - memcpy(&snode->state, state, sizeof(ShaderState)); - snode->cached = false; - snode->binding = NULL; - snode->program = NULL; - snode->save_thread = NULL; -} - -static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - if (snode->save_thread) { - qemu_thread_join(snode->save_thread); - g_free(snode->save_thread); - } - - if (snode->binding) { - glDeleteProgram(snode->binding->gl_program); - g_free(snode->binding); - } - - if (snode->program) { - g_free(snode->program); - } - - snode->cached = false; - snode->save_thread = NULL; - snode->binding = NULL; - snode->program = NULL; - memset(&snode->state, 0, sizeof(ShaderState)); -} - -static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - return memcmp(&snode->state, key, sizeof(ShaderState)); -} - -void shader_cache_init(PGRAPHState *pg) -{ - if (!shader_gl_vendor) { - shader_gl_vendor = (const char *) glGetString(GL_VENDOR); - } - - shader_create_cache_folder(); - - /* FIXME: Make this configurable */ - const size_t shader_cache_size = 50*1024; - lru_init(&pg->shader_cache); - pg->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); - assert(pg->shader_cache_entries != NULL); - for (int i = 0; i < shader_cache_size; i++) { - lru_add_free(&pg->shader_cache, 
&pg->shader_cache_entries[i].node); - } - - pg->shader_cache.init_node = shader_cache_entry_init; - pg->shader_cache.compare_nodes = shader_cache_entry_compare; - pg->shader_cache.post_node_evict = shader_cache_entry_post_evict; - - qemu_thread_create(&pg->shader_disk_thread, "pgraph.shader_cache", - shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); -} - -static void *shader_write_to_disk(void *arg) -{ - ShaderLruNode *snode = (ShaderLruNode*) arg; - - char *shader_bin = shader_get_bin_directory(snode->node.hash); - char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); - - static uint64_t gl_vendor_len; - if (gl_vendor_len == 0) { - gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); - } - - static uint64_t xemu_version_len = 0; - if (xemu_version_len == 0) { - xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); - } - - qemu_mkdir(shader_bin); - g_free(shader_bin); - - FILE *shader_file = qemu_fopen(shader_path, "wb"); - if (!shader_file) { - goto error; - } - - size_t written; - #define WRITE_OR_ERR(data, data_size) \ - do { \ - written = fwrite(data, data_size, 1, shader_file); \ - if (written != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); - WRITE_OR_ERR(xemu_version, xemu_version_len); - - WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); - - WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); - WRITE_OR_ERR(&snode->state, sizeof(snode->state)); - - WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); - WRITE_OR_ERR(snode->program, snode->program_size); - - #undef WRITE_OR_ERR - - fclose(shader_file); - - g_free(shader_path); - g_free(snode->program); - snode->program = NULL; - - return NULL; - -error: - fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); - qemu_unlink(shader_path); - g_free(shader_path); - 
g_free(snode->program); - snode->program = NULL; - return NULL; -} - -void shader_cache_to_disk(ShaderLruNode *snode) -{ - if (!snode->binding || snode->cached) { - return; - } - - GLint program_size; - glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); - - if (snode->program) { - g_free(snode->program); - snode->program = NULL; - } - - /* program_size might be zero on some systems, if no binary formats are supported */ - if (program_size == 0) { - return; - } - - snode->program = g_malloc(program_size); - GLsizei program_size_copied; - glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, - &snode->program_format, snode->program); - assert(glGetError() == GL_NO_ERROR); - - snode->program_size = program_size_copied; - snode->cached = true; - - char name[24]; - snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); - snode->save_thread = g_malloc0(sizeof(QemuThread)); - qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); -} diff --git a/hw/xbox/nv2a/shaders_common.h b/hw/xbox/nv2a/shaders_common.h deleted file mode 100644 index ae2ba9f14d3..00000000000 --- a/hw/xbox/nv2a/shaders_common.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * QEMU Geforce NV2A shader common definitions - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#ifndef HW_NV2A_SHADERS_COMMON_H -#define HW_NV2A_SHADERS_COMMON_H - -#include "debug.h" - -#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \ - "noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \ - "flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \ - "noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n" - -#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]") - -#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]") - -typedef struct { - int ref; - gchar *string; -} MString; - -void mstring_append_fmt(MString *mstring, const char *fmt, ...); -MString *mstring_from_fmt(const char *fmt, ...); -void mstring_append_va(MString *mstring, const char *fmt, va_list va); - -static inline -void mstring_ref(MString *mstr) -{ - mstr->ref++; -} - 
-static inline -void mstring_unref(MString *mstr) -{ - mstr->ref--; - if (!mstr->ref) { - g_free(mstr->string); - g_free(mstr); - } -} - -static inline -void mstring_append(MString *mstr, const char *str) -{ - gchar *n = g_strconcat(mstr->string, str, NULL); - g_free(mstr->string); - mstr->string = n; -} - -static inline -void mstring_append_chr(MString *mstr, char chr) -{ - mstring_append_fmt(mstr, "%c", chr); -} - -static inline -void mstring_append_int(MString *mstr, int val) -{ - mstring_append_fmt(mstr, "%" PRId64, val); -} - -static inline -MString *mstring_new(void) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(""); - return mstr; -} - -static inline -MString *mstring_from_str(const char *str) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(str); - return mstr; -} - -static inline -const gchar *mstring_get_str(MString *mstr) -{ - return mstr->string; -} - -static inline -size_t mstring_get_length(MString *mstr) -{ - return strlen(mstr->string); -} - - -#endif diff --git a/hw/xbox/nv2a/lru.h b/include/qemu/lru.h similarity index 87% rename from hw/xbox/nv2a/lru.h rename to include/qemu/lru.h index c0dca7ec5d2..b5882702827 100644 --- a/hw/xbox/nv2a/lru.h +++ b/include/qemu/lru.h @@ -1,7 +1,7 @@ /* * LRU object list * - * Copyright (c) 2021 Matt Borgerson + * Copyright (c) 2021-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -42,6 +42,8 @@ typedef struct Lru Lru; struct Lru { QTAILQ_HEAD(, LruNode) global; QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS]; + int num_used; + int num_free; /* Initialize a node. 
*/ void (*init_node)(Lru *lru, LruNode *node, void *key); @@ -67,6 +69,8 @@ void lru_init(Lru *lru) lru->compare_nodes = NULL; lru->pre_node_evict = NULL; lru->post_node_evict = NULL; + lru->num_free = 0; + lru->num_used = 0; } static inline @@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node) { node->next_bin.tqe_circ.tql_prev = NULL; QTAILQ_INSERT_TAIL(&lru->global, node, next_global); + lru->num_free += 1; } static inline @@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node) if (lru->post_node_evict) { lru->post_node_evict(lru, node); } + + lru->num_used -= 1; + lru->num_free += 1; } static inline -LruNode *lru_evict_one(Lru *lru) +LruNode *lru_try_evict_one(Lru *lru) { LruNode *found; QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { - bool can_evict = true; - if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) { - can_evict = lru->pre_node_evict(lru, found); - } - if (can_evict) { - break; + if (lru_is_node_in_use(lru, found) + && (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) { + lru_evict_node(lru, found); + return found; } } + return NULL; +} + +static inline +LruNode *lru_evict_one(Lru *lru) +{ + LruNode *found = lru_try_evict_one(lru); + assert(found != NULL); /* No evictable node! 
*/ - lru_evict_node(lru, found); return found; } +static inline +LruNode *lru_get_one_free(Lru *lru) +{ + LruNode *found; + + QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { + if (!lru_is_node_in_use(lru, found)) { + return found; + } + } + + return lru_evict_one(lru); +} + static inline bool lru_contains_hash(Lru *lru, uint64_t hash) { @@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key) if (found) { QTAILQ_REMOVE(&lru->bins[bin], found, next_bin); } else { - found = lru_evict_one(lru); + found = lru_get_one_free(lru); found->hash = hash; if (lru->init_node) { lru->init_node(lru, found, key); } assert(found->hash == hash); + + lru->num_used += 1; + lru->num_free -= 1; } QTAILQ_REMOVE(&lru->global, found, next_global); diff --git a/include/qemu/mstring.h b/include/qemu/mstring.h new file mode 100644 index 00000000000..567fd4cdf38 --- /dev/null +++ b/include/qemu/mstring.h @@ -0,0 +1,82 @@ +#ifndef MSTRING_H +#define MSTRING_H + +#include "qemu/osdep.h" +#include + +typedef struct { + int ref; + gchar *string; +} MString; + +void mstring_append_fmt(MString *mstring, const char *fmt, ...); +MString *mstring_from_fmt(const char *fmt, ...); +void mstring_append_va(MString *mstring, const char *fmt, va_list va); + +static inline +void mstring_ref(MString *mstr) +{ + mstr->ref++; +} + +static inline +void mstring_unref(MString *mstr) +{ + mstr->ref--; + if (!mstr->ref) { + g_free(mstr->string); + g_free(mstr); + } +} + +static inline +void mstring_append(MString *mstr, const char *str) +{ + gchar *n = g_strconcat(mstr->string, str, NULL); + g_free(mstr->string); + mstr->string = n; +} + +static inline +void mstring_append_chr(MString *mstr, char chr) +{ + mstring_append_fmt(mstr, "%c", chr); +} + +static inline +void mstring_append_int(MString *mstr, int val) +{ + mstring_append_fmt(mstr, "%" PRId64, val); +} + +static inline +MString *mstring_new(void) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = 
g_strdup(""); + return mstr; +} + +static inline +MString *mstring_from_str(const char *str) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = g_strdup(str); + return mstr; +} + +static inline +const gchar *mstring_get_str(MString *mstr) +{ + return mstr->string; +} + +static inline +size_t mstring_get_length(MString *mstr) +{ + return strlen(mstr->string); +} + +#endif diff --git a/licenses/SPIRV-Reflect.license.txt b/licenses/SPIRV-Reflect.license.txt new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/licenses/SPIRV-Reflect.license.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/VulkanMemoryAllocator.license.txt b/licenses/VulkanMemoryAllocator.license.txt new file mode 100644 index 00000000000..b9fff388f1b --- /dev/null +++ b/licenses/VulkanMemoryAllocator.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/licenses/volk.license.txt b/licenses/volk.license.txt new file mode 100644 index 00000000000..5a717f26780 --- /dev/null +++ b/licenses/volk.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2018-2024 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/meson.build b/meson.build index 8980f55a134..7c12d40fb54 100644 --- a/meson.build +++ b/meson.build @@ -1180,6 +1180,34 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu link_args: config_host['EPOXY_LIBS'].split() + opengl_libs) endif +vulkan = not_found +if targetos == 'windows' + vulkan = declare_dependency( + compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'], + ) + libglslang = declare_dependency(link_args: [ + '-lglslang', + '-lMachineIndependent', + '-lGenericCodeGen', + '-lSPIRV', + '-lSPIRV-Tools', + '-lSPIRV-Tools-opt' + ]) +elif targetos == 'linux' + vulkan = dependency('vulkan') + libglslang = declare_dependency(link_args: [ + '-lglslang', + '-lMachineIndependent', + '-lGenericCodeGen', + '-lSPIRV', + '-lSPIRV-Tools', + '-lSPIRV-Tools-opt' + ]) +endif + +subdir('thirdparty') + + gbm = not_found if (have_system or have_tools) and (virgl.found() or opengl.found()) gbm = dependency('gbm', method: 'pkg-config', required: false, @@ -1931,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found()) config_host_data.set('CONFIG_LIBPMEM', libpmem.found()) config_host_data.set('CONFIG_NUMA', numa.found()) config_host_data.set('CONFIG_OPENGL', opengl.found()) +config_host_data.set('CONFIG_VULKAN', vulkan.found()) config_host_data.set('CONFIG_PROFILER', get_option('profiler')) config_host_data.set('CONFIG_RBD', rbd.found()) config_host_data.set('CONFIG_RDMA', rdma.found()) @@ -4054,6 +4083,7 @@ summary_info += {'U2F support': u2f} summary_info += {'libusb': libusb} summary_info += {'usb net redir': usbredir} summary_info += {'OpenGL support (epoxy)': opengl} +summary_info += {'Vulkan support': vulkan} summary_info += {'GBM': gbm} summary_info += {'libiscsi support': libiscsi} summary_info += {'libnfs support': libnfs} diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index 0496ebeb6c7..5502c3bfd78 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ 
-28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar" # different to the host OS. submodules="dtc meson ui/keycodemapdb" submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3" -submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras + +# xemu extras +submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" + sub_deinit="" function cleanup() { diff --git a/scripts/gen-license.py b/scripts/gen-license.py index b71d4ecd56a..216f441f3ce 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -228,7 +228,25 @@ def head(self): Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu', unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE', ships_static=all_platforms, - submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu') + submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu') + ), + +Lib('volk', 'https://github.com/zeux/volk', + mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md', + ships_static=all_platforms, + submodule=Submodule('thirdparty/volk') + ), + +Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator', + mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt', + ships_static=all_platforms, + submodule=Submodule('thirdparty/VulkanMemoryAllocator') + ), + +Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE', + ships_static=all_platforms, + submodule=Submodule('thirdparty/SPIRV-Reflect') ), # @@ -344,6 +362,17 @@ def head(self): ships_static={windows}, 
platform={windows}, version='2.1.0' ), + +Lib('glslang', 'https://github.com/KhronosGroup/glslang', + bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt', + ships_static={windows}, platform={windows}, + ), + +Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE', + ships_static={windows}, platform={windows}, + ), + ] def gen_license(): diff --git a/thirdparty/SPIRV-Reflect b/thirdparty/SPIRV-Reflect new file mode 160000 index 00000000000..1d674a82d7e --- /dev/null +++ b/thirdparty/SPIRV-Reflect @@ -0,0 +1 @@ +Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71 diff --git a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator new file mode 160000 index 00000000000..009ecd192c1 --- /dev/null +++ b/thirdparty/VulkanMemoryAllocator @@ -0,0 +1 @@ +Subproject commit 009ecd192c1289c7529bff248a16cfe896254816 diff --git a/thirdparty/meson.build b/thirdparty/meson.build new file mode 100644 index 00000000000..99ecbd27961 --- /dev/null +++ b/thirdparty/meson.build @@ -0,0 +1,12 @@ +if vulkan.found() + +libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: vulkan) +vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) + +libvolk = static_library('volk', sources: 'volk/volk.c', dependencies: vulkan) +volk = declare_dependency(include_directories: 'volk', link_with: libvolk, dependencies: vulkan) + +libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) +spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) + +endif diff --git a/thirdparty/renderdoc_app.h b/thirdparty/renderdoc_app.h index 7ee24b69eed..c01e05932e2 100644 --- a/thirdparty/renderdoc_app.h +++ b/thirdparty/renderdoc_app.h @@ -1,7 
+1,7 @@ /****************************************************************************** * The MIT License (MIT) * - * Copyright (c) 2019-2022 Baldur Karlsson + * Copyright (c) 2019-2024 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -35,7 +35,7 @@ #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) #define RENDERDOC_CC __cdecl -#elif defined(__linux__) +#elif defined(__linux__) || defined(__FreeBSD__) #define RENDERDOC_CC #elif defined(__APPLE__) #define RENDERDOC_CC @@ -72,7 +72,8 @@ extern "C" { // RenderDoc capture options // -typedef enum RENDERDOC_CaptureOption { +typedef enum RENDERDOC_CaptureOption +{ // Allow the application to enable vsync // // Default - enabled @@ -214,6 +215,19 @@ typedef enum RENDERDOC_CaptureOption { // necessary as directed by a RenderDoc developer. eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, } RENDERDOC_CaptureOption; // Sets an option that controls how RenderDoc behaves on capture. 
@@ -233,7 +247,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_Capture // If the option is invalid, -FLT_MAX is returned typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); -typedef enum RENDERDOC_InputButton { +typedef enum RENDERDOC_InputButton +{ // '0' - '9' matches ASCII values eRENDERDOC_Key_0 = 0x30, eRENDERDOC_Key_1 = 0x31, @@ -321,7 +336,8 @@ typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton // If keys is NULL or num is 0, captures keys will be disabled typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); -typedef enum RENDERDOC_OverlayBits { +typedef enum RENDERDOC_OverlayBits +{ // This single bit controls whether the overlay is enabled or disabled globally eRENDERDOC_Overlay_Enabled = 0x1, @@ -452,6 +468,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTarget // ignored and the others will be filled out. typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. 
It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + ////////////////////////////////////////////////////////////////////////// // Capturing functions // @@ -525,14 +550,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePoint typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); -// Requests that the replay UI show itself (if hidden or not the current top window). This can be -// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle -// showing the UI after making a capture. +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. // -// This will return 1 if the request was successfully passed on, though it's not guaranteed that -// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current -// target control connection to make such a request, or if there was another error -typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API versions @@ -547,7 +573,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); // Note that this means the API returned can be higher than the one you might have requested. 
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned // instead of 1.0.0. You can check this with the GetAPIVersion entry point -typedef enum RENDERDOC_Version { +typedef enum RENDERDOC_Version +{ eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 @@ -560,6 +587,7 @@ typedef enum RENDERDOC_Version { eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 } RENDERDOC_Version; // API version changelog: @@ -588,8 +616,10 @@ typedef enum RENDERDOC_Version { // 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening // 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. 
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() -typedef struct RENDERDOC_API_1_5_0 +typedef struct RENDERDOC_API_1_6_0 { pRENDERDOC_GetAPIVersion GetAPIVersion; @@ -664,19 +694,23 @@ typedef struct RENDERDOC_API_1_5_0 // new function in 1.5.0 pRENDERDOC_ShowReplayUI ShowReplayUI; -} RENDERDOC_API_1_5_0; - -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_2_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_3_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_2; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API entry point diff --git a/thirdparty/vma.cc b/thirdparty/vma.cc new file mode 100644 index 00000000000..a2023d33b25 --- /dev/null 
+++ b/thirdparty/vma.cc @@ -0,0 +1,2 @@ +#define VMA_IMPLEMENTATION +#include "vk_mem_alloc.h" diff --git a/thirdparty/volk b/thirdparty/volk new file mode 160000 index 00000000000..466085407d5 --- /dev/null +++ b/thirdparty/volk @@ -0,0 +1 @@ +Subproject commit 466085407d5d2f50583fd663c1d65f93a7709d3e diff --git a/ui/meson.build b/ui/meson.build index 18bb7c97c18..75b82df9270 100644 --- a/ui/meson.build +++ b/ui/meson.build @@ -40,10 +40,6 @@ xemu_cocoa = dependency('appleframeworks', modules: 'Cocoa') xemu_ss.add(xemu_cocoa) endif -if 'CONFIG_LINUX' in config_host -xemu_ss.add(gtk) -endif - xemu_ss.add(when: 'CONFIG_LINUX', if_true: [gtk, files('xemu-os-utils-linux.c')]) xemu_ss.add(when: 'CONFIG_WIN32', if_true: files('xemu-os-utils-windows.c')) xemu_ss.add(when: 'CONFIG_DARWIN', if_true: files('xemu-os-utils-macos.m')) diff --git a/ui/xemu.c b/ui/xemu.c index d0cec857f4e..0d01f224605 100644 --- a/ui/xemu.c +++ b/ui/xemu.c @@ -426,6 +426,7 @@ static void handle_keydown(SDL_Event *ev) { int win; struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (scon == NULL) return; int gui_key_modifier_pressed = get_mod_state(); int gui_keysym = 0; @@ -484,6 +485,7 @@ static void handle_keydown(SDL_Event *ev) static void handle_keyup(SDL_Event *ev) { struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (!scon) return; scon->ignore_hotkeys = false; sdl2_process_key(scon, &ev->key); @@ -944,7 +946,7 @@ static void sdl2_display_very_early_init(DisplayOptions *o) fprintf(stderr, "GL_SHADING_LANGUAGE_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION)); // Initialize offscreen rendering context now - nv2a_gl_context_init(); + nv2a_context_init(); SDL_GL_MakeCurrent(NULL, NULL); // FIXME: atexit(sdl_cleanup); diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc index 75b88cafb6e..a9a6c6ec85d 100644 --- a/ui/xui/main-menu.cc +++ b/ui/xui/main-menu.cc @@ -449,7 +449,15 @@ void MainMenuInputView::Draw() void MainMenuDisplayView::Draw() { 
- SectionTitle("Quality"); + SectionTitle("Renderer"); + ChevronCombo("Backend", &g_config.display.renderer, + "Null\0" + "OpenGL\0" +#ifdef CONFIG_VULKAN + "Vulkan\0" +#endif + , + "Select desired renderer implementation"); int rendering_scale = nv2a_get_surface_scale_factor() - 1; if (ChevronCombo("Internal resolution scale", &rendering_scale, "1x\0" diff --git a/ui/xui/main.cc b/ui/xui/main.cc index fd38aa4e7bb..069a6282f9b 100644 --- a/ui/xui/main.cc +++ b/ui/xui/main.cc @@ -216,7 +216,7 @@ void xemu_hud_render(void) ImGui::NewFrame(); ProcessKeyboardShortcuts(); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#if defined(CONFIG_RENDERDOC) if (g_capture_renderdoc_frame) { nv2a_dbg_renderdoc_capture_frames(1); g_capture_renderdoc_frame = false; diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc index 2d1f48c6045..bce0e7a0fb0 100644 --- a/ui/xui/menubar.cc +++ b/ui/xui/menubar.cc @@ -71,8 +71,8 @@ void ProcessKeyboardShortcuts(void) ActionScreenshot(); } -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) - if (ImGui::IsKeyPressed(ImGuiKey_F10)) { +#ifdef CONFIG_RENDERDOC + if (ImGui::IsKeyPressed(ImGuiKey_F10) && nv2a_dbg_renderdoc_available()) { nv2a_dbg_renderdoc_capture_frames(1); } #endif @@ -203,7 +203,7 @@ void ShowMainMenu() ImGui::MenuItem("Monitor", "~", &monitor_window.is_open); ImGui::MenuItem("Audio", NULL, &apu_window.m_is_open); ImGui::MenuItem("Video", NULL, &video_window.m_is_open); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#ifdef CONFIG_RENDERDOC if (nv2a_dbg_renderdoc_available()) { ImGui::MenuItem("RenderDoc: Capture", NULL, &g_capture_renderdoc_frame); } diff --git a/util/meson.build b/util/meson.build index 4269ef4e38f..72ef1db2b57 100644 --- a/util/meson.build +++ b/util/meson.build @@ -59,6 +59,7 @@ util_ss.add(files('int128.c')) util_ss.add(files('memalign.c')) util_ss.add(when: 'CONFIG_WIN32', if_true: files('miniz/miniz.c')) util_ss.add(files('fast-hash.c')) +util_ss.add(files('mstring.c')) if have_user 
util_ss.add(files('selfmap.c')) diff --git a/util/mstring.c new file mode 100644 index 00000000000..6cd0af7335e --- /dev/null +++ b/util/mstring.c @@ -0,0 +1,49 @@ +#include "qemu/osdep.h" +#include "qemu/mstring.h" + +#include <stdarg.h> + +void mstring_append_fmt(MString *qstring, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + mstring_append_va(qstring, fmt, ap); + va_end(ap); +} + +MString *mstring_from_fmt(const char *fmt, ...) +{ + MString *ret = mstring_new(); + va_list ap; + va_start(ap, fmt); + mstring_append_va(ret, fmt, ap); + va_end(ap); + + return ret; +} + +void mstring_append_va(MString *qstring, const char *fmt, va_list va) +{ + char scratch[256]; + + va_list ap; + va_copy(ap, va); + const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); + va_end(ap); + + if (len == 0) { + return; + } else if (len < sizeof(scratch)) { + mstring_append(qstring, scratch); + return; + } + + /* overflowed our scratch buffer, alloc and try again */ + char *buf = g_malloc(len + 1); + va_copy(ap, va); + vsnprintf(buf, len + 1, fmt, ap); + va_end(ap); + + mstring_append(qstring, buf); + g_free(buf); +} diff --git a/xemu-version.c index 523d9557601..f2e7a958e9e 100644 --- a/xemu-version.c +++ b/xemu-version.c @@ -1,5 +1,8 @@ #include "xemu-version-macro.h" +const int xemu_version_major = XEMU_VERSION_MAJOR; +const int xemu_version_minor = XEMU_VERSION_MINOR; +const int xemu_version_patch = XEMU_VERSION_PATCH; const char *xemu_version = XEMU_VERSION; const char *xemu_branch = XEMU_BRANCH;; const char *xemu_commit = XEMU_COMMIT; diff --git a/xemu-version.h index 484af8a9deb..a1fe27fccbc 100644 --- a/xemu-version.h +++ b/xemu-version.h @@ -1,6 +1,9 @@ #ifndef XEMU_VERSION_H #define XEMU_VERSION_H +extern const int xemu_version_major; +extern const int xemu_version_minor; +extern const int xemu_version_patch; extern const char *xemu_version; extern const char *xemu_branch; extern const char *xemu_commit; From
87ccc7e2a2d0e726522f0297c407529148774175 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 032/176] archive-source.sh: Fix path to nv2a_vsh_cpu --- scripts/archive-source.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index 5502c3bfd78..e20751e50ef 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -31,7 +31,7 @@ submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloa # xemu extras submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" -submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" sub_deinit="" From f392869cabefb0506c30b67f20db8927efa12778 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 033/176] debian/control: Drop vendored deps --- debian/control | 2 -- 1 file changed, 2 deletions(-) diff --git a/debian/control b/debian/control index 30603057eab..0792bdd71d9 100644 --- a/debian/control +++ b/debian/control @@ -16,9 +16,7 @@ Build-Depends: debhelper (>= 11), libssl-dev, libpcap-dev, libslirp-dev, - glslang-dev, libvulkan-dev, - Standards-Version: 3.9.8 Homepage: https://xemu.app XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu From 0d0dbc2886436c6c01a1e89e45b7808fb6a268c6 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 034/176] meson: Add -DVK_NO_PROTOTYPES compile args on volk --- thirdparty/meson.build | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 99ecbd27961..43dff02fe1d 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -1,10 +1,10 
@@ if vulkan.found() -libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: vulkan) -vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) +libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTOTYPES'], dependencies: vulkan) +volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) -libvolk = static_library('volk', sources: 'volk/volk.c', dependencies: vulkan) -volk = declare_dependency(include_directories: 'volk', link_with: libvolk, dependencies: vulkan) +libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) +vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) From cd130f85d0ca83c79eaaf04b2fd28b9aa24af8cb Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 035/176] scripts/gen-license.py: Fix version detect --- licenses/SPIRV-Tools.license.txt | 202 ++++++ licenses/glslang.license.txt | 1016 ++++++++++++++++++++++++++++++ scripts/gen-license.py | 3 +- 3 files changed, 1220 insertions(+), 1 deletion(-) create mode 100644 licenses/SPIRV-Tools.license.txt create mode 100644 licenses/glslang.license.txt diff --git a/licenses/SPIRV-Tools.license.txt b/licenses/SPIRV-Tools.license.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/licenses/SPIRV-Tools.license.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/glslang.license.txt b/licenses/glslang.license.txt new file mode 100644 index 00000000000..054e68a4614 --- /dev/null +++ b/licenses/glslang.license.txt @@ -0,0 +1,1016 @@ +Here, glslang proper means core GLSL parsing, HLSL parsing, and SPIR-V code +generation. Glslang proper requires use of a number of licenses, one that covers +preprocessing and others that covers non-preprocessing. + +Bison was removed long ago. You can build glslang from the source grammar, +using tools of your choice, without using bison or any bison files. + +Other parts, outside of glslang proper, include: + +- gl_types.h, only needed for OpenGL-like reflection, and can be left out of + a parse and codegen project. See it for its license. 
+ +- update_glslang_sources.py, which is not part of the project proper and does + not need to be used. + +- the SPIR-V "remapper", which is optional, but has the same license as + glslang proper + +- Google tests and SPIR-V tools, and anything in the external subdirectory + are external and optional; see them for their respective licenses. + +-------------------------------------------------------------------------------- + +The core of glslang-proper, minus the preprocessor is licenced as follows: + +-------------------------------------------------------------------------------- +3-Clause BSD License +-------------------------------------------------------------------------------- + +// +// Copyright (C) 2015-2018 Google, Inc. +// Copyright (C) +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + + +-------------------------------------------------------------------------------- +2-Clause BSD License +-------------------------------------------------------------------------------- + +Copyright 2020 The Khronos Group Inc + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +-------------------------------------------------------------------------------- +The MIT License +-------------------------------------------------------------------------------- + +Copyright 2020 The Khronos Group Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +-------------------------------------------------------------------------------- +APACHE LICENSE, VERSION 2.0 +-------------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +-------------------------------------------------------------------------------- +GPL 3 with special bison exception +-------------------------------------------------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +Bison Exception + +As a special exception, you may create a larger work that contains part or all +of the Bison parser skeleton and distribute that work under terms of your +choice, so long as that work isn't itself a parser generator using the skeleton +or a modified version thereof as a parser skeleton. Alternatively, if you +modify or redistribute the parser skeleton itself, you may (at your option) +remove this special exception, which will cause the skeleton and the resulting +Bison output files to be licensed under the GNU General Public License without +this special exception. + +This special exception was added by the Free Software Foundation in version +2.2 of Bison. 
+ + END OF TERMS AND CONDITIONS + +-------------------------------------------------------------------------------- +================================================================================ +-------------------------------------------------------------------------------- + +The preprocessor has the core licenses stated above, plus additional licences: + +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. 
+ +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ diff --git a/scripts/gen-license.py b/scripts/gen-license.py index 216f441f3ce..88733871ad7 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -366,13 +366,14 @@ def head(self): Lib('glslang', 'https://github.com/KhronosGroup/glslang', bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt', ships_static={windows}, platform={windows}, + version='14.3.0' ), Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools', apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE', ships_static={windows}, platform={windows}, + pkgconfig=PkgConfig('SPIRV-Tools') ), - ] def gen_license(): From ff5f2768b6afb1dc21686ce37e6fb9a9195e2376 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 036/176] meson: Vendor glslang --- debian/rules | 2 +- meson.build | 29 ++++++++++++++++++----------- subprojects/glslang.wrap | 4 ++++ 3 files changed, 23 insertions(+), 12 deletions(-) create mode 100644 subprojects/glslang.wrap diff --git a/debian/rules b/debian/rules index 25a0f7b9d08..184c3a848c4 100755 --- a/debian/rules +++ b/debian/rules @@ -63,7 +63,7 @@ override_dh_auto_configure: override_dh_auto_build: ./build.sh ${XEMU_BUILD_OPTIONS} ${common_configure_opts} || \ - { echo ===== BUILD FAILED ===; tail -n 50 config.log; exit 1; } + { 
echo ===== BUILD FAILED ===; cat build/meson-logs/meson-log.txt; exit 1; } cp debian/copyright debian/qemu.deb.copyright cp dist/LICENSE.txt debian/copyright diff --git a/meson.build b/meson.build index 7c12d40fb54..058c0092d41 100644 --- a/meson.build +++ b/meson.build @@ -1181,10 +1181,10 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu endif vulkan = not_found +libglslang = not_found + if targetos == 'windows' - vulkan = declare_dependency( - compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'], - ) + vulkan = declare_dependency(compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR']) libglslang = declare_dependency(link_args: [ '-lglslang', '-lMachineIndependent', @@ -1195,14 +1195,21 @@ if targetos == 'windows' ]) elif targetos == 'linux' vulkan = dependency('vulkan') - libglslang = declare_dependency(link_args: [ - '-lglslang', - '-lMachineIndependent', - '-lGenericCodeGen', - '-lSPIRV', - '-lSPIRV-Tools', - '-lSPIRV-Tools-opt' - ]) +endif + +if vulkan.found() and not libglslang.found() + cmake = import('cmake') + # FIXME: Get spirv-tools to enable opt. 
+ glslang_opts = cmake.subproject_options() + glslang_opts.add_cmake_defines({'ENABLE_OPT': false}) + glslang_subpro = cmake.subproject('glslang', options: glslang_opts) + libglslang = declare_dependency(link_with: [ + glslang_subpro.target('glslang'), + glslang_subpro.target('MachineIndependent'), + glslang_subpro.target('GenericCodeGen'), + glslang_subpro.target('SPIRV'), + ], include_directories: ['subprojects' / 'glslang'] + ) endif subdir('thirdparty') diff --git a/subprojects/glslang.wrap b/subprojects/glslang.wrap new file mode 100644 index 00000000000..7e6fe8414ef --- /dev/null +++ b/subprojects/glslang.wrap @@ -0,0 +1,4 @@ +[wrap-git] +url=https://github.com/KhronosGroup/glslang +revision=vulkan-sdk-1.3.283.0 +depth=1 From 2bc8cb3050459ba92d244607b7c8a4dd2c86071f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 037/176] nv2a/vk: Remove extraneous VK_NO_PROTOTYPES defn --- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index a509de8d71a..9834525abd2 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -20,8 +20,6 @@ #ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H #define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H -#define VK_NO_PROTOTYPES 1 - #include "qemu/osdep.h" #include "qemu/thread.h" #include "qemu/queue.h" From 2800b8d22a537e481c2f4fbecbbe01dc24fa8365 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 038/176] meson: Move VMA options into meson config --- hw/xbox/nv2a/pgraph/vk/renderer.h | 3 --- thirdparty/meson.build | 8 ++++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 9834525abd2..f2d6bedbd3e 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -35,9 +35,6 @@ #include #include #include - 
-#define VMA_STATIC_VULKAN_FUNCTIONS 1 -#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0 #include #include "debug.h" diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 43dff02fe1d..fed47721125 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -3,8 +3,12 @@ if vulkan.found() libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTOTYPES'], dependencies: vulkan) volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) -libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) -vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) +vma_defns = [ + '-DVMA_STATIC_VULKAN_FUNCTIONS=1', + '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', +] +libvma = static_library('vma', sources: 'vma.cc', c_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) +vma = declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) From 093e654725dbd512617dd59a84f39c5aadecb8c0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 039/176] nv2a: Wrap pfifo direct renderer calls --- hw/xbox/nv2a/pfifo.c | 4 ++-- hw/xbox/nv2a/pgraph/pgraph.c | 12 ++++++++++++ hw/xbox/nv2a/pgraph/pgraph.h | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c index 295cbbf27b2..0e55826ad3c 100644 --- a/hw/xbox/nv2a/pfifo.c +++ b/hw/xbox/nv2a/pfifo.c @@ -461,13 +461,13 @@ void *pfifo_thread(void *arg) while (true) { d->pfifo.fifo_kick = false; - 
d->pgraph.renderer->ops.process_pending(d); + pgraph_process_pending(d); if (!d->pfifo.halt) { pfifo_run_pusher(d); } - d->pgraph.renderer->ops.process_pending_reports(d); + pgraph_process_pending_reports(d); if (!d->pfifo.fifo_kick) { qemu_cond_broadcast(&d->pfifo.fifo_idle_cond); diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index 0062efa15f4..b2e1f220df6 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -2872,3 +2872,15 @@ void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx, result, offset); } + +void pgraph_process_pending(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.process_pending(d); +} + +void pgraph_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.process_pending_reports(d); +} diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 799e879c062..cc409c058e3 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -250,6 +250,8 @@ void pgraph_init(NV2AState *d); void pgraph_init_thread(NV2AState *d); void pgraph_destroy(PGRAPHState *pg); void pgraph_context_switch(NV2AState *d, unsigned int channel_id); +void pgraph_process_pending(NV2AState *d); +void pgraph_process_pending_reports(NV2AState *d); int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, uint32_t parameter, uint32_t *parameters, size_t num_words_available, size_t max_lookahead_words, From 27531ec1ca7a2d3b1e778fa8c7621d1cb6693a66 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 040/176] nv2a: Wrap nv2a_vm_state_change direct renderer calls --- hw/xbox/nv2a/nv2a.c | 8 ++++---- hw/xbox/nv2a/pgraph/pgraph.c | 25 +++++++++++++++++++++++++ hw/xbox/nv2a/pgraph/pgraph.h | 5 +++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index 
7b161131159..fd1bcbaba6c 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -375,10 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) if (state == RUN_STATE_SAVE_VM) { nv2a_lock_fifo(d); qatomic_set(&d->pfifo.halt, true); - d->pgraph.renderer->ops.pre_savevm_trigger(d); + pgraph_pre_savevm_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - d->pgraph.renderer->ops.pre_savevm_wait(d); + pgraph_pre_savevm_wait(d); qemu_mutex_lock_iothread(); nv2a_lock_fifo(d); } else if (state == RUN_STATE_RESTORE_VM) { @@ -391,10 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) nv2a_unlock_fifo(d); } else if (state == RUN_STATE_SHUTDOWN) { nv2a_lock_fifo(d); - d->pgraph.renderer->ops.pre_shutdown_trigger(d); + pgraph_pre_shutdown_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - d->pgraph.renderer->ops.pre_shutdown_wait(d); + pgraph_pre_shutdown_wait(d); qemu_mutex_lock_iothread(); } } diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index b2e1f220df6..95fd0e5e62f 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -2884,3 +2884,28 @@ void pgraph_process_pending_reports(NV2AState *d) PGRAPHState *pg = &d->pgraph; pg->renderer->ops.process_pending_reports(d); } + +void pgraph_pre_savevm_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_savevm_trigger(d); +} + +void pgraph_pre_savevm_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_savevm_wait(d); +} + +void pgraph_pre_shutdown_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_shutdown_trigger(d); +} + +void pgraph_pre_shutdown_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_shutdown_wait(d); +} + diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index cc409c058e3..fa94603e6d8 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ 
b/hw/xbox/nv2a/pgraph/pgraph.h @@ -252,6 +252,11 @@ void pgraph_destroy(PGRAPHState *pg); void pgraph_context_switch(NV2AState *d, unsigned int channel_id); void pgraph_process_pending(NV2AState *d); void pgraph_process_pending_reports(NV2AState *d); +void pgraph_pre_savevm_trigger(NV2AState *d); +void pgraph_pre_savevm_wait(NV2AState *d); +void pgraph_pre_shutdown_trigger(NV2AState *d); +void pgraph_pre_shutdown_wait(NV2AState *d); + int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, uint32_t parameter, uint32_t *parameters, size_t num_words_available, size_t max_lookahead_words, From 1973ed7f58b719f3f637d4484eec119d93ac107a Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 041/176] n2va/vk: Drop debug message --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index c4f2cd85e05..f1261c412b7 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -115,8 +115,6 @@ static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) vkDestroyPipelineLayout(r->device, snode->layout, NULL); snode->layout = VK_NULL_HANDLE; - - fprintf(stderr, "released pipeline\n"); } static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key) From fadaf19cddeadd0d3c50e05a7e3709b7453672c3 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 042/176] nv2a/vk: Fix GArray free --- hw/xbox/nv2a/pgraph/vk/instance.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 4023fd5858c..65205921462 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -28,8 +28,8 @@ #include -typedef GArray VkExtensionPropertiesArray; -typedef GArray StringArray; +#define VkExtensionPropertiesArray GArray +#define 
StringArray GArray static bool enable_validation = false; @@ -217,10 +217,10 @@ static void create_instance(PGRAPHState *pg) .apiVersion = VK_API_VERSION_1_3, }; - g_autofree VkExtensionPropertiesArray *available_extensions = + g_autoptr(VkExtensionPropertiesArray) available_extensions = get_available_instance_extensions(pg); - g_autofree StringArray *enabled_extension_names = + g_autoptr(StringArray) enabled_extension_names = get_required_instance_extension_names(pg); bool all_required_extensions_available = true; @@ -374,7 +374,7 @@ static void add_optional_device_extension_names( static bool check_device_support_required_extensions(VkPhysicalDevice device) { - g_autofree VkExtensionPropertiesArray *available_extensions = + g_autoptr(VkExtensionPropertiesArray) available_extensions = get_available_device_extensions(device); for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) { @@ -457,10 +457,10 @@ static void create_logical_device(PGRAPHState *pg) QueueFamilyIndices indices = pgraph_vk_find_queue_families(r->physical_device); - g_autofree VkExtensionPropertiesArray *available_extensions = + g_autoptr(VkExtensionPropertiesArray) available_extensions = get_available_device_extensions(r->physical_device); - g_autofree StringArray *enabled_extension_names = + g_autoptr(StringArray) enabled_extension_names = get_required_device_extension_names(); add_optional_device_extension_names(pg, available_extensions, From 6403c693268f50baa57cb8018350e690a1e05524 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 043/176] nv2a/vk: Fix shader cache node init flag --- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 ++ hw/xbox/nv2a/pgraph/vk/shaders.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index f2d6bedbd3e..1897948e115 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -156,6 +156,8 @@ 
typedef struct ShaderModuleInfo { typedef struct ShaderBinding { LruNode node; + bool initialized; + ShaderState state; ShaderModuleInfo *geometry; ShaderModuleInfo *vertex; diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 7d5000d7517..04ad25e9144 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -311,6 +311,7 @@ static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) { ShaderBinding *snode = container_of(node, ShaderBinding, node); memcpy(&snode->state, state, sizeof(ShaderState)); + snode->initialized = false; } static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) @@ -329,7 +330,7 @@ static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) } } - memset(&snode->state, 0, sizeof(ShaderState)); + snode->initialized = false; } static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) @@ -373,7 +374,7 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode); - if (!snode->fragment) { + if (!snode->initialized) { NV2A_VK_DPRINTF("cache miss"); nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); @@ -422,6 +423,8 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) } update_shader_constant_locations(snode); + + snode->initialized = true; } return snode; From 40526389f6b45f325eecd2a856a5156a700b1467 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 044/176] nv2a/vk: Clear bindings before tearing down cache --- hw/xbox/nv2a/pgraph/vk/texture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 10a4ccd2e44..ea6adf671c6 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1447,10 +1447,10 @@ void pgraph_vk_finalize_textures(PGRAPHState *pg) { PGRAPHVkState *r = 
pg->vk_renderer_state; - destroy_dummy_texture(r); - texture_cache_finalize(r); - for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { r->texture_bindings[i] = NULL; } + + destroy_dummy_texture(r); + texture_cache_finalize(r); } From c1eb48b62f7e707616314d079be7cd304c5411e7 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 045/176] nv2a/vk: Fix snode->geometry init --- hw/xbox/nv2a/pgraph/vk/shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 04ad25e9144..fde19242a52 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -397,7 +397,7 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) mstring_get_str(geometry_shader_code)); mstring_unref(geometry_shader_code); } else { - memset(&snode->geometry, 0, sizeof(snode->geometry)); + snode->geometry = NULL; } MString *vertex_shader_code = From 84dd112186e477992ae35003a3127286badc4dc9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 046/176] nv2a/gl: Improve renderer teardown --- hw/xbox/nv2a/pgraph/gl/display.c | 27 ++++++++++ hw/xbox/nv2a/pgraph/gl/renderer.c | 83 +++++++++++++++++-------------- hw/xbox/nv2a/pgraph/gl/renderer.h | 9 ++-- hw/xbox/nv2a/pgraph/gl/reports.c | 20 ++++++++ hw/xbox/nv2a/pgraph/gl/shaders.c | 6 ++- hw/xbox/nv2a/pgraph/gl/surface.c | 51 +++++++++++++++---- hw/xbox/nv2a/pgraph/gl/texture.c | 9 +++- hw/xbox/nv2a/pgraph/gl/vertex.c | 32 +++++++++++- 8 files changed, 180 insertions(+), 57 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index 804fec2c2d1..ed0992e8832 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -102,6 +102,33 @@ void pgraph_gl_init_display_renderer(NV2AState *d) assert(glGetError() == GL_NO_ERROR); } +void pgraph_gl_finalize_display(PGRAPHState *pg) +{ + 
PGRAPHGLState *r = pg->gl_renderer_state; + + glo_set_current(g_nv2a_context_display); + + glDeleteTextures(1, &r->gl_display_buffer); + r->gl_display_buffer = 0; + + glDeleteProgram(r->disp_rndr.prog); + r->disp_rndr.prog = 0; + + glDeleteVertexArrays(1, &r->disp_rndr.vao); + r->disp_rndr.vao = 0; + + glDeleteBuffers(1, &r->disp_rndr.vbo); + r->disp_rndr.vbo = 0; + + glDeleteFramebuffers(1, &r->disp_rndr.fbo); + r->disp_rndr.fbo = 0; + + glDeleteTextures(1, &r->disp_rndr.pvideo_tex); + r->disp_rndr.pvideo_tex = 0; + + glo_set_current(g_nv2a_context_render); +} + static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, unsigned int width, unsigned int height, diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 2114608683d..930e4454c1a 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -33,24 +33,63 @@ static void nv2a_gl_context_init(void) g_nv2a_context_display = glo_context_create(); } +static void pgraph_gl_init(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pg->gl_renderer_state = g_malloc0(sizeof(*pg->gl_renderer_state)); + + /* fire up opengl */ + glo_set_current(g_nv2a_context_render); + +#ifdef DEBUG_NV2A_GL + gl_debug_initialize(); +#endif + + /* DXT textures */ + assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); + /* Internal RGB565 texture format */ + assert(glo_check_extension("GL_ARB_ES2_compatibility")); + + pgraph_gl_init_surfaces(pg); + pgraph_gl_init_reports(d); + pgraph_gl_init_texture_cache(d); + pgraph_gl_init_vertex_cache(d); + pgraph_gl_init_shader_cache(pg); + + glo_set_current(g_nv2a_context_display); + pgraph_gl_init_display_renderer(d); + + pgraph_gl_update_entire_memory_buffer(d); + + glo_set_current(NULL); + + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; +} + static void pgraph_gl_init_thread(NV2AState *d) { glo_set_current(g_nv2a_context_render); } -static void pgraph_gl_deinit(NV2AState *d) +static void 
pgraph_gl_finalize(NV2AState *d) { PGRAPHState *pg = &d->pgraph; glo_set_current(g_nv2a_context_render); - pgraph_gl_deinit_surfaces(pg); - pgraph_gl_deinit_shader_cache(pg); - pgraph_gl_deinit_texture_cache(pg); + pgraph_gl_finalize_surfaces(pg); + pgraph_gl_finalize_shaders(pg); + pgraph_gl_finalize_textures(pg); + pgraph_gl_finalize_reports(pg); + pgraph_gl_finalize_vertex(pg); + pgraph_gl_finalize_display(pg); glo_set_current(NULL); - glo_context_destroy(g_nv2a_context_render); - glo_context_destroy(g_nv2a_context_display); + + g_free(pg->gl_renderer_state); + pg->gl_renderer_state = NULL; } static void pgraph_gl_flip_stall(NV2AState *d) @@ -136,36 +175,6 @@ static void pgraph_gl_pre_shutdown_wait(NV2AState *d) qemu_event_wait(&r->shader_cache_writeback_complete); } -static void pgraph_gl_init(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState)); - - /* fire up opengl */ - glo_set_current(g_nv2a_context_render); - -#ifdef DEBUG_NV2A_GL - gl_debug_initialize(); -#endif - - /* DXT textures */ - assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); - /* Internal RGB565 texture format */ - assert(glo_check_extension("GL_ARB_ES2_compatibility")); - - pgraph_gl_init_surfaces(pg); - pgraph_gl_init_reports(d); - pgraph_gl_init_texture_cache(d); - pgraph_gl_init_vertex_cache(d); - pgraph_gl_init_shader_cache(pg); - - glo_set_current(g_nv2a_context_display); - pgraph_gl_init_display_renderer(d); - - glo_set_current(NULL); -} - static PGRAPHRenderer pgraph_gl_renderer = { .type = CONFIG_DISPLAY_RENDERER_OPENGL, .name = "OpenGL", @@ -173,7 +182,7 @@ static PGRAPHRenderer pgraph_gl_renderer = { .init = pgraph_gl_init, .early_context_init = nv2a_gl_context_init, .init_thread = pgraph_gl_init_thread, - .finalize = pgraph_gl_deinit, + .finalize = pgraph_gl_finalize, .clear_report_value = pgraph_gl_clear_report_value, .clear_surface = pgraph_gl_clear_surface, .draw_begin = pgraph_gl_draw_begin, diff --git 
a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index fff4ac7d536..5044f9d5a25 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -240,9 +240,6 @@ void pgraph_gl_bind_textures(NV2AState *d); void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src); -void pgraph_gl_deinit_shader_cache(PGRAPHState *pg); -void pgraph_gl_deinit_surfaces(PGRAPHState *pg); -void pgraph_gl_deinit_texture_cache(PGRAPHState *pg); void pgraph_gl_download_dirty_surfaces(NV2AState *d); void pgraph_gl_clear_report_value(NV2AState *d); void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter); @@ -258,11 +255,17 @@ void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool void pgraph_gl_sync(NV2AState *d); void pgraph_gl_update_entire_memory_buffer(NV2AState *d); void pgraph_gl_init_display_renderer(NV2AState *d); +void pgraph_gl_finalize_display(PGRAPHState *pg); void pgraph_gl_init_reports(NV2AState *d); +void pgraph_gl_finalize_reports(PGRAPHState *pg); void pgraph_gl_init_shader_cache(PGRAPHState *pg); +void pgraph_gl_finalize_shaders(PGRAPHState *pg); void pgraph_gl_init_surfaces(PGRAPHState *pg); +void pgraph_gl_finalize_surfaces(PGRAPHState *pg); void pgraph_gl_init_texture_cache(NV2AState *d); +void pgraph_gl_finalize_textures(PGRAPHState *pg); void pgraph_gl_init_vertex_cache(NV2AState *d); +void pgraph_gl_finalize_vertex(PGRAPHState *pg); void pgraph_gl_process_pending_downloads(NV2AState *d); void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg); void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int 
texture_unit); diff --git a/hw/xbox/nv2a/pgraph/gl/reports.c b/hw/xbox/nv2a/pgraph/gl/reports.c index 0673c37e0c5..2dea09e590e 100644 --- a/hw/xbox/nv2a/pgraph/gl/reports.c +++ b/hw/xbox/nv2a/pgraph/gl/reports.c @@ -109,3 +109,23 @@ void pgraph_gl_get_report(NV2AState *d, uint32_t parameter) r->gl_zpass_pixel_count_query_count = 0; r->gl_zpass_pixel_count_queries = NULL; } + +void pgraph_gl_finalize_reports(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + QueryReport *report, *next; + QSIMPLEQ_FOREACH_SAFE (report, &r->report_queue, entry, next) { + if (report->query_count) { + glDeleteQueries(report->query_count, report->queries); + } + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } + + if (r->gl_zpass_pixel_count_query_count) { + glDeleteQueries(r->gl_zpass_pixel_count_query_count, + r->gl_zpass_pixel_count_queries); + r->gl_zpass_pixel_count_query_count = 0; + } +} diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 0bb4eaa5981..ab3928afd74 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -581,13 +581,15 @@ void pgraph_gl_init_shader_cache(PGRAPHState *pg) shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); } -void pgraph_gl_deinit_shader_cache(PGRAPHState *pg) +void pgraph_gl_finalize_shaders(PGRAPHState *pg) { PGRAPHGLState *r = pg->gl_renderer_state; // Clear out shader cache - pgraph_gl_shader_write_cache_reload_list(pg); + pgraph_gl_shader_write_cache_reload_list(pg); // FIXME: also flushes, rename for clarity free(r->shader_cache_entries); + r->shader_cache_entries = NULL; + qemu_mutex_destroy(&r->shader_cache_lock); } diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index 332ca7199ee..a2a00d09ac8 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -162,6 +162,23 @@ static void init_render_to_texture(PGRAPHState *pg) glGenFramebuffers(1, &r->s2t_rndr.fbo); } +static void 
finalize_render_to_texture(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + glDeleteProgram(r->s2t_rndr.prog); + r->s2t_rndr.prog = 0; + + glDeleteVertexArrays(1, &r->s2t_rndr.vao); + r->s2t_rndr.vao = 0; + + glDeleteBuffers(1, &r->s2t_rndr.vbo); + r->s2t_rndr.vbo = 0; + + glDeleteFramebuffers(1, &r->s2t_rndr.fbo); + r->s2t_rndr.fbo = 0; +} + static bool surface_to_texture_can_fastpath(SurfaceBinding *surface, TextureShape *shape) { @@ -1365,21 +1382,11 @@ void pgraph_gl_init_surfaces(PGRAPHState *pg) init_render_to_texture(pg); } -void pgraph_gl_deinit_surfaces(PGRAPHState *pg) -{ - PGRAPHGLState *r = pg->gl_renderer_state; - - glDeleteFramebuffers(1, &r->gl_framebuffer); - // TODO: clear out surfaces -} - -void pgraph_gl_surface_flush(NV2AState *d) +static void flush_surfaces(NV2AState *d) { PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; - bool update_surface = (r->color_binding || r->zeta_binding); - /* Clear last surface shape to force recreation of buffers at next draw */ pg->surface_color.draw_dirty = false; pg->surface_zeta.draw_dirty = false; @@ -1391,6 +1398,28 @@ void pgraph_gl_surface_flush(NV2AState *d) QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { pgraph_gl_surface_invalidate(d, s); } +} + +void pgraph_gl_finalize_surfaces(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHGLState *r = pg->gl_renderer_state; + + flush_surfaces(d); + glDeleteFramebuffers(1, &r->gl_framebuffer); + r->gl_framebuffer = 0; + + finalize_render_to_texture(pg); +} + +void pgraph_gl_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool update_surface = (r->color_binding || r->zeta_binding); + + flush_surfaces(d); pgraph_gl_reload_surface_scale_factor(pg); diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c index bf072f44d65..942d1fe1948 100644 --- a/hw/xbox/nv2a/pgraph/gl/texture.c +++ 
b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -809,11 +809,16 @@ void pgraph_gl_init_texture_cache(NV2AState *d) r->texture_cache.post_node_evict = texture_cache_entry_post_evict; } -void pgraph_gl_deinit_texture_cache(PGRAPHState *pg) +void pgraph_gl_finalize_textures(PGRAPHState *pg) { PGRAPHGLState *r = pg->gl_renderer_state; - // Clear out texture cache + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + r->texture_binding[i] = NULL; + } + lru_flush(&r->texture_cache); free(r->texture_cache_entries); + + r->texture_cache_entries = NULL; } diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c index 21f42b647c5..2fd39db7c69 100644 --- a/hw/xbox/nv2a/pgraph/gl/vertex.c +++ b/hw/xbox/nv2a/pgraph/gl/vertex.c @@ -245,14 +245,15 @@ static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) return memcmp(&vnode->key, key, sizeof(VertexKey)); } +static const size_t element_cache_size = 50*1024; + void pgraph_gl_init_vertex_cache(NV2AState *d) { PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; - const size_t element_cache_size = 50*1024; lru_init(&r->element_cache); - r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); + r->element_cache_entries = g_malloc_n(element_cache_size, sizeof(VertexLruNode)); assert(r->element_cache_entries != NULL); GLuint element_cache_buffers[element_cache_size]; glGenBuffers(element_cache_size, element_cache_buffers); @@ -281,3 +282,30 @@ void pgraph_gl_init_vertex_cache(NV2AState *d) assert(glGetError() == GL_NO_ERROR); } + +void pgraph_gl_finalize_vertex(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + GLuint element_cache_buffers[element_cache_size]; + for (int i = 0; i < element_cache_size; i++) { + element_cache_buffers[i] = r->element_cache_entries[i].gl_buffer; + } + glDeleteBuffers(element_cache_size, element_cache_buffers); + lru_flush(&r->element_cache); + + g_free(r->element_cache_entries); + r->element_cache_entries = NULL; + + 
glDeleteBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer); + memset(r->gl_inline_buffer, 0, sizeof(r->gl_inline_buffer)); + + glDeleteBuffers(1, &r->gl_inline_array_buffer); + r->gl_inline_array_buffer = 0; + + glDeleteBuffers(1, &r->gl_memory_buffer); + r->gl_memory_buffer = 0; + + glDeleteVertexArrays(1, &r->gl_vertex_array); + r->gl_vertex_array = 0; +} \ No newline at end of file From cd2278dd62d8c5a4476af0cc54c49827c1ea5078 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 047/176] nv2a/gl: Ensure shader_binding exists before dirty check --- hw/xbox/nv2a/pgraph/gl/shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index ab3928afd74..f9042e8c19f 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -1017,7 +1017,7 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg) fixed_function ? "yes" : "no"); bool binding_changed = false; - if (!test_shaders_dirty(pg) && !pg->program_data_dirty) { + if (r->shader_binding && !test_shaders_dirty(pg) && !pg->program_data_dirty) { nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); goto update_constants; } From 4bd040679748e535a95c872ee5123edd0bdf4c55 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 048/176] nv2a/gl: Make sure vulkan is not set on ShaderState --- hw/xbox/nv2a/pgraph/gl/shaders.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index f9042e8c19f..5569277252a 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -1027,6 +1027,7 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg) ShaderBinding* old_binding = r->shader_binding; ShaderState state = pgraph_get_shader_state(pg); + assert(!state.vulkan); uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); 
qemu_mutex_lock(&r->shader_cache_lock); From 71d00d2208958cc6c3ab7b0e25d1c70aba55f8a9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 049/176] nv2a/vk: Ensure texture bindings exist before dirty check --- hw/xbox/nv2a/pgraph/vk/texture.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index ea6adf671c6..70fc9c70236 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1283,8 +1283,10 @@ static void create_texture(PGRAPHState *pg, int texture_idx) static bool check_textures_dirty(PGRAPHState *pg) { + PGRAPHVkState *r = pg->vk_renderer_state; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { - if (pg->texture_dirty[i]) { + if (!r->texture_bindings[i] || pg->texture_dirty[i]) { return true; } } @@ -1324,7 +1326,8 @@ void pgraph_vk_bind_textures(NV2AState *d) r->texture_bindings[i] = &r->dummy_texture; continue; } - if (!pg->texture_dirty[i]) { // FIXME: Fails to check memory + if (r->texture_bindings[i] && !pg->texture_dirty[i]) { + // FIXME: Fails to check memory continue; } From c4ac083d3daf11616caaec6e789c2a5cc4fada66 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 050/176] nv2a/vk: Sync RAM buffer during renderer init --- hw/xbox/nv2a/pgraph/vk/renderer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index f947aa39e5f..d95c263f486 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -49,6 +49,9 @@ static void pgraph_vk_init_thread(NV2AState *d) pgraph_vk_init_reports(pg); pgraph_vk_init_compute(pg); pgraph_vk_init_display(pg); + + pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, + memory_region_size(d->vram)); } static void pgraph_vk_finalize(NV2AState *d) From d6e8307950379829e9c3a649a1ae088ba03a962b Mon Sep 17 
00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 051/176] nv2a/vk: Always provide early_context_init --- hw/xbox/nv2a/pgraph/vk/renderer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index d95c263f486..7d6a24df1ec 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -24,12 +24,14 @@ #if HAVE_EXTERNAL_MEMORY static GloContext *g_gl_context; +#endif -static void gl_context_init(void) +static void early_context_init(void) { +#if HAVE_EXTERNAL_MEMORY g_gl_context = glo_context_create(); -} #endif +} static void pgraph_vk_init_thread(NV2AState *d) { @@ -201,9 +203,7 @@ static PGRAPHRenderer pgraph_vk_renderer = { .name = "Vulkan", .ops = { .init = pgraph_vk_init, -#if HAVE_EXTERNAL_MEMORY - .early_context_init = gl_context_init, -#endif + .early_context_init = early_context_init, .init_thread = pgraph_vk_init_thread, .finalize = pgraph_vk_finalize, .clear_report_value = pgraph_vk_clear_report_value, From 3ccea5fa1266dabb2062a738a148055e09a5a1e9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 052/176] nv2a/vk: Release renderer state at finalize --- hw/xbox/nv2a/pgraph/vk/renderer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index 7d6a24df1ec..e57a6e9a0cb 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -70,6 +70,9 @@ static void pgraph_vk_finalize(NV2AState *d) pgraph_vk_finalize_buffers(d); pgraph_vk_finalize_command_buffers(pg); pgraph_vk_finalize_instance(pg); + + g_free(pg->vk_renderer_state); + pg->vk_renderer_state = NULL; } static void pgraph_vk_flush(NV2AState *d) From 7b6b5f396c4313ff6bd034a4abd6e2e4beccf0c5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 053/176] 
nv2a/vk: Release report queue nodes at finalize --- hw/xbox/nv2a/pgraph/vk/reports.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c index 2e6bdf96f32..8c5d7136be0 100644 --- a/hw/xbox/nv2a/pgraph/vk/reports.c +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -43,6 +43,12 @@ void pgraph_vk_finalize_reports(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; + QueryReport *q, *next; + QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) { + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(q); + } + vkDestroyQueryPool(r->device, r->query_pool, NULL); } From 6fd0f175b4dfe4d4eaf7390664158c08b02fed34 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 054/176] nv2a: Support switching renderers at runtime --- hw/xbox/nv2a/pgraph/pgraph.c | 69 ++++++++++++++++++++++++++++++++---- hw/xbox/nv2a/pgraph/pgraph.h | 1 + 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index 95fd0e5e62f..26fe4f7ac25 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -218,6 +218,7 @@ void pgraph_init(NV2AState *d) PGRAPHState *pg = &d->pgraph; qemu_mutex_init(&pg->lock); + qemu_mutex_init(&pg->renderer_lock); qemu_event_init(&pg->sync_complete, false); qemu_event_init(&pg->flush_complete, false); @@ -279,8 +280,17 @@ void nv2a_context_init(void) renderers[g_config.display.renderer]->name); } - if (renderers[g_config.display.renderer]->ops.early_context_init) { - renderers[g_config.display.renderer]->ops.early_context_init(); + // FIXME: We need a mechanism for renderer to initialize new GL contexts + // on the main thread at run time. For now, just let them all create + // what they need. 
+ for (int i = 0; i < ARRAY_SIZE(renderers); i++) { + const PGRAPHRenderer *r = renderers[i]; + if (!r) { + continue; + } + if (r->ops.early_context_init) { + r->ops.early_context_init(); + } } } @@ -298,32 +308,40 @@ void pgraph_destroy(PGRAPHState *pg) int nv2a_get_framebuffer_surface(void) { NV2AState *d = g_nv2a; + int s = 0; + qemu_mutex_lock(&d->pgraph.renderer_lock); if (d->pgraph.renderer->ops.get_framebuffer_surface) { - return d->pgraph.renderer->ops.get_framebuffer_surface(d); + s = d->pgraph.renderer->ops.get_framebuffer_surface(d); } + qemu_mutex_unlock(&d->pgraph.renderer_lock); - return 0; + return s; } void nv2a_set_surface_scale_factor(unsigned int scale) { NV2AState *d = g_nv2a; + qemu_mutex_lock(&d->pgraph.renderer_lock); if (d->pgraph.renderer->ops.set_surface_scale_factor) { d->pgraph.renderer->ops.set_surface_scale_factor(d, scale); } + qemu_mutex_unlock(&d->pgraph.renderer_lock); } unsigned int nv2a_get_surface_scale_factor(void) { NV2AState *d = g_nv2a; + int s = 1; + qemu_mutex_lock(&d->pgraph.renderer_lock); if (d->pgraph.renderer->ops.get_surface_scale_factor) { - return d->pgraph.renderer->ops.get_surface_scale_factor(d); + s = d->pgraph.renderer->ops.get_surface_scale_factor(d); } + qemu_mutex_unlock(&d->pgraph.renderer_lock); - return 1; + return s; } #define METHOD_ADDR(gclass, name) \ @@ -2877,6 +2895,45 @@ void pgraph_process_pending(NV2AState *d) { PGRAPHState *pg = &d->pgraph; pg->renderer->ops.process_pending(d); + + if (g_config.display.renderer != pg->renderer->type) { + qemu_mutex_lock(&d->pgraph.renderer_lock); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + + if (pg->renderer) { + qemu_event_reset(&pg->flush_complete); + pg->flush_pending = true; + + qemu_mutex_lock(&d->pfifo.lock); + qemu_mutex_unlock(&d->pgraph.lock); + + if (pg->renderer->ops.process_pending) { + pg->renderer->ops.process_pending(d); + } + + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + + if 
(pg->renderer->ops.finalize) { + pg->renderer->ops.finalize(d); + } + } + + // FIXME: Handle missing renderer, init errors + pg->renderer = renderers[g_config.display.renderer]; + + if (pg->renderer->ops.init) { + pg->renderer->ops.init(d); + } + if (pg->renderer->ops.init_thread) { + pg->renderer->ops.init_thread(d); + } + + qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } } void pgraph_process_pending_reports(NV2AState *d) diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index fa94603e6d8..6d6d04cdd16 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -127,6 +127,7 @@ typedef struct PGRAPHRenderer { typedef struct PGRAPHState { QemuMutex lock; + QemuMutex renderer_lock; uint32_t pending_interrupts; uint32_t enabled_interrupts; From 98fa394f8432e5af2cec7a6563fc55b798b7bdf5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 055/176] nv2a: Add note about downloading dirty surfaces during flush --- hw/xbox/nv2a/pgraph/gl/surface.c | 3 +++ hw/xbox/nv2a/pgraph/vk/surface.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index a2a00d09ac8..c0aa4a4f260 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -1396,6 +1396,9 @@ static void flush_surfaces(NV2AState *d) SurfaceBinding *s, *next; QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + // FIXME: We should download all surfaces to ram, but need to + // investigate corruption issue + // pgraph_gl_surface_download_if_dirty(d, s); pgraph_gl_surface_invalidate(d, s); } } diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 9df98666bff..43b0e743d72 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -1477,6 +1477,9 @@ void pgraph_vk_surface_flush(NV2AState *d) 
SurfaceBinding *s, *next; QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + // FIXME: We should download all surfaces to ram, but need to + // investigate corruption issue + pgraph_vk_surface_download_if_dirty(d, s); invalidate_surface(d, s); } prune_invalid_surfaces(r, 0); From f3b6d50d99cc517aa117cde4a3057aaf420951a1 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 056/176] nv2a: Merge renderer init, init_thread methods --- hw/xbox/nv2a/pgraph/gl/display.c | 4 ++++ hw/xbox/nv2a/pgraph/gl/renderer.c | 16 +++------------- hw/xbox/nv2a/pgraph/pgraph.c | 8 ++------ hw/xbox/nv2a/pgraph/pgraph.h | 1 - hw/xbox/nv2a/pgraph/vk/renderer.c | 16 +++++----------- 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index ed0992e8832..23472ac4e1d 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -30,6 +30,8 @@ void pgraph_gl_init_display_renderer(NV2AState *d) struct PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; + glo_set_current(g_nv2a_context_display); + glGenTextures(1, &r->gl_display_buffer); r->gl_display_buffer_internal_format = 0; r->gl_display_buffer_width = 0; @@ -100,6 +102,8 @@ void pgraph_gl_init_display_renderer(NV2AState *d) glGenFramebuffers(1, &r->disp_rndr.fbo); glGenTextures(1, &r->disp_rndr.pvideo_tex); assert(glGetError() == GL_NO_ERROR); + + glo_set_current(g_nv2a_context_render); } void pgraph_gl_finalize_display(PGRAPHState *pg) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 930e4454c1a..d65b8cdeffa 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -27,7 +27,7 @@ GloContext *g_nv2a_context_render; GloContext *g_nv2a_context_display; -static void nv2a_gl_context_init(void) +static void early_context_init(void) { g_nv2a_context_render = glo_context_create(); g_nv2a_context_display 
= glo_context_create(); @@ -56,23 +56,14 @@ static void pgraph_gl_init(NV2AState *d) pgraph_gl_init_texture_cache(d); pgraph_gl_init_vertex_cache(d); pgraph_gl_init_shader_cache(pg); - - glo_set_current(g_nv2a_context_display); pgraph_gl_init_display_renderer(d); pgraph_gl_update_entire_memory_buffer(d); - glo_set_current(NULL); - pg->uniform_attrs = 0; pg->swizzle_attrs = 0; } -static void pgraph_gl_init_thread(NV2AState *d) -{ - glo_set_current(g_nv2a_context_render); -} - static void pgraph_gl_finalize(NV2AState *d) { PGRAPHState *pg = &d->pgraph; @@ -172,7 +163,7 @@ static void pgraph_gl_pre_shutdown_wait(NV2AState *d) PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; - qemu_event_wait(&r->shader_cache_writeback_complete); + qemu_event_wait(&r->shader_cache_writeback_complete); } static PGRAPHRenderer pgraph_gl_renderer = { @@ -180,8 +171,7 @@ static PGRAPHRenderer pgraph_gl_renderer = { .name = "OpenGL", .ops = { .init = pgraph_gl_init, - .early_context_init = nv2a_gl_context_init, - .init_thread = pgraph_gl_init_thread, + .early_context_init = early_context_init, .finalize = pgraph_gl_finalize, .clear_report_value = pgraph_gl_clear_report_value, .clear_surface = pgraph_gl_clear_surface, diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index 26fe4f7ac25..e66dd86039f 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -240,7 +240,6 @@ void pgraph_init(NV2AState *d) pgraph_clear_dirty_reg_map(pg); pg->renderer = renderers[g_config.display.renderer]; - pg->renderer->ops.init(d); } void pgraph_clear_dirty_reg_map(PGRAPHState *pg) @@ -250,8 +249,8 @@ void pgraph_clear_dirty_reg_map(PGRAPHState *pg) void pgraph_init_thread(NV2AState *d) { - if (d->pgraph.renderer->ops.init_thread) { - d->pgraph.renderer->ops.init_thread(d); + if (d->pgraph.renderer->ops.init) { + d->pgraph.renderer->ops.init(d); } } @@ -2926,9 +2925,6 @@ void pgraph_process_pending(NV2AState *d) if (pg->renderer->ops.init) { 
pg->renderer->ops.init(d); } - if (pg->renderer->ops.init_thread) { - pg->renderer->ops.init_thread(d); - } qemu_mutex_unlock(&d->pgraph.renderer_lock); qemu_mutex_unlock(&d->pgraph.lock); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 6d6d04cdd16..576cefcdb0e 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -101,7 +101,6 @@ typedef struct PGRAPHRenderer { struct { void (*early_context_init)(void); void (*init)(NV2AState *d); - void (*init_thread)(NV2AState *d); void (*finalize)(NV2AState *d); void (*clear_report_value)(NV2AState *d); void (*clear_surface)(NV2AState *d, uint32_t parameter); diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index e57a6e9a0cb..0cf9ee41eaf 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -33,14 +33,18 @@ static void early_context_init(void) #endif } -static void pgraph_vk_init_thread(NV2AState *d) +static void pgraph_vk_init(NV2AState *d) { PGRAPHState *pg = &d->pgraph; + pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState)); + #if HAVE_EXTERNAL_MEMORY glo_set_current(g_gl_context); #endif + pgraph_vk_debug_init(); + pgraph_vk_init_instance(pg); pgraph_vk_init_command_buffers(pg); pgraph_vk_init_buffers(d); @@ -192,22 +196,12 @@ static int pgraph_vk_get_framebuffer_surface(NV2AState *d) #endif } -static void pgraph_vk_init(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState)); - - pgraph_vk_debug_init(); -} - static PGRAPHRenderer pgraph_vk_renderer = { .type = CONFIG_DISPLAY_RENDERER_VULKAN, .name = "Vulkan", .ops = { .init = pgraph_vk_init, .early_context_init = early_context_init, - .init_thread = pgraph_vk_init_thread, .finalize = pgraph_vk_finalize, .clear_report_value = pgraph_vk_clear_report_value, .clear_surface = pgraph_vk_clear_surface, From c1bbe39f227e72ddd8e9d14faf7b6ea8c57d1821 Mon Sep 17 
00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 057/176] nv2a/gl: Rename some functions for clarity --- hw/xbox/nv2a/pgraph/gl/display.c | 2 +- hw/xbox/nv2a/pgraph/gl/renderer.c | 10 +++++----- hw/xbox/nv2a/pgraph/gl/renderer.h | 10 +++++----- hw/xbox/nv2a/pgraph/gl/shaders.c | 2 +- hw/xbox/nv2a/pgraph/gl/texture.c | 2 +- hw/xbox/nv2a/pgraph/gl/vertex.c | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index 23472ac4e1d..fbea7d2f0c0 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -25,7 +25,7 @@ #include -void pgraph_gl_init_display_renderer(NV2AState *d) +void pgraph_gl_init_display(NV2AState *d) { struct PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index d65b8cdeffa..02d28a2130c 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -53,10 +53,10 @@ static void pgraph_gl_init(NV2AState *d) pgraph_gl_init_surfaces(pg); pgraph_gl_init_reports(d); - pgraph_gl_init_texture_cache(d); - pgraph_gl_init_vertex_cache(d); - pgraph_gl_init_shader_cache(pg); - pgraph_gl_init_display_renderer(d); + pgraph_gl_init_textures(d); + pgraph_gl_init_buffers(d); + pgraph_gl_init_shaders(pg); + pgraph_gl_init_display(d); pgraph_gl_update_entire_memory_buffer(d); @@ -74,7 +74,7 @@ static void pgraph_gl_finalize(NV2AState *d) pgraph_gl_finalize_shaders(pg); pgraph_gl_finalize_textures(pg); pgraph_gl_finalize_reports(pg); - pgraph_gl_finalize_vertex(pg); + pgraph_gl_finalize_buffers(pg); pgraph_gl_finalize_display(pg); glo_set_current(NULL); diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 5044f9d5a25..14160b21b79 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -254,18 +254,18 @@ void 
pgraph_gl_surface_flush(NV2AState *d); void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write); void pgraph_gl_sync(NV2AState *d); void pgraph_gl_update_entire_memory_buffer(NV2AState *d); -void pgraph_gl_init_display_renderer(NV2AState *d); +void pgraph_gl_init_display(NV2AState *d); void pgraph_gl_finalize_display(PGRAPHState *pg); void pgraph_gl_init_reports(NV2AState *d); void pgraph_gl_finalize_reports(PGRAPHState *pg); -void pgraph_gl_init_shader_cache(PGRAPHState *pg); +void pgraph_gl_init_shaders(PGRAPHState *pg); void pgraph_gl_finalize_shaders(PGRAPHState *pg); void pgraph_gl_init_surfaces(PGRAPHState *pg); void pgraph_gl_finalize_surfaces(PGRAPHState *pg); -void pgraph_gl_init_texture_cache(NV2AState *d); +void pgraph_gl_init_textures(NV2AState *d); void pgraph_gl_finalize_textures(PGRAPHState *pg); -void pgraph_gl_init_vertex_cache(NV2AState *d); -void pgraph_gl_finalize_vertex(PGRAPHState *pg); +void pgraph_gl_init_buffers(NV2AState *d); +void pgraph_gl_finalize_buffers(PGRAPHState *pg); void pgraph_gl_process_pending_downloads(NV2AState *d); void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg); void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 5569277252a..0d725dd304b 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -551,7 +551,7 @@ static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) return memcmp(&snode->state, key, sizeof(ShaderState)); } -void pgraph_gl_init_shader_cache(PGRAPHState *pg) +void pgraph_gl_init_shaders(PGRAPHState *pg) { PGRAPHGLState *r = pg->gl_renderer_state; diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c index 942d1fe1948..4011e979fec 100644 --- a/hw/xbox/nv2a/pgraph/gl/texture.c +++ 
b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -791,7 +791,7 @@ static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) return memcmp(&tnode->key, key, sizeof(TextureKey)); } -void pgraph_gl_init_texture_cache(NV2AState *d) +void pgraph_gl_init_textures(NV2AState *d) { PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c index 2fd39db7c69..c2eccdb128b 100644 --- a/hw/xbox/nv2a/pgraph/gl/vertex.c +++ b/hw/xbox/nv2a/pgraph/gl/vertex.c @@ -247,7 +247,7 @@ static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) static const size_t element_cache_size = 50*1024; -void pgraph_gl_init_vertex_cache(NV2AState *d) +void pgraph_gl_init_buffers(NV2AState *d) { PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; @@ -283,7 +283,7 @@ void pgraph_gl_init_vertex_cache(NV2AState *d) assert(glGetError() == GL_NO_ERROR); } -void pgraph_gl_finalize_vertex(PGRAPHState *pg) +void pgraph_gl_finalize_buffers(PGRAPHState *pg) { PGRAPHGLState *r = pg->gl_renderer_state; From 25afb8603d42fd1b057e3ff8e21a3ea6f4bfd148 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 058/176] nv2a: Handle renderer init errors more gracefully --- hw/xbox/nv2a/pgraph/gl/renderer.c | 2 +- hw/xbox/nv2a/pgraph/null/renderer.c | 2 +- hw/xbox/nv2a/pgraph/pgraph.c | 70 ++++++++++--- hw/xbox/nv2a/pgraph/pgraph.h | 2 +- hw/xbox/nv2a/pgraph/vk/instance.c | 151 ++++++++++++++++++++-------- hw/xbox/nv2a/pgraph/vk/renderer.c | 8 +- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 +- 7 files changed, 173 insertions(+), 64 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 02d28a2130c..9e22a80f6d5 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -33,7 +33,7 @@ static void early_context_init(void) g_nv2a_context_display = glo_context_create(); } -static 
void pgraph_gl_init(NV2AState *d) +static void pgraph_gl_init(NV2AState *d, Error **errp) { PGRAPHState *pg = &d->pgraph; diff --git a/hw/xbox/nv2a/pgraph/null/renderer.c b/hw/xbox/nv2a/pgraph/null/renderer.c index 9a9c2512ccf..8b34efc5d17 100644 --- a/hw/xbox/nv2a/pgraph/null/renderer.c +++ b/hw/xbox/nv2a/pgraph/null/renderer.c @@ -111,7 +111,7 @@ static void pgraph_null_surface_update(NV2AState *d, bool upload, { } -static void pgraph_null_init(NV2AState *d) +static void pgraph_null_init(NV2AState *d, Error **errp) { PGRAPHState *pg = &d->pgraph; pg->null_renderer_state = NULL; diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index e66dd86039f..7ffc1ae5de1 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -20,6 +20,7 @@ */ #include "../nv2a_int.h" +#include "ui/xemu-notifications.h" #include "ui/xemu-settings.h" #include "util.h" #include "swizzle.h" @@ -238,8 +239,6 @@ void pgraph_init(NV2AState *d) } pgraph_clear_dirty_reg_map(pg); - - pg->renderer = renderers[g_config.display.renderer]; } void pgraph_clear_dirty_reg_map(PGRAPHState *pg) @@ -247,13 +246,6 @@ void pgraph_clear_dirty_reg_map(PGRAPHState *pg) memset(pg->regs_dirty, 0, sizeof(pg->regs_dirty)); } -void pgraph_init_thread(NV2AState *d) -{ - if (d->pgraph.renderer->ops.init) { - d->pgraph.renderer->ops.init(d); - } -} - static CONFIG_DISPLAY_RENDERER get_default_renderer(void) { #ifdef CONFIG_OPENGL @@ -293,6 +285,59 @@ void nv2a_context_init(void) } } +static bool attempt_renderer_init(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + + pg->renderer = renderers[g_config.display.renderer]; + if (!pg->renderer) { + xemu_queue_error_message("Configured renderer not available"); + return false; + } + + Error *local_err = NULL; + if (pg->renderer->ops.init) { + pg->renderer->ops.init(d, &local_err); + } + if (local_err) { + const char *msg = error_get_pretty(local_err); + xemu_queue_error_message(msg); + error_free(local_err); 
+ local_err = NULL; + return false; + } + + return true; +} + +static void init_renderer(PGRAPHState *pg) +{ + if (attempt_renderer_init(pg)) { + return; // Success + } + + CONFIG_DISPLAY_RENDERER default_renderer = get_default_renderer(); + if (default_renderer != g_config.display.renderer) { + g_config.display.renderer = default_renderer; + if (attempt_renderer_init(pg)) { + g_autofree gchar *msg = g_strdup_printf( + "Switched to default renderer: %s", pg->renderer->name); + xemu_queue_notification(msg); + return; + } + } + + // FIXME: Try others + + fprintf(stderr, "Fatal error: cannot initialize renderer\n"); + exit(1); +} + +void pgraph_init_thread(NV2AState *d) +{ + init_renderer(&d->pgraph); +} + void pgraph_destroy(PGRAPHState *pg) { NV2AState *d = container_of(pg, NV2AState, pgraph); @@ -2919,12 +2964,7 @@ void pgraph_process_pending(NV2AState *d) } } - // FIXME: Handle missing renderer, init errors - pg->renderer = renderers[g_config.display.renderer]; - - if (pg->renderer->ops.init) { - pg->renderer->ops.init(d); - } + init_renderer(pg); qemu_mutex_unlock(&d->pgraph.renderer_lock); qemu_mutex_unlock(&d->pgraph.lock); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 576cefcdb0e..bbff5f2c58f 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -100,7 +100,7 @@ typedef struct PGRAPHRenderer { const char *name; struct { void (*early_context_init)(void); - void (*init)(NV2AState *d); + void (*init)(NV2AState *d, Error **errp); void (*finalize)(NV2AState *d); void (*clear_report_value)(NV2AState *d); void (*clear_surface)(NV2AState *d, uint32_t parameter); diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 65205921462..d9504402dbd 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -98,19 +98,23 @@ static bool check_validation_layer_support(void) return true; } -static SDL_Window *create_window(void) +static void 
create_window(PGRAPHVkState *r, Error **errp) { - SDL_Window *window = SDL_CreateWindow( + r->window = SDL_CreateWindow( "SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN); - if (window == NULL) { - fprintf(stderr, "%s: Failed to create window\n", __func__); - SDL_Quit(); - exit(1); + if (r->window == NULL) { + error_setg(errp, "SDL_CreateWindow failed: %s", SDL_GetError()); } +} - return window; +static void destroy_window(PGRAPHVkState *r) +{ + if (r->window) { + SDL_DestroyWindow(r->window); + r->window = NULL; + } } static VkExtensionPropertiesArray * @@ -199,13 +203,22 @@ add_optional_instance_extension_names(PGRAPHState *pg, VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } -static void create_instance(PGRAPHState *pg) +static bool create_instance(PGRAPHState *pg, Error **errp) { PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; - r->window = create_window(); + create_window(r, errp); + if (*errp) { + return false; + } - VK_CHECK(volkInitialize()); + result = volkInitialize(); + if (result != VK_SUCCESS) { + error_setg(errp, "volkInitialize failed"); + destroy_window(r); + return false; + } VkApplicationInfo app_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, @@ -234,14 +247,19 @@ static void create_instance(PGRAPHState *pg) all_required_extensions_available = false; } } - assert(all_required_extensions_available); + + if (!all_required_extensions_available) { + error_setg(errp, "Required instance extensions not available"); + goto error; + } add_optional_instance_extension_names(pg, available_extensions, enabled_extension_names); fprintf(stderr, "Enabled instance extensions:\n"); for (int i = 0; i < enabled_extension_names->len; i++) { - fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + fprintf(stderr, "- %s\n", + g_array_index(enabled_extension_names, char *, i)); } VkInstanceCreateInfo create_info = { @@ -270,7 +288,8 @@ static void 
create_instance(PGRAPHState *pg) if (enable_validation) { if (check_validation_layer_support()) { - fprintf(stderr, "Warning: Validation layers enabled. Expect performance impact.\n"); + fprintf(stderr, "Warning: Validation layers enabled. Expect " + "performance impact.\n"); create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); create_info.ppEnabledLayerNames = validation_layers; if (r->debug_utils_extension_enabled) { @@ -283,9 +302,19 @@ static void create_instance(PGRAPHState *pg) } } - VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance)); + result = vkCreateInstance(&create_info, NULL, &r->instance); + if (result != VK_SUCCESS) { + error_setg(errp, "Failed to create instance"); + return false; + } volkLoadInstance(r->instance); + return true; + +error: + volkFinalize(); + destroy_window(r); + return false; } static bool is_queue_family_indicies_complete(QueueFamilyIndices indices) @@ -399,15 +428,18 @@ static bool is_device_compatible(VkPhysicalDevice device) // FIXME: Check vram } -static void select_physical_device(PGRAPHState *pg) +static bool select_physical_device(PGRAPHState *pg, Error **errp) { PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; uint32_t num_physical_devices = 0; - vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL); - if (num_physical_devices == 0) { - assert(!"failed to find GPUs with Vulkan support"); + result = + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL); + if (result != VK_SUCCESS || num_physical_devices == 0) { + error_setg(errp, "Failed to find GPUs with Vulkan support"); + return false; } g_autofree VkPhysicalDevice *devices = @@ -430,7 +462,8 @@ static void select_physical_device(PGRAPHState *pg) } } if (r->physical_device == VK_NULL_HANDLE) { - assert(!"failed to find a suitable GPU"); + error_setg(errp, "Failed to find a suitable GPU"); + return false; } vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props); @@ -448,11 +481,13 @@ static void 
select_physical_device(PGRAPHState *pg) size_t vsh_attr_values_size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size); + return true; } -static void create_logical_device(PGRAPHState *pg) +static bool create_logical_device(PGRAPHState *pg, Error **errp) { PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; QueueFamilyIndices indices = pgraph_vk_find_queue_families(r->physical_device); @@ -468,7 +503,8 @@ static void create_logical_device(PGRAPHState *pg) fprintf(stderr, "Enabled device extensions:\n"); for (int i = 0; i < enabled_extension_names->len; i++) { - fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i)); + fprintf(stderr, "- %s\n", + g_array_index(enabled_extension_names, char *, i)); } float queuePriority = 1.0f; @@ -501,12 +537,18 @@ static void create_logical_device(PGRAPHState *pg) bool all_features_available = true; for (int i = 0; i < ARRAY_SIZE(required_features); i++) { if (required_features[i].available != VK_TRUE) { - fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name); + fprintf(stderr, + "Error: Device does not support required feature %s\n", + required_features[i].name); all_features_available = false; } *required_features[i].enabled = VK_TRUE; } - assert(all_features_available); + + if (!all_features_available) { + error_setg(errp, "Device does not support required features"); + return false; + } void *next_struct = NULL; @@ -548,10 +590,15 @@ static void create_logical_device(PGRAPHState *pg) device_create_info.ppEnabledLayerNames = validation_layers; } - VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL, - &r->device)); + result = vkCreateDevice(r->physical_device, &device_create_info, NULL, + &r->device); + if (result != VK_SUCCESS) { + error_setg(errp, "Failed to create logical device"); + return false; + } vkGetDeviceQueue(r->device, indices.queue_family, 0, 
&r->queue); + return true; } uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, @@ -570,9 +617,10 @@ uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, return 0xFFFFFFFF; // Unable to find memoryType } -static void init_allocator(PGRAPHState *pg) +static bool init_allocator(PGRAPHState *pg, Error **errp) { PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; VmaVulkanFunctions vulkanFunctions = { /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. @@ -631,32 +679,49 @@ static void init_allocator(PGRAPHState *pg) .pVulkanFunctions = &vulkanFunctions, }; - VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator)); -} - -static void finalize_allocator(PGRAPHState *pg) -{ - PGRAPHVkState *r = pg->vk_renderer_state; + result = vmaCreateAllocator(&create_info, &r->allocator); + if (result != VK_SUCCESS) { + error_setg(errp, "vmaCreateAllocator failed"); + return false; + } - vmaDestroyAllocator(r->allocator); + return true; } -void pgraph_vk_init_instance(PGRAPHState *pg) +void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp) { - create_instance(pg); - select_physical_device(pg); - create_logical_device(pg); - init_allocator(pg); + if (create_instance(pg, errp) && + select_physical_device(pg, errp) && + create_logical_device(pg, errp) && + init_allocator(pg, errp)) { + return; + } + + if (*errp) { + error_prepend(errp, "Failed to initialize Vulkan renderer: "); + } + pgraph_vk_finalize_instance(pg); } void pgraph_vk_finalize_instance(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - finalize_allocator(pg); - vkDestroyDevice(r->device, NULL); - r->device = VK_NULL_HANDLE; + if (r->allocator != VK_NULL_HANDLE) { + vmaDestroyAllocator(r->allocator); + r->allocator = VK_NULL_HANDLE; + } + + if (r->device != VK_NULL_HANDLE) { + vkDestroyDevice(r->device, NULL); + r->device = VK_NULL_HANDLE; + } + + if (r->instance != VK_NULL_HANDLE) { + vkDestroyInstance(r->instance, NULL); + r->instance = VK_NULL_HANDLE; 
+ } - vkDestroyInstance(r->instance, NULL); - r->instance = VK_NULL_HANDLE; + volkFinalize(); + destroy_window(r); } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index 0cf9ee41eaf..bc8adb62a12 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -33,7 +33,7 @@ static void early_context_init(void) #endif } -static void pgraph_vk_init(NV2AState *d) +static void pgraph_vk_init(NV2AState *d, Error **errp) { PGRAPHState *pg = &d->pgraph; @@ -45,7 +45,11 @@ static void pgraph_vk_init(NV2AState *d) pgraph_vk_debug_init(); - pgraph_vk_init_instance(pg); + pgraph_vk_init_instance(pg, errp); + if (*errp) { + return; + } + pgraph_vk_init_command_buffers(pg); pgraph_vk_init_buffers(d); pgraph_vk_init_surfaces(pg); diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 1897948e115..787f3df8a56 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -380,7 +380,7 @@ void pgraph_vk_check_memory_budget(PGRAPHState *pg); void pgraph_vk_debug_init(void); // instance.c -void pgraph_vk_init_instance(PGRAPHState *pg); +void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); void pgraph_vk_finalize_instance(PGRAPHState *pg); QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device); uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, From 792ed56d58ce5c9a90ef5c9899ce72e6416427f8 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 059/176] nv2a: Block renderer finalization on display present --- hw/xbox/nv2a/nv2a.h | 1 + hw/xbox/nv2a/pgraph/gl/surface.c | 4 ---- hw/xbox/nv2a/pgraph/pgraph.c | 29 +++++++++++++++++++++++++---- hw/xbox/nv2a/pgraph/pgraph.h | 3 +++ hw/xbox/nv2a/pgraph/vk/surface.c | 4 ---- ui/xemu.c | 1 + 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h index a5c4468debe..2a9b7312d4b 100644 
--- a/hw/xbox/nv2a/nv2a.h +++ b/hw/xbox/nv2a/nv2a.h @@ -24,6 +24,7 @@ void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); void nv2a_context_init(void); int nv2a_get_framebuffer_surface(void); +void nv2a_release_framebuffer_surface(void); void nv2a_set_surface_scale_factor(unsigned int scale); unsigned int nv2a_get_surface_scale_factor(void); const uint8_t *nv2a_get_dac_palette(void); diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index c0aa4a4f260..802a3febc0c 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -39,8 +39,6 @@ void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale) g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pfifo.lock); qatomic_set(&d->pfifo.halt, true); qemu_mutex_unlock(&d->pfifo.lock); @@ -67,8 +65,6 @@ void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale) qatomic_set(&d->pfifo.halt, false); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock_iothread(); } unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d) diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index 7ffc1ae5de1..534daa3c0fe 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -222,6 +222,7 @@ void pgraph_init(NV2AState *d) qemu_mutex_init(&pg->renderer_lock); qemu_event_init(&pg->sync_complete, false); qemu_event_init(&pg->flush_complete, false); + qemu_cond_init(&pg->framebuffer_released); pg->frame_time = 0; pg->draw_time = 0; @@ -352,26 +353,41 @@ void pgraph_destroy(PGRAPHState *pg) int nv2a_get_framebuffer_surface(void) { NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; int s = 0; - qemu_mutex_lock(&d->pgraph.renderer_lock); - if (d->pgraph.renderer->ops.get_framebuffer_surface) { - s = d->pgraph.renderer->ops.get_framebuffer_surface(d); + qemu_mutex_lock(&pg->renderer_lock); + assert(!pg->framebuffer_in_use); 
+ pg->framebuffer_in_use = true; + if (pg->renderer->ops.get_framebuffer_surface) { + s = pg->renderer->ops.get_framebuffer_surface(d); } - qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_unlock(&pg->renderer_lock); return s; } +void nv2a_release_framebuffer_surface(void) +{ + NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; + qemu_mutex_lock(&pg->renderer_lock); + pg->framebuffer_in_use = false; + qemu_cond_broadcast(&pg->framebuffer_released); + qemu_mutex_unlock(&pg->renderer_lock); +} + void nv2a_set_surface_scale_factor(unsigned int scale) { NV2AState *d = g_nv2a; + qemu_mutex_unlock_iothread(); qemu_mutex_lock(&d->pgraph.renderer_lock); if (d->pgraph.renderer->ops.set_surface_scale_factor) { d->pgraph.renderer->ops.set_surface_scale_factor(d, scale); } qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_lock_iothread(); } unsigned int nv2a_get_surface_scale_factor(void) @@ -379,11 +395,13 @@ unsigned int nv2a_get_surface_scale_factor(void) NV2AState *d = g_nv2a; int s = 1; + qemu_mutex_unlock_iothread(); qemu_mutex_lock(&d->pgraph.renderer_lock); if (d->pgraph.renderer->ops.get_surface_scale_factor) { s = d->pgraph.renderer->ops.get_surface_scale_factor(d); } qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_lock_iothread(); return s; } @@ -2958,6 +2976,9 @@ void pgraph_process_pending(NV2AState *d) qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); + while (pg->framebuffer_in_use) { + qemu_cond_wait(&d->pgraph.framebuffer_released, &d->pgraph.renderer_lock); + } if (pg->renderer->ops.finalize) { pg->renderer->ops.finalize(d); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index bbff5f2c58f..4ecc1c0d254 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -235,6 +235,9 @@ typedef struct PGRAPHState { bool sync_pending; QemuEvent sync_complete; + bool framebuffer_in_use; + QemuCond framebuffer_released; + unsigned int surface_scale_factor; uint8_t *scale_buf; 
diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 43b0e743d72..a348de1e71d 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -36,8 +36,6 @@ void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale) { g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pfifo.lock); qatomic_set(&d->pfifo.halt, true); qemu_mutex_unlock(&d->pfifo.lock); @@ -65,8 +63,6 @@ void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale) qatomic_set(&d->pfifo.halt, false); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock_iothread(); } unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d) diff --git a/ui/xemu.c b/ui/xemu.c index 0d01f224605..c8030fc8d0b 100644 --- a/ui/xemu.c +++ b/ui/xemu.c @@ -1209,6 +1209,7 @@ void sdl2_gl_refresh(DisplayChangeListener *dcl) qemu_mutex_unlock_main_loop(); glFinish(); + nv2a_release_framebuffer_surface(); SDL_GL_SwapWindow(scon->real_window); /* VGA update (see note above) + vblank */ From 22674f782c58b1857ae592d419d1a33f543d7b09 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 060/176] meson: Fix VMA options --- thirdparty/meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/meson.build b/thirdparty/meson.build index fed47721125..b9b9256f97a 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -4,10 +4,10 @@ libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTO volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) vma_defns = [ - '-DVMA_STATIC_VULKAN_FUNCTIONS=1', + '-DVMA_STATIC_VULKAN_FUNCTIONS=0', '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', ] -libvma = static_library('vma', sources: 'vma.cc', c_args: vma_defns, include_directories: 
'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) +libvma = static_library('vma', sources: 'vma.cc', cpp_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) vma = declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) From 0169caadd09b8199596acca061845813984aea47 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 061/176] nv2a/vk: Fallback to UBO if maxPushConstantsSize is insufficient --- hw/xbox/nv2a/pgraph/glsl/vsh.c | 29 +++++++++++----- hw/xbox/nv2a/pgraph/pgraph.h | 3 ++ hw/xbox/nv2a/pgraph/shaders.c | 4 +-- hw/xbox/nv2a/pgraph/shaders.h | 1 + hw/xbox/nv2a/pgraph/vertex.c | 20 +++++++++++ hw/xbox/nv2a/pgraph/vk/draw.c | 57 +++++++++++++++++++------------ hw/xbox/nv2a/pgraph/vk/instance.c | 3 -- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 ++ hw/xbox/nv2a/pgraph/vk/shaders.c | 30 ++++++++++++++++ hw/xbox/nv2a/pgraph/vk/vertex.c | 2 +- 10 files changed, 114 insertions(+), 37 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 4fcc09cac56..84609e18df8 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -93,15 +93,19 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) ); } mstring_append(header, "\n"); - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + int num_uniform_attrs = 0; + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { bool is_uniform = state->uniform_attrs & (1 << i); bool is_compressed = state->compressed_attrs & (1 << i); assert(!(is_uniform && is_compressed)); if (is_uniform) { - mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i); + mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, + num_uniform_attrs); + num_uniform_attrs += 1; } 
else { if (state->compressed_attrs & (1 << i)) { mstring_append_fmt(header, @@ -249,17 +253,24 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) shade_model_mult, shade_model_mult); - /* Return combined header + source */ if (state->vulkan) { + // FIXME: Optimize uniforms + if (state->use_push_constants_for_uniform_attrs) { + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + " vec4 inlineValue[%d];\n" + "};\n\n", num_uniform_attrs); + } else { + mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n", + num_uniform_attrs); + } mstring_append_fmt( - output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n", + output, + "layout(binding = %d, std140) uniform VshUniforms {\n" + "%s" + "};\n\n", VSH_UBO_BINDING, mstring_get_str(uniforms)); - // FIXME: Only needed for vk, for gl we use glVertexAttrib - mstring_append_fmt(output, - "layout(push_constant) uniform PushConstants {\n" - "vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n" - "};\n\n"); } else { mstring_append( output, mstring_get_str(uniforms)); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 4ecc1c0d254..634f15c8add 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -373,6 +373,9 @@ void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); void pgraph_reset_inline_buffers(PGRAPHState *pg); void pgraph_reset_draw_arrays(PGRAPHState *pg); void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data); +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count); /* RDI */ uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c index 82737b44f48..285d24f4392 100644 --- a/hw/xbox/nv2a/pgraph/shaders.c +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -39,10 +39,9 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) 
ShaderState state; - // We will hash it, so make sure any padding is zerod + // We will hash it, so make sure any padding is zeroed memset(&state, 0, sizeof(ShaderState)); - state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN; state.surface_scale_factor = pg->surface_scale_factor; state.compressed_attrs = pg->compressed_attrs; @@ -50,7 +49,6 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) state.swizzle_attrs = pg->swizzle_attrs; /* register combiner stuff */ - state.psh.vulkan = state.vulkan; state.psh.window_clip_exclusive = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL); diff --git a/hw/xbox/nv2a/pgraph/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h index 842658f8087..71febe2e2f8 100644 --- a/hw/xbox/nv2a/pgraph/shaders.h +++ b/hw/xbox/nv2a/pgraph/shaders.h @@ -55,6 +55,7 @@ enum MaterialColorSource { typedef struct ShaderState { bool vulkan; + bool use_push_constants_for_uniform_attrs; unsigned int surface_scale_factor; PshState psh; diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c index 47f7cb56880..31076896e7e 100644 --- a/hw/xbox/nv2a/pgraph/vertex.c +++ b/hw/xbox/nv2a/pgraph/vertex.c @@ -82,6 +82,26 @@ void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data) } } +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count) +{ + int num_attributes = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (attrs & (1 << i)) { + memcpy(values[num_attributes], + pg->vertex_attributes[i].inline_value, 4 * sizeof(float)); + num_attributes += 1; + } + } + + if (count) { + *count = num_attributes; + } +} + + void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr) { VertexAttribute *attribute = &pg->vertex_attributes[attr]; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 
f1261c412b7..8500852101d 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -982,19 +982,29 @@ static void create_pipeline(PGRAPHState *pg) // FIXME: No direct analog. Just do it with MSAA. // } - VkPushConstantRange push_constant_range = { - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .offset = 0, - // FIXME: Minimize push constants - .size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float), - }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &r->descriptor_set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_constant_range, }; + + VkPushConstantRange push_constant_range; + if (r->shader_binding->state.use_push_constants_for_uniform_attrs) { + int num_uniform_attributes = + __builtin_popcount(r->shader_binding->state.uniform_attrs); + if (num_uniform_attributes) { + push_constant_range = (VkPushConstantRange){ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + // FIXME: Minimize push constants + .size = num_uniform_attributes * 4 * sizeof(float), + }; + pipeline_layout_info.pushConstantRangeCount = 1; + pipeline_layout_info.pPushConstantRanges = &push_constant_range; + } + } + VkPipelineLayout layout; VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, &layout)); @@ -1031,23 +1041,28 @@ static void create_pipeline(PGRAPHState *pg) NV2A_VK_DGROUP_END(); } -static void push_vertex_attrib_values(PGRAPHState *pg) +static void push_vertex_attr_values(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - // FIXME: Do partial updates - - float attrib_values[NV2A_VERTEXSHADER_ATTRIBUTES * 4]; - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - attrib_values[i * 4 + 0] = pg->vertex_attributes[i].inline_value[0]; - attrib_values[i * 4 + 1] = pg->vertex_attributes[i].inline_value[1]; - attrib_values[i * 4 + 2] = pg->vertex_attributes[i].inline_value[2]; - attrib_values[i * 4 + 3] 
= pg->vertex_attributes[i].inline_value[3]; + if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) { + return; } - vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, - VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(attrib_values), - &attrib_values); + // FIXME: Partial updates + + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4]; + int num_uniform_attrs = 0; + + pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values, + &num_uniform_attrs); + + if (num_uniform_attrs > 0) { + vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, + num_uniform_attrs * 4 * sizeof(float), + &values); + } } static void bind_descriptor_sets(PGRAPHState *pg) @@ -1405,7 +1420,7 @@ static void begin_draw(PGRAPHState *pg) if (!pg->clearing) { bind_descriptor_sets(pg); - push_vertex_attrib_values(pg); + push_vertex_attr_values(pg); } r->in_draw = true; diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index d9504402dbd..cb7c677e453 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -478,9 +478,6 @@ static bool select_physical_device(PGRAPHState *pg, Error **errp) VK_VERSION_MINOR(r->device_props.driverVersion), VK_VERSION_PATCH(r->device_props.driverVersion)); - size_t vsh_attr_values_size = - NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); - assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size); return true; } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 787f3df8a56..c889626bb9b 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -192,6 +192,8 @@ typedef struct ShaderBinding { int clip_region_loc; int material_alpha_loc; + + int uniform_attrs_loc; } ShaderBinding; typedef struct TextureKey { diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index fde19242a52..6ba540a5114 100644 --- 
a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -34,6 +34,8 @@ #include "renderer.h" #include +const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); + static void create_descriptor_pool(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; @@ -305,6 +307,9 @@ static void update_shader_constant_locations(ShaderBinding *binding) binding->material_alpha_loc = uniform_index(&binding->vertex->uniforms, "material_alpha"); + + binding->uniform_attrs_loc = + uniform_index(&binding->vertex->uniforms, "inlineValue"); } static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) @@ -430,11 +435,26 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) return snode; } +static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding) +{ + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4]; + int num_uniform_attrs = 0; + + pgraph_get_inline_values(pg, binding->state.uniform_attrs, values, + &num_uniform_attrs); + + if (num_uniform_attrs > 0) { + uniform1fv(&binding->vertex->uniforms, binding->uniform_attrs_loc, + num_uniform_attrs * 4, &values[0][0]); + } +} + // FIXME: Move to common static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function) { + ShaderState *state = &binding->state; int i, j; /* update combiner constants */ @@ -662,6 +682,10 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc, pg->material_alpha); } + + if (!state->use_push_constants_for_uniform_attrs && state->uniform_attrs) { + update_uniform_attr_values(pg, binding); + } } // Quickly check PGRAPH state to see if any registers have changed that @@ -742,6 +766,12 @@ void pgraph_vk_bind_shaders(PGRAPHState *pg) ShaderState new_state; memset(&new_state, 0, sizeof(ShaderState)); new_state = pgraph_get_shader_state(pg); + 
new_state.vulkan = true; + new_state.psh.vulkan = true; + new_state.use_push_constants_for_uniform_attrs = + (r->device_props.limits.maxPushConstantsSize >= + MAX_UNIFORM_ATTR_VALUES_SIZE); + if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) { r->shader_binding = gen_shaders(pg, &new_state); r->shader_bindings_changed = true; diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c index 6625520c65e..5c4580aaa40 100644 --- a/hw/xbox/nv2a/pgraph/vk/vertex.c +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -309,4 +309,4 @@ void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d) pg->uniform_attrs |= 1 << i; } } -} \ No newline at end of file +} From 84c09c631ca8e7f463180a167e4fa72de0335a7b Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 062/176] nv2a/vk: Ensure error message is always set on instance failure --- hw/xbox/nv2a/pgraph/vk/instance.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index cb7c677e453..7366ff8678b 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -694,10 +694,14 @@ void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp) return; } + pgraph_vk_finalize_instance(pg); + + const char *msg = "Failed to initialize Vulkan renderer"; if (*errp) { - error_prepend(errp, "Failed to initialize Vulkan renderer: "); + error_prepend(errp, "%s: ", msg); + } else { + error_setg(errp, "%s", msg); } - pgraph_vk_finalize_instance(pg); } void pgraph_vk_finalize_instance(PGRAPHState *pg) From 1a57d4ae941c3d2309243a93ff8ea943cb4376a2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 063/176] nv2a/vk: Fix finish queue submission semaphore wait stage --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 8500852101d..b9a5ec657a6 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1201,7 +1201,7 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer)); r->in_aux_command_buffer = false; - VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; VkSubmitInfo submit_infos[] = { { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, From 9ab8549a4ec23876490842b110779099a6b5599c Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 064/176] nv2a/vk: Insert barrier after staging buffer copies --- hw/xbox/nv2a/pgraph/vk/draw.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index b9a5ec657a6..5c0e55b28ed 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1122,6 +1122,39 @@ static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd, VkBufferCopy copy_region = { .size = b_src->buffer_offset }; vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, ©_region); + VkAccessFlags dst_access_mask; + VkPipelineStageFlags dst_stage_mask; + + switch (index_dst) { + case BUFFER_INDEX: + dst_access_mask = VK_ACCESS_INDEX_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + case BUFFER_VERTEX_INLINE: + dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + case BUFFER_UNIFORM: + dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + break; + default: + assert(0); + break; + } + + VkBufferMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = 
dst_access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = b_dst->buffer, + .size = b_src->buffer_offset + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, 0, + 0, NULL, 1, &barrier, 0, NULL); + b_src->buffer_offset = 0; } From 89db9590f7cb321aff2a92ac7ed142a413046493 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 065/176] nv2a/vk: Flush vertex ram buffer, insert barrier --- hw/xbox/nv2a/pgraph/vk/draw.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 5c0e55b28ed..91c46720c5c 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1158,6 +1158,30 @@ static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd, b_src->buffer_offset = 0; } +static void flush_memory_buffer(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VK_CHECK(vmaFlushAllocation( + r->allocator, r->storage_buffers[BUFFER_VERTEX_RAM].allocation, 0, + VK_WHOLE_SIZE)); + + VkBufferMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_VERTEX_RAM].buffer, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, NULL, 1, + &barrier, 0, NULL); +} + static void begin_render_pass(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; @@ -1231,6 +1255,7 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) BUFFER_VERTEX_INLINE); sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM); 
bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + flush_memory_buffer(pg, cmd); VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer)); r->in_aux_command_buffer = false; From 9c43c0d702f483407a0794df0e45fef5a8dfd9cf Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 066/176] nv2a/vk: Add barrier after compute to staging buffer copy --- hw/xbox/nv2a/pgraph/vk/surface.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index a348de1e71d..76a5e44a315 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -291,6 +291,19 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, .size = packed_size, }; vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region); + + VkBufferMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, + &barrier, 0, NULL); } size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * From 3f0a7e514d3957f9a63ee601999c68e60cd9d198 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 067/176] nv2a/vk: Fix display dimensions --- hw/xbox/nv2a/pgraph/vk/display.c | 38 ++++++++++++++------------------ 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 595f119ca24..824c1fa2a14 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -382,8 +382,7 @@ static void destroy_current_display_image(PGRAPHState *pg) // 
FIXME: We may need to use two images. One for actually rendering display, // and another for GL in the correct tiling mode -static void create_display_image_from_surface(PGRAPHState *pg, - SurfaceBinding *surface) +static void create_display_image(PGRAPHState *pg, int width, int height) { PGRAPHVkState *r = pg->vk_renderer_state; PGRAPHVkDisplayState *d = &r->display; @@ -418,8 +417,8 @@ static void create_display_image_from_surface(PGRAPHState *pg, VkImageCreateInfo image_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .extent.width = surface->width, - .extent.height = surface->height, + .extent.width = width, + .extent.height = height, .extent.depth = 1, .mipLevels = 1, .arrayLayers = 1, @@ -430,8 +429,6 @@ static void create_display_image_from_surface(PGRAPHState *pg, .samples = VK_SAMPLE_COUNT_1_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; - pgraph_apply_scaling_factor(pg, &image_create_info.extent.width, - &image_create_info.extent.height); VkExternalMemoryImageCreateInfo external_memory_image_create_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, @@ -581,24 +578,15 @@ static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) { NV2AState *d = container_of(pg, NV2AState, pgraph); PGRAPHVkState *r = pg->vk_renderer_state; + ShaderUniformLayout *l = &r->display.display_frag->push_constants; + + int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache + uniform2f(l, display_size_loc, r->display.width, r->display.height); - unsigned int width, height; uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); int line_offset = surface->pitch / pline_offset; - - /* Adjust viewport height for interlaced mode, used only in 1080i */ - if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { - height *= 2; - } - - 
pgraph_apply_scaling_factor(pg, &width, &height); - - ShaderUniformLayout *l = &r->display.display_frag->push_constants; - int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache int line_offset_loc = uniform_index(l, "line_offset"); - uniform2f(l, display_size_loc, width, height); uniform1f(l, line_offset_loc, line_offset); #if 0 // FIXME: PVIDEO overlay @@ -878,18 +866,26 @@ void pgraph_vk_render_display(PGRAPHState *pg) uint32_t pline_offset, pstart_addr, pline_compare; d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset); if (surface == NULL || !surface->color) { return; } - unsigned int width = surface->width, height = surface->height; + unsigned int width = 0, height = 0; + d->vga.get_resolution(&d->vga, (int *)&width, (int *)&height); + + /* Adjust viewport height for interlaced mode, used only in 1080i */ + if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { + height *= 2; + } + pgraph_apply_scaling_factor(pg, &width, &height); PGRAPHVkDisplayState *disp = &r->display; if (!disp->image || disp->width != width || disp->height != height) { - create_display_image_from_surface(pg, surface); + create_display_image(pg, width, height); } render_display(pg, surface); From f35d489203d173dd060e09de01ec4d844bf6d8b0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 068/176] nv2a/vk: Fix renderer debug messenger registration --- hw/xbox/nv2a/pgraph/vk/instance.c | 40 ++++++++++++++++--------------- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 7366ff8678b..66a121a48c5 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -60,7 +60,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( 
VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) { - NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage); fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage); if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && @@ -270,20 +269,6 @@ static bool create_instance(PGRAPHState *pg, Error **errp) &g_array_index(enabled_extension_names, const char *, 0), }; - VkDebugUtilsMessengerCreateInfoEXT dbg_create_info; - if (r->debug_utils_extension_enabled) { - dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){ - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, - .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, - .pfnUserCallback = debugCallback, - }; - } - enable_validation = g_config.display.vulkan.validation_layers; if (enable_validation) { @@ -292,10 +277,6 @@ static bool create_instance(PGRAPHState *pg, Error **errp) "performance impact.\n"); create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); create_info.ppEnabledLayerNames = validation_layers; - if (r->debug_utils_extension_enabled) { - create_info.pNext = - (VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info; - } } else { fprintf(stderr, "Warning: validation layers not available\n"); enable_validation = false; @@ -309,6 +290,22 @@ static bool create_instance(PGRAPHState *pg, Error **errp) } volkLoadInstance(r->instance); + + if (r->debug_utils_extension_enabled) { + VkDebugUtilsMessengerCreateInfoEXT messenger_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + 
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = debugCallback, + }; + VK_CHECK(vkCreateDebugUtilsMessengerEXT(r->instance, &messenger_info, + NULL, &r->debug_messenger)); + } + return true; error: @@ -718,6 +715,11 @@ void pgraph_vk_finalize_instance(PGRAPHState *pg) r->device = VK_NULL_HANDLE; } + if (r->debug_messenger != VK_NULL_HANDLE) { + vkDestroyDebugUtilsMessengerEXT(r->instance, r->debug_messenger, NULL); + r->debug_messenger = VK_NULL_HANDLE; + } + if (r->instance != VK_NULL_HANDLE) { vkDestroyInstance(r->instance, NULL); r->instance = VK_NULL_HANDLE; diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index c889626bb9b..78409fda06d 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -271,6 +271,7 @@ typedef struct PGRAPHVkComputeState { typedef struct PGRAPHVkState { void *window; VkInstance instance; + VkDebugUtilsMessengerEXT debug_messenger; bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; From 126a51724ba052ae65fabfc21983ebf01672e6d0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 069/176] nv2a/vk: Finalize clear shaders --- hw/xbox/nv2a/pgraph/vk/draw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 91c46720c5c..49f8e2b8823 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -188,6 +188,14 @@ static void init_clear_shaders(PGRAPHState *pg) r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl); } +static void finalize_clear_shaders(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + pgraph_vk_destroy_shader_module(r, r->quad_vert_module); + pgraph_vk_destroy_shader_module(r, r->solid_frag_module); +} 
+ void pgraph_vk_init_pipelines(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; @@ -212,6 +220,7 @@ void pgraph_vk_finalize_pipelines(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; + finalize_clear_shaders(pg); finalize_pipeline_cache(pg); vkDestroyFence(r->device, r->command_buffer_fence, NULL); From b0e3d00a5d53635debfd90c6bf18e14e7d5dd603 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 070/176] nv2a/vk: Drop fragment shader when only depth attachment is cleared --- hw/xbox/nv2a/pgraph/vk/draw.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 49f8e2b8823..3784aa1acaf 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -448,20 +448,24 @@ static void create_clear_pipeline(PGRAPHState *pg) bool partial_color_clear = clear_any_color_channels && !clear_all_color_channels; - VkPipelineShaderStageCreateInfo shader_stages[] = { + int num_active_shader_stages = 0; + VkPipelineShaderStageCreateInfo shader_stages[2]; + shader_stages[num_active_shader_stages++] = (VkPipelineShaderStageCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = r->quad_vert_module->module, .pName = "main", - }, - (VkPipelineShaderStageCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = r->solid_frag_module->module, - .pName = "main", - }, - }; + }; + if (clear_any_color_channels) { + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->solid_frag_module->module, + .pName = "main", + }; + } VkPipelineVertexInputStateCreateInfo vertex_input = { .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -565,7 +569,7 @@ static void create_clear_pipeline(PGRAPHState *pg) VkGraphicsPipelineCreateInfo pipeline_info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = ARRAY_SIZE(shader_stages), + .stageCount = num_active_shader_stages, .pStages = shader_stages, .pVertexInputState = &vertex_input, .pInputAssemblyState = &input_assembly, From 2bb2084eced3499441235689b0cf41c7a2dbb6d4 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 071/176] nv2a/vk: Use textureLod in psh_append_shadowmap --- hw/xbox/nv2a/pgraph/glsl/psh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 58ad5cf7acf..35e95855bd1 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -643,10 +643,10 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa if (ps->state.tex_x8y24[i]) { mstring_append_fmt( vars, - "uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i); + "uvec4 t%d_depth_raw = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, i, i, i); mstring_append_fmt( vars, - "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);", + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);\n", i, i); } else { mstring_append_fmt( From 497bac61c9c2403f48bdcbb13632d73b797c4ccb Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 072/176] nv2a/vk: Ensure border color type matches format type --- hw/xbox/nv2a/pgraph/vk/texture.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 70fc9c70236..011cfc0cd8d 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1207,14 +1207,19 @@ static void 
create_texture(PGRAPHState *pg, int texture_idx) } else { // FIXME: Handle custom color in shader if (border_color_pack32 == 0x00000000) { - vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + vk_border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } else if (border_color_pack32 == 0xff000000) { - vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + vk_border_color = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else { - vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_WHITE; + vk_border_color = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; } } + if (vkf.vk_format == VK_FORMAT_R32_UINT) { + // Border color type must match sampled type + vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + } + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); From a80cfc8573588812e48f60b8b132033eabc456a9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 073/176] nv2a/vk: Don't call vkCmdBindVertexBuffers if 0 bindings --- hw/xbox/nv2a/pgraph/vk/draw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 3784aa1acaf..8ba1acfbcae 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1796,6 +1796,10 @@ static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx, assert(buffer_idx == BUFFER_VERTEX_RAM || buffer_idx == BUFFER_VERTEX_INLINE); + if (r->num_active_vertex_binding_descriptions == 0) { + return; + } + VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES]; VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; From e8a39d6f45fd77eab51ae0bb2ef4add075815f4f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 074/176] nv2a/vk: Use correct min,mag texture filters --- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + hw/xbox/nv2a/pgraph/vk/texture.c | 35 +++++++++++++++++++++++++++++-- 2 files 
changed, 34 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 78409fda06d..b053909b81c 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -348,6 +348,7 @@ typedef struct PGRAPHVkState { TextureBinding *texture_bindings[NV2A_MAX_TEXTURES]; TextureBinding dummy_texture; bool texture_bindings_changed; + VkFormatProperties *texture_format_properties; Lru shader_cache; ShaderBinding *shader_cache_entries; diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 011cfc0cd8d..34e903d9586 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1005,6 +1005,13 @@ static void set_texture_label(PGRAPHState *pg, TextureBinding *texture) vmaSetAllocationName(r->allocator, texture->allocation, label); } +static bool is_linear_filter_supported_for_format(PGRAPHVkState *r, + int kelvin_format) +{ + return r->texture_format_properties[kelvin_format].optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; +} + static void create_texture(PGRAPHState *pg, int texture_idx) { NV2A_VK_DGROUP_BEGIN("Creating texture %d", texture_idx); @@ -1230,12 +1237,25 @@ static void create_texture(PGRAPHState *pg, int texture_idx) if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); + VkFilter vk_min_filter, vk_mag_filter; unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); assert(mag_filter < ARRAY_SIZE(pgraph_texture_mag_filter_vk_map)); unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); assert(min_filter < ARRAY_SIZE(pgraph_texture_min_filter_vk_map)); + if (is_linear_filter_supported_for_format(r, state.color_format)) { + vk_mag_filter = pgraph_texture_min_filter_vk_map[mag_filter]; + vk_min_filter = pgraph_texture_min_filter_vk_map[min_filter]; + + if (f_basic.linear && vk_mag_filter != vk_min_filter) { + // Per spec, if 
coordinates unnormalized, filters must be same + vk_mag_filter = vk_min_filter = VK_FILTER_LINEAR; + } + } else { + vk_mag_filter = vk_min_filter = VK_FILTER_NEAREST; + } + bool mipmap_nearest = f_basic.linear || image_create_info.mipLevels == 1 || min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || @@ -1246,8 +1266,8 @@ static void create_texture(PGRAPHState *pg, int texture_idx) VkSamplerCreateInfo sampler_create_info = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_LINEAR, // FIXME - .minFilter = VK_FILTER_LINEAR, // FIXME + .magFilter = vk_min_filter, + .minFilter = vk_mag_filter, .addressModeU = lookup_texture_address_mode( GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)), .addressModeV = lookup_texture_address_mode( @@ -1449,6 +1469,14 @@ void pgraph_vk_init_textures(PGRAPHState *pg) texture_cache_init(r); create_dummy_texture(pg); + + r->texture_format_properties = g_malloc0_n( + ARRAY_SIZE(kelvin_color_format_vk_map), sizeof(VkFormatProperties)); + for (int i = 0; i < ARRAY_SIZE(kelvin_color_format_vk_map); i++) { + vkGetPhysicalDeviceFormatProperties( + r->physical_device, kelvin_color_format_vk_map[i].vk_format, + &r->texture_format_properties[i]); + } } void pgraph_vk_finalize_textures(PGRAPHState *pg) @@ -1461,4 +1489,7 @@ void pgraph_vk_finalize_textures(PGRAPHState *pg) destroy_dummy_texture(r); texture_cache_finalize(r); + + g_free(r->texture_format_properties); + r->texture_format_properties = NULL; } From 7b37a9443827c8b782f9246e64ea4b88e20dc219 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 075/176] nv2a/vk: Finalize, simplify render passes --- hw/xbox/nv2a/pgraph/vk/draw.c | 60 ++++++++++++++++--------------- hw/xbox/nv2a/pgraph/vk/renderer.h | 4 +-- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 8ba1acfbcae..0816835113c 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ 
b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -196,12 +196,27 @@ static void finalize_clear_shaders(PGRAPHState *pg) pgraph_vk_destroy_shader_module(r, r->solid_frag_module); } +static void init_render_passes(PGRAPHVkState *r) +{ + r->render_passes = g_array_new(false, false, sizeof(RenderPass)); +} + +static void finalize_render_passes(PGRAPHVkState *r) +{ + for (int i = 0; i < r->render_passes->len; i++) { + RenderPass *p = &g_array_index(r->render_passes, RenderPass, i); + vkDestroyRenderPass(r->device, p->render_pass, NULL); + } + g_array_free(r->render_passes, true); +} + void pgraph_vk_init_pipelines(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; init_pipeline_cache(pg); init_clear_shaders(pg); + init_render_passes(r); VkSemaphoreCreateInfo semaphore_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO @@ -222,6 +237,7 @@ void pgraph_vk_finalize_pipelines(PGRAPHState *pg) finalize_clear_shaders(pg); finalize_pipeline_cache(pg); + finalize_render_passes(r); vkDestroyFence(r->device, r->command_buffer_fence, NULL); vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL); @@ -238,12 +254,10 @@ static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state) VK_FORMAT_UNDEFINED; } -static VkRenderPass create_render_pass(PGRAPHState *pg, RenderPassState *state) +static VkRenderPass create_render_pass(PGRAPHVkState *r, RenderPassState *state) { NV2A_VK_DPRINTF("Creating render pass"); - PGRAPHVkState *r = pg->vk_renderer_state; - VkAttachmentDescription attachments[2]; int num_attachments = 0; @@ -328,36 +342,24 @@ static VkRenderPass create_render_pass(PGRAPHState *pg, RenderPassState *state) return render_pass; } -static VkRenderPass add_new_render_pass(PGRAPHState *pg, RenderPassState *state) +static VkRenderPass add_new_render_pass(PGRAPHVkState *r, RenderPassState *state) { - PGRAPHVkState *r = pg->vk_renderer_state; - - if (r->render_passes_index == r->render_passes_capacity) { - int n_blocks = r->render_passes_capacity; - 
r->render_passes_capacity = n_blocks ? (n_blocks * 2) : 256; - r->render_passes = - g_realloc_n(r->render_passes, r->render_passes_capacity, - sizeof(*r->render_passes)); - } - - RenderPass *rp = &r->render_passes[r->render_passes_index++]; - memcpy(&rp->state, state, sizeof(*state)); - rp->render_pass = create_render_pass(pg, state); - - return rp->render_pass; + RenderPass new_pass; + memcpy(&new_pass.state, state, sizeof(*state)); + new_pass.render_pass = create_render_pass(r, state); + g_array_append_vals(r->render_passes, &new_pass, 1); + return new_pass.render_pass; } -static VkRenderPass get_render_pass(PGRAPHState *pg, RenderPassState *state) +static VkRenderPass get_render_pass(PGRAPHVkState *r, RenderPassState *state) { - PGRAPHVkState *r = pg->vk_renderer_state; - - for (int i = 0; i < r->render_passes_index; i++) { - if (!memcmp(&r->render_passes[i].state, state, sizeof(*state))) { - return r->render_passes[i].render_pass; + for (int i = 0; i < r->render_passes->len; i++) { + RenderPass *p = &g_array_index(r->render_passes, RenderPass, i); + if (!memcmp(&p->state, state, sizeof(*state))) { + return p->render_pass; } } - - return add_new_render_pass(pg, state); + return add_new_render_pass(r, state); } static void create_frame_buffer(PGRAPHState *pg) @@ -580,7 +582,7 @@ static void create_clear_pipeline(PGRAPHState *pg) .pColorBlendState = &color_blending, .pDynamicState = &dynamic_state, .layout = layout, - .renderPass = get_render_pass(pg, &key.render_pass_state), + .renderPass = get_render_pass(r, &key.render_pass_state), .subpass = 0, .basePipelineHandle = VK_NULL_HANDLE, }; @@ -1035,7 +1037,7 @@ static void create_pipeline(PGRAPHState *pg) .pColorBlendState = &color_blending, .pDynamicState = &dynamic_state, .layout = layout, - .renderPass = get_render_pass(pg, &key.render_pass_state), + .renderPass = get_render_pass(r, &key.render_pass_state), .subpass = 0, .basePipelineHandle = VK_NULL_HANDLE, }; diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h 
b/hw/xbox/nv2a/pgraph/vk/renderer.h index b053909b81c..c0a566c7791 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -303,9 +303,7 @@ typedef struct PGRAPHVkState { bool framebuffer_dirty; VkRenderPass render_pass; - RenderPass *render_passes; - int render_passes_index; - int render_passes_capacity; + GArray *render_passes; // RenderPass bool in_render_pass; bool in_draw; From bb69e5cd2f6ad36da1b5eb799059525d7ebffdce Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 076/176] nv2a/vk: Finalize more display state --- hw/xbox/nv2a/pgraph/vk/display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 824c1fa2a14..53dcb377b3f 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -317,6 +317,12 @@ static void destroy_display_pipeline(PGRAPHState *pg) vkDestroyPipeline(r->device, r->display.pipeline, NULL); r->display.pipeline = VK_NULL_HANDLE; + + vkDestroyPipelineLayout(r->device, r->display.pipeline_layout, NULL); + r->display.pipeline_layout = VK_NULL_HANDLE; + + pgraph_vk_destroy_shader_module(r, r->display.display_frag); + r->display.display_frag = NULL; } static void create_frame_buffer(PGRAPHState *pg) From 49ae57600674b462e4bf8398eaa728f8fa9806d6 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 077/176] nv2a/vk: Assert instead of exit() on validation error --- hw/xbox/nv2a/pgraph/vk/instance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 66a121a48c5..2003efd76af 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -65,7 +65,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && (messageSeverity &
(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) { - exit(1); + assert(false); } return VK_FALSE; } From 7018f379b3818d1bdc56a39f1cccf027c855c842 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 078/176] nv2a/vk: Add debug helper function for inserting markers --- hw/xbox/nv2a/pgraph/vk/debug.c | 13 +++++++++++++ hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + 2 files changed, 14 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c index a8cb08c4a2f..f7a54de9fc0 100644 --- a/hw/xbox/nv2a/pgraph/vk/debug.c +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -57,3 +57,16 @@ void pgraph_vk_debug_frame_terminator(void) } #endif } + +void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + const char *name, float color[4]) +{ + if (r->debug_utils_extension_enabled) { + VkDebugUtilsLabelEXT label_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pLabelName = name, + }; + memcpy(label_info.color, color, 4 * sizeof(float)); + vkCmdInsertDebugUtilsLabelEXT(cmd, &label_info); + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index c0a566c7791..0b835e1c5c3 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -380,6 +380,7 @@ void pgraph_vk_check_memory_budget(PGRAPHState *pg); // debug.c void pgraph_vk_debug_init(void); +void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, const char* name, float color[4]); // instance.c void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); From da1e72a39a3c4e9bedd9f64e52fbbfb6b88fef38 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH 079/176] nv2a/vk: Use additional descriptor sets in compute ops --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 ++ hw/xbox/nv2a/pgraph/vk/renderer.h | 5 ++- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 39 
++++++++++++++++++------ hw/xbox/nv2a/pgraph/vk/surface.c | 13 +++++--- hw/xbox/nv2a/pgraph/vk/texture.c | 14 ++++++--- 5 files changed, 55 insertions(+), 18 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 0816835113c..91aa6d13455 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1322,6 +1322,8 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) NV2AState *d = container_of(pg, NV2AState, pgraph); pgraph_vk_process_pending_reports_internal(d); + + pgraph_vk_compute_finish_complete(r); } void pgraph_vk_begin_command_buffer(PGRAPHState *pg) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 0b835e1c5c3..c66e3541930 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -260,7 +260,8 @@ typedef struct PGRAPHVkDisplayState { typedef struct PGRAPHVkComputeState { VkDescriptorPool descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; - VkDescriptorSet descriptor_sets[1]; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; VkPipelineLayout pipeline_layout; VkPipeline pipeline_pack_d24s8; VkPipeline pipeline_unpack_d24s8; @@ -458,6 +459,8 @@ void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); // surface-compute.c void pgraph_vk_init_compute(PGRAPHState *pg); +bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r); +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r); void pgraph_vk_finalize_compute(PGRAPHState *pg); void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VkCommandBuffer cmd, VkBuffer src, diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 045f8231b8f..dc14840dd14 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -189,7 +189,7 @@ static void create_descriptor_sets(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - 
VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)]; for (int i = 0; i < ARRAY_SIZE(layouts); i++) { layouts[i] = r->compute.descriptor_set_layout; } @@ -269,12 +269,15 @@ static void update_descriptor_sets(PGRAPHState *pg, assert(count == 3); VkWriteDescriptorSet descriptor_writes[3]; - const int descriptor_set_index = 0; + + assert(r->compute.descriptor_set_index < + ARRAY_SIZE(r->compute.descriptor_sets)); for (int i = 0; i < count; i++) { descriptor_writes[i] = (VkWriteDescriptorSet){ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = r->compute.descriptor_sets[descriptor_set_index], + .dstSet = + r->compute.descriptor_sets[r->compute.descriptor_set_index], .dstBinding = i, .dstArrayElement = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -283,6 +286,21 @@ static void update_descriptor_sets(PGRAPHState *pg, }; } vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL); + + r->compute.descriptor_set_index += 1; +} + +bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r) +{ + bool need_descriptor_write_reset = (r->compute.descriptor_set_index >= + ARRAY_SIZE(r->compute.descriptor_sets)); + + return need_descriptor_write_reset; +} + +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r) +{ + r->compute.descriptor_set_index = 0; } // @@ -329,6 +347,7 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, .range = output_size, }, }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { @@ -340,9 +359,10 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, } else { assert(!"Unsupported pack format"); } - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_layout, 0, 1, - &r->compute.descriptor_sets[0], 0, NULL); + vkCmdBindDescriptorSets( + cmd, VK_PIPELINE_BIND_POINT_COMPUTE, 
r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); uint32_t push_constants[2] = { input_width, output_width }; assert(sizeof(push_constants) == 8); @@ -408,9 +428,10 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, } else { assert(!"Unsupported pack format"); } - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_layout, 0, 1, - &r->compute.descriptor_sets[0], 0, NULL); + vkCmdBindDescriptorSets( + cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); assert(output_width >= input_width); uint32_t push_constants[2] = { input_width, output_width }; diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 76a5e44a315..f45a1295253 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -130,9 +130,18 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format && + pgraph_vk_compute_needs_finish(r)); + if (r->in_command_buffer && surface->draw_time >= r->command_buffer_start_time) { pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN); + } else if (compute_needs_finish) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); } bool downscale = (pg->surface_scale_factor != 1); @@ -175,10 +184,6 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, .imageSubresource.layerCount = 1, }; - bool use_compute_to_convert_depth_stencil_format = - surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - surface->host_fmt.vk_format == 
VK_FORMAT_D32_SFLOAT_S8_UINT; - VkImage surface_image_loc; if (downscale && !use_compute_to_convert_depth_stencil_format) { copy_regions[0].imageExtent = diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 34e903d9586..2478f61d711 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -591,6 +591,16 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac TextureShape *state = &texture->key.state; VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + bool use_compute_to_convert_depth_stencil = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool compute_needs_finish = use_compute_to_convert_depth_stencil && + pgraph_vk_compute_needs_finish(r); + if (compute_needs_finish) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); trace_nv2a_pgraph_surface_render_to_texture( @@ -644,10 +654,6 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, }; } - - bool use_compute_to_convert_depth_stencil = - surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; assert(use_compute_to_convert_depth_stencil && "Unimplemented"); StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST]; From ad0aec9adb6974210ab9f19780585419b5ed07b3 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 080/176] nv2a/vk: Move overlapping surface download logic to function --- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + hw/xbox/nv2a/pgraph/vk/surface.c | 17 +++++++++++++++++ hw/xbox/nv2a/pgraph/vk/texture.c | 11 ++--------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h 
b/hw/xbox/nv2a/pgraph/vk/renderer.h index c66e3541930..e7648322390 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -447,6 +447,7 @@ void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr); void pgraph_vk_wait_for_surface_download(SurfaceBinding *e); void pgraph_vk_download_dirty_surfaces(NV2AState *d); +void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size); void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index f45a1295253..d90995ba01a 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -122,6 +122,23 @@ static void memcpy_image(void *dst, void const *src, int dst_stride, } } +void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + SurfaceBinding *surface; + + hwaddr end = start + size - 1; + + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= end || start >= surf_end); + if (overlapping) { + pgraph_vk_surface_download_if_dirty( + container_of(pg, NV2AState, pgraph), surface); + } + } +} + static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, uint8_t *pixels) { diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 2478f61d711..cbdabf660f6 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1066,15 +1066,8 @@ static void create_texture(PGRAPHState *pg, int texture_idx) // FIXME: Restructure to support rendering surfaces to cubemap faces // Writeback any surfaces which this texture may 
index - hwaddr tex_vram_end = texture_vram_offset + texture_length - 1; - QTAILQ_FOREACH(surface, &r->surfaces, entry) { - hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr >= tex_vram_end - || texture_vram_offset >= surf_vram_end); - if (overlapping) { - pgraph_vk_surface_download_if_dirty(d, surface); - } - } + pgraph_vk_download_surfaces_in_range_if_dirty( + pg, texture_vram_offset, texture_length); } if (surface_to_texture && pg->surface_scale_factor > 1) { From f08d4a89fbc0d870025eb6339ab3d2d316150c53 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 081/176] nv2a/vk: Download any dirty surfaces covering vertex buffers --- hw/xbox/nv2a/pgraph/vk/vertex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c index 5c4580aaa40..af13bd67b49 100644 --- a/hw/xbox/nv2a/pgraph/vk/vertex.c +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -47,6 +47,8 @@ void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, { PGRAPHVkState *r = pg->vk_renderer_state; + pgraph_vk_download_surfaces_in_range_if_dirty(pg, offset, size); + size_t offset_bit = offset / 4096; size_t nbits = size / 4096; if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) { From b9e68a760b61a172222cc18c096f6f9bf4e4d044 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 082/176] nv2a/vk: Fix shadowmap readback in frag shader --- hw/xbox/nv2a/pgraph/glsl/psh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 35e95855bd1..2187e807419 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -646,7 +646,7 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa "uvec4 t%d_depth_raw = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, i, i, 
i); mstring_append_fmt( vars, - "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);\n", + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x >> 8) / 0xFFFFFF, 1.0, 0.0, 0.0);\n", i, i); } else { mstring_append_fmt( From c63cdc18de8258180c85c4ba07f6653f7a4c68bf Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 083/176] nv2a/vk: Fix integer format border color --- hw/xbox/nv2a/pgraph/vk/texture.c | 37 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index cbdabf660f6..1655a8c60b0 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1192,27 +1192,36 @@ static void create_texture(PGRAPHState *pg, int texture_idx) uint32_t border_color_pack32 = pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); - if (r->custom_border_color_extension_enabled) { - float border_color_rgba[4]; - pgraph_argb_pack32_to_rgba_float(border_color_pack32, border_color_rgba); + bool is_integer_type = vkf.vk_format == VK_FORMAT_R32_UINT; + if (r->custom_border_color_extension_enabled) { + vk_border_color = is_integer_type ? 
VK_BORDER_COLOR_INT_CUSTOM_EXT : + VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; custom_border_color_create_info = (VkSamplerCustomBorderColorCreateInfoEXT){ .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, - .customBorderColor.float32 = { border_color_rgba[0], - border_color_rgba[1], - border_color_rgba[2], - border_color_rgba[3] }, .format = image_view_create_info.format, .pNext = sampler_next_struct }; - - vk_border_color = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + if (is_integer_type) { + float rgba[4]; + pgraph_argb_pack32_to_rgba_float(border_color_pack32, rgba); + for (int i = 0; i < 4; i++) { + custom_border_color_create_info.customBorderColor.uint32[i] = + (uint32_t)((double)rgba[i] * (double)0xffffffff); + } + } else { + pgraph_argb_pack32_to_rgba_float( + border_color_pack32, + custom_border_color_create_info.customBorderColor.float32); + } sampler_next_struct = &custom_border_color_create_info; } else { // FIXME: Handle custom color in shader - if (border_color_pack32 == 0x00000000) { + if (is_integer_type) { + vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + } else if (border_color_pack32 == 0x00000000) { vk_border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } else if (border_color_pack32 == 0xff000000) { vk_border_color = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; @@ -1221,11 +1230,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) } } - if (vkf.vk_format == VK_FORMAT_R32_UINT) { - // Border color type must match sampled type - vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK; - } - uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); @@ -1248,7 +1252,8 @@ static void create_texture(PGRAPHState *pg, int texture_idx) vk_min_filter = pgraph_texture_min_filter_vk_map[min_filter]; if (f_basic.linear && vk_mag_filter != vk_min_filter) { - // Per spec, if coordinates unnormalized, filters must be same + // 
FIXME: Per spec, if coordinates unnormalized, filters must be + // same. vk_mag_filter = vk_min_filter = VK_FILTER_LINEAR; } } else { From bb0221af8298e0bb9bdb6e5caa44d021f22841ac Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 084/176] nv2a/vk: Target glslang to SPV 1.6 --- hw/xbox/nv2a/pgraph/vk/glsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c index fb3aed34f59..9e4faaafb57 100644 --- a/hw/xbox/nv2a/pgraph/vk/glsl.c +++ b/hw/xbox/nv2a/pgraph/vk/glsl.c @@ -148,7 +148,7 @@ GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, .client = GLSLANG_CLIENT_VULKAN, .client_version = GLSLANG_TARGET_VULKAN_1_3, .target_language = GLSLANG_TARGET_SPV, - .target_language_version = GLSLANG_TARGET_SPV_1_5, + .target_language_version = GLSLANG_TARGET_SPV_1_6, .code = glsl_source, .default_version = 460, .default_profile = GLSLANG_NO_PROFILE, From 6bb96f607b0bcf556e945f62a822eccdb84a7dfd Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 085/176] nv2a/vk: Add debug_shaders option --- config_spec.yml | 1 + hw/xbox/nv2a/pgraph/vk/glsl.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/config_spec.yml b/config_spec.yml index f2c3736a8f9..68232bbcd4a 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -136,6 +136,7 @@ display: default: OPENGL vulkan: validation_layers: bool + debug_shaders: bool quality: surface_scale: type: integer diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c index 9e4faaafb57..d3ae05a34be 100644 --- a/hw/xbox/nv2a/pgraph/vk/glsl.c +++ b/hw/xbox/nv2a/pgraph/vk/glsl.c @@ -17,6 +17,7 @@ * License along with this library; if not, see . 
*/ +#include "ui/xemu-settings.h" #include "renderer.h" #include @@ -206,14 +207,22 @@ GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, glslang_spv_options_t spv_options = { .validate = true, - -#if defined(CONFIG_RENDERDOC) - .disable_optimizer = true, - .generate_debug_info = true, - .emit_nonsemantic_shader_debug_info = true, - .emit_nonsemantic_shader_debug_source = true, -#endif }; + + if (g_config.display.vulkan.debug_shaders) { + spv_options.disable_optimizer = true; + spv_options.generate_debug_info = true; + spv_options.emit_nonsemantic_shader_debug_info = true; + spv_options.emit_nonsemantic_shader_debug_source = true; + + // XXX: Note emit_nonsemantic_shader_debug_source actually does nothing + // as of 2024.07.25. To actually get glsl source embedded in spv, we + // must do the following... + // + // ref: https://github.com/KhronosGroup/glslang/issues/3252 + glslang_program_add_source_text(program, input.stage, input.code, + strlen(input.code)); + } glslang_program_SPIRV_generate_with_options(program, stage, &spv_options); const char *spirv_messages = glslang_program_SPIRV_get_messages(program); From a2a193b4e3fac92a18de82cb6cb2d9e45d17a90f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 086/176] nv2a/vk: Add assert_on_validation_msg option --- config_spec.yml | 1 + hw/xbox/nv2a/pgraph/vk/instance.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config_spec.yml b/config_spec.yml index 68232bbcd4a..087d255faef 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -137,6 +137,7 @@ display: vulkan: validation_layers: bool debug_shaders: bool + assert_on_validation_msg: bool quality: surface_scale: type: integer diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 2003efd76af..811698c6683 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -65,7 +65,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL 
debugCallback( if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) { - assert(false); + assert(!g_config.display.vulkan.assert_on_validation_msg); } return VK_FALSE; } From c41853a3f316fcc485b64ad1632d8a7867b53573 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 087/176] nv2a/vk: Finish when queue is empty --- hw/xbox/nv2a/debug.h | 1 + hw/xbox/nv2a/pgraph/vk/draw.c | 1 + hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + hw/xbox/nv2a/pgraph/vk/reports.c | 9 +++++++++ 4 files changed, 12 insertions(+) diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 8a7fcc14492..5164295581e 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -72,6 +72,7 @@ _X(NV2A_PROF_FINISH_PRESENTING) \ _X(NV2A_PROF_FINISH_FLIP_STALL) \ _X(NV2A_PROF_FINISH_FLUSH) \ + _X(NV2A_PROF_FINISH_STALLED) \ _X(NV2A_PROF_CLEAR) \ _X(NV2A_PROF_QUEUE_SUBMIT) \ _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \ diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 91aa6d13455..0d84385caa7 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1244,6 +1244,7 @@ const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = { [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING, [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL, [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH, + [VK_FINISH_REASON_STALLED] = NV2A_PROF_FINISH_STALLED, }; void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index e7648322390..f9d8a9f31b5 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -507,6 +507,7 @@ typedef enum FinishReason { VK_FINISH_REASON_PRESENTING, VK_FINISH_REASON_FLIP_STALL, VK_FINISH_REASON_FLUSH, + 
VK_FINISH_REASON_STALLED, } FinishReason; // draw.c diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c index 8c5d7136be0..2231f4ffe57 100644 --- a/hw/xbox/nv2a/pgraph/vk/reports.c +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -137,4 +137,13 @@ void pgraph_vk_process_pending_reports_internal(NV2AState *d) void pgraph_vk_process_pending_reports(NV2AState *d) { + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t *dma_get = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET]; + uint32_t *dma_put = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT]; + + if (*dma_get == *dma_put && r->in_command_buffer) { + pgraph_vk_finish(pg, VK_FINISH_REASON_STALLED); + } } From a562007f8466d05e1132f3fe9b29486785132454 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 088/176] nv2a/vk: Enable synchronization validation --- hw/xbox/nv2a/pgraph/vk/instance.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 811698c6683..ae1abe102f4 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -271,12 +271,24 @@ static bool create_instance(PGRAPHState *pg, Error **errp) enable_validation = g_config.display.vulkan.validation_layers; + VkValidationFeatureEnableEXT enables[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + // VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, + }; + + VkValidationFeaturesEXT validationFeatures = { + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + .enabledValidationFeatureCount = ARRAY_SIZE(enables), + .pEnabledValidationFeatures = enables, + }; + if (enable_validation) { if (check_validation_layer_support()) { fprintf(stderr, "Warning: Validation layers enabled. 
Expect " "performance impact.\n"); create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); create_info.ppEnabledLayerNames = validation_layers; + create_info.pNext = &validationFeatures; } else { fprintf(stderr, "Warning: validation layers not available\n"); enable_validation = false; From 5119e3a3656c10e2476d6dbbbf6c1f59d0fb8344 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 089/176] nv2a/vk: Change display renderpass loadOp to don't care --- hw/xbox/nv2a/pgraph/vk/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 53dcb377b3f..c839cc79394 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -141,7 +141,7 @@ static void create_render_pass(PGRAPHState *pg) attachment = (VkAttachmentDescription){ .format = VK_FORMAT_R8G8B8A8_UNORM, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, From 177dcc75591543009e18b601737b5ca11767f0c7 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 090/176] nv2a/vk: Clear render_passes on finalization --- hw/xbox/nv2a/pgraph/vk/draw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 0d84385caa7..a95ee5904d4 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -208,6 +208,7 @@ static void finalize_render_passes(PGRAPHVkState *r) vkDestroyRenderPass(r->device, p->render_pass, NULL); } g_array_free(r->render_passes, true); + r->render_passes = NULL; } void pgraph_vk_init_pipelines(PGRAPHState *pg) From a209df8aa59141b85eb9ec3bd7e583d4f8953c55 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 
26 Jul 2024 17:21:02 -0700 Subject: [PATCH 091/176] nv2a/vk: Tighten a few layout transitions --- hw/xbox/nv2a/pgraph/vk/image.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c index 1161d81f547..728df3db60a 100644 --- a/hw/xbox/nv2a/pgraph/vk/image.c +++ b/hw/xbox/nv2a/pgraph/vk/image.c @@ -146,7 +146,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, // Color -> Dst } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; @@ -171,7 +171,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, // Depth -> Dst } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; @@ -179,7 +179,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, // Src -> Color } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { - barrier.srcAccessMask = 0; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; destinationStage = 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -187,7 +187,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, // Src -> Depth } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { - barrier.srcAccessMask = 0; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; From dd3bb50f1154e32281f3ecd656fca77c4b59b57a Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 092/176] nv2a/vk: Tighten renderpass dependencies --- hw/xbox/nv2a/pgraph/vk/draw.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index a95ee5904d4..ab3ff19f171 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -308,17 +308,27 @@ static VkRenderPass create_render_pass(PGRAPHVkState *r, RenderPassState *state) if (color) { dependency.srcStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; dependency.dstStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } if (zeta) { dependency.srcStageMask |= - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.srcAccessMask |= + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; dependency.dstStageMask |= - 
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; - dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } VkSubpassDescription subpass = { From fc803fe375530c0d3043a566a42b9d4084a8273c Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 093/176] nv2a/vk: Omit pipeline fragment shader when no color binding --- hw/xbox/nv2a/pgraph/vk/draw.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index ab3ff19f171..1fa5fba82c7 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -746,6 +746,13 @@ static void create_pipeline(PGRAPHState *pg) int num_active_shader_stages = 0; VkPipelineShaderStageCreateInfo shader_stages[3]; + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = r->shader_binding->vertex->module, + .pName = "main", + }; if (r->shader_binding->geometry) { shader_stages[num_active_shader_stages++] = (VkPipelineShaderStageCreateInfo){ @@ -755,20 +762,15 @@ static void create_pipeline(PGRAPHState *pg) .pName = "main", }; } - shader_stages[num_active_shader_stages++] = - (VkPipelineShaderStageCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = r->shader_binding->vertex->module, - .pName = "main", - }; - shader_stages[num_active_shader_stages++] = - (VkPipelineShaderStageCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = r->shader_binding->fragment->module, 
- .pName = "main", - }; + if (r->color_binding) { + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->shader_binding->fragment->module, + .pName = "main", + }; + } VkPipelineVertexInputStateCreateInfo vertex_input = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -792,7 +794,6 @@ static void create_pipeline(PGRAPHState *pg) .scissorCount = 1, }; - void *rasterizer_next_struct = NULL; VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; From 36e7cca1e979b047f92962b5709df4335c119047 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 094/176] nv2a/glsl: Initialize vars --- hw/xbox/nv2a/pgraph/glsl/psh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 2187e807419..243aee04ed1 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -1175,7 +1175,7 @@ static MString* psh_convert(struct PixelShader *ps) } for (i = 0; i < ps->num_var_refs; i++) { - mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]); + mstring_append_fmt(vars, "vec4 %s = vec4(0);\n", ps->var_refs[i]); if (strcmp(ps->var_refs[i], "r0") == 0) { if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) { mstring_append(vars, "r0.a = t0.a;\n"); From ebe3ee155b13702d98a229431ddeaff31ada3116 Mon Sep 17 00:00:00 2001 From: mborgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 095/176] nv2a/vk: Fix compute descriptor pool size --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index dc14840dd14..8c6dd17ff06 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ 
-129,7 +129,7 @@ static void create_descriptor_pool(PGRAPHState *pg) VkDescriptorPoolSize pool_sizes[] = { { .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 3, + .descriptorCount = 3 * ARRAY_SIZE(r->compute.descriptor_sets), }, }; From 334c6293fed22ab4ad1ce3bfef41df1e041dded2 Mon Sep 17 00:00:00 2001 From: mborgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 096/176] nv2a/vk: Fix swapped texture filters --- hw/xbox/nv2a/pgraph/vk/texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 1655a8c60b0..7cf22f5d712 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1270,8 +1270,8 @@ static void create_texture(PGRAPHState *pg, int texture_idx) VkSamplerCreateInfo sampler_create_info = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = vk_min_filter, - .minFilter = vk_mag_filter, + .magFilter = vk_mag_filter, + .minFilter = vk_min_filter, .addressModeU = lookup_texture_address_mode( GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)), .addressModeV = lookup_texture_address_mode( From ddc1a388e86d5f6d4773abb4d0830828b2a17c45 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 097/176] nv2a/vk: Add vertex memory offset fixme --- hw/xbox/nv2a/pgraph/vk/vertex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c index af13bd67b49..ee567f9dc64 100644 --- a/hw/xbox/nv2a/pgraph/vk/vertex.c +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -263,6 +263,7 @@ void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, }; r->vertex_attribute_offsets[i] = attrib_data_addr; + // FIXME: Data alignment may not meet requirements NV2A_VK_DGROUP_END(); } From 8e5a77c45d867a63a544c00a91a5145d6ce4aca5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 
098/176] nv2a/vk: Fix missing scale in d32_sfloat_s8_uint pack --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 8c6dd17ff06..eb28a2d5b0d 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -83,9 +83,10 @@ const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" "uint get_input_idx(uint idx_out) {\n" - " uint y = idx_out / width_out;\n" - " uint x = idx_out % width_out;\n" - " return (y * width_in + x) * (width_in / width_out);\n" + " uint scale = width_in / width_out;" + " uint y = (idx_out / width_out) * scale;\n" + " uint x = (idx_out % width_out) * scale;\n" + " return y * width_in + x;\n" "}\n" "void main() {\n" " uint idx_out = gl_GlobalInvocationID.x;\n" From bc46a9303d57e914de7cfd325c5bb17e98b3c010 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 099/176] nv2a/vk: Add compute pipeline cache, scale workgroups --- hw/xbox/nv2a/pgraph/vk/renderer.h | 18 +- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 244 ++++++++++++++++------- 2 files changed, 191 insertions(+), 71 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index f9d8a9f31b5..9fb13ee1fc6 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -257,16 +257,26 @@ typedef struct PGRAPHVkDisplayState { GLuint gl_texture_id; } PGRAPHVkDisplayState; +typedef struct ComputePipelineKey { + VkFormat host_fmt; + bool pack; + int workgroup_size; +} ComputePipelineKey; + +typedef struct ComputePipeline { + LruNode node; + ComputePipelineKey key; + VkPipeline pipeline; +} ComputePipeline; + typedef struct PGRAPHVkComputeState { VkDescriptorPool 
descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorSet descriptor_sets[1024]; int descriptor_set_index; VkPipelineLayout pipeline_layout; - VkPipeline pipeline_pack_d24s8; - VkPipeline pipeline_unpack_d24s8; - VkPipeline pipeline_pack_f32s8; - VkPipeline pipeline_unpack_f32s8; + Lru pipeline_cache; + ComputePipeline *pipeline_cache_entries; } PGRAPHVkComputeState; typedef struct PGRAPHVkState { diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index eb28a2d5b0d..8009b993dc1 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -18,6 +18,8 @@ */ #include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "qemu/fast-hash.h" +#include "qemu/lru.h" #include "renderer.h" #include @@ -28,14 +30,12 @@ // swizzle shader we will need more flexibility. const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = - "#version 450\n" - "layout(local_size_x = 256) in;\n" "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" "layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n" "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" "uint get_input_idx(uint idx_out) {\n" - " uint scale = width_in / width_out;" + " uint scale = width_in / width_out;\n" " uint y = (idx_out / width_out) * scale;\n" " uint x = (idx_out % width_out) * scale;\n" " return y * width_in + x;\n" @@ -49,14 +49,12 @@ const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = "}\n"; const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = - "#version 450\n" - "layout(local_size_x = 256) in;\n" "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" "layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n" "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" "uint get_input_idx(uint idx_out) 
{\n" - " uint scale = width_out / width_in;" + " uint scale = width_out / width_in;\n" " uint y = (idx_out / width_out) / scale;\n" " uint x = (idx_out % width_out) / scale;\n" " return y * width_in + x;\n" @@ -76,14 +74,12 @@ const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = "}\n"; const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = - "#version 450\n" - "layout(local_size_x = 256) in;\n" "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" "layout(binding = 0) buffer DepthIn { float depth_in[]; };\n" "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" "uint get_input_idx(uint idx_out) {\n" - " uint scale = width_in / width_out;" + " uint scale = width_in / width_out;\n" " uint y = (idx_out / width_out) * scale;\n" " uint x = (idx_out % width_out) * scale;\n" " return y * width_in + x;\n" @@ -97,14 +93,12 @@ const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = "}\n"; const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = - "#version 450\n" - "layout(local_size_x = 256) in;\n" "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" "layout(binding = 0) buffer DepthOut { float depth_out[]; };\n" "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" "uint get_input_idx(uint idx_out) {\n" - " uint scale = width_out / width_in;" + " uint scale = width_out / width_in;\n" " uint y = (idx_out / width_out) / scale;\n" " uint x = (idx_out % width_out) / scale;\n" " return y * width_in + x;\n" @@ -123,6 +117,35 @@ const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = " }\n" "}\n"; +static gchar *get_compute_shader_glsl(VkFormat host_fmt, bool pack, + int workgroup_size) +{ + const char *template; + + switch (host_fmt) { + case VK_FORMAT_D24_UNORM_S8_UINT: + template = pack ? 
pack_d24_unorm_s8_uint_to_z24s8_glsl : + unpack_z24s8_to_d24_unorm_s8_uint_glsl; + break; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + template = pack ? pack_d32_sfloat_s8_uint_to_z24s8_glsl : + unpack_z24s8_to_d32_sfloat_s8_uint_glsl; + break; + default: + assert(!"Unsupported host fmt"); + break; + } + assert(template); + + gchar *glsl = g_strdup_printf( + "#version 450\n" + "layout(local_size_x = %d) in;\n" + "%s", workgroup_size, template); + assert(glsl); + + return glsl; +} + static void create_descriptor_pool(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; @@ -235,10 +258,14 @@ static void create_compute_pipeline_layout(PGRAPHState *pg) &r->compute.pipeline_layout)); } -static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl) +static void destroy_compute_pipeline_layout(PGRAPHVkState *r) { - PGRAPHVkState *r = pg->vk_renderer_state; + vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL); + r->compute.pipeline_layout = VK_NULL_HANDLE; +} +static VkPipeline create_compute_pipeline(PGRAPHVkState *r, const char *glsl) +{ ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl( r, VK_SHADER_STAGE_COMPUTE_BIT, glsl); @@ -304,6 +331,47 @@ void pgraph_vk_compute_finish_complete(PGRAPHVkState *r) r->compute.descriptor_set_index = 0; } +static int get_workgroup_size_for_output_units(PGRAPHVkState *r, int output_units) +{ + int group_size = 1024; + + // FIXME: Smarter workgroup size calculation could factor in multiple + // submissions. For now we will just pick the highest number that + // evenly divides output_units. 
+ + while (group_size > 1) { + if (group_size > r->device_props.limits.maxComputeWorkGroupSize[0]) { + group_size /= 2; continue; + } + if (output_units % group_size == 0) { + break; + } + group_size /= 2; + } + + return group_size; +} + +static ComputePipeline *get_compute_pipeline(PGRAPHVkState *r, VkFormat host_fmt, bool pack, int output_units) +{ + int workgroup_size = get_workgroup_size_for_output_units(r, output_units); + + ComputePipelineKey key; + memset(&key, 0, sizeof(key)); + + key.host_fmt = host_fmt; + key.pack = pack; + key.workgroup_size = workgroup_size; + + LruNode *node = lru_lookup(&r->compute.pipeline_cache, + fast_hash((void *)&key, sizeof(key)), &key); + ComputePipeline *pipeline = container_of(node, ComputePipeline, node); + + assert(pipeline); + + return pipeline; +} + // // Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8 // formatted buffer with depth in bits 31-8 and stencil in bits 7-0. @@ -351,15 +419,20 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); - if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_pack_d24s8); - } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_pack_f32s8); - } else { - assert(!"Unsupported pack format"); - } + size_t output_size_in_units = output_width * output_height; + ComputePipeline *pipeline = get_compute_pipeline( + r, surface->host_fmt.vk_format, true, output_size_in_units); + + size_t workgroup_size_in_units = pipeline->key.workgroup_size; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + assert(r->device_props.limits.maxComputeWorkGroupSize[0] >= workgroup_size_in_units); + assert(r->device_props.limits.maxComputeWorkGroupCount[0] >=
group_count); + + // FIXME: Smarter workgroup scaling + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); vkCmdBindDescriptorSets( cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, @@ -371,11 +444,6 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); - size_t workgroup_size_in_units = 256; - size_t output_size_in_units = output_width * output_height; - assert(output_size_in_units % workgroup_size_in_units == 0); - size_t group_count = output_size_in_units / workgroup_size_in_units; - // FIXME: Check max group count vkCmdDispatch(cmd, group_count, 1, 1); @@ -420,15 +488,20 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, }; update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); - if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_unpack_d24s8); - } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_unpack_f32s8); - } else { - assert(!"Unsupported pack format"); - } + size_t output_size_in_units = output_width * output_height; + ComputePipeline *pipeline = get_compute_pipeline( + r, surface->host_fmt.vk_format, false, output_size_in_units); + + size_t workgroup_size_in_units = pipeline->key.workgroup_size; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + assert(r->device_props.limits.maxComputeWorkGroupSize[0] >= workgroup_size_in_units); + assert(r->device_props.limits.maxComputeWorkGroupCount[0] >= group_count); + + // FIXME: Smarter workgroup scaling + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); 
vkCmdBindDescriptorSets( cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, @@ -440,15 +513,73 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, vkCmdPushConstants(cmd, r->compute.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); + vkCmdDispatch(cmd, group_count, 1, 1); +} - size_t workgroup_size_in_units = 256; - size_t output_size_in_units = output_width * output_height; - assert(output_size_in_units % workgroup_size_in_units == 0); - size_t group_count = output_size_in_units / workgroup_size_in_units; +static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache); + ComputePipeline *snode = container_of(node, ComputePipeline, node); - // FIXME: Check max group count + memcpy(&snode->key, state, sizeof(snode->key)); - vkCmdDispatch(cmd, group_count, 1, 1); + if (snode->key.workgroup_size == 1) { + fprintf(stderr, + "Warning: Needed compute shader with workgroup size = 1\n"); + } + + gchar *glsl = get_compute_shader_glsl( + snode->key.host_fmt, snode->key.pack, snode->key.workgroup_size); + assert(glsl); + snode->pipeline = create_compute_pipeline(r, glsl); + g_free(glsl); +} + +static void pipeline_cache_release_node_resources(PGRAPHVkState *r, ComputePipeline *snode) +{ + vkDestroyPipeline(r->device, snode->pipeline, NULL); + snode->pipeline = VK_NULL_HANDLE; +} + +static bool pipeline_cache_entry_pre_evict(Lru *lru, LruNode *node) +{ + // FIXME: Check pipeline not in use + return false; +} + +static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache); + ComputePipeline *snode = container_of(node, ComputePipeline, node); + pipeline_cache_release_node_resources(r, snode); +} + +static bool pipeline_cache_entry_compare(Lru *lru,
LruNode *node, void *key) +{ + ComputePipeline *snode = container_of(node, ComputePipeline, node); + return memcmp(&snode->key, key, sizeof(ComputePipelineKey)); +} + +static void pipeline_cache_init(PGRAPHVkState *r) +{ + const size_t pipeline_cache_size = 100; // FIXME: Trim + lru_init(&r->compute.pipeline_cache); + r->compute.pipeline_cache_entries = g_malloc_n(pipeline_cache_size, sizeof(ComputePipeline)); + assert(r->compute.pipeline_cache_entries != NULL); + for (int i = 0; i < pipeline_cache_size; i++) { + lru_add_free(&r->compute.pipeline_cache, &r->compute.pipeline_cache_entries[i].node); + } + r->compute.pipeline_cache.init_node = pipeline_cache_entry_init; + r->compute.pipeline_cache.compare_nodes = pipeline_cache_entry_compare; + r->compute.pipeline_cache.pre_node_evict = pipeline_cache_entry_pre_evict; + r->compute.pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict; +} + +static void pipeline_cache_finalize(PGRAPHVkState *r) +{ + lru_flush(&r->compute.pipeline_cache); + g_free(r->compute.pipeline_cache_entries); + r->compute.pipeline_cache_entries = NULL; } void pgraph_vk_init_compute(PGRAPHState *pg) @@ -459,36 +590,15 @@ void pgraph_vk_init_compute(PGRAPHState *pg) create_descriptor_set_layout(pg); create_descriptor_sets(pg); create_compute_pipeline_layout(pg); - - r->compute.pipeline_pack_d24s8 = - create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl); - r->compute.pipeline_unpack_d24s8 = - create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl); - r->compute.pipeline_pack_f32s8 = - create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl); - r->compute.pipeline_unpack_f32s8 = - create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl); + pipeline_cache_init(r); } void pgraph_vk_finalize_compute(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - VkPipeline *pipelines[] = { - &r->compute.pipeline_pack_d24s8, - &r->compute.pipeline_unpack_d24s8, - &r->compute.pipeline_pack_f32s8, - 
&r->compute.pipeline_unpack_f32s8, - }; - - for (int i = 0; i < ARRAY_SIZE(pipelines); i++) { - vkDestroyPipeline(r->device, *pipelines[i], NULL); - pipelines[i] = VK_NULL_HANDLE; - } - - vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL); - r->compute.pipeline_layout = VK_NULL_HANDLE; - + pipeline_cache_finalize(r); + destroy_compute_pipeline_layout(r); destroy_descriptor_sets(pg); destroy_descriptor_set_layout(pg); destroy_descriptor_pool(pg); From 017444b8a024599cf4e8053f364c2ebc9ba0fc9f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:02 -0700 Subject: [PATCH 100/176] nv2a/glsl: Ensure sampler type matches texture dimensionality --- hw/xbox/nv2a/pgraph/glsl/psh.c | 24 ++++++++++++++++++++++++ hw/xbox/nv2a/pgraph/psh.h | 1 + hw/xbox/nv2a/pgraph/shaders.c | 2 ++ 3 files changed, 27 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 243aee04ed1..ffb387d997b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -562,6 +562,24 @@ static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo fina ps->varE = ps->varF = NULL; } +static enum PS_TEXTUREMODES correct_texture_mode_for_dimensionality(enum PS_TEXTUREMODES mode, const PshState *state, int i) +{ + int dim = state->dim_tex[i]; + + switch (mode) { + case PS_TEXTUREMODES_PROJECT2D: + return dim == 2 ? PS_TEXTUREMODES_PROJECT2D : + dim == 3 ? PS_TEXTUREMODES_PROJECT3D : + mode; + case PS_TEXTUREMODES_PROJECT3D: + return dim == 2 ? PS_TEXTUREMODES_PROJECT2D : mode; + case PS_TEXTUREMODES_DOT_STR_3D: + return dim == 2 ? 
PS_TEXTUREMODES_DOT_ST : mode; + default: + return mode; + } +} + static const char sampler2D[] = "sampler2D"; static const char sampler3D[] = "sampler3D"; static const char samplerCube[] = "samplerCube"; @@ -575,6 +593,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s return NULL; case PS_TEXTUREMODES_PROJECT2D: + assert(state->dim_tex[i] == 2); return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: @@ -584,6 +603,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } + assert(state->dim_tex[i] == 2); return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; case PS_TEXTUREMODES_PROJECT3D: @@ -594,6 +614,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s if (state->shadow_map[i]) { return (state->rect_tex[i] && !state->vulkan) ? 
sampler2DRect : sampler2D; } + assert(state->dim_tex[i] == 3); return sampler3D; case PS_TEXTUREMODES_CUBEMAP: @@ -604,6 +625,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } + assert(state->dim_tex[i] == 2); return samplerCube; case PS_TEXTUREMODES_DPNDNT_AR: @@ -612,6 +634,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } + assert(state->dim_tex[i] == 2); return sampler2D; } } @@ -1245,6 +1268,7 @@ MString *pgraph_gen_psh_glsl(const PshState state) ps.flags = state.combiner_control >> 8; for (i = 0; i < 4; i++) { ps.tex_modes[i] = (state.shader_stage_program >> (i * 5)) & 0x1F; + ps.tex_modes[i] = correct_texture_mode_for_dimensionality(ps.tex_modes[i], &state, i); } ps.dot_map[0] = 0; diff --git a/hw/xbox/nv2a/pgraph/psh.h b/hw/xbox/nv2a/pgraph/psh.h index 6232a2834a5..13660457078 100644 --- a/hw/xbox/nv2a/pgraph/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -71,6 +71,7 @@ typedef struct PshState { bool alphakill[4]; enum ConvolutionFilter conv_tex[4]; bool tex_x8y24[4]; + int dim_tex[4]; float border_logical_size[4][3]; float border_inv_real_size[4][3]; diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c index 285d24f4392..8d2c77a535b 100644 --- a/hw/xbox/nv2a/pgraph/shaders.c +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -204,6 +204,8 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4); + state.psh.dim_tex[i] = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); BasicColorFormatInfo f = 
kelvin_color_format_info_map[color_format]; state.psh.rect_tex[i] = f.linear; From 567bc33a70b01052251765ec0f98d27fc44879ca Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 18:05:19 -0700 Subject: [PATCH 101/176] nv2a/glsl: Fix project2d usampler lookup --- hw/xbox/nv2a/pgraph/glsl/psh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index ffb387d997b..f93fbd82edd 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -587,6 +587,7 @@ static const char sampler2DRect[] = "sampler2DRect"; static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i) { + // FIXME: Cleanup switch (mode) { default: case PS_TEXTUREMODES_NONE: @@ -594,6 +595,9 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s case PS_TEXTUREMODES_PROJECT2D: assert(state->dim_tex[i] == 2); + if (state->tex_x8y24[i] && state->vulkan) { + return "usampler2D"; + } return (state->rect_tex[i] && !state->vulkan) ? 
sampler2DRect : sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: From 28092ad13b1899b469a886ead34d7ba5c2074076 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 12:04:46 -0700 Subject: [PATCH 102/176] nv2a/vk: Only include uniform attr definition when necessary --- hw/xbox/nv2a/pgraph/glsl/vsh.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 84609e18df8..1cc1753f058 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -256,14 +256,16 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) /* Return combined header + source */ if (state->vulkan) { // FIXME: Optimize uniforms - if (state->use_push_constants_for_uniform_attrs) { - mstring_append_fmt(output, - "layout(push_constant) uniform PushConstants {\n" - " vec4 inlineValue[%d];\n" - "};\n\n", num_uniform_attrs); - } else { - mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n", - num_uniform_attrs); + if (num_uniform_attrs > 0) { + if (state->use_push_constants_for_uniform_attrs) { + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + " vec4 inlineValue[%d];\n" + "};\n\n", num_uniform_attrs); + } else { + mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n", + num_uniform_attrs); + } } mstring_append_fmt( output, From 374eada8efb963ec70e74d3ac503e6dd0991ee2d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 14:29:38 -0700 Subject: [PATCH 103/176] nv2a/vk: Ensure pg->clearing set before pgraph_vk_surface_update --- hw/xbox/nv2a/pgraph/pgraph.h | 2 +- hw/xbox/nv2a/pgraph/vk/draw.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 634f15c8add..64b671e71da 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -224,7 +224,7 @@ typedef struct PGRAPHState { uint32_t regs_[0x2000]; 
DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t)); - bool clearing; + bool clearing; // FIXME: Internal bool waiting_for_nop; bool waiting_for_flip; bool waiting_for_context_switch; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 1fa5fba82c7..1edcf946809 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1673,16 +1673,18 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) bool write_zeta = (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); + pg->clearing = true; + // FIXME: If doing a full surface clear, mark the surface for full clear // and we can just do the clear as part of the surface load. pgraph_vk_surface_update(d, true, write_color, write_zeta); if (!(r->color_binding || r->zeta_binding)) { /* Nothing bound to clear */ + pg->clearing = false; return; } - pg->clearing = true; r->clear_parameter = parameter; unsigned int xmin = From c881f8641f3f44e0b60ced01d07bd7e14fdbba4d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 14:31:12 -0700 Subject: [PATCH 104/176] nv2a/vk: Ensure pack buffer offsets meet minStorageBufferOffsetAlignment --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 8 ++++++-- hw/xbox/nv2a/pgraph/vk/surface.c | 11 ++++++++--- hw/xbox/nv2a/pgraph/vk/texture.c | 4 +++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 8009b993dc1..3f5659d8a3f 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -407,7 +407,9 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, }, { .buffer = src, - .offset = depth_size, + .offset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), .range = stencil_size, }, { @@ -477,7 +479,9 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, }, { .buffer = dst, - .offset = 
depth_size, + .offset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), .range = stencil_size, }, { diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index d90995ba01a..5351e5602cc 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -252,11 +252,14 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, } if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + size_t depth_size = scaled_width * scaled_height * 4; copy_regions[num_copy_regions++] = (VkBufferImageCopy){ - .bufferOffset = scaled_width * scaled_height * 4, + .bufferOffset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, .imageSubresource.layerCount = 1, - .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, }; } @@ -961,7 +964,9 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, // Already scaled during compute. Adjust copy regions. 
regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; regions[1].imageExtent = regions[0].imageExtent; - regions[1].bufferOffset = unpacked_depth_image_size; + regions[1].bufferOffset = + ROUND_UP(unpacked_depth_image_size, + r->device_props.limits.minStorageBufferOffsetAlignment); copy_buffer = unpack_buffer; } diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 7cf22f5d712..543b907ebb8 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -638,7 +638,9 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac }; if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { - stencil_buffer_offset = scaled_width * scaled_height * 4; + stencil_buffer_offset = + ROUND_UP(scaled_width * scaled_height * 4, + r->device_props.limits.minStorageBufferOffsetAlignment); stencil_buffer_size = scaled_width * scaled_height; copied_image_size += stencil_buffer_size; From 1fec4947b6c7904382e3205c207495ca6fb114d8 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 14:40:59 -0700 Subject: [PATCH 105/176] nv2a/vk: Ensure clear rect never extends beyond surface --- hw/xbox/nv2a/pgraph/vk/draw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 1edcf946809..e6681534cc0 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1679,7 +1679,8 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) // and we can just do the clear as part of the surface load. pgraph_vk_surface_update(d, true, write_color, write_zeta); - if (!(r->color_binding || r->zeta_binding)) { + SurfaceBinding *binding = r->color_binding ?: r->zeta_binding; + if (!binding) { /* Nothing bound to clear */ pg->clearing = false; return; @@ -1700,6 +1701,11 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) ymax, write_color ? " color" : "", write_zeta ? 
" zeta" : ""); + xmin = MIN(xmin, binding->width - 1); + ymin = MIN(xmin, binding->height - 1); + xmax = MIN(xmax, binding->width - 1); + ymax = MIN(ymax, binding->height - 1); + begin_pre_draw(pg); begin_draw(pg); From c802468d054b38e0c9f1c7f90e93643f9aef56ed Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 14:42:33 -0700 Subject: [PATCH 106/176] nv2a/vk: Relax invalid surface compat check --- hw/xbox/nv2a/pgraph/vk/surface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 5351e5602cc..f6d69881e42 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -748,7 +748,6 @@ static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, return surface->host_fmt.vk_format == target->host_fmt.vk_format && surface->width == target->width && surface->height == target->height && - surface->pitch == target->pitch && surface->host_fmt.usage == target->host_fmt.usage; } From 9161e3e14ace05109b26efbe29347e46cf439046 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sat, 27 Jul 2024 14:44:38 -0700 Subject: [PATCH 107/176] nv2a/vk: Create surface if one not currently bound --- hw/xbox/nv2a/pgraph/vk/surface.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index f6d69881e42..1159cb53c02 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -1178,7 +1178,11 @@ static void update_surface_part(NV2AState *d, bool upload, bool color) d->vram, target.vram_addr, target.size, DIRTY_MEMORY_NV2A); - if (upload && (pg_surface->buffer_dirty || mem_dirty)) { + SurfaceBinding *current_binding = color ? 
r->color_binding + : r->zeta_binding; + + if (!current_binding || + (upload && (pg_surface->buffer_dirty || mem_dirty))) { // FIXME: We don't need to be so aggressive flushing the command list // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); pgraph_vk_ensure_not_in_render_pass(pg); From dc5a2204862e34e914bab283543181c772cb0a2d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 108/176] nv2a/vk: Specify set in compute shader layout --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 3f5659d8a3f..e59a44f07de 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -31,9 +31,9 @@ const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" - "layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n" - "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" - "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "layout(set = 0, binding = 0) buffer DepthIn { uint depth_in[]; };\n" + "layout(set = 0, binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" "uint get_input_idx(uint idx_out) {\n" " uint scale = width_in / width_out;\n" " uint y = (idx_out / width_out) * scale;\n" @@ -50,9 +50,9 @@ const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" - "layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n" - "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" - "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "layout(set = 0, 
binding = 0) buffer DepthOut { uint depth_out[]; };\n" + "layout(set = 0, binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" "uint get_input_idx(uint idx_out) {\n" " uint scale = width_out / width_in;\n" " uint y = (idx_out / width_out) / scale;\n" @@ -75,9 +75,9 @@ const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" - "layout(binding = 0) buffer DepthIn { float depth_in[]; };\n" - "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n" - "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "layout(set = 0, binding = 0) buffer DepthIn { float depth_in[]; };\n" + "layout(set = 0, binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" "uint get_input_idx(uint idx_out) {\n" " uint scale = width_in / width_out;\n" " uint y = (idx_out / width_out) * scale;\n" @@ -94,9 +94,9 @@ const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl = const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" - "layout(binding = 0) buffer DepthOut { float depth_out[]; };\n" - "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n" - "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "layout(set = 0, binding = 0) buffer DepthOut { float depth_out[]; };\n" + "layout(set = 0, binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" "uint get_input_idx(uint idx_out) {\n" " uint scale = width_out / width_in;\n" " uint y = (idx_out / width_out) / scale;\n" From 64e8046d1ff44914acc810ad1b0ff0d254911667 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: 
Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 109/176] nv2a/vk: Specify local_size_{y,z} = 1 in compute shader --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index e59a44f07de..ebbaca65a8a 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -139,7 +139,7 @@ static gchar *get_compute_shader_glsl(VkFormat host_fmt, bool pack, gchar *glsl = g_strdup_printf( "#version 450\n" - "layout(local_size_x = %d) in;\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" "%s", workgroup_size, template); assert(glsl); From 4e75b5e3ce6abe8a23b7be1efe888ebe64e05083 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 110/176] nv2a/vk: Always use staging image for surface upload --- hw/xbox/nv2a/pgraph/vk/surface.c | 377 +++++++++++++++++++++++-------- 1 file changed, 285 insertions(+), 92 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 1159cb53c02..17552ccda39 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -151,6 +151,12 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + bool no_conversion_necessary = + surface->color || use_compute_to_convert_depth_stencil_format || + surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM; + + assert(no_conversion_necessary); + bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format && pgraph_vk_compute_needs_finish(r)); @@ -263,15 +269,43 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, }; } + // + // Copy image to staging buffer, or to compute_dst if we need to pack it + // + + size_t 
downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * + surface->width * surface->height; + assert((downloaded_image_size) <= + r->storage_buffers[BUFFER_STAGING_DST].buffer_size); + int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ? BUFFER_COMPUTE_DST : BUFFER_STAGING_DST; VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer; + VkBufferMemoryBarrier pre_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &pre_copy_dst_barrier, 0, NULL); + vkCmdCopyImageToBuffer(cmd, surface_image_loc, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, num_copy_regions, copy_regions); + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + // FIXME: Verify output of depth stencil conversion // FIXME: Track current layout and only transition when required @@ -281,7 +315,11 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, downscale ? 
(surface->width * surface->height * bytes_per_pixel) : (scaled_width * scaled_height * bytes_per_pixel); - VkBufferMemoryBarrier pre_pack_barrier = { + // + // Pack the depth-stencil image into compute_src buffer + // + + VkBufferMemoryBarrier pre_compute_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, @@ -292,13 +330,40 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, - 1, &pre_pack_barrier, 0, NULL); + 1, &pre_compute_src_barrier, 0, NULL); VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer; + + VkBufferMemoryBarrier pre_compute_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = pack_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_compute_dst_barrier, 0, NULL); + pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer, downscale); - VkBufferMemoryBarrier post_pack_barrier = { + VkBufferMemoryBarrier post_compute_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_compute_src_barrier, 0, NULL); + + VkBufferMemoryBarrier post_compute_dst_barrier = { .sType = 
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, @@ -309,56 +374,74 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, - &post_pack_barrier, 0, NULL); + &post_compute_dst_barrier, 0, NULL); + + // + // Copy packed image over to staging buffer for host download + // copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer; + + VkBufferMemoryBarrier pre_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &pre_copy_dst_barrier, 0, NULL); + VkBufferCopy buffer_copy_region = { .size = packed_size, }; vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region); - VkBufferMemoryBarrier barrier = { + VkBufferMemoryBarrier post_copy_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = copy_buffer, + .buffer = pack_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, - &barrier, 0, NULL); + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); } - size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * - surface->width * 
surface->height; - assert((downloaded_image_size) <= - r->storage_buffers[BUFFER_STAGING_DST].buffer_size); - - pgraph_vk_transition_image_layout( - pg, cmd, surface->image, surface->host_fmt.vk_format, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + // + // Download image data to host + // + + VkBufferMemoryBarrier post_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, + &post_copy_dst_barrier, 0, NULL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1); pgraph_vk_end_single_time_commands(pg, cmd); - void *mapped_memory_ptr; + void *mapped_memory_ptr = NULL; VK_CHECK(vmaMapMemory(r->allocator, r->storage_buffers[BUFFER_STAGING_DST].allocation, &mapped_memory_ptr)); - // FIXME: Swizzle in shader - // FIXME: Eliminate this extra copy if we need to swizzle - // FIXME: Use native buffer copy options for pitch adjust - - bool no_conversion_necessary = - surface->color || use_compute_to_convert_depth_stencil_format || - surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM; - - assert(no_conversion_necessary); + vmaInvalidateAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation, + 0, VK_WHOLE_SIZE); memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch, surface->width * surface->fmt.bytes_per_pixel, @@ -368,6 +451,7 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, r->storage_buffers[BUFFER_STAGING_DST].allocation); if (surface->swizzle) { + // FIXME: Swizzle in shader swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, 
surface->pitch, surface->fmt.bytes_per_pixel); nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); @@ -646,6 +730,9 @@ static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) unsigned int width = surface->width, height = surface->height; pgraph_apply_scaling_factor(pg, &width, &height); + assert(!surface->image); + assert(!surface->image_scratch); + NV2A_VK_DPRINTF( "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx, width, height, surface->vram_addr); @@ -670,25 +757,17 @@ static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) VmaAllocationCreateInfo alloc_create_info = { .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT }; VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, - &alloc_create_info, &surface->image, - &surface->allocation, NULL)); - - if (pg->surface_scale_factor > 1) { - VkImageCreateInfo scratch_image_create_info = image_create_info; - scratch_image_create_info.extent.width = surface->width; - scratch_image_create_info.extent.height = surface->height; - VK_CHECK( - vmaCreateImage(r->allocator, &scratch_image_create_info, - &alloc_create_info, &surface->image_scratch, - &surface->allocation_scratch, NULL)); - surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; - } else { - surface->image_scratch = VK_NULL_HANDLE; - surface->allocation_scratch = VK_NULL_HANDLE; - } + &alloc_create_info, &surface->image, + &surface->allocation, NULL)); + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &surface->image_scratch, + &surface->allocation_scratch, NULL)); + surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; VkImageViewCreateInfo image_view_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -735,11 +814,16 @@ static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src) static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface) { 
vkDestroyImageView(r->device, surface->image_view, NULL); + surface->image_view = VK_NULL_HANDLE; + vmaDestroyImage(r->allocator, surface->image, surface->allocation); - if (surface->image_scratch) { - vmaDestroyImage(r->allocator, surface->image_scratch, - surface->allocation_scratch); - } + surface->image = VK_NULL_HANDLE; + surface->allocation = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, surface->image_scratch, + surface->allocation_scratch); + surface->image_scratch = VK_NULL_HANDLE; + surface->allocation_scratch = VK_NULL_HANDLE; } static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, @@ -864,33 +948,16 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, gl_read_buf = buf; } - // FIXME: Eliminate extra copies - - VkBufferImageCopy regions[2]; - int num_regions = 1; - regions[0] = (VkBufferImageCopy){ - .imageSubresource.aspectMask = surface->color ? - VK_IMAGE_ASPECT_COLOR_BIT : - VK_IMAGE_ASPECT_DEPTH_BIT, - .imageSubresource.layerCount = 1, - .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, - }; - - if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { - regions[num_regions++] = (VkBufferImageCopy){ - .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, - .imageSubresource.layerCount = 1, - .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, - }; - } + // + // Upload image data from host to staging buffer + // + StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; size_t uploaded_image_size = surface->height * surface->width * surface->fmt.bytes_per_pixel; - - StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; assert(uploaded_image_size <= copy_buffer->buffer_size); - void *mapped_memory_ptr; + void *mapped_memory_ptr = NULL; VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation, &mapped_memory_ptr)); @@ -907,14 +974,55 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, surface->width * 
surface->fmt.bytes_per_pixel, surface->pitch, surface->height); + vmaFlushAllocation(r->allocator, copy_buffer->allocation, 0, VK_WHOLE_SIZE); vmaUnmapMemory(r->allocator, copy_buffer->allocation); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + VkBufferMemoryBarrier host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + + // Set up image copy regions (which may be modified by compute unpack) + + VkBufferImageCopy regions[2]; + int num_regions = 0; + + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? + VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + } + + unsigned int scaled_width = surface->width, scaled_height = surface->height; pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); if (use_compute_to_convert_depth_stencil_format) { + + // + // Copy packed image buffer to compute_dst for unpacking + // + size_t packed_size = uploaded_image_size; VkBufferCopy buffer_copy_region = { .size = packed_size, @@ -929,25 +1037,69 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, size_t unpacked_size = unpacked_depth_image_size + unpacked_stencil_image_size; - 
VkBufferMemoryBarrier pre_unpack_barrier = { + VkBufferMemoryBarrier post_copy_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); + + // + // Unpack depth-stencil image into compute_src + // + + VkBufferMemoryBarrier pre_unpack_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, - .size = packed_size + .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, - 1, &pre_unpack_barrier, 0, NULL); + 1, &pre_unpack_src_barrier, 0, NULL); StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC]; + + VkBufferMemoryBarrier pre_unpack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = unpack_buffer->buffer, + .size = unpacked_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, + &pre_unpack_dst_barrier, 0, NULL); + pgraph_vk_unpack_depth_stencil( pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, unpack_buffer->buffer); - VkBufferMemoryBarrier post_unpack_barrier = { + VkBufferMemoryBarrier post_unpack_src_barrier 
= { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_unpack_src_barrier, 0, NULL); + + VkBufferMemoryBarrier post_unpack_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, @@ -958,7 +1110,7 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, - &post_unpack_barrier, 0, NULL); + &post_unpack_dst_barrier, 0, NULL); // Already scaled during compute. Adjust copy regions. 
regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; @@ -970,11 +1122,12 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, copy_buffer = unpack_buffer; } - bool upscale = !use_compute_to_convert_depth_stencil_format && - pg->surface_scale_factor > 1; + // + // Copy image data from buffer to staging image + // - if (upscale && surface->image_scratch_current_layout != - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + if (surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, surface->image_scratch_current_layout, @@ -983,25 +1136,44 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; } + vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, surface->image_scratch, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, + regions); + + VkBufferMemoryBarrier post_copy_src_buffer_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_buffer_barrier, 0, NULL); + + // + // Copy staging image to final image + // + + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, - upscale ? surface->image_scratch : surface->image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, - regions); + bool upscale = pg->surface_scale_factor > 1 && + !use_compute_to_convert_depth_stencil_format; if (upscale) { - pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, - surface->host_fmt.vk_format, - surface->image_scratch_current_layout, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - surface->image_scratch_current_layout = - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - unsigned int scaled_width = surface->width, scaled_height = surface->height; pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); @@ -1026,6 +1198,26 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, surface->image_scratch_current_layout, surface->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion, surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } else { + // Note: We should be able to vkCmdCopyBufferToImage directly into + // surface->image, but there is an apparent AMD Windows driver + // synchronization bug we'll hit when doing this. For this reason, + // always use a staging image. 
+ + for (int i = 0; i < num_regions; i++) { + VkImageAspectFlags aspect = regions[i].imageSubresource.aspectMask; + VkImageCopy copy_region = { + .srcSubresource.aspectMask = aspect, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = aspect, + .dstSubresource.layerCount = 1, + .extent = regions[i].imageExtent, + }; + vkCmdCopyImage(cmd, surface->image_scratch, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + ©_region); + } } pgraph_vk_transition_image_layout( @@ -1170,6 +1362,7 @@ static void update_surface_part(NV2AState *d, bool upload, bool color) PGRAPHVkState *r = pg->vk_renderer_state; SurfaceBinding target; + memset(&target, 0, sizeof(target)); populate_surface_binding_target(d, color, &target); Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta; From f6d70abd04795d49cc68927492eeb0d867847f13 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 111/176] nv2a: Fix nv2a_dbg_renderdoc_init on Windows --- hw/xbox/nv2a/pgraph/debug_renderdoc.c | 43 +++++++++++++++++---------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c index ded339e23f9..273e3079737 100644 --- a/hw/xbox/nv2a/pgraph/debug_renderdoc.c +++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c @@ -45,26 +45,37 @@ void nv2a_dbg_renderdoc_init(void) #ifdef _WIN32 HMODULE renderdoc = GetModuleHandleA("renderdoc.dll"); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = - (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI"); -#else - void *renderdoc = dlopen( + if (!renderdoc) { + fprintf(stderr, "Error: Failed to open renderdoc library: 0x%lx\n", + GetLastError()); + return; + } + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI"); +#else // _WIN32 #ifdef __APPLE__ - "librenderdoc.dylib", + void *renderdoc = 
dlopen("librenderdoc.dylib", RTLD_LAZY); #else - "librenderdoc.so", + void *renderdoc = dlopen("librenderdoc.so", RTLD_LAZY); #endif - RTLD_LAZY); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = - (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI"); + if (!renderdoc) { + fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", + dlerror()); + return; + } + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI"); #endif // _WIN32 - int ret = - RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api); - assert(ret == 1 && "Failed to retrieve RenderDoc API."); - } else { - fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror()); + + if (!RENDERDOC_GetAPI) { + fprintf(stderr, "Error: Could not get RENDERDOC_GetAPI address\n"); + return; + } + + int ret = + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api); + if (ret != 1) { + fprintf(stderr, "Error: Failed to retrieve RenderDoc API.\n"); } } From 33db2787f74781b17c4eb8cf0160b6ed53222dc3 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 112/176] nv2a/vk: Fixup depth transition stages --- hw/xbox/nv2a/pgraph/vk/image.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c index 728df3db60a..de8e4d30da7 100644 --- a/hw/xbox/nv2a/pgraph/vk/image.c +++ b/hw/xbox/nv2a/pgraph/vk/image.c @@ -109,7 +109,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; - destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; // Dst -> Src } else if 
(oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && @@ -173,7 +173,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + sourceStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; // Src -> Color @@ -190,7 +190,7 @@ void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; - destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; // Src -> Dst } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && From e20a2b15b319295cb557986ec75e93fcf0471099 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 113/176] nv2a/vk: Don't release textures until after draw completes --- hw/xbox/nv2a/pgraph/vk/draw.c | 9 +++++++-- hw/xbox/nv2a/pgraph/vk/surface.c | 1 - 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index e6681534cc0..135e7b303d1 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1312,6 +1312,8 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) r->command_buffer_fence)); r->submit_count += 1; + bool check_budget = false; + // Periodically check memory budget const int max_num_submits_before_budget_update = 5; if (finish_reason == 
VK_FINISH_REASON_FLIP_STALL || @@ -1321,8 +1323,7 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) // VMA queries budget via vmaSetCurrentFrameIndex vmaSetCurrentFrameIndex(r->allocator, r->submit_count); r->allocator_last_submit_index = r->submit_count; - - pgraph_vk_check_memory_budget(pg); + check_budget = true; } VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence, @@ -1331,6 +1332,10 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) r->descriptor_set_index = 0; r->in_command_buffer = false; destroy_framebuffers(pg); + + if (check_budget) { + pgraph_vk_check_memory_budget(pg); + } } NV2AState *d = container_of(pg, NV2AState, pgraph); diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 17552ccda39..cadacb8ccfc 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -757,7 +757,6 @@ static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) VmaAllocationCreateInfo alloc_create_info = { .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, - .flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT }; VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, From f68159d1c43c0efb876a52c67d7107ad653382d7 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 114/176] nv2a/vk: Simplify zeta to texture, add barrier --- hw/xbox/nv2a/pgraph/vk/texture.c | 162 +++++++++++++++++++------------ 1 file changed, 98 insertions(+), 64 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 543b907ebb8..0e9f28added 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -612,12 +612,6 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac scaled_height = surface->height; pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); - pgraph_vk_transition_image_layout( - pg, cmd, surface->image, 
surface->host_fmt.vk_format, - surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - size_t copied_image_size = scaled_width * scaled_height * surface->host_fmt.host_bytes_per_pixel; size_t stencil_buffer_offset = 0; @@ -661,72 +655,112 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST]; assert(dst_storage_buffer->buffer_size >= copied_image_size); + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + vkCmdCopyImageToBuffer( cmd, surface->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_storage_buffer->buffer, num_regions, regions); - if (use_compute_to_convert_depth_stencil) { - size_t packed_image_size = scaled_width * scaled_height * 4; - - VkBufferMemoryBarrier pre_pack_barrier = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, - .size = VK_WHOLE_SIZE - }; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, - 1, &pre_pack_barrier, 0, NULL); - - pgraph_vk_pack_depth_stencil( - pg, surface, cmd, - r->storage_buffers[BUFFER_COMPUTE_DST].buffer, - r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false); - - VkBufferMemoryBarrier post_pack_barrier = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = 
r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, - .size = packed_image_size - }; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, - &post_pack_barrier, 0, NULL); - - pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, - texture->current_layout, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - - regions[0] = (VkBufferImageCopy){ - .bufferOffset = 0, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .imageSubresource.mipLevel = 0, - .imageSubresource.baseArrayLayer = 0, - .imageSubresource.layerCount = 1, - .imageOffset = (VkOffset3D){ 0, 0, 0 }, - .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, - }; - - vkCmdCopyBufferToImage( - cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions); - } - pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + size_t packed_image_size = scaled_width * scaled_height * 4; + + VkBufferMemoryBarrier pre_pack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_src_barrier, 0, NULL); + + VkBufferMemoryBarrier pre_pack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_dst_barrier, 0, NULL); + + pgraph_vk_pack_depth_stencil( + pg, surface, cmd, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false); + + VkBufferMemoryBarrier post_pack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_src_barrier, 0, NULL); + + 
VkBufferMemoryBarrier post_pack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = packed_image_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_dst_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + regions[0] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, + }; + vkCmdCopyBufferToImage( + cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions); + + VkBufferMemoryBarrier post_copy_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, texture->current_layout, From 894bcf1b58440a9fa7c2bad635e9b0e0b7bd5004 Mon Sep 17 00:00:00 2001 From: 
Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 115/176] nv2a/vk: Add texture finalize checks --- hw/xbox/nv2a/pgraph/vk/texture.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 0e9f28added..f925bb19e30 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1523,6 +1523,8 @@ void pgraph_vk_finalize_textures(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; + assert(!r->in_command_buffer); + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { r->texture_bindings[i] = NULL; } @@ -1530,6 +1532,8 @@ void pgraph_vk_finalize_textures(PGRAPHState *pg) destroy_dummy_texture(r); texture_cache_finalize(r); + assert(r->texture_cache.num_used == 0); + g_free(r->texture_format_properties); r->texture_format_properties = NULL; } From c66bdbe85d50126670fb088d3a7b7a18b1a9e854 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 116/176] nv2a/vk: Include result code in device/instance creation error messages --- hw/xbox/nv2a/pgraph/vk/instance.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index ae1abe102f4..9df440930c0 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -297,7 +297,7 @@ static bool create_instance(PGRAPHState *pg, Error **errp) result = vkCreateInstance(&create_info, NULL, &r->instance); if (result != VK_SUCCESS) { - error_setg(errp, "Failed to create instance"); + error_setg(errp, "Failed to create instance (%d)", result); return false; } @@ -599,7 +599,7 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp) result = vkCreateDevice(r->physical_device, &device_create_info, NULL, &r->device); if (result != VK_SUCCESS) { - error_setg(errp, "Failed to create logical device"); + error_setg(errp, "Failed to create logical device (%d)", 
result); return false; } From 9200246c62a030e1b3464b988028d74a1711d8bc Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 117/176] nv2a/vk: Make main buffers dedicated --- hw/xbox/nv2a/pgraph/vk/buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/buffer.c b/hw/xbox/nv2a/pgraph/vk/buffer.c index 440f8ae56e9..93458b254ac 100644 --- a/hw/xbox/nv2a/pgraph/vk/buffer.c +++ b/hw/xbox/nv2a/pgraph/vk/buffer.c @@ -53,10 +53,12 @@ void pgraph_vk_init_buffers(NV2AState *d) VmaAllocationCreateInfo host_alloc_create_info = { .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, - .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, }; VmaAllocationCreateInfo device_alloc_create_info = { .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT }; r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){ From 86daae14534f4bf0b449ceff03a46aac2f1a9242 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 118/176] meson: Define some VMA debug options for convenience --- thirdparty/meson.build | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/thirdparty/meson.build b/thirdparty/meson.build index b9b9256f97a..8bd29db98ef 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -3,10 +3,20 @@ if vulkan.found() libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTOTYPES'], dependencies: vulkan) volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) +debug_vma = false + vma_defns = [ '-DVMA_STATIC_VULKAN_FUNCTIONS=0', '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', -] + ] + +if debug_vma + vma_defns += [ + '-DVMA_DEBUG_MARGIN=16', + '-DVMA_DEBUG_DETECT_CORRUPTION=1', + 
'-DVMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY=256', + ] +endif libvma = static_library('vma', sources: 'vma.cc', cpp_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) vma = declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) From 824a378a233d7f9d2707fe44599f20d6c5fc92fc Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 28 Jul 2024 23:59:32 -0700 Subject: [PATCH 119/176] nv2a/vk: Disable memory trimmer for now --- hw/xbox/nv2a/pgraph/vk/renderer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index bc8adb62a12..272b5f6ae5c 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -235,6 +235,7 @@ static void __attribute__((constructor)) register_renderer(void) void pgraph_vk_check_memory_budget(PGRAPHState *pg) { +#if 0 // FIXME PGRAPHVkState *r = pg->vk_renderer_state; VkPhysicalDeviceMemoryProperties const *props; @@ -260,6 +261,7 @@ void pgraph_vk_check_memory_budget(PGRAPHState *pg) if (near_budget) { pgraph_vk_trim_texture_cache(pg); } +#endif #if 0 char *s; From 5a6b8a14da0852f2460b5c09571443456ec3fef9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 29 Jul 2024 17:25:37 -0700 Subject: [PATCH 120/176] nv2a/vk: Only clear stencil on images with stencil aspect --- hw/xbox/nv2a/pgraph/vk/draw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 135e7b303d1..556d6cc23f1 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1764,10 +1764,13 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value); VkImageAspectFlags aspect = 0; - if (parameter & NV097_CLEAR_SURFACE_Z) + if (parameter & NV097_CLEAR_SURFACE_Z) { aspect |= 
VK_IMAGE_ASPECT_DEPTH_BIT; - if (parameter & NV097_CLEAR_SURFACE_STENCIL) + } + if ((parameter & NV097_CLEAR_SURFACE_STENCIL) && + (r->zeta_binding->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT)) { aspect |= VK_IMAGE_ASPECT_STENCIL_BIT; + } attachments[num_attachments++] = (VkClearAttachment){ .aspectMask = aspect, From a5877b17f8ab17116a9a4d26dfb4d6c6f608b5d2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 29 Jul 2024 17:25:37 -0700 Subject: [PATCH 121/176] nv2a/vk: Fix compute pipeline finalize --- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index ebbaca65a8a..104f91d4687 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -545,15 +545,9 @@ static void pipeline_cache_release_node_resources(PGRAPHVkState *r, ComputePipel snode->pipeline = VK_NULL_HANDLE; } -static bool pipeline_cache_entry_pre_evict(Lru *lru, LruNode *node) -{ - // FIXME: Check pipeline not in use - return false; -} - static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) { - PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache); + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache); ComputePipeline *snode = container_of(node, ComputePipeline, node); pipeline_cache_release_node_resources(r, snode); } @@ -575,7 +569,6 @@ static void pipeline_cache_init(PGRAPHVkState *r) } r->compute.pipeline_cache.init_node = pipeline_cache_entry_init; r->compute.pipeline_cache.compare_nodes = pipeline_cache_entry_compare; - r->compute.pipeline_cache.pre_node_evict = pipeline_cache_entry_pre_evict; r->compute.pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict; } @@ -601,6 +594,8 @@ void pgraph_vk_finalize_compute(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; + assert(!r->in_command_buffer); + 
pipeline_cache_finalize(r); destroy_compute_pipeline_layout(r); destroy_descriptor_sets(pg); From 6171e40c390cdd587a1d5c1526844a1e8c59b1ae Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 29 Jul 2024 17:25:37 -0700 Subject: [PATCH 122/176] nv2a/vk: Minor cleanup --- hw/xbox/nv2a/pgraph/vk/texture.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index f925bb19e30..c97b6392d0a 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -229,11 +229,7 @@ static TextureLayout *get_texture_layout(PGRAPHState *pg, int texture_idx) } if (s.dimensionality == 2) { - hwaddr layer_size = 0; - if (s.cubemap) { - layer_size = get_cubemap_layer_size(pg, s); - } - + hwaddr layer_size = s.cubemap ? get_cubemap_layer_size(pg, s) : 0; const int num_layers = s.cubemap ? 6 : 1; for (int layer = 0; layer < num_layers; layer++) { unsigned int width = adjusted_width, height = adjusted_height; From bc066fdea9cb171beb6414a965002fd6f8fc9382 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 29 Jul 2024 17:25:37 -0700 Subject: [PATCH 123/176] nv2a/vk: Flush/barrier on texture upload --- hw/xbox/nv2a/pgraph/vk/texture.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index c97b6392d0a..ee72d6624c6 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -545,13 +545,31 @@ static void upload_texture_image(PGRAPHState *pg, int texture_idx, region++; } } - assert(buffer_offset <= texture_data_size); + assert(buffer_offset <= r->storage_buffers[BUFFER_STAGING_SRC].buffer_size); + + vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + vmaUnmapMemory(r->allocator, r->storage_buffers[BUFFER_STAGING_SRC].allocation); // FIXME: Use nondraw. 
Need to fill and copy tex buffer at once VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + VkBufferMemoryBarrier host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_STAGING_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, binding->current_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -974,6 +992,11 @@ static void create_dummy_texture(PGRAPHState *pg) r->storage_buffers[BUFFER_STAGING_SRC].allocation, (void *)&mapped_memory_ptr)); memset(mapped_memory_ptr, 0xff, texture_data_size); + + vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + vmaUnmapMemory(r->allocator, r->storage_buffers[BUFFER_STAGING_SRC].allocation); From a3d0131a035b12e48e28e7ef46c054d7eeae0ff9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 29 Jul 2024 17:25:37 -0700 Subject: [PATCH 124/176] nv2a/vk: Fix mipmap filtering for _LOD0 min filters --- hw/xbox/nv2a/pgraph/vk/texture.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index ee72d6624c6..726350213ee 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1315,6 +1315,12 @@ static void create_texture(PGRAPHState *pg, int texture_idx) vk_mag_filter = vk_min_filter = VK_FILTER_NEAREST; } + bool mipmap_en = + !f_basic.linear && + !(min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0 || + min_filter == 
NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0); + bool mipmap_nearest = f_basic.linear || image_create_info.mipLevels == 1 || min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || @@ -1342,8 +1348,8 @@ static void create_texture(PGRAPHState *pg, int texture_idx) .compareOp = VK_COMPARE_OP_ALWAYS, .mipmapMode = mipmap_nearest ? VK_SAMPLER_MIPMAP_MODE_NEAREST : VK_SAMPLER_MIPMAP_MODE_LINEAR, - .minLod = 0.0, - .maxLod = f_basic.linear ? 0.0 : image_create_info.mipLevels, + .minLod = mipmap_en ? MIN(state.min_mipmap_level, state.levels - 1) : 0.0, + .maxLod = mipmap_en ? MIN(state.max_mipmap_level, state.levels - 1) : 0.0, .mipLodBias = 0.0, .pNext = sampler_next_struct, }; From 605605d0da567ba3b783ffbb2e6ab4e731118832 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 00:00:33 -0700 Subject: [PATCH 125/176] nv2a/vk: Fix scissor overflow --- hw/xbox/nv2a/pgraph/vk/draw.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 556d6cc23f1..52844b76863 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1693,29 +1693,29 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) r->clear_parameter = parameter; - unsigned int xmin = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN); - unsigned int xmax = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX); - unsigned int ymin = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN); - unsigned int ymax = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX); + uint32_t clearrectx = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX); + uint32_t clearrecty = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY); + + int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN); + int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX); + int ymin = GET_MASK(clearrecty, 
NV_PGRAPH_CLEARRECTY_YMIN); + int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX); NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax, ymax, write_color ? " color" : "", write_zeta ? " zeta" : ""); + begin_pre_draw(pg); + begin_draw(pg); + + // FIXME: What does hardware do when min <= max? xmin = MIN(xmin, binding->width - 1); ymin = MIN(xmin, binding->height - 1); xmax = MIN(xmax, binding->width - 1); ymax = MIN(ymax, binding->height - 1); - begin_pre_draw(pg); - begin_draw(pg); - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; + int scissor_width = MAX(0, xmax - xmin + 1), + scissor_height = MAX(0, ymax - ymin + 1); pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); From 6ba917fed98cfb14bd0e020306bc6c36231f20f0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 00:00:33 -0700 Subject: [PATCH 126/176] nv2a/vk: Fix clear scissor ymin --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 52844b76863..6e8883af0c7 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1710,7 +1710,7 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) // FIXME: What does hardware do when min <= max? 
xmin = MIN(xmin, binding->width - 1); - ymin = MIN(xmin, binding->height - 1); + ymin = MIN(ymin, binding->height - 1); xmax = MIN(xmax, binding->width - 1); ymax = MIN(ymax, binding->height - 1); From 20f318f43725fbeaec98a18dba174d4965236046 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 00:00:33 -0700 Subject: [PATCH 127/176] nv2a/glsl: Add swizzled attr check --- hw/xbox/nv2a/pgraph/glsl/vsh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 1cc1753f058..a60fbe265dd 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -98,9 +98,11 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { bool is_uniform = state->uniform_attrs & (1 << i); + bool is_swizzled = state->swizzle_attrs & (1 << i); bool is_compressed = state->compressed_attrs & (1 << i); assert(!(is_uniform && is_compressed)); + assert(!(is_uniform && is_swizzled)); if (is_uniform) { mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, From ec591483331bfa2aaa003c8d9981887656e3c25b Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 00:00:33 -0700 Subject: [PATCH 128/176] nv2a: Normalize unnormalized texture coordinates in shader --- hw/xbox/nv2a/pgraph/gl/texture.c | 60 ++++++-------- hw/xbox/nv2a/pgraph/glsl/psh.c | 130 ++++++++++--------------------- hw/xbox/nv2a/pgraph/vk/texture.c | 7 -- 3 files changed, 65 insertions(+), 132 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c index 4011e979fec..b951b7e1e0c 100644 --- a/hw/xbox/nv2a/pgraph/gl/texture.c +++ b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -203,7 +203,6 @@ void pgraph_gl_bind_textures(NV2AState *d) glActiveTexture(GL_TEXTURE0 + i); if (!enabled) { glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, 0); glBindTexture(GL_TEXTURE_1D, 0); 
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_3D, 0); @@ -363,11 +362,7 @@ void pgraph_gl_bind_textures(NV2AState *d) surface->vram_addr, surface->width, surface->height); pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i); binding->draw_time = surface->draw_time; - if (binding->gl_target == GL_TEXTURE_RECTANGLE) { - binding->scale = pg->surface_scale_factor; - } else { - binding->scale = 1; - } + binding->scale = pg->surface_scale_factor; } apply_texture_parameters(binding, @@ -428,28 +423,29 @@ static void upload_gl_texture(GLenum gl_target, case GL_TEXTURE_1D: assert(false); break; - case GL_TEXTURE_RECTANGLE: { - /* Can't handle strides unaligned to pixels */ - assert(s.pitch % f.bytes_per_pixel == 0); - - uint8_t *converted = pgraph_convert_texture_data( - s, texture_data, palette_data, adjusted_width, adjusted_height, 1, - adjusted_pitch, 0, NULL); - glPixelStorei(GL_UNPACK_ROW_LENGTH, - converted ? 0 : adjusted_pitch / f.bytes_per_pixel); - glTexImage2D(gl_target, 0, f.gl_internal_format, - adjusted_width, adjusted_height, 0, - f.gl_format, f.gl_type, - converted ? converted : texture_data); - - if (converted) { - g_free(converted); - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - break; - } case GL_TEXTURE_2D: + if (f.linear) { + /* Can't handle strides unaligned to pixels */ + assert(s.pitch % f.bytes_per_pixel == 0); + + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data, palette_data, adjusted_width, adjusted_height, 1, + adjusted_pitch, 0, NULL); + glPixelStorei(GL_UNPACK_ROW_LENGTH, + converted ? 0 : adjusted_pitch / f.bytes_per_pixel); + glTexImage2D(GL_TEXTURE_2D, 0, f.gl_internal_format, + adjusted_width, adjusted_height, 0, + f.gl_format, f.gl_type, + converted ? 
converted : texture_data); + + if (converted) { + g_free(converted); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + break; + } + /* fallthru */ case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: @@ -645,15 +641,7 @@ static TextureBinding* generate_texture(const TextureShape s, gl_target = GL_TEXTURE_CUBE_MAP; } else { if (f.linear) { - /* linear textures use unnormalised texcoords. - * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but - * does not allow repeat and mirror wrap modes. - * (or mipmapping, but xbox d3d says 'Non swizzled and non - * compressed textures cannot be mip mapped.') - * Not sure if that'll be an issue. */ - - /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ - gl_target = GL_TEXTURE_RECTANGLE; + gl_target = GL_TEXTURE_2D; assert(s.dimensionality == 2); } else { switch(s.dimensionality) { diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index f93fbd82edd..ff38a51106b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -562,31 +562,13 @@ static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo fina ps->varE = ps->varF = NULL; } -static enum PS_TEXTUREMODES correct_texture_mode_for_dimensionality(enum PS_TEXTUREMODES mode, const PshState *state, int i) +static const char *get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i) { + const char *sampler2D = "sampler2D"; + const char *sampler3D = "sampler3D"; + const char *samplerCube = "samplerCube"; int dim = state->dim_tex[i]; - switch (mode) { - case PS_TEXTUREMODES_PROJECT2D: - return dim == 2 ? PS_TEXTUREMODES_PROJECT2D : - dim == 3 ? PS_TEXTUREMODES_PROJECT3D : - mode; - case PS_TEXTUREMODES_PROJECT3D: - return dim == 2 ? PS_TEXTUREMODES_PROJECT2D : mode; - case PS_TEXTUREMODES_DOT_STR_3D: - return dim == 2 ? 
PS_TEXTUREMODES_DOT_ST : mode; - default: - return mode; - } -} - -static const char sampler2D[] = "sampler2D"; -static const char sampler3D[] = "sampler3D"; -static const char samplerCube[] = "samplerCube"; -static const char sampler2DRect[] = "sampler2DRect"; - -static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i) -{ // FIXME: Cleanup switch (mode) { default: @@ -598,7 +580,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s if (state->tex_x8y24[i] && state->vulkan) { return "usampler2D"; } - return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; + return sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: case PS_TEXTUREMODES_BUMPENVMAP_LUM: @@ -608,7 +590,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s assert(!"Shadow map support not implemented for this mode"); } assert(state->dim_tex[i] == 2); - return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; + return sampler2D; case PS_TEXTUREMODES_PROJECT3D: case PS_TEXTUREMODES_DOT_STR_3D: @@ -616,10 +598,9 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s return "usampler2D"; } if (state->shadow_map[i]) { - return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D; + return sampler2D; } - assert(state->dim_tex[i] == 3); - return sampler3D; + return dim == 2 ? 
sampler2D : sampler3D; case PS_TEXTUREMODES_CUBEMAP: case PS_TEXTUREMODES_DOT_RFLCT_DIFF: @@ -664,27 +645,19 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa return; } - mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i); const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func]; - if (ps->state.rect_tex[i] && ps->state.vulkan) { - if (ps->state.tex_x8y24[i]) { - mstring_append_fmt( - vars, - "uvec4 t%d_depth_raw = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, i, i, i); - mstring_append_fmt( - vars, - "vec4 t%d_depth = vec4(float(t%d_depth_raw.x >> 8) / 0xFFFFFF, 1.0, 0.0, 0.0);\n", - i, i); - } else { - mstring_append_fmt( - vars, - "vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i, - i, i, i); - } - } else { - mstring_append_fmt( - vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i, - i); + + bool extract_msb_24b = ps->state.tex_x8y24[i] && ps->state.vulkan; + + mstring_append_fmt(vars, + "%svec4 t%d_depth%s = textureProj(texSamp%d, pT%d.xyw);\n", + extract_msb_24b ? "u" : "", i, extract_msb_24b ? "_raw" : "", i, i); + + if (extract_msb_24b) { + mstring_append_fmt(vars, + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x >> 8) " + "/ 0xFFFFFF, 1.0, 0.0, 0.0);\n", + i, i); } // Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float. 
@@ -733,26 +706,13 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex) { - // FIXME: Convolution for 2D textures // FIXME: Quincunx - assert(ps->state.rect_tex[tex]); - - if (ps->state.vulkan) { - mstring_append_fmt(vars, - "vec4 t%d = vec4(0.0);\n" - "for (int i = 0; i < 9; i++) {\n" - " vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n" - " t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n" - "}\n", tex, tex, tex, tex, tex); - } else { - mstring_append_fmt(vars, - "vec4 t%d = vec4(0.0);\n" - "for (int i = 0; i < 9; i++) {\n" - " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n" - " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n" - "}\n", tex, tex, tex, tex, tex); - - } + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i] / (textureSize(texSamp%d, 0) * texScale%d), 0);\n" + " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex, tex, tex); } static MString* psh_convert(struct PixelShader *ps) @@ -924,6 +884,12 @@ static MString* psh_convert(struct PixelShader *ps) ps->code = mstring_new(); + for (i = 0; i < 4; i++) { + if (ps->state.rect_tex[i]) { + mstring_append_fmt(vars, "pT%d.xy /= textureSize(texSamp%d, 0) / texScale%d;\n", i, i, i); + } + } + for (i = 0; i < 4; i++) { const char *sampler_type = get_sampler_type(ps->tex_modes[i], &ps->state, i); @@ -944,22 +910,9 @@ static MString* psh_convert(struct PixelShader *ps) psh_append_shadowmap(ps, i, false, vars); } else { apply_border_adjustment(ps, vars, i, "pT%d"); - mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); - if (ps->state.rect_tex[i]) { - if ((ps->state.conv_tex[i] == - CONVOLUTION_FILTER_GAUSSIAN) || - (ps->state.conv_tex[i] == - CONVOLUTION_FILTER_QUINCUNX)) { - apply_convolution_filter(ps, 
vars, i); - } else { - if (ps->state.vulkan) { - mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", - i, i, i, i); - } else { - mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", - i, i, i); - } - } + if (((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) || + (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX))) { + apply_convolution_filter(ps, vars, i); } else { mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", i, i, i); @@ -1011,8 +964,8 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdt%d = bumpMat%d * dsdt%d;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * (pT%d.xy + dsdt%d));\n", - i, i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, (pT%d.xy + dsdt%d));\n", + i, i, i, i); break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); @@ -1029,8 +982,8 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdtl%d.st = bumpMat%d * dsdtl%d.st;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * (pT%d.xy + dsdtl%d.st));\n", - i, i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, (pT%d.xy + dsdtl%d.st));\n", + i, i, i, i); mstring_append_fmt(vars, "t%d = t%d * (bumpScale%d * dsdtl%d.p + bumpOffset%d);\n", i, i, i, i, i); break; @@ -1049,8 +1002,8 @@ static MString* psh_convert(struct PixelShader *ps) i, i, dotmap_func, ps->input_tex[i], i, i-1, i); apply_border_adjustment(ps, vars, i, "dotST%d"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * dotST%d);\n", - i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotST%d);\n", + i, i, i); break; case PS_TEXTUREMODES_DOT_ZW: assert(i >= 2); @@ -1272,7 +1225,6 @@ MString *pgraph_gen_psh_glsl(const PshState state) ps.flags = state.combiner_control >> 8; for (i = 0; i < 4; i++) { ps.tex_modes[i] = 
(state.shader_stage_program >> (i * 5)) & 0x1F; - ps.tex_modes[i] = correct_texture_mode_for_dimensionality(ps.tex_modes[i], &state, i); } ps.dot_map[0] = 0; diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 726350213ee..f74f5c0c4f4 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1305,12 +1305,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) if (is_linear_filter_supported_for_format(r, state.color_format)) { vk_mag_filter = pgraph_texture_min_filter_vk_map[mag_filter]; vk_min_filter = pgraph_texture_min_filter_vk_map[min_filter]; - - if (f_basic.linear && vk_mag_filter != vk_min_filter) { - // FIXME: Per spec, if coordinates unnormalized, filters must be - // same. - vk_mag_filter = vk_min_filter = VK_FILTER_LINEAR; - } } else { vk_mag_filter = vk_min_filter = VK_FILTER_NEAREST; } @@ -1343,7 +1337,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) // .anisotropyEnable = VK_TRUE, // .maxAnisotropy = properties.limits.maxSamplerAnisotropy, .borderColor = vk_border_color, - .unnormalizedCoordinates = f_basic.linear ? VK_TRUE : VK_FALSE, .compareEnable = VK_FALSE, .compareOp = VK_COMPARE_OP_ALWAYS, .mipmapMode = mipmap_nearest ? 
VK_SAMPLER_MIPMAP_MODE_NEAREST : From 1c38a0a42b1418fb7d39c9209fb6707fe498e72c Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 13:48:48 -0700 Subject: [PATCH 129/176] nv2a/psh: Normalize coords at sample time --- hw/xbox/nv2a/pgraph/glsl/psh.c | 67 ++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index ff38a51106b..af29b97d8b0 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -645,13 +645,17 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa return; } + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i); + const char *tex_remap = ps->state.rect_tex[i] ? normalize_tex_coords : ""; + const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func]; bool extract_msb_24b = ps->state.tex_x8y24[i] && ps->state.vulkan; - mstring_append_fmt(vars, - "%svec4 t%d_depth%s = textureProj(texSamp%d, pT%d.xyw);\n", - extract_msb_24b ? "u" : "", i, extract_msb_24b ? "_raw" : "", i, i); + mstring_append_fmt( + vars, "%svec4 t%d_depth%s = textureProj(texSamp%d, %s(pT%d.xyw));\n", + extract_msb_24b ? "u" : "", i, extract_msb_24b ? "_raw" : "", i, + tex_remap, i); if (extract_msb_24b) { mstring_append_fmt(vars, @@ -707,12 +711,17 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex) { // FIXME: Quincunx + + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", tex); + const char *tex_remap = ps->state.rect_tex[tex] ? 
normalize_tex_coords : ""; + mstring_append_fmt(vars, "vec4 t%d = vec4(0.0);\n" "for (int i = 0; i < 9; i++) {\n" - " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i] / (textureSize(texSamp%d, 0) * texScale%d), 0);\n" - " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n" - "}\n", tex, tex, tex, tex, tex, tex, tex); + " vec3 texCoordDelta = vec3(convolution3x3[i], 0);\n" + " texCoordDelta.xy /= textureSize(texSamp%d, 0);\n" + " t%d += textureProj(texSamp%d, %s(pT%d.xyw) + texCoordDelta) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex_remap, tex); } static MString* psh_convert(struct PixelShader *ps) @@ -884,16 +893,13 @@ static MString* psh_convert(struct PixelShader *ps) ps->code = mstring_new(); - for (i = 0; i < 4; i++) { - if (ps->state.rect_tex[i]) { - mstring_append_fmt(vars, "pT%d.xy /= textureSize(texSamp%d, 0) / texScale%d;\n", i, i, i); - } - } - for (i = 0; i < 4; i++) { const char *sampler_type = get_sampler_type(ps->tex_modes[i], &ps->state, i); + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i); + const char *tex_remap = ps->state.rect_tex[i] ? 
normalize_tex_coords : ""; + assert(ps->dot_map[i] < 8); const char *dotmap_func = dotmap_funcs[ps->dot_map[i]]; if (ps->dot_map[i] > 3) { @@ -914,8 +920,8 @@ static MString* psh_convert(struct PixelShader *ps) (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX))) { apply_convolution_filter(ps, vars, i); } else { - mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyw));\n", + i, i, tex_remap, i); } } break; @@ -964,8 +970,8 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdt%d = bumpMat%d * dsdt%d;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, (pT%d.xy + dsdt%d));\n", - i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdt%d));\n", + i, i, tex_remap, i, i); break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); @@ -982,8 +988,8 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdtl%d.st = bumpMat%d * dsdtl%d.st;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, (pT%d.xy + dsdtl%d.st));\n", - i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdtl%d.st));\n", + i, i, tex_remap, i, i); mstring_append_fmt(vars, "t%d = t%d * (bumpScale%d * dsdtl%d.p + bumpOffset%d);\n", i, i, i, i, i); break; @@ -1002,8 +1008,8 @@ static MString* psh_convert(struct PixelShader *ps) i, i, dotmap_func, ps->input_tex[i], i, i-1, i); apply_border_adjustment(ps, vars, i, "dotST%d"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotST%d);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(dotST%d));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DOT_ZW: assert(i >= 2); @@ -1071,16 +1077,16 @@ static MString* psh_convert(struct PixelShader *ps) assert(!ps->state.rect_tex[i]); mstring_append_fmt(vars, "vec2 t%dAR = t%d.ar;\n", i, ps->input_tex[i]); 
apply_border_adjustment(ps, vars, i, "t%dAR"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dAR);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dAR));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DPNDNT_GB: assert(i >= 1); assert(!ps->state.rect_tex[i]); mstring_append_fmt(vars, "vec2 t%dGB = t%d.gb;\n", i, ps->input_tex[i]); apply_border_adjustment(ps, vars, i, "t%dGB"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dGB);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dGB));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DOTPRODUCT: assert(i == 1 || i == 2); @@ -1112,6 +1118,19 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "if (t%d.a == 0.0) { discard; };\n", i); } + + if (ps->state.rect_tex[i]) { + mstring_append_fmt(preflight, + "vec2 norm%d(vec2 coord) {\n" + " return coord / (textureSize(texSamp%d, 0) / texScale%d);\n" + "}\n", + i, i, i); + mstring_append_fmt(preflight, + "vec3 norm%d(vec3 coord) {\n" + " return vec3(norm%d(coord.xy), coord.z);\n" + "}\n", + i, i); + } } } From 62acb2db7ebc02ad7e4eaf64f44ba33c6cf98118 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 16:23:14 -0700 Subject: [PATCH 130/176] nv2a/psh: Drop rect_tex assertion --- hw/xbox/nv2a/pgraph/glsl/psh.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index af29b97d8b0..fae76043fd0 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -930,7 +930,6 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, true, vars); } else { - assert(!ps->state.rect_tex[i]); apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n", i, i, i); From 76e2b779e3b27bf02b847f79e17d0f9a9329373f Mon Sep 17 00:00:00 2001 From: Matt Borgerson 
Date: Tue, 30 Jul 2024 17:01:41 -0700 Subject: [PATCH 131/176] nv2a/psh: Handle rect tex on project3d --- hw/xbox/nv2a/pgraph/glsl/psh.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index fae76043fd0..295cc9a1f7c 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -931,8 +931,8 @@ static MString* psh_convert(struct PixelShader *ps) psh_append_shadowmap(ps, i, true, vars); } else { apply_border_adjustment(ps, vars, i, "pT%d"); - mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyzw));\n", + i, i, tex_remap, i); } break; case PS_TEXTUREMODES_CUBEMAP: @@ -1129,6 +1129,11 @@ static MString* psh_convert(struct PixelShader *ps) " return vec3(norm%d(coord.xy), coord.z);\n" "}\n", i, i); + mstring_append_fmt(preflight, + "vec4 norm%d(vec4 coord) {\n" + " return vec4(norm%d(coord.xy), 0, coord.w);\n" + "}\n", + i, i); } } } From a2b994d80d95f5120f49512471ced33fed177343 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 2024 20:32:45 -0700 Subject: [PATCH 132/176] nv2a/vk: Only bind clear fragment shader on partial color clear --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 6e8883af0c7..60340cf5e0b 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -470,7 +470,7 @@ static void create_clear_pipeline(PGRAPHState *pg) .module = r->quad_vert_module->module, .pName = "main", }; - if (clear_any_color_channels) { + if (partial_color_clear) { shader_stages[num_active_shader_stages++] = (VkPipelineShaderStageCreateInfo){ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, From 3096f2a9c8fa6e3503b1b254396de678bc022d19 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 30 Jul 
2024 20:33:05 -0700 Subject: [PATCH 133/176] nv2a/vk: Always bind fragment shader in draw pipeline --- hw/xbox/nv2a/pgraph/vk/draw.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 60340cf5e0b..b9ea6c0dd32 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -762,15 +762,13 @@ static void create_pipeline(PGRAPHState *pg) .pName = "main", }; } - if (r->color_binding) { - shader_stages[num_active_shader_stages++] = - (VkPipelineShaderStageCreateInfo){ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = r->shader_binding->fragment->module, - .pName = "main", - }; - } + shader_stages[num_active_shader_stages++] = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = r->shader_binding->fragment->module, + .pName = "main", + }; VkPipelineVertexInputStateCreateInfo vertex_input = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, From 2f910eeacf2b2f670d492ee2450e87cc3665faaf Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 00:37:51 -0700 Subject: [PATCH 134/176] nv2a/vk: Fixup unaligned attribute data in inline buffer --- hw/xbox/nv2a/pgraph/vk/draw.c | 184 ++++++++++++++++++++++++++++-- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 + hw/xbox/nv2a/pgraph/vk/vertex.c | 1 - 3 files changed, 177 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index b9ea6c0dd32..78d1a055593 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1819,14 +1819,11 @@ static void pgraph_vk_debug_attrs(NV2AState *d) } #endif -static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx, +static void bind_vertex_buffer(PGRAPHState *pg, uint16_t inline_map, VkDeviceSize offset) { PGRAPHVkState 
*r = pg->vk_renderer_state; - assert(buffer_idx == BUFFER_VERTEX_RAM || - buffer_idx == BUFFER_VERTEX_INLINE); - if (r->num_active_vertex_binding_descriptions == 0) { return; } @@ -1836,6 +1833,8 @@ static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx, for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) { int attr_idx = r->vertex_attribute_descriptions[i].location; + int buffer_idx = (inline_map & (1 << attr_idx)) ? BUFFER_VERTEX_INLINE : + BUFFER_VERTEX_RAM; buffers[i] = r->storage_buffers[buffer_idx].buffer; offsets[i] = offset + r->vertex_attribute_offsets[attr_idx]; } @@ -1845,6 +1844,11 @@ static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx, offsets); } +static void bind_inline_vertex_buffer(PGRAPHState *pg, VkDeviceSize offset) +{ + bind_vertex_buffer(pg, 0xffff, offset); +} + void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) { NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta, @@ -1881,6 +1885,160 @@ static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size) return false; } +static void get_size_and_count_for_format(VkFormat fmt, size_t *size, size_t *count) +{ + static const struct { + size_t size; + size_t count; + } table[] = { + [VK_FORMAT_R8_UNORM] = { 1, 1 }, + [VK_FORMAT_R8G8_UNORM] = { 1, 2 }, + [VK_FORMAT_R8G8B8_UNORM] = { 1, 3 }, + [VK_FORMAT_R8G8B8A8_UNORM] = { 1, 4 }, + [VK_FORMAT_R16_SNORM] = { 2, 1 }, + [VK_FORMAT_R16G16_SNORM] = { 2, 2 }, + [VK_FORMAT_R16G16B16_SNORM] = { 2, 3 }, + [VK_FORMAT_R16G16B16A16_SNORM] = { 2, 4 }, + [VK_FORMAT_R16_SSCALED] = { 2, 1 }, + [VK_FORMAT_R16G16_SSCALED] = { 2, 2 }, + [VK_FORMAT_R16G16B16_SSCALED] = { 2, 3 }, + [VK_FORMAT_R16G16B16A16_SSCALED] = { 2, 4 }, + [VK_FORMAT_R32_SFLOAT] = { 4, 1 }, + [VK_FORMAT_R32G32_SFLOAT] = { 4, 2 }, + [VK_FORMAT_R32G32B32_SFLOAT] = { 4, 3 }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { 4, 4 }, + [VK_FORMAT_R32_SINT] = { 4, 1 }, + }; + + assert(fmt < ARRAY_SIZE(table)); + 
assert(table[fmt].size); + + *size = table[fmt].size; + *count = table[fmt].count; +} + +typedef struct VertexBufferRemap { + uint16_t attributes; + size_t buffer_space_required; + struct { + VkDeviceAddress offset; + VkDeviceSize stride; + } map[NV2A_VERTEXSHADER_ATTRIBUTES]; +} VertexBufferRemap; + +static VertexBufferRemap remap_unaligned_attributes(PGRAPHState *pg, + uint32_t num_vertices) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VertexBufferRemap remap = {0}; + + VkDeviceAddress output_offset = 0; + + for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) { + int desc_loc = r->vertex_attribute_to_description_location[attr_id]; + if (desc_loc < 0) { + continue; + } + + VkVertexInputBindingDescription *desc = + &r->vertex_binding_descriptions[desc_loc]; + VkVertexInputAttributeDescription *attr = + &r->vertex_attribute_descriptions[desc_loc]; + + size_t element_size, element_count; + get_size_and_count_for_format(attr->format, &element_size, &element_count); + + bool offset_valid = + (r->vertex_attribute_offsets[attr_id] % element_size == 0); + bool stride_valid = (desc->stride % element_size == 0); + + if (offset_valid && stride_valid) { + continue; + } + + remap.attributes |= 1 << attr_id; + remap.map[attr_id].offset = ROUND_UP(output_offset, element_size); + remap.map[attr_id].stride = element_size * element_count; + + // fprintf(stderr, + // "attr %02d remapped: " + // "%08" HWADDR_PRIx "->%08" HWADDR_PRIx " " + // "stride=%d->%zd\n", + // attr_id, r->vertex_attribute_offsets[attr_id], + // remap.map[attr_id].offset, desc->stride, + // remap.map[attr_id].stride); + + output_offset = + remap.map[attr_id].offset + remap.map[attr_id].stride * num_vertices; + } + + remap.buffer_space_required = output_offset; + return remap; +} + +static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, + VertexBufferRemap remap, + uint32_t start_vertex, + uint32_t num_vertices) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + 
PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING]; + + r->vertex_buffer_inline = remap.attributes; + + if (!remap.attributes) { + return; + } + + VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16); + size_t total_space_required = + (starting_offset - buffer->buffer_offset) + remap.buffer_space_required; + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required); + assert(pgraph_vk_buffer_has_space_for(pg, BUFFER_VERTEX_INLINE_STAGING, + total_space_required, 1)); + + buffer->buffer_offset = starting_offset; // Aligned + + // FIXME: SIMD memcpy + // FIXME: Caching + // FIXME: Account for only what is drawn + assert(start_vertex == 0); + assert(buffer->mapped); + + // Copy vertex data + for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) { + if (!(remap.attributes & (1 << attr_id))) { + continue; + } + + int bind_desc_loc = + r->vertex_attribute_to_description_location[attr_id]; + assert(bind_desc_loc >= 0); + + VkVertexInputBindingDescription *bind_desc = + &r->vertex_binding_descriptions[bind_desc_loc]; + + VkDeviceSize attr_buffer_offset = + buffer->buffer_offset + remap.map[attr_id].offset; + + uint8_t *out_ptr = buffer->mapped + attr_buffer_offset; + uint8_t *in_ptr = d->vram_ptr + r->vertex_attribute_offsets[attr_id]; + + for (int vertex_id = 0; vertex_id < num_vertices; vertex_id++) { + memcpy(out_ptr, in_ptr, remap.map[attr_id].stride); + out_ptr += remap.map[attr_id].stride; + in_ptr += bind_desc->stride; + } + + r->vertex_attribute_offsets[attr_id] = attr_buffer_offset; + bind_desc->stride = remap.map[attr_id].stride; + } + + buffer->buffer_offset += remap.buffer_space_required; +} + void pgraph_vk_flush_draw(NV2AState *d) { PGRAPHState *pg = &d->pgraph; @@ -1904,11 +2062,19 @@ void pgraph_vk_flush_draw(NV2AState *d) pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start, pg->draw_arrays_max_count - 1, false, 0, 
pg->draw_arrays_max_count - 1); + uint32_t min_element = INT_MAX; + uint32_t max_element = 0; + for (int i = 0; i < pg->draw_arrays_length; i++) { + min_element = MIN(pg->draw_arrays_start[i], min_element); + max_element = MAX(max_element, pg->draw_arrays_start[i] + pg->draw_arrays_count[i]); + } sync_vertex_ram_buffer(pg); + VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element); begin_pre_draw(pg); begin_draw(pg); - bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0); + bind_vertex_buffer(pg, remap.attributes, 0); for (int i = 0; i < pg->draw_arrays_length; i++) { uint32_t start = pg->draw_arrays_start[i], count = pg->draw_arrays_count[i]; @@ -1940,12 +2106,14 @@ void pgraph_vk_flush_draw(NV2AState *d) d, min_element, max_element, false, 0, pg->inline_elements[pg->inline_elements_length - 1]); sync_vertex_ram_buffer(pg); + VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element + 1); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element + 1); begin_pre_draw(pg); VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer( pg, pg->inline_elements, index_data_size); begin_draw(pg); - bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0); + bind_vertex_buffer(pg, remap.attributes, 0); vkCmdBindIndexBuffer(r->command_buffer, r->storage_buffers[BUFFER_INDEX].buffer, buffer_offset, VK_INDEX_TYPE_UINT32); @@ -1983,7 +2151,7 @@ void pgraph_vk_flush_draw(NV2AState *d) VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( pg, data, sizes, r->num_active_vertex_attribute_descriptions); begin_draw(pg); - bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset); + bind_inline_vertex_buffer(pg, buffer_offset); vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0); end_draw(pg); @@ -2024,7 +2192,7 @@ void pgraph_vk_flush_draw(NV2AState *d) VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( pg, &inline_array_data, &inline_array_data_size, 
1); begin_draw(pg); - bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset); + bind_inline_vertex_buffer(pg, buffer_offset); vkCmdDraw(r->command_buffer, index_count, 1, 0, 0); end_draw(pg); NV2A_VK_DGROUP_END(); diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 9fb13ee1fc6..aa177723ad6 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -343,6 +343,7 @@ typedef struct PGRAPHVkState { VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; int num_active_vertex_binding_descriptions; hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + uint16_t vertex_buffer_inline; QTAILQ_HEAD(, SurfaceBinding) surfaces; QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c index ee567f9dc64..af13bd67b49 100644 --- a/hw/xbox/nv2a/pgraph/vk/vertex.c +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -263,7 +263,6 @@ void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, }; r->vertex_attribute_offsets[i] = attrib_data_addr; - // FIXME: Data alignment may not meet requirements NV2A_VK_DGROUP_END(); } From d47fef94672a08cf43a1c2f8fc28ffac767e279a Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 00:37:51 -0700 Subject: [PATCH 135/176] nv2a/vk: Fix reports --- hw/xbox/nv2a/pgraph/vk/reports.c | 62 ++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c index 2231f4ffe57..b6570523722 100644 --- a/hw/xbox/nv2a/pgraph/vk/reports.c +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -26,7 +26,7 @@ void pgraph_vk_init_reports(PGRAPHState *pg) QSIMPLEQ_INIT(&r->report_queue); r->num_queries_in_flight = 0; r->max_queries_in_flight = 1024; - r->new_query_needed = true; + r->new_query_needed = false; r->query_in_flight = false; r->zpass_pixel_count_result = 0; @@ -43,10 
+43,10 @@ void pgraph_vk_finalize_reports(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - QueryReport *q, *next; - QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) { + QueryReport *report; + while ((report = QSIMPLEQ_FIRST(&r->report_queue)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); - g_free(q); + g_free(report); } vkDestroyQueryPool(r->device, r->query_pool, NULL); @@ -57,9 +57,13 @@ void pgraph_vk_clear_report_value(NV2AState *d) PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; - QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate - q->clear = true; - QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry); + QueryReport *report = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + report->clear = true; + report->parameter = 0; + report->query_count = r->num_queries_in_flight; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->new_query_needed = true; } void pgraph_vk_get_report(NV2AState *d, uint32_t parameter) @@ -70,11 +74,11 @@ void pgraph_vk_get_report(NV2AState *d, uint32_t parameter) uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate - q->clear = false; - q->parameter = parameter; - q->query_count = r->num_queries_in_flight; - QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry); + QueryReport *report = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + report->clear = false; + report->parameter = parameter; + report->query_count = r->num_queries_in_flight; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); r->new_query_needed = true; } @@ -105,30 +109,36 @@ void pgraph_vk_process_pending_reports_internal(NV2AState *d) } // Write out queries - QueryReport *q, *next; int num_results_counted = 0; + const int result_divisor = + pg->surface_scale_factor * pg->surface_scale_factor; - int result_divisor = pg->surface_scale_factor * 
pg->surface_scale_factor; + QueryReport *report; + while ((report = QSIMPLEQ_FIRST(&r->report_queue)) != NULL) { + assert(report->query_count >= num_results_counted); + assert(report->query_count <= r->num_queries_in_flight); + + while (num_results_counted < report->query_count) { + r->zpass_pixel_count_result += + query_results[num_results_counted++]; + } - QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) { - if (q->clear) { + if (report->clear) { NV2A_VK_DPRINTF("Cleared"); r->zpass_pixel_count_result = 0; } else { - assert(q->query_count >= num_results_counted); - assert(q->query_count <= r->num_queries_in_flight); - - while (num_results_counted < q->query_count) { - r->zpass_pixel_count_result += - query_results[num_results_counted++]; - } - pgraph_write_zpass_pixel_cnt_report( - d, q->parameter, + d, report->parameter, r->zpass_pixel_count_result / result_divisor); } + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); - g_free(q); + g_free(report); + } + + // Add remaining results + while (num_results_counted < r->num_queries_in_flight) { + r->zpass_pixel_count_result += query_results[num_results_counted++]; } r->num_queries_in_flight = 0; From 31db8d04b061cfbc8165c0be52348edc8e151f33 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 00:37:51 -0700 Subject: [PATCH 136/176] nv2a/vk: Ensure queries do not include clears --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 78d1a055593..48f56c1fd09 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1442,7 +1442,7 @@ static void begin_draw(PGRAPHState *pg) assert(r->in_command_buffer); // Visibility testing - if (pg->zpass_pixel_count_enable) { + if (!pg->clearing && pg->zpass_pixel_count_enable) { if (r->new_query_needed && r->query_in_flight) { end_render_pass(r); end_query(r); From ca42f0f2df2c40928e1cda491feb1d422ad0ed33 Mon Sep 17 00:00:00 
2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 00:37:51 -0700 Subject: [PATCH 137/176] nv2a/vk: Clear in separate renderpass for now --- hw/xbox/nv2a/pgraph/vk/draw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 48f56c1fd09..148bfd60825 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1456,6 +1456,10 @@ static void begin_draw(PGRAPHState *pg) end_query(r); } + if (pg->clearing) { + end_render_pass(r); + } + bool must_bind_pipeline = r->pipeline_binding_changed; if (!r->in_render_pass) { @@ -1524,6 +1528,10 @@ static void end_draw(PGRAPHState *pg) assert(r->in_command_buffer); assert(r->in_render_pass); + if (pg->clearing) { + end_render_pass(r); + } + r->in_draw = false; // FIXME: We could clear less From 9ab1f96911524ab6ca38372b26be9e6f6d36c7d9 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 00:52:54 -0700 Subject: [PATCH 138/176] nv2a/vk: Make pgraph_vk_insert_debug_marker format strings --- hw/xbox/nv2a/pgraph/vk/debug.c | 27 +++++++++++++++++++-------- hw/xbox/nv2a/pgraph/vk/renderer.h | 3 ++- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c index f7a54de9fc0..90f5b15a845 100644 --- a/hw/xbox/nv2a/pgraph/vk/debug.c +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -59,14 +59,25 @@ void pgraph_vk_debug_frame_terminator(void) } void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, - const char *name, float color[4]) + float color[4], const char *format, ...) 
{ - if (r->debug_utils_extension_enabled) { - VkDebugUtilsLabelEXT label_info = { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - .pLabelName = name, - }; - memcpy(label_info.color, color, 4 * sizeof(float)); - vkCmdInsertDebugUtilsLabelEXT(cmd, &label_info); + if (!r->debug_utils_extension_enabled) { + return; } + + char *buf = NULL; + + va_list args; + va_start(args, format); + int err = vasprintf(&buf, format, args); + assert(err >= 0); + va_end(args); + + VkDebugUtilsLabelEXT label_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pLabelName = buf, + }; + memcpy(label_info.color, color, 4 * sizeof(float)); + vkCmdInsertDebugUtilsLabelEXT(cmd, &label_info); + free(buf); } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index aa177723ad6..43279046d2f 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -392,7 +392,8 @@ void pgraph_vk_check_memory_budget(PGRAPHState *pg); // debug.c void pgraph_vk_debug_init(void); -void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, const char* name, float color[4]); +void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) 
__attribute__ ((format (printf, 4, 5))); // instance.c void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); From 69b5318cb5ab617dd7c9ea3f763819329cb1571d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 01:01:25 -0700 Subject: [PATCH 139/176] nv2a/vk: Fix create_pipeline debug marker inbalance --- hw/xbox/nv2a/pgraph/vk/draw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 148bfd60825..5de11690414 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -42,7 +42,6 @@ void pgraph_vk_draw_begin(NV2AState *d) if (is_nop_draw) { NV2A_VK_DPRINTF("nop!"); - NV2A_VK_DGROUP_END(); return; } } @@ -709,6 +708,8 @@ static void create_pipeline(PGRAPHState *pg) // FIXME: If nothing was dirty, don't even try creating the key or hashing. // Just use the same pipeline. if (r->pipeline_binding && !check_pipeline_dirty(pg)) { + NV2A_VK_DPRINTF("Cache hit"); + NV2A_VK_DGROUP_END(); return; } From f26b8c32d62bb5675ef87069d9ff717858829646 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 1 Aug 2024 15:50:50 -0700 Subject: [PATCH 140/176] n2va/vk: Key textures on sampler state for now --- hw/xbox/nv2a/pgraph/vk/renderer.h | 3 +++ hw/xbox/nv2a/pgraph/vk/texture.c | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 43279046d2f..7baa49acb0b 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -203,6 +203,9 @@ typedef struct TextureKey { hwaddr palette_vram_offset; hwaddr palette_length; float scale; + uint32_t filter; + uint32_t address; + uint32_t border_color; } TextureKey; typedef struct TextureBinding { diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index f74f5c0c4f4..98acf4aec1f 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ 
b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1090,6 +1090,13 @@ static void create_texture(PGRAPHState *pg, int texture_idx) size_t texture_length = pgraph_get_texture_length(pg, &state); + uint32_t filter = + pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); + uint32_t address = + pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4); + uint32_t border_color_pack32 = + pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); + TextureKey key; memset(&key, 0, sizeof(key)); key.state = state; @@ -1099,6 +1106,11 @@ static void create_texture(PGRAPHState *pg, int texture_idx) key.palette_length = texture_palette_data_size; key.scale = 1; + // FIXME: Separate sampler from texture + key.filter = filter; + key.address = address; + key.border_color = border_color_pack32; + bool is_indexed = (state.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); @@ -1244,8 +1256,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) VkSamplerCustomBorderColorCreateInfoEXT custom_border_color_create_info; VkBorderColor vk_border_color; - uint32_t border_color_pack32 = - pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); bool is_integer_type = vkf.vk_format == VK_FORMAT_R32_UINT; @@ -1285,7 +1295,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) } } - uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) @@ -1320,9 +1329,6 @@ static void create_texture(PGRAPHState *pg, int texture_idx) min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD; - uint32_t address = - pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4); - VkSamplerCreateInfo sampler_create_info = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = vk_mag_filter, From d054b366f87089c5f978262fce59e178477b661f Mon Sep 17 00:00:00 2001 From: 
Matt Borgerson Date: Thu, 1 Aug 2024 17:41:52 -0700 Subject: [PATCH 141/176] nv2a/vk: Add pvideo support --- hw/xbox/nv2a/pgraph/vk/display.c | 411 ++++++++++++++++++++++-------- hw/xbox/nv2a/pgraph/vk/renderer.h | 35 +++ 2 files changed, 335 insertions(+), 111 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index c839cc79394..93caeff8b24 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -19,40 +19,220 @@ #include "renderer.h" +static uint8_t *convert_texture_data__CR8YB8CB8YA8(uint8_t *data_out, + const uint8_t *data_in, + unsigned int width, + unsigned int height, + unsigned int pitch) +{ + int x, y; + for (y = 0; y < height; y++) { + const uint8_t *line = &data_in[y * pitch]; + const uint32_t row_offset = y * width; + for (x = 0; x < width; x++) { + uint8_t *pixel = &data_out[(row_offset + x) * 4]; + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return data_out; +} + +static float pvideo_calculate_scale(unsigned int din_dout, + unsigned int output_size) +{ + float calculated_in = din_dout * (output_size - 1); + calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); + return (calculated_in + 1.0f) / output_size; +} + +static void destroy_pvideo_image(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->pvideo.sampler != VK_NULL_HANDLE) { + vkDestroySampler(r->device, d->pvideo.sampler, NULL); + d->pvideo.sampler = VK_NULL_HANDLE; + } + + if (d->pvideo.image_view != VK_NULL_HANDLE) { + vkDestroyImageView(r->device, d->pvideo.image_view, NULL); + d->pvideo.image_view = VK_NULL_HANDLE; + } + + if (d->pvideo.image != VK_NULL_HANDLE) { + vmaDestroyImage(r->allocator, d->pvideo.image, d->pvideo.allocation); + d->pvideo.image = VK_NULL_HANDLE; + d->pvideo.allocation = VK_NULL_HANDLE; + } +} + +static void create_pvideo_image(PGRAPHState *pg, int width, int height) +{ + 
PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->pvideo.image == VK_NULL_HANDLE || d->pvideo.width != width || + d->pvideo.height != height) { + destroy_pvideo_image(pg); + } + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &d->pvideo.image, + &d->pvideo.allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = d->pvideo.image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &d->pvideo.image_view)); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + 
VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &d->pvideo.sampler)); +} + +static void upload_pvideo_image(PGRAPHState *pg, PvideoState state) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *disp = &r->display; + + create_pvideo_image(pg, state.in_width, state.in_height); + + // FIXME: Dirty tracking. We don't necessarily need to upload so much. + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + + convert_texture_data__CR8YB8CB8YA8( + mapped_memory_ptr, d->vram_ptr + state.base + state.offset, + state.in_width, state.in_height, state.pitch); + + vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + // FIXME: Merge with display renderer command buffer + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + VkBufferMemoryBarrier host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_STAGING_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + + pgraph_vk_transition_image_layout( + pg, cmd, disp->pvideo.image, VK_FORMAT_R8_UNORM, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + 
.imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ state.in_width, state.in_height, 1 }, + }; + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + disp->pvideo.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + pgraph_vk_transition_image_layout(pg, cmd, disp->pvideo.image, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_end_single_time_commands(pg, cmd); +} + static const char *display_frag_glsl = "#version 450\n" "layout(binding = 0) uniform sampler2D tex;\n" "layout(binding = 1) uniform sampler2D pvideo_tex;\n" "layout(push_constant, std430) uniform PushConstants {\n" + " float line_offset;\n" + " vec2 display_size;\n" " bool pvideo_enable;\n" " vec2 pvideo_in_pos;\n" " vec4 pvideo_pos;\n" - " vec3 pvideo_scale;\n" + " vec4 pvideo_scale;\n" " bool pvideo_color_key_enable;\n" - " vec2 display_size;\n" - " float line_offset;\n" " vec4 pvideo_color_key;\n" "};\n" "layout(location = 0) out vec4 out_Color;\n" "void main()\n" "{\n" - " vec2 texCoord = gl_FragCoord.xy/display_size;\n" - " texCoord.y = 1 - texCoord.y;\n" // GL compat + " vec2 tex_coord = gl_FragCoord.xy/display_size;\n" + " tex_coord.y = 1 - tex_coord.y;\n" // GL compat " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" - " out_Color.rgba = texture(tex, texCoord);\n" - // " if (pvideo_enable) {\n" - // " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" - // " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" - // " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" - // " greaterThan(screenCoord, output_region.zw));\n" - // " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" - // " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" - // " vec2 in_st = 
(pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" - // " in_st.y *= -1.0;\n" - // " out_Color.rgba = texture(pvideo_tex, in_st);\n" - // " }\n" - // " }\n" + " tex_coord.y = 1 + rel*(tex_coord.y - 1);" + " out_Color.rgba = texture(tex, tex_coord);\n" + " if (pvideo_enable) {\n" + " vec2 screen_coord = vec2(gl_FragCoord.x, display_size.y - gl_FragCoord.y) * pvideo_scale.z;\n" + " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" + " bvec4 clip = bvec4(lessThan(screen_coord, output_region.xy),\n" + " greaterThan(screen_coord, output_region.zw));\n" + " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" + " vec2 out_xy = screen_coord - pvideo_pos.xy;\n" + " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" + " out_Color.rgba = texture(pvideo_tex, in_st);\n" + " }\n" + " }\n" "}\n"; static void create_descriptor_pool(PGRAPHState *pg) @@ -516,7 +696,7 @@ static void create_display_image(PGRAPHState *pg, int width, int height) assert(glGetError() == GL_NO_ERROR); #endif // WIN32 - + glGenTextures(1, &d->gl_texture_id); glBindTexture(GL_TEXTURE_2D, d->gl_texture_id); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); @@ -561,11 +741,21 @@ static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface) }; // FIXME: PVIDEO Overlay - image_infos[1] = (VkDescriptorImageInfo){ - .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - .imageView = r->dummy_texture.image_view, - .sampler = r->dummy_texture.sampler, - }; + if (r->display.pvideo.state.enabled) { + assert(r->display.pvideo.image_view != VK_NULL_HANDLE); + assert(r->display.pvideo.sampler != VK_NULL_HANDLE); + image_infos[1] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->display.pvideo.image_view, + .sampler = r->display.pvideo.sampler, + }; + } else { + image_infos[1] = (VkDescriptorImageInfo){ + 
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->dummy_texture.image_view, + .sampler = r->dummy_texture.sampler, + }; + } descriptor_writes[1] = (VkWriteDescriptorSet){ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = r->display.descriptor_set, @@ -580,22 +770,11 @@ static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface) descriptor_writes, 0, NULL); } -static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) +static PvideoState get_pvideo_state(PGRAPHState *pg) { NV2AState *d = container_of(pg, NV2AState, pgraph); - PGRAPHVkState *r = pg->vk_renderer_state; - ShaderUniformLayout *l = &r->display.display_frag->push_constants; - - int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache - uniform2f(l, display_size_loc, r->display.width, r->display.height); - - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; - int line_offset_loc = uniform_index(l, "line_offset"); - uniform1f(l, line_offset_loc, line_offset); + PvideoState state; -#if 0 // FIXME: PVIDEO overlay // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. // Many games seem to pass this value when initializing or tearing down // PVIDEO. On its own, this generally does not result in the overlay being @@ -605,109 +784,112 @@ static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) // Since the value seems to be set to 0xFFFFFFFF only in cases where the // content is not valid, it is probably good enough to treat it as an // implicit stop. 
- bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) + state.enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; - glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled); - if (!enabled) { - return; + if (!state.enabled) { + return state; } - hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; - hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; - hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + state.base = d->pvideo.regs[NV_PVIDEO_BASE]; + state.limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + state.offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; - int in_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); - int in_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); - - int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_S); - int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_T); - - int in_pitch = + state.pitch = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); - int in_color = + state.format = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); - unsigned int out_width = + /* TODO: support other color formats */ + assert(state.format == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + state.in_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); + state.in_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); + + state.out_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); - unsigned int out_height = + state.out_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); - float scale_x = 1.0f; - float scale_y = 1.0f; - unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; - unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; - if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_x = pvideo_calculate_scale(ds_dx, 
out_width); - } - if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_y = pvideo_calculate_scale(dt_dy, out_height); - } + state.in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + state.in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + + uint32_t ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; + uint32_t dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; + state.scale_x = ds_dx == NV_PVIDEO_DIN_DOUT_UNITY ? + 1.0f : + pvideo_calculate_scale(ds_dx, state.out_width); + state.scale_y = dt_dy == NV_PVIDEO_DIN_DOUT_UNITY ? + 1.0f : + pvideo_calculate_scale(dt_dy, state.out_height); // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results // in them being capped to the output size, content is not scaled. This is // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF // during initialization or teardown. - if (in_width > out_width) { - in_width = floorf((float)out_width * scale_x + 0.5f); + if (state.in_width > state.out_width) { + state.in_width = floorf((float)state.out_width * state.scale_x + 0.5f); } - if (in_height > out_height) { - in_height = floorf((float)out_height * scale_y + 0.5f); + if (state.in_height > state.out_height) { + state.in_height = floorf((float)state.out_height * state.scale_y + 0.5f); } - /* TODO: support other color formats */ - assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); - - unsigned int out_x = + state.out_x = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); - unsigned int out_y = + state.out_y = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); - unsigned int color_key_enabled = + state.color_key_enabled = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); - glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc, - color_key_enabled); // TODO: Verify that masking off the top byte is correct. 
// SeaBlade sets a color key of 0x80000000 but the texture passed into the // shader is cleared to 0 alpha. - unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; - glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); - - assert(offset + in_pitch * in_height <= limit); - hwaddr end = base + offset + in_pitch * in_height; + state.color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; + + assert(state.offset + state.pitch * state.in_height <= state.limit); + hwaddr end = state.base + state.offset + state.pitch * state.in_height; assert(end <= memory_region_size(d->vram)); - pgraph_apply_scaling_factor(pg, &out_x, &out_y); - pgraph_apply_scaling_factor(pg, &out_width, &out_height); + return state; +} + +static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + ShaderUniformLayout *l = &r->display.display_frag->push_constants; + + int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache + uniform2f(l, display_size_loc, r->display.width, r->display.height); - // Translate for the GL viewport origin. 
- out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + int line_offset = surface->pitch / pline_offset; + int line_offset_loc = uniform_index(l, "line_offset"); + uniform1f(l, line_offset_loc, line_offset); - glActiveTexture(GL_TEXTURE0 + 1); - glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( - d->vram_ptr + base + offset, in_width, in_height, in_pitch); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, - GL_UNSIGNED_BYTE, tex_rgba); - g_free(tex_rgba); - glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1); - glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t); - glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc, - out_x, out_y, out_width, out_height); - glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc, - scale_x, scale_y, 1.0f / pg->surface_scale_factor); -#endif + PvideoState *pvideo = &r->display.pvideo.state; + uniform1i(l, uniform_index(l, "pvideo_enable"), pvideo->enabled); + if (pvideo->enabled) { + uniform1i(l, uniform_index(l, "pvideo_color_key_enable"), + pvideo->color_key_enabled); + uniform4f( + l, uniform_index(l, "pvideo_color_key"), + GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, + GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, + GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, + GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); + uniform2f(l, uniform_index(l, "pvideo_in_pos"), pvideo->in_s, + pvideo->in_t); + uniform4f(l, uniform_index(l, "pvideo_pos"), 
pvideo->out_x, + pvideo->out_y, pvideo->out_width, pvideo->out_height); + uniform4f(l, uniform_index(l, "pvideo_scale"), pvideo->scale_x, + pvideo->scale_y, 1.0f / pg->surface_scale_factor, 1.0); + } } static void render_display(PGRAPHState *pg, SurfaceBinding *surface) @@ -724,6 +906,11 @@ static void render_display(PGRAPHState *pg, SurfaceBinding *surface) pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING); } + disp->pvideo.state = get_pvideo_state(pg); + if (disp->pvideo.state.enabled) { + upload_pvideo_image(pg, disp->pvideo.state); + } + update_uniforms(pg, surface); update_descriptor_set(pg, surface); @@ -854,6 +1041,8 @@ void pgraph_vk_finalize_display(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; + destroy_pvideo_image(pg); + if (r->display.image != VK_NULL_HANDLE) { destroy_current_display_image(pg); } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 7baa49acb0b..8a6b5090aa4 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -229,6 +229,32 @@ typedef struct QueryReport { unsigned int query_count; } QueryReport; +typedef struct PvideoState { + bool enabled; + hwaddr base; + hwaddr limit; + hwaddr offset; + + int pitch; + int format; + + int in_width; + int in_height; + int out_width; + int out_height; + + int in_s; + int in_t; + int out_x; + int out_y; + + float scale_x; + float scale_y; + + bool color_key_enabled; + uint32_t color_key; +} PvideoState; + typedef struct PGRAPHVkDisplayState { ShaderModuleInfo *display_frag; @@ -247,6 +273,15 @@ typedef struct PGRAPHVkDisplayState { VkDeviceMemory memory; VkSampler sampler; + struct { + PvideoState state; + int width, height; + VkImage image; + VkImageView image_view; + VmaAllocation allocation; + VkSampler sampler; + } pvideo; + int width, height; int draw_time; From e5be3f2714964381e0b0cdfcc41d0fd19d251ad6 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 2 Aug 2024 11:14:12 -0700 Subject: [PATCH 142/176] 
nv2a/vk: Add missing math.h include --- hw/xbox/nv2a/pgraph/vk/display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 93caeff8b24..89a1cb87b3b 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -18,6 +18,7 @@ */ #include "renderer.h" +#include static uint8_t *convert_texture_data__CR8YB8CB8YA8(uint8_t *data_out, const uint8_t *data_in, From 0c5b41d6f755c6c2abd0927db79ad5e338c9a6a3 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 2 Aug 2024 12:49:59 -0700 Subject: [PATCH 143/176] ui: Add menubar backend selection --- ui/xui/menubar.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc index bce0e7a0fb0..f0b6c1d5c27 100644 --- a/ui/xui/menubar.cc +++ b/ui/xui/menubar.cc @@ -168,6 +168,15 @@ void ShowMainMenu() g_config.display.ui.scale = ui_scale_idx; } } + + ImGui::Combo("Backend", &g_config.display.renderer, + "Null\0" + "OpenGL\0" +#ifdef CONFIG_VULKAN + "Vulkan\0" +#endif + ); + int rendering_scale = nv2a_get_surface_scale_factor() - 1; if (ImGui::Combo("Int. 
Resolution Scale", &rendering_scale, "1x\0" From 5527e908b769bba1c179ee7b61a7cb29fbb7b5ec Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 10 Nov 2024 14:46:30 -0700 Subject: [PATCH 144/176] nv2a/vk: Process pending surface upload just in time for display --- hw/xbox/nv2a/pgraph/vk/display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 89a1cb87b3b..19364aa3b0e 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -895,6 +895,7 @@ static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) static void render_display(PGRAPHState *pg, SurfaceBinding *surface) { + NV2AState *d = container_of(pg, NV2AState, pgraph); PGRAPHVkState *r = pg->vk_renderer_state; PGRAPHVkDisplayState *disp = &r->display; @@ -907,6 +908,8 @@ static void render_display(PGRAPHState *pg, SurfaceBinding *surface) pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING); } + pgraph_vk_upload_surface_data(d, surface, !tcg_enabled()); + disp->pvideo.state = get_pvideo_state(pg); if (disp->pvideo.state.enabled) { upload_pvideo_image(pg, disp->pvideo.state); From 580c2e9da4e7594c6e7a2ca949a440a3c351760c Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 10 Nov 2024 22:29:33 -0700 Subject: [PATCH 145/176] nv2a/vk: Run full dirty texture check --- hw/xbox/nv2a/pgraph/vk/texture.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 98acf4aec1f..3c4f095360a 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -1415,10 +1415,6 @@ void pgraph_vk_bind_textures(NV2AState *d) r->texture_bindings[i] = &r->dummy_texture; continue; } - if (r->texture_bindings[i] && !pg->texture_dirty[i]) { - // FIXME: Fails to check memory - continue; - } create_texture(pg, i); From c7f82ab79f25e175736e25d3fe851cfefcc5ef91 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 10 Nov 2024 
23:50:06 -0700 Subject: [PATCH 146/176] nv2a/gl: Fix bind_shaders dgroup --- hw/xbox/nv2a/pgraph/gl/shaders.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 0d725dd304b..b532d9e17bb 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -1012,10 +1012,6 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg) { PGRAPHGLState *r = pg->gl_renderer_state; - NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, - vertex_program ? "yes" : "no", - fixed_function ? "yes" : "no"); - bool binding_changed = false; if (r->shader_binding && !test_shaders_dirty(pg) && !pg->program_data_dirty) { nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); @@ -1029,6 +1025,10 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg) ShaderState state = pgraph_get_shader_state(pg); assert(!state.vulkan); + NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, + state.vertex_program ? "yes" : "no", + state.fixed_function ? 
"yes" : "no"); + uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); qemu_mutex_lock(&r->shader_cache_lock); LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state); @@ -1054,11 +1054,11 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg) glUseProgram(r->shader_binding->gl_program); } + NV2A_GL_DGROUP_END(); + update_constants: shader_update_constants(pg, r->shader_binding, binding_changed, state.vertex_program, state.fixed_function); - - NV2A_GL_DGROUP_END(); } GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src) From 974b2be87ad8c732828d13dca312172a29c81829 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 11 Nov 2024 00:30:35 -0700 Subject: [PATCH 147/176] nv2a/vk: Add command buffer region debug markers --- hw/xbox/nv2a/pgraph/vk/debug.c | 38 ++++++++++++++++++++++++ hw/xbox/nv2a/pgraph/vk/display.c | 3 ++ hw/xbox/nv2a/pgraph/vk/draw.c | 18 ++++++++++- hw/xbox/nv2a/pgraph/vk/renderer.h | 11 +++++++ hw/xbox/nv2a/pgraph/vk/surface-compute.c | 4 +++ hw/xbox/nv2a/pgraph/vk/surface.c | 7 +++++ hw/xbox/nv2a/pgraph/vk/texture.c | 10 +++++++ 7 files changed, 90 insertions(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c index 90f5b15a845..5c31c9f1194 100644 --- a/hw/xbox/nv2a/pgraph/vk/debug.c +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -81,3 +81,41 @@ void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, vkCmdInsertDebugUtilsLabelEXT(cmd, &label_info); free(buf); } + +void pgraph_vk_begin_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) 
+{ + if (!r->debug_utils_extension_enabled) { + return; + } + + char *buf = NULL; + + va_list args; + va_start(args, format); + int err = vasprintf(&buf, format, args); + assert(err >= 0); + va_end(args); + + VkDebugUtilsLabelEXT label_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pLabelName = buf, + }; + memcpy(label_info.color, color, 4 * sizeof(float)); + vkCmdBeginDebugUtilsLabelEXT(cmd, &label_info); + free(buf); + + r->debug_depth += 1; + assert(r->debug_depth < 10 && "Missing pgraph_vk_debug_marker_end?"); +} + +void pgraph_vk_end_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd) +{ + if (!r->debug_utils_extension_enabled) { + return; + } + + vkCmdEndDebugUtilsLabelEXT(cmd); + assert(r->debug_depth > 0); + r->debug_depth -= 1; +} diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 19364aa3b0e..f740b545232 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -919,6 +919,8 @@ static void render_display(PGRAPHState *pg, SurfaceBinding *surface) update_descriptor_set(pg, surface); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_YELLOW, + "Display Surface %08"HWADDR_PRIx); pgraph_vk_transition_image_layout(pg, cmd, surface->image, surface->host_fmt.vk_format, @@ -994,6 +996,7 @@ static void render_display(PGRAPHState *pg, SurfaceBinding *surface) VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5); diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 5de11690414..16fdfca4302 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1263,9 +1263,9 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) PGRAPHVkState *r = pg->vk_renderer_state; assert(!r->in_draw); + assert(r->debug_depth 
== 0); if (r->in_command_buffer) { - nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]); if (r->in_render_pass) { @@ -1713,6 +1713,9 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) write_zeta ? " zeta" : ""); begin_pre_draw(pg); + pgraph_vk_begin_debug_marker(r, r->command_buffer, + RGBA_BLUE, "Clear %08" HWADDR_PRIx, + binding->vram_addr); begin_draw(pg); // FIXME: What does hardware do when min <= max? @@ -1791,6 +1794,7 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) 1, &clear_rect); } end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); pg->clearing = false; @@ -2082,6 +2086,8 @@ void pgraph_vk_flush_draw(NV2AState *d) copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element); begin_pre_draw(pg); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Draw Arrays"); begin_draw(pg); bind_vertex_buffer(pg, remap.attributes, 0); for (int i = 0; i < pg->draw_arrays_length; i++) { @@ -2091,6 +2097,7 @@ void pgraph_vk_flush_draw(NV2AState *d) vkCmdDraw(r->command_buffer, count, 1, start, 0); } end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); NV2A_VK_DGROUP_END(); } else if (pg->inline_elements_length) { @@ -2121,6 +2128,8 @@ void pgraph_vk_flush_draw(NV2AState *d) begin_pre_draw(pg); VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer( pg, pg->inline_elements, index_data_size); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Elements"); begin_draw(pg); bind_vertex_buffer(pg, remap.attributes, 0); vkCmdBindIndexBuffer(r->command_buffer, @@ -2129,6 +2138,7 @@ void pgraph_vk_flush_draw(NV2AState *d) vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0, 0); end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); NV2A_VK_DGROUP_END(); } else if (pg->inline_buffer_length) { @@ -2159,10 +2169,13 @@ void pgraph_vk_flush_draw(NV2AState *d) begin_pre_draw(pg); VkDeviceSize buffer_offset = 
pgraph_vk_update_vertex_inline_buffer( pg, data, sizes, r->num_active_vertex_attribute_descriptions); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Buffer"); begin_draw(pg); bind_inline_vertex_buffer(pg, buffer_offset); vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0); end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); NV2A_VK_DGROUP_END(); } else if (pg->inline_array_length) { @@ -2200,10 +2213,13 @@ void pgraph_vk_flush_draw(NV2AState *d) void *inline_array_data = pg->inline_array; VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( pg, &inline_array_data, &inline_array_data_size, 1); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Array"); begin_draw(pg); bind_inline_vertex_buffer(pg, buffer_offset); vkCmdDraw(r->command_buffer, index_count, 1, 0, 0); end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); NV2A_VK_DGROUP_END(); } else { NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END"); diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 8a6b5090aa4..250b92f6953 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -321,6 +321,7 @@ typedef struct PGRAPHVkState { void *window; VkInstance instance; VkDebugUtilsMessengerEXT debug_messenger; + int debug_depth; bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; @@ -429,9 +430,19 @@ typedef struct PGRAPHVkState { void pgraph_vk_check_memory_budget(PGRAPHState *pg); // debug.c +#define RGBA_RED (float[4]){1,0,0,1} +#define RGBA_YELLOW (float[4]){1,1,0,1} +#define RGBA_GREEN (float[4]){0,1,0,1} +#define RGBA_BLUE (float[4]){0,0,1,1} +#define RGBA_PINK (float[4]){1,0,1,1} +#define RGBA_DEFAULT (float[4]){0,0,0,0} + void pgraph_vk_debug_init(void); void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, float color[4], const char *format, ...) 
__attribute__ ((format (printf, 4, 5))); +void pgraph_vk_begin_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) __attribute__ ((format (printf, 4, 5))); +void pgraph_vk_end_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd); // instance.c void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 104f91d4687..155eaa2e854 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -434,6 +434,7 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, // FIXME: Smarter workgroup scaling + pgraph_vk_begin_debug_marker(r, cmd, RGBA_PINK, __func__); vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); vkCmdBindDescriptorSets( cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, @@ -449,6 +450,7 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, // FIXME: Check max group count vkCmdDispatch(cmd, group_count, 1, 1); + pgraph_vk_end_debug_marker(r, cmd); } void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, @@ -505,6 +507,7 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, // FIXME: Smarter workgroup scaling + pgraph_vk_begin_debug_marker(r, cmd, RGBA_PINK, __func__); vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); vkCmdBindDescriptorSets( cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, @@ -518,6 +521,7 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); vkCmdDispatch(cmd, group_count, 1, 1); + pgraph_vk_end_debug_marker(r, cmd); } static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state) diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c 
index cadacb8ccfc..f7f68bb0d4d 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -191,6 +191,7 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, @@ -432,6 +433,7 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, &post_copy_dst_barrier, 0, NULL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1); + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); void *mapped_memory_ptr = NULL; @@ -782,6 +784,8 @@ static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) // FIXME: Go right into main command buffer VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); + pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, VK_IMAGE_LAYOUT_UNDEFINED, @@ -789,6 +793,7 @@ static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3); + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE); } @@ -977,6 +982,7 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, vmaUnmapMemory(r->allocator, copy_buffer->allocation); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); VkBufferMemoryBarrier host_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -1226,6 +1232,7 @@ void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2); + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); surface->initialized = true; diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 3c4f095360a..c5d3cf7fe2f 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -556,6 +556,7 @@ static void upload_texture_image(PGRAPHState *pg, int texture_idx, // FIXME: Use nondraw. Need to fill and copy tex buffer at once VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); VkBufferMemoryBarrier host_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -585,6 +586,7 @@ static void upload_texture_image(PGRAPHState *pg, int texture_idx, binding->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_4); + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); // Release decoded texture data @@ -621,6 +623,7 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac surface->vram_addr, surface->width, surface->height); VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); unsigned int scaled_width = surface->width, scaled_height = surface->height; @@ -781,6 +784,7 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_nondraw_commands(pg, cmd); texture->draw_time = surface->draw_time; @@ -795,6 +799,7 @@ static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, return; } + PGRAPHVkState *r = pg->vk_renderer_state; TextureShape *state = &texture->key.state; VkColorFormatInfo 
vkf = kelvin_color_format_vk_map[state->color_format]; @@ -804,6 +809,7 @@ static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, surface->vram_addr, surface->width, surface->height); VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, @@ -842,6 +848,7 @@ static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_nondraw_commands(pg, cmd); texture->draw_time = surface->draw_time; @@ -1001,6 +1008,7 @@ static void create_dummy_texture(PGRAPHState *pg) r->storage_buffers[BUFFER_STAGING_SRC].allocation); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); pgraph_vk_transition_image_layout( pg, cmd, texture_image, VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, @@ -1026,6 +1034,8 @@ static void create_dummy_texture(PGRAPHState *pg) VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); r->dummy_texture = (TextureBinding){ From 986b18214cbed836d69ed239c3d63e9427e8e79f Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Wed, 13 Nov 2024 23:35:59 -0700 Subject: [PATCH 148/176] nv2a/vk: Drop display update early-out --- hw/xbox/nv2a/pgraph/vk/display.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index f740b545232..863d1133575 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -899,10 +899,6 @@ static void render_display(PGRAPHState *pg, SurfaceBinding *surface) PGRAPHVkState *r = pg->vk_renderer_state; 
PGRAPHVkDisplayState *disp = &r->display; - if (disp->draw_time >= surface->draw_time) { - return; - } - if (r->in_command_buffer && surface->draw_time >= r->command_buffer_start_time) { pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING); From de1381c932bf7f5ed6351bcf18ec5579a6f933a5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 14 Nov 2024 01:32:31 -0700 Subject: [PATCH 149/176] nv2a/vk: Drop pipeline merge stat --- hw/xbox/nv2a/debug.h | 1 - hw/xbox/nv2a/pgraph/vk/draw.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 5164295581e..a843df3259a 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -79,7 +79,6 @@ _X(NV2A_PROF_PIPELINE_NOTDIRTY) \ _X(NV2A_PROF_PIPELINE_GEN) \ _X(NV2A_PROF_PIPELINE_BIND) \ - _X(NV2A_PROF_PIPELINE_MERGE) \ _X(NV2A_PROF_PIPELINE_RENDERPASSES) \ _X(NV2A_PROF_BEGIN_ENDS) \ _X(NV2A_PROF_DRAW_ARRAYS) \ diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 16fdfca4302..99e7f65e1b5 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -717,12 +717,6 @@ static void create_pipeline(PGRAPHState *pg) init_pipeline_key(pg, &key); uint64_t hash = fast_hash((void *)&key, sizeof(key)); - static uint64_t last_hash; - if (hash == last_hash) { - nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE); - } - last_hash = hash; - LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key); PipelineBinding *snode = container_of(node, PipelineBinding, node); if (snode->pipeline != VK_NULL_HANDLE) { From 4cd4153937978aa92c2a20a1a7f4461c1f0aed80 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 14 Nov 2024 01:33:22 -0700 Subject: [PATCH 150/176] nv2a/vk: Move reg dirty clear into create_pipeline --- hw/xbox/nv2a/pgraph/vk/draw.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 99e7f65e1b5..ffc2c241c8d 100644 --- 
a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -707,7 +707,12 @@ static void create_pipeline(PGRAPHState *pg) // FIXME: If nothing was dirty, don't even try creating the key or hashing. // Just use the same pipeline. - if (r->pipeline_binding && !check_pipeline_dirty(pg)) { + bool pipeline_dirty = check_pipeline_dirty(pg); + + pgraph_clear_dirty_reg_map(pg); + // FIXME: We could clear less + + if (r->pipeline_binding && !pipeline_dirty) { NV2A_VK_DPRINTF("Cache hit"); NV2A_VK_DGROUP_END(); return; @@ -1528,9 +1533,6 @@ static void end_draw(PGRAPHState *pg) } r->in_draw = false; - - // FIXME: We could clear less - pgraph_clear_dirty_reg_map(pg); } void pgraph_vk_draw_end(NV2AState *d) From 7afeda5da0d67a6de1b897bc875588df2997bddd Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 14 Nov 2024 01:34:16 -0700 Subject: [PATCH 151/176] nv2a/vk: Add regs control_{0,3}, setupraster to shader dirty test --- hw/xbox/nv2a/pgraph/vk/shaders.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 6ba540a5114..b1a9410bc73 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -710,15 +710,19 @@ static bool check_shaders_dirty(PGRAPHState *pg) return true; } } + unsigned int regs[] = { NV_PGRAPH_COMBINECTL, NV_PGRAPH_COMBINESPECFOG0, NV_PGRAPH_COMBINESPECFOG1, + NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_3, NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV1_A, NV_PGRAPH_CSV1_B, NV_PGRAPH_POINTSIZE, + NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SHADERCLIPMODE, NV_PGRAPH_SHADERCTL, NV_PGRAPH_SHADERPROG, From 8dc3b646a3a7e61cc799a2e6ad1b7757add6291d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 14 Nov 2024 02:02:34 -0700 Subject: [PATCH 152/176] nv2a/vk: Move display GL compat after line_offset adjust --- hw/xbox/nv2a/pgraph/vk/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c 
b/hw/xbox/nv2a/pgraph/vk/display.c index 863d1133575..030ab2dea85 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -219,9 +219,9 @@ static const char *display_frag_glsl = "void main()\n" "{\n" " vec2 tex_coord = gl_FragCoord.xy/display_size;\n" - " tex_coord.y = 1 - tex_coord.y;\n" // GL compat " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" " tex_coord.y = 1 + rel*(tex_coord.y - 1);" + " tex_coord.y = 1 - tex_coord.y;\n" // GL compat " out_Color.rgba = texture(tex, tex_coord);\n" " if (pvideo_enable) {\n" " vec2 screen_coord = vec2(gl_FragCoord.x, display_size.y - gl_FragCoord.y) * pvideo_scale.z;\n" From 58c1daf5949aa2e40844419c24459959c5f8ea45 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 14 Nov 2024 14:55:21 -0700 Subject: [PATCH 153/176] nv2a/vk: Report dirty if no pipeline is bound --- hw/xbox/nv2a/pgraph/vk/draw.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index ffc2c241c8d..47b132541f7 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -628,10 +628,9 @@ static bool check_render_pass_dirty(PGRAPHState *pg) static bool check_pipeline_dirty(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - assert(r->pipeline_binding); - if (r->shader_bindings_changed || r->texture_bindings_changed || - check_render_pass_dirty(pg)) { + if (!r->pipeline_binding || r->shader_bindings_changed || + r->texture_bindings_changed || check_render_pass_dirty(pg)) { return true; } From 88835a1019fb0f6bb78676c7f4d5b927fd330b27 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 27 Dec 2024 00:25:49 -0700 Subject: [PATCH 154/176] nv2a/vk,gl: Handle case where pline_offset == 0 --- hw/xbox/nv2a/pgraph/gl/display.c | 2 +- hw/xbox/nv2a/pgraph/vk/display.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c 
index fbea7d2f0c0..a978c54ab0e 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -287,7 +287,7 @@ static void render_display(NV2AState *d, SurfaceBinding *surface) uint32_t pline_offset, pstart_addr, pline_compare; d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; + int line_offset = pline_offset ? surface->pitch / pline_offset : 1; /* Adjust viewport height for interlaced mode, used only in 1080i */ if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 030ab2dea85..76a95cbbf2d 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -869,7 +869,7 @@ static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface) uint32_t pline_offset, pstart_addr, pline_compare; d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; + int line_offset = pline_offset ? 
surface->pitch / pline_offset : 1; int line_offset_loc = uniform_index(l, "line_offset"); uniform1f(l, line_offset_loc, line_offset); From 085fb331414122f839a568b053697e8a9b851be5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 27 Dec 2024 00:26:30 -0700 Subject: [PATCH 155/176] nv2a/vk: Fix external memory handle type on Windows --- hw/xbox/nv2a/pgraph/vk/display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 76a95cbbf2d..ac2bc31a248 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -619,7 +619,11 @@ static void create_display_image(PGRAPHState *pg, int width, int height) VkExternalMemoryImageCreateInfo external_memory_image_create_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, +#ifdef WIN32 + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, +#else .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, +#endif }; image_create_info.pNext = &external_memory_image_create_info; From c098b821085a17a088f8167189611b86e58ec257 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 27 Dec 2024 00:38:56 -0700 Subject: [PATCH 156/176] nv2a/vk: VK_CHECK call to vkBindImageMemory --- hw/xbox/nv2a/pgraph/vk/display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index ac2bc31a248..37fc33786f2 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -654,8 +654,7 @@ static void create_display_image(PGRAPHState *pg, int width, int height) alloc_info.pNext = &export_memory_alloc_info; VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory)); - - vkBindImageMemory(r->device, d->image, d->memory, 0); + VK_CHECK(vkBindImageMemory(r->device, d->image, d->memory, 0)); // Create Image View VkImageViewCreateInfo image_view_create_info = { From 
28c9f5f6efb4ccb801a9e15f291cebc08a1bb591 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 19:41:11 -0700 Subject: [PATCH 157/176] nv2a/vk: Load 16b float depth textures as unorm to match surface w/a --- hw/xbox/nv2a/pgraph/vk/constants.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/constants.h b/hw/xbox/nv2a/pgraph/vk/constants.h index 9ae8ba6dd4c..eb7e97dbc04 100644 --- a/hw/xbox/nv2a/pgraph/vk/constants.h +++ b/hw/xbox/nv2a/pgraph/vk/constants.h @@ -287,7 +287,7 @@ static const VkColorFormatInfo kelvin_color_format_vk_map[66] = { { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, }, [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { - VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16_UNORM, // FIXME { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO }, }, [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { From 1524486e735b54736f0a92b7015365d5c66f0e1b Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 29 Dec 2024 23:35:03 -0700 Subject: [PATCH 158/176] meson: Bump glslang subproject version --- subprojects/glslang.wrap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/glslang.wrap b/subprojects/glslang.wrap index 7e6fe8414ef..223723e721c 100644 --- a/subprojects/glslang.wrap +++ b/subprojects/glslang.wrap @@ -1,4 +1,4 @@ [wrap-git] url=https://github.com/KhronosGroup/glslang -revision=vulkan-sdk-1.3.283.0 +revision=vulkan-sdk-1.3.296.0 depth=1 From 477d5489ac6f28c15ccee3e80420eaaa102c9b08 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 03:01:14 -0700 Subject: [PATCH 159/176] nv2a/vk: Copy remapped vert data after pre-draw --- hw/xbox/nv2a/pgraph/vk/draw.c | 52 ++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 
47b132541f7..d7d63ac09b2 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1930,7 +1930,8 @@ typedef struct VertexBufferRemap { size_t buffer_space_required; struct { VkDeviceAddress offset; - VkDeviceSize stride; + VkDeviceSize old_stride; + VkDeviceSize new_stride; } map[NV2A_VERTEXSHADER_ATTRIBUTES]; } VertexBufferRemap; @@ -1967,21 +1968,35 @@ static VertexBufferRemap remap_unaligned_attributes(PGRAPHState *pg, remap.attributes |= 1 << attr_id; remap.map[attr_id].offset = ROUND_UP(output_offset, element_size); - remap.map[attr_id].stride = element_size * element_count; + remap.map[attr_id].old_stride = desc->stride; + remap.map[attr_id].new_stride = element_size * element_count; // fprintf(stderr, // "attr %02d remapped: " // "%08" HWADDR_PRIx "->%08" HWADDR_PRIx " " // "stride=%d->%zd\n", // attr_id, r->vertex_attribute_offsets[attr_id], - // remap.map[attr_id].offset, desc->stride, - // remap.map[attr_id].stride); + // remap.map[attr_id].offset, + // remap.map[attr_id].old_stride, + // remap.map[attr_id].new_stride); output_offset = - remap.map[attr_id].offset + remap.map[attr_id].stride * num_vertices; + remap.map[attr_id].offset + remap.map[attr_id].new_stride * num_vertices; + desc->stride = remap.map[attr_id].new_stride; } remap.buffer_space_required = output_offset; + + // reserve space + if (remap.attributes) { + StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING]; + VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16); + size_t total_space_required = + (starting_offset - buffer->buffer_offset) + remap.buffer_space_required; + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required); + buffer->buffer_offset = ROUND_UP(buffer->buffer_offset, 16); + } + return remap; } @@ -2000,14 +2015,8 @@ static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, return; } - VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16); - size_t total_space_required 
= - (starting_offset - buffer->buffer_offset) + remap.buffer_space_required; - ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required); assert(pgraph_vk_buffer_has_space_for(pg, BUFFER_VERTEX_INLINE_STAGING, - total_space_required, 1)); - - buffer->buffer_offset = starting_offset; // Aligned + remap.buffer_space_required, 256)); // FIXME: SIMD memcpy // FIXME: Caching @@ -2021,13 +2030,6 @@ static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, continue; } - int bind_desc_loc = - r->vertex_attribute_to_description_location[attr_id]; - assert(bind_desc_loc >= 0); - - VkVertexInputBindingDescription *bind_desc = - &r->vertex_binding_descriptions[bind_desc_loc]; - VkDeviceSize attr_buffer_offset = buffer->buffer_offset + remap.map[attr_id].offset; @@ -2035,15 +2037,15 @@ static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, uint8_t *in_ptr = d->vram_ptr + r->vertex_attribute_offsets[attr_id]; for (int vertex_id = 0; vertex_id < num_vertices; vertex_id++) { - memcpy(out_ptr, in_ptr, remap.map[attr_id].stride); - out_ptr += remap.map[attr_id].stride; - in_ptr += bind_desc->stride; + memcpy(out_ptr, in_ptr, remap.map[attr_id].new_stride); + out_ptr += remap.map[attr_id].new_stride; + in_ptr += remap.map[attr_id].old_stride; } r->vertex_attribute_offsets[attr_id] = attr_buffer_offset; - bind_desc->stride = remap.map[attr_id].stride; } + buffer->buffer_offset += remap.buffer_space_required; } @@ -2078,9 +2080,9 @@ void pgraph_vk_flush_draw(NV2AState *d) } sync_vertex_ram_buffer(pg); VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element); - copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element); begin_pre_draw(pg); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element); pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, "Draw Arrays"); begin_draw(pg); @@ -2118,9 +2120,9 @@ void pgraph_vk_flush_draw(NV2AState *d) pg->inline_elements[pg->inline_elements_length - 1]); 
sync_vertex_ram_buffer(pg); VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element + 1); - copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element + 1); begin_pre_draw(pg); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element + 1); VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer( pg, pg->inline_elements, index_data_size); pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, From 1e5cae068a287cf8d7c8e805e845ce31ee0d54ba Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 03:08:28 -0700 Subject: [PATCH 160/176] nv2a/vk: Drop unused vertex_buffer_inline field --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 -- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 - 2 files changed, 3 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index d7d63ac09b2..121bea5d742 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -2009,8 +2009,6 @@ static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, PGRAPHVkState *r = pg->vk_renderer_state; StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING]; - r->vertex_buffer_inline = remap.attributes; - if (!remap.attributes) { return; } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 250b92f6953..781cc8dc498 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -382,7 +382,6 @@ typedef struct PGRAPHVkState { VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; int num_active_vertex_binding_descriptions; hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; - uint16_t vertex_buffer_inline; QTAILQ_HEAD(, SurfaceBinding) surfaces; QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; From f834b85bb74c5259d83b66348e7aef118e01adf6 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 15:25:52 -0700 Subject: [PATCH 161/176] thirdparty: Bump VulkanMemoryAllocator 
version --- thirdparty/VulkanMemoryAllocator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator index 009ecd192c1..3bab6924988 160000 --- a/thirdparty/VulkanMemoryAllocator +++ b/thirdparty/VulkanMemoryAllocator @@ -1 +1 @@ -Subproject commit 009ecd192c1289c7529bff248a16cfe896254816 +Subproject commit 3bab6924988e5f19bf36586a496156cf72f70d9f From 4a09eeb12107657b8a79d4303cb40eeaf42ca750 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 18:11:18 -0700 Subject: [PATCH 162/176] nv2a/vk: Use unsigned types for clear scissor calculation for now --- hw/xbox/nv2a/pgraph/vk/draw.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 121bea5d742..4cde028b1f4 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1698,10 +1698,10 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) uint32_t clearrectx = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX); uint32_t clearrecty = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY); - int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN); - int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX); - int ymin = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMIN); - int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX); + unsigned int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN); + unsigned int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX); + unsigned int ymin = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMIN); + unsigned int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX); NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax, ymax, write_color ? " color" : "", @@ -1713,14 +1713,15 @@ void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) binding->vram_addr); begin_draw(pg); - // FIXME: What does hardware do when min <= max? 
+ // FIXME: What does hardware do when min >= max? + // FIXME: What does hardware do when min >= surface size? xmin = MIN(xmin, binding->width - 1); ymin = MIN(ymin, binding->height - 1); - xmax = MIN(xmax, binding->width - 1); - ymax = MIN(ymax, binding->height - 1); + xmax = MAX(xmin, MIN(xmax, binding->width - 1)); + ymax = MAX(ymin, MIN(ymax, binding->height - 1)); - int scissor_width = MAX(0, xmax - xmin + 1), - scissor_height = MAX(0, ymax - ymin + 1); + unsigned int scissor_width = MAX(0, xmax - xmin + 1); + unsigned int scissor_height = MAX(0, ymax - ymin + 1); pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); From 407e4639387bc5ad23b7480794ccca3a3e0b26d4 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 18:42:50 -0700 Subject: [PATCH 163/176] scripts/gen-license.py: Support subprojects --- scripts/gen-license.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/gen-license.py b/scripts/gen-license.py index 88733871ad7..817b0b2ae3b 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -146,6 +146,14 @@ def __init__(self, path): @property def head(self): + if self.path.endswith(".wrap"): + with open(self.path, "r", encoding="utf-8") as file: + for line in file.readlines(): + revision_pfx = "revision=" + if line.startswith(revision_pfx): + return line[len(revision_pfx):].strip() + assert False, "revision not found for subproject" + try: return subprocess.run(['git', 'rev-parse', 'HEAD'], cwd=self.path, capture_output=True, @@ -234,7 +242,7 @@ def head(self): Lib('volk', 'https://github.com/zeux/volk', mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md', ships_static=all_platforms, - submodule=Submodule('thirdparty/volk') + submodule=Submodule('subprojects/volk.wrap') ), Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator', From 
c54964a44ac798ea2956dec3cc0a90631c544d3b Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 18:43:36 -0700 Subject: [PATCH 164/176] meson: Convert volk submodule to a subproject --- .gitmodules | 3 --- configure | 2 +- scripts/archive-source.sh | 2 +- subprojects/volk.wrap | 4 ++++ thirdparty/meson.build | 9 +++++++-- thirdparty/volk | 1 - 6 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 subprojects/volk.wrap delete mode 160000 thirdparty/volk diff --git a/.gitmodules b/.gitmodules index 420d7d9cd2d..a0f46b92d4b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -91,9 +91,6 @@ [submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] path = thirdparty/VulkanMemoryAllocator url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator -[submodule "thirdparty/volk"] - path = thirdparty/volk - url = https://github.com/zeux/volk [submodule "thirdparty/SPIRV-Reflect"] path = thirdparty/SPIRV-Reflect url = https://github.com/KhronosGroup/SPIRV-Reflect diff --git a/configure b/configure index 880f30c4bd2..379ba278a33 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" git="git" # Don't accept a target_list environment variable. 
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index e20751e50ef..50b066c1f2a 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -32,7 +32,7 @@ submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloa # xemu extras submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" -submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" +submodules="$submodules thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" sub_deinit="" diff --git a/subprojects/volk.wrap b/subprojects/volk.wrap new file mode 100644 index 00000000000..8089cbce56c --- /dev/null +++ b/subprojects/volk.wrap @@ -0,0 +1,4 @@ +[wrap-git] +url=https://github.com/zeux/volk +revision=1.3.295 +depth=1 diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 8bd29db98ef..1531110dfab 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -1,7 +1,12 @@ if vulkan.found() -libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTOTYPES'], dependencies: vulkan) -volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) +volk_opts = cmake.subproject_options() +volk_opts.add_cmake_defines({'VOLK_STATIC_DEFINES': 'VK_NO_PROTOTYPES'}) +volk_subproj = cmake.subproject('volk', options: volk_opts) +volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], + include_directories: volk_subproj.include_directories('volk'), + link_with: volk_subproj.target('volk'), + dependencies: vulkan) debug_vma = false diff --git a/thirdparty/volk b/thirdparty/volk deleted file mode 160000 index 466085407d5..00000000000 --- a/thirdparty/volk +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 466085407d5d2f50583fd663c1d65f93a7709d3e From 
615748fe4bb2abb5c709ef37efd60984007ba637 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 18:54:57 -0700 Subject: [PATCH 165/176] meson: Convert SPIRV-Reflect submodule to a subproject --- .gitmodules | 3 --- configure | 2 +- scripts/archive-source.sh | 2 +- scripts/gen-license.py | 2 +- subprojects/SPIRV-Reflect.wrap | 4 ++++ thirdparty/SPIRV-Reflect | 1 - thirdparty/meson.build | 8 ++++++-- 7 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 subprojects/SPIRV-Reflect.wrap delete mode 160000 thirdparty/SPIRV-Reflect diff --git a/.gitmodules b/.gitmodules index a0f46b92d4b..0e4365649b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -91,6 +91,3 @@ [submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] path = thirdparty/VulkanMemoryAllocator url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator -[submodule "thirdparty/SPIRV-Reflect"] - path = thirdparty/SPIRV-Reflect - url = https://github.com/KhronosGroup/SPIRV-Reflect diff --git a/configure b/configure index 379ba278a33..b76dade5cb2 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/VulkanMemoryAllocator" git="git" # Don't accept a target_list environment variable. 
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index 50b066c1f2a..e26266c0d47 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -32,7 +32,7 @@ submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloa # xemu extras submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" -submodules="$submodules thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" +submodules="$submodules thirdparty/VulkanMemoryAllocator" sub_deinit="" diff --git a/scripts/gen-license.py b/scripts/gen-license.py index 817b0b2ae3b..3389e68a87f 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -254,7 +254,7 @@ def head(self): Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect', apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE', ships_static=all_platforms, - submodule=Submodule('thirdparty/SPIRV-Reflect') + submodule=Submodule('subprojects/SPIRV-Reflect.wrap') ), # diff --git a/subprojects/SPIRV-Reflect.wrap b/subprojects/SPIRV-Reflect.wrap new file mode 100644 index 00000000000..6893ea7ecb9 --- /dev/null +++ b/subprojects/SPIRV-Reflect.wrap @@ -0,0 +1,4 @@ +[wrap-git] +url=https://github.com/KhronosGroup/SPIRV-Reflect +revision=vulkan-sdk-1.3.296.0 +depth=1 diff --git a/thirdparty/SPIRV-Reflect b/thirdparty/SPIRV-Reflect deleted file mode 160000 index 1d674a82d7e..00000000000 --- a/thirdparty/SPIRV-Reflect +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71 diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 1531110dfab..09cb9311a87 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -25,7 +25,11 @@ endif libvma = static_library('vma', sources: 'vma.cc', cpp_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) vma 
= declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) -libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) -spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) +spirv_reflect_opts = cmake.subproject_options() +spirv_reflect_opts.add_cmake_defines({'SPIRV_REFLECT_STATIC_LIB': 'ON'}) +spirv_reflect_subproj = cmake.subproject('SPIRV-Reflect', options: spirv_reflect_opts) +spirv_reflect = declare_dependency(include_directories: spirv_reflect_subproj.include_directories('spirv-reflect-static'), + link_with: spirv_reflect_subproj.target('spirv-reflect-static'), + dependencies: vulkan) endif From 209c0991a11e4ed76f820d195b057fd54efed384 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 19:18:41 -0700 Subject: [PATCH 166/176] meson: Convert VulkanMemoryAllocator submodule to a subproject --- .gitmodules | 3 --- configure | 2 +- scripts/archive-source.sh | 1 - scripts/gen-license.py | 2 +- subprojects/VulkanMemoryAllocator.wrap | 4 ++++ thirdparty/VulkanMemoryAllocator | 1 - thirdparty/meson.build | 28 +++++++++++++++++--------- 7 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 subprojects/VulkanMemoryAllocator.wrap delete mode 160000 thirdparty/VulkanMemoryAllocator diff --git a/.gitmodules b/.gitmodules index 0e4365649b2..b46286b7f6f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -88,6 +88,3 @@ [submodule "ui/thirdparty/httplib"] path = ui/thirdparty/httplib url = https://github.com/yhirose/cpp-httplib -[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] - path = thirdparty/VulkanMemoryAllocator - url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator diff --git a/configure b/configure index b76dade5cb2..90744ca8ce1 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else 
git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/VulkanMemoryAllocator" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" git="git" # Don't accept a target_list environment variable. diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index e26266c0d47..cd12ea384fd 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -32,7 +32,6 @@ submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloa # xemu extras submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" -submodules="$submodules thirdparty/VulkanMemoryAllocator" sub_deinit="" diff --git a/scripts/gen-license.py b/scripts/gen-license.py index 3389e68a87f..1eb2f499455 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -248,7 +248,7 @@ def head(self): Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator', mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt', ships_static=all_platforms, - submodule=Submodule('thirdparty/VulkanMemoryAllocator') + submodule=Submodule('subprojects/VulkanMemoryAllocator.wrap') ), Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect', diff --git a/subprojects/VulkanMemoryAllocator.wrap b/subprojects/VulkanMemoryAllocator.wrap new file mode 100644 index 00000000000..26f6a0c8cd0 --- /dev/null +++ b/subprojects/VulkanMemoryAllocator.wrap @@ -0,0 +1,4 @@ +[wrap-git] +url=https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator +revision=v3.2.0 +depth=1 diff --git 
a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator deleted file mode 160000 index 3bab6924988..00000000000 --- a/thirdparty/VulkanMemoryAllocator +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3bab6924988e5f19bf36586a496156cf72f70d9f diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 09cb9311a87..56ef6443ead 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -11,19 +11,27 @@ volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], debug_vma = false vma_defns = [ - '-DVMA_STATIC_VULKAN_FUNCTIONS=0', - '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', - ] + '-DVMA_STATIC_VULKAN_FUNCTIONS=0', + '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', + ] if debug_vma - vma_defns += [ - '-DVMA_DEBUG_MARGIN=16', - '-DVMA_DEBUG_DETECT_CORRUPTION=1', - '-DVMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY=256', - ] + vma_defns += [ + '-DVMA_DEBUG_MARGIN=16', + '-DVMA_DEBUG_DETECT_CORRUPTION=1', + '-DVMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY=256', + ] endif -libvma = static_library('vma', sources: 'vma.cc', cpp_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) -vma = declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) + +vma_subproj = cmake.subproject('VulkanMemoryAllocator') +libvma = static_library('vma', + sources: 'vma.cc', + cpp_args: vma_defns, + include_directories: vma_subproj.include_directories('VulkanMemoryAllocator'), + dependencies: [vulkan, volk]) +vma = declare_dependency(include_directories: vma_subproj.include_directories('VulkanMemoryAllocator'), + link_with: libvma, + dependencies: vulkan) spirv_reflect_opts = cmake.subproject_options() spirv_reflect_opts.add_cmake_defines({'SPIRV_REFLECT_STATIC_LIB': 'ON'}) From f92713def75b7ff8a2d0ae059496bf2f670001a0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 19:23:55 -0700 Subject: [PATCH 167/176] meson: Move volk, SPIRV-Reflect to main meson.build --- meson.build | 42 
++++++++++++++++++++++++++++++------------ thirdparty/meson.build | 15 --------------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/meson.build b/meson.build index 058c0092d41..fd2912c80f7 100644 --- a/meson.build +++ b/meson.build @@ -1197,19 +1197,37 @@ elif targetos == 'linux' vulkan = dependency('vulkan') endif -if vulkan.found() and not libglslang.found() +if vulkan.found() cmake = import('cmake') - # FIXME: Get spirv-tools to enable opt. - glslang_opts = cmake.subproject_options() - glslang_opts.add_cmake_defines({'ENABLE_OPT': false}) - glslang_subpro = cmake.subproject('glslang', options: glslang_opts) - libglslang = declare_dependency(link_with: [ - glslang_subpro.target('glslang'), - glslang_subpro.target('MachineIndependent'), - glslang_subpro.target('GenericCodeGen'), - glslang_subpro.target('SPIRV'), - ], include_directories: ['subprojects' / 'glslang'] - ) + + if not libglslang.found() + # FIXME: Get spirv-tools to enable opt. + glslang_opts = cmake.subproject_options() + glslang_opts.add_cmake_defines({'ENABLE_OPT': false}) + glslang_subpro = cmake.subproject('glslang', options: glslang_opts) + libglslang = declare_dependency(link_with: [ + glslang_subpro.target('glslang'), + glslang_subpro.target('MachineIndependent'), + glslang_subpro.target('GenericCodeGen'), + glslang_subpro.target('SPIRV'), + ], include_directories: ['subprojects' / 'glslang'] + ) + endif + + volk_opts = cmake.subproject_options() + volk_opts.add_cmake_defines({'VOLK_STATIC_DEFINES': 'VK_NO_PROTOTYPES'}) + volk_subproj = cmake.subproject('volk', options: volk_opts) + volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], + include_directories: volk_subproj.include_directories('volk'), + link_with: volk_subproj.target('volk'), + dependencies: vulkan) + + spirv_reflect_opts = cmake.subproject_options() + spirv_reflect_opts.add_cmake_defines({'SPIRV_REFLECT_STATIC_LIB': 'ON'}) + spirv_reflect_subproj = cmake.subproject('SPIRV-Reflect', options: 
spirv_reflect_opts) + spirv_reflect = declare_dependency(include_directories: spirv_reflect_subproj.include_directories('spirv-reflect-static'), + link_with: spirv_reflect_subproj.target('spirv-reflect-static'), + dependencies: vulkan) endif subdir('thirdparty') diff --git a/thirdparty/meson.build b/thirdparty/meson.build index 56ef6443ead..b04ef34fbbe 100644 --- a/thirdparty/meson.build +++ b/thirdparty/meson.build @@ -1,13 +1,5 @@ if vulkan.found() -volk_opts = cmake.subproject_options() -volk_opts.add_cmake_defines({'VOLK_STATIC_DEFINES': 'VK_NO_PROTOTYPES'}) -volk_subproj = cmake.subproject('volk', options: volk_opts) -volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], - include_directories: volk_subproj.include_directories('volk'), - link_with: volk_subproj.target('volk'), - dependencies: vulkan) - debug_vma = false vma_defns = [ @@ -33,11 +25,4 @@ vma = declare_dependency(include_directories: vma_subproj.include_directories('V link_with: libvma, dependencies: vulkan) -spirv_reflect_opts = cmake.subproject_options() -spirv_reflect_opts.add_cmake_defines({'SPIRV_REFLECT_STATIC_LIB': 'ON'}) -spirv_reflect_subproj = cmake.subproject('SPIRV-Reflect', options: spirv_reflect_opts) -spirv_reflect = declare_dependency(include_directories: spirv_reflect_subproj.include_directories('spirv-reflect-static'), - link_with: spirv_reflect_subproj.target('spirv-reflect-static'), - dependencies: vulkan) - endif From fb7feb7b1f734fd301dea1823276b6ce018b3bfb Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 19:25:59 -0700 Subject: [PATCH 168/176] nv2a/vk: Fix missing display surface addr in debug marker --- hw/xbox/nv2a/pgraph/vk/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c index 37fc33786f2..42982148982 100644 --- a/hw/xbox/nv2a/pgraph/vk/display.c +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -919,7 +919,7 @@ static void render_display(PGRAPHState *pg, 
SurfaceBinding *surface) VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); pgraph_vk_begin_debug_marker(r, cmd, RGBA_YELLOW, - "Display Surface %08"HWADDR_PRIx); + "Display Surface %08"HWADDR_PRIx, surface->vram_addr); pgraph_vk_transition_image_layout(pg, cmd, surface->image, surface->host_fmt.vk_format, From 140b426f512b030582185f17311fc4efbf98377c Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 20:32:40 -0700 Subject: [PATCH 169/176] meson: Pass VK_USE_PLATFORM_WIN32_KHR when building volk --- meson.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meson.build b/meson.build index fd2912c80f7..7c54c0be1fe 100644 --- a/meson.build +++ b/meson.build @@ -1216,6 +1216,9 @@ if vulkan.found() volk_opts = cmake.subproject_options() volk_opts.add_cmake_defines({'VOLK_STATIC_DEFINES': 'VK_NO_PROTOTYPES'}) + if targetos == 'windows' + volk_opts.append_compile_args('c', '-DVK_USE_PLATFORM_WIN32_KHR=1') + endif volk_subproj = cmake.subproject('volk', options: volk_opts) volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: volk_subproj.include_directories('volk'), From 08ce9524411cc282c1ff4709b6b0b83f9d15be15 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 22:03:28 -0700 Subject: [PATCH 170/176] meson: Add subprojects .gitignore --- subprojects/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 subprojects/.gitignore diff --git a/subprojects/.gitignore b/subprojects/.gitignore new file mode 100644 index 00000000000..61ae67bfe7b --- /dev/null +++ b/subprojects/.gitignore @@ -0,0 +1,4 @@ +glslang +SPIRV-Reflect +volk +VulkanMemoryAllocator From fde6b17ed0d9294e22b023fc26809ca32b32f3ba Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 22:29:30 -0700 Subject: [PATCH 171/176] scripts/archive-source.sh: Archive subprojects Cherry-pick part of QEMU 2019cab for Vulkan dependency support before merging future QEMU updates. 
--- .github/workflows/build.yml | 2 ++ scripts/archive-source.sh | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d7aeab6340a..105d4d2cda5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,6 +23,8 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install dependencies + run: sudo apt-get install meson # On push to master, increment patch version and create a new tag on release - name: Increment patch version if: github.event_name == 'push' && github.ref == 'refs/heads/master' diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index cd12ea384fd..08d6e4e2479 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -33,6 +33,8 @@ submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloa submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" +subprojects="glslang SPIRV-Reflect volk VulkanMemoryAllocator" + sub_deinit="" function cleanup() { @@ -56,6 +58,14 @@ function tree_ish() { git archive --format tar "$(tree_ish)" > "$tar_file" test $? -ne 0 && error "failed to archive qemu" + +for sp in $subprojects; do + meson subprojects download $sp + # test $? -ne 0 && error "failed to download subproject $sp" + tar --append --file "$tar_file" --exclude=.git subprojects/$sp + test $? 
-ne 0 && error "failed to append subproject $sp to $tar_file" +done + for sm in $submodules; do status="$(git submodule status "$sm")" smhash="${status#[ +-]}" From 92617d3a31136df9172e5fbe11efe3fab2729902 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 22:50:53 -0700 Subject: [PATCH 172/176] meson: Replace hardcoded glslang include path with subproject query --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 7c54c0be1fe..2d4a8af56af 100644 --- a/meson.build +++ b/meson.build @@ -1210,7 +1210,7 @@ if vulkan.found() glslang_subpro.target('MachineIndependent'), glslang_subpro.target('GenericCodeGen'), glslang_subpro.target('SPIRV'), - ], include_directories: ['subprojects' / 'glslang'] + ], include_directories: glslang_subpro.include_directories('glslang') ) endif From ae3fe912233e031e1755fe52c98a71b135483a19 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Mon, 30 Dec 2024 23:16:20 -0700 Subject: [PATCH 173/176] nv2a/gl: Rebase line width feature --- hw/xbox/nv2a/pgraph/gl/draw.c | 2 ++ hw/xbox/nv2a/pgraph/gl/renderer.c | 4 ++++ hw/xbox/nv2a/pgraph/gl/renderer.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 94e9beb50b0..8b1002e70d9 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -321,8 +321,10 @@ void pgraph_gl_draw_begin(NV2AState *d) if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { glEnable(GL_LINE_SMOOTH); + glLineWidth(MIN(r->supportedSmoothLineWidthRange[1], pg->surface_scale_factor)); } else { glDisable(GL_LINE_SMOOTH); + glLineWidth(MIN(r->supportedAliasedLineWidthRange[1], pg->surface_scale_factor)); } if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 
9e22a80f6d5..0e557c2dc8e 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -38,6 +38,7 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) PGRAPHState *pg = &d->pgraph; pg->gl_renderer_state = g_malloc0(sizeof(*pg->gl_renderer_state)); + PGRAPHGLState *r = pg->gl_renderer_state; /* fire up opengl */ glo_set_current(g_nv2a_context_render); @@ -51,6 +52,9 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) /* Internal RGB565 texture format */ assert(glo_check_extension("GL_ARB_ES2_compatibility")); + glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, r->supportedSmoothLineWidthRange); + glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, r->supportedAliasedLineWidthRange); + pgraph_gl_init_surfaces(pg); pgraph_gl_init_reports(d); pgraph_gl_init_textures(d); diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 14160b21b79..0ca124c3fce 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -229,6 +229,9 @@ typedef struct PGRAPHGLState { GLint pvideo_color_key_loc; GLint palette_loc[256]; } disp_rndr; + + GLfloat supportedAliasedLineWidthRange[2]; + GLfloat supportedSmoothLineWidthRange[2]; } PGRAPHGLState; extern GloContext *g_nv2a_context_render; From e67f19d03bd2ceea89f05be6904a67aab15c0af5 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 31 Dec 2024 00:57:53 -0700 Subject: [PATCH 174/176] nv2a/vk,gl: Fix a couple 64b shift/printing bugs --- hw/xbox/nv2a/pgraph/gl/shaders.c | 10 ++++------ hw/xbox/nv2a/pgraph/vk/shaders.c | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index b532d9e17bb..5da89e10e3e 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -376,17 +376,15 @@ bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode) static char *shader_get_bin_directory(uint64_t hash) { const char *cfg_dir = xemu_settings_get_base_path(); - 
uint64_t bin_mask = 0xffffUL << 48; - char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", - cfg_dir, (hash & bin_mask) >> 48); + char *shader_bin_dir = + g_strdup_printf("%s/shaders/%04x", cfg_dir, (uint32_t)(hash >> 48)); return shader_bin_dir; } static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) { - uint64_t bin_mask = 0xffffUL << 48; - return g_strdup_printf("%s/%012lx", shader_bin_dir, - hash & (~bin_mask)); + uint64_t bin_mask = (uint64_t)0xffff << 48; + return g_strdup_printf("%s/%012" PRIx64, shader_bin_dir, hash & ~bin_mask); } static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index b1a9410bc73..f831dece469 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -377,7 +377,7 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) LruNode *node = lru_lookup(&r->shader_cache, hash, state); ShaderBinding *snode = container_of(node, ShaderBinding, node); - NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode); + NV2A_VK_DPRINTF("shader state hash: %016" PRIx64 " %p", hash, snode); if (!snode->initialized) { NV2A_VK_DPRINTF("cache miss"); From b6d6a4709d7563a0cc03a06cd80f7107b54a52e2 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 31 Dec 2024 01:10:58 -0700 Subject: [PATCH 175/176] nv2a/gl: Use snake case for line width ranges --- hw/xbox/nv2a/pgraph/gl/draw.c | 4 ++-- hw/xbox/nv2a/pgraph/gl/renderer.c | 4 ++-- hw/xbox/nv2a/pgraph/gl/renderer.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 8b1002e70d9..a9032562fa9 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -321,10 +321,10 @@ void pgraph_gl_draw_begin(NV2AState *d) if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { 
glEnable(GL_LINE_SMOOTH); - glLineWidth(MIN(r->supportedSmoothLineWidthRange[1], pg->surface_scale_factor)); + glLineWidth(MIN(r->supported_smooth_line_width_range[1], pg->surface_scale_factor)); } else { glDisable(GL_LINE_SMOOTH); - glLineWidth(MIN(r->supportedAliasedLineWidthRange[1], pg->surface_scale_factor)); + glLineWidth(MIN(r->supported_aliased_line_width_range[1], pg->surface_scale_factor)); } if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 0e557c2dc8e..f91a192b25b 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -52,8 +52,8 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) /* Internal RGB565 texture format */ assert(glo_check_extension("GL_ARB_ES2_compatibility")); - glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, r->supportedSmoothLineWidthRange); - glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, r->supportedAliasedLineWidthRange); + glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, r->supported_smooth_line_width_range); + glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, r->supported_aliased_line_width_range); pgraph_gl_init_surfaces(pg); pgraph_gl_init_reports(d); diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 0ca124c3fce..918cbaa5886 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -230,8 +230,8 @@ typedef struct PGRAPHGLState { GLint palette_loc[256]; } disp_rndr; - GLfloat supportedAliasedLineWidthRange[2]; - GLfloat supportedSmoothLineWidthRange[2]; + GLfloat supported_aliased_line_width_range[2]; + GLfloat supported_smooth_line_width_range[2]; } PGRAPHGLState; extern GloContext *g_nv2a_context_render; From 8f478e017a0f7bac7d72d4abe23e77233f221bce Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 31 Dec 2024 03:13:32 -0700 Subject: [PATCH 176/176] nv2a/psh: Handle 3D textures in BUMPENVMAP[_LUM] modes 
--- hw/xbox/nv2a/pgraph/glsl/psh.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 295cc9a1f7c..fcb594d5009 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -589,8 +589,10 @@ static const char *get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } - assert(state->dim_tex[i] == 2); - return sampler2D; + if (state->dim_tex[i] == 2) return sampler2D; + if (state->dim_tex[i] == 3 && mode != PS_TEXTUREMODES_DOT_ST) return sampler3D; + assert(!"Unhandled texture dimensions"); + return NULL; case PS_TEXTUREMODES_PROJECT3D: case PS_TEXTUREMODES_DOT_STR_3D: @@ -969,8 +971,17 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdt%d = bumpMat%d * dsdt%d;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdt%d));\n", - i, i, tex_remap, i, i); + + if (ps->state.dim_tex[i] == 2) { + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdt%d));\n", + i, i, tex_remap, i, i); + } else if (ps->state.dim_tex[i] == 3) { + // FIXME: Does hardware pass through the r/z coordinate or is it 0? 
+ mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(pT%d.xy + dsdt%d, pT%d.z));\n", + i, i, i, i, i); + } else { + assert(!"Unhandled texture dimensions"); + } break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); @@ -987,8 +998,18 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdtl%d.st = bumpMat%d * dsdtl%d.st;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdtl%d.st));\n", - i, i, tex_remap, i, i); + + if (ps->state.dim_tex[i] == 2) { + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdtl%d.st));\n", + i, i, tex_remap, i, i); + } else if (ps->state.dim_tex[i] == 3) { + // FIXME: Does hardware pass through the r/z coordinate or is it 0? + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(pT%d.xy + dsdtl%d.st, pT%d.z));\n", + i, i, i, i, i); + } else { + assert(!"Unhandled texture dimensions"); + } + mstring_append_fmt(vars, "t%d = t%d * (bumpScale%d * dsdtl%d.p + bumpOffset%d);\n", i, i, i, i, i); break;