Skip to content

Commit

Permalink
OpenGL: Do some minor changes to the 3.2 Core Profile renderer to mak…
Browse files Browse the repository at this point in the history
…e it more cross-compatible with OpenGL ES 3.0.

- PBO handling now works via glMapBufferRange() instead of glMapBuffer().
- Polygon states can now be uploaded using plain integer textures. 64k UBOs and TBOs are no longer required.
  • Loading branch information
rogerman committed Jul 6, 2024
1 parent 09090e9 commit 7dbece1
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 6 deletions.
86 changes: 80 additions & 6 deletions desmume/src/OGLRender_3_2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,10 @@ layout (std140) uniform PolyStates\n\
{\n\
ivec4 value[4096];\n\
} polyState;\n\
#else\n\
#elif IS_USING_TBO_POLY_STATES\n\
uniform isamplerBuffer PolyStates;\n\
#else\n\
uniform isampler2D PolyStates;\n\
#endif\n\
uniform int polyIndex;\n\
uniform bool polyDrawShadow;\n\
Expand All @@ -159,8 +161,10 @@ void main()\n\
#if IS_USING_UBO_POLY_STATES\n\
ivec4 polyStateVec = polyState.value[polyIndex >> 2];\n\
int polyStateBits = polyStateVec[polyIndex & 0x03];\n\
#else\n\
#elif IS_USING_TBO_POLY_STATES\n\
int polyStateBits = texelFetch(PolyStates, polyIndex).r;\n\
#else\n\
int polyStateBits = texelFetch(PolyStates, ivec2(polyIndex & 0x00FF, (polyIndex >> 8) & 0x007F), 0).r;\n\
#endif\n\
int texSizeShiftS = (polyStateBits >> 18) & 0x07;\n\
int texSizeShiftT = (polyStateBits >> 21) & 0x07;\n\
Expand Down Expand Up @@ -620,6 +624,7 @@ OpenGLRenderer_3_2::OpenGLRenderer_3_2()
{
_variantID = OpenGLVariantID_CoreProfile_3_2;
_is64kUBOSupported = false;
_isTBOSupported = false;
_isDualSourceBlendingSupported = false;
_isSampleShadingSupported = false;
_isConservativeDepthSupported = false;
Expand Down Expand Up @@ -653,6 +658,9 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUBOSize);
this->_is64kUBOSupported = (maxUBOSize >= 65536);

// TBOs should always be supported in 3.2 Core Profile.
this->_isTBOSupported = true;

GLfloat maxAnisotropyOGL = 1.0f;
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL);
this->_deviceInfo.maxAnisotropy = (float)maxAnisotropyOGL;
Expand Down Expand Up @@ -819,6 +827,18 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
return OGLERROR_NOERR;
}

// Creates the pixel pack buffer object used for reading back rendered frames.
//
// One buffer is generated into ref->pboRenderDataID, bound to GL_PIXEL_PACK_BUFFER,
// given _framebufferColorSizeBytes bytes of uninitialized storage (GL_STREAM_READ),
// and then mapped for reading over its entire range via glMapBufferRange(). The
// resulting pointer is stored in _mappedFramebuffer.
//
// Always returns OGLERROR_NOERR; the pointer returned by the map call is not checked here.
Render3DError OpenGLRenderer_3_2::CreatePBOs()
{
	// Generate the buffer name and leave it bound as the current pixel pack buffer.
	glGenBuffers(1, &this->ref->pboRenderDataID);
	glBindBuffer(GL_PIXEL_PACK_BUFFER, this->ref->pboRenderDataID);
	
	// Allocate storage only (NULL data pointer), sized to the framebuffer color data.
	glBufferData(GL_PIXEL_PACK_BUFFER, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ);
	
	// Map the whole buffer read-only and remember where it landed.
	void *mappedPtr = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
	this->_mappedFramebuffer = (Color4u8 *__restrict)mappedPtr;
	
	return OGLERROR_NOERR;
}

Render3DError OpenGLRenderer_3_2::CreateFBOs()
{
OGLRenderRef &OGLRef = *this->ref;
Expand Down Expand Up @@ -1182,6 +1202,9 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()

if (this->_is64kUBOSupported)
{
// Try transferring the polygon states through a UBO first. This is the fastest method,
// but requires a GPU that supports 64k UBO transfers. Most modern GPUs should support
// this.
if (OGLRef.uboPolyStatesID == 0)
{
glGenBuffers(1, &OGLRef.uboPolyStatesID);
Expand All @@ -1190,8 +1213,11 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
glBindBufferBase(GL_UNIFORM_BUFFER, OGLBindingPointID_PolyStates, OGLRef.uboPolyStatesID);
}
}
else
else if (this->_isTBOSupported)
{
// Older GPUs that support 3.2 Core Profile but not 64k UBOs can transfer the polygon
// states through a TBO instead. While not as fast as using a UBO, TBOs are always
// available on any GPU that supports 3.2 Core Profile.
if (OGLRef.tboPolyStatesID == 0)
{
// Set up poly states TBO
Expand All @@ -1206,6 +1232,21 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
glActiveTexture(GL_TEXTURE0);
}
}
else
{
// For compatibility reasons, we can transfer the polygon states through a plain old
// integer texture. This can be useful for inheritors of this class that may not support
// 64k UBOs or TBOs.
glGenTextures(1, &OGLRef.texPolyStatesID);
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates);
glBindTexture(GL_TEXTURE_2D, OGLRef.texPolyStatesID);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32I, 256, 128, 0, GL_RED_INTEGER, GL_INT, NULL);
glActiveTexture(GL_TEXTURE0);
}

glGenTextures(1, &OGLRef.texFogDensityTableID);
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable);
Expand All @@ -1231,6 +1272,7 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
}
vtxShaderHeader << "\n";
vtxShaderHeader << "#define IS_USING_UBO_POLY_STATES " << ((OGLRef.uboPolyStatesID != 0) ? 1 : 0) << "\n";
vtxShaderHeader << "#define IS_USING_TBO_POLY_STATES " << ((OGLRef.tboPolyStatesID != 0) ? 1 : 0) << "\n";
vtxShaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0\n";
vtxShaderHeader << "\n";

Expand Down Expand Up @@ -2138,7 +2180,7 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co

OGLPolyStates *polyStates = this->_pendingPolyStates;

if (OGLRef.uboPolyStatesID == 0)
if (OGLRef.tboPolyStatesID != 0)
{
// Some drivers seem to have problems with GL_TEXTURE_BUFFER used as the target for
// glMapBufferRange() or glBufferSubData(), causing certain polygons to intermittently
Expand Down Expand Up @@ -2177,10 +2219,17 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co
glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboPolyStatesID);
glBufferSubData(GL_UNIFORM_BUFFER, 0, MAX_CLIPPED_POLY_COUNT_FOR_UBO * sizeof(OGLPolyStates), this->_pendingPolyStates);
}
else
else if (OGLRef.tboPolyStatesID != 0)
{
glUnmapBuffer(GL_TEXTURE_BUFFER);
}
else
{
const GLsizei texH = (GLsizei)((this->_clippedPolyCount >> 8) & 0x007F) + 1;
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates);
glBindTexture(GL_TEXTURE_2D, OGLRef.texPolyStatesID); // Why is this bind necessary? Theoretically, it shouldn't be necessary, but real-world testing has proven otherwise...
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, texH, GL_RED_INTEGER, GL_INT, this->_pendingPolyStates);
}

// Set up the default draw call states.
this->_geometryProgramFlags.value = 0;
Expand Down Expand Up @@ -2564,7 +2613,7 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)

if (this->_mappedFramebuffer != NULL)
{
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
glFinish();
}

Expand Down Expand Up @@ -2627,6 +2676,31 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
return error;
}

// Completes a pending render so that its pixel data can be flushed.
//
// Does nothing (and returns OGLERROR_NOERR) when _renderNeedsFinish is not set.
// Otherwise, if a pixel read is still pending, the pending flag is cleared and the
// buffer bound to GL_PIXEL_PACK_BUFFER is mapped for reading over
// _framebufferColorSizeBytes bytes, with the pointer stored in _mappedFramebuffer.
// Finally, both _renderNeedsFlushMain and _renderNeedsFlush16 are raised.
//
// Returns OGLERROR_BEGINGL_FAILED if BEGINGL() reports failure; in that case the
// pixel-read-pending flag has already been cleared and the flush flags are left
// untouched. Returns OGLERROR_NOERR in every other case.
Render3DError OpenGLRenderer_3_2::RenderFinish()
{
	if (this->_renderNeedsFinish)
	{
		if (this->_pixelReadNeedsFinish)
		{
			// The flag is dropped before attempting to enter GL, matching the
			// order in which a failed BEGINGL() leaves the object state.
			this->_pixelReadNeedsFinish = false;
			
			// NOTE(review): BEGINGL()/ENDGL() presumably bracket access to the GL context - confirm against their definitions.
			if (!BEGINGL())
			{
				return OGLERROR_BEGINGL_FAILED;
			}
			
			void *mappedPtr = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
			this->_mappedFramebuffer = (Color4u8 *__restrict)mappedPtr;
			
			ENDGL();
		}
		
		// Mark both flush paths as needing to run.
		this->_renderNeedsFlushMain = true;
		this->_renderNeedsFlush16 = true;
	}
	
	return OGLERROR_NOERR;
}

Render3DError OpenGLRenderer_3_2::RenderPowerOff()
{
OGLRenderRef &OGLRef = *this->ref;
Expand Down
3 changes: 3 additions & 0 deletions desmume/src/OGLRender_3_2.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1
{
protected:
bool _is64kUBOSupported;
bool _isTBOSupported;
bool _isDualSourceBlendingSupported;
bool _isSampleShadingSupported;
bool _isConservativeDepthSupported;
Expand All @@ -39,6 +40,7 @@ class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1
GLsync _syncBufferSetup;
CACHE_ALIGN OGLPolyStates _pendingPolyStates[CLIPPED_POLYLIST_SIZE];

virtual Render3DError CreatePBOs();
virtual Render3DError CreateFBOs();
virtual void DestroyFBOs();
virtual Render3DError CreateMultisampledFBO(GLsizei numSamples);
Expand Down Expand Up @@ -82,6 +84,7 @@ class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1
~OpenGLRenderer_3_2();

virtual Render3DError InitExtensions();
virtual Render3DError RenderFinish();
virtual Render3DError RenderPowerOff();
};

Expand Down

0 comments on commit 7dbece1

Please sign in to comment.