Skip to content

Commit

Permalink
OpenGL ES Renderer: Fix a major performance bug on many ARM-based mobile devices with integrated GPUs.
Browse files Browse the repository at this point in the history
  • Loading branch information
rogerman committed Aug 10, 2024
1 parent ea648f7 commit 04f97d5
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 28 deletions.
84 changes: 58 additions & 26 deletions desmume/src/OGLRender_3_2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2525,15 +2525,18 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
glReadBuffer(OGL_WORKING_ATTACHMENT_ID);
}

// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
// format without a performance penalty.
if (this->_mappedFramebuffer != NULL)
if (this->isPBOSupported)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
this->_mappedFramebuffer = NULL;
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
// format without a performance penalty.
if (this->_mappedFramebuffer != NULL)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
this->_mappedFramebuffer = NULL;
}

glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
}

glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);

this->_pixelReadNeedsFinish = true;
return OGLERROR_NOERR;
Expand Down Expand Up @@ -3172,19 +3175,23 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)

glFinish();

if (this->_mappedFramebuffer != NULL)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glFinish();
}

const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);

if (this->_mappedFramebuffer != NULL)
if (this->isPBOSupported)
{
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
glFinish();
if (this->_mappedFramebuffer != NULL)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glFinish();
}

glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);

if (this->_mappedFramebuffer != NULL)
{
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
glFinish();
}
}

glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
Expand All @@ -3208,7 +3215,18 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
this->_framebufferHeight = h;
this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.

if (this->isPBOSupported)
{
this->_framebufferColor = NULL;
}
else
{
Color4u8 *oldFramebufferColor = this->_framebufferColor;
Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedPage(newFramebufferColorSizeBytes);
this->_framebufferColor = newFramebufferColor;
free_aligned(oldFramebufferColor);
}

// Recreate shaders that use the framebuffer size.
glUseProgram(0);
Expand Down Expand Up @@ -3261,6 +3279,8 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)

Render3DError OpenGLRenderer_3_2::RenderFinish()
{
OGLRenderRef &OGLRef = *this->ref;

if (!this->_renderNeedsFinish)
{
return OGLERROR_NOERR;
Expand All @@ -3270,11 +3290,20 @@ Render3DError OpenGLRenderer_3_2::RenderFinish()
{
this->_pixelReadNeedsFinish = false;

if(!BEGINGL())
if (!BEGINGL())
{
return OGLERROR_BEGINGL_FAILED;
}
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);

if (this->isPBOSupported)
{
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
}
else
{
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, this->_framebufferColor);
}

ENDGL();
}

Expand All @@ -3298,7 +3327,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes);
memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16));

if(!BEGINGL())
if (!BEGINGL())
{
return OGLERROR_BEGINGL_FAILED;
}
Expand All @@ -3308,14 +3337,17 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID);
glClearBufferfv(GL_COLOR, 0, oglColor);

if (this->_mappedFramebuffer != NULL)
if (this->isPBOSupported)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
this->_mappedFramebuffer = NULL;
if (this->_mappedFramebuffer != NULL)
{
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
this->_mappedFramebuffer = NULL;
}

glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
}

glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);

ENDGL();

this->_pixelReadNeedsFinish = true;
Expand Down
16 changes: 14 additions & 2 deletions desmume/src/OGLRender_ES3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,20 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
this->isVBOSupported = true;
this->CreateVBOs();

this->isPBOSupported = true;
this->CreatePBOs();
// PBOs are only used when reading back the rendered framebuffer for the emulated
// BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
// is always advantageous since such devices are expected to have their GPUs
// connected to a data bus.
//
// However, many ARM-based mobile devices use integrated GPUs of varying degrees
// of memory latency and implementation quality. This means that the performance
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
//
// In fact, many ARM-based devices suffer devastating performance drops when trying
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
// a traditional synchronous glReadPixels() call instead.
this->isPBOSupported = false;

this->isVAOSupported = true;
this->CreateVAOs();
Expand Down

0 comments on commit 04f97d5

Please sign in to comment.