Skip to content

Commit

Permalink
System: Rewrite memory save state handling
Browse files Browse the repository at this point in the history
Makes it more friendly to GPU thread.
  • Loading branch information
stenzek committed Dec 19, 2024
1 parent 6993e6c commit 831c982
Show file tree
Hide file tree
Showing 10 changed files with 387 additions and 231 deletions.
70 changes: 52 additions & 18 deletions src/core/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,16 +285,8 @@ void GPU::SoftReset()
UpdateGPUIdle();
}

bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display)
bool GPU::DoState(StateWrapper& sw, bool update_display)
{
FlushRender();

if (sw.IsReading())
{
// perform a reset to discard all pending draws/fb state
Reset(host_texture == nullptr);
}

sw.Do(&m_GPUSTAT.bits);

sw.Do(&m_draw_mode.mode_reg.bits);
Expand Down Expand Up @@ -390,32 +382,74 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
sw.Do(&m_max_run_ahead);
sw.Do(&m_fifo_size);

if (!sw.DoMarker("GPU-VRAM"))
return false;

sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));

if (sw.IsReading())
{
m_draw_mode.texture_page_changed = true;
m_drawing_area_changed = true;
SetClampedDrawingArea();
UpdateDMARequest();
UpdateCRTCConfig();
UpdateCommandTickEvent();

// If we're paused, need to update the display FB.
if (update_display)
UpdateDisplay();
}

if (!host_texture)
{
if (!sw.DoMarker("GPU-VRAM"))
return false;
return !sw.HasError();
}

sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
// Serializes (or restores, when sw.IsReading()) the base GPU register, CRTC, CLUT, transfer and FIFO
// state through `sw` for an in-memory save state.
//
// Unlike GPU::DoState(), this path emits no "GPU-VRAM" marker and does not read or write g_vram;
// `mss` is not referenced in this body (the GPU_HW / GPU_SW overrides deal with VRAM themselves).
//
// sw:             state stream to read from or write to.
// mss:            memory save state slot; unused by the base implementation.
// update_display: when reading, calls UpdateDisplay() after the state has been applied.
bool GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display)
{
sw.Do(&m_GPUSTAT.bits);

// These structures are copied as raw bytes rather than field-by-field.
sw.DoBytes(&m_draw_mode, sizeof(m_draw_mode));
sw.DoBytes(&m_drawing_area, sizeof(m_drawing_area));
sw.DoBytes(&m_drawing_offset, sizeof(m_drawing_offset));

sw.Do(&m_console_is_pal);
sw.Do(&m_set_texture_disable_mask);

sw.DoBytes(&m_crtc_state, sizeof(m_crtc_state));

sw.Do(&m_blitter_state);
sw.Do(&m_pending_command_ticks);
sw.Do(&m_command_total_words);
sw.Do(&m_GPUREAD_latch);

// CLUT register state plus the cached CLUT contents.
sw.Do(&m_current_clut_reg_bits);
sw.Do(&m_current_clut_is_8bit);
sw.DoBytes(g_gpu_clut, sizeof(g_gpu_clut));

sw.DoBytes(&m_vram_transfer, sizeof(m_vram_transfer));

sw.Do(&m_fifo);
sw.Do(&m_blit_buffer);
sw.Do(&m_blit_remaining_words);
sw.Do(&m_render_command.bits);

sw.Do(&m_max_run_ahead);
sw.Do(&m_fifo_size);

// After loading, mark cached draw state as changed and recompute the values derived from the
// restored registers.
if (sw.IsReading())
{
m_draw_mode.texture_page_changed = true;
m_drawing_area_changed = true;
SetClampedDrawingArea();
UpdateDMARequest();
UpdateCRTCConfig();
UpdateCommandTickEvent();

if (update_display)
UpdateDisplay();

// NOTE(review): second UpdateCommandTickEvent() call in this branch. This listing is a rendered
// diff without +/- markers, so this line may be a removed line from the old DoState() - confirm
// against the real file.
UpdateCommandTickEvent();
}

// NOTE(review): two consecutive returns; as written the `return true;` below is unreachable.
// One of them is presumably a leftover diff line (GPU_HW::DoMemoryState has a comment saying the
// base class never fails) - confirm.
return !sw.HasError();
return true;
}

void GPU::RestoreDeviceContext()
Expand Down
7 changes: 6 additions & 1 deletion src/core/gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ namespace Threading {
class Thread;
}

// Forward declaration: GPU only takes System::MemorySaveState by reference in the
// DoMemoryState() signature, so the full definition is not needed in this header.
namespace System {
struct MemorySaveState;
}

class GPU
{
public:
Expand Down Expand Up @@ -95,7 +99,8 @@ class GPU

virtual bool Initialize(Error* error);
virtual void Reset(bool clear_vram);
virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display);
virtual bool DoState(StateWrapper& sw, bool update_display);
virtual bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display);

// Graphics API state reset/restore - call when drawing the UI etc.
// TODO: replace with "invalidate cached state"
Expand Down
130 changes: 87 additions & 43 deletions src/core/gpu_hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "gpu_sw_rasterizer.h"
#include "host.h"
#include "settings.h"
#include "system.h"
#include "system_private.h"

#include "util/imgui_manager.h"
#include "util/postprocessing.h"
Expand Down Expand Up @@ -315,14 +315,16 @@ void GPU_HW::Reset(bool clear_vram)
ClearFramebuffer();
}

bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display)
bool GPU_HW::DoState(StateWrapper& sw, bool update_display)
{
FlushRender();

// Need to download local VRAM copy before calling the base class, because it serializes this.
if (m_sw_renderer)
{
m_sw_renderer->Sync(true);
}
else if (sw.IsWriting() && !host_texture)
else if (sw.IsWriting())
{
// If SW renderer readbacks aren't enabled, the CLUT won't be populated, which means it'll be invalid if the user
// loads this state with software instead of hardware renderers. So force-update the CLUT.
Expand All @@ -331,65 +333,107 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
GPU::ReadCLUT(g_gpu_clut, GPUTexturePaletteReg{Truncate16(m_current_clut_reg_bits)}, m_current_clut_is_8bit);
}

if (!GPU::DoState(sw, host_texture, update_display))
if (!GPU::DoState(sw, false))
return false;

if (host_texture)
if (sw.IsReading())
{
GPUTexture* tex = *host_texture;
if (sw.IsReading())
{
if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() ||
tex->GetSamples() != m_vram_texture->GetSamples())
{
return false;
}

g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, tex, 0, 0, 0, 0, tex->GetWidth(),
tex->GetHeight());
}
else
{
if (!tex || tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() ||
tex->GetSamples() != m_vram_texture->GetSamples())
{
delete tex;

// We copy to/from the save state texture, but we can't have multisampled non-RTs.
tex = g_gpu_device
->FetchTexture(
m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(),
m_vram_texture->IsMultisampled() ? GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None)
.release();
*host_texture = tex;
if (!tex)
return false;
}
// Wipe out state.
m_batch = {};
m_current_depth = 1;
SetClampedDrawingArea();

g_gpu_device->CopyTextureRegion(tex, 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, tex->GetWidth(),
tex->GetHeight());
}
}
else if (sw.IsReading())
{
// Need to update the VRAM copy on the GPU with the state data.
// Would invalidate the TC, but base DoState() calls Reset().
UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT);

// invalidate the whole VRAM read texture when loading state
DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);
ClearVRAMDirtyRectangle();
SetFullVRAMDirtyRectangle();
UpdateVRAMReadTexture(true, false);
ClearVRAMDirtyRectangle();
ResetBatchVertexDepth();

// refresh display, has to be done here because of the upload above
if (update_display)
UpdateDisplay();
}

// invalidate the whole VRAM read texture when loading state
return GPUTextureCache::DoState(sw, !m_use_texture_cache);
}

// Hardware-renderer implementation of the in-memory save state.
//
// Stream layout written to / read from `sw`, in order:
//   1. raw g_vram contents, only when a software renderer is attached or the texture cache is enabled;
//   2. texture cache state, only when the texture cache is enabled;
//   3. the base GPU state via GPU::DoMemoryState().
// The GPU-side VRAM image does not go through `sw`: it is copied between m_vram_texture and
// mss.vram_texture with CopyTextureRegion().
//
// sw:             state stream to read from or write to.
// mss:            memory save state slot owning the VRAM texture copy.
// update_display: when reading, calls UpdateDisplay() once the VRAM texture has been restored.
// Returns false when saving and the VRAM texture for the slot cannot be allocated.
bool GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display)
{
// sw-for-readbacks just makes a mess here
if (m_sw_renderer)
m_sw_renderer->Sync(true);
if (m_sw_renderer || m_use_texture_cache)
sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));

// This could be faster too.
// NOTE(review): the second argument is always true on this path. DoState() passes
// !m_use_texture_cache, and GPU_SW::DoState() passes true alongside a comment about tossing the
// data - confirm the parameter's meaning and that true is intended here.
if (m_use_texture_cache)
GPUTextureCache::DoState(sw, m_use_texture_cache);

// Base class never fails.
// update_display is passed as false; the display refresh happens below, after the VRAM copy.
GPU::DoMemoryState(sw, mss, false);

if (sw.IsReading())
{
// Release any batch buffer that is still mapped before the batch state is wiped below.
if (m_batch_vertex_ptr)
UnmapGPUBuffer(0, 0);

// NOTE(review): mss.vram_texture is dereferenced without a null check and the size match is only
// debug-asserted - presumably callers only load slots that were saved with the current VRAM
// texture configuration; verify.
DebugAssert(mss.vram_texture->GetWidth() == m_vram_texture->GetWidth() &&
mss.vram_texture->GetHeight() == m_vram_texture->GetHeight() &&
mss.vram_texture->GetSamples() == m_vram_texture->GetSamples());
g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, mss.vram_texture.get(), 0, 0, 0, 0,
m_vram_texture->GetWidth(), m_vram_texture->GetHeight());

// Wipe out state.
DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);
m_batch = {};
SetClampedDrawingArea();
// Mark all of VRAM dirty so the read texture is refreshed in full, then clear the dirty state.
ClearVRAMDirtyRectangle();
SetFullVRAMDirtyRectangle();
UpdateVRAMReadTexture(true, false);
ClearVRAMDirtyRectangle();
ResetBatchVertexDepth();

if (update_display)
UpdateDisplay();
}
else
{
FlushRender();

// saving state
// Drop the slot's texture if it is missing or does not match the current VRAM texture.
if (!mss.vram_texture || mss.vram_texture->GetWidth() != m_vram_texture->GetWidth() ||
mss.vram_texture->GetHeight() != m_vram_texture->GetHeight() ||
mss.vram_texture->GetSamples() != m_vram_texture->GetSamples()) [[unlikely]]
{
g_gpu_device->RecycleTexture(std::move(mss.vram_texture));
mss.vram_texture.reset();
}
if (!mss.vram_texture)
{
// We copy to/from the save state texture, but we can't have multisampled non-RTs.
// NOTE(review): `error` is never passed to FetchTexture() or anything else, so the description
// logged below is whatever a default-constructed Error reports - confirm whether FetchTexture()
// should receive &error.
Error error;
mss.vram_texture = g_gpu_device->FetchTexture(
m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(),
m_vram_texture->IsMultisampled() ? GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None);
if (!mss.vram_texture) [[unlikely]]
{
ERROR_LOG("Failed to allocate VRAM texture for memory save state: {}", error.GetDescription());
return false;
}
}

g_gpu_device->CopyTextureRegion(mss.vram_texture.get(), 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0,
m_vram_texture->GetWidth(), m_vram_texture->GetHeight());
}

// NOTE(review): two consecutive returns; as written the `return true;` below is unreachable and the
// texture cache would be serialized a second time here. This listing is a rendered diff without
// +/- markers, so the first return is presumably a leftover line from DoState() - confirm.
return GPUTextureCache::DoState(sw, !m_use_texture_cache);
return true;
}

void GPU_HW::RestoreDeviceContext()
Expand Down
3 changes: 2 additions & 1 deletion src/core/gpu_hw.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ class GPU_HW final : public GPU

bool Initialize(Error* error) override;
void Reset(bool clear_vram) override;
bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override;
bool DoState(StateWrapper& sw, bool update_display) override;
bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override;

void RestoreDeviceContext() override;

Expand Down
12 changes: 10 additions & 2 deletions src/core/gpu_sw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "system.h"

#include "util/gpu_device.h"
#include "util/state_wrapper.h"

#include "common/align.h"
#include "common/assert.h"
Expand Down Expand Up @@ -58,19 +59,26 @@ bool GPU_SW::Initialize(Error* error)
return true;
}

bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display)
bool GPU_SW::DoState(StateWrapper& sw, bool update_display)
{
// need to ensure the worker thread is done
m_backend.Sync(true);

// ignore the host texture for software mode, since we want to save vram here
if (!GPU::DoState(sw, nullptr, update_display))
if (!GPU::DoState(sw, update_display))
return false;

// need to still call the TC, to toss any data in the state
return GPUTextureCache::DoState(sw, true);
}

bool GPU_SW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display)
{
  // Same backend sync that DoState() performs before touching any state.
  m_backend.Sync(true);

  // The software renderer keeps its image in g_vram, so the raw contents go into the stream
  // ahead of the base-class state.
  const auto vram_size_in_bytes = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16);
  sw.DoBytes(g_vram, vram_size_in_bytes);

  // Everything else (registers, CLUT, FIFO, display refresh) is handled by the base class.
  const bool base_result = GPU::DoMemoryState(sw, mss, update_display);
  return base_result;
}

void GPU_SW::Reset(bool clear_vram)
{
GPU::Reset(clear_vram);
Expand Down
3 changes: 2 additions & 1 deletion src/core/gpu_sw.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class GPU_SW final : public GPU
bool IsHardwareRenderer() const override;

bool Initialize(Error* error) override;
bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override;
bool DoState(StateWrapper& sw, bool update_display) override;
bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override;
void Reset(bool clear_vram) override;
void UpdateSettings(const Settings& old_settings) override;

Expand Down
Loading

0 comments on commit 831c982

Please sign in to comment.